From ecd8860769fe0767e9f15b0faa6ce4769ba1ea8d Mon Sep 17 00:00:00 2001
From: chihlasm
Date: Sat, 21 Feb 2026 15:03:12 -0500
Subject: [PATCH] fix: correct action node schema and improve AI flow quality

- Fix action nodes to use next_node_id (not children) for continuation,
  matching how TreeNavigationPage.tsx navigates action nodes
- Validator now requires next_node_id on all action nodes and flags
  missing ones as broken dead ends
- Update _check_branch_termination: action nodes are not dead ends since
  they continue via next_node_id (validated separately)
- Improve scaffold prompt: branch names must describe observable symptoms
  users can self-identify, not internal category names
- Update branch_detail prompt with clearer action node schema, corrected
  few-shot example showing proper next_node_id on action nodes
- Improve assemble_tree root question to be more user-facing

Co-Authored-By: Claude Sonnet 4.6
---
 backend/app/core/ai_tree_generator_service.py | 56 +++++++++++--------
 backend/app/core/ai_tree_validator.py         | 26 +++++++--
 2 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/backend/app/core/ai_tree_generator_service.py b/backend/app/core/ai_tree_generator_service.py
index 59cf2be8..01db3bf9 100644
--- a/backend/app/core/ai_tree_generator_service.py
+++ b/backend/app/core/ai_tree_generator_service.py
@@ -34,8 +34,10 @@ Context: Your audience is technical MSP staff experienced with Windows Server, A
 Task: Given a flow type, category, name, description, and environment tags, suggest 4-7 top-level branches for the flow.
For TROUBLESHOOTING flows: -- Branches should be symptom-based categories (e.g., "Authentication Failures", "Connectivity Issues", "Performance Degradation") -- Each branch represents a common way the problem manifests +- Branches should describe the symptom the user observes — written as what the user sees or reports +- The branch name becomes a selectable option on the first screen, so it must be self-identifying from a user's perspective +- Good: "Drive letter missing after login", "Mapped drive shows as disconnected (red X)", "Access denied when opening files" +- Bad: "Authentication Failures", "GPO Issues", "Connectivity Problems" — too vague for users to self-identify - Order from most common to least common For PROCEDURE flows: @@ -45,9 +47,9 @@ For PROCEDURE flows: Rules: - Suggest 4-7 branches -- Be specific to the technology/service described — avoid generic branches -- Branch names should be concise (2-5 words) -- Each branch needs a brief description (1 sentence) +- Be specific to the technology/service described — avoid generic internal category names +- Branch names should be concise (3-7 words) and describe the observable symptom or phase +- Each branch needs a brief description (1 sentence) explaining what scenarios it covers - Return ONLY valid JSON, no markdown, no explanation Output format: @@ -62,29 +64,32 @@ You must return ONLY valid JSON — no markdown, no code fences, no explanation. 
Required node schema: -Decision nodes (branching diagnostic questions): -{"id": "unique-slug", "type": "decision", "question": "The diagnostic question", "help_text": "Optional context or command hint", "options": [{"id": "opt-id", "label": "Answer choice", "next_node_id": "child-node-id"}], "children": []} +Decision nodes (branching diagnostic questions — choose the right path): +{"id": "unique-slug", "type": "decision", "question": "The diagnostic question", "help_text": "Optional context or command hint", "options": [{"id": "opt-id", "label": "Specific observable answer", "next_node_id": "child-node-id"}], "children": []} -Action nodes (investigation or remediation steps): -{"id": "unique-slug", "type": "action", "title": "Short title", "description": "Detailed instructions", "commands": ["PowerShell or CMD commands"], "expected_outcome": "What success looks like", "children": []} +Action nodes (a single investigation or remediation step — MUST have next_node_id pointing to the next node): +{"id": "unique-slug", "type": "action", "title": "Short title", "description": "Detailed instructions", "commands": ["PowerShell or CMD commands"], "expected_outcome": "What success looks like", "next_node_id": "id-of-next-sibling-node"} -Solution nodes (leaf nodes — the resolution): +Solution nodes (leaf nodes — the final resolution, no children): {"id": "unique-slug", "type": "solution", "title": "Resolution title", "description": "Full resolution description", "resolution_steps": ["Step 1", "Step 2"]} -Rules: -1. Generate 3-10 nodes for this branch -2. Start with a decision node if troubleshooting, action node if procedure -3. Every branch path MUST end in a solution node — no dead ends -4. Include realistic MSP commands (PowerShell preferred for Windows) -5. Use unique node IDs prefixed with the branch context (e.g., "dns-check-service") -6. CRITICAL — next_node_id must exactly match the "id" of a direct child in the "children" array of that same node. 
Never reference an ID that does not appear as a child of the current node. -7. All option labels must be meaningful and specific -8. Decision nodes must have at least 2 options -9. Return a single root node with its children nested inside -10. Build the tree bottom-up in your head: create leaf nodes first, then reference their IDs in parent options +CRITICAL NAVIGATION RULES: +- Decision node: each option's next_node_id MUST exactly match the "id" of a direct child in that decision node's "children" array +- Action node: next_node_id MUST exactly match the "id" of a sibling node (another child of the same parent decision node) +- Every action node MUST have a next_node_id — action nodes with no next step are broken dead ends +- Solution nodes have no children and no next_node_id — they are the terminus +- Every path through the tree MUST end at a solution node -Few-shot example (abbreviated): -{"id": "dns-root", "type": "decision", "question": "Can the client resolve any DNS names?", "help_text": "Run: nslookup google.com", "options": [{"id": "dns-opt-none", "label": "No DNS resolution at all", "next_node_id": "dns-check-service"}, {"id": "dns-opt-partial", "label": "Some names resolve, others don't", "next_node_id": "dns-check-specific"}], "children": [{"id": "dns-check-service", "type": "action", "title": "Check DNS Service", "description": "Verify the DNS Client service is running", "commands": ["Get-Service -Name Dnscache"], "expected_outcome": "Service should be Running", "children": [{"id": "dns-resolved", "type": "solution", "title": "DNS Service Restored", "description": "DNS client service was stopped", "resolution_steps": ["Restart DNS Client service", "Flush DNS cache: ipconfig /flushdns", "Test resolution"]}]}, {"id": "dns-check-specific", "type": "solution", "title": "Selective DNS Failure", "description": "Specific records missing or stale", "resolution_steps": ["Check DNS server configuration", "Verify zone records", "Clear DNS cache"]}]}""" 
+Additional rules: +1. Generate 4-10 nodes total for this branch +2. Start with a decision node if troubleshooting, action node if procedure +3. Decision nodes must have at least 2 options with specific, observable answer choices +4. Include realistic MSP commands (PowerShell preferred for Windows) +5. Use unique node IDs prefixed with the branch context (e.g., "gpo-check-link") +6. Build the tree bottom-up in your head: create solution/leaf nodes first, then build parent nodes referencing their IDs + +Few-shot example showing correct action node next_node_id usage: +{"id": "dns-root", "type": "decision", "question": "Can the client resolve any DNS names?", "help_text": "Run: nslookup google.com", "options": [{"id": "dns-opt-none", "label": "No — nslookup times out or returns 'server failed'", "next_node_id": "dns-check-service"}, {"id": "dns-opt-partial", "label": "Some names resolve but others fail", "next_node_id": "dns-check-specific"}], "children": [{"id": "dns-check-service", "type": "action", "title": "Check DNS Client Service", "description": "Verify the DNS Client service is running on the affected machine", "commands": ["Get-Service -Name Dnscache | Select-Object Status,StartType"], "expected_outcome": "Status should be Running", "next_node_id": "dns-service-solution"}, {"id": "dns-service-solution", "type": "solution", "title": "DNS Service Was Stopped", "description": "The DNS Client service was stopped, preventing all name resolution", "resolution_steps": ["Run: Start-Service Dnscache", "Set startup type: Set-Service Dnscache -StartupType Automatic", "Flush cache: ipconfig /flushdns", "Test: nslookup google.com"]}, {"id": "dns-check-specific", "type": "solution", "title": "Selective DNS Failure — Stale or Missing Records", "description": "Some records resolve correctly, indicating DNS is functional but specific records are stale or missing", "resolution_steps": ["Check DNS server for missing A/CNAME records", "Clear DNS cache on the DNS server: 
Clear-DnsServerCache", "Flush client cache: ipconfig /flushdns", "Verify with: nslookup "]}]}""" CORRECTIVE_PROMPT_TEMPLATE = """Your previous JSON was invalid for ResolutionFlow's tree schema. @@ -297,14 +302,17 @@ def assemble_tree( # Determine root question based on flow type if flow_type == "troubleshooting": - root_question = f"What issue is the user experiencing with {name}?" + root_question = f"What is the user experiencing? Select the symptom that best matches their report." + root_help = "Choose the option that most closely describes the user's reported problem." else: root_question = f"Which phase of {name} are you working on?" + root_help = None tree_structure = { "id": "root", "type": "decision", "question": root_question, + **({"help_text": root_help} if root_help else {}), "options": options, "children": children, } diff --git a/backend/app/core/ai_tree_validator.py b/backend/app/core/ai_tree_validator.py index fa6bc6dc..7e1df9a9 100644 --- a/backend/app/core/ai_tree_validator.py +++ b/backend/app/core/ai_tree_validator.py @@ -114,7 +114,12 @@ def validate_generated_tree(tree: dict[str, Any]) -> list[str]: elif node_type == "action": next_id = node.get("next_node_id") - if next_id: + if not next_id: + errors.append( + f"Action node '{node_id}' is missing 'next_node_id'. " + "Every action node must point to the next node (a sibling in the parent's children)." + ) + else: all_referenced_ids.add(next_id) elif node_type == "solution": @@ -148,7 +153,11 @@ def validate_generated_tree(tree: dict[str, Any]) -> list[str]: def _check_branch_termination(node: dict[str, Any], errors: list[str]) -> None: - """Verify every branch eventually reaches a solution node.""" + """Verify every branch eventually reaches a solution node. + + Action nodes continue via next_node_id (validated separately). + Only decision nodes with no children are dead ends. 
+ """ if not isinstance(node, dict): return @@ -159,10 +168,19 @@ def _check_branch_termination(node: dict[str, Any], errors: list[str]) -> None: if node_type == "solution": return # Solution is a valid terminus + if node_type == "action": + # Action nodes continue via next_node_id (a sibling), not children. + # next_node_id presence is validated in _validate_node. + # Recurse into children if present (non-standard but tolerate it). + for child in children: + _check_branch_termination(child, errors) + return + + # Decision node: must have children if not children: errors.append( - f"Node '{node_id}' (type={node_type}) is a dead end — " - "it has no children and is not a solution node" + f"Decision node '{node_id}' is a dead end — " + "it has no children" ) return