Files
resolutionflow/backend/app/services/ai_tree_builder.py
Michael Chihlas ac89e7b2fa fix(l1): resolve PR #193 backend review findings (1,4,5,6,7,8,9,10)
Server-assigns a uuid4 id to every AI-generated node (Finding 1 showstopper:
nodes had no id but the advance protocol keys on node_id, so ai_build walks
never advanced past question 1). Replaces the hidden {"node_type":"meta"}
walked_path convention with real category/problem_text/pending_node columns on
l1_walk_sessions (migration 61dda4f615c6) — fixes junk proposals + off-by-one
depth cap (Findings 8,9), and pending_node replays the served node on re-mount
(no duplicate paid LLM call). Intake honors explicit flow_id and adhoc=True
(Findings 4,5); flow_proposals.l1_session_id FK -> CASCADE (Finding 6 time
bomb); L1 category GET is owner+admin like PATCH and require_account_owner_or_admin
delegates to User.can_manage_account (Finding 7); escalate falls back to default
recipients + filters deleted_at + warns when empty (Finding 10). Cleanups: dead
ticket_ref removed, IntakeResponse per-outcome validator, unused acknowledged
dropped, escalations partial index, restored a deleted audit assertion.

Full Phase 2A backend set: 110 passed / 0 failed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-09 15:55:45 -04:00

170 lines
7.2 KiB
Python

"""Constrained, node-by-node L1 decision-tree generation (spec §4/§5/§6.1).
Each call produces ONE node given the problem, category, and full walked path.
Generation is constrained to safe/reversible L1 steps and biased to escalate
early. normalize_walked_path() turns a resolved walk into a valid tree object
for flywheel capture.
"""
import logging
from typing import Any, Optional
from uuid import uuid4
from app.core.ai_provider import get_ai_provider
from app.core.config import settings
from app.services.l1_category_service import HARD_FLOOR_TEXT_PATTERNS
from app.services.llm_utils import parse_llm_json
logger = logging.getLogger(__name__)
# Walk length at which the builder stops asking the model for more nodes and
# returns a depth-cap escalate node instead (compared with len(walked_path)).
MAX_DEPTH = 12

# The only node_type values validate_node() accepts from a generated node.
VALID_NODE_TYPES = {
    "question",
    "instruction",
    "resolved",
    "escalate",
}
class UnsafeNodeError(ValueError):
    """Signal that a generated node is malformed or breaches the hard floor.

    Subclasses ValueError, so callers that already handle ValueError also
    catch it.
    """
# System prompt passed as ``system_prompt`` on every generate_next_node() model
# call. It limits the model to safe, reversible L1 steps and asks for exactly
# one JSON node per reply, using one of the four node_type shapes listed below.
# The text is runtime data read by the model, so any wording change alters
# generation behavior — edit deliberately.
SYSTEM_PROMPT = """\
You are an L1 helpdesk troubleshooting guide builder. Given a problem and the
steps already tried, produce the SINGLE next node of a yes/no decision tree.
HARD RULES:
- Only safe, reversible, observe-or-restart-class steps: checking status, toggling,
restarting, reconnecting, re-entering credentials the USER already knows.
- NEVER produce steps that: edit the registry/system files/boot config; delete or
format data/disks; change credentials/MFA/security/firewall/AV; run elevated or
admin scripts; touch domain controllers/DNS/DHCP or production servers; or have
billing/license impact. These are out of L1 scope.
- When you run out of safe in-scope steps, DO NOT GUESS. Emit an "escalate" node.
Return ONLY a JSON object for ONE node, one of:
{"node_type":"question","text":"<yes/no question>"}
{"node_type":"instruction","text":"<one safe reversible action>"}
{"node_type":"resolved","text":"<confirmation the issue is fixed>"}
{"node_type":"escalate","reason_category":"exhausted_safe_steps","text":"<why>"}
No prose, no markdown fences.
"""
def _assign_id(node: dict[str, Any]) -> dict[str, Any]:
"""Stamp a stable server-side id on a generated node (Finding 1).
The SYSTEM_PROMPT never asks the model for an id — and we must not, since a
model-invented id is neither stable nor trustworthy. But the advance protocol
keys on ``node_id``: without one, the answer to every node is discarded and
the walk can never progress past the first question. So every node the builder
hands back — generated, depth-capped, or generation-failed — gets an id here.
"""
if not node.get("id"):
node["id"] = uuid4().hex[:8]
return node
def _build_context(problem_text: str, category: str, walked_path: list[dict]) -> str:
lines = [f"PROBLEM: {problem_text}", f"CATEGORY: {category}", "STEPS SO FAR:"]
if not walked_path:
lines.append("(none yet — produce the first diagnostic question)")
for i, step in enumerate(walked_path, 1):
ans = step.get("answer")
suffix = f" -> {ans}" if ans else ""
lines.append(f"{i}. [{step.get('node_type','?')}] {step.get('text','')}{suffix}")
return "\n".join(lines)
def validate_node(node: dict[str, Any]) -> dict[str, Any]:
    """Check a generated node's shape and screen its text against the hard floor.

    Args:
        node: The parsed model output for a single node.

    Returns:
        The same ``node`` object, unchanged, when it passes every check.

    Raises:
        UnsafeNodeError: If ``node`` is not a dict, its ``node_type`` is not a
            string in VALID_NODE_TYPES, its ``text`` is present but not a
            string, or the lower-cased text contains any entry of
            HARD_FLOOR_TEXT_PATTERNS.
    """
    node_type = node.get("node_type") if isinstance(node, dict) else None
    # The isinstance guard keeps an unhashable node_type (list/dict from a
    # confused model) from raising TypeError in the set-membership test.
    if not isinstance(node_type, str) or node_type not in VALID_NODE_TYPES:
        raise UnsafeNodeError(f"invalid node_type: {node!r}")

    raw_text = node.get("text")
    # A non-string text is a malformed node; report it through the documented
    # exception rather than letting .lower() fail with AttributeError.
    if raw_text is not None and not isinstance(raw_text, str):
        raise UnsafeNodeError(f"node text must be a string: {node!r}")

    # Matching is a plain substring test on the lower-cased text.
    # NOTE(review): presumably the patterns are stored lower-case — confirm
    # against l1_category_service.
    text = (raw_text or "").lower()
    for pat in HARD_FLOOR_TEXT_PATTERNS:
        if pat in text:
            raise UnsafeNodeError(f"hard-floor pattern '{pat}' in node text")
    return node
def escalate_if_depth_exceeded(walked_path: list[dict]) -> Optional[dict[str, Any]]:
    """Return a depth-cap escalate node once the walk has MAX_DEPTH steps.

    Returns None while ``walked_path`` is shorter than MAX_DEPTH. Otherwise
    returns a new escalate node with ``reason_category`` "depth_cap" and an id
    stamped by _assign_id.
    """
    if len(walked_path) < MAX_DEPTH:
        return None
    depth_cap_node: dict[str, Any] = {
        "node_type": "escalate",
        "reason_category": "depth_cap",
        "text": "Reached the L1 troubleshooting depth limit — escalating to engineering.",
    }
    return _assign_id(depth_cap_node)
async def generate_next_node(
    problem_text: str, category: str, walked_path: list[dict]
) -> dict[str, Any]:
    """Generate and validate the next node; retry once, then escalate.

    Args:
        problem_text: The problem description included in the model context.
        category: The L1 category included in the model context.
        walked_path: Steps walked so far, in order.

    Returns:
        A node dict that always carries a server-assigned ``id``: the depth-cap
        escalate node when the walk is already at the depth limit, a validated
        model-generated node, or a "generation_failed" escalate node when both
        attempts fail.

    Generation or validation failures are not raised; they are logged as
    warnings and count as a failed attempt.
    """
    capped = escalate_if_depth_exceeded(walked_path)
    if capped:
        return capped
    provider = get_ai_provider(settings.get_model_for_action("l1_realtime_build"))
    context = _build_context(problem_text, category, walked_path)
    for attempt in range(2):
        try:
            raw, _, _ = await provider.generate_json(
                system_prompt=SYSTEM_PROMPT,
                messages=[{"role": "user", "content": context}],
                max_tokens=1024,
            )
            node = validate_node(parse_llm_json(raw))
        except Exception as e:
            # Deliberately broad: any provider, parsing or validation failure
            # must end in a retry or a safe escalation, never an error to the
            # caller.
            logger.warning("ai_tree_builder node attempt %d failed: %s", attempt + 1, e)
        else:
            # The prompt never asks for an id, and a model-invented one is
            # neither stable nor trustworthy. Drop any the model volunteered,
            # because _assign_id keeps an existing id.
            node.pop("id", None)
            return _assign_id(node)
    return _assign_id({
        "node_type": "escalate",
        "reason_category": "generation_failed",
        "text": "Could not generate a safe next step — escalating to engineering.",
    })
def normalize_walked_path(walked_path: list[dict]) -> dict[str, Any]:
    """Turn a resolved walk into a valid troubleshooting tree (spec §6.1).

    Args:
        walked_path: The walked steps in order. Each step may carry ``id``,
            ``node_type`` (defaults to "question"), ``text``, ``answer`` and
            ``reason_category``.

    Returns:
        ``{"id": <root id>, "nodes": {id: node}}`` — a dict with an id (passes
        the proposal approval guard). The root is the first step's id. A step
        whose id is missing or empty gets the positional fallback ``n<position>``
        (1-based), and that same id is used wherever the step is referenced.

        Tree shape:
        * question: the answered branch ("yes"/"no", case-insensitive) points
          to the next step and the other branch to a ``needs_review`` stub. If
          the walk ends on the question, the answered branch points to a second
          stub so no edge dangles. Any other answer leaves both branches on the
          unexplored stub.
        * instruction: ``next`` points to the following step, or None when it
          is the last step.
        * other node types carry no outgoing edges.

        An empty walk yields a single ``needs_review`` root node.
    """
    nodes: dict[str, Any] = {}
    if not walked_path:
        root_id = "root"
        nodes[root_id] = {"id": root_id, "node_type": "needs_review",
                          "text": "Empty walk — needs authoring."}
        return {"id": root_id, "nodes": nodes}

    # Resolve every step's id once so a node's own key, the edge pointing at
    # it, and the root id all agree — including when "id" is present but
    # None or empty.
    step_ids = [
        step.get("id") or f"n{position}"
        for position, step in enumerate(walked_path, 1)
    ]

    stub_seq = 0
    for i, step in enumerate(walked_path):
        nid = step_ids[i]
        ntype = step.get("node_type", "question")
        nxt = step_ids[i + 1] if i + 1 < len(step_ids) else None
        node: dict[str, Any] = {"id": nid, "node_type": ntype, "text": step.get("text", "")}
        if step.get("reason_category"):
            node["reason_category"] = step["reason_category"]
        if ntype == "question":
            answer = (step.get("answer") or "").lower()
            stub_seq += 1
            stub_id = f"review-{stub_seq}"
            nodes[stub_id] = {"id": stub_id, "node_type": "needs_review",
                              "text": "Branch not explored during the originating call."}
            traversed_next = nxt
            if traversed_next is None:
                # Walk ended on this question (no terminal recorded) — stub the
                # branch the tech actually took so the tree has no dangling edge.
                stub_seq += 1
                traversed_next = f"review-{stub_seq}"
                nodes[traversed_next] = {"id": traversed_next, "node_type": "needs_review",
                                         "text": "Walk ended here before a terminal step was reached."}
            node["yes_next"] = traversed_next if answer == "yes" else stub_id
            node["no_next"] = traversed_next if answer == "no" else stub_id
        elif ntype == "instruction":
            node["next"] = nxt
        nodes[nid] = node
    return {"id": step_ids[0], "nodes": nodes}