Live walk defect: the builder generated alternatives questions ("Is Jane's
account a Microsoft account or a local account?") while the UI could only
offer Yes/No. Root cause: SYSTEM_PROMPT mandated a label-less
'<yes/no question>' shape with no way to express the two answers.
- SYSTEM_PROMPT: question nodes must carry yes_label/no_label — the literal
button texts; alternatives questions must use the alternatives as labels.
- validate_node: labels hard-floor-scanned, must be distinct non-empty strings.
- _ensure_labels: server defaults missing labels to Yes/No.
- advance_ai_build: records answer_label (and both labels) in walked_path,
derived from the server-held pending_node — never client-supplied.
- _build_context: LLM context shows the chosen label, not a bare yes/no
(a raw "-> yes" on an alternatives question degrades the next generation).
- normalize_walked_path: captured flywheel trees keep question labels.
- Frontend: buttons render yes_label/no_label; walk transcript and
L1EscalationsSection render answer_label.
Phase 2A backend set: 137 passed / 0 failed / 8 deselected. tsc, eslint,
vite build clean.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
182 lines
7.4 KiB
Python
182 lines
7.4 KiB
Python
import pytest
|
|
from app.services import ai_tree_builder as atb
|
|
|
|
|
|
class _FakeProvider:
    """Stand-in AI provider that hands back one canned raw response.

    The tests install it in place of ``atb.get_ai_provider`` so that
    ``atb.generate_next_node`` runs against a fixed payload.
    """

    def __init__(self, raw):
        # raw: the response text that generate_json returns verbatim.
        self._raw = raw

    async def generate_json(self, *, system_prompt, messages, max_tokens):
        """Ignore every argument and return ``(raw, None, None)``.

        NOTE(review): the two trailing ``None`` slots presumably mirror extra
        fields in the real provider's return tuple — confirm against it.
        """
        return self._raw, None, None
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_generate_next_node_assigns_id_when_model_omits_it(monkeypatch):
    """The SYSTEM_PROMPT never asks the model for an id (Finding 1). The server
    must assign one to every generated node, or the advance protocol — which keys
    on node_id — can never record an answer and the walk stalls on question 1."""
    # The canned model output deliberately carries no "id" field.
    monkeypatch.setattr(
        atb, "get_ai_provider",
        lambda *a, **k: _FakeProvider('{"node_type":"question","text":"Plugged in?"}'),
    )
    node = await atb.generate_next_node("printer down", "printer", [])
    assert node["node_type"] == "question"
    assert node.get("id"), "generated node must carry a server-assigned id"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_generate_next_node_depth_cap_node_has_id(monkeypatch):
    """The depth-cap escalate node must also carry an id (it is persisted as
    current_node_id and may be appended to walked_path)."""
    # A walk that is already atb.MAX_DEPTH answered questions long.
    # NOTE(review): no provider is patched in here — presumably the depth cap
    # returns before any provider call is made; confirm.
    walked = [
        {"node_type": "question", "id": f"n{i}", "text": "?", "answer": "no"}
        for i in range(atb.MAX_DEPTH)
    ]
    node = await atb.generate_next_node("x", "printer", walked)
    assert node["node_type"] == "escalate"
    assert node.get("id")
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_generate_next_node_generation_failed_node_has_id(monkeypatch):
    """When both generation attempts fail, the fallback escalate node carries an id."""
    # The fake provider always answers with text that is not JSON.
    monkeypatch.setattr(
        atb, "get_ai_provider",
        lambda *a, **k: _FakeProvider("not json at all"),
    )
    node = await atb.generate_next_node("x", "printer", [])
    assert node["node_type"] == "escalate"
    assert node["reason_category"] == "generation_failed"
    assert node.get("id")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Answer labels: the button text must match the question (live-walk defect:
# "Microsoft account or local account?" rendered with Yes/No buttons).
# ---------------------------------------------------------------------------
|
|
|
|
def test_system_prompt_requires_answer_labels():
    """The prompt must mandate yes_label/no_label on question nodes — the prompt
    forcing label-less '<yes/no question>' output is the root cause of the
    question/button mismatch."""
    # Checked separately so a failure names the missing field.
    assert "yes_label" in atb.SYSTEM_PROMPT
    assert "no_label" in atb.SYSTEM_PROMPT
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_generated_question_passes_labels_through(monkeypatch):
    """Labels supplied by the model on a question node reach the caller unchanged."""
    monkeypatch.setattr(
        atb, "get_ai_provider",
        lambda *a, **k: _FakeProvider(
            '{"node_type":"question",'
            '"text":"Is Jane\'s Windows account a Microsoft account or a local account?",'
            '"yes_label":"Microsoft account","no_label":"Local account"}'
        ),
    )
    node = await atb.generate_next_node("login issue", "account_login", [])
    assert node["yes_label"] == "Microsoft account"
    assert node["no_label"] == "Local account"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_question_missing_labels_gets_yes_no_defaults(monkeypatch):
    """A question generated without labels comes back with "Yes"/"No" labels."""
    # The canned model output carries neither yes_label nor no_label.
    monkeypatch.setattr(
        atb, "get_ai_provider",
        lambda *a, **k: _FakeProvider(
            '{"node_type":"question","text":"Is the printer powered on?"}'
        ),
    )
    node = await atb.generate_next_node("printer down", "printer", [])
    assert node["yes_label"] == "Yes"
    assert node["no_label"] == "No"
|
|
|
|
|
|
def test_validate_node_rejects_hard_floor_text_in_labels():
    """validate_node must raise UnsafeNodeError when only a label — here the
    yes_label "Edit the registry" — carries the offending text."""
    node = {
        "node_type": "question",
        "text": "How should we proceed?",
        "yes_label": "Edit the registry",
        "no_label": "Wait",
    }
    with pytest.raises(atb.UnsafeNodeError):
        atb.validate_node(node)
|
|
|
|
|
|
def test_validate_node_rejects_indistinct_or_malformed_labels():
    """validate_node must raise UnsafeNodeError for a label pair that differs
    only in case and trailing whitespace, and for a non-string label."""
    base = {"node_type": "question", "text": "Which network is the laptop on?"}
    # "Wi-Fi" vs "wi-fi " — same answer apart from case and a trailing space.
    with pytest.raises(atb.UnsafeNodeError):
        atb.validate_node({**base, "yes_label": "Wi-Fi", "no_label": "wi-fi "})
    # An integer is not an acceptable label.
    with pytest.raises(atb.UnsafeNodeError):
        atb.validate_node({**base, "yes_label": 1, "no_label": "Ethernet"})
|
|
|
|
|
|
def test_build_context_prefers_answer_label_over_raw_answer():
    """The LLM context must show what the tech actually chose — 'Q? -> yes' is
    ambiguous for an alternatives question and degrades the next generation."""
    ctx = atb._build_context("login issue", "account_login", [
        {
            "node_type": "question",
            "id": "n1",
            "text": "Microsoft account or local account?",
            "answer": "yes",
            "answer_label": "Microsoft account",
        },
    ])
    assert "-> Microsoft account" in ctx
    assert "-> yes" not in ctx
|
|
|
|
|
|
def test_normalize_walked_path_preserves_question_labels():
    """The normalized tree keeps yes_label/no_label on the walked question node."""
    walked = [
        {
            "node_type": "question",
            "id": "n1",
            "text": "Wi-Fi or Ethernet?",
            "answer": "yes",
            "answer_label": "Wi-Fi",
            "yes_label": "Wi-Fi",
            "no_label": "Ethernet",
        },
        {"node_type": "resolved", "id": "n2", "text": "Fixed."},
    ]
    tree = atb.normalize_walked_path(walked)
    n1 = tree["nodes"]["n1"]
    # Checked separately so a failure names the label that was lost.
    assert n1["yes_label"] == "Wi-Fi"
    assert n1["no_label"] == "Ethernet"
|
|
|
|
|
|
def test_validate_node_rejects_hard_floor_text():
    """validate_node must raise UnsafeNodeError for an instruction that tells
    the tech to open regedit."""
    node = {
        "node_type": "instruction",
        "id": "n1",
        "text": "Open regedit and change the key",
        "next": "generate",
    }
    with pytest.raises(atb.UnsafeNodeError):
        atb.validate_node(node)
|
|
|
|
|
|
def test_validate_node_accepts_safe_instruction():
    """validate_node returns the node (node_type intact) for a harmless instruction."""
    node = {
        "node_type": "instruction",
        "id": "n1",
        "text": "Restart the printer.",
        "next": "generate",
    }
    assert atb.validate_node(node)["node_type"] == "instruction"
|
|
|
|
|
|
def test_depth_cap_forces_escalate():
    """A walk of atb.MAX_DEPTH answered questions yields an escalate node."""
    walked = [
        {"node_type": "question", "id": f"n{i}", "text": "?", "answer": "no"}
        for i in range(atb.MAX_DEPTH)
    ]
    node = atb.escalate_if_depth_exceeded(walked)
    # Checked separately so a None result fails with a clear assertion.
    assert node is not None
    assert node["node_type"] == "escalate"
|
|
|
|
|
|
def test_normalize_walked_path_builds_valid_tree():
    """A question -> instruction -> resolved walk normalizes into a tree dict
    rooted at the first walked node."""
    walked = [
        {"node_type": "question", "id": "n1", "text": "Powered on?", "answer": "no"},
        {"node_type": "instruction", "id": "n2", "text": "Power it on.", "answer": "ack"},
        {"node_type": "resolved", "id": "n3", "text": "Fixed."},
    ]
    tree = atb.normalize_walked_path(walked)
    assert isinstance(tree, dict)
    assert tree.get("id") == "n1"
    # untraversed 'yes' branch of n1 became a needs_review stub
    assert any(n["node_type"] == "needs_review" for n in tree["nodes"].values())
|
|
|
|
|
|
def test_normalize_walk_ending_on_question_has_no_none_branches():
    """A walk whose last node is a question still gets two non-None branches,
    each pointing at a node that exists in the tree."""
    walked = [
        {"node_type": "question", "id": "n1", "text": "Powered on?", "answer": "no"},
    ]
    tree = atb.normalize_walked_path(walked)
    n1 = tree["nodes"]["n1"]
    assert n1["yes_next"] is not None
    assert n1["no_next"] is not None
    # both branches must reference real nodes present in the tree
    assert n1["yes_next"] in tree["nodes"]
    assert n1["no_next"] in tree["nodes"]
|
|
|
|
|
|
def test_normalize_preserves_escalate_reason_category():
    """The escalate node's reason_category survives normalization."""
    walked = [
        {"node_type": "question", "id": "n1", "text": "On?", "answer": "no"},
        {
            "node_type": "escalate",
            "id": "n2",
            "text": "Beyond L1.",
            "reason_category": "exhausted_safe_steps",
        },
    ]
    tree = atb.normalize_walked_path(walked)
    assert tree["nodes"]["n2"]["reason_category"] == "exhausted_safe_steps"
|
|
|
|
|
|
def test_normalize_empty_walk_returns_needs_review_root():
    """An empty walk normalizes to a tree whose root exists and is needs_review."""
    tree = atb.normalize_walked_path([])
    root_id = tree["id"]
    assert root_id in tree["nodes"]
    assert tree["nodes"][root_id]["node_type"] == "needs_review"
|