fix(l1): answer buttons must match the question — yes_label/no_label end-to-end

Live walk defect: the builder generated alternatives questions ("Is Jane's account a Microsoft account or a local account?") while the UI could only offer Yes/No.

Root cause: SYSTEM_PROMPT mandated a label-less '<yes/no question>' shape with no way to express the two answers.

- SYSTEM_PROMPT: question nodes must carry yes_label/no_label — the literal button texts; alternatives questions must use the alternatives as labels.
- validate_node: labels hard-floor-scanned, must be distinct non-empty strings.
- _ensure_labels: server defaults missing labels to Yes/No.
- advance_ai_build: records answer_label (and both labels) in walked_path, derived from the server-held pending_node — never client-supplied.
- _build_context: LLM context shows the chosen label, not a bare yes/no (a raw "-> yes" on an alternatives question degrades the next generation).
- normalize_walked_path: captured flywheel trees keep question labels.
- Frontend: buttons render yes_label/no_label; walk transcript and L1EscalationsSection render answer_label.

Phase 2A backend set: 137 passed / 0 failed / 8 deselected. tsc, eslint, vite build clean.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 15:03:15 -04:00
parent db446e1fd6
commit 9c34d1e82d
7 changed files with 182 additions and 9 deletions
--- a/backend/tests/test_ai_tree_builder.py
+++ b/backend/tests/test_ai_tree_builder.py
@@ -48,6 +48,83 @@ async def test_generate_next_node_generation_failed_node_has_id(monkeypatch):
    assert node.get("id")


+# ---------------------------------------------------------------------------
+# Answer labels: the button text must match the question (live-walk defect:
+# "Microsoft account or local account?" rendered with Yes/No buttons).
+# ---------------------------------------------------------------------------
+
+def test_system_prompt_requires_answer_labels():
+    """SYSTEM_PROMPT must name both yes_label and no_label (substring check only).
+    A prompt forcing label-less '<yes/no question>' output is the root cause of
+    the question/button mismatch."""
+    assert "yes_label" in atb.SYSTEM_PROMPT and "no_label" in atb.SYSTEM_PROMPT
+
+
+@pytest.mark.asyncio
+async def test_generated_question_passes_labels_through(monkeypatch):  # labels in the provider reply must reach the node
+    monkeypatch.setattr(
+        atb, "get_ai_provider",  # replace the provider factory for this test only
+        lambda *a, **k: _FakeProvider(  # presumably replays the given JSON as the model reply — confirm in _FakeProvider
+            '{"node_type":"question",'
+            '"text":"Is Jane\'s Windows account a Microsoft account or a local account?",'
+            '"yes_label":"Microsoft account","no_label":"Local account"}'  # the two alternatives are the labels
+        ),
+    )
+    node = await atb.generate_next_node("login issue", "account_login", [])
+    assert node["yes_label"] == "Microsoft account"  # unchanged from the reply
+    assert node["no_label"] == "Local account"  # unchanged from the reply
+
+
+@pytest.mark.asyncio
+async def test_question_missing_labels_gets_yes_no_defaults(monkeypatch):  # label-less question falls back to Yes/No
+    monkeypatch.setattr(
+        atb, "get_ai_provider",  # replace the provider factory for this test only
+        lambda *a, **k: _FakeProvider('{"node_type":"question","text":"Is the printer powered on?"}'),  # reply has no labels
+    )
+    node = await atb.generate_next_node("printer down", "printer", [])
+    assert node["yes_label"] == "Yes"  # default filled in for the missing yes_label
+    assert node["no_label"] == "No"  # default filled in for the missing no_label
+
+
+def test_validate_node_rejects_hard_floor_text_in_labels():  # unsafe wording in a label must be rejected, not just in "text"
+    node = {"node_type": "question", "text": "How should we proceed?",  # question text is presumably benign — confirm
+            "yes_label": "Edit the registry", "no_label": "Wait"}  # yes_label carries the wording expected to be rejected
+    with pytest.raises(atb.UnsafeNodeError):
+        atb.validate_node(node)
+
+
+def test_validate_node_rejects_indistinct_or_malformed_labels():  # two bad label shapes, both must raise
+    base = {"node_type": "question", "text": "Which network is the laptop on?"}  # shared fields; labels added per case
+    with pytest.raises(atb.UnsafeNodeError):
+        atb.validate_node({**base, "yes_label": "Wi-Fi", "no_label": "wi-fi "})  # differ only by case and trailing space
+    with pytest.raises(atb.UnsafeNodeError):
+        atb.validate_node({**base, "yes_label": 1, "no_label": "Ethernet"})  # yes_label is an int, not a string
+
+
+def test_build_context_prefers_answer_label_over_raw_answer():
+    """The LLM context must show what the tech actually chose — 'Q? -> yes' is
+    ambiguous for an alternatives question and degrades the next generation."""
+    ctx = atb._build_context("login issue", "account_login", [
+        {"node_type": "question", "id": "n1",
+         "text": "Microsoft account or local account?",
+         "answer": "yes", "answer_label": "Microsoft account"},  # entry carries both the raw answer and its label
+    ])
+    assert "-> Microsoft account" in ctx  # the label is what appears after the arrow
+    assert "-> yes" not in ctx  # the raw answer must not appear after the arrow
+
+
+def test_normalize_walked_path_preserves_question_labels():  # labels on a walked question must survive normalization
+    walked = [
+        {"node_type": "question", "id": "n1", "text": "Wi-Fi or Ethernet?",
+         "answer": "yes", "answer_label": "Wi-Fi",
+         "yes_label": "Wi-Fi", "no_label": "Ethernet"},  # both button labels recorded on the walked question
+        {"node_type": "resolved", "id": "n2", "text": "Fixed."},  # second and last entry of the walk
+    ]
+    tree = atb.normalize_walked_path(walked)
+    n1 = tree["nodes"]["n1"]  # result is read as a mapping of node id -> node under "nodes"
+    assert n1["yes_label"] == "Wi-Fi" and n1["no_label"] == "Ethernet"  # labels match the walked entry
+
+
 def test_validate_node_rejects_hard_floor_text():
    node = {"node_type": "instruction", "id": "n1", "text": "Open regedit and change the key", "next": "generate"}
    with pytest.raises(atb.UnsafeNodeError):