fix(l1): answer buttons must match the question — yes_label/no_label end-to-end
Live walk defect: the builder generated alternatives questions ("Is Jane's
account a Microsoft account or a local account?") while the UI could only
offer Yes/No. Root cause: SYSTEM_PROMPT mandated a label-less
'<yes/no question>' shape with no way to express the two answers.
- SYSTEM_PROMPT: question nodes must carry yes_label/no_label — the literal
button texts; alternatives questions must use the alternatives as labels.
- validate_node: labels hard-floor-scanned, must be distinct non-empty strings.
- _ensure_labels: server defaults missing labels to Yes/No.
- advance_ai_build: records answer_label (and both labels) in walked_path,
derived from the server-held pending_node — never client-supplied.
- _build_context: LLM context shows the chosen label, not a bare yes/no
(a raw "-> yes" on an alternatives question degrades the next generation).
- normalize_walked_path: captured flywheel trees keep question labels.
- Frontend: buttons render yes_label/no_label; walk transcript and
L1EscalationsSection render answer_label.
Phase 2A backend set: 137 passed / 0 failed / 8 deselected. tsc, eslint,
vite build clean.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -48,6 +48,83 @@ async def test_generate_next_node_generation_failed_node_has_id(monkeypatch):
|
||||
assert node.get("id")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Answer labels: the button text must match the question (live-walk defect:
# "Microsoft account or local account?" rendered with Yes/No buttons).
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_system_prompt_requires_answer_labels():
    """SYSTEM_PROMPT has to mention both answer-label fields.

    A prompt that forces label-less '<yes/no question>' output is the root
    cause of the question/button mismatch, so both ``yes_label`` and
    ``no_label`` must appear in the prompt text.
    """
    prompt = atb.SYSTEM_PROMPT
    for field in ("yes_label", "no_label"):
        assert field in prompt
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_generated_question_passes_labels_through(monkeypatch):
    """Labels present in the provider's reply must appear unchanged on the node.

    The fake provider answers with an alternatives question carrying explicit
    ``yes_label`` / ``no_label`` values; the node returned by
    ``generate_next_node`` must expose exactly those two strings.
    """
    provider_reply = (
        '{"node_type":"question",'
        '"text":"Is Jane\'s Windows account a Microsoft account or a local account?",'
        '"yes_label":"Microsoft account","no_label":"Local account"}'
    )

    def _stub_provider(*args, **kwargs):
        # Build a new fake on every call, whatever arguments are passed.
        return _FakeProvider(provider_reply)

    monkeypatch.setattr(atb, "get_ai_provider", _stub_provider)

    generated = await atb.generate_next_node("login issue", "account_login", [])

    assert generated["yes_label"] == "Microsoft account"
    assert generated["no_label"] == "Local account"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_question_missing_labels_gets_yes_no_defaults(monkeypatch):
    """A question generated without labels must come back labelled Yes / No.

    The fake provider's reply contains only ``node_type`` and ``text``; the
    returned node is still expected to carry ``yes_label == "Yes"`` and
    ``no_label == "No"``.
    """
    unlabeled_reply = '{"node_type":"question","text":"Is the printer powered on?"}'
    monkeypatch.setattr(
        atb,
        "get_ai_provider",
        lambda *args, **kwargs: _FakeProvider(unlabeled_reply),
    )

    generated = await atb.generate_next_node("printer down", "printer", [])

    expected_defaults = (("yes_label", "Yes"), ("no_label", "No"))
    for key, label in expected_defaults:
        assert generated[key] == label
|
||||
|
||||
|
||||
def test_validate_node_rejects_hard_floor_text_in_labels():
    """validate_node must raise UnsafeNodeError for unsafe label text.

    The question text itself is neutral; presumably it is the
    "Edit the registry" label that trips the hard-floor scan.
    """
    unsafe_question = dict(
        node_type="question",
        text="How should we proceed?",
        yes_label="Edit the registry",
        no_label="Wait",
    )
    with pytest.raises(atb.UnsafeNodeError):
        atb.validate_node(unsafe_question)
|
||||
|
||||
|
||||
def test_validate_node_rejects_indistinct_or_malformed_labels():
    """validate_node must raise UnsafeNodeError for unusable label pairs.

    Two pairs are checked, in order:
    * labels that differ only by letter case and trailing whitespace;
    * a label that is not a string.
    """
    question = {"node_type": "question", "text": "Which network is the laptop on?"}
    rejected_label_pairs = (
        ("Wi-Fi", "wi-fi "),
        (1, "Ethernet"),
    )
    for yes_label, no_label in rejected_label_pairs:
        candidate = dict(question, yes_label=yes_label, no_label=no_label)
        with pytest.raises(atb.UnsafeNodeError):
            atb.validate_node(candidate)
|
||||
|
||||
|
||||
def test_build_context_prefers_answer_label_over_raw_answer():
    """The LLM context must show the chosen label, not the bare yes/no.

    'Q? -> yes' is ambiguous for an alternatives question and degrades the
    next generation, so the context must contain the label the tech actually
    picked and must not contain the raw answer.
    """
    answered_question = {
        "node_type": "question",
        "id": "n1",
        "text": "Microsoft account or local account?",
        "answer": "yes",
        "answer_label": "Microsoft account",
    }

    context = atb._build_context(
        "login issue", "account_login", [answered_question]
    )

    assert "-> Microsoft account" in context
    assert "-> yes" not in context
|
||||
|
||||
|
||||
def test_normalize_walked_path_preserves_question_labels():
    """normalize_walked_path must keep a question's yes_label / no_label.

    A two-step walk (one answered question, one resolved node) is normalized;
    the question node in the resulting tree is looked up by its id and must
    still carry both labels.
    """
    question_step = {
        "node_type": "question",
        "id": "n1",
        "text": "Wi-Fi or Ethernet?",
        "answer": "yes",
        "answer_label": "Wi-Fi",
        "yes_label": "Wi-Fi",
        "no_label": "Ethernet",
    }
    resolved_step = {"node_type": "resolved", "id": "n2", "text": "Fixed."}

    tree = atb.normalize_walked_path([question_step, resolved_step])

    captured = tree["nodes"]["n1"]
    assert captured["yes_label"] == "Wi-Fi"
    assert captured["no_label"] == "Ethernet"
|
||||
|
||||
|
||||
def test_validate_node_rejects_hard_floor_text():
|
||||
node = {"node_type": "instruction", "id": "n1", "text": "Open regedit and change the key", "next": "generate"}
|
||||
with pytest.raises(atb.UnsafeNodeError):
|
||||
|
||||
@@ -1157,6 +1157,41 @@ async def test_advance_ai_build_replays_pending_node_without_regenerating(
|
||||
assert replay["id"] == first["id"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_advance_ai_build_records_answer_label_from_pending_node(
    test_db: AsyncSession,
    monkeypatch,
):
    """Answering a labelled question must record the chosen label.

    The served question carries yes_label/no_label. The answering call passes
    only ``answer="yes"`` and no label, so the ``answer_label`` stored in
    walked_path has to be derived server-side rather than taken from the
    client. 'Microsoft account or local account? -> yes' is meaningless in the
    transcript and in the LLM context.
    """
    from app.services import ai_tree_builder
    from app.services import l1_session_service as svc

    acct = await _make_account(test_db)
    tech = await _make_user(test_db, account_id=acct.id)
    walk = await svc.start_ai_build_session(
        test_db,
        account_id=acct.id,
        user=tech,
        ticket_id="t-label",
        ticket_kind="internal",
        category="account_login",
        problem_text="login issue",
    )

    async def fake_next(problem, category, walked):
        # Always serve the same labelled alternatives question (new dict per call).
        return {
            "node_type": "question",
            "id": "q-acct",
            "text": "Is the account a Microsoft account or a local account?",
            "yes_label": "Microsoft account",
            "no_label": "Local account",
        }

    monkeypatch.setattr(ai_tree_builder, "generate_next_node", fake_next)

    # First call has no node_id; its result is the question answered below.
    served = await svc.advance_ai_build(
        test_db,
        session_id=walk.id,
        problem_text="login issue",
        category="account_login",
        node_id=None,
    )
    # Second call answers that question with the raw "yes" only.
    await svc.advance_ai_build(
        test_db,
        session_id=walk.id,
        problem_text="login issue",
        category="account_login",
        node_id=served["id"],
        node_text=served["text"],
        answer="yes",
    )

    reloaded = await test_db.get(L1WalkSession, walk.id)
    first_step = reloaded.walked_path[0]
    assert first_step["answer"] == "yes"
    assert first_step["answer_label"] == "Microsoft account"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Finding 10: escalation recipient resolution
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user