fix(l1): answer buttons must match the question — yes_label/no_label end-to-end
Live walk defect: the builder generated alternatives questions ("Is Jane's
account a Microsoft account or a local account?") while the UI could only
offer Yes/No. Root cause: SYSTEM_PROMPT mandated a label-less
'<yes/no question>' shape with no way to express the two answers.
- SYSTEM_PROMPT: question nodes must carry yes_label/no_label — the literal
button texts; alternatives questions must use the alternatives as labels.
- validate_node: labels hard-floor-scanned, must be distinct non-empty strings.
- _ensure_labels: server defaults missing labels to Yes/No.
- advance_ai_build: records answer_label (and both labels) in walked_path,
derived from the server-held pending_node — never client-supplied.
- _build_context: LLM context shows the chosen label, not a bare yes/no
(a raw "-> yes" on an alternatives question degrades the next generation).
- normalize_walked_path: captured flywheel trees keep question labels.
- Frontend: buttons render yes_label/no_label; walk transcript and
L1EscalationsSection render answer_label.
Phase 2A backend set: 137 passed / 0 failed / 8 deselected. tsc, eslint,
vite build clean.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -38,11 +38,19 @@ HARD RULES:
|
|||||||
- When you run out of safe in-scope steps, DO NOT GUESS. Emit an "escalate" node.
|
- When you run out of safe in-scope steps, DO NOT GUESS. Emit an "escalate" node.
|
||||||
|
|
||||||
Return ONLY a JSON object for ONE node, one of:
|
Return ONLY a JSON object for ONE node, one of:
|
||||||
{"node_type":"question","text":"<yes/no question>"}
|
{"node_type":"question","text":"<binary question>","yes_label":"<button text>","no_label":"<button text>"}
|
||||||
{"node_type":"instruction","text":"<one safe reversible action>"}
|
{"node_type":"instruction","text":"<one safe reversible action>"}
|
||||||
{"node_type":"resolved","text":"<confirmation the issue is fixed>"}
|
{"node_type":"resolved","text":"<confirmation the issue is fixed>"}
|
||||||
{"node_type":"escalate","reason_category":"exhausted_safe_steps","text":"<why>"}
|
{"node_type":"escalate","reason_category":"exhausted_safe_steps","text":"<why>"}
|
||||||
No prose, no markdown fences.
|
No prose, no markdown fences.
|
||||||
|
|
||||||
|
QUESTION LABELS: yes_label and no_label are the literal button texts the tech
|
||||||
|
clicks — each must be a direct, complete answer to the question. For a plain
|
||||||
|
yes/no question use "Yes"/"No". If the question offers two alternatives
|
||||||
|
("Is it X or Y?"), the labels MUST be those alternatives (yes_label = the
|
||||||
|
first), e.g. {"text":"Is the account a Microsoft account or a local account?",
|
||||||
|
"yes_label":"Microsoft account","no_label":"Local account"}. Never pair an
|
||||||
|
alternatives question with Yes/No labels. Keep labels under 6 words.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -60,12 +68,27 @@ def _assign_id(node: dict[str, Any]) -> dict[str, Any]:
|
|||||||
return node
|
return node
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_labels(node: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""Default question labels to Yes/No when the model omits them.
|
||||||
|
|
||||||
|
Labels are the literal button texts; downstream (UI, walked_path
|
||||||
|
answer_label, LLM context) assumes every served question carries both.
|
||||||
|
"""
|
||||||
|
if node.get("node_type") == "question":
|
||||||
|
node["yes_label"] = (node.get("yes_label") or "Yes").strip() or "Yes"
|
||||||
|
node["no_label"] = (node.get("no_label") or "No").strip() or "No"
|
||||||
|
return node
|
||||||
|
|
||||||
|
|
||||||
def _build_context(problem_text: str, category: str, walked_path: list[dict]) -> str:
|
def _build_context(problem_text: str, category: str, walked_path: list[dict]) -> str:
|
||||||
lines = [f"PROBLEM: {problem_text}", f"CATEGORY: {category}", "STEPS SO FAR:"]
|
lines = [f"PROBLEM: {problem_text}", f"CATEGORY: {category}", "STEPS SO FAR:"]
|
||||||
if not walked_path:
|
if not walked_path:
|
||||||
lines.append("(none yet — produce the first diagnostic question)")
|
lines.append("(none yet — produce the first diagnostic question)")
|
||||||
for i, step in enumerate(walked_path, 1):
|
for i, step in enumerate(walked_path, 1):
|
||||||
ans = step.get("answer")
|
# Prefer the chosen label: for an alternatives question
|
||||||
|
# ("Microsoft account or local account?"), a raw "yes" is ambiguous
|
||||||
|
# and degrades the next generation.
|
||||||
|
ans = step.get("answer_label") or step.get("answer")
|
||||||
suffix = f" -> {ans}" if ans else ""
|
suffix = f" -> {ans}" if ans else ""
|
||||||
lines.append(f"{i}. [{step.get('node_type','?')}] {step.get('text','')}{suffix}")
|
lines.append(f"{i}. [{step.get('node_type','?')}] {step.get('text','')}{suffix}")
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
@@ -79,6 +102,17 @@ def validate_node(node: dict[str, Any]) -> dict[str, Any]:
|
|||||||
for pat in HARD_FLOOR_TEXT_PATTERNS:
|
for pat in HARD_FLOOR_TEXT_PATTERNS:
|
||||||
if pat in text:
|
if pat in text:
|
||||||
raise UnsafeNodeError(f"hard-floor pattern '{pat}' in node text")
|
raise UnsafeNodeError(f"hard-floor pattern '{pat}' in node text")
|
||||||
|
labels = [node.get(k) for k in ("yes_label", "no_label") if node.get(k) is not None]
|
||||||
|
if labels:
|
||||||
|
if not all(isinstance(lb, str) and lb.strip() for lb in labels):
|
||||||
|
raise UnsafeNodeError(f"malformed answer labels: {labels!r}")
|
||||||
|
if len(labels) == 2 and labels[0].strip().lower() == labels[1].strip().lower():
|
||||||
|
raise UnsafeNodeError(f"indistinct answer labels: {labels!r}")
|
||||||
|
for lb in labels:
|
||||||
|
low = lb.lower()
|
||||||
|
for pat in HARD_FLOOR_TEXT_PATTERNS:
|
||||||
|
if pat in low:
|
||||||
|
raise UnsafeNodeError(f"hard-floor pattern '{pat}' in answer label")
|
||||||
return node
|
return node
|
||||||
|
|
||||||
|
|
||||||
@@ -111,7 +145,7 @@ async def generate_next_node(
|
|||||||
max_tokens=1024,
|
max_tokens=1024,
|
||||||
)
|
)
|
||||||
node = parse_llm_json(raw)
|
node = parse_llm_json(raw)
|
||||||
return _assign_id(validate_node(node))
|
return _assign_id(_ensure_labels(validate_node(node)))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("ai_tree_builder node attempt %d failed: %s", attempt + 1, e)
|
logger.warning("ai_tree_builder node attempt %d failed: %s", attempt + 1, e)
|
||||||
continue
|
continue
|
||||||
@@ -147,6 +181,10 @@ def normalize_walked_path(walked_path: list[dict]) -> dict[str, Any]:
|
|||||||
if step.get("reason_category"):
|
if step.get("reason_category"):
|
||||||
node["reason_category"] = step["reason_category"]
|
node["reason_category"] = step["reason_category"]
|
||||||
if ntype == "question":
|
if ntype == "question":
|
||||||
|
if step.get("yes_label"):
|
||||||
|
node["yes_label"] = step["yes_label"]
|
||||||
|
if step.get("no_label"):
|
||||||
|
node["no_label"] = step["no_label"]
|
||||||
answer = (step.get("answer") or "").lower()
|
answer = (step.get("answer") or "").lower()
|
||||||
stub_seq += 1
|
stub_seq += 1
|
||||||
stub_id = f"review-{stub_seq}"
|
stub_id = f"review-{stub_seq}"
|
||||||
|
|||||||
@@ -183,6 +183,24 @@ async def advance_ai_build(
|
|||||||
"answer": answer,
|
"answer": answer,
|
||||||
"l1_note": note,
|
"l1_note": note,
|
||||||
}
|
}
|
||||||
|
# answer_label: the button text the tech actually clicked. Derived from
|
||||||
|
# the server-held pending_node (never client-supplied) so an
|
||||||
|
# alternatives question ("Microsoft account or local account?") records
|
||||||
|
# "Microsoft account", not a bare "yes", in the transcript, the LLM
|
||||||
|
# context, and the captured flywheel tree.
|
||||||
|
pending = session.pending_node
|
||||||
|
if (
|
||||||
|
answer in ("yes", "no")
|
||||||
|
and isinstance(pending, dict)
|
||||||
|
and pending.get("id") == node_id
|
||||||
|
):
|
||||||
|
label = pending.get(f"{answer}_label")
|
||||||
|
if label:
|
||||||
|
entry["answer_label"] = label
|
||||||
|
if pending.get("yes_label"):
|
||||||
|
entry["yes_label"] = pending["yes_label"]
|
||||||
|
if pending.get("no_label"):
|
||||||
|
entry["no_label"] = pending["no_label"]
|
||||||
# JSONB requires assigning a new list — in-place mutation isn't tracked
|
# JSONB requires assigning a new list — in-place mutation isn't tracked
|
||||||
session.walked_path = [*session.walked_path, entry]
|
session.walked_path = [*session.walked_path, entry]
|
||||||
session.pending_node = None # the served node has now been answered
|
session.pending_node = None # the served node has now been answered
|
||||||
|
|||||||
@@ -48,6 +48,83 @@ async def test_generate_next_node_generation_failed_node_has_id(monkeypatch):
|
|||||||
assert node.get("id")
|
assert node.get("id")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Answer labels: the button text must match the question (live-walk defect:
|
||||||
|
# "Microsoft account or local account?" rendered with Yes/No buttons).
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_system_prompt_requires_answer_labels():
|
||||||
|
"""The prompt must mandate yes_label/no_label on question nodes — the prompt
|
||||||
|
forcing label-less '<yes/no question>' output is the root cause of the
|
||||||
|
question/button mismatch."""
|
||||||
|
assert "yes_label" in atb.SYSTEM_PROMPT and "no_label" in atb.SYSTEM_PROMPT
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_generated_question_passes_labels_through(monkeypatch):
|
||||||
|
monkeypatch.setattr(
|
||||||
|
atb, "get_ai_provider",
|
||||||
|
lambda *a, **k: _FakeProvider(
|
||||||
|
'{"node_type":"question",'
|
||||||
|
'"text":"Is Jane\'s Windows account a Microsoft account or a local account?",'
|
||||||
|
'"yes_label":"Microsoft account","no_label":"Local account"}'
|
||||||
|
),
|
||||||
|
)
|
||||||
|
node = await atb.generate_next_node("login issue", "account_login", [])
|
||||||
|
assert node["yes_label"] == "Microsoft account"
|
||||||
|
assert node["no_label"] == "Local account"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_question_missing_labels_gets_yes_no_defaults(monkeypatch):
|
||||||
|
monkeypatch.setattr(
|
||||||
|
atb, "get_ai_provider",
|
||||||
|
lambda *a, **k: _FakeProvider('{"node_type":"question","text":"Is the printer powered on?"}'),
|
||||||
|
)
|
||||||
|
node = await atb.generate_next_node("printer down", "printer", [])
|
||||||
|
assert node["yes_label"] == "Yes"
|
||||||
|
assert node["no_label"] == "No"
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_node_rejects_hard_floor_text_in_labels():
|
||||||
|
node = {"node_type": "question", "text": "How should we proceed?",
|
||||||
|
"yes_label": "Edit the registry", "no_label": "Wait"}
|
||||||
|
with pytest.raises(atb.UnsafeNodeError):
|
||||||
|
atb.validate_node(node)
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_node_rejects_indistinct_or_malformed_labels():
|
||||||
|
base = {"node_type": "question", "text": "Which network is the laptop on?"}
|
||||||
|
with pytest.raises(atb.UnsafeNodeError):
|
||||||
|
atb.validate_node({**base, "yes_label": "Wi-Fi", "no_label": "wi-fi "})
|
||||||
|
with pytest.raises(atb.UnsafeNodeError):
|
||||||
|
atb.validate_node({**base, "yes_label": 1, "no_label": "Ethernet"})
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_context_prefers_answer_label_over_raw_answer():
|
||||||
|
"""The LLM context must show what the tech actually chose — 'Q? -> yes' is
|
||||||
|
ambiguous for an alternatives question and degrades the next generation."""
|
||||||
|
ctx = atb._build_context("login issue", "account_login", [
|
||||||
|
{"node_type": "question", "id": "n1",
|
||||||
|
"text": "Microsoft account or local account?",
|
||||||
|
"answer": "yes", "answer_label": "Microsoft account"},
|
||||||
|
])
|
||||||
|
assert "-> Microsoft account" in ctx
|
||||||
|
assert "-> yes" not in ctx
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_walked_path_preserves_question_labels():
|
||||||
|
walked = [
|
||||||
|
{"node_type": "question", "id": "n1", "text": "Wi-Fi or Ethernet?",
|
||||||
|
"answer": "yes", "answer_label": "Wi-Fi",
|
||||||
|
"yes_label": "Wi-Fi", "no_label": "Ethernet"},
|
||||||
|
{"node_type": "resolved", "id": "n2", "text": "Fixed."},
|
||||||
|
]
|
||||||
|
tree = atb.normalize_walked_path(walked)
|
||||||
|
n1 = tree["nodes"]["n1"]
|
||||||
|
assert n1["yes_label"] == "Wi-Fi" and n1["no_label"] == "Ethernet"
|
||||||
|
|
||||||
|
|
||||||
def test_validate_node_rejects_hard_floor_text():
|
def test_validate_node_rejects_hard_floor_text():
|
||||||
node = {"node_type": "instruction", "id": "n1", "text": "Open regedit and change the key", "next": "generate"}
|
node = {"node_type": "instruction", "id": "n1", "text": "Open regedit and change the key", "next": "generate"}
|
||||||
with pytest.raises(atb.UnsafeNodeError):
|
with pytest.raises(atb.UnsafeNodeError):
|
||||||
|
|||||||
@@ -1157,6 +1157,41 @@ async def test_advance_ai_build_replays_pending_node_without_regenerating(
|
|||||||
assert replay["id"] == first["id"]
|
assert replay["id"] == first["id"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_advance_ai_build_records_answer_label_from_pending_node(
|
||||||
|
test_db: AsyncSession, monkeypatch,
|
||||||
|
):
|
||||||
|
"""When the served question carried yes_label/no_label, answering it must
|
||||||
|
record the chosen label (answer_label) in walked_path — derived server-side
|
||||||
|
from pending_node, never trusted from the client. 'Microsoft account or
|
||||||
|
local account? -> yes' is meaningless in the transcript and the LLM context."""
|
||||||
|
from app.services import l1_session_service as svc
|
||||||
|
from app.services import ai_tree_builder
|
||||||
|
account = await _make_account(test_db)
|
||||||
|
l1_user = await _make_user(test_db, account_id=account.id)
|
||||||
|
s = await svc.start_ai_build_session(
|
||||||
|
test_db, account_id=account.id, user=l1_user,
|
||||||
|
ticket_id="t-label", ticket_kind="internal",
|
||||||
|
category="account_login", problem_text="login issue")
|
||||||
|
|
||||||
|
async def fake_next(problem, category, walked):
|
||||||
|
return {"node_type": "question", "id": "q-acct",
|
||||||
|
"text": "Is the account a Microsoft account or a local account?",
|
||||||
|
"yes_label": "Microsoft account", "no_label": "Local account"}
|
||||||
|
monkeypatch.setattr(ai_tree_builder, "generate_next_node", fake_next)
|
||||||
|
|
||||||
|
first = await svc.advance_ai_build(
|
||||||
|
test_db, session_id=s.id, problem_text="login issue",
|
||||||
|
category="account_login", node_id=None)
|
||||||
|
await svc.advance_ai_build(
|
||||||
|
test_db, session_id=s.id, problem_text="login issue",
|
||||||
|
category="account_login",
|
||||||
|
node_id=first["id"], node_text=first["text"], answer="yes")
|
||||||
|
refreshed = await test_db.get(L1WalkSession, s.id)
|
||||||
|
assert refreshed.walked_path[0]["answer"] == "yes"
|
||||||
|
assert refreshed.walked_path[0]["answer_label"] == "Microsoft account"
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Finding 10: escalation recipient resolution
|
# Finding 10: escalation recipient resolution
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ export function L1EscalationsSection() {
|
|||||||
<li key={i} className="flex flex-col">
|
<li key={i} className="flex flex-col">
|
||||||
<span className="text-text-muted text-xs">{step.question ?? step.text}</span>
|
<span className="text-text-muted text-xs">{step.question ?? step.text}</span>
|
||||||
{step.answer && (
|
{step.answer && (
|
||||||
<span className="font-medium text-text-primary">→ {step.answer}</span>
|
<span className="font-medium text-text-primary">→ {step.answer_label ?? step.answer}</span>
|
||||||
)}
|
)}
|
||||||
</li>
|
</li>
|
||||||
))}
|
))}
|
||||||
|
|||||||
@@ -167,13 +167,13 @@ export function L1WalkTreeVariant({ session, onSessionUpdate, onDone }: Props) {
|
|||||||
onClick={() => advanceNode({ answer: 'yes' })}
|
onClick={() => advanceNode({ answer: 'yes' })}
|
||||||
className="flex-1 rounded-md bg-accent text-white py-3 text-base font-medium hover:bg-accent/90 min-h-[44px] transition-colors"
|
className="flex-1 rounded-md bg-accent text-white py-3 text-base font-medium hover:bg-accent/90 min-h-[44px] transition-colors"
|
||||||
>
|
>
|
||||||
Yes
|
{node.yes_label ?? 'Yes'}
|
||||||
</button>
|
</button>
|
||||||
<button
|
<button
|
||||||
onClick={() => advanceNode({ answer: 'no' })}
|
onClick={() => advanceNode({ answer: 'no' })}
|
||||||
className="flex-1 rounded-md border border-default py-3 text-base font-medium hover:bg-elevated min-h-[44px] transition-colors"
|
className="flex-1 rounded-md border border-default py-3 text-base font-medium hover:bg-elevated min-h-[44px] transition-colors"
|
||||||
>
|
>
|
||||||
No
|
{node.no_label ?? 'No'}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</>
|
</>
|
||||||
@@ -252,7 +252,7 @@ export function L1WalkTreeVariant({ session, onSessionUpdate, onDone }: Props) {
|
|||||||
{session.walked_path.map((step, i) => (
|
{session.walked_path.map((step, i) => (
|
||||||
<li key={i} className="flex flex-col">
|
<li key={i} className="flex flex-col">
|
||||||
<span className="text-muted-foreground text-xs">{step.question ?? step.text}</span>
|
<span className="text-muted-foreground text-xs">{step.question ?? step.text}</span>
|
||||||
{step.answer && <span className="font-medium">→ {step.answer}</span>}
|
{step.answer && <span className="font-medium">→ {step.answer_label ?? step.answer}</span>}
|
||||||
{step.l1_note && <span className="text-muted-foreground text-xs italic mt-0.5">{step.l1_note}</span>}
|
{step.l1_note && <span className="text-muted-foreground text-xs italic mt-0.5">{step.l1_note}</span>}
|
||||||
</li>
|
</li>
|
||||||
))}
|
))}
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ export interface WalkStep {
|
|||||||
question?: string
|
question?: string
|
||||||
text?: string
|
text?: string
|
||||||
answer: string | null
|
answer: string | null
|
||||||
|
/** Button text the tech clicked (ai_build); falls back to `answer`. */
|
||||||
|
answer_label?: string
|
||||||
l1_note: string | null
|
l1_note: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,9 +77,12 @@ export interface IntakeResult {
|
|||||||
category?: string // for 'out_of_scope'
|
category?: string // for 'out_of_scope'
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A single node of an AI-built decision tree, returned by /next-node. */
|
/** A single node of an AI-built decision tree, returned by /next-node.
|
||||||
|
* Question nodes carry the literal button texts (yes_label/no_label) so the
|
||||||
|
* choices always match the question ("Microsoft account" / "Local account",
|
||||||
|
* not a mismatched Yes/No). The backend defaults them to Yes/No. */
|
||||||
export type TreeNode =
|
export type TreeNode =
|
||||||
| { node_type: 'question'; id: string; text: string }
|
| { node_type: 'question'; id: string; text: string; yes_label?: string; no_label?: string }
|
||||||
| { node_type: 'instruction'; id: string; text: string }
|
| { node_type: 'instruction'; id: string; text: string }
|
||||||
| { node_type: 'resolved'; id: string; text: string }
|
| { node_type: 'resolved'; id: string; text: string }
|
||||||
| { node_type: 'escalate'; id: string; reason_category?: string; text: string }
|
| { node_type: 'escalate'; id: string; reason_category?: string; text: string }
|
||||||
|
|||||||
Reference in New Issue
Block a user