Files
resolutionflow/backend/app/services/l1_session_service.py
Michael Chihlas 9c34d1e82d fix(l1): answer buttons must match the question — yes_label/no_label end-to-end
Live walk defect: the builder generated alternatives questions ("Is Jane's
account a Microsoft account or a local account?") while the UI could only
offer Yes/No. Root cause: SYSTEM_PROMPT mandated a label-less
'<yes/no question>' shape with no way to express the two answers.

- SYSTEM_PROMPT: question nodes must carry yes_label/no_label — the literal
  button texts; alternatives questions must use the alternatives as labels.
- validate_node: labels hard-floor-scanned, must be distinct non-empty strings.
- _ensure_labels: server defaults missing labels to Yes/No.
- advance_ai_build: records answer_label (and both labels) in walked_path,
  derived from the server-held pending_node — never client-supplied.
- _build_context: LLM context shows the chosen label, not a bare yes/no
  (a raw "-> yes" on an alternatives question degrades the next generation).
- normalize_walked_path: captured flywheel trees keep question labels.
- Frontend: buttons render yes_label/no_label; walk transcript and
  L1EscalationsSection render answer_label.

Phase 2A backend set: 137 passed / 0 failed / 8 deselected. tsc, eslint,
vite build clean.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 15:03:15 -04:00

493 lines
17 KiB
Python

"""L1 session lifecycle: start (flow/proposal/adhoc), step, notes, resolve, escalate.
start_* functions live in T12; step/notes are T13; resolve/escalate are T14.
"""
import json
import logging
from datetime import datetime, timezone
from typing import Optional
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.audit import log_audit
from app.models.flow_proposal import FlowProposal
from app.models.l1_walk_session import L1WalkSession
from app.models.user import User
from app.services import ai_tree_builder
from app.services import internal_ticket_service
from app.services.notification_service import notify
logger = logging.getLogger(__name__)
def _resolve_acting_as(user: User) -> Optional[str]:
"""An engineer (whether covering or not) gets tagged for audit when using L1 surface.
Returns 'l1_coverage' for engineers (only engineers WITH the coverage flag should
reach this code path — the require_l1_or_coverage dep gates that). For native
l1_tech users, returns None (no special tag — they ARE l1).
"""
if user.account_role == "engineer":
return "l1_coverage"
return None
async def start_flow_session(
db: AsyncSession,
*,
account_id: UUID,
user: User,
flow_id: UUID,
ticket_id: str,
ticket_kind: str, # 'psa' | 'internal'
) -> L1WalkSession:
"""Start a session walking an authored flow."""
session = L1WalkSession(
account_id=account_id,
created_by_user_id=user.id,
acting_as=_resolve_acting_as(user),
ticket_id=ticket_id,
ticket_kind=ticket_kind,
session_kind="flow",
flow_id=flow_id,
)
db.add(session)
await db.flush()
return session
async def start_proposal_session(
db: AsyncSession,
*,
account_id: UUID,
user: User,
flow_proposal_id: UUID,
ticket_id: str,
ticket_kind: str,
) -> L1WalkSession:
"""Start a session walking an AI-built FlowProposal."""
session = L1WalkSession(
account_id=account_id,
created_by_user_id=user.id,
acting_as=_resolve_acting_as(user),
ticket_id=ticket_id,
ticket_kind=ticket_kind,
session_kind="proposal",
flow_proposal_id=flow_proposal_id,
)
db.add(session)
await db.flush()
return session
async def start_adhoc_session(
db: AsyncSession,
*,
account_id: UUID,
user: User,
ticket_id: str,
ticket_kind: str,
) -> L1WalkSession:
"""Start an ad-hoc session with no tree (free-form note-taking only)."""
session = L1WalkSession(
account_id=account_id,
created_by_user_id=user.id,
acting_as=_resolve_acting_as(user),
ticket_id=ticket_id,
ticket_kind=ticket_kind,
session_kind="adhoc",
)
db.add(session)
await db.flush()
return session
async def start_ai_build_session(
db: AsyncSession,
*,
account_id: UUID,
user: User,
ticket_id: str,
ticket_kind: str,
category: Optional[str] = None,
problem_text: Optional[str] = None,
) -> L1WalkSession:
"""Start an AI-built tree session (nodes generated on demand via next-node).
``category`` and ``problem_text`` are the immutable AI-build context, stored
once here so /next-node never re-derives them (no ticket re-fetch, no
walked_path scan, no hidden meta entry).
"""
session = L1WalkSession(
account_id=account_id,
created_by_user_id=user.id,
acting_as=_resolve_acting_as(user),
ticket_id=ticket_id,
ticket_kind=ticket_kind,
session_kind="ai_build",
category=category,
problem_text=problem_text,
)
db.add(session)
await db.flush()
return session
async def advance_ai_build(
db: AsyncSession,
*,
session_id: UUID,
problem_text: str,
category: str,
node_id: Optional[str] = None,
node_text: Optional[str] = None,
answer: Optional[str] = None,
note: Optional[str] = None,
) -> dict:
"""Append the answered/acked node to walked_path, then generate the next node.
On the first call (node_id is None) nothing is appended — we just generate the
first node. Returns the next node dict (caller persists current_node_id).
Raises ValueError on missing/inactive/non-ai_build session.
``node_text`` is the display text of the node being answered. It is supplied by
the caller/endpoint, which holds the served node. Storing it here ensures that
later nodes receive full prior-step context via ``ai_tree_builder._build_context``
and that captured flywheel trees (``normalize_walked_path``) have meaningful text.
Pending-node replay (Finding 8): the node served but not yet answered is stored
on ``session.pending_node``. When node_id is None and a pending node exists (a
refresh, a StrictMode double-mount, or back/forward), we replay it instead of
firing a fresh paid LLM call that might also swap the question mid-answer.
"""
session = await db.get(L1WalkSession, session_id)
if not session:
raise ValueError(f"L1WalkSession {session_id} not found")
if session.session_kind != "ai_build":
raise ValueError("advance_ai_build requires an ai_build session")
if session.status != "active":
raise ValueError(f"Session {session_id} is not active (status={session.status})")
if node_id is not None:
# node_type inferred from the answer: questions are answered yes/no;
# instructions are acknowledged (answer is None) per the next-node endpoint contract.
# Note: entry uses key "id" (not "node_id" as record_step uses) because
# ai_tree_builder.normalize_walked_path reads step.get("id"); the two coexist
# safely because they are segregated by session_kind.
entry = {
"node_type": "question" if answer in ("yes", "no") else "instruction",
"id": node_id,
"text": node_text or "",
"answer": answer,
"l1_note": note,
}
# answer_label: the button text the tech actually clicked. Derived from
# the server-held pending_node (never client-supplied) so an
# alternatives question ("Microsoft account or local account?") records
# "Microsoft account", not a bare "yes", in the transcript, the LLM
# context, and the captured flywheel tree.
pending = session.pending_node
if (
answer in ("yes", "no")
and isinstance(pending, dict)
and pending.get("id") == node_id
):
label = pending.get(f"{answer}_label")
if label:
entry["answer_label"] = label
if pending.get("yes_label"):
entry["yes_label"] = pending["yes_label"]
if pending.get("no_label"):
entry["no_label"] = pending["no_label"]
# JSONB requires assigning a new list — in-place mutation isn't tracked
session.walked_path = [*session.walked_path, entry]
session.pending_node = None # the served node has now been answered
elif session.pending_node is not None:
# Re-mount before answering — return the already-served node verbatim.
return session.pending_node
next_node = await ai_tree_builder.generate_next_node(
problem_text, category, session.walked_path)
session.pending_node = next_node
session.current_node_id = next_node.get("id")
session.last_step_at = datetime.now(timezone.utc)
await db.flush()
return next_node
async def record_step(
db: AsyncSession,
*,
session_id: UUID,
node_id: str,
question: str,
answer: str,
note: Optional[str] = None,
) -> L1WalkSession:
"""Record an answered step in a tree walk. Appends to walked_path JSONB and
advances current_node_id. Raises ValueError on adhoc sessions or inactive
sessions. Updates last_step_at."""
session = await db.get(L1WalkSession, session_id)
if not session:
raise ValueError(f"L1WalkSession {session_id} not found")
if session.session_kind == "adhoc":
raise ValueError("Cannot record step on adhoc session — use update_notes")
if session.status != "active":
raise ValueError(f"Session {session_id} is not active (status={session.status})")
entry = {
"node_id": node_id,
"question": question,
"answer": answer,
"l1_note": note,
}
# JSONB requires assigning a new list — in-place mutation isn't tracked
session.walked_path = [*session.walked_path, entry]
session.current_node_id = node_id
session.last_step_at = datetime.now(timezone.utc)
await db.flush()
return session
async def update_notes(
db: AsyncSession,
*,
session_id: UUID,
notes: list[dict],
) -> L1WalkSession:
"""Replace walk_notes on an active session. Used by adhoc walks for
debounced autosave. Raises ValueError if missing or inactive. Caps notes
payload at 256KB to prevent unbounded growth."""
session = await db.get(L1WalkSession, session_id)
if not session:
raise ValueError(f"L1WalkSession {session_id} not found")
if session.status != "active":
raise ValueError(f"Session {session_id} is not active (status={session.status})")
encoded_size = len(json.dumps(notes).encode("utf-8"))
if encoded_size > 256 * 1024:
raise ValueError("walk_notes exceeds 256KB cap — consider escalating")
session.walk_notes = notes
session.last_step_at = datetime.now(timezone.utc)
await db.flush()
return session
async def resolve(
db: AsyncSession,
*,
session_id: UUID,
helpful: bool,
resolution_notes: str,
) -> L1WalkSession:
"""Close a session as resolved.
- Sets status='resolved', helpful, resolution_notes, resolved_at.
- On helpful=True AND session_kind='proposal': flips
flow_proposal.validated_by_outcome=True (one-bit aggregate signal).
- Closes the linked internal ticket (PSA close stubbed for Phase 2).
- Raises ValueError on missing or non-active session.
"""
session = await db.get(L1WalkSession, session_id)
if not session:
raise ValueError(f"L1WalkSession {session_id} not found")
if session.status != "active":
raise ValueError(f"Session not active (status={session.status})")
now = datetime.now(timezone.utc)
session.status = "resolved"
session.helpful = helpful
session.resolution_notes = resolution_notes
session.resolved_at = now
session.last_step_at = now
if helpful and session.session_kind == "proposal" and session.flow_proposal_id:
proposal = await db.get(FlowProposal, session.flow_proposal_id)
if proposal:
proposal.validated_by_outcome = True
# Flywheel capture: persist a validated FlowProposal for ai_build sessions
# resolved as helpful. Captures the AI-generated path as training signal.
if helpful and session.session_kind == "ai_build" and session.walked_path:
tree_structure = ai_tree_builder.normalize_walked_path(session.walked_path)
db.add(FlowProposal(
account_id=session.account_id,
l1_session_id=session.id,
source_session_id=None,
proposal_type="new_flow",
title=(session.resolution_notes or "AI L1 resolution")[:255],
proposed_flow_data={"tree_structure": tree_structure, "match_keywords": []},
source="ai_realtime_l1",
validated_by_outcome=True,
linked_ticket_id=session.ticket_id,
linked_ticket_kind=session.ticket_kind,
status="pending",
))
if session.ticket_kind == "internal":
await internal_ticket_service.update_status(
db,
ticket_id=UUID(session.ticket_id),
status="resolved",
resolution_notes=resolution_notes,
)
# PSA close deferred to Phase 2 — no-op for now
await log_audit(
db,
user_id=session.created_by_user_id,
action="l1.session.resolve",
resource_type="l1_walk_session",
resource_id=session.id,
details={
"session_kind": session.session_kind,
"helpful": helpful,
"ticket_id": session.ticket_id,
"ticket_kind": session.ticket_kind,
},
account_id=session.account_id,
acting_as=session.acting_as,
)
await db.flush()
return session
async def escalate(
db: AsyncSession,
*,
session_id: UUID,
reason: str,
reason_category: str,
) -> L1WalkSession:
"""Escalate an active session to engineering.
- Sets status='escalated', escalation_reason, escalation_reason_category, resolved_at.
- Marks the linked internal ticket as escalated (PSA reassign deferred to Phase 2).
- Raises ValueError on missing or non-active session.
"""
session = await db.get(L1WalkSession, session_id)
if not session:
raise ValueError(f"L1WalkSession {session_id} not found")
if session.status != "active":
raise ValueError(f"Session not active (status={session.status})")
now = datetime.now(timezone.utc)
session.status = "escalated"
session.escalation_reason = reason
session.escalation_reason_category = reason_category
session.resolved_at = now
session.last_step_at = now
if session.ticket_kind == "internal":
await internal_ticket_service.update_status(
db,
ticket_id=UUID(session.ticket_id),
status="escalated",
)
# PSA reassign deferred to Phase 2
await log_audit(
db,
user_id=session.created_by_user_id,
action="l1.session.escalate",
resource_type="l1_walk_session",
resource_id=session.id,
details={
"session_kind": session.session_kind,
"escalation_reason_category": reason_category,
"ticket_id": session.ticket_id,
"ticket_kind": session.ticket_kind,
},
account_id=session.account_id,
acting_as=session.acting_as,
)
# Notify engineers (owner/admin/engineer roles) about the escalation.
# Filter soft-deleted users too (is_active alone misses them — handoff_manager
# does the same): a deleted engineer must not be paged.
eng_rows = await db.execute(
select(User.id).where(
User.account_id == session.account_id,
User.is_active.is_(True),
User.deleted_at.is_(None),
User.account_role.in_(("owner", "admin", "engineer")),
)
)
target_ids = [r[0] for r in eng_rows.all()]
if not target_ids:
# No eligible engineer. Passing [] to notify() would suppress the in-app
# notification entirely (explicit-empty is honored). Fall back to the
# default owner/admin recipient set instead of silently dropping it.
logger.warning(
"L1 escalation for session %s has no active engineer recipients; "
"falling back to default owner/admin notification set.",
session.id,
)
await notify(
"l1.session.escalated",
session.account_id,
{
"problem_summary": session.problem_text or session.ticket_id,
"session_id": str(session.id),
"reason_category": reason_category,
},
db,
target_user_ids=target_ids or None,
)
await db.flush()
return session
async def escalate_without_walk(
db: AsyncSession,
*,
account_id: UUID,
user: User,
ticket_id: str,
ticket_kind: str,
reason_category: str,
reason: Optional[str] = None,
) -> L1WalkSession:
"""Create an immediately-escalated session with no walked_path.
Used from the BuildAbortedNoKB screen (no KB content available to walk a
tree). Captures the call as an audit record + escalates the ticket without
requiring a walker session in between.
"""
now = datetime.now(timezone.utc)
session = L1WalkSession(
account_id=account_id,
created_by_user_id=user.id,
acting_as=_resolve_acting_as(user),
ticket_id=ticket_id,
ticket_kind=ticket_kind,
session_kind="adhoc",
status="escalated",
escalation_reason=reason,
escalation_reason_category=reason_category,
resolved_at=now,
last_step_at=now,
)
db.add(session)
if ticket_kind == "internal":
await internal_ticket_service.update_status(
db,
ticket_id=UUID(ticket_id),
status="escalated",
)
await db.flush() # flush first so session.id is populated
await log_audit(
db,
user_id=session.created_by_user_id,
action="l1.session.escalate_no_walk",
resource_type="l1_walk_session",
resource_id=session.id,
details={
"escalation_reason_category": reason_category,
"ticket_id": ticket_id,
"ticket_kind": ticket_kind,
},
account_id=session.account_id,
acting_as=session.acting_as,
)
return session