fix(l1): resolve PR #193 backend review findings (1,4,5,6,7,8,9,10)
The server now stamps a short id (first 8 hex chars of a uuid4) on every node
the tree builder returns — generated, depth-capped, or generation-failed
(Finding 1 showstopper: nodes had no id but the advance protocol keys on
node_id, so ai_build walks never advanced past question 1). Replaces the hidden
{"node_type":"meta"}
walked_path convention with real category/problem_text/pending_node columns on
l1_walk_sessions (migration 61dda4f615c6) — fixes junk proposals + off-by-one
depth cap (Findings 8,9), and pending_node replays the served node on re-mount
(no duplicate paid LLM call). Intake honors explicit flow_id and adhoc=True
(Findings 4,5); flow_proposals.l1_session_id FK -> CASCADE (Finding 6 time
bomb); L1 category GET is owner+admin like PATCH and require_account_owner_or_admin
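delegates to User.can_manage_account (Finding 7); escalate now excludes
soft-deleted users (deleted_at IS NULL filter) and, when no eligible engineer
remains, logs a warning and falls back to the default owner/admin recipients
instead of dropping the notification (Finding 10). Cleanups: dead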
ticket_ref removed, IntakeResponse per-outcome validator, unused acknowledged
dropped, escalations partial index, restored a deleted audit assertion.
Full Phase 2A backend set: 110 passed / 0 failed.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ for flywheel capture.
|
||||
"""
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from app.core.ai_provider import get_ai_provider
|
||||
from app.core.config import settings
|
||||
@@ -45,19 +46,21 @@ No prose, no markdown fences.
|
||||
"""
|
||||
|
||||
|
||||
def _strip_meta(walked_path: list[dict]) -> list[dict]:
|
||||
"""Drop the hidden ``meta`` entry (category carrier) the intake endpoint seeds.
|
||||
def _assign_id(node: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Stamp a stable server-side id on a generated node (Finding 1).
|
||||
|
||||
The first walked_path entry on an ai_build session may be a
|
||||
``{"node_type": "meta", "category": ...}`` marker used to persist the
|
||||
classified category; it is not a real walk step and must be excluded from
|
||||
both model context and tree normalization.
|
||||
The SYSTEM_PROMPT never asks the model for an id — and we must not, since a
|
||||
model-invented id is neither stable nor trustworthy. But the advance protocol
|
||||
keys on ``node_id``: without one, the answer to every node is discarded and
|
||||
the walk can never progress past the first question. So every node the builder
|
||||
hands back — generated, depth-capped, or generation-failed — gets an id here.
|
||||
"""
|
||||
return [s for s in walked_path if s.get("node_type") != "meta"]
|
||||
if not node.get("id"):
|
||||
node["id"] = uuid4().hex[:8]
|
||||
return node
|
||||
|
||||
|
||||
def _build_context(problem_text: str, category: str, walked_path: list[dict]) -> str:
|
||||
walked_path = _strip_meta(walked_path)
|
||||
lines = [f"PROBLEM: {problem_text}", f"CATEGORY: {category}", "STEPS SO FAR:"]
|
||||
if not walked_path:
|
||||
lines.append("(none yet — produce the first diagnostic question)")
|
||||
@@ -81,11 +84,11 @@ def validate_node(node: dict[str, Any]) -> dict[str, Any]:
|
||||
|
||||
def escalate_if_depth_exceeded(walked_path: list[dict]) -> Optional[dict[str, Any]]:
|
||||
if len(walked_path) >= MAX_DEPTH:
|
||||
return {
|
||||
return _assign_id({
|
||||
"node_type": "escalate",
|
||||
"reason_category": "depth_cap",
|
||||
"text": "Reached the L1 troubleshooting depth limit — escalating to engineering.",
|
||||
}
|
||||
})
|
||||
return None
|
||||
|
||||
|
||||
@@ -108,16 +111,16 @@ async def generate_next_node(
|
||||
max_tokens=1024,
|
||||
)
|
||||
node = parse_llm_json(raw)
|
||||
return validate_node(node)
|
||||
return _assign_id(validate_node(node))
|
||||
except Exception as e:
|
||||
logger.warning("ai_tree_builder node attempt %d failed: %s", attempt + 1, e)
|
||||
continue
|
||||
|
||||
return {
|
||||
return _assign_id({
|
||||
"node_type": "escalate",
|
||||
"reason_category": "generation_failed",
|
||||
"text": "Could not generate a safe next step — escalating to engineering.",
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
def normalize_walked_path(walked_path: list[dict]) -> dict[str, Any]:
|
||||
@@ -128,7 +131,6 @@ def normalize_walked_path(walked_path: list[dict]) -> dict[str, Any]:
|
||||
Returns {id, nodes: {id: node}} — a dict with an id (passes the proposal
|
||||
approval guard).
|
||||
"""
|
||||
walked_path = _strip_meta(walked_path)
|
||||
nodes: dict[str, Any] = {}
|
||||
if not walked_path:
|
||||
root_id = "root"
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
start_* functions live in T12; step/notes are T13; resolve/escalate are T14.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
@@ -18,6 +19,8 @@ from app.services import ai_tree_builder
|
||||
from app.services import internal_ticket_service
|
||||
from app.services.notification_service import notify
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_acting_as(user: User) -> Optional[str]:
|
||||
"""An engineer (whether covering or not) gets tagged for audit when using L1 surface.
|
||||
@@ -108,8 +111,15 @@ async def start_ai_build_session(
|
||||
user: User,
|
||||
ticket_id: str,
|
||||
ticket_kind: str,
|
||||
category: Optional[str] = None,
|
||||
problem_text: Optional[str] = None,
|
||||
) -> L1WalkSession:
|
||||
"""Start an AI-built tree session (nodes generated on demand via next-node)."""
|
||||
"""Start an AI-built tree session (nodes generated on demand via next-node).
|
||||
|
||||
``category`` and ``problem_text`` are the immutable AI-build context, stored
|
||||
once here so /next-node never re-derives them (no ticket re-fetch, no
|
||||
walked_path scan, no hidden meta entry).
|
||||
"""
|
||||
session = L1WalkSession(
|
||||
account_id=account_id,
|
||||
created_by_user_id=user.id,
|
||||
@@ -117,6 +127,8 @@ async def start_ai_build_session(
|
||||
ticket_id=ticket_id,
|
||||
ticket_kind=ticket_kind,
|
||||
session_kind="ai_build",
|
||||
category=category,
|
||||
problem_text=problem_text,
|
||||
)
|
||||
db.add(session)
|
||||
await db.flush()
|
||||
@@ -144,6 +156,11 @@ async def advance_ai_build(
|
||||
the caller/endpoint, which holds the served node. Storing it here ensures that
|
||||
later nodes receive full prior-step context via ``ai_tree_builder._build_context``
|
||||
and that captured flywheel trees (``normalize_walked_path``) have meaningful text.
|
||||
|
||||
Pending-node replay (Finding 8): the node served but not yet answered is stored
|
||||
on ``session.pending_node``. When node_id is None and a pending node exists (a
|
||||
refresh, a StrictMode double-mount, or back/forward), we replay it instead of
|
||||
firing a fresh paid LLM call that might also swap the question mid-answer.
|
||||
"""
|
||||
session = await db.get(L1WalkSession, session_id)
|
||||
if not session:
|
||||
@@ -168,9 +185,14 @@ async def advance_ai_build(
|
||||
}
|
||||
# JSONB requires assigning a new list — in-place mutation isn't tracked
|
||||
session.walked_path = [*session.walked_path, entry]
|
||||
session.pending_node = None # the served node has now been answered
|
||||
elif session.pending_node is not None:
|
||||
# Re-mount before answering — return the already-served node verbatim.
|
||||
return session.pending_node
|
||||
|
||||
next_node = await ai_tree_builder.generate_next_node(
|
||||
problem_text, category, session.walked_path)
|
||||
session.pending_node = next_node
|
||||
session.current_node_id = next_node.get("id")
|
||||
session.last_step_at = datetime.now(timezone.utc)
|
||||
await db.flush()
|
||||
@@ -361,24 +383,36 @@ async def escalate(
|
||||
)
|
||||
|
||||
# Notify engineers (owner/admin/engineer roles) about the escalation.
|
||||
# Filter soft-deleted users too (is_active alone misses them — handoff_manager
|
||||
# does the same): a deleted engineer must not be paged.
|
||||
eng_rows = await db.execute(
|
||||
select(User.id).where(
|
||||
User.account_id == session.account_id,
|
||||
User.is_active.is_(True),
|
||||
User.deleted_at.is_(None),
|
||||
User.account_role.in_(("owner", "admin", "engineer")),
|
||||
)
|
||||
)
|
||||
target_ids = [r[0] for r in eng_rows.all()]
|
||||
if not target_ids:
|
||||
# No eligible engineer. Passing [] to notify() would suppress the in-app
|
||||
# notification entirely (explicit-empty is honored). Fall back to the
|
||||
# default owner/admin recipient set instead of silently dropping it.
|
||||
logger.warning(
|
||||
"L1 escalation for session %s has no active engineer recipients; "
|
||||
"falling back to default owner/admin notification set.",
|
||||
session.id,
|
||||
)
|
||||
await notify(
|
||||
"l1.session.escalated",
|
||||
session.account_id,
|
||||
{
|
||||
"problem_summary": session.ticket_id,
|
||||
"problem_summary": session.problem_text or session.ticket_id,
|
||||
"session_id": str(session.id),
|
||||
"reason_category": reason_category,
|
||||
},
|
||||
db,
|
||||
target_user_ids=target_ids,
|
||||
target_user_ids=target_ids or None,
|
||||
)
|
||||
|
||||
await db.flush()
|
||||
|
||||
@@ -52,7 +52,6 @@ async def match_or_build(
|
||||
account_id: UUID,
|
||||
problem_text: str,
|
||||
problem_domain: Optional[str],
|
||||
ticket_ref: str, # passed through for caller/session use; not consumed here (Task 10)
|
||||
*,
|
||||
db: AsyncSession,
|
||||
force_build: bool = False,
|
||||
|
||||
Reference in New Issue
Block a user