fix(l1): resolve PR #193 backend review findings (1,4,5,6,7,8,9,10)

Server-assigns a short uuid4-derived id (the first 8 hex chars of a uuid4) to every AI-generated node that lacks one (Finding 1 showstopper: nodes had no id but the advance protocol keys on node_id, so ai_build walks never advanced past question 1). Replaces the hidden {"node_type":"meta"} walked_path convention with real category/problem_text/pending_node columns on l1_walk_sessions (migration 61dda4f615c6) — fixes junk proposals + off-by-one depth cap (Findings 8,9), and pending_node replays the served node on re-mount (no duplicate paid LLM call). Intake honors explicit flow_id and adhoc=True (Findings 4,5); flow_proposals.l1_session_id FK -> CASCADE (Finding 6 time bomb); L1 category GET is owner+admin like PATCH and require_account_owner_or_admin delegates to User.can_manage_account (Finding 7); escalate falls back to default recipients + filters deleted_at + warns when empty (Finding 10). Cleanups: dead ticket_ref removed, IntakeResponse per-outcome validator, unused acknowledged dropped, escalations partial index, restored a deleted audit assertion. Full Phase 2A backend set: 110 passed / 0 failed. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-09 15:55:45 -04:00
parent 42a4536c63
commit ac89e7b2fa
17 changed files with 592 additions and 80 deletions
--- a/backend/app/api/deps.py
+++ b/backend/app/api/deps.py
@@ -279,10 +279,11 @@ async def require_account_owner(
 async def require_account_owner_or_admin(
    current_user: Annotated[User, Depends(get_current_active_user)]
 ) -> User:
-    """Require account owner or account-admin (blocks engineers); super_admin bypass."""
-    if current_user.is_super_admin:
-        return current_user
-    if current_user.account_role in ("owner", "admin"):
+    """Require account owner or account-admin (blocks engineers); super_admin bypass.
+
+    Delegates to ``User.can_manage_account`` so the rule lives in exactly one place.
+    """
+    if current_user.can_manage_account:
        return current_user
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
--- a/backend/app/api/endpoints/accounts.py
+++ b/backend/app/api/endpoints/accounts.py
@@ -28,7 +28,6 @@ from app.api.deps import (
    require_account_owner,
    require_account_owner_or_admin,
    require_engineer_or_admin,
-    require_l1_or_above,
 )
 from app.services import l1_category_service
 from app.services.seat_enforcement import check_seat_available, get_seat_usage
@@ -175,12 +174,13 @@ async def get_my_account_seat_usage(
@router.get("/me/l1-categories", response_model=L1CategoriesResponse)
 async def get_l1_categories(
    db: Annotated[AsyncSession, Depends(get_db)],
-    current_user: Annotated[User, Depends(require_l1_or_above)],
+    current_user: Annotated[User, Depends(require_account_owner_or_admin)],
 ):
    """The account's enabled L1 AI-build categories + the available + hard-floor lists.

-    Readable by any L1-or-above user (the walker needs to know what's buildable);
-    only owners/admins may change it (PATCH below).
+    Owner/admin only — this is a settings surface, so read and write must agree
+    (the walker is gated server-side via match_or_build and never fetches this).
+    Same dep as PATCH so account admins can both read and save (Finding 7).
    """
    enabled = await l1_category_service.get_enabled_categories(current_user.account_id, db)
    return L1CategoriesResponse(
--- a/backend/app/api/endpoints/l1.py
+++ b/backend/app/api/endpoints/l1.py
@@ -35,6 +35,8 @@ def _to_response(session: L1WalkSession) -> WalkSessionResponse:
    return WalkSessionResponse(
        id=session.id,
        session_kind=session.session_kind,
+        category=session.category,
+        problem_text=session.problem_text,
        flow_id=session.flow_id,
        flow_proposal_id=session.flow_proposal_id,
        current_node_id=session.current_node_id,
@@ -68,6 +70,17 @@ async def _get_session_or_404(
    return session


+async def _create_intake_ticket(db: AsyncSession, payload: IntakeRequest, user: User):
+    return await internal_ticket_service.create_ticket(
+        db,
+        account_id=user.account_id,
+        created_by_user_id=user.id,
+        problem_statement=payload.problem_statement,
+        customer_name=payload.customer_name,
+        customer_contact=payload.customer_contact,
+    )
+
+
@router.post("/intake", response_model=IntakeResponse)
 async def intake(
    payload: IntakeRequest,
@@ -76,18 +89,49 @@ async def intake(
 ):
    """L1 intake (Phase 2A): match a published flow, else gate + build.

-    Runs the match_or_build orchestrator. Outcomes:
+    Two explicit shortcuts run before the matcher (the client already knows what
+    it wants, so re-running the embedding + pgvector + keyword pipeline would be
+    wasteful and — for flow_id — can't reliably re-derive the same flow):
+    - flow_id set  → start that published flow directly (suggest card's "Use this flow").
+    - adhoc=True   → start a free-form ad-hoc walk (out_of_scope prompt's fallback).
+
+    Otherwise match_or_build dispatches:
    - matched  → create ticket + flow session, walk the published flow.
-    - build    → create ticket + ai_build session (category persisted as a hidden
-                 meta entry on walked_path for /next-node), walk an AI-built tree.
+    - build    → create ticket + ai_build session (category + problem_text stored
+                 on the session for /next-node), walk an AI-built tree.
    - suggest  → near-miss prompt; no session created.
    - out_of_scope → category disabled/unknown; no session created.
    """
+    # Explicit flow_id: bypass the matcher, walk the flow the client already holds.
+    if payload.flow_id is not None:
+        ticket = await _create_intake_ticket(db, payload, user)
+        session = await l1_session_service.start_flow_session(
+            db, account_id=user.account_id, user=user, flow_id=payload.flow_id,
+            ticket_id=str(ticket.id), ticket_kind="internal",
+        )
+        await db.commit()
+        return IntakeResponse(
+            outcome="matched", session_id=session.id, session_kind=session.session_kind,
+            ticket_id=str(ticket.id), ticket_kind="internal", flow_id=payload.flow_id,
+        )
+
+    # Explicit ad-hoc walk: the out_of_scope fallback ("Walk it ad-hoc").
+    if payload.adhoc:
+        ticket = await _create_intake_ticket(db, payload, user)
+        session = await l1_session_service.start_adhoc_session(
+            db, account_id=user.account_id, user=user,
+            ticket_id=str(ticket.id), ticket_kind="internal",
+        )
+        await db.commit()
+        return IntakeResponse(
+            outcome="adhoc", session_id=session.id, session_kind=session.session_kind,
+            ticket_id=str(ticket.id), ticket_kind="internal",
+        )
+
    result = await match_or_build.match_or_build(
        user.account_id,
        payload.problem_statement,
        None,
-        ticket_ref="",
        db=db,
        force_build=payload.force_build,
    )
@@ -102,14 +146,7 @@ async def intake(
        )

    # matched OR build → create a ticket and a session
-    ticket = await internal_ticket_service.create_ticket(
-        db,
-        account_id=user.account_id,
-        created_by_user_id=user.id,
-        problem_statement=payload.problem_statement,
-        customer_name=payload.customer_name,
-        customer_contact=payload.customer_contact,
-    )
+    ticket = await _create_intake_ticket(db, payload, user)
    if outcome == "matched":
        session = await l1_session_service.start_flow_session(
            db,
@@ -126,13 +163,9 @@ async def intake(
            user=user,
            ticket_id=str(ticket.id),
            ticket_kind="internal",
+            category=result.get("category", "unknown"),
+            problem_text=payload.problem_statement,
        )
-        # Persist the classified category as a hidden meta entry so /next-node
-        # can recover it (no dedicated column; ai_tree_builder skips meta entries).
-        session.walked_path = [
-            {"node_type": "meta", "category": result.get("category", "unknown")}
-        ]
-        await db.flush()

    await db.commit()
    return IntakeResponse(
@@ -293,27 +326,18 @@ async def next_node(
 ):
    """Record the answer/ack on the current node, then generate the next node.

-    problem_text comes from the linked internal ticket; category from the hidden
-    meta entry seeded at intake (ai_tree_builder skips meta entries). node_text is
-    the rendered text of the node being answered (the client holds it) so the
-    walked path and the captured tree stay legible.
+    problem_text + category are read straight off the session (stored at intake) —
+    no ticket re-fetch, no walked_path scan. node_text is the rendered text of the
+    node being answered (the client holds it) so the walked path and the captured
+    tree stay legible.
    """
    session = await _get_session_or_404(db, session_id, user)
-    ticket = await internal_ticket_service.get_ticket(
-        db, ticket_id=UUID(session.ticket_id)
-    )
-    problem_text = ticket.problem_statement if ticket else ""
-    category = next(
-        (s.get("category") for s in (session.walked_path or [])
-         if s.get("node_type") == "meta"),
-        "unknown",
-    )
    try:
        node = await l1_session_service.advance_ai_build(
            db,
            session_id=session_id,
-            problem_text=problem_text,
-            category=category or "unknown",
+            problem_text=session.problem_text or "",
+            category=session.category or "unknown",
            node_id=payload.node_id,
            node_text=payload.node_text,
            answer=payload.answer,
--- a/backend/app/models/flow_proposal.py
+++ b/backend/app/models/flow_proposal.py
@@ -86,7 +86,13 @@ class FlowProposal(Base):
    )
    l1_session_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
-        ForeignKey("l1_walk_sessions.id", ondelete="SET NULL"),
+        # CASCADE, not SET NULL: the exactly-one-source CHECK below means an
+        # L1-sourced proposal has source_session_id NULL by construction, so a
+        # SET NULL on l1_session deletion would NULL both columns and the
+        # non-deferrable CHECK would abort the DELETE — making any L1 session
+        # referenced by a proposal undeletable (hard_delete_user, GDPR purge).
+        # The proposal dies with its source, matching source_session_id's CASCADE.
+        ForeignKey("l1_walk_sessions.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
--- a/backend/app/models/l1_walk_session.py
+++ b/backend/app/models/l1_walk_session.py
@@ -8,8 +8,7 @@ import uuid
 from datetime import datetime, timezone
 from typing import Any, Optional, TYPE_CHECKING

-import sqlalchemy as sa
-from sqlalchemy import String, Text, DateTime, Boolean, ForeignKey, CheckConstraint
+from sqlalchemy import String, Text, DateTime, Boolean, ForeignKey, CheckConstraint, Index
 from sqlalchemy import text as sa_text
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 from sqlalchemy.dialects.postgresql import UUID, JSONB
@@ -59,6 +58,12 @@ class L1WalkSession(Base):
            "OR (session_kind IN ('adhoc', 'ai_build') AND flow_id IS NULL AND flow_proposal_id IS NULL)",
            name="ck_l1_walk_sessions_target_consistency",
        ),
+        # Partial index backing GET /l1/escalations (the engineer handoff queue).
+        Index(
+            "ix_l1_walk_sessions_escalated",
+            "account_id", sa_text("last_step_at DESC"),
+            postgresql_where=sa_text("status = 'escalated'"),
+        ),
    )

    id: Mapped[uuid.UUID] = mapped_column(
@@ -86,6 +91,14 @@ class L1WalkSession(Base):

    # ── Session kind + target ──
    session_kind: Mapped[str] = mapped_column(String(20), nullable=False)
+    # AI-build context (ai_build sessions only). Persisted at intake so /next-node
+    # never has to re-fetch the ticket or scan walked_path to recover them — they
+    # are immutable for the life of the session. Replaces the former hidden
+    # ``{"node_type":"meta"}`` walked_path entry (deleted: it leaked into every
+    # consumer that forgot to skip it — junk proposals, off-by-one depth cap,
+    # blank escalation rows).
+    category: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
+    problem_text: Mapped[Optional[str]] = mapped_column(Text(), nullable=True)
    flow_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("trees.id", ondelete="SET NULL"),
@@ -99,6 +112,12 @@ class L1WalkSession(Base):

    # ── Navigation state ──
    current_node_id: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
+    # The node served to the tech but not yet answered (ai_build only). Replayed on
+    # the next /next-node call with node_id=None so a refresh / StrictMode double-mount
+    # doesn't fire a fresh paid LLM call (and possibly swap the question mid-answer).
+    pending_node: Mapped[Optional[dict[str, Any]]] = mapped_column(
+        JSONB(), nullable=True,
+    )
    walked_path: Mapped[list[dict[str, Any]]] = mapped_column(
        JSONB(), nullable=False, server_default=sa_text("'[]'::jsonb"),
    )
--- a/backend/app/schemas/l1.py
+++ b/backend/app/schemas/l1.py
@@ -3,33 +3,54 @@ from datetime import datetime
 from typing import Any, Literal, Optional
 from uuid import UUID

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator


 class IntakeRequest(BaseModel):
    problem_statement: str = Field(..., min_length=1)
    customer_name: Optional[str] = None
    customer_contact: Optional[str] = None
+    # When set, bypass the matcher and start this published flow directly (the
+    # suggest card's "Use this flow" — the client already holds the flow id).
    flow_id: Optional[UUID] = None
+    # When True, start an ad-hoc free-form walk (the out_of_scope prompt's
+    # "Walk it ad-hoc" fallback). Meant to be exclusive with flow_id/force_build;
+    # if several are set, flow_id wins over adhoc, and adhoc over force_build.
+    adhoc: bool = False
    force_build: bool = False


+# Outcomes that start a session (and therefore must carry session_id + ticket).
+_SESSION_OUTCOMES = {"matched", "build", "adhoc"}
+
+
 class IntakeResponse(BaseModel):
-    outcome: Literal["matched", "suggest", "out_of_scope", "build"]
+    outcome: Literal["matched", "suggest", "out_of_scope", "build", "adhoc"]
    session_id: Optional[UUID] = None
    session_kind: Optional[Literal["flow", "proposal", "adhoc", "ai_build"]] = None
    ticket_id: Optional[str] = None
-    ticket_kind: Optional[str] = None
+    ticket_kind: Optional[Literal["psa", "internal"]] = None
    flow_id: Optional[UUID] = None   # for 'matched'
    near_miss: Optional[dict] = None  # for 'suggest'
    category: Optional[str] = None   # for 'out_of_scope'

+    @model_validator(mode="after")
+    def _check_outcome_invariants(self) -> "IntakeResponse":
+        """Restore the per-outcome contract the frontend depends on: a session
+        outcome MUST carry the session_id + ticket the walker navigates to, so a
+        backend regression surfaces here instead of as /l1/walk/undefined."""
+        if self.outcome in _SESSION_OUTCOMES:
+            if self.session_id is None or self.ticket_id is None:
+                raise ValueError(
+                    f"intake outcome '{self.outcome}' requires session_id + ticket_id"
+                )
+        return self
+

 class NextNodeRequest(BaseModel):
    node_id: Optional[str] = None
    node_text: Optional[str] = None  # rendered text of the node being answered (carry-forward Task 8)
-    answer: Optional[str] = None     # 'yes' | 'no' for questions
-    acknowledged: Optional[bool] = None
+    answer: Optional[str] = None     # 'yes' | 'no' for questions; None acks an instruction
    note: Optional[str] = None


@@ -70,6 +91,8 @@ class EscalateWithoutWalkRequest(BaseModel):
 class WalkSessionResponse(BaseModel):
    id: UUID
    session_kind: str
+    category: Optional[str] = None
+    problem_text: Optional[str] = None
    flow_id: Optional[UUID]
    flow_proposal_id: Optional[UUID]
    current_node_id: Optional[str]
--- a/backend/app/services/ai_tree_builder.py
+++ b/backend/app/services/ai_tree_builder.py
@@ -7,6 +7,7 @@ for flywheel capture.
 """
 import logging
 from typing import Any, Optional
+from uuid import uuid4

 from app.core.ai_provider import get_ai_provider
 from app.core.config import settings
@@ -45,19 +46,21 @@ No prose, no markdown fences.
 """


-def _strip_meta(walked_path: list[dict]) -> list[dict]:
-    """Drop the hidden ``meta`` entry (category carrier) the intake endpoint seeds.
+def _assign_id(node: dict[str, Any]) -> dict[str, Any]:
+    """Stamp a stable server-side id on a generated node (Finding 1).

-    The first walked_path entry on an ai_build session may be a
-    ``{"node_type": "meta", "category": ...}`` marker used to persist the
-    classified category; it is not a real walk step and must be excluded from
-    both model context and tree normalization.
+    The SYSTEM_PROMPT never asks the model for an id — and we must not, since a
+    model-invented id is neither stable nor trustworthy. But the advance protocol
+    keys on ``node_id``: without one, the answer to every node is discarded and
+    the walk can never progress past the first question. So every node the builder
+    hands back — generated, depth-capped, or generation-failed — gets an id here.
    """
-    return [s for s in walked_path if s.get("node_type") != "meta"]
+    if not node.get("id"):
+        node["id"] = uuid4().hex[:8]
+    return node


 def _build_context(problem_text: str, category: str, walked_path: list[dict]) -> str:
-    walked_path = _strip_meta(walked_path)
    lines = [f"PROBLEM: {problem_text}", f"CATEGORY: {category}", "STEPS SO FAR:"]
    if not walked_path:
        lines.append("(none yet — produce the first diagnostic question)")
@@ -81,11 +84,11 @@ def validate_node(node: dict[str, Any]) -> dict[str, Any]:

 def escalate_if_depth_exceeded(walked_path: list[dict]) -> Optional[dict[str, Any]]:
    if len(walked_path) >= MAX_DEPTH:
-        return {
+        return _assign_id({
            "node_type": "escalate",
            "reason_category": "depth_cap",
            "text": "Reached the L1 troubleshooting depth limit — escalating to engineering.",
-        }
+        })
    return None


@@ -108,16 +111,16 @@ async def generate_next_node(
                max_tokens=1024,
            )
            node = parse_llm_json(raw)
-            return validate_node(node)
+            return _assign_id(validate_node(node))
        except Exception as e:
            logger.warning("ai_tree_builder node attempt %d failed: %s", attempt + 1, e)
            continue

-    return {
+    return _assign_id({
        "node_type": "escalate",
        "reason_category": "generation_failed",
        "text": "Could not generate a safe next step — escalating to engineering.",
-    }
+    })


 def normalize_walked_path(walked_path: list[dict]) -> dict[str, Any]:
@@ -128,7 +131,6 @@ def normalize_walked_path(walked_path: list[dict]) -> dict[str, Any]:
    Returns {id, nodes: {id: node}} — a dict with an id (passes the proposal
    approval guard).
    """
-    walked_path = _strip_meta(walked_path)
    nodes: dict[str, Any] = {}
    if not walked_path:
        root_id = "root"
--- a/backend/app/services/l1_session_service.py
+++ b/backend/app/services/l1_session_service.py
@@ -3,6 +3,7 @@
 start_* functions live in T12; step/notes are T13; resolve/escalate are T14.
 """
 import json
+import logging
 from datetime import datetime, timezone
 from typing import Optional
 from uuid import UUID
@@ -18,6 +19,8 @@ from app.services import ai_tree_builder
 from app.services import internal_ticket_service
 from app.services.notification_service import notify

+logger = logging.getLogger(__name__)
+

 def _resolve_acting_as(user: User) -> Optional[str]:
    """An engineer (whether covering or not) gets tagged for audit when using L1 surface.
@@ -108,8 +111,15 @@ async def start_ai_build_session(
    user: User,
    ticket_id: str,
    ticket_kind: str,
+    category: Optional[str] = None,
+    problem_text: Optional[str] = None,
 ) -> L1WalkSession:
-    """Start an AI-built tree session (nodes generated on demand via next-node)."""
+    """Start an AI-built tree session (nodes generated on demand via next-node).
+
+    ``category`` and ``problem_text`` are the immutable AI-build context, stored
+    once here so /next-node never re-derives them (no ticket re-fetch, no
+    walked_path scan, no hidden meta entry).
+    """
    session = L1WalkSession(
        account_id=account_id,
        created_by_user_id=user.id,
@@ -117,6 +127,8 @@ async def start_ai_build_session(
        ticket_id=ticket_id,
        ticket_kind=ticket_kind,
        session_kind="ai_build",
+        category=category,
+        problem_text=problem_text,
    )
    db.add(session)
    await db.flush()
@@ -144,6 +156,11 @@ async def advance_ai_build(
    the caller/endpoint, which holds the served node. Storing it here ensures that
    later nodes receive full prior-step context via ``ai_tree_builder._build_context``
    and that captured flywheel trees (``normalize_walked_path``) have meaningful text.
+
+    Pending-node replay (Finding 8): the node served but not yet answered is stored
+    on ``session.pending_node``. When node_id is None and a pending node exists (a
+    refresh, a StrictMode double-mount, or back/forward), we replay it instead of
+    firing a fresh paid LLM call that might also swap the question mid-answer.
    """
    session = await db.get(L1WalkSession, session_id)
    if not session:
@@ -168,9 +185,14 @@ async def advance_ai_build(
        }
        # JSONB requires assigning a new list — in-place mutation isn't tracked
        session.walked_path = [*session.walked_path, entry]
+        session.pending_node = None  # the served node has now been answered
+    elif session.pending_node is not None:
+        # Re-mount before answering — return the already-served node verbatim.
+        return session.pending_node

    next_node = await ai_tree_builder.generate_next_node(
        problem_text, category, session.walked_path)
+    session.pending_node = next_node
    session.current_node_id = next_node.get("id")
    session.last_step_at = datetime.now(timezone.utc)
    await db.flush()
@@ -361,24 +383,36 @@ async def escalate(
    )

    # Notify engineers (owner/admin/engineer roles) about the escalation.
+    # Filter soft-deleted users too (is_active alone misses them — handoff_manager
+    # does the same): a deleted engineer must not be paged.
    eng_rows = await db.execute(
        select(User.id).where(
            User.account_id == session.account_id,
            User.is_active.is_(True),
+            User.deleted_at.is_(None),
            User.account_role.in_(("owner", "admin", "engineer")),
        )
    )
    target_ids = [r[0] for r in eng_rows.all()]
+    if not target_ids:
+        # No eligible engineer. Passing [] to notify() would suppress the in-app
+        # notification entirely (explicit-empty is honored). Fall back to the
+        # default owner/admin recipient set instead of silently dropping it.
+        logger.warning(
+            "L1 escalation for session %s has no active engineer recipients; "
+            "falling back to default owner/admin notification set.",
+            session.id,
+        )
    await notify(
        "l1.session.escalated",
        session.account_id,
        {
-            "problem_summary": session.ticket_id,
+            "problem_summary": session.problem_text or session.ticket_id,
            "session_id": str(session.id),
            "reason_category": reason_category,
        },
        db,
-        target_user_ids=target_ids,
+        target_user_ids=target_ids or None,
    )

    await db.flush()
--- a/backend/app/services/match_or_build.py
+++ b/backend/app/services/match_or_build.py
@@ -52,7 +52,6 @@ async def match_or_build(
    account_id: UUID,
    problem_text: str,
    problem_domain: Optional[str],
-    ticket_ref: str,  # passed through for caller/session use; not consumed here (Task 10)
    *,
    db: AsyncSession,
    force_build: bool = False,