feat(pilot): Phase 2 — What we know (facts) with stable task-lane IDs

Adds the load-bearing structural feature of the FlowPilot migration: a "What we know" panel that holds confirmed facts for a session, fed by AI [PROMOTE] markers and engineer-added notes. Facts feed the resolution note preview (Phase 3) and survive across turns via stable UUIDs assigned to pending_task_lane items.

Backend:
- FactSynthesisService: create/update/soft-delete facts with atomic state_version bumps; LLM-backed synthesize_from_question/check on the fact_synthesis (Haiku) action tier per Section 6.6.
- /api/v1/ai-sessions/{id}/facts CRUD + /facts/promote (proposed_text or via synthesis). PATCH returns 403 for question/diagnostic_check facts (edit the source item instead, Section 7.3).
- unified_chat_service: [PROMOTE] marker parser (JSON-block per Section 8.1 spec drift note), stable-UUID assignment for pending_task_lane questions/actions preserved by exact text/label match across turns.
- ASSISTANT_SYSTEM_PROMPT: documents [PROMOTE] format, when to/not to emit, hallucination guardrails, source_ref handling.
- 17 tests covering parser, stable IDs, service validation, CRUD, editability rule, both promote modes, 422 null-synthesis path, state_version invariant.

Frontend:
- src/components/pilot/sections/{WhatWeKnow,WhatWeKnowItem,AddNoteButton} — green-gradient section above Questions, dashed-circle check, inline edit/delete gated by the server's editable flag.
- TaskLane gains a whatWeKnowSlot prop (existing assistant/ folder kept per the doc's "rename is opportunistic" guidance).
- AssistantChatPage fetches facts on selectChat and refetches after each chat send (so [PROMOTE]-synthesized facts appear immediately); auto-opens the lane when facts exist.

Verification: an end-to-end smoke test against the local docker stack confirms all five endpoints (list/create/patch/delete/promote) plus the 403 editability rule. The pytest suite verifies the same with a mocked LLM. The live [PROMOTE] flow remains untested until used in the UI — the marker shape is covered by parser tests.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 21:13:44 -04:00
parent 19cfd71995
commit 625dba7548
15 changed files with 1922 additions and 21 deletions
--- a/backend/app/services/unified_chat_service.py
+++ b/backend/app/services/unified_chat_service.py
@@ -3,10 +3,19 @@
 Replaces assistant_chat_service for new chat sessions. Messages are stored
 in ai_sessions.conversation_messages JSONB. Reuses the same AI calling
 infrastructure and system prompt from assistant_chat_service.
+
+## Markers parsed here
+- `[QUESTIONS]` / `[ACTIONS]` — task-lane items shown to the engineer
+- `[FORK]` — diagnostic forking, creates SessionBranch rows
+- `[PROMOTE]` (Phase 2) — surfaces a fact to the What-we-know section.
+  Items in pending_task_lane carry stable UUIDs (assigned here) so PROMOTE
+  source_refs survive across turns even when the model re-emits the same
+  question/action.
 """
 import json
 import logging
 import re
+import uuid as _uuid
 from typing import Any
 from uuid import UUID

@@ -19,6 +28,7 @@ from app.services.assistant_chat_service import (
    _call_ai,
    _auto_title,
 )
+from app.services.fact_synthesis_service import FactSynthesisService
 from app.services.rag_service import search as rag_search, build_rag_context, extract_suggested_flows

 logger = logging.getLogger(__name__)
@@ -147,6 +157,176 @@ def _parse_questions_marker(ai_content: str) -> tuple[str, list[dict[str, Any]]
    return cleaned, valid_questions


+def _parse_promote_marker(ai_content: str) -> tuple[str, list[dict[str, Any]] | None]:
+    """Extract every [PROMOTE]...[/PROMOTE] JSON block from an AI response.
+
+    Each block holds one JSON object describing a candidate fact:
+      {"source_type": "question"|"diagnostic_check"|"ai_synthesis",
+       "source_ref": "<task_lane_item_uuid>" | null,
+       "text": "<fact text>",
+       "summary": "<short provenance, optional>"}
+
+    A block may be wrapped in a ``` / ```json code fence; the fence is removed
+    before the JSON is parsed.
+
+    Args:
+        ai_content: Raw assistant text, possibly containing PROMOTE blocks.
+
+    Returns:
+        (cleaned_content, items_or_None). When no block matches, the input is
+        returned untouched together with None. Otherwise every matched block
+        is removed from the text (the result is stripped) and the second
+        element is the list of valid facts, each a dict with the keys
+        `source_type`, `source_ref` (UUID or None), `text` and `summary`
+        (str or None) — or None when no block survived validation.
+
+    Invalid items are dropped with a warning — a malformed PROMOTE should
+    never break the chat response. That includes wrongly typed fields: a
+    non-string `text` is treated as missing and a non-string `summary` as
+    absent, instead of raising AttributeError on `.strip()`.
+
+    Per FLOWPILOT-MIGRATION.md Section 8.1, the model emits text + summary
+    inline so no LLM round-trip is needed to persist the fact.
+    """
+    # One compiled pattern serves both extraction and the final strip, so the
+    # two passes cannot drift apart.
+    marker_re = re.compile(r"\[PROMOTE\]\s*([\s\S]*?)\s*\[/PROMOTE\]")
+    blocks = list(marker_re.finditer(ai_content))
+    if not blocks:
+        return ai_content, None
+
+    items: list[dict[str, Any]] = []
+    for m in blocks:
+        raw = m.group(1).strip()
+        if raw.startswith("```"):
+            # Models sometimes fence the JSON; peel the fence off first.
+            raw = re.sub(r"^```(?:json)?\s*", "", raw)
+            raw = re.sub(r"\s*```$", "", raw)
+        try:
+            data = json.loads(raw)
+        except (json.JSONDecodeError, ValueError) as e:
+            logger.warning("Failed to parse [PROMOTE] block: %s", e)
+            continue
+
+        if not isinstance(data, dict):
+            logger.warning("[PROMOTE] block must be a JSON object, got %s", type(data).__name__)
+            continue
+
+        source_type = data.get("source_type")
+        text_raw = data.get("text")
+        summary_raw = data.get("summary")
+        source_ref_raw = data.get("source_ref")
+
+        # Guard the types before calling .strip(): JSON lets the model send
+        # numbers, lists or objects where a string is expected.
+        text = text_raw.strip() if isinstance(text_raw, str) else ""
+        summary = (summary_raw.strip() or None) if isinstance(summary_raw, str) else None
+
+        if source_type not in ("question", "diagnostic_check", "ai_synthesis"):
+            # `user_note` is engineer-only, not an AI-emittable type.
+            logger.warning("Invalid [PROMOTE] source_type=%r, skipping", source_type)
+            continue
+        if not text:
+            logger.warning("[PROMOTE] block missing text, skipping")
+            continue
+
+        source_ref: UUID | None = None
+        if source_ref_raw:
+            try:
+                source_ref = UUID(str(source_ref_raw))
+            except (ValueError, AttributeError):
+                logger.warning("[PROMOTE] source_ref %r is not a valid UUID, dropping ref", source_ref_raw)
+                source_ref = None
+
+        # `ai_synthesis` must NEVER carry a source_ref (there is no
+        # question/check item to point at). Discard whatever the model sent
+        # here rather than tripping the FactSynthesisService validation later.
+        if source_type == "ai_synthesis":
+            source_ref = None
+
+        items.append({
+            "source_type": source_type,
+            "source_ref": source_ref,
+            "text": text,
+            "summary": summary,
+        })
+
+    # Strip all PROMOTE blocks from display content — engineers see facts in
+    # the What-we-know panel, not as raw markers in the chat.
+    cleaned = marker_re.sub("", ai_content).strip()
+
+    return cleaned, items or None
+
+
+def _assign_stable_task_lane_ids(
+    prev_lane: dict[str, Any] | None,
+    questions: list[dict[str, Any]] | None,
+    actions: list[dict[str, Any]] | None,
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    """Stamp each task-lane item with an `id`, reusing last turn's where possible.
+
+    The model frequently repeats a question or action from an earlier turn.
+    When the repeated item matches one stored in `prev_lane`, the stored id is
+    carried over so references to it (e.g. `session_facts.source_ref`) remain
+    valid; every other item receives a fresh uuid4 string.
+
+    Matching is exact, after whitespace stripping:
+    - questions are compared on `text`
+    - actions are compared on `label`
+
+    Args:
+        prev_lane: The previously stored lane (`{"questions": [...],
+            "actions": [...]}`) or None.
+        questions: Question dicts parsed this turn, or None.
+        actions: Action dicts parsed this turn, or None.
+
+    Returns:
+        (questions, actions) as new lists of new dicts, each carrying an `id`
+        key. Any `id` already present on an incoming item is overwritten.
+    """
+
+    def _index_prior(entries: list[Any], field: str) -> dict[str, str]:
+        # Stripped match text -> stored id, for entries that have both.
+        known: dict[str, str] = {}
+        for entry in entries:
+            if isinstance(entry, dict) and entry.get("id") and entry.get(field):
+                known[str(entry.get(field) or "").strip()] = str(entry["id"])
+        return known
+
+    def _stamp(
+        entries: list[dict[str, Any]] | None,
+        field: str,
+        known: dict[str, str],
+    ) -> list[dict[str, Any]]:
+        # Copy each entry, attaching the carried-over id or a new one.
+        stamped: list[dict[str, Any]] = []
+        for entry in entries or []:
+            match_text = str(entry.get(field) or "").strip()
+            carried = known.get(match_text) if match_text else None
+            stamped.append({**entry, "id": carried or str(_uuid.uuid4())})
+        return stamped
+
+    lane = prev_lane or {}
+    known_question_ids = _index_prior(lane.get("questions") or [], "text")
+    known_action_ids = _index_prior(lane.get("actions") or [], "label")
+
+    return (
+        _stamp(questions, "text", known_question_ids),
+        _stamp(actions, "label", known_action_ids),
+    )
+
+
+async def _persist_promote_items(
+    *,
+    db: AsyncSession,
+    session: AISession,
+    user_id: UUID,
+    items: list[dict[str, Any]],
+) -> None:
+    """Store parsed [PROMOTE] items as facts, one `create_fact` call per item.
+
+    Best-effort by design: a bad PROMOTE item must never break the chat
+    response. The engineer still receives the AI's analysis and can add a
+    missing fact by hand, so any `Exception` raised while storing one item is
+    logged and the loop moves on to the next.
+
+    Args:
+        db: Handed to the `FactSynthesisService` constructor.
+        session: Supplies `id` and `account_id` for each created fact.
+        user_id: Passed through to `create_fact` as `user_id`.
+        items: Dicts with `source_type`, `text`, `summary` and `source_ref`
+            keys, as produced by the PROMOTE parser. Empty/None is a no-op.
+    """
+    if not items:
+        return
+
+    fact_service = FactSynthesisService(db)
+    for candidate in items:
+        try:
+            # Built inside the try so a missing key is logged, not raised.
+            fact_fields = {
+                "session_id": session.id,
+                "account_id": session.account_id,
+                "user_id": user_id,
+                "source_type": candidate["source_type"],
+                "text": candidate["text"],
+                "summary": candidate["summary"],
+                "source_ref": candidate["source_ref"],
+            }
+            await fact_service.create_fact(**fact_fields)
+        except ValueError:
+            # Validation failure (e.g. empty text after strip, or
+            # source_ref-on-ai_synthesis race). Dropping a single fact is
+            # preferable to aborting the whole chat turn.
+            logger.warning(
+                "Skipping invalid PROMOTE item for session %s: %r",
+                session.id, candidate, exc_info=True,
+            )
+        except Exception:
+            logger.exception(
+                "Failed to persist PROMOTE item for session %s", session.id
+            )
+
+
 async def create_chat_session(
    user_id: UUID,
    account_id: UUID,
@@ -251,10 +431,11 @@ async def send_chat_message(
            if session.status == "paused":
                session.status = "active"

-            # Check for fork, actions, and questions markers in branch response too
+            # Check for fork, actions, questions, and promote markers in branch response too
            branch_display, branch_fork_data = _parse_fork_marker(ai_content)
            branch_display, branch_actions_data = _parse_actions_marker(branch_display)
            branch_display, branch_questions_data = _parse_questions_marker(branch_display)
+            branch_display, branch_promote_items = _parse_promote_marker(branch_display)
            if branch_display != ai_content:
                # Store stripped content in branch history
                msgs[-1] = {"role": "assistant", "content": branch_display}
@@ -288,15 +469,30 @@ async def send_chat_message(
                except Exception:
                    logger.exception("Failed to create fork within branch for session %s", session.id)

-            # Persist task lane state on session
+            # Persist task lane state on session — assign stable UUIDs so any
+            # PROMOTE marker emitted later can reference the same items.
            if branch_questions_data or branch_actions_data:
+                stable_qs, stable_as = _assign_stable_task_lane_ids(
+                    session.pending_task_lane,
+                    branch_questions_data,
+                    branch_actions_data,
+                )
                session.pending_task_lane = {
-                    "questions": branch_questions_data or [],
-                    "actions": branch_actions_data or [],
+                    "questions": stable_qs,
+                    "actions": stable_as,
                }
            else:
                session.pending_task_lane = None

+            # Persist any PROMOTE items emitted in this turn. Done AFTER the
+            # task-lane write so source_refs to brand-new items would still
+            # land on persisted UUIDs (the model can also reference IDs from
+            # the previous turn, which were already persisted).
+            if branch_promote_items:
+                await _persist_promote_items(
+                    db=db, session=session, user_id=user_id, items=branch_promote_items,
+                )
+
            suggested_flows = extract_suggested_flows(
                await rag_search(query=message, account_id=account_id, db=db, limit=8)
            )
@@ -343,9 +539,13 @@ async def send_chat_message(
    # Check for questions marker in AI response
    display_content, questions_data = _parse_questions_marker(display_content)

+    # Check for promote markers — facts the AI is surfacing to What we know.
+    display_content, promote_items = _parse_promote_marker(display_content)
+
    logger.info(
-        "Marker parsing results — actions: %s, questions: %s, fork: %s, raw_length: %d, display_length: %d",
+        "Marker parsing results — actions: %s, questions: %s, fork: %s, promote: %d, raw_length: %d, display_length: %d",
        bool(actions_data), bool(questions_data), bool(fork_data),
+        len(promote_items or []),
        len(ai_content), len(display_content),
    )

@@ -410,15 +610,26 @@ async def send_chat_message(
            logger.exception("Failed to create fork for session %s", session_id)
            # Fork failed but chat message still sent — don't break the response

-    # Persist task lane state on session
+    # Persist task lane state on session — assign stable UUIDs so any PROMOTE
+    # marker (this turn or a later one) can reference the same items.
    if questions_data or actions_data:
+        stable_qs, stable_as = _assign_stable_task_lane_ids(
+            session.pending_task_lane, questions_data, actions_data,
+        )
        session.pending_task_lane = {
-            "questions": questions_data or [],
-            "actions": actions_data or [],
+            "questions": stable_qs,
+            "actions": stable_as,
        }
    else:
        session.pending_task_lane = None

+    # Persist any PROMOTE items emitted in this turn. Done after task-lane
+    # assignment so source_refs the model invented this turn already exist.
+    if promote_items:
+        await _persist_promote_items(
+            db=db, session=session, user_id=user_id, items=promote_items,
+        )
+
    suggested_flows = extract_suggested_flows(rag_results)

    return display_content, suggested_flows, session, fork_metadata, actions_data, questions_data