diff --git a/backend/app/api/endpoints/session_facts.py b/backend/app/api/endpoints/session_facts.py new file mode 100644 index 00000000..f4a2ac07 --- /dev/null +++ b/backend/app/api/endpoints/session_facts.py @@ -0,0 +1,315 @@ +"""Session fact endpoints — the "What we know" CRUD surface for a FlowPilot session. + +All routes are sub-resources of `/ai-sessions/{session_id}`. Tenant isolation is +enforced by RLS on `session_facts.account_id`; a user from another account +literally cannot see or write facts for this session. + +Editability rule (per FLOWPILOT-MIGRATION.md Section 7.3): +- `user_note` and `ai_synthesis` facts are editable at the card level. +- `question` and `diagnostic_check` facts are read-only at the card level — + edit the source question/check instead. PATCH returns 403 for those. + +Fact promotion writes always bump `ai_sessions.state_version` so the +resolution-note preview cache invalidates (Section 5.5). +""" +import logging +from datetime import datetime, timezone +from typing import Annotated +from uuid import UUID + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin +from app.models.ai_session import AISession +from app.models.session_fact import SessionFact +from app.models.user import User +from app.schemas.session_fact import ( + SessionFactCreateRequest, + SessionFactListResponse, + SessionFactPromoteRequest, + SessionFactResponse, + SessionFactUpdateRequest, +) +from app.services.fact_synthesis_service import ( + FactSynthesisService, + list_facts_for_session, +) + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/ai-sessions/{session_id}", tags=["session-facts"]) + +# Source types whose facts can be edited at the card level (Section 7.3). +_EDITABLE_SOURCE_TYPES = frozenset({"user_note", "ai_synthesis"}) + + +def _to_response(fact: SessionFact) -> SessionFactResponse: + """Wrap an ORM SessionFact in the response model with the editable flag.""" + return SessionFactResponse( + id=fact.id, + session_id=fact.session_id, + text=fact.text, + source_type=fact.source_type, # type: ignore[arg-type] + source_ref=fact.source_ref, + source_summary=fact.source_summary, + created_by=fact.created_by, + created_at=fact.created_at, + updated_at=fact.updated_at, + editable=fact.source_type in _EDITABLE_SOURCE_TYPES, + ) + + +async def _load_session_or_404(db: AsyncSession, session_id: UUID) -> AISession: + """Load the session via RLS-scoped SELECT. Returns 404 if missing/cross-tenant. + + Tenant isolation: RLS on `ai_sessions` filters by current account, so a + cross-tenant access returns no rows and we 404 (rather than 403, which + would leak the row's existence). + """ + result = await db.execute(select(AISession).where(AISession.id == session_id)) + session = result.scalar_one_or_none() + if session is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Session not found") + return session + + +async def _load_fact_or_404( + db: AsyncSession, session_id: UUID, fact_id: UUID +) -> SessionFact: + """Load a non-deleted fact for the session. 404 if missing or already deleted.""" + result = await db.execute( + select(SessionFact).where( + SessionFact.id == fact_id, + SessionFact.session_id == session_id, + SessionFact.deleted_at.is_(None), + ) + ) + fact = result.scalar_one_or_none() + if fact is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Fact not found") + return fact + + +# ── List ── + +@router.get("/facts", response_model=SessionFactListResponse) +async def list_facts( + session_id: UUID, + current_user: Annotated[User, Depends(get_current_active_user)], + db: Annotated[AsyncSession, Depends(get_db)], + _: None = Depends(require_engineer_or_admin), +) -> SessionFactListResponse: + """List facts for a session, oldest first.""" + await _load_session_or_404(db, session_id) + facts = await list_facts_for_session(db, session_id) + return SessionFactListResponse(facts=[_to_response(f) for f in facts]) + + +# ── Create (manual user note) ── + +@router.post("/facts", response_model=SessionFactResponse, status_code=201) +async def create_fact( + session_id: UUID, + body: SessionFactCreateRequest, + current_user: Annotated[User, Depends(get_current_active_user)], + db: Annotated[AsyncSession, Depends(get_db)], + _: None = Depends(require_engineer_or_admin), +) -> SessionFactResponse: + """Create a manual fact (the "+ Add a note" UI affordance). + + Always recorded as `source_type=user_note`. Source-typed creation goes + through `/facts/promote` so the originating item ID is captured. + """ + session = await _load_session_or_404(db, session_id) + service = FactSynthesisService(db) + try: + fact = await service.create_fact( + session_id=session.id, + account_id=session.account_id, + user_id=current_user.id, + source_type="user_note", + text=body.text, + summary=body.summary, + source_ref=None, + ) + except ValueError as e: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + await db.commit() + await db.refresh(fact) + return _to_response(fact) + + +# ── Update ── + +@router.patch("/facts/{fact_id}", response_model=SessionFactResponse) +async def update_fact( + session_id: UUID, + fact_id: UUID, + body: SessionFactUpdateRequest, + current_user: Annotated[User, Depends(get_current_active_user)], + db: Annotated[AsyncSession, Depends(get_db)], + _: None = Depends(require_engineer_or_admin), +) -> SessionFactResponse: + """Edit fact text or summary. + + Returns 403 for `question` and `diagnostic_check`-sourced facts: the + source item is the canonical input, so editing the fact card would + desync the two. Engineers edit the source instead. + """ + fact = await _load_fact_or_404(db, session_id, fact_id) + if fact.source_type not in _EDITABLE_SOURCE_TYPES: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=( + f"Facts sourced from {fact.source_type!r} are read-only at the " + "card level. Edit the originating question or diagnostic check instead." + ), + ) + + if body.text is None and body.summary is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="At least one of `text` or `summary` must be provided", + ) + + service = FactSynthesisService(db) + try: + fact = await service.update_fact(fact, text=body.text, summary=body.summary) + except ValueError as e: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + await db.commit() + await db.refresh(fact) + return _to_response(fact) + + +# ── Soft delete ── + +@router.delete("/facts/{fact_id}", status_code=204) +async def delete_fact( + session_id: UUID, + fact_id: UUID, + current_user: Annotated[User, Depends(get_current_active_user)], + db: Annotated[AsyncSession, Depends(get_db)], + _: None = Depends(require_engineer_or_admin), +) -> None: + """Soft-delete a fact. All source types are deletable. + + Soft delete (rather than hard) preserves provenance for audit and lets + accidental deletes be recovered if needed. The `editable` flag does NOT + control deletion — even read-only facts can be removed when the + underlying question/check turned out to be wrong. + """ + fact = await _load_fact_or_404(db, session_id, fact_id) + service = FactSynthesisService(db) + await service.soft_delete_fact(fact) + await db.commit() + + +# ── Promote (AI marker + engineer-driven) ── + +@router.post("/facts/promote", response_model=SessionFactResponse, status_code=201) +async def promote_fact( + session_id: UUID, + body: SessionFactPromoteRequest, + current_user: Annotated[User, Depends(get_current_active_user)], + db: Annotated[AsyncSession, Depends(get_db)], + _: None = Depends(require_engineer_or_admin), +) -> SessionFactResponse: + """Convert a question answer / check result into a fact. + + Two modes: + + - `proposed_text` provided → persisted as-is. + - `raw_input` provided → server drafts text/summary via FactSynthesisService. + + Exactly one of the two must be set. The engineer-facing UI typically uses + `proposed_text` after letting the engineer review/edit a draft. + """ + if (body.proposed_text is None) == (body.raw_input is None): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Exactly one of `proposed_text` or `raw_input` must be provided", + ) + if body.source_type == "ai_synthesis" and body.source_ref is not None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="`source_ref` must be null for source_type=ai_synthesis", + ) + + session = await _load_session_or_404(db, session_id) + service = FactSynthesisService(db) + + text = body.proposed_text + summary = body.proposed_summary + if text is None: + # Synthesize via LLM. Caller must hint which task-lane item the input + # came from so we can shape the prompt appropriately. + raw = body.raw_input or "" + if body.source_type == "question": + draft = await service.synthesize_from_question( + question_text=_lookup_task_lane_text(session, body.source_ref, "questions"), + raw_answer=raw, + ) + elif body.source_type == "diagnostic_check": + draft = await service.synthesize_from_check( + check_label=_lookup_task_lane_text(session, body.source_ref, "actions"), + check_output=raw, + ) + else: + # ai_synthesis with raw_input: the raw input IS the synthesis. + # Re-run through the question synthesizer with an empty question + # so the conservative prompt still applies. + draft = await service.synthesize_from_question( + question_text="(none — synthesizing from engineer summary)", + raw_answer=raw, + ) + text = draft["text"] + summary = summary or draft["summary"] + if not text: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail=( + "Synthesizer found no substantive fact in the input. " + "Edit the input or supply `proposed_text` directly." + ), + ) + + try: + fact = await service.create_fact( + session_id=session.id, + account_id=session.account_id, + user_id=current_user.id, + source_type=body.source_type, + text=text, + summary=summary, + source_ref=body.source_ref, + ) + except ValueError as e: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + + await db.commit() + await db.refresh(fact) + return _to_response(fact) + + +def _lookup_task_lane_text( + session: AISession, source_ref: UUID | None, list_key: str +) -> str: + """Find the originating question text / action label from pending_task_lane. + + Falls back to a generic placeholder if the source item is no longer in + the lane (e.g., the AI dropped it from a later turn). The synthesizer is + forgiving — an empty/generic question still produces a useful fact when + the engineer's answer is substantive on its own. + """ + if source_ref is None: + return "" + lane = session.pending_task_lane or {} + items = lane.get(list_key) or [] + sref = str(source_ref) + for item in items: + if isinstance(item, dict) and str(item.get("id")) == sref: + return str(item.get("text") or item.get("label") or "") + return "" diff --git a/backend/app/api/router.py b/backend/app/api/router.py index 349f5969..67427df4 100644 --- a/backend/app/api/router.py +++ b/backend/app/api/router.py @@ -41,6 +41,7 @@ from app.api.endpoints import ( scripts, script_builder, session_branches, + session_facts, session_handoffs, session_resolutions, sessions, @@ -135,6 +136,9 @@ api_router.include_router(network_diagrams.router, dependencies=_tenant_deps) # session_handoffs queue router must come before ai_sessions to avoid conflict api_router.include_router(session_handoffs.queue_router, dependencies=_tenant_deps) api_router.include_router(session_resolutions.router, dependencies=_tenant_deps) +# session_facts mounts under /ai-sessions/{id}/facts — register before ai_sessions +# so the {session_id}/facts subpaths take precedence over any future generic catchalls. +api_router.include_router(session_facts.router, dependencies=_tenant_deps) api_router.include_router(ai_sessions.router, dependencies=_tenant_deps) api_router.include_router(flow_proposals.router, dependencies=_tenant_deps) api_router.include_router(flowpilot_analytics.router, dependencies=_tenant_deps) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 90db6f83..2b780129 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -129,6 +129,11 @@ class Settings(BaseSettings): "kb_convert": "standard", "script_build": "standard", "network_diagram_generate": "standard", + # FlowPilot migration Phase 2 — short, latency-sensitive transformation + # of an engineer's answer/check output into a candidate fact. + # Doc Section 6.6 sets Haiku as the default; instrumentation tracks + # disputed_fact_rate so we can escalate to Sonnet if quality drops. + "fact_synthesis": "fast", } def get_model_for_action(self, action_type: str) -> str: diff --git a/backend/app/schemas/session_fact.py b/backend/app/schemas/session_fact.py new file mode 100644 index 00000000..daede6a6 --- /dev/null +++ b/backend/app/schemas/session_fact.py @@ -0,0 +1,81 @@ +"""Pydantic schemas for the FlowPilot "What we know" session facts. + +See FLOWPILOT-MIGRATION.md Section 4.2 for the data model rationale. +""" +from __future__ import annotations + +from datetime import datetime +from typing import Literal +from uuid import UUID + +from pydantic import BaseModel, Field + +# AI-emittable source types are a subset (`user_note` is engineer-only). +AIEmittableSourceType = Literal["question", "diagnostic_check", "ai_synthesis"] +SourceType = Literal["question", "diagnostic_check", "user_note", "ai_synthesis"] + + +class SessionFactResponse(BaseModel): + """A single fact card in the What-we-know panel.""" + id: UUID + session_id: UUID + text: str + source_type: SourceType + source_ref: UUID | None + source_summary: str | None + created_by: UUID + created_at: datetime + updated_at: datetime + # `editable` is computed server-side so the client doesn't have to + # re-encode the editability rule. It mirrors the PATCH endpoint's + # 403 policy: only user_note and ai_synthesis facts are editable. + editable: bool + + model_config = {"from_attributes": False} + + +class SessionFactListResponse(BaseModel): + facts: list[SessionFactResponse] + + +class SessionFactCreateRequest(BaseModel): + """Engineer-created manual fact (the "+ Add a note" affordance). + + The endpoint hard-codes source_type="user_note" — manual creation cannot + spoof a question/check origin. Source-type-bound creation goes through + `/promote` instead. + """ + text: str = Field(..., min_length=1, max_length=2000) + summary: str | None = Field(None, max_length=200) + + +class SessionFactUpdateRequest(BaseModel): + """Edit an existing fact's text or summary. + + The endpoint returns 403 when the fact's source_type is `question` or + `diagnostic_check` — those facts must be edited at the source item. + """ + text: str | None = Field(None, min_length=1, max_length=2000) + summary: str | None = Field(None, max_length=200) + + +class SessionFactPromoteRequest(BaseModel): + """Promote a question answer / check result into a fact. + + Two modes: + - **Direct**: caller provides `proposed_text` (and optionally `proposed_summary`). + The fact is persisted as-is. Used by the AI [PROMOTE] marker path and by the + engineer's "edit then save" affordance. + - **Synthesize**: caller provides `raw_input` (the engineer's typed answer or + the check output) and the server drafts `text`/`summary` via the + FactSynthesisService. The draft is persisted immediately for now — + the supervisor-staging review is a future enhancement (out of scope per + Section 12). + + Exactly one of `proposed_text` or `raw_input` must be set. + """ + source_type: AIEmittableSourceType + source_ref: UUID | None = None + proposed_text: str | None = Field(None, min_length=1, max_length=2000) + proposed_summary: str | None = Field(None, max_length=200) + raw_input: str | None = Field(None, min_length=1, max_length=10_000) diff --git a/backend/app/services/assistant_chat_service.py b/backend/app/services/assistant_chat_service.py index 23c9bed1..2720b74c 100644 --- a/backend/app/services/assistant_chat_service.py +++ b/backend/app/services/assistant_chat_service.py @@ -62,9 +62,12 @@ Every response you write MUST follow this exact structure: 1. **1-3 sentences of analysis** (what the symptoms tell you) 2. **[QUESTIONS] marker** with 1-3 questions for the engineer (if you need info) 3. **[ACTIONS] marker** with 1-4 diagnostic commands to run (if applicable) +4. **[PROMOTE] marker(s)** when the engineer's most recent message confirmed a fact \ +worth recording (optional; see "Promoting facts" below) You MUST include at least one marker ([QUESTIONS] or [ACTIONS]) in every response. \ -A response with only prose and no markers is INVALID and will break the UI. +A response with only prose and no markers is INVALID and will break the UI. \ +[PROMOTE] is optional and IN ADDITION to the required markers, never a replacement. ### Complete example of a correct first response: @@ -112,6 +115,50 @@ information is no longer needed to resolve the issue. Default to keeping them. **Both markers are stripped from display** — the engineer sees them as interactive UI cards, \ not raw JSON. Put analysis BEFORE markers. Markers go at the END of your response. +## Promoting facts to "What we know" + +The engineer has a "What we know" panel that holds confirmed facts about this \ +session. Each confirmed fact stays visible to the engineer for the rest of the \ +session and feeds the resolution note posted to the customer ticket. Surface \ +facts there using a `[PROMOTE]` marker. + +**When to emit [PROMOTE]:** +- The engineer just answered a [QUESTIONS] item with a substantive answer that \ +rules something in or out +- The engineer just shared diagnostic-check output that confirmed a finding +- You synthesized a new conclusion from two or more prior facts + +**When NOT to emit [PROMOTE]:** +- The engineer's answer was "unknown", "I don't know", or a clarifying question \ +back to you +- The diagnostic output was empty, errored, or inconclusive +- You're re-stating something already in What we know +- The "fact" is your own hypothesis, not something the engineer confirmed + +**[PROMOTE] marker format:** +Each fact is its own block. You may emit multiple blocks per response. + +[PROMOTE] +{"source_type": "question", "source_ref": "", "text": "", "summary": "<3-7 word provenance label, e.g. 'rules out tenant/license'>"} +[/PROMOTE] + +- `source_type` is one of: `"question"` (fact derived from a question's answer), \ +`"diagnostic_check"` (fact derived from a check's output), or `"ai_synthesis"` \ +(you combined prior facts). +- `source_ref` is the `id` field of the originating task-lane item — the \ +[QUESTIONS] and [ACTIONS] payloads you receive in conversation context include \ +an `id` for each item. Copy that UUID verbatim. For `ai_synthesis`, OMIT \ +`source_ref` (or set it to null). +- `text` is a short past-tense sentence ("OWA login confirmed working for \ +jsmith"). Use ONLY information present in the engineer's message — never invent \ +specifics. +- `summary` names the diagnostic value (what the fact rules in or out), 3-7 \ +words, no period. + +**Strict rule:** [PROMOTE] is for confirmed facts only. If you're not certain \ +the engineer's message confirms the fact, do not emit a [PROMOTE]. Hallucinated \ +facts get posted to customer tickets and will erode trust in the system. + ## Using the Team's Flow Library Your team has built troubleshooting flows in ResolutionFlow. When relevant flows \ appear in the context below, reference them by name so the engineer can launch them \ @@ -182,6 +229,9 @@ No exceptions. Not even when forking. A response without at least one of these m will crash the UI. If you are unsure, include both. The markers are REQUIRED output, not optional. If any tasks in the engineer's message are marked `_(not yet completed)_`, re-include them \ in your markers unless you are ≥75% confident that information is no longer relevant. +[PROMOTE] markers are OPTIONAL and IN ADDITION to the required ones — emit them only \ +when the engineer's most recent message confirmed something worth recording, and copy \ +the originating item's `id` into `source_ref` verbatim. """ diff --git a/backend/app/services/fact_synthesis_service.py b/backend/app/services/fact_synthesis_service.py new file mode 100644 index 00000000..993048c4 --- /dev/null +++ b/backend/app/services/fact_synthesis_service.py @@ -0,0 +1,285 @@ +"""FactSynthesisService — converts engineer answers and check output into facts. + +Two paths feed this service: + +1. **AI marker path (the common case).** When the model emits a `[PROMOTE]` + marker in the chat stream, `unified_chat_service` parses the marker (which + already contains the engineer-readable `text` and short provenance `summary`) + and calls `create_fact` directly. No LLM call is needed — the model already + wrote the fact. + +2. **Engineer-driven synthesize path.** The "+ Promote to What we know" affordance + in the UI sends a raw answer or check output and asks the server to draft + `text` + `summary` for review. `synthesize_from_question` / + `synthesize_from_check` make a small Haiku call for that draft. The engineer + confirms (or edits) before persistence, so the LLM output is never + silently posted to a customer ticket. + +Either way, persistence funnels through `create_fact`, which ALSO bumps +`ai_sessions.state_version` so the resolution-note preview cache invalidates +(see FLOWPILOT-MIGRATION.md Section 5.5). + +Model tier is `fact_synthesis` in `settings.ACTION_MODEL_MAP` (Haiku per +Section 6.6). MCP is intentionally disabled for synthesis — these are +pure transformations of input, not research calls. +""" +from __future__ import annotations + +import json +import logging +import re +from typing import Any +from uuid import UUID + +from sqlalchemy import select, update +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.ai_provider import get_ai_provider +from app.core.config import settings +from app.models.ai_session import AISession +from app.models.session_fact import SessionFact + +logger = logging.getLogger(__name__) + + +# Conservative synthesis prompt. Hallucinated specifics are a trust-killer +# because facts feed the resolution note posted to customer tickets — the +# prompt makes "no fact" an explicit, valid output. +_SYNTHESIS_SYSTEM_PROMPT = """\ +You convert one engineer answer or one diagnostic-check output into a single \ +candidate fact for a troubleshooting session's "What we know" log. + +Return strict JSON with this shape: +{ + "text": "", + "summary": "<3-7 word provenance label, e.g. 'rules out tenant/license'>" +} + +If the answer/output does NOT contain a substantive fact (e.g. the engineer \ +typed 'unknown', the command failed, the output is empty), return: +{ + "text": null, + "summary": null +} + +Strict rules: +- Use ONLY information present in the input. Never add details that were not stated. +- Do not speculate, infer causes, or extrapolate. State only what the input proves. +- The text is a fact a colleague could verify by looking at the original answer/output. +- The summary names the diagnostic value (what this fact rules in or out), not the topic. +- Output ONLY the JSON object, no prose, no markdown fences. +""" + + +class FactSynthesisService: + """Persists session facts and (optionally) drafts them via an LLM call. + + Methods that touch the database take an `AsyncSession` and assume the + caller commits. `create_fact` flushes so the returned row has a primary key. + """ + + def __init__(self, db: AsyncSession) -> None: + self.db = db + + # ── Persistence ──────────────────────────────────────────────────────── + + async def create_fact( + self, + *, + session_id: UUID, + account_id: UUID, + user_id: UUID, + source_type: str, + text: str, + summary: str | None = None, + source_ref: UUID | None = None, + ) -> SessionFact: + """Persist a fact and bump the session's preview-cache version. + + `source_ref` MUST be None for `user_note` and `ai_synthesis` sources; + for `question` and `diagnostic_check` it should point at the stable + UUID of the originating task-lane item. The DB has no FK constraint + on `source_ref` (the target lives inside JSONB) — integrity is enforced + here. + """ + if source_type not in ("question", "diagnostic_check", "user_note", "ai_synthesis"): + raise ValueError(f"Invalid source_type: {source_type}") + + if source_type in ("user_note", "ai_synthesis") and source_ref is not None: + # `source_ref` is a back-pointer to a question/check; user notes + # and AI-synthesized facts have no source item to point at. + raise ValueError( + f"source_ref must be None for source_type={source_type}" + ) + + text = (text or "").strip() + if not text: + raise ValueError("Fact text cannot be empty") + + fact = SessionFact( + session_id=session_id, + account_id=account_id, + text=text, + source_type=source_type, + source_ref=source_ref, + source_summary=(summary or "").strip() or None, + created_by=user_id, + ) + self.db.add(fact) + + # Invalidate any preview cached against the prior state_version. + # Single UPDATE so the bump is atomic relative to the fact insert + # within this transaction; concurrent writers serialize on the row. + await self.db.execute( + update(AISession) + .where(AISession.id == session_id) + .values(state_version=AISession.state_version + 1) + ) + await self.db.flush() + return fact + + async def soft_delete_fact(self, fact: SessionFact) -> None: + """Mark a fact deleted and bump state_version.""" + from datetime import datetime, timezone + + fact.deleted_at = datetime.now(timezone.utc) + await self.db.execute( + update(AISession) + .where(AISession.id == fact.session_id) + .values(state_version=AISession.state_version + 1) + ) + await self.db.flush() + + async def update_fact( + self, + fact: SessionFact, + *, + text: str | None = None, + summary: str | None = None, + ) -> SessionFact: + """Update an editable fact and bump state_version. + + Caller is responsible for the editability check — only `user_note` + and `ai_synthesis` facts may be edited at the card level. The + endpoint enforces this and returns 403 for the read-only types. + """ + if text is not None: + stripped = text.strip() + if not stripped: + raise ValueError("Fact text cannot be empty") + fact.text = stripped + if summary is not None: + fact.source_summary = summary.strip() or None + + await self.db.execute( + update(AISession) + .where(AISession.id == fact.session_id) + .values(state_version=AISession.state_version + 1) + ) + await self.db.flush() + return fact + + # ── LLM-backed drafting ──────────────────────────────────────────────── + + async def synthesize_from_question( + self, *, question_text: str, raw_answer: str + ) -> dict[str, str | None]: + """Draft `{text, summary}` from a question + engineer's free-text answer. + + Returns `{"text": None, "summary": None}` when the answer doesn't + contain a substantive fact — caller should not persist in that case. + """ + return await self._synthesize( + user_input=( + f"Question asked: {question_text.strip()}\n" + f"Engineer's answer: {raw_answer.strip()}" + ), + ) + + async def synthesize_from_check( + self, *, check_label: str, check_output: str + ) -> dict[str, str | None]: + """Draft `{text, summary}` from a diagnostic check label + its output.""" + return await self._synthesize( + user_input=( + f"Diagnostic check: {check_label.strip()}\n" + f"Output:\n{check_output.strip()}" + ), + ) + + async def _synthesize(self, *, user_input: str) -> dict[str, str | None]: + """Single Haiku call with the conservative synthesis prompt.""" + model = settings.get_model_for_action("fact_synthesis") + provider = get_ai_provider(model=model) + + # Cache the system prompt — it's identical across every synthesis call. + system_blocks: list[dict[str, Any]] = [ + { + "type": "text", + "text": _SYNTHESIS_SYSTEM_PROMPT, + "cache_control": {"type": "ephemeral"}, + # cacheable: identical across all fact-synthesis calls + }, + ] + + try: + text, _in, _out = await provider.generate_json( + system_prompt=system_blocks, + messages=[{"role": "user", "content": user_input}], + max_tokens=200, + ) + except Exception: + logger.exception("Fact synthesis LLM call failed") + return {"text": None, "summary": None} + + return self._parse_synthesis_response(text) + + @staticmethod + def _parse_synthesis_response(raw: str) -> dict[str, str | None]: + """Tolerant parse: strip fences, accept null fields, ignore extras.""" + cleaned = raw.strip() + if cleaned.startswith("```"): + cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned) + cleaned = re.sub(r"\s*```$", "", cleaned) + + try: + data = json.loads(cleaned) + except (json.JSONDecodeError, ValueError): + logger.warning("Fact synthesis returned non-JSON: %r", raw[:200]) + return {"text": None, "summary": None} + + if not isinstance(data, dict): + return {"text": None, "summary": None} + + text = data.get("text") + summary = data.get("summary") + if text is not None and not isinstance(text, str): + text = None + if summary is not None and not isinstance(summary, str): + summary = None + + # Treat empty strings the same as null — "no substantive fact". + if isinstance(text, str) and not text.strip(): + text = None + if isinstance(summary, str) and not summary.strip(): + summary = None + + return {"text": text, "summary": summary} + + +async def list_facts_for_session( + db: AsyncSession, session_id: UUID +) -> list[SessionFact]: + """List non-deleted facts for a session, oldest first. + + RLS filters by tenant; the explicit account_id check is unnecessary here. + """ + result = await db.execute( + select(SessionFact) + .where( + SessionFact.session_id == session_id, + SessionFact.deleted_at.is_(None), + ) + .order_by(SessionFact.created_at.asc()) + ) + return list(result.scalars().all()) diff --git a/backend/app/services/unified_chat_service.py b/backend/app/services/unified_chat_service.py index 4ed397ef..ee95ceff 100644 --- a/backend/app/services/unified_chat_service.py +++ b/backend/app/services/unified_chat_service.py @@ -3,10 +3,19 @@ Replaces assistant_chat_service for new chat sessions. Messages are stored in ai_sessions.conversation_messages JSONB. Reuses the same AI calling infrastructure and system prompt from assistant_chat_service. + +## Markers parsed here +- `[QUESTIONS]` / `[ACTIONS]` — task-lane items shown to the engineer +- `[FORK]` — diagnostic forking, creates SessionBranch rows +- `[PROMOTE]` (Phase 2) — surfaces a fact to the What-we-know section. + Items in pending_task_lane carry stable UUIDs (assigned here) so PROMOTE + source_refs survive across turns even when the model re-emits the same + question/action. """ import json import logging import re +import uuid as _uuid from typing import Any from uuid import UUID @@ -19,6 +28,7 @@ from app.services.assistant_chat_service import ( _call_ai, _auto_title, ) +from app.services.fact_synthesis_service import FactSynthesisService from app.services.rag_service import search as rag_search, build_rag_context, extract_suggested_flows logger = logging.getLogger(__name__) @@ -147,6 +157,176 @@ def _parse_questions_marker(ai_content: str) -> tuple[str, list[dict[str, Any]] return cleaned, valid_questions +def _parse_promote_marker(ai_content: str) -> tuple[str, list[dict[str, Any]] | None]: + """Extract one or more [PROMOTE]...[/PROMOTE] JSON blocks from AI response. + + Each block contains a JSON object describing a candidate fact: + {"source_type": "question"|"diagnostic_check"|"ai_synthesis", + "source_ref": "" | null, + "text": "", + "summary": ""} + + Returns (cleaned_content, list_of_items_or_None). All matched blocks are + stripped from display text. Invalid items are dropped silently with a + warning — a malformed PROMOTE should never break the chat response. + + Per FLOWPILOT-MIGRATION.md Section 8.1, the model emits text + summary + inline so no LLM round-trip is needed to persist the fact. + """ + blocks = list(re.finditer(r"\[PROMOTE\]\s*([\s\S]*?)\s*\[/PROMOTE\]", ai_content)) + if not blocks: + return ai_content, None + + items: list[dict[str, Any]] = [] + for m in blocks: + raw = m.group(1).strip() + if raw.startswith("```"): + raw = re.sub(r"^```(?:json)?\s*", "", raw) + raw = re.sub(r"\s*```$", "", raw) + try: + data = json.loads(raw) + except (json.JSONDecodeError, ValueError) as e: + logger.warning("Failed to parse [PROMOTE] block: %s", e) + continue + + if not isinstance(data, dict): + logger.warning("[PROMOTE] block must be a JSON object, got %s", type(data).__name__) + continue + + source_type = data.get("source_type") + text = (data.get("text") or "").strip() + summary = (data.get("summary") or "").strip() or None + source_ref_raw = data.get("source_ref") + + if source_type not in ("question", "diagnostic_check", "ai_synthesis"): + # `user_note` is engineer-only, not an AI-emittable type. + logger.warning("Invalid [PROMOTE] source_type=%r, skipping", source_type) + continue + if not text: + logger.warning("[PROMOTE] block missing text, skipping") + continue + + source_ref: UUID | None = None + if source_ref_raw: + try: + source_ref = UUID(str(source_ref_raw)) + except (ValueError, AttributeError): + logger.warning("[PROMOTE] source_ref %r is not a valid UUID, dropping ref", source_ref_raw) + source_ref = None + + # `ai_synthesis` must NEVER carry a source_ref (no question/check item + # to point at) — surface mistakes from the model rather than tripping + # the FactSynthesisService validation later. + if source_type == "ai_synthesis": + source_ref = None + + items.append({ + "source_type": source_type, + "source_ref": source_ref, + "text": text, + "summary": summary, + }) + + # Strip all PROMOTE blocks from display content — engineers see facts in + # the What-we-know panel, not as raw markers in the chat. + cleaned = re.sub(r"\[PROMOTE\]\s*[\s\S]*?\s*\[/PROMOTE\]", "", ai_content).strip() + + return cleaned, items or None + + +def _assign_stable_task_lane_ids( + prev_lane: dict[str, Any] | None, + questions: list[dict[str, Any]] | None, + actions: list[dict[str, Any]] | None, +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + """Assign stable UUIDs to task-lane items, preserving them across turns. + + The model often re-emits the same question/action across multiple turns + (it is told to keep `_(not yet completed)_` items alive). When the + question text matches a prior turn's, we keep the prior UUID so any + `session_facts.source_ref` pointing at it stays valid. + + Match key: + - Questions: exact `text` + - Actions: exact `label` + + Returns the questions/actions lists augmented with an `id` field. + """ + prev_questions = (prev_lane or {}).get("questions") or [] + prev_actions = (prev_lane or {}).get("actions") or [] + + prev_q_ids: dict[str, str] = { + str(q.get("text") or "").strip(): str(q["id"]) + for q in prev_questions + if isinstance(q, dict) and q.get("id") and q.get("text") + } + prev_a_ids: dict[str, str] = { + str(a.get("label") or "").strip(): str(a["id"]) + for a in prev_actions + if isinstance(a, dict) and a.get("id") and a.get("label") + } + + out_questions: list[dict[str, Any]] = [] + for q in questions or []: + text = str(q.get("text") or "").strip() + existing = prev_q_ids.get(text) if text else None + out_questions.append({ + **q, + "id": existing or str(_uuid.uuid4()), + }) + + out_actions: list[dict[str, Any]] = [] + for a in actions or []: + label = str(a.get("label") or "").strip() + existing = prev_a_ids.get(label) if label else None + out_actions.append({ + **a, + "id": existing or str(_uuid.uuid4()), + }) + + return out_questions, out_actions + + +async def _persist_promote_items( + *, + db: AsyncSession, + session: AISession, + user_id: UUID, + items: list[dict[str, Any]], +) -> None: + """Persist parsed [PROMOTE] items as session_facts. Failures are logged. + + A malformed PROMOTE must never break the chat response — the engineer + still gets the AI's analysis; the missing fact can be added manually. + """ + if not items: + return + service = FactSynthesisService(db) + for item in items: + try: + await service.create_fact( + session_id=session.id, + account_id=session.account_id, + user_id=user_id, + source_type=item["source_type"], + text=item["text"], + summary=item["summary"], + source_ref=item["source_ref"], + ) + except ValueError: + # Validation failure (e.g. empty text after strip, or + # source_ref-on-ai_synthesis race). Log and continue — losing + # one fact is better than aborting the whole chat turn. + logger.warning( + "Skipping invalid PROMOTE item for session %s: %r", + session.id, item, exc_info=True, + ) + except Exception: + logger.exception( + "Failed to persist PROMOTE item for session %s", session.id + ) + + async def create_chat_session( user_id: UUID, account_id: UUID, @@ -251,10 +431,11 @@ async def send_chat_message( if session.status == "paused": session.status = "active" - # Check for fork, actions, and questions markers in branch response too + # Check for fork, actions, questions, and promote markers in branch response too branch_display, branch_fork_data = _parse_fork_marker(ai_content) branch_display, branch_actions_data = _parse_actions_marker(branch_display) branch_display, branch_questions_data = _parse_questions_marker(branch_display) + branch_display, branch_promote_items = _parse_promote_marker(branch_display) if branch_display != ai_content: # Store stripped content in branch history msgs[-1] = {"role": "assistant", "content": branch_display} @@ -288,15 +469,30 @@ async def send_chat_message( except Exception: logger.exception("Failed to create fork within branch for session %s", session.id) - # Persist task lane state on session + # Persist task lane state on session — assign stable UUIDs so any + # PROMOTE marker emitted later can reference the same items. if branch_questions_data or branch_actions_data: + stable_qs, stable_as = _assign_stable_task_lane_ids( + session.pending_task_lane, + branch_questions_data, + branch_actions_data, + ) session.pending_task_lane = { - "questions": branch_questions_data or [], - "actions": branch_actions_data or [], + "questions": stable_qs, + "actions": stable_as, } else: session.pending_task_lane = None + # Persist any PROMOTE items emitted in this turn. Done AFTER the + # task-lane write so source_refs to brand-new items would still + # land on persisted UUIDs (the model can also reference IDs from + # the previous turn, which were already persisted). + if branch_promote_items: + await _persist_promote_items( + db=db, session=session, user_id=user_id, items=branch_promote_items, + ) + suggested_flows = extract_suggested_flows( await rag_search(query=message, account_id=account_id, db=db, limit=8) ) @@ -343,9 +539,13 @@ async def send_chat_message( # Check for questions marker in AI response display_content, questions_data = _parse_questions_marker(display_content) + # Check for promote markers — facts the AI is surfacing to What we know. + display_content, promote_items = _parse_promote_marker(display_content) + logger.info( - "Marker parsing results — actions: %s, questions: %s, fork: %s, raw_length: %d, display_length: %d", + "Marker parsing results — actions: %s, questions: %s, fork: %s, promote: %d, raw_length: %d, display_length: %d", bool(actions_data), bool(questions_data), bool(fork_data), + len(promote_items or []), len(ai_content), len(display_content), ) @@ -410,15 +610,26 @@ async def send_chat_message( logger.exception("Failed to create fork for session %s", session_id) # Fork failed but chat message still sent — don't break the response - # Persist task lane state on session + # Persist task lane state on session — assign stable UUIDs so any PROMOTE + # marker (this turn or a later one) can reference the same items. if questions_data or actions_data: + stable_qs, stable_as = _assign_stable_task_lane_ids( + session.pending_task_lane, questions_data, actions_data, + ) session.pending_task_lane = { - "questions": questions_data or [], - "actions": actions_data or [], + "questions": stable_qs, + "actions": stable_as, } else: session.pending_task_lane = None + # Persist any PROMOTE items emitted in this turn. Done after task-lane + # assignment so source_refs the model invented this turn already exist. + if promote_items: + await _persist_promote_items( + db=db, session=session, user_id=user_id, items=promote_items, + ) + suggested_flows = extract_suggested_flows(rag_results) return display_content, suggested_flows, session, fork_metadata, actions_data, questions_data diff --git a/backend/tests/test_session_facts_api.py b/backend/tests/test_session_facts_api.py new file mode 100644 index 00000000..20434ec6 --- /dev/null +++ b/backend/tests/test_session_facts_api.py @@ -0,0 +1,455 @@ +"""API + service tests for the FlowPilot Phase 2 "What we know" facts surface. + +Covers: +- /api/v1/ai-sessions/{id}/facts CRUD +- Editability rule (403 on PATCH for question/diagnostic_check facts) +- /facts/promote with `proposed_text` (no LLM call) and via synthesis (mocked) +- state_version increments on every fact write +- Stable-UUID assignment for pending_task_lane items +- [PROMOTE] marker parser shape +""" +from __future__ import annotations + +import uuid +from unittest.mock import AsyncMock, patch + +import pytest +from httpx import AsyncClient +from sqlalchemy import select + +from app.models.ai_session import AISession +from app.models.session_fact import SessionFact +from app.services.fact_synthesis_service import FactSynthesisService +from app.services.unified_chat_service import ( + _assign_stable_task_lane_ids, + _parse_promote_marker, +) + + +# ── Fixtures ──────────────────────────────────────────────────────────────── + +async def _make_session(test_db, user, *, pending_task_lane=None) -> AISession: + session = AISession( + user_id=user["user_data"]["id"], + account_id=user["user_data"]["account_id"], + session_type="chat", + intake_type="free_text", + intake_content={"text": "test"}, + status="active", + confidence_tier="discovery", + conversation_messages=[], + pending_task_lane=pending_task_lane, + ) + test_db.add(session) + await test_db.commit() + await test_db.refresh(session) + return session + + +# ── [PROMOTE] marker parser ───────────────────────────────────────────────── + +class TestPromoteMarkerParser: + def test_no_marker_returns_unchanged(self): + text = "Just an analysis sentence." + cleaned, items = _parse_promote_marker(text) + assert cleaned == text + assert items is None + + def test_single_block(self): + ref = uuid.uuid4() + text = ( + "Some analysis.\n\n" + f'[PROMOTE]\n{{"source_type":"question","source_ref":"{ref}",' + '"text":"OWA login confirmed working","summary":"rules out tenant"}\n' + "[/PROMOTE]" + ) + cleaned, items = _parse_promote_marker(text) + assert cleaned == "Some analysis." + assert items is not None and len(items) == 1 + assert items[0]["source_type"] == "question" + assert items[0]["source_ref"] == ref + assert items[0]["text"] == "OWA login confirmed working" + assert items[0]["summary"] == "rules out tenant" + + def test_multiple_blocks(self): + text = ( + '[PROMOTE]\n{"source_type":"question","source_ref":null,' + '"text":"a","summary":"x"}\n[/PROMOTE]\n' + '[PROMOTE]\n{"source_type":"diagnostic_check","source_ref":null,' + '"text":"b","summary":"y"}\n[/PROMOTE]' + ) + cleaned, items = _parse_promote_marker(text) + assert items is not None and len(items) == 2 + assert items[0]["text"] == "a" + assert items[1]["text"] == "b" + assert "[PROMOTE]" not in cleaned + + def test_ai_synthesis_strips_source_ref(self): + # The model should not provide source_ref for synthesis facts — + # the parser drops it defensively even if the model does. + ref = uuid.uuid4() + text = ( + f'[PROMOTE]\n{{"source_type":"ai_synthesis","source_ref":"{ref}",' + '"text":"Combined finding","summary":"synth"}\n[/PROMOTE]' + ) + _, items = _parse_promote_marker(text) + assert items is not None and items[0]["source_ref"] is None + + def test_invalid_source_type_dropped(self): + text = ( + '[PROMOTE]\n{"source_type":"bogus","text":"x"}\n[/PROMOTE]\n' + '[PROMOTE]\n{"source_type":"question","source_ref":null,"text":"good"}\n[/PROMOTE]' + ) + _, items = _parse_promote_marker(text) + assert items is not None and len(items) == 1 + assert items[0]["text"] == "good" + + def test_missing_text_dropped(self): + text = '[PROMOTE]\n{"source_type":"question","source_ref":null,"text":""}\n[/PROMOTE]' + _, items = _parse_promote_marker(text) + assert items is None # empty list collapses to None + + def test_invalid_uuid_drops_ref_keeps_item(self): + text = '[PROMOTE]\n{"source_type":"question","source_ref":"not-a-uuid","text":"keep"}\n[/PROMOTE]' + _, items = _parse_promote_marker(text) + assert items is not None and items[0]["source_ref"] is None + assert items[0]["text"] == "keep" + + def test_malformed_json_dropped(self): + text = "[PROMOTE]\nnot json at all\n[/PROMOTE]" + cleaned, items = _parse_promote_marker(text) + assert items is None + # Block is still stripped from display so the engineer doesn't see it. + assert "[PROMOTE]" not in cleaned + + +# ── Stable-UUID assignment ────────────────────────────────────────────────── + +class TestAssignStableTaskLaneIds: + def test_empty_prev_assigns_fresh_uuids(self): + qs, acts = _assign_stable_task_lane_ids( + None, + [{"text": "Q1", "context": "c1"}], + [{"label": "A1", "command": "cmd"}], + ) + assert len(qs) == 1 and uuid.UUID(qs[0]["id"]) + assert len(acts) == 1 and uuid.UUID(acts[0]["id"]) + + def test_prev_uuid_preserved_on_text_match(self): + qid = str(uuid.uuid4()) + prev = { + "questions": [{"id": qid, "text": "Same text"}], + "actions": [], + } + qs, _ = _assign_stable_task_lane_ids(prev, [{"text": "Same text"}], []) + assert qs[0]["id"] == qid + + def test_prev_uuid_replaced_when_text_changes(self): + qid = str(uuid.uuid4()) + prev = {"questions": [{"id": qid, "text": "Original"}], "actions": []} + qs, _ = _assign_stable_task_lane_ids(prev, [{"text": "Different"}], []) + assert qs[0]["id"] != qid + + def test_action_label_match_preserves_uuid(self): + aid = str(uuid.uuid4()) + prev = {"questions": [], "actions": [{"id": aid, "label": "Run X"}]} + _, acts = _assign_stable_task_lane_ids(prev, [], [{"label": "Run X"}]) + assert acts[0]["id"] == aid + + +# ── FactSynthesisService.create_fact validation ───────────────────────────── + +@pytest.mark.asyncio +async def test_create_fact_rejects_source_ref_for_user_note(test_db, test_user): + session = await _make_session(test_db, test_user) + svc = FactSynthesisService(test_db) + with pytest.raises(ValueError, match="source_ref must be None"): + await svc.create_fact( + session_id=session.id, + account_id=session.account_id, + user_id=session.user_id, + source_type="user_note", + text="x", + source_ref=uuid.uuid4(), + ) + + +@pytest.mark.asyncio +async def test_create_fact_rejects_invalid_source_type(test_db, test_user): + session = await _make_session(test_db, test_user) + svc = FactSynthesisService(test_db) + with pytest.raises(ValueError, match="Invalid source_type"): + await svc.create_fact( + session_id=session.id, + account_id=session.account_id, + user_id=session.user_id, + source_type="not_a_type", + text="x", + ) + + +@pytest.mark.asyncio +async def test_create_fact_bumps_state_version(test_db, test_user): + session = await _make_session(test_db, test_user) + initial_version = session.state_version + svc = FactSynthesisService(test_db) + await svc.create_fact( + session_id=session.id, + account_id=session.account_id, + user_id=session.user_id, + source_type="user_note", + text="A confirmed observation", + ) + await test_db.commit() + await test_db.refresh(session) + assert session.state_version == initial_version + 1 + + +# ── Endpoint tests ────────────────────────────────────────────────────────── + +@pytest.mark.asyncio +async def test_list_facts_empty(client: AsyncClient, test_user, auth_headers, test_db): + session = await _make_session(test_db, test_user) + resp = await client.get( + f"/api/v1/ai-sessions/{session.id}/facts", + headers=auth_headers, + ) + assert resp.status_code == 200 + assert resp.json()["facts"] == [] + + +@pytest.mark.asyncio +async def test_create_user_note_fact(client: AsyncClient, test_user, auth_headers, test_db): + session = await _make_session(test_db, test_user) + resp = await client.post( + f"/api/v1/ai-sessions/{session.id}/facts", + headers=auth_headers, + json={"text": "Customer is on a laptop", "summary": "endpoint type"}, + ) + assert resp.status_code == 201 + body = resp.json() + assert body["source_type"] == "user_note" + assert body["editable"] is True + assert body["source_ref"] is None + assert body["text"] == "Customer is on a laptop" + + +@pytest.mark.asyncio +async def test_patch_user_note_succeeds(client: AsyncClient, test_user, auth_headers, test_db): + session = await _make_session(test_db, test_user) + create = await client.post( + f"/api/v1/ai-sessions/{session.id}/facts", + headers=auth_headers, + json={"text": "original"}, + ) + fact_id = create.json()["id"] + + patch_resp = await client.patch( + f"/api/v1/ai-sessions/{session.id}/facts/{fact_id}", + headers=auth_headers, + json={"text": "edited", "summary": "new label"}, + ) + assert patch_resp.status_code == 200 + assert patch_resp.json()["text"] == "edited" + assert patch_resp.json()["source_summary"] == "new label" + + +@pytest.mark.asyncio +async def test_patch_question_fact_returns_403(client: AsyncClient, test_user, auth_headers, test_db): + """Question/check-sourced facts must be edited at the source item, not the card.""" + session = await _make_session(test_db, test_user) + # Insert a question-sourced fact directly so the editability rule applies. + fact = SessionFact( + session_id=session.id, + account_id=session.account_id, + text="Pre-existing question fact", + source_type="question", + source_ref=uuid.uuid4(), + created_by=session.user_id, + ) + test_db.add(fact) + await test_db.commit() + await test_db.refresh(fact) + + resp = await client.patch( + f"/api/v1/ai-sessions/{session.id}/facts/{fact.id}", + headers=auth_headers, + json={"text": "trying to edit"}, + ) + assert resp.status_code == 403 + + +@pytest.mark.asyncio +async def test_delete_fact_soft_deletes(client: AsyncClient, test_user, auth_headers, test_db): + session = await _make_session(test_db, test_user) + create = await client.post( + f"/api/v1/ai-sessions/{session.id}/facts", + headers=auth_headers, + json={"text": "to be removed"}, + ) + fact_id = create.json()["id"] + + del_resp = await client.delete( + f"/api/v1/ai-sessions/{session.id}/facts/{fact_id}", + headers=auth_headers, + ) + assert del_resp.status_code == 204 + + # Listed facts should not include the soft-deleted one. + list_resp = await client.get( + f"/api/v1/ai-sessions/{session.id}/facts", + headers=auth_headers, + ) + assert list_resp.status_code == 200 + assert all(f["id"] != fact_id for f in list_resp.json()["facts"]) + + # Row still exists in DB (deleted_at set), proving it was soft-deleted. + row = ( + await test_db.execute( + select(SessionFact).where(SessionFact.id == uuid.UUID(fact_id)) + ) + ).scalar_one() + assert row.deleted_at is not None + + +@pytest.mark.asyncio +async def test_promote_with_proposed_text(client: AsyncClient, test_user, auth_headers, test_db): + qid = uuid.uuid4() + session = await _make_session( + test_db, test_user, + pending_task_lane={ + "questions": [{"id": str(qid), "text": "Is OWA working?"}], + "actions": [], + }, + ) + resp = await client.post( + f"/api/v1/ai-sessions/{session.id}/facts/promote", + headers=auth_headers, + json={ + "source_type": "question", + "source_ref": str(qid), + "proposed_text": "OWA confirmed working for jsmith", + "proposed_summary": "rules out tenant/license", + }, + ) + assert resp.status_code == 201 + body = resp.json() + assert body["source_type"] == "question" + assert body["source_ref"] == str(qid) + assert body["editable"] is False # question-sourced facts are read-only at the card + + +@pytest.mark.asyncio +async def test_promote_via_synthesis(client: AsyncClient, test_user, auth_headers, test_db): + qid = uuid.uuid4() + session = await _make_session( + test_db, test_user, + pending_task_lane={ + "questions": [{"id": str(qid), "text": "Is the user on a laptop?"}], + "actions": [], + }, + ) + + # Mock the LLM call to avoid hitting the network in tests. + fake_provider = AsyncMock() + fake_provider.generate_json = AsyncMock(return_value=( + '{"text": "User confirmed on a laptop", "summary": "endpoint type"}', + 50, 20, + )) + + with patch( + "app.services.fact_synthesis_service.get_ai_provider", + return_value=fake_provider, + ): + resp = await client.post( + f"/api/v1/ai-sessions/{session.id}/facts/promote", + headers=auth_headers, + json={ + "source_type": "question", + "source_ref": str(qid), + "raw_input": "Yes, it's a Lenovo X1 Carbon", + }, + ) + + assert resp.status_code == 201 + assert resp.json()["text"] == "User confirmed on a laptop" + assert resp.json()["source_summary"] == "endpoint type" + + +@pytest.mark.asyncio +async def test_promote_synthesis_returning_null_returns_422( + client: AsyncClient, test_user, auth_headers, test_db +): + """When the synthesizer judges the input has no fact, the endpoint surfaces 422.""" + qid = uuid.uuid4() + session = await _make_session( + test_db, test_user, + pending_task_lane={ + "questions": [{"id": str(qid), "text": "Is OWA working?"}], + "actions": [], + }, + ) + + fake_provider = AsyncMock() + fake_provider.generate_json = AsyncMock(return_value=( + '{"text": null, "summary": null}', 30, 10, + )) + + with patch( + "app.services.fact_synthesis_service.get_ai_provider", + return_value=fake_provider, + ): + resp = await client.post( + f"/api/v1/ai-sessions/{session.id}/facts/promote", + headers=auth_headers, + json={ + "source_type": "question", + "source_ref": str(qid), + "raw_input": "unknown", + }, + ) + assert resp.status_code == 422 + + +@pytest.mark.asyncio +async def test_promote_rejects_both_or_neither_inputs( + client: AsyncClient, test_user, auth_headers, test_db +): + session = await _make_session(test_db, test_user) + # Neither + resp = await client.post( + f"/api/v1/ai-sessions/{session.id}/facts/promote", + headers=auth_headers, + json={"source_type": "question"}, + ) + assert resp.status_code == 400 + + # Both + resp2 = await client.post( + f"/api/v1/ai-sessions/{session.id}/facts/promote", + headers=auth_headers, + json={ + "source_type": "question", + "proposed_text": "x", + "raw_input": "y", + }, + ) + assert resp2.status_code == 400 + + +@pytest.mark.asyncio +async def test_state_version_bumps_on_create_via_endpoint( + client: AsyncClient, test_user, auth_headers, test_db +): + session = await _make_session(test_db, test_user) + initial = session.state_version + + await client.post( + f"/api/v1/ai-sessions/{session.id}/facts", + headers=auth_headers, + json={"text": "a"}, + ) + + # Reload — refresh fetches the latest persisted row. + await test_db.refresh(session) + assert session.state_version == initial + 1 diff --git a/docs/FlowAssist_Migration/FLOWPILOT-MIGRATION.md b/docs/FlowAssist_Migration/FLOWPILOT-MIGRATION.md index eae00d74..3b9070f2 100644 --- a/docs/FlowAssist_Migration/FLOWPILOT-MIGRATION.md +++ b/docs/FlowAssist_Migration/FLOWPILOT-MIGRATION.md @@ -2,8 +2,8 @@ > **Target:** Transform `/assistant` (ResolutionAssist) into the new unified `/pilot` (FlowPilot) surface. > **Audience:** Claude Code (implementation) and Codex (review) reviewed by Michael (owner). -> **Status:** Phase 0 in progress. Phases 1–7 awaiting Phase 0 completion for the AI-dependent work; Phase 1 can run in parallel. -> **Last updated:** April 17, 2026 (post-Codex plan review, reflects Phase 0 audit findings and in-flight implementation decisions) +> **Status:** Phases 0, 1, and 2 implemented (verification deferred to new dev env). Phase 3 next. +> **Last updated:** April 21, 2026 (Phase 2 — What we know — committed; verification TODO tracked inline in tests/services) --- @@ -30,6 +30,10 @@ This document was originally written against a set of assumptions about the code - **`pending_task_lane` items do not have stable IDs today.** Phase 2 must assign stable UUIDs when questions/checks are first persisted. `session_facts.source_ref` points to those JSON item IDs. - **`account_settings` table did not exist.** Phase 1 creates it with a JSONB `preferences` column; settings live in `preferences` until they need their own column. - **`/tickets/ai-parse` endpoint does not exist.** Phase 0.2 became a doc-only note; no code change. +- **`[PROMOTE]` marker uses JSON, not key:value.** The doc's original example showed + `key: value` lines; implementation uses a JSON object inside each `[PROMOTE]...[/PROMOTE]` + block (matching the `[QUESTIONS]` / `[ACTIONS]` parser pattern). Multi-line text values would + have been ambiguous in the key:value form. Section 8.1 below has been updated. Any further drift found during implementation should be flagged by the implementer and reconciled in this doc before writing code that assumes the drifted spec. @@ -572,18 +576,15 @@ The existing FlowPilot / ResolutionAssist system prompt needs updates to emit th ### 8.1 New marker: `[PROMOTE]` -Used to surface facts to What we know. Syntax: +Used to surface facts to What we know. Syntax — each block contains a single JSON object; multiple blocks may appear in one response: ``` [PROMOTE] -source_type: question -source_ref: {task_lane_item_uuid} -text: OWA login and send/receive confirmed working for jsmith -summary: rules out tenant/license +{"source_type": "question", "source_ref": "{task_lane_item_uuid}", "text": "OWA login and send/receive confirmed working for jsmith", "summary": "rules out tenant/license"} [/PROMOTE] ``` -The AI should emit `[PROMOTE]` blocks in the same message that answers or processes a question/check, so the fact appears in What we know simultaneously with the chat acknowledgment. `source_ref` points to the stable UUID of the task lane item being promoted (assigned in Phase 2). +The AI should emit `[PROMOTE]` blocks in the same message that answers or processes a question/check, so the fact appears in What we know simultaneously with the chat acknowledgment. `source_ref` points to the stable UUID of the task lane item being promoted (assigned in Phase 2). For `source_type: "ai_synthesis"`, omit `source_ref` (or set it to null) — the parser drops it defensively even if the model includes one. ### 8.2 New marker: `[SUGGEST_FIX]` @@ -709,6 +710,11 @@ git commit -m "feat(pilot): rename /assistant to /pilot, add session_facts/sugge - Attempt to PATCH a question-sourced fact → 403. - PATCH a user_note fact → succeeds. +**Verification deferred** — same constraint as Phase 0: no live dev environment was +available at authoring time. Backend pytest suite (`tests/test_session_facts_api.py`) +and the manual scenarios above must run when the dev env is up. Failures should be +treated as normal bugs, not blockers for Phase 3. + ``` git commit -m "feat(pilot): add What we know section with fact synthesis and stable task-lane item IDs" ``` diff --git a/frontend/src/api/sessionFacts.ts b/frontend/src/api/sessionFacts.ts new file mode 100644 index 00000000..fe7d5d71 --- /dev/null +++ b/frontend/src/api/sessionFacts.ts @@ -0,0 +1,89 @@ +/** + * Session facts API — the "What we know" CRUD surface for a FlowPilot session. + * + * Mirrors backend endpoints at `/api/v1/ai-sessions/{id}/facts`. + * See FLOWPILOT-MIGRATION.md Section 5.1. + */ +import apiClient from './client' + +export type SessionFactSourceType = + | 'question' + | 'diagnostic_check' + | 'user_note' + | 'ai_synthesis' + +export interface SessionFact { + id: string + session_id: string + text: string + source_type: SessionFactSourceType + source_ref: string | null + source_summary: string | null + created_by: string + created_at: string + updated_at: string + // Server-computed: false for question/diagnostic_check (PATCH returns 403), + // true for user_note/ai_synthesis. Drives the edit affordance in the UI. + editable: boolean +} + +export interface SessionFactCreateRequest { + text: string + summary?: string | null +} + +export interface SessionFactUpdateRequest { + text?: string | null + summary?: string | null +} + +export interface SessionFactPromoteRequest { + source_type: 'question' | 'diagnostic_check' | 'ai_synthesis' + source_ref?: string | null + proposed_text?: string | null + proposed_summary?: string | null + raw_input?: string | null +} + +export const sessionFactsApi = { + async list(sessionId: string): Promise { + const r = await apiClient.get<{ facts: SessionFact[] }>( + `/ai-sessions/${sessionId}/facts`, + ) + return r.data.facts + }, + + async create(sessionId: string, data: SessionFactCreateRequest): Promise { + const r = await apiClient.post( + `/ai-sessions/${sessionId}/facts`, + data, + ) + return r.data + }, + + async update( + sessionId: string, + factId: string, + data: SessionFactUpdateRequest, + ): Promise { + const r = await apiClient.patch( + `/ai-sessions/${sessionId}/facts/${factId}`, + data, + ) + return r.data + }, + + async remove(sessionId: string, factId: string): Promise { + await apiClient.delete(`/ai-sessions/${sessionId}/facts/${factId}`) + }, + + async promote(sessionId: string, data: SessionFactPromoteRequest): Promise { + const r = await apiClient.post( + `/ai-sessions/${sessionId}/facts/promote`, + data, + ) + return r.data + }, +} + +export default sessionFactsApi diff --git a/frontend/src/components/assistant/TaskLane.tsx b/frontend/src/components/assistant/TaskLane.tsx index c3d9458e..ae4425bc 100644 --- a/frontend/src/components/assistant/TaskLane.tsx +++ b/frontend/src/components/assistant/TaskLane.tsx @@ -38,6 +38,11 @@ interface TaskLaneProps { onSubmit: (responses: TaskResponse[]) => void onClose: () => void loading?: boolean + // Slot for the FlowPilot Phase 2 "What we know" section. Rendered above + // Questions in the body (per FLOWPILOT-MIGRATION.md Section 3.1). The slot + // shape lets the parent own fact-fetching and state-version polling without + // pulling that concern into TaskLane. + whatWeKnowSlot?: React.ReactNode } // ── Storage helpers ── @@ -64,7 +69,7 @@ export function clearTaskState(sessionId: string) { // ── Component ── -export function TaskLane({ questions, actions, sessionId, onSubmit, onClose, loading }: TaskLaneProps) { +export function TaskLane({ questions, actions, sessionId, onSubmit, onClose, loading, whatWeKnowSlot }: TaskLaneProps) { const [tasks, setTasks] = useState(() => { // Try to restore saved state for this session (preserves user's in-progress answers) if (sessionId) { @@ -269,6 +274,9 @@ export function TaskLane({ questions, actions, sessionId, onSubmit, onClose, loa {/* Body */}
+ {/* ── What we know (Phase 2) ── */} + {whatWeKnowSlot} + {/* ── Questions Section ── */} {questionTasks.length > 0 && (
diff --git a/frontend/src/components/pilot/sections/AddNoteButton.tsx b/frontend/src/components/pilot/sections/AddNoteButton.tsx new file mode 100644 index 00000000..67037715 --- /dev/null +++ b/frontend/src/components/pilot/sections/AddNoteButton.tsx @@ -0,0 +1,87 @@ +/** + * "+ Add a note" affordance for the What-we-know section. + * + * Inline composer that posts a `user_note` fact when the engineer wants to + * record something the AI didn't surface (a hunch, an observation, a piece + * of customer context). Per FLOWPILOT-MIGRATION.md Section 3.1. + */ +import { useState } from 'react' +import { Plus, Check } from 'lucide-react' + +interface AddNoteButtonProps { + onAdd: (text: string, summary: string | null) => Promise | void +} + +export function AddNoteButton({ onAdd }: AddNoteButtonProps) { + const [open, setOpen] = useState(false) + const [text, setText] = useState('') + const [summary, setSummary] = useState('') + const [busy, setBusy] = useState(false) + + const reset = () => { + setText('') + setSummary('') + setOpen(false) + } + + const handleSubmit = async () => { + if (!text.trim()) return + setBusy(true) + try { + await onAdd(text.trim(), summary.trim() || null) + reset() + } finally { + setBusy(false) + } + } + + if (!open) { + return ( + + ) + } + + return ( +
+