feat: AI marker system prompt fixes, TaskLane activation, and FlowPilot updates

- Fix system prompt to ensure [QUESTIONS]/[ACTIONS] markers in AI responses
- Add format reminder injection to user messages for marker compliance
- Wire TaskLane activation in prefill and resume paths
- Add ActionCardGroup component for structured question/action rendering
- Update FlowPilot session and step card components
- Update ai-session schemas and types for marker data

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-26 19:57:39 +00:00
parent 37d217b12a
commit 3c0a29115c
14 changed files with 913 additions and 42 deletions
--- a/backend/app/api/endpoints/ai_sessions.py
+++ b/backend/app/api/endpoints/ai_sessions.py
@@ -287,7 +287,7 @@ async def send_chat_message(
        images = await fetch_upload_images(data.upload_ids, account_id, db) or None

    try:
-        ai_content, suggested_flows, session = await unified_chat_service.send_chat_message(
+        ai_content, suggested_flows, session, fork_metadata, actions_data, questions_data = await unified_chat_service.send_chat_message(
            session_id=session_id,
            user_id=user_id,
            account_id=account_id,
@@ -329,6 +329,9 @@ async def send_chat_message(
    return ChatMessageResponse(
        content=ai_content,
        suggested_flows=suggested_flows,
+        fork=fork_metadata,
+        actions=actions_data,
+        questions=questions_data,
    )


--- a/backend/app/schemas/ai_session.py
+++ b/backend/app/schemas/ai_session.py
@@ -250,10 +250,40 @@ class ChatMessageRequest(BaseModel):
    upload_ids: list[UUID] = Field(default_factory=list, max_length=10)


+class ForkBranchInfo(BaseModel):
+    """Branch info returned when a fork is created."""
+    branch_id: str  # stringified ID of the newly created branch
+    label: str  # label of that branch
+
+
+class ForkMetadata(BaseModel):
+    """Metadata returned when the AI suggests a diagnostic fork."""
+    fork_point_id: str  # stringified ID of the created fork point
+    fork_reason: str  # reason text taken from the [FORK] marker ("" if absent)
+    branches: list[ForkBranchInfo]  # the branches created for this fork
+    active_branch_id: str | None = None  # the chat service sends None when the session has no active branch
+
+
+class ActionItem(BaseModel):
+    """A single action item for the engineer."""
+    label: str  # required; the marker parser drops entries without one
+    command: str | None = None  # None for steps that have no command to run
+    description: str = ""  # optional explanatory text
+
+
+class QuestionItem(BaseModel):
+    """A question the AI needs answered by the engineer."""
+    text: str  # required; the marker parser drops entries without one
+    context: str = ""  # optional note accompanying the question
+
+
 class ChatMessageResponse(BaseModel):
    """AI response to a chat message."""
    content: str
    suggested_flows: list[dict[str, Any]] = []
+    fork: ForkMetadata | None = None  # None unless this turn created a diagnostic fork
+    actions: list[ActionItem] | None = None  # None when no valid [ACTIONS] marker was parsed
+    questions: list[QuestionItem] | None = None  # None when no valid [QUESTIONS] marker was parsed


 class AISessionSearchResult(BaseModel):
--- a/backend/app/services/assistant_chat_service.py
+++ b/backend/app/services/assistant_chat_service.py
@@ -33,28 +33,59 @@ deep expertise across the MSP technology stack:
 - PowerShell scripting and automation
 - Security: MFA, Conditional Access, EDR, backup/DR

-## How to Answer
- **Be direct and actionable.** Engineers are mid-ticket — lead with the fix or next \
-diagnostic step, then explain why in one sentence if helpful. Skip background unless asked.
- **Include specifics.** Exact commands, registry paths, config values, port numbers. \
-Vague advice wastes time.
- **Warn before you wreck.** If a step could cause downtime, data loss, or a lockout, \
-say so upfront — before the command.
- **Use structured formatting.** Bullet points for steps, code blocks for commands, \
-bold for key terms. Engineers scan, they don't read essays.
- **Say when you're unsure.** If you don't know the exact answer, say so. Suggest \
-where to verify (vendor docs, a specific KB article) rather than guessing.
+## RESPONSE FORMAT — READ THIS FIRST

-## How to Ask Questions
- **Default to a single focused question.** Ask what you need to know right now to make progress.
- **Use contextual bullets sparingly.** If the question could be ambiguous (e.g., "what error?" \
-when there are multiple common patterns), add 2-3 sub-bullets to help the engineer recognize \
-what you're asking for — but keep it short.
- **Multiple questions only when blocking.** If you genuinely cannot proceed without knowing \
-two things (e.g., both the error message AND which users are affected), preface it clearly: \
-"Before continuing troubleshooting, I need to know: 1) [question], 2) [question]." Use this rarely.
- **Avoid interrogation mode.** Don't fire off 5 questions in a row. Get one answer, make \
-progress, then ask the next question if needed.
+Every response you write MUST follow this exact structure:
+
+1. **1-3 sentences of analysis** (what the symptoms tell you)
+2. **[QUESTIONS] marker** with 1-3 questions for the engineer (if you need info)
+3. **[ACTIONS] marker** with 1-4 diagnostic commands to run (if applicable)
+
+You MUST include at least one marker ([QUESTIONS] or [ACTIONS]) in every response. \
+A response with only prose and no markers is INVALID and will break the UI.
+
+### Complete example of a correct first response:
+
+User: "Outlook disconnects every 10-15 min, Teams drops too, only this one user, WiFi"
+
+Your response:
+
+Both apps dropping on the same 10-15 min cycle on WiFi points to a network-layer \
+timeout — likely DHCP lease renewal, AP roaming, or NIC power management. Single-user \
+scope narrows it to this endpoint.
+
+[QUESTIONS]
+[{"text": "Is this user on a laptop or desktop?", "context": "Laptops have power management and docking transitions that cause WiFi drops"},
+{"text": "Are they on corporate WiFi or working from home?", "context": "Corporate WiFi with multiple APs can cause roaming disconnects"}]
+[/QUESTIONS]
+
+[ACTIONS]
+[{"label": "Check DHCP lease time", "command": "ipconfig /all | Select-String -Pattern 'DHCP|IPv4|Lease|Gateway'", "description": "Short lease times (under 1 hour) cause brief drops at renewal"},
+{"label": "Check NIC power management", "command": "Get-NetAdapterPowerManagement | Select Name, AllowComputerToTurnOffDevice", "description": "If True, Windows is likely killing the adapter during idle periods"},
+{"label": "Check WiFi signal and AP", "command": "netsh wlan show interfaces", "description": "Shows current BSSID, signal strength, and whether they are bouncing between APs"}]
+[/ACTIONS]
+
+### Rules
+
+**Prose rules:**
+- MAXIMUM 3 sentences. No numbered lists. No "Most likely causes: 1... 2... 3..."
+- Never narrate intentions ("I want to check...", "Let's get eyes on..."). Just include markers.
+- Be specific: exact commands, registry paths, port numbers.
+- Warn before destructive actions.
+
+**[QUESTIONS] marker format:**
+- JSON array of objects with `text` (required) and `context` (optional, 1 sentence)
+- 1-3 questions per response
+- Do NOT ask questions inline in your prose. ALL questions go in the marker.
+
+**[ACTIONS] marker format:**
+- JSON array of objects with `label` (required), `command` (optional), `description` (required)
+- 1-4 action items per response
+- Commands should be PowerShell unless context indicates Linux/Mac
+- For GUI-only steps, omit `command`
+
+**Both markers are stripped from display** — the engineer sees them as interactive UI cards, \
+not raw JSON. Put analysis BEFORE markers. Markers go at the END of your response.

 ## Using the Team's Flow Library
 Your team has built troubleshooting flows in ResolutionFlow. When relevant flows \
@@ -73,10 +104,57 @@ When an image is attached, analyze it carefully. Screenshots of error messages,
 config panels, event viewer logs, and network diagrams are common in MSP work. \
 Describe what you see and use the visual information to inform your troubleshooting advice.

+## Diagnostic Forking
+When symptoms point to 2+ different subsystems or root causes, you MUST create a diagnostic \
+fork. Forking tracks the different investigation paths in the background — the engineer \
+sees them in a sidebar and can switch between them anytime.
+
+**IMPORTANT: Forking is invisible to the engineer in the conversation.** You do NOT mention \
+forking, branching, or paths to the engineer. You just continue the conversation naturally. \
+The fork marker is metadata that the system uses behind the scenes.
+
+**You MUST fork when:**
+- Symptoms affect multiple applications or layers (e.g., Outlook AND Teams dropping)
+- The problem could be endpoint-side OR infrastructure-side
+- Multiple well-known causes match the exact same symptom pattern
+
+**Do NOT fork when:**
+- One cause is clearly >80% likely — just investigate that first
+- A single yes/no question would eliminate all but one possibility
+
+**Fork response format:**
+Even when forking, you MUST still follow the RESPONSE FORMAT above. Your response \
+must include [QUESTIONS] and/or [ACTIONS] markers — the fork marker is IN ADDITION \
+to those, not a replacement. Do NOT ask questions in prose — put them in [QUESTIONS].
+
+Structure: 1-3 sentences of analysis → [QUESTIONS] and/or [ACTIONS] → [FORK] at the very end.
+
+Example flow:
+- Engineer: "Outlook disconnects every 15 min, Teams drops too, only one user"
+- You: "The 10-15 min pattern with both apps points to network layer."
+- Then: [QUESTIONS] marker, then [ACTIONS] marker, then [FORK] marker last.
+
+The fork marker is stripped from display — the engineer never sees it. \
+The system creates branches silently. Based on the engineer's answer, you pick \
+the most relevant branch to investigate first.
+
+To create a fork, append this marker AFTER your [QUESTIONS]/[ACTIONS] markers:
+
+[FORK]
+{"fork_reason": "Brief reason", "options": [{"label": "Short name", "description": "One sentence"}, {"label": "Another", "description": "One sentence"}]}
+[/FORK]
+
+2-4 options. Never mention "fork", "branch", or "path" in your visible text.
+
 ## Boundaries
 - Stay focused on IT infrastructure, systems administration, and MSP operations.
 - If a question is clearly outside your domain, say so briefly and redirect.
 - Never fabricate error codes, KB article numbers, or CLI flags. If unsure, say so.
+
+## FINAL REMINDER — THIS OVERRIDES EVERYTHING ABOVE
+Every single response MUST contain [QUESTIONS] and/or [ACTIONS] markers with valid JSON. \
+No exceptions. Not even when forking. A response without at least one of these markers \
+will crash the UI. If you are unsure, include both. The markers are REQUIRED output, not optional.
 """


@@ -174,6 +252,16 @@ async def _call_anthropic_cached(
        }

    # Add the new user message (uncached — it's new each turn)
+    # Append a format reminder to the user message so the model sees it
+    # immediately before generating. This is invisible to the user (stripped
+    # before storage) but critical for structured output compliance.
+    format_reminder = (
+        "\n\n[SYSTEM: Remember — your response MUST end with [QUESTIONS] "
+        "and/or [ACTIONS] markers containing valid JSON arrays. "
+        "Responses without markers break the UI.]"
+    )
+    reminded_message = new_message + format_reminder
+
    # If images are attached, build multimodal content blocks
    if images:
        content_blocks: list[dict[str, Any]] = []
@@ -186,10 +274,10 @@ async def _call_anthropic_cached(
                    "data": img["data"],
                },
            })
-        content_blocks.append({"type": "text", "text": new_message})
+        content_blocks.append({"type": "text", "text": reminded_message})
        messages.append({"role": "user", "content": content_blocks})
    else:
-        messages.append({"role": "user", "content": new_message})
+        messages.append({"role": "user", "content": reminded_message})

    # MCP server config (optional — controlled by settings)
    mcp_servers = anthropic.NOT_GIVEN
--- a/backend/app/services/flowpilot_engine.py
+++ b/backend/app/services/flowpilot_engine.py
@@ -53,7 +53,10 @@ Your response MUST be a valid JSON object with one of these shapes:
 {"type": "action", "content": "What to do", "reasoning": "Internal why", "context_message": "Here's what to try", "action_type": "instruction | script_generation | verification | info_request | open_script_builder", "expected_outcome": "What success looks like", "confidence": 0.78}

 3. Resolution suggestion:
-{"type": "resolution_suggestion", "content": "Summary of what we did", "reasoning": "Internal why", "resolution_summary": "Issue was caused by X, fixed by Y", "confidence": 0.92, "follow_up_recommendations": ["Monitor for 24 hours"]}\
+{"type": "resolution_suggestion", "content": "Summary of what we did", "reasoning": "Internal why", "resolution_summary": "Issue was caused by X, fixed by Y", "confidence": 0.92, "follow_up_recommendations": ["Monitor for 24 hours"]}
+
+4. Diagnostic fork (explore multiple hypotheses in parallel):
+{"type": "fork", "content": "Why we need to branch", "reasoning": "Internal why", "context_message": "Shown to engineer explaining the fork", "fork_reason": "Multiple possible root causes need independent investigation", "options": [{"label": "Branch name", "description": "What this branch will investigate"}], "confidence": 0.45}\
 """

 FLOWPILOT_SYSTEM_PROMPT = """\
@@ -83,6 +86,17 @@ Every response must have a "type" field: "question", "action", or "resolution_su
 - Never suggest restarting or rebooting as a first step — diagnose first
 - Be specific: "Check Event Viewer > System > source NTFS" not "check the logs"

+## DIAGNOSTIC FORKING
+When you detect MULTIPLE equally plausible root causes that require DIFFERENT investigation paths, use a "fork" response to let the engineer explore them as parallel branches. Use forks when:
+- Two or more hypotheses have similar probability and investigating one doesn't help eliminate the other
+- The engineer has tried the obvious path and results are ambiguous (could be DNS OR firewall OR auth)
+- Symptoms point to multiple subsystems (e.g., "slow login" could be AD replication, DNS, or group policy)
+Do NOT fork when:
+- One hypothesis is clearly more likely — just investigate that first
+- You can ask a single question that would eliminate most possibilities
+- The session has fewer than 3 steps (gather more info first)
+Fork options should be 2-4 independent investigation paths. Each option label should be a clear, short hypothesis name (e.g., "DNS Resolution Issue", "AD Replication Lag").
+
 {team_context}

 {matched_flow_context}\
@@ -121,7 +135,7 @@ def _parse_structured_output(raw_text: str) -> dict[str, Any]:
    if not isinstance(data, dict) or "type" not in data:
        raise ValueError("LLM response missing required 'type' field")

-    valid_types = {"question", "action", "resolution_suggestion"}
+    valid_types = {"question", "action", "resolution_suggestion", "fork"}
    if data["type"] not in valid_types:
        raise ValueError(f"Unknown response type: {data['type']}")

@@ -428,6 +442,43 @@ async def process_response(

    await db.flush()

+    # Handle fork: create branches and enrich step content with branch IDs
+    if parsed["type"] == "fork":
+        from app.services.branch_manager import BranchManager
+        mgr = BranchManager(db)
+
+        # Create root branch if this is the first fork in the session
+        if not session.is_branching:
+            root = await mgr.create_root_branch(session.id)
+            # Reassign the step to the root branch
+            step.branch_id = root.id
+
+        fork_options = parsed.get("options", [])
+        fork_point, new_branches = await mgr.create_fork(
+            session_id=session.id,
+            parent_branch_id=session.active_branch_id,
+            trigger_step_id=step.id,
+            fork_reason=parsed.get("fork_reason", ""),
+            options=[{"label": o["label"], "description": o.get("description", "")} for o in fork_options],
+        )
+
+        # Enrich the step content with fork_point_id and branch IDs for frontend
+        enriched_content = dict(step.content or {})
+        enriched_content["fork_point_id"] = str(fork_point.id)
+        enriched_content["fork_branches"] = [
+            {"branch_id": str(b.id), "label": b.label}
+            for b in new_branches
+        ]
+        step.content = enriched_content
+        step.is_fork_point = True
+        step.fork_point_id = fork_point.id
+
+        # Auto-switch to the first branch
+        first_branch = new_branches[0]
+        await mgr.switch_branch(session.id, first_branch.id)
+
+        await db.flush()
+
    # Check if resolution was suggested
    resolution_suggested = parsed["type"] == "resolution_suggestion"
    resolution_summary = parsed.get("resolution_summary") if resolution_suggested else None
@@ -1239,6 +1290,11 @@ def _create_step_from_parsed(
        content["follow_up_recommendations"] = parsed.get("follow_up_recommendations", [])
        content["allow_free_text"] = False
        content["allow_skip"] = False
+    elif parsed["type"] == "fork":
+        content["fork_reason"] = parsed.get("fork_reason", "")
+        content["fork_options"] = parsed.get("options", [])
+        content["allow_free_text"] = False
+        content["allow_skip"] = False

    # Extract options for question type
    options = None
--- a/backend/app/services/unified_chat_service.py
+++ b/backend/app/services/unified_chat_service.py
@@ -4,7 +4,9 @@ Replaces assistant_chat_service for new chat sessions. Messages are stored
 in ai_sessions.conversation_messages JSONB. Reuses the same AI calling
 infrastructure and system prompt from assistant_chat_service.
 """
+import json
 import logging
+import re
 from typing import Any
 from uuid import UUID

@@ -22,6 +24,129 @@ from app.services.rag_service import search as rag_search, build_rag_context, ex
 logger = logging.getLogger(__name__)


+def _parse_fork_marker(ai_content: str) -> tuple[str, dict[str, Any] | None]:
+    """Extract and strip a [FORK]...[/FORK] JSON marker from an AI response.
+
+    Returns (cleaned_content, fork_data_or_None). A matched marker is always
+    removed from the text, even when its payload is rejected (fork data None).
+    """
+    match = re.search(r'\[FORK\]\s*([\s\S]*?)\s*\[/FORK\]', ai_content)
+    if not match:
+        return ai_content, None
+
+    # Fork markers are metadata only, so strip them whether or not they parse.
+    cleaned = (ai_content[:match.start()] + ai_content[match.end():]).strip()
+
+    try:
+        raw = match.group(1).strip()
+        # Strip markdown fences if AI wrapped it
+        if raw.startswith("```"):
+            raw = re.sub(r'^```(?:json)?\s*', '', raw)
+            raw = re.sub(r'\s*```$', '', raw)
+        fork_data = json.loads(raw)
+    except (json.JSONDecodeError, ValueError) as e:
+        logger.warning("Failed to parse [FORK] marker: %s", e)
+        return cleaned, None
+
+    if not isinstance(fork_data, dict) or "options" not in fork_data:
+        logger.warning("Invalid [FORK] data — missing 'options'")
+        return cleaned, None
+
+    # Callers read o["label"] from every option, so reject unlabelled entries here.
+    options = fork_data["options"]
+    labelled = isinstance(options, list) and all(isinstance(o, dict) and o.get("label") for o in options)
+    if not labelled or len(options) < 2:
+        logger.warning("Invalid [FORK] data — need at least 2 labelled options")
+        return cleaned, None
+
+    return cleaned, fork_data
+
+
+def _parse_actions_marker(ai_content: str) -> tuple[str, list[dict[str, Any]] | None]:
+    """Extract [ACTIONS]...[/ACTIONS] JSON from AI response.
+
+    Returns (cleaned_content, actions_list_or_None). When the marker is
+    absent or unusable, the input text is returned unchanged with None.
+    """
+    match = re.search(r'\[ACTIONS\]\s*([\s\S]*?)\s*\[/ACTIONS\]', ai_content)
+    if not match:
+        return ai_content, None
+
+    try:
+        raw = match.group(1).strip()
+        if raw.startswith("```"):
+            raw = re.sub(r'^```(?:json)?\s*', '', raw)
+            raw = re.sub(r'\s*```$', '', raw)
+        actions = json.loads(raw)
+    except (json.JSONDecodeError, ValueError) as e:
+        logger.warning("Failed to parse [ACTIONS] marker: %s", e)
+        return ai_content, None
+
+    if not isinstance(actions, list) or len(actions) == 0:
+        logger.warning("Invalid [ACTIONS] data — need at least 1 action")
+        return ai_content, None
+
+    # Keep labelled entries; a JSON null description becomes "" so it stays a str.
+    valid_actions = []
+    for a in actions:
+        if isinstance(a, dict) and a.get("label"):
+            valid_actions.append({
+                "label": a["label"],
+                "command": a.get("command"),
+                "description": a.get("description") or "",
+            })
+
+    if not valid_actions:
+        return ai_content, None
+
+    cleaned = ai_content[:match.start()] + ai_content[match.end():]
+    cleaned = cleaned.strip()
+
+    return cleaned, valid_actions
+
+
+def _parse_questions_marker(ai_content: str) -> tuple[str, list[dict[str, Any]] | None]:
+    """Extract [QUESTIONS]...[/QUESTIONS] JSON from AI response.
+
+    Returns (cleaned_content, questions_list_or_None). When the marker is
+    absent or unusable, the input text is returned unchanged with None.
+    """
+    match = re.search(r'\[QUESTIONS\]\s*([\s\S]*?)\s*\[/QUESTIONS\]', ai_content)
+    if not match:
+        return ai_content, None
+
+    try:
+        raw = match.group(1).strip()
+        if raw.startswith("```"):
+            raw = re.sub(r'^```(?:json)?\s*', '', raw)
+            raw = re.sub(r'\s*```$', '', raw)
+        questions = json.loads(raw)
+    except (json.JSONDecodeError, ValueError) as e:
+        logger.warning("Failed to parse [QUESTIONS] marker: %s", e)
+        return ai_content, None
+
+    if not isinstance(questions, list) or len(questions) == 0:
+        logger.warning("Invalid [QUESTIONS] data — need at least 1 question")
+        return ai_content, None
+
+    # Keep entries with text; a JSON null context becomes "" so it stays a str.
+    valid_questions = []
+    for q in questions:
+        if isinstance(q, dict) and q.get("text"):
+            valid_questions.append({
+                "text": q["text"],
+                "context": q.get("context") or "",
+            })
+
+    if not valid_questions:
+        return ai_content, None
+
+    cleaned = ai_content[:match.start()] + ai_content[match.end():]
+    cleaned = cleaned.strip()
+
+    return cleaned, valid_questions
+
+
 async def create_chat_session(
    user_id: UUID,
    account_id: UUID,
@@ -58,14 +183,14 @@ async def send_chat_message(
    message: str,
    db: AsyncSession,
    images: list[dict[str, Any]] | None = None,
-) -> tuple[str, list[dict[str, Any]], AISession]:
+) -> tuple[str, list[dict[str, Any]], AISession, dict[str, Any] | None, list[dict[str, Any]] | None, list[dict[str, Any]] | None]:
    """Send a message in a chat session and get AI response.

    Args:
        images: Optional list of {"media_type": str, "data": str (base64)}
                for vision content attached to this message.

-    Returns (ai_content, suggested_flows, session).
+    Returns (ai_content, suggested_flows, session, fork_metadata, actions_data, questions_data).
    """
    result = await db.execute(
        select(AISession).where(
@@ -124,10 +249,47 @@ async def send_chat_message(
            if session.status == "paused":
                session.status = "active"

+            # Check for fork, actions, and questions markers in branch response too
+            branch_display, branch_fork_data = _parse_fork_marker(ai_content)
+            branch_display, branch_actions_data = _parse_actions_marker(branch_display)
+            branch_display, branch_questions_data = _parse_questions_marker(branch_display)
+            if branch_display != ai_content:
+                # Store stripped content in branch history
+                msgs[-1] = {"role": "assistant", "content": branch_display}
+                branch.conversation_messages = msgs
+
+            branch_fork_metadata = None
+            if branch_fork_data:
+                try:
+                    fork_point, new_branches = await manager.create_fork(
+                        session_id=session.id,
+                        parent_branch_id=branch.id,
+                        trigger_step_id=None,
+                        fork_reason=branch_fork_data.get("fork_reason", ""),
+                        options=[
+                            {"label": o["label"], "description": o.get("description", "")}
+                            for o in branch_fork_data["options"]
+                        ],
+                    )
+                    first_branch = new_branches[0]
+                    await manager.switch_branch(session.id, first_branch.id)
+                    branch_fork_metadata = {
+                        "fork_point_id": str(fork_point.id),
+                        "fork_reason": branch_fork_data.get("fork_reason", ""),
+                        "branches": [
+                            {"branch_id": str(b.id), "label": b.label}
+                            for b in new_branches
+                        ],
+                        "active_branch_id": str(first_branch.id),
+                    }
+                    await db.flush()
+                except Exception:
+                    logger.exception("Failed to create fork within branch for session %s", session.id)
+
            suggested_flows = extract_suggested_flows(
                await rag_search(query=message, account_id=account_id, db=db, limit=8)
            )
-            return ai_content, suggested_flows, session
+            return branch_display, suggested_flows, session, branch_fork_metadata, branch_actions_data, branch_questions_data

    # Auto-title from first message if still default
    if session.step_count == 0 and message.strip():
@@ -161,10 +323,27 @@ async def send_chat_message(
        images=images,
    )

-    # Append messages to conversation_messages
+    # Check for fork marker in AI response
+    display_content, fork_data = _parse_fork_marker(ai_content)
+
+    # Check for actions marker in AI response
+    display_content, actions_data = _parse_actions_marker(display_content)
+
+    # Check for questions marker in AI response
+    display_content, questions_data = _parse_questions_marker(display_content)
+
+    logger.info(
+        "Marker parsing results — actions: %s, questions: %s, fork: %s, raw_length: %d, display_length: %d",
+        bool(actions_data), bool(questions_data), bool(fork_data),
+        len(ai_content), len(display_content),
+    )
+
+    # Store DISPLAY content (markers stripped) in conversation_messages.
+    # The format reminder in the user message + system prompt final reminder
+    # are sufficient to keep the AI emitting markers on subsequent turns.
    msgs = list(session.conversation_messages or [])
    msgs.append({"role": "user", "content": message})
-    msgs.append({"role": "assistant", "content": ai_content})
+    msgs.append({"role": "assistant", "content": display_content})
    session.conversation_messages = msgs
    session.step_count += 2  # message count for display
    session.total_input_tokens += input_tokens
@@ -174,6 +353,46 @@ async def send_chat_message(
    if session.status == "paused":
        session.status = "active"

+    # If fork was detected, create branches
+    fork_metadata = None
+    if fork_data:
+        try:
+            from app.services.branch_manager import BranchManager
+            mgr = BranchManager(db)
+
+            # Create root branch if this is the first fork
+            if not session.is_branching:
+                await mgr.create_root_branch(session.id)
+
+            fork_point, new_branches = await mgr.create_fork(
+                session_id=session.id,
+                parent_branch_id=session.active_branch_id,
+                trigger_step_id=None,
+                fork_reason=fork_data.get("fork_reason", ""),
+                options=[
+                    {"label": o["label"], "description": o.get("description", "")}
+                    for o in fork_data["options"]
+                ],
+            )
+
+            # Don't auto-switch — conversation continues on current branch.
+            # Branches appear in sidebar. User switches when ready.
+            fork_metadata = {
+                "fork_point_id": str(fork_point.id),
+                "fork_reason": fork_data.get("fork_reason", ""),
+                "branches": [
+                    {"branch_id": str(b.id), "label": b.label}
+                    for b in new_branches
+                ],
+                "active_branch_id": str(session.active_branch_id) if session.active_branch_id else None,
+            }
+
+            await db.flush()
+            logger.info("Created fork with %d branches for session %s", len(new_branches), session_id)
+        except Exception:
+            logger.exception("Failed to create fork for session %s", session_id)
+            # Fork failed but chat message still sent — don't break the response
+
    suggested_flows = extract_suggested_flows(rag_results)

-    return ai_content, suggested_flows, session
+    return display_content, suggested_flows, session, fork_metadata, actions_data, questions_data