2026-04-30 21:32:16 +00:00
2 changed files with 21 additions and 2 deletions
--- a/backend/app/api/endpoints/ai_sessions.py
+++ b/backend/app/api/endpoints/ai_sessions.py
@@ -873,13 +873,25 @@ async def list_sessions(
    date_to: Optional[datetime] = Query(None),
    q: Optional[str] = Query(None, min_length=2, max_length=200),
 ):
-    """List the current user's AI sessions (owned or picked up)."""
+    """List the current user's AI sessions (owned or picked up).
+
+    "Picked up" includes both the legacy escalation_package.picked_up_by
+    marker (set by flowpilot_engine.pickup_session) and the newer
+    escalated_to_id field (set by HandoffManager.claim_session on the
+    unified handoff/escalate path). Without the escalated_to_id branch,
+    a senior tech who has just claimed a session would not see it in
+    their chat sidebar: the claimed session opens as the active chat but
+    has no entry in the list. The user reported this as "4 versions of
+    the session" (their unrelated owned sessions were listed while the
+    claimed one was invisible).
+    """
    user_id_str = str(current_user.id)
    query = (
        select(AISession)
        .where(
            or_(
                AISession.user_id == current_user.id,
+                AISession.escalated_to_id == current_user.id,
                AISession.escalation_package["picked_up_by"].as_string() == user_id_str,
            )
        )
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -111,7 +111,14 @@ class Settings(BaseSettings):
    GOOGLE_AI_API_KEY: Optional[str] = None
    AI_MODEL_GEMINI: str = "gemini-2.5-flash"
    AI_MODEL_ANTHROPIC: str = "claude-sonnet-4-6"
-    ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 5
+    # 15s is generous for the click-path: Claude usually returns a 500-token
+    # diagnostic in 4-8s, but tail latency on the assessment prompt has hit
+    # 12-14s in the field. A lower timeout leaves too many escalations with
+    # the "Assessment unavailable — model didn't respond in time" placeholder
+    # that the senior sees on the magic-moment screen. The real fix is async
+    # generation (kick off, persist when done, surface "still computing"
+    # with refresh) — a follow-up; raising the bound keeps the wedge demo coherent.
+    ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 15

    # Model tier routing — maps action types to model tiers
    AI_MODEL_TIERS: dict[str, str] = {