diff --git a/backend/app/api/endpoints/ai_sessions.py b/backend/app/api/endpoints/ai_sessions.py index 31a1ec5e..f8ceb9fd 100644 --- a/backend/app/api/endpoints/ai_sessions.py +++ b/backend/app/api/endpoints/ai_sessions.py @@ -873,13 +873,25 @@ async def list_sessions( date_to: Optional[datetime] = Query(None), q: Optional[str] = Query(None, min_length=2, max_length=200), ): - """List the current user's AI sessions (owned or picked up).""" + """List the current user's AI sessions (owned or picked up). + + "Picked up" includes both the legacy escalation_package.picked_up_by + marker (set by flowpilot_engine.pickup_session) AND the new + escalated_to_id field (set by HandoffManager.claim_session for the + unified handoff/escalate path). Without the escalated_to_id branch + the senior tech wouldn't see a session they just claimed in their + chat sidebar — the picked-up session lands as the active chat with + no entry in the list, which is what the user reported as "4 versions + of the session" (their unrelated owned sessions show up while the + claimed one is invisible). + """ user_id_str = str(current_user.id) query = ( select(AISession) .where( or_( AISession.user_id == current_user.id, + AISession.escalated_to_id == current_user.id, AISession.escalation_package["picked_up_by"].as_string() == user_id_str, ) ) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 985bca98..b3135131 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -111,7 +111,14 @@ class Settings(BaseSettings): GOOGLE_AI_API_KEY: Optional[str] = None AI_MODEL_GEMINI: str = "gemini-2.5-flash" AI_MODEL_ANTHROPIC: str = "claude-sonnet-4-6" - ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 5 + # 15s is generous for the click-path; Claude usually returns a 500-token + # diagnostic in 4-8s but tail latency on the assessment prompt has hit + # 12-14s in the field. 
Going below this leaves too many escalations with + # the "Assessment unavailable — model didn't respond in time" placeholder + # the senior tech sees on the magic-moment screen. Real fix is async generation + # (kick off, persist when done, surface "still computing" with refresh) — + # that's a follow-up; bumping the bound keeps the wedge demo coherent. + ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 15 # Model tier routing — maps action types to model tiers AI_MODEL_TIERS: dict[str, str] = {