2026-04-30 21:32:16 +00:00
2 changed files with 21 additions and 2 deletions
--- a/backend/app/api/endpoints/ai_sessions.py
+++ b/backend/app/api/endpoints/ai_sessions.py
@@ -873,13 +873,25 @@ async def list_sessions(
    date_to: Optional[datetime] = Query(None),
    q: Optional[str] = Query(None, min_length=2, max_length=200),
 ):
-    """List the current user's AI sessions (owned or picked up)."""
+    """List the current user's AI sessions (owned or picked up).
    "Picked up" includes both the legacy escalation_package.picked_up_by
    marker (set by flowpilot_engine.pickup_session) AND the new
    escalated_to_id field (set by HandoffManager.claim_session for the
    unified handoff/escalate path). Without the escalated_to_id branch,
    a senior tech would not see a session they had just claimed in their
    chat sidebar: the picked-up session opens as the active chat but has
    no entry in the list. That is what the user reported as "4 versions
    of the session" (their unrelated owned sessions show up while the
    claimed one is invisible).
    """
    user_id_str = str(current_user.id)
    query = (
        select(AISession)
        .where(
            or_(
                AISession.user_id == current_user.id,
                AISession.escalated_to_id == current_user.id,
                AISession.escalation_package["picked_up_by"].as_string() == user_id_str,
            )
        )
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -111,7 +111,14 @@ class Settings(BaseSettings):
    GOOGLE_AI_API_KEY: Optional[str] = None
    AI_MODEL_GEMINI: str = "gemini-2.5-flash"
    AI_MODEL_ANTHROPIC: str = "claude-sonnet-4-6"
-    ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 5
+    # 15s is generous for the click-path: Claude usually returns a 500-token
    # diagnostic in 4-8s, but tail latency on the assessment prompt has hit
    # 12-14s in the field. Going below 15s leaves too many escalations with
    # the "Assessment unavailable — model didn't respond in time" placeholder
    # the senior sees on the magic-moment screen. The real fix is async generation
    # (kick off, persist when done, surface "still computing" with refresh);
    # that is a follow-up, and bumping the bound keeps the wedge demo coherent.
    ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 15
    # Model tier routing — maps action types to model tiers
    AI_MODEL_TIERS: dict[str, str] = {