feat(escalations): Escalation Mode wedge — live arrival + magic-moment pickup #155

Merged
chihlasm merged 34 commits from feat/escalation-metric-endpoint into main 2026-04-30 21:32:16 +00:00
2 changed files with 21 additions and 2 deletions
Showing only changes of commit aca915b047 - Show all commits

View File

@@ -873,13 +873,25 @@ async def list_sessions(
date_to: Optional[datetime] = Query(None),
q: Optional[str] = Query(None, min_length=2, max_length=200),
):
"""List the current user's AI sessions (owned or picked up)."""
"""List the current user's AI sessions (owned or picked up).
"Picked up" includes both the legacy escalation_package.picked_up_by
marker (set by flowpilot_engine.pickup_session) AND the new
escalated_to_id field (set by HandoffManager.claim_session for the
unified handoff/escalate path). Without the escalated_to_id branch,
a senior tech would not see a session they just claimed in their chat
sidebar: the picked-up session opens as the active chat but has no
entry in the list. That is the bug the user reported as "4 versions
of the session" — their unrelated owned sessions show up while the
claimed one is invisible.
"""
user_id_str = str(current_user.id)
query = (
select(AISession)
.where(
or_(
AISession.user_id == current_user.id,
AISession.escalated_to_id == current_user.id,
AISession.escalation_package["picked_up_by"].as_string() == user_id_str,
)
)

View File

@@ -111,7 +111,14 @@ class Settings(BaseSettings):
GOOGLE_AI_API_KEY: Optional[str] = None
AI_MODEL_GEMINI: str = "gemini-2.5-flash"
AI_MODEL_ANTHROPIC: str = "claude-sonnet-4-6"
ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 5
# 15s is generous for the click-path; Claude usually returns a 500-token
# diagnostic in 4-8s, but tail latency on the assessment prompt has hit
# 12-14s in the field. Going below this leaves too many escalations with
# the "Assessment unavailable — model didn't respond in time" placeholder
# that the senior tech sees on the magic-moment screen. The real fix is
# async generation (kick off, persist when done, surface "still computing"
# with refresh) — that's a follow-up; bumping the bound keeps the wedge
# demo coherent.
ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 15
# Model tier routing — maps action types to model tiers
AI_MODEL_TIERS: dict[str, str] = {