diff --git a/backend/app/core/config.py b/backend/app/core/config.py index b3135131..afc2fcbc 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -111,14 +111,16 @@ class Settings(BaseSettings): GOOGLE_AI_API_KEY: Optional[str] = None AI_MODEL_GEMINI: str = "gemini-2.5-flash" AI_MODEL_ANTHROPIC: str = "claude-sonnet-4-6" - # 15s is generous for the click-path; Claude usually returns a 500-token - # diagnostic in 4-8s but tail latency on the assessment prompt has hit - # 12-14s in the field. Going below this leaves too many escalations with - # the "Assessment unavailable — model didn't respond in time" placeholder - # the senior sees on the magic-moment screen. Real fix is async generation - # (kick off, persist when done, surface "still computing" with refresh) — - # that's a follow-up; bumping the bound keeps the wedge demo coherent. - ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 15 + # Bound for the diagnostic assessment Sonnet call. Generation runs in a + # FastAPI BackgroundTask (commit e8ba74e), so this no longer blocks the + # senior's click — it only bounds how long we wait before publishing + # `handoff_assessment_ready` with has_assessment=false. 15s was hitting + # tail latency on Sonnet (timeout 03:57:35 in field testing 2026-04-29), + # leaving the magic-moment placeholder permanent. 45s is the right + # ceiling: well above Sonnet p99 for a 500-token output, far enough + # below "the senior gives up watching" that we still surface SOMETHING + # on persistent slowness.
+ ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS: int = 45 # Model tier routing — maps action types to model tiers AI_MODEL_TIERS: dict[str, str] = { diff --git a/frontend/src/components/assistant/ConcludeSessionModal.tsx b/frontend/src/components/assistant/ConcludeSessionModal.tsx index 207e3743..25e88424 100644 --- a/frontend/src/components/assistant/ConcludeSessionModal.tsx +++ b/frontend/src/components/assistant/ConcludeSessionModal.tsx @@ -348,6 +348,15 @@ export function ConcludeSessionModal({