feat(escalations): Escalation Mode wedge — live arrival + magic-moment pickup #155

Merged
chihlasm merged 34 commits from feat/escalation-metric-endpoint into main 2026-04-30 21:32:16 +00:00
8 changed files with 218 additions and 41 deletions
Showing only changes of commit e8ba74ed6d - Show all commits

View File

@@ -15,7 +15,7 @@ from datetime import datetime
from typing import Annotated, Optional
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request, status
from sqlalchemy import or_, select, func, text
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
@@ -466,12 +466,13 @@ async def escalate_session(
request: Request,
session_id: UUID,
data: EscalateSessionRequest,
background_tasks: BackgroundTasks,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
_: None = Depends(require_engineer_or_admin),
):
"""Escalate a FlowPilot session — unified through HandoffManager."""
from app.services.handoff_manager import HandoffManager
from app.services.handoff_manager import HandoffManager, enrich_escalation_async
# Owner-only — matches the original constraint on flowpilot_engine.escalate_session.
session_result = await db.execute(
@@ -507,6 +508,14 @@ async def escalate_session(
await manager.dispatch_escalation_notifications(handoff)
# AI enrichment (Sonnet assessment + enhanced escalation_package) runs
# in the background so the escalating engineer doesn't wait on
# 15-25s of model latency. Result lands on the handoff row when ready;
# the senior's magic-moment screen reads it at pickup time.
background_tasks.add_task(
enrich_escalation_async, handoff.id, current_user.id
)
return SessionCloseResponse(
session_id=session.id,
status=session.status,

View File

@@ -12,7 +12,7 @@ import logging
from typing import Annotated, AsyncGenerator
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Request, status
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, status
from fastapi.responses import StreamingResponse
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -41,6 +41,7 @@ router = APIRouter(prefix="/ai-sessions/{session_id}", tags=["session-handoffs"]
async def create_handoff(
session_id: UUID,
body: HandoffCreateRequest,
background_tasks: BackgroundTasks,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
) -> HandoffResponse:
@@ -79,7 +80,15 @@ async def create_handoff(
# a rolled-back handoff. Failures are swallowed inside the manager —
# handoff creation is authoritative; notifications are advisory.
if handoff.intent == "escalate":
from app.services.handoff_manager import enrich_escalation_async
await manager.dispatch_escalation_notifications(handoff)
# AI enrichment (Sonnet assessment + enhanced escalation_package)
# runs in the background after the response is sent so the
# escalating engineer doesn't wait on 15-25s of model latency.
background_tasks.add_task(
enrich_escalation_async, handoff.id, current_user.id
)
return HandoffResponse.model_validate(handoff)

View File

@@ -89,17 +89,16 @@ class HandoffManager:
f"Cannot escalate session in status: {session.status}"
)
# Generate snapshot
# Generate snapshot — fast, no AI calls.
snapshot = await self._generate_snapshot(session)
# Generate AI assessment for escalations
ai_assessment = None
ai_assessment_data = None
if intent == "escalate":
ai_assessment, ai_assessment_data = (
await self._generate_ai_assessment_with_timeout(session)
)
# AI enrichment (assessment + enhanced escalation_package) is now
# deferred to a background task scheduled by the endpoint after
# commit — both calls hit Sonnet and together can take 15-25s,
# which is too long to block the click path. The handoff row lands
# immediately with `ai_assessment=None`; the magic-moment screen
# shows "Assessment still computing" until `enrich_escalation_async`
# finishes and the senior refreshes (or, eventually, polls).
handoff = SessionHandoff(
session_id=session_id,
account_id=session.account_id,
@@ -107,8 +106,8 @@ class HandoffManager:
intent=intent,
source_branch_id=session.active_branch_id,
snapshot=snapshot,
ai_assessment=ai_assessment,
ai_assessment_data=ai_assessment_data,
ai_assessment=None,
ai_assessment_data=None,
engineer_notes=engineer_notes,
priority=priority,
)
@@ -125,27 +124,17 @@ class HandoffManager:
session.handoff_count = (session.handoff_count or 0) + 1
# Dual-write to escalation_package. For escalate, build the
# AI-enhanced package (preserves the legacy rich shape that
# SessionBriefing/PSA writeback consume), then layer in the new
# handoff metadata. For park, the lightweight shape is fine —
# there's no legacy enhanced package for parking.
if intent == "escalate":
enhanced_pkg = await self._build_enhanced_escalation_package(
session, user_id
)
enhanced_pkg["intent"] = intent
enhanced_pkg["engineer_notes"] = engineer_notes
enhanced_pkg["handoff_id"] = str(handoff.id)
enhanced_pkg["snapshot"] = snapshot
session.escalation_package = enhanced_pkg
else:
session.escalation_package = {
"snapshot": snapshot,
"intent": intent,
"engineer_notes": engineer_notes,
"handoff_id": str(handoff.id),
}
# Dual-write the minimal escalation_package shape now. The async
# enrichment task overwrites this with the AI-enhanced shape
# (`steps_tried`, `remaining_hypotheses`, etc.) when it completes —
# consumers that read these fields (PSA writeback, legacy
# SessionBriefing) tolerate either shape.
session.escalation_package = {
"snapshot": snapshot,
"intent": intent,
"engineer_notes": engineer_notes,
"handoff_id": str(handoff.id),
}
await self.db.flush()
return handoff
@@ -211,6 +200,10 @@ class HandoffManager:
"engineer_name": engineer_name,
"escalation_reason": handoff.engineer_notes or "",
"problem_summary": session.problem_summary or "N/A",
# Surface the PSA ticket id in the bell-icon title so two
# similarly-worded escalations are still distinguishable
# at a glance.
"psa_ticket_id": session.psa_ticket_id,
},
self.db,
target_user_ids=target_user_ids,
@@ -247,6 +240,7 @@ class HandoffManager:
)
return {}
async def dispatch_escalation_notifications(
self, handoff: SessionHandoff
) -> int:
@@ -585,3 +579,113 @@ class HandoffManager:
})
return queue_items
async def enrich_escalation_async(handoff_id: UUID, user_id: UUID) -> None:
    """Run the AI enrichment for an escalation handoff in the background.

    Scheduled by the `/escalate` and `/handoff` (intent=escalate) endpoints
    via FastAPI BackgroundTasks. Opens its own DB session through
    `async_session_maker` instead of reusing the request-scoped one.

    Two independent enrichment steps run; a failure in one is logged and
    does not prevent the other:

    1. The AI-enhanced escalation package, saved to
       `session.escalation_package` with the `intent` / `engineer_notes` /
       `handoff_id` keys re-applied and the existing `snapshot` carried
       over when the enhanced package does not provide one.
    2. The diagnostic AI assessment, saved to `handoff.ai_assessment` and
       `handoff.ai_assessment_data`.

    After the commit a `handoff_assessment_ready` event is published on the
    escalation bus. Failures are logged and never propagated: worst case the
    handoff simply keeps `ai_assessment=None`.

    Args:
        handoff_id: Primary key of the `SessionHandoff` row to enrich.
        user_id: Forwarded to `_build_enhanced_escalation_package`.

    Returns:
        None. All results are persisted as side effects.
    """
    from app.core.database import async_session_maker
    from app.core.escalation_bus import bus as escalation_bus

    async with async_session_maker() as db:
        try:
            result = await db.execute(
                select(SessionHandoff).where(SessionHandoff.id == handoff_id)
            )
            handoff = result.scalar_one_or_none()
            if handoff is None:
                # Leave a trace: a missing row here usually means the task
                # ran before (or without) the creating transaction's commit.
                logger.warning(
                    "enrich_escalation_async: handoff %s not found; skipping",
                    handoff_id,
                )
                return
            if handoff.intent != "escalate":
                # Only escalations get AI enrichment.
                return

            session_result = await db.execute(
                select(AISession)
                .options(selectinload(AISession.steps), selectinload(AISession.user))
                .where(AISession.id == handoff.session_id)
            )
            session = session_result.scalar_one_or_none()
            if not session:
                logger.warning(
                    "enrich_escalation_async: session %s gone for handoff %s",
                    handoff.session_id,
                    handoff_id,
                )
                return

            manager = HandoffManager(db)

            # Build the enhanced package (Sonnet). Don't fail the whole
            # task if it errors — the assessment is independently useful.
            try:
                enhanced_pkg = await manager._build_enhanced_escalation_package(
                    session, user_id
                )
                if enhanced_pkg:
                    enhanced_pkg["intent"] = "escalate"
                    enhanced_pkg["engineer_notes"] = handoff.engineer_notes
                    enhanced_pkg["handoff_id"] = str(handoff.id)
                    if isinstance(session.escalation_package, dict):
                        # Keep the snapshot written at handoff creation
                        # unless the enhanced package already carries one.
                        enhanced_pkg.setdefault(
                            "snapshot", session.escalation_package.get("snapshot")
                        )
                    session.escalation_package = enhanced_pkg
            except Exception:
                logger.exception(
                    "enrich_escalation_async: enhanced package build failed for handoff %s",
                    handoff_id,
                )

            # Generate the diagnostic AI assessment.
            try:
                ai_assessment, ai_assessment_data = (
                    await manager._generate_ai_assessment_with_timeout(session)
                )
                handoff.ai_assessment = ai_assessment
                handoff.ai_assessment_data = ai_assessment_data
            except Exception:
                logger.exception(
                    "enrich_escalation_async: assessment generation failed for handoff %s",
                    handoff_id,
                )

            # Snapshot everything the bus event needs BEFORE committing.
            # With SQLAlchemy's default expire_on_commit=True, commit()
            # expires ORM attributes, and re-reading them on an
            # AsyncSession would require implicit IO. Capturing the values
            # here keeps the publish independent of how the session maker
            # is configured.
            account_id = handoff.account_id
            event = {
                "type": "handoff_assessment_ready",
                "handoff_id": str(handoff.id),
                "session_id": str(handoff.session_id),
                "has_assessment": handoff.ai_assessment is not None,
            }

            await db.commit()

            # Publishing is advisory: the enrichment is already persisted.
            try:
                await escalation_bus.publish(account_id, event)
            except Exception:
                logger.exception(
                    "enrich_escalation_async: bus publish failed for handoff %s",
                    handoff_id,
                )
        except Exception:
            logger.exception(
                "enrich_escalation_async failed for handoff %s", handoff_id
            )
            try:
                await db.rollback()
            except Exception:
                # Still best-effort, but record it instead of hiding it.
                logger.exception(
                    "enrich_escalation_async: rollback failed for handoff %s",
                    handoff_id,
                )

View File

@@ -371,13 +371,35 @@ async def _send_teams_message(
def _build_notification_title(event: str, payload: dict[str, Any]) -> str:
"""Human-readable title per event type."""
titles = {
"session.escalated": "Session escalated by {engineer_name}",
# Distinguishability matters in the bell panel: with a generic title
# ("Session escalated by Jane") two different escalations from the
# same junior look like a duplicate notification. Including a short
# problem snippet (and ticket number if present) lets the senior
# tell them apart at a glance.
"session.escalated": "Escalation from {engineer_name}{ticket_suffix}: {problem_snippet}",
"session.high_priority": "High-priority session started: {ticket_number}",
"proposal.pending": "New flow proposal: {title}",
"proposal.approved": "Flow proposal approved: {title}",
"knowledge_gap.detected": "Knowledge gap detected: {gap_type}",
"test": "Test Notification from ResolutionFlow",
}
# Build the escalation-specific derived fields. Done here rather than at
# the call site so every dispatch path (legacy /escalate shim, /handoff,
# any future entry point) gets consistent formatting without each one
# having to repeat the snippet logic.
if event == "session.escalated":
problem = (payload.get("problem_summary") or "").strip()
if not problem or problem.upper() == "N/A":
problem_snippet = "(no summary provided)"
elif len(problem) > 70:
problem_snippet = problem[:67].rstrip() + "…"
else:
problem_snippet = problem
ticket = payload.get("psa_ticket_id") or payload.get("ticket_number")
ticket_suffix = f" · #{ticket}" if ticket else ""
payload = {**payload, "problem_snippet": problem_snippet, "ticket_suffix": ticket_suffix}
template = titles.get(event, f"Notification: {event}")
try:
return template.format(**payload)

View File

@@ -219,10 +219,31 @@ function ChatItem({
</div>
) : (
<>
<div className="text-[0.8125rem] font-medium truncate">{chat.title}</div>
<div className="text-[0.6875rem] text-muted-foreground">
{chat.message_count} messages
<div className="flex items-center gap-1.5 min-w-0">
<div className="text-[0.8125rem] font-medium truncate">{chat.title}</div>
{chat.psa_ticket_id && (
<span className="font-mono shrink-0 rounded-md bg-accent-dim px-1.5 py-0.5 text-[0.5625rem] text-accent-text">
#{chat.psa_ticket_id}
</span>
)}
{(chat.status === 'escalated' || chat.status === 'requesting_escalation') && (
<span className="font-sans shrink-0 rounded-md bg-warning-dim px-1.5 py-0.5 text-[0.5625rem] uppercase tracking-wider text-warning border border-warning/20">
Escalated
</span>
)}
</div>
{/* Secondary line: problem snippet when the title doesn't already
carry it, otherwise the message count. Keeps untitled
sessions from collapsing into identical-looking rows. */}
{chat.problem_summary && chat.problem_summary !== chat.title ? (
<div className="text-[0.6875rem] text-muted-foreground truncate">
{chat.problem_summary}
</div>
) : (
<div className="text-[0.6875rem] text-muted-foreground">
{chat.message_count} messages
</div>
)}
</>
)}
</div>

View File

@@ -241,8 +241,9 @@ export function HandoffContextScreen({
<div className="flex items-start gap-2 rounded-lg bg-elevated px-3 py-3 text-xs text-muted-foreground">
<AlertTriangle size={12} className="mt-0.5 shrink-0 text-warning" />
<span>
Assessment unavailable — model didn't respond in time. Pick up
the session to investigate directly.
AI assessment is still generating. Reopen this view in a few
seconds to see it, or pick up the session to investigate
directly.
</span>
</div>
) : (

View File

@@ -440,6 +440,9 @@ export default function AssistantChatPage() {
pinned: false,
created_at: s.created_at,
updated_at: s.created_at,
problem_summary: s.problem_summary,
psa_ticket_id: s.psa_ticket_id,
status: s.status,
})))
} catch {
// silently handle

View File

@@ -5,6 +5,14 @@ export interface ChatListItem {
pinned: boolean
created_at: string
updated_at: string
// Optional secondary fields used by the sidebar to make untitled / generic
// sessions distinguishable. `problem_summary` powers the secondary line
// when the title doesn't already carry it; `psa_ticket_id` shows as a
// monospace badge so PSA-linked sessions are obvious; `status` lets us
// tag escalated / picked-up sessions with a color cue.
problem_summary?: string | null
psa_ticket_id?: string | null
status?: string | null
}
export interface RetentionSettings {