Files
resolutionflow/backend/app/services/handoff_manager.py
Michael Chihlas 029680ab2d
All checks were successful
Mirror to GitHub / mirror (push) Successful in 4s
CI / frontend (pull_request) Successful in 5m8s
CI / backend (pull_request) Successful in 10m13s
CI / e2e (pull_request) Successful in 10m47s
feat(escalations): unify /escalate through HandoffManager
Replaces the legacy flowpilot_engine.escalate_session orchestration with
a single canonical path through HandoffManager. Every escalation now
creates a SessionHandoff row, fans out via the SSE bus, persists
AppNotification rows for the bell icon, dispatches to external channels
(Slack/Teams) via notify(), and emails per-user — regardless of whether
the call entered through /escalate (legacy URL) or /handoff (new URL).
The senior-pickup magic-moment screen now works end-to-end from the
EscalateModal bell-icon path the user just tested.

Backend
- HandoffCreateRequest gains optional target_user_id (the equivalent of
  the legacy escalated_to_id field). Self-targeting rejected.
- HandoffManager.create_handoff handles intent='escalate' end-to-end:
  sets escalation_reason + escalated_to_id, builds the legacy enhanced
  AI escalation_package (Sonnet, lazy-imported from flowpilot_engine,
  graceful fallback on failure), and merges handoff metadata into it.
  Eager-loads session.steps and session.user via selectinload — required
  by both the enhanced-package builder and notify() to avoid
  MissingGreenlet on async lazy access.
- HandoffManager.finalize_escalation generates SessionDocumentation,
  pushes documentation to PSA, and runs notify() — pre-commit so the
  AppNotification rows persist atomically with the handoff.
- HandoffManager.dispatch_escalation_notifications keeps only the
  fire-and-forget IO (bus publish, per-user emails) — runs post-commit.
  Pulls engineer name via a separate User query rather than relying on
  session.user lazy access.
- /handoff endpoint passes target_user_id through and calls
  finalize_escalation pre-commit.
- /escalate endpoint is now a thin shim: owner-only session lookup,
  HandoffManager.create_handoff(intent='escalate'), finalize_escalation,
  commit, dispatch_escalation_notifications, return SessionCloseResponse
  built from documentation + psa_result. flowpilot_engine.escalate_session
  is no longer called by any endpoint.
- pickup_session accepts both 'requesting_escalation' (legacy in-flight
  sessions) and 'escalated' (new canonical) so the migration is seamless
  for sessions already in the queue.
- Escalation queue list and sidebar count now match either status.

Frontend
- useFlowPilotSession optimistic update flips status to 'escalated'
  instead of 'requesting_escalation' so the page state matches the
  unified backend response.

Verified end-to-end live: a fresh /escalate call from the junior produces
status='escalated', a SessionHandoff row, a SessionDocumentation, PSA
push attempted (no_psa for this test session), AND a bell-icon
AppNotification for the team admin with link
/pilot/{session_id}?pickup=true. Backend test suite: 1103 passed.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-27 22:27:26 -04:00

588 lines
22 KiB
Python

"""Handoff management — unified park/escalate with dual-write backward compat.
Creates handoff snapshots, AI assessments (for escalations), claim workflow,
and queue queries. Dual-writes to ai_sessions.escalation_package for
backward compatibility with the existing escalation queue.
For intent='escalate', `create_handoff` also runs the legacy enrichment
that the deprecated `/escalate` endpoint used to do directly: setting
`escalated_to_id`, building the AI-enhanced escalation_package (Sonnet),
and recording escalation_reason. `finalize_escalation` then generates the
SessionDocumentation and pushes to PSA. `dispatch_escalation_notifications`
fans out the bell-icon AppNotification + external channels (Slack/Teams)
on top of per-user emails. The `/escalate` endpoint is now a thin shim
calling these in sequence.
"""
import asyncio
import logging
from datetime import datetime, timezone
from typing import Any
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.core.config import settings
from app.core.email import EmailService
from app.core.escalation_bus import bus as escalation_bus
from app.models.ai_session import AISession
from app.models.session_branch import SessionBranch
from app.models.session_handoff import SessionHandoff
from app.models.user import User
from app.schemas.ai_session import SessionDocumentation
from app.services.notification_service import notify
logger = logging.getLogger(__name__)
class HandoffManager:
"""Unified park/escalate handoff management."""
def __init__(self, db: AsyncSession):
self.db = db
async def create_handoff(
self,
session_id: UUID,
intent: str,
engineer_notes: str | None,
user_id: UUID,
priority: str = "normal",
target_user_id: UUID | None = None,
) -> SessionHandoff:
"""Create a handoff (park or escalate).
Generates snapshot, updates session status, dual-writes to
escalation_package for backward compat.
For intent='escalate' also: sets `session.escalation_reason` and
optionally `session.escalated_to_id`, builds the AI-enhanced
escalation package (the rich one the legacy `/escalate` path used
to produce), and merges the handoff metadata into it. Self-targeting
is rejected with ValueError, matching legacy behavior.
"""
# Eager-load steps + user — _build_escalation_package_enhanced and
# finalize_escalation iterate over session.steps to compose the
# legacy enriched package and the SessionDocumentation, and the
# notify() dispatcher reads session.user.name. Without selectinload
# the async session raises MissingGreenlet on attribute access.
result = await self.db.execute(
select(AISession)
.options(
selectinload(AISession.steps),
selectinload(AISession.user),
)
.where(AISession.id == session_id)
)
session = result.scalar_one_or_none()
if not session:
raise ValueError(f"Session {session_id} not found")
if intent == "escalate":
if target_user_id and target_user_id == user_id:
raise ValueError(
"Cannot escalate a session to yourself. Use pause instead."
)
if session.status not in ("active", "paused"):
raise ValueError(
f"Cannot escalate session in status: {session.status}"
)
# Generate snapshot
snapshot = await self._generate_snapshot(session)
# Generate AI assessment for escalations
ai_assessment = None
ai_assessment_data = None
if intent == "escalate":
ai_assessment, ai_assessment_data = (
await self._generate_ai_assessment_with_timeout(session)
)
handoff = SessionHandoff(
session_id=session_id,
account_id=session.account_id,
handed_off_by=user_id,
intent=intent,
source_branch_id=session.active_branch_id,
snapshot=snapshot,
ai_assessment=ai_assessment,
ai_assessment_data=ai_assessment_data,
engineer_notes=engineer_notes,
priority=priority,
)
self.db.add(handoff)
# Update session status
if intent == "park":
session.status = "paused"
elif intent == "escalate":
session.status = "escalated"
session.escalation_reason = engineer_notes
if target_user_id:
session.escalated_to_id = target_user_id
session.handoff_count = (session.handoff_count or 0) + 1
# Dual-write to escalation_package. For escalate, build the
# AI-enhanced package (preserves the legacy rich shape that
# SessionBriefing/PSA writeback consume), then layer in the new
# handoff metadata. For park, the lightweight shape is fine —
# there's no legacy enhanced package for parking.
if intent == "escalate":
enhanced_pkg = await self._build_enhanced_escalation_package(
session, user_id
)
enhanced_pkg["intent"] = intent
enhanced_pkg["engineer_notes"] = engineer_notes
enhanced_pkg["handoff_id"] = str(handoff.id)
enhanced_pkg["snapshot"] = snapshot
session.escalation_package = enhanced_pkg
else:
session.escalation_package = {
"snapshot": snapshot,
"intent": intent,
"engineer_notes": engineer_notes,
"handoff_id": str(handoff.id),
}
await self.db.flush()
return handoff
async def finalize_escalation(
self,
handoff: SessionHandoff,
session: AISession,
user_id: UUID,
) -> tuple[SessionDocumentation | None, dict[str, Any]]:
"""Post-create enrichment for intent='escalate' handoffs.
Generates the SessionDocumentation + pushes documentation to PSA if
a ticket is linked. Returns (documentation, psa_result) so the
legacy `/escalate` shim can map back to SessionCloseResponse. Safe
to call only when handoff.intent == 'escalate' — for park, returns
a no-op no-PSA dict.
"""
if handoff.intent != "escalate":
return None, {
"psa_push_status": "no_psa",
"psa_push_error": None,
"member_mapping_warning": None,
}
# Lazy import to avoid circular dependency: flowpilot_engine imports
# plenty of services at module load time and we don't want
# handoff_manager pulled into that graph at import.
from app.services.flowpilot_engine import (
_generate_documentation,
_push_to_psa,
)
documentation = _generate_documentation(session)
psa_result = await _push_to_psa(session, user_id, self.db)
# Bell-icon AppNotification rows + external account-level channels
# (Slack/Teams webhooks, shared escalations inboxes). This is the
# `notify()` call the legacy /escalate path used to make directly,
# and it has to happen BEFORE the endpoint commits so the
# AppNotification rows land atomically with the handoff. Per-user
# emails come after commit in dispatch_escalation_notifications —
# those are pure IO with no persistent state.
try:
engineer_user = (
await self.db.execute(
select(User).where(User.id == user_id)
)
).scalar_one_or_none()
engineer_name = (
engineer_user.name
if engineer_user and engineer_user.name
else "Unknown"
)
target_user_ids = (
[session.escalated_to_id] if session.escalated_to_id else None
)
await notify(
"session.escalated",
handoff.account_id,
{
"session_id": str(handoff.session_id),
"engineer_name": engineer_name,
"escalation_reason": handoff.engineer_notes or "",
"problem_summary": session.problem_summary or "N/A",
},
self.db,
target_user_ids=target_user_ids,
)
except Exception:
logger.exception(
"notify() dispatch failed for handoff %s", handoff.id
)
return documentation, psa_result
async def _build_enhanced_escalation_package(
self,
session: AISession,
user_id: UUID,
) -> dict[str, Any]:
"""Lazy wrapper around the legacy enhanced-package builder.
The builder lives in flowpilot_engine; we only need it for the
escalate path. Failures are caught here so handoff creation never
depends on the optional Sonnet enrichment — return the minimal
shape on failure.
"""
try:
from app.services.flowpilot_engine import (
_build_escalation_package_enhanced,
)
return await _build_escalation_package_enhanced(session, user_id)
except Exception:
logger.exception(
"Enhanced escalation package build failed for session %s; "
"falling back to minimal package",
session.id,
)
return {}
async def dispatch_escalation_notifications(
self, handoff: SessionHandoff
) -> int:
"""Email engineer-or-admin users in the account about a new escalation.
Call this AFTER `db.commit()` has succeeded — sending email for a
rolled-back handoff is the kind of trust-erosion bug that makes pilot
customers stop trusting the tool. Returns the number of recipients
successfully emailed (best-effort, not authoritative).
Failures are logged but never raise: the wedge demo's reliability
story is "handoff creation always succeeds; notification is best-effort,"
not "handoff creation depends on the email service being up." This is
the graceful-degradation regression the eng + codex reviews both
flagged as critical.
Per-channel delivery records (Codex correction on the dead
`notification_sent` boolean) are a v1.x story — for now the
application logs are the audit trail.
"""
if handoff.intent != "escalate":
return 0
# Publish to the in-memory bus first so connected senior-tech inboxes
# see the new card slide in within ~1s of escalate. This path is
# fire-and-forget (no IO, just memory) so it can sit ahead of the
# email fan-out.
try:
await escalation_bus.publish(
handoff.account_id,
{
"type": "handoff_created",
"handoff_id": str(handoff.id),
"session_id": str(handoff.session_id),
"priority": handoff.priority,
"engineer_notes": handoff.engineer_notes or "",
"created_at": handoff.created_at.isoformat()
if handoff.created_at
else None,
},
)
except Exception:
logger.exception(
"EscalationBus publish failed for handoff %s", handoff.id
)
try:
recipients = (
await self.db.execute(
select(User).where(
User.account_id == handoff.account_id,
User.id != handoff.handed_off_by,
User.account_role.in_(("owner", "admin", "engineer")),
User.is_active.is_(True),
User.deleted_at.is_(None),
)
)
).scalars().all()
if not recipients:
logger.info(
"No notification recipients for handoff %s in account %s",
handoff.id,
handoff.account_id,
)
return 0
# Pull session for the email subject. Fall back to a generic title
# if the session is gone (e.g. cascade delete mid-dispatch).
session_result = await self.db.execute(
select(AISession).where(AISession.id == handoff.session_id)
)
session = session_result.scalar_one_or_none()
problem = (
session.problem_summary if session and session.problem_summary
else "an active session"
)
title = f"New escalation: {problem}"
notes = (handoff.engineer_notes or "").strip()
body = (
"A teammate has escalated a session and is asking for help.\n\n"
f"Reason: {notes if notes else 'No reason provided.'}\n"
f"Priority: {handoff.priority}"
)
link_url = (
f"{settings.FRONTEND_URL.rstrip('/')}/escalations"
if settings.FRONTEND_URL
else None
)
results = await asyncio.gather(
*[
EmailService.send_notification_email(
to_email=r.email,
title=title,
body=body,
link_url=link_url,
)
for r in recipients
],
return_exceptions=True,
)
sent = sum(1 for r in results if r is True)
logger.info(
"Escalation notifications dispatched for handoff %s: %d/%d recipients",
handoff.id,
sent,
len(recipients),
)
return sent
except Exception:
logger.exception(
"Escalation notification dispatch failed for handoff %s",
handoff.id,
)
return 0
async def _generate_snapshot(self, session: AISession) -> dict[str, Any]:
"""Generate a snapshot of the session state at handoff time."""
snapshot: dict[str, Any] = {
"problem_summary": session.problem_summary,
"problem_domain": session.problem_domain,
"status": session.status,
"step_count": session.step_count,
"confidence_tier": session.confidence_tier,
}
# Add branch map if branching is active
if session.is_branching:
branches_result = await self.db.execute(
select(SessionBranch)
.where(SessionBranch.session_id == session.id)
.order_by(SessionBranch.branch_order)
)
branches = list(branches_result.scalars().all())
branch_map = []
for b in branches:
branch_map.append({
"id": str(b.id),
"label": b.label,
"status": b.status,
"status_reason": b.status_reason,
"parent_branch_id": str(b.parent_branch_id) if b.parent_branch_id else None,
})
snapshot["branch_map"] = branch_map
snapshot["active_branch_id"] = str(session.active_branch_id) if session.active_branch_id else None
return snapshot
async def claim_session(
self,
handoff_id: UUID,
claiming_user_id: UUID,
) -> SessionHandoff:
"""Claim a handed-off session."""
result = await self.db.execute(
select(SessionHandoff).where(SessionHandoff.id == handoff_id)
)
handoff = result.scalar_one_or_none()
if not handoff:
raise ValueError(f"Handoff {handoff_id} not found")
handoff.claimed_by = claiming_user_id
handoff.claimed_at = datetime.now(timezone.utc)
# Reactivate session
session_result = await self.db.execute(
select(AISession).where(AISession.id == handoff.session_id)
)
session = session_result.scalar_one()
session.status = "active"
# Dual-write
session.escalated_to_id = claiming_user_id
await self.db.flush()
return handoff
async def _generate_ai_assessment(
self, session: AISession
) -> tuple[str | None, dict[str, Any] | None]:
"""Generate AI diagnostic assessment for escalation handoffs."""
try:
from app.services.assistant_chat_service import _call_ai
context = f"Problem: {session.problem_summary or 'Unknown'}\nDomain: {session.problem_domain or 'Unknown'}"
msgs = session.conversation_messages or []
# Include last 10 messages for context
recent = "\n".join(
f"[{m.get('role', '?')}]: {m.get('content', '')[:200]}"
for m in msgs[-10:]
)
assessment_text, _, _ = await _call_ai(
system_base="You are a diagnostic assessment generator for MSP escalations.",
rag_context="",
history=[],
new_message=(
f"Generate a brief diagnostic assessment for this escalation.\n"
f"{context}\n\nRecent conversation:\n{recent}\n\n"
f"Return: 1) Most likely cause, 2) Suggested next steps, 3) Confidence (low/medium/high)"
),
max_tokens=500,
)
assessment_data = {
"likely_cause": "See assessment text",
"suggested_steps": [],
"confidence": "medium",
}
return assessment_text, assessment_data
except Exception:
logger.exception("Failed to generate AI assessment")
return None, None
async def _generate_ai_assessment_with_timeout(
self, session: AISession
) -> tuple[str | None, dict[str, Any] | None]:
"""Generate optional escalation assessment within the click-path budget."""
timeout = settings.ESCALATION_AI_ASSESSMENT_TIMEOUT_SECONDS
try:
return await asyncio.wait_for(
self._generate_ai_assessment(session),
timeout=timeout,
)
except asyncio.TimeoutError:
logger.warning(
"Escalation AI assessment timed out after %ss for session %s",
timeout,
session.id,
)
return None, None
async def generate_briefing(
self, handoff_id: UUID, claiming_user_id: UUID
) -> str:
"""Generate a natural-language briefing for the engineer claiming the session."""
result = await self.db.execute(
select(SessionHandoff).where(SessionHandoff.id == handoff_id)
)
handoff = result.scalar_one_or_none()
if not handoff:
raise ValueError(f"Handoff {handoff_id} not found")
session_result = await self.db.execute(
select(AISession).where(AISession.id == handoff.session_id)
)
session = session_result.scalar_one()
from app.services.assistant_chat_service import _call_ai
snapshot_text = str(handoff.snapshot)[:2000]
briefing, _, _ = await _call_ai(
system_base="You are a handoff briefing generator for MSP teams.",
rag_context="",
history=[],
new_message=(
f"Generate a concise briefing for an engineer picking up this session.\n"
f"Problem: {session.problem_summary}\n"
f"Intent: {handoff.intent}\n"
f"Engineer notes: {handoff.engineer_notes or 'None'}\n"
f"Snapshot: {snapshot_text}\n"
f"AI Assessment: {handoff.ai_assessment or 'None'}"
),
max_tokens=500,
)
return briefing
async def push_to_psa(self, handoff_id: UUID) -> SessionHandoff:
"""Push handoff notes to PSA via existing psa_documentation_service."""
result = await self.db.execute(
select(SessionHandoff).where(SessionHandoff.id == handoff_id)
)
handoff = result.scalar_one_or_none()
if not handoff:
raise ValueError(f"Handoff {handoff_id} not found")
try:
from app.services.psa_documentation_service import push_session_notes
session_result = await self.db.execute(
select(AISession).where(AISession.id == handoff.session_id)
)
session = session_result.scalar_one()
if session.psa_ticket_id and session.psa_connection_id:
note_id = await push_session_notes(
session=session,
notes_content=handoff.ai_assessment or str(handoff.snapshot),
db=self.db,
)
handoff.psa_note_pushed = True
handoff.psa_note_id = note_id
except Exception:
logger.exception(f"Failed to push handoff {handoff_id} to PSA")
await self.db.flush()
return handoff
async def get_queue(
self,
team_id: UUID | None = None,
account_id: UUID | None = None,
) -> list[dict[str, Any]]:
"""Get team queue of parked + escalated sessions."""
query = (
select(SessionHandoff, AISession)
.join(AISession, SessionHandoff.session_id == AISession.id)
.where(SessionHandoff.claimed_by.is_(None))
.order_by(SessionHandoff.created_at.desc())
)
if team_id:
query = query.where(AISession.team_id == team_id)
elif account_id:
query = query.where(AISession.account_id == account_id)
result = await self.db.execute(query)
rows = result.all()
queue_items = []
for handoff, session in rows:
queue_items.append({
"handoff_id": handoff.id,
"session_id": session.id,
"intent": handoff.intent,
"problem_summary": session.problem_summary,
"problem_domain": session.problem_domain,
"priority": handoff.priority,
"engineer_notes": handoff.engineer_notes,
"created_at": handoff.created_at,
"claimed_by": handoff.claimed_by,
"claimed_at": handoff.claimed_at,
})
return queue_items