feat(analytics): add escalation time-to-first-action metric endpoint

GET /api/v1/analytics/flowpilot/escalations?period={7d,30d,90d}

Computes the in-product wedge metric for Escalation Mode: average / median / p95 seconds between SessionHandoff.claimed_at and the first ai_session_step created on the same session after that timestamp. Account-scoped, role-gated to engineer-or-admin.

The metric is intentionally NOT called "minutes recovered" — that's the two-metric framing locked by /codex review: this in-product number must be paired with a manual baseline (the verbal-handoff stopwatch from The Assignment) to produce the savings claim. Schema's `metric_definition` field surfaces the disclaimer in every response so callers don't oversell it.

Implementation notes:
- Uses a correlated scalar subquery for first-step-after-claim per handoff, aggregates avg/median/p95 in Python (~1k rows/account/month is well within budget; cleaner than percentile_cont gymnastics in SQL)
- Excludes unclaimed handoffs (claimed_at IS NULL)
- Counts claimed-but-no-action handoffs in n_handoffs_claimed but not in n_handoffs_with_action — surfaces the conversion-rate signal
- Floors negative deltas at 0 to handle clock-drift edge cases

Tests cover happy path, zero-data, claimed-but-no-action accounting, period window filtering, multi-handoff aggregation, multi-tenant isolation (Phase 4 RLS landmine pattern), viewer-role 403 gate, and period validation. 9 tests, all green. No regressions in existing handoff_manager / session_handoffs suites.

First piece of the Approach A wedge build per docs/plans/2026-04-27-escalation-mode-wedge-design.md. Unblocks the queue stat-card and the analytics page.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-27 15:25:46 -04:00
parent d51e95cdfa
commit 52f6d0308f
3 changed files with 498 additions and 1 deletions
--- a/backend/app/api/endpoints/flowpilot_analytics.py
+++ b/backend/app/api/endpoints/flowpilot_analytics.py
@@ -3,8 +3,10 @@
 Endpoints:
  GET /analytics/flowpilot?period=30d — Main dashboard data
  GET /analytics/flowpilot/knowledge-gaps — Knowledge gap report
+  GET /analytics/flowpilot/escalations?period=30d — Escalation handoff metrics
 """
 import logging
+import statistics
 from datetime import datetime, timezone, timedelta
 from typing import Annotated, Optional

@@ -13,10 +15,17 @@ from sqlalchemy import select, func, case, cast, Date, extract
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.core.rate_limit import limiter
-from app.api.deps import get_current_active_user, get_db, require_team_admin
+from app.api.deps import (
+    get_current_active_user,
+    get_db,
+    require_engineer_or_admin,
+    require_team_admin,
+)
 from app.models.user import User
 from app.models.tree import Tree
 from app.models.ai_session import AISession
+from app.models.ai_session_step import AISessionStep
+from app.models.session_handoff import SessionHandoff
 from app.models.flow_proposal import FlowProposal
 from app.models.psa_activity_log import PsaActivityLog
 from app.models.psa_post_log import PsaPostLog
@@ -36,6 +45,7 @@ from app.schemas.flowpilot_analytics import (
    EnhancedPsaMetrics,
    PsaFunnel,
    PsaDailyTrend,
+    EscalationMetrics,
 )
 from app.services.knowledge_gap_service import get_knowledge_gaps, KnowledgeGapReport

@@ -727,3 +737,104 @@ async def get_enhanced_psa_metrics(
        push_funnel=push_funnel,
        daily_trend=daily_trend,
    )
+
+
+# ─── Escalation Mode metrics (wedge stat for /escalations queue + analytics page)
+#
+# Pulls all (handoff.claimed_at, first_step_after_claim.created_at) pairs in the
+# window and aggregates avg/median/p95 of the delta in Python. Pilot scale
+# (~1k rows max per account per month) makes this cheaper and clearer than
+# Postgres percentile_cont gymnastics.
+#
+# IMPORTANT: this is the in-product metric only. The "minutes recovered"
+# sales claim requires manual baseline measurement (see The Assignment in
+# docs/plans/2026-04-27-escalation-mode-wedge-design.md).
+
+
+@router.get("/escalations", response_model=EscalationMetrics)
+@limiter.limit("30/minute")
+async def get_escalation_metrics(
+    request: Request,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+    _: None = Depends(require_engineer_or_admin),
+    period: str = Query("30d", pattern="^(7d|30d|90d)$"),
+) -> EscalationMetrics:
+    """Time-to-first-action after escalation claim, account-scoped.
+
+    Returns:
+      n_handoffs_claimed: handoffs in window that were claimed by a senior.
+      n_handoffs_with_action: subset where the senior took at least one
+        action (an ai_session_step row created after claimed_at).
+      avg/median/p95_seconds_to_first_action: aggregates of
+        (first_step.created_at - claimed_at) in seconds.
+
+    Handoffs where claimed_at IS NULL (never claimed) are excluded entirely.
+    Claimed handoffs with no ai_session_step after the claim are counted in
+    n_handoffs_claimed but not in n_handoffs_with_action or the aggregates,
+    so the conversion rate is visible.
+    """
+    if not current_user.account_id:
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN, detail="No account"
+        )
+
+    account_id = current_user.account_id
+    period_start = _get_period_start(period)
+
+    # Earliest step strictly after the claim, per handoff (correlated subquery).
+    first_action_subq = (
+        select(func.min(AISessionStep.created_at))
+        .where(
+            AISessionStep.session_id == SessionHandoff.session_id,
+            AISessionStep.created_at > SessionHandoff.claimed_at,
+        )
+        .correlate(SessionHandoff)
+        .scalar_subquery()
+    )
+
+    rows = (
+        await db.execute(
+            select(
+                SessionHandoff.claimed_at,
+                first_action_subq.label("first_action_at"),
+            ).where(
+                SessionHandoff.account_id == account_id,
+                SessionHandoff.claimed_at.isnot(None),
+                SessionHandoff.claimed_at >= period_start,
+            )
+        )
+    ).all()
+
+    n_handoffs_claimed = len(rows)
+    deltas: list[float] = []
+    for claimed_at, first_action_at in rows:
+        if first_action_at is None:
+            continue
+        delta_s = (first_action_at - claimed_at).total_seconds()
+        # Defensive floor at zero — the subquery already requires
+        # created_at > claimed_at, so a negative delta is not expected;
+        # clamp anyway rather than surface an absurd negative value.
+        if delta_s < 0:
+            delta_s = 0.0
+        deltas.append(delta_s)
+
+    n_handoffs_with_action = len(deltas)
+    if n_handoffs_with_action == 0:
+        return EscalationMetrics(
+            period=period,
+            n_handoffs_claimed=n_handoffs_claimed,
+            n_handoffs_with_action=0,
+        )
+
+    sorted_deltas = sorted(deltas)
+    p95_idx = max(0, int(round(0.95 * (n_handoffs_with_action - 1))))
+
+    return EscalationMetrics(
+        period=period,
+        n_handoffs_claimed=n_handoffs_claimed,
+        n_handoffs_with_action=n_handoffs_with_action,
+        avg_seconds_to_first_action=round(statistics.fmean(deltas), 2),
+        median_seconds_to_first_action=round(statistics.median(deltas), 2),
+        p95_seconds_to_first_action=round(sorted_deltas[p95_idx], 2),
+    )