Files
resolutionflow/backend/app/api/endpoints/flowpilot_analytics.py
chihlasm 9bad49d568 feat(knowledge-flywheel): add Phase 3 Knowledge Flywheel — AI analysis, review queue, analytics
Phase 3 implementation:
- AI session analysis service that generates flow proposals from resolved sessions
- APScheduler job for batch processing pending analyses (max_instances=1)
- Knowledge gap detection (weak options, high escalation signals)
- Flow proposals CRUD with team admin review workflow (approve/edit/dismiss/reject)
- FlowPilot analytics dashboard with confidence tiers, PSA metrics, knowledge gaps
- In-session script generator component
- Review queue page with filtering and proposal detail panel

Bug fixes from review (12 total):
- Fix "Edit & Publish" navigating to non-existent /editor/new route
- Hide Approve button for enhancement proposals (require Edit & Publish)
- Add max_instances=1 to scheduler to prevent TOCTOU race
- Fix eventual_success case() double-counting failed retries
- Add tree_structure validation before creating tree from proposal
- Simplify script generator rendering condition
- Add severity style fallback, toFixed on rates, Link instead of <a href>
- Add toast.warning on dismiss failure, fix dedup for domain-less sessions
- Cast Decimal to int in knowledge gap evidence dicts

Also updates CLAUDE.md with lessons 67-71 and Phase 3 project structure.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 05:12:10 +00:00

359 lines
14 KiB
Python

"""FlowPilot Analytics API — MTTR, resolution rates, knowledge coverage.
Endpoints:
    GET /analytics/flowpilot?period=30d — Main dashboard data
    GET /analytics/flowpilot/knowledge-gaps?period=30d — Knowledge gap report
Both endpoints accept period=7d, 30d or 90d and default to 30d.
"""
import logging
from datetime import datetime, timezone, timedelta
from typing import Annotated, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from sqlalchemy import select, func, case, cast, Date, extract
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.rate_limit import limiter
from app.api.deps import get_current_active_user, get_db, require_team_admin
from app.models.user import User
from app.models.tree import Tree
from app.models.ai_session import AISession
from app.models.flow_proposal import FlowProposal
from app.models.psa_post_log import PsaPostLog
from app.schemas.flowpilot_analytics import (
FlowPilotDashboard,
MTTRDataPoint,
DomainBreakdown,
ConfidenceBreakdown,
KnowledgeCoverage,
DomainCoverage,
PsaMetrics,
)
from app.services.knowledge_gap_service import get_knowledge_gaps, KnowledgeGapReport
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Every route declared on this router is served under the /analytics/flowpilot prefix.
router = APIRouter(prefix="/analytics/flowpilot", tags=["flowpilot-analytics"])
def _get_period_start(period: str) -> datetime:
days = {"7d": 7, "30d": 30, "90d": 90}.get(period, 30)
return datetime.now(timezone.utc) - timedelta(days=days)
def _rate_pct(numerator, denominator) -> float:
    """Return numerator / denominator as a percentage rounded to one decimal.

    Yields 0.0 when the denominator is missing or not positive, so callers
    never divide by zero on an empty result set. Inputs are converted to
    float first so that Decimal aggregates coming back from the database
    produce a plain float.
    """
    if not denominator or denominator <= 0:
        return 0.0
    return round(float(numerator or 0) / float(denominator) * 100, 1)


@router.get("", response_model=FlowPilotDashboard)
@limiter.limit("15/minute")
async def get_dashboard(
    request: Request,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    _: None = Depends(require_team_admin),
    period: str = Query("30d", pattern="^(7d|30d|90d)$"),
):
    """Get FlowPilot analytics dashboard data.

    Aggregates, for the caller's account and the requested period:
    session counts and resolution rate, MTTR (overall and per-day trend),
    average session duration, per-domain and per-confidence-tier breakdowns,
    knowledge coverage (flows and proposals) and, when at least one session
    is linked to a PSA ticket, PSA push metrics.

    Args:
        request: Incoming request; required by the rate limiter decorator.
        current_user: Authenticated user; its ``account_id`` scopes every query.
        db: Async database session used for all queries.
        _: Result of the ``require_team_admin`` dependency (unused here;
            presumably enforces the admin check — confirm in app.api.deps).
        period: Reporting window label, one of ``7d``, ``30d`` or ``90d``.

    Returns:
        A populated ``FlowPilotDashboard``.

    Raises:
        HTTPException: 403 when the current user has no ``account_id``.
    """
    if not current_user.account_id:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="No account")

    account_id = current_user.account_id
    period_start = _get_period_start(period)

    # Shared building blocks for the session queries below.
    in_period = (
        AISession.account_id == account_id,
        AISession.created_at >= period_start,
    )
    escalated_statuses = ["escalated", "requesting_escalation"]
    is_resolved = AISession.status == "resolved"
    is_escalated = AISession.status.in_(escalated_statuses)
    # Minutes elapsed between session creation and resolution.
    minutes_to_resolve = extract("epoch", AISession.resolved_at - AISession.created_at) / 60

    # ── Session counts ──
    counts_result = await db.execute(
        select(
            func.count(AISession.id).label("total"),
            func.sum(case((is_resolved, 1), else_=0)).label("resolved"),
            func.sum(case((is_escalated, 1), else_=0)).label("escalated"),
            func.sum(case((AISession.status == "abandoned", 1), else_=0)).label("abandoned"),
            # Only resolved sessions contribute to the step average.
            func.avg(case((is_resolved, AISession.step_count), else_=None)).label("avg_steps"),
            func.avg(AISession.session_rating).label("avg_rating"),
        ).where(*in_period)
    )
    row = counts_result.one()
    total = row.total or 0
    resolved = row.resolved or 0
    escalated = row.escalated or 0
    abandoned = row.abandoned or 0
    avg_steps = float(row.avg_steps or 0)
    # `is not None` so that a genuine average of 0 is not reported as "no data".
    avg_rating = float(row.avg_rating) if row.avg_rating is not None else None

    # ── MTTR (resolved sessions only) ──
    mttr_result = await db.execute(
        select(func.avg(minutes_to_resolve).label("avg_mttr")).where(
            *in_period,
            is_resolved,
            AISession.resolved_at.isnot(None),
        )
    )
    mttr_row = mttr_result.one()
    mttr_minutes = float(mttr_row.avg_mttr) if mttr_row.avg_mttr is not None else None

    # ── Average duration (any session with a resolved_at, regardless of status) ──
    duration_result = await db.execute(
        select(func.avg(minutes_to_resolve).label("avg_duration")).where(
            *in_period,
            AISession.resolved_at.isnot(None),
        )
    )
    dur_row = duration_result.one()
    avg_duration = float(dur_row.avg_duration) if dur_row.avg_duration else 0.0

    # ── MTTR trend (one point per resolution day) ──
    resolved_day = cast(AISession.resolved_at, Date)
    mttr_trend_result = await db.execute(
        select(
            resolved_day.label("day"),
            func.avg(minutes_to_resolve).label("mttr"),
            func.count(AISession.id).label("count"),
        )
        .where(
            *in_period,
            is_resolved,
            AISession.resolved_at.isnot(None),
        )
        .group_by(resolved_day)
        .order_by(resolved_day)
    )
    mttr_trend = [
        MTTRDataPoint(
            date=str(r.day),
            mttr_minutes=round(float(r.mttr or 0), 1),
            session_count=r.count,
        )
        for r in mttr_trend_result.all()
    ]

    # ── Domain breakdown (most active domains first) ──
    domain_result = await db.execute(
        select(
            AISession.problem_domain,
            func.count(AISession.id).label("total"),
            func.sum(case((is_resolved, 1), else_=0)).label("resolved"),
            func.sum(case((is_escalated, 1), else_=0)).label("escalated"),
        )
        .where(
            *in_period,
            AISession.problem_domain.isnot(None),
        )
        .group_by(AISession.problem_domain)
        .order_by(func.count(AISession.id).desc())
    )
    sessions_by_domain = [
        DomainBreakdown(
            domain=r.problem_domain or "unknown",
            total=r.total,
            resolved=r.resolved or 0,
            escalated=r.escalated or 0,
            resolution_rate=_rate_pct(r.resolved, r.total),
        )
        for r in domain_result.all()
    ]

    # ── Confidence breakdown (only sessions that ended resolved or escalated) ──
    confidence_result = await db.execute(
        select(
            AISession.confidence_tier,
            func.count(AISession.id).label("total"),
            func.sum(case((is_resolved, 1), else_=0)).label("resolved"),
        )
        .where(
            *in_period,
            AISession.status.in_(["resolved", *escalated_statuses]),
        )
        .group_by(AISession.confidence_tier)
    )
    conf_data = {
        r.confidence_tier: (r.total or 0, r.resolved or 0)
        for r in confidence_result.all()
    }
    guided_total, guided_resolved = conf_data.get("guided", (0, 0))
    exploring_total, exploring_resolved = conf_data.get("exploring", (0, 0))
    discovery_total, discovery_resolved = conf_data.get("discovery", (0, 0))
    confidence_breakdown = ConfidenceBreakdown(
        guided_sessions=guided_total,
        guided_resolution_rate=_rate_pct(guided_resolved, guided_total),
        exploring_sessions=exploring_total,
        exploring_resolution_rate=_rate_pct(exploring_resolved, exploring_total),
        discovery_sessions=discovery_total,
        discovery_resolution_rate=_rate_pct(discovery_resolved, discovery_total),
    )

    # ── Knowledge coverage ──
    # Flow totals are account-wide and not restricted to the period.
    total_flows_result = await db.execute(
        select(func.count(Tree.id)).where(Tree.account_id == account_id)
    )
    total_flows = total_flows_result.scalar() or 0

    ai_flows_result = await db.execute(
        select(func.count(Tree.id)).where(
            Tree.account_id == account_id,
            Tree.origin.in_(["ai_generated", "ai_enhanced"]),
        )
    )
    ai_generated_flows = ai_flows_result.scalar() or 0

    pending_proposals_result = await db.execute(
        select(func.count(FlowProposal.id)).where(
            FlowProposal.account_id == account_id,
            FlowProposal.status == "pending",
        )
    )
    total_proposals_pending = pending_proposals_result.scalar() or 0

    # Approved/rejected counts are bucketed by review time, not creation time.
    approved_result = await db.execute(
        select(func.count(FlowProposal.id)).where(
            FlowProposal.account_id == account_id,
            FlowProposal.reviewed_at >= period_start,
            FlowProposal.status.in_(["approved", "modified"]),
        )
    )
    proposals_approved = approved_result.scalar() or 0

    rejected_result = await db.execute(
        select(func.count(FlowProposal.id)).where(
            FlowProposal.account_id == account_id,
            FlowProposal.reviewed_at >= period_start,
            FlowProposal.status == "rejected",
        )
    )
    proposals_rejected = rejected_result.scalar() or 0

    # Domain coverage
    domain_coverage_result = await db.execute(
        select(
            AISession.problem_domain,
            func.count(AISession.id).label("session_count"),
            func.sum(case((AISession.confidence_tier == "guided", 1), else_=0)).label("guided_count"),
        )
        .where(
            *in_period,
            AISession.problem_domain.isnot(None),
        )
        .group_by(AISession.problem_domain)
    )
    # For now, flow_count per domain isn't directly available since Tree doesn't have problem_domain.
    # It is reported as 0 until flows can be matched to domains (Phase 4: better flow categorization).
    # The previous per-tree_type count query was never read, so it is no longer executed.
    domain_cov_data = {}
    for r in domain_coverage_result.all():
        domain = r.problem_domain or "unknown"
        sc = r.session_count or 0
        gc = r.guided_count or 0
        domain_cov_data[domain] = DomainCoverage(
            domain=domain,
            flow_count=0,  # TODO: match via category/tags in Phase 4
            session_count=sc,
            guided_rate=_rate_pct(gc, sc),
        )

    knowledge_coverage = KnowledgeCoverage(
        total_flows=total_flows,
        ai_generated_flows=ai_generated_flows,
        total_proposals_pending=total_proposals_pending,
        proposals_approved_this_period=proposals_approved,
        proposals_rejected_this_period=proposals_rejected,
        coverage_by_domain=list(domain_cov_data.values()),
    )

    # ── PSA metrics (only reported when at least one session is ticket-linked) ──
    psa_metrics = None
    psa_linked = await db.execute(
        select(func.count(AISession.id)).where(
            *in_period,
            AISession.psa_ticket_id.isnot(None),
        )
    )
    psa_linked_count = psa_linked.scalar() or 0

    if psa_linked_count > 0 and total > 0:
        psa_push_result = await db.execute(
            select(
                func.count(PsaPostLog.id).label("total_pushes"),
                # Counts every successful push, including those that needed retries.
                func.sum(case((PsaPostLog.status == "success", 1), else_=0)).label("first_success"),
                # Subset of the successes above that had at least one retry.
                func.sum(case(
                    ((PsaPostLog.status == "success") & (PsaPostLog.retry_count > 0), 1),
                    else_=0
                )).label("retry_success"),
            )
            .join(AISession, PsaPostLog.ai_session_id == AISession.id)
            .where(
                AISession.account_id == account_id,
                PsaPostLog.ai_session_id.isnot(None),
                PsaPostLog.posted_at >= period_start,
            )
        )
        push_row = psa_push_result.one()
        total_pushes = push_row.total_pushes or 0
        first_success = push_row.first_success or 0
        retry_success = push_row.retry_success or 0

        psa_metrics = PsaMetrics(
            ticket_link_rate=_rate_pct(psa_linked_count, total),
            auto_push_success_rate=_rate_pct(first_success, total_pushes),
            auto_push_retry_success_rate=_rate_pct(retry_success, total_pushes),
            total_time_entries_logged=0,  # TODO: track from CW time entries
            total_hours_logged=0.0,
        )

    return FlowPilotDashboard(
        period=period,
        total_sessions=total,
        resolved_sessions=resolved,
        escalated_sessions=escalated,
        abandoned_sessions=abandoned,
        resolution_rate=_rate_pct(resolved, total),
        avg_steps_to_resolution=round(avg_steps, 1),
        avg_session_duration_minutes=round(avg_duration, 1),
        avg_rating=round(avg_rating, 2) if avg_rating is not None else None,
        mttr_minutes=round(mttr_minutes, 1) if mttr_minutes is not None else None,
        mttr_trend=mttr_trend,
        sessions_by_domain=sessions_by_domain,
        confidence_breakdown=confidence_breakdown,
        knowledge_coverage=knowledge_coverage,
        psa_metrics=psa_metrics,
    )
@router.get("/knowledge-gaps", response_model=KnowledgeGapReport)
@limiter.limit("10/minute")
async def get_knowledge_gaps_endpoint(
    request: Request,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    _: None = Depends(require_team_admin),
    period: str = Query("30d", pattern="^(7d|30d|90d)$"),
):
    """Return the knowledge gap report for the caller's account.

    The period label is translated into a number of days (unknown labels
    fall back to 30) and forwarded to ``get_knowledge_gaps`` together with
    the account id and the database session.

    Raises:
        HTTPException: 403 when the current user has no ``account_id``.
    """
    account_id = current_user.account_id
    if not account_id:
        raise HTTPException(
            detail="No account",
            status_code=status.HTTP_403_FORBIDDEN,
        )

    days_by_period = {"7d": 7, "30d": 30, "90d": 90}
    lookback_days = days_by_period.get(period, 30)
    report = await get_knowledge_gaps(account_id, db, period_days=lookback_days)
    return report