From 94fbb38f84306c863d725d1d20302436c7c99e95 Mon Sep 17 00:00:00 2001 From: chihlasm Date: Fri, 20 Mar 2026 00:36:30 +0000 Subject: [PATCH] =?UTF-8?q?fix(analytics):=20fix=206=20backend=20audit=20i?= =?UTF-8?q?ssues=20=E2=80=94=20domain=20matching,=20funnel=20counts,=20dec?= =?UTF-8?q?imal=20casts,=20dead=20queries?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Issue 1: Normalize domain lookup to lowercase on both sides (flow category names and session problem_domain) to fix case-sensitive mismatch in coverage endpoint - Issue 2: Count distinct ai_session_id (not PsaPostLog.id) in doc_pushed funnel step to avoid inflating counts on retried sessions - Issue 3: Clamp recency_score to [0.0, 1.0] with min/max to handle negative days_since from future timestamps (clock skew/test data) - Issue 4: Wrap func.sum(case(...)) results with int() in dashboard endpoint to handle Decimal returns from asyncpg - Issue 5: Remove dead domain_flow_counts_result query (result fetched but never consumed) to eliminate unnecessary DB round-trip - Issue 6: Replace select(Tree) with select(Tree.id, Tree.name, Tree.tree_type) in flow-quality endpoint to avoid loading large tree_structure JSONB column Co-Authored-By: Claude Opus 4.6 (1M context) --- .../app/api/endpoints/flowpilot_analytics.py | 46 ++++++++----------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/backend/app/api/endpoints/flowpilot_analytics.py b/backend/app/api/endpoints/flowpilot_analytics.py index 75c91378..66a322bb 100644 --- a/backend/app/api/endpoints/flowpilot_analytics.py +++ b/backend/app/api/endpoints/flowpilot_analytics.py @@ -81,10 +81,10 @@ async def get_dashboard( ) ) row = counts_result.one() - total = row.total or 0 - resolved = row.resolved or 0 - escalated = row.escalated or 0 - abandoned = row.abandoned or 0 + total = int(row.total or 0) + resolved = int(row.resolved or 0) + escalated = int(row.escalated or 0) + abandoned = int(row.abandoned or 0) avg_steps = float(row.avg_steps or 0) avg_rating = float(row.avg_rating) if row.avg_rating else None resolution_rate = (resolved / total * 100) if total > 0 else 0.0 @@ -168,10 +168,10 @@ async def get_dashboard( sessions_by_domain = [ DomainBreakdown( domain=r.problem_domain or "unknown", - total=r.total, - resolved=r.resolved or 0, - escalated=r.escalated or 0, - resolution_rate=round((r.resolved or 0) / r.total * 100, 1) if r.total > 0 else 0.0, + total=int(r.total or 0), + resolved=int(r.resolved or 0), + escalated=int(r.escalated or 0), + resolution_rate=round(int(r.resolved or 0) / int(r.total) * 100, 1) if r.total else 0.0, ) for r in domain_result.all() ] @@ -190,7 +190,7 @@ async def get_dashboard( ) .group_by(AISession.confidence_tier) ) - conf_data = {r.confidence_tier: (r.total or 0, r.resolved or 0) for r in confidence_result.all()} + conf_data = {r.confidence_tier: (int(r.total or 0), int(r.resolved or 0)) for r in confidence_result.all()} guided_total, guided_resolved = conf_data.get("guided", (0, 0)) exploring_total, exploring_resolved = conf_data.get("exploring", (0, 0)) @@ -259,14 +259,6 @@ async def get_dashboard( ) .group_by(AISession.problem_domain) ) - domain_flow_counts_result = await db.execute( - select( - Tree.tree_type, # Reuse as domain proxy — not ideal but workable - func.count(Tree.id), - ) - .where(Tree.account_id == account_id) - .group_by(Tree.tree_type) - ) # For now, flow_count per domain isn't directly available since Tree doesn't have problem_domain. # Use match_keywords or just report 0. We'll improve this in Phase 4 with better flow categorization. domain_cov_data = {} @@ -422,10 +414,10 @@ async def get_coverage_heatmap( unmapped_session_count = int(unmapped_result.scalar() or 0) # ── Flow counts per domain: match Category.name to problem_domain ── - # Joins Tree → TreeCategory and groups by category name + # Joins Tree → TreeCategory and groups by lowercased category name for case-insensitive matching flow_counts_result = await db.execute( select( - TreeCategory.name.label("domain"), + func.lower(TreeCategory.name).label("domain"), func.count(Tree.id).label("flow_count"), ) .join(Tree, Tree.category_id == TreeCategory.id) @@ -434,7 +426,7 @@ async def get_coverage_heatmap( Tree.is_active.is_(True), Tree.deleted_at.is_(None), ) - .group_by(TreeCategory.name) + .group_by(func.lower(TreeCategory.name)) ) flow_counts_by_domain: dict[str, int] = { r.domain: int(r.flow_count) for r in flow_counts_result.all() @@ -452,7 +444,7 @@ async def get_coverage_heatmap( domains.append( CoverageDomainRow( domain=domain_name, - flow_count=flow_counts_by_domain.get(domain_name, 0), + flow_count=flow_counts_by_domain.get(domain_name.lower(), 0), session_count=sc, resolution_rate=round(resolved / sc, 4) if sc > 0 else 0.0, escalation_rate=round(escalated / sc, 4) if sc > 0 else 0.0, @@ -486,15 +478,15 @@ async def get_flow_quality( period_start = _get_period_start(period) now = datetime.now(timezone.utc) - # ── Get all active flows ── + # ── Get all active flows (only needed columns — avoids loading large tree_structure JSONB) ── flows_result = await db.execute( - select(Tree).where( + select(Tree.id, Tree.name, Tree.tree_type).where( Tree.account_id == account_id, Tree.is_active.is_(True), Tree.deleted_at.is_(None), ) ) - flows = flows_result.scalars().all() + flows = flows_result.all() if not flows: return FlowQualityResponse(flows=[], top_performers=[], needs_attention=[]) @@ -557,7 +549,7 @@ async def get_flow_quality( if last_ever is not None: last_ever_aware = last_ever.replace(tzinfo=timezone.utc) if last_ever.tzinfo is None else last_ever days_since = (now - last_ever_aware).total_seconds() / 86400 - recency_score = max(0.0, 1.0 - days_since / 90.0) + recency_score = max(0.0, min(1.0, 1.0 - days_since / 90.0)) else: recency_score = 0.0 @@ -673,9 +665,9 @@ async def get_enhanced_psa_metrics( ) linked_to_ticket = int(linked_result.scalar() or 0) - # Sessions with a successful doc push (via PsaPostLog) + # Sessions with a successful doc push (via PsaPostLog) — count unique sessions, not log entries doc_pushed_result = await db.execute( - select(func.count(PsaPostLog.id.distinct())) + select(func.count(PsaPostLog.ai_session_id.distinct())) .join(AISession, PsaPostLog.ai_session_id == AISession.id) .where( AISession.account_id == account_id,