# ---------------------------------------------------------------------------
# Reconstructed from a whitespace-collapsed `git format-patch` export.
#   commit : e1112a9a36f63dc73875662decadb368d476cb9e
#   author : Michael Chihlas
#   date   : Fri, 29 May 2026 16:40:38 -0400
#   subject: feat(l1): match_or_build orchestrator + classify
#            (match-first, gate-on-build)
#   Co-Authored-By: Claude Opus 4.7
# The patch creates two files; each is reproduced under its own banner below.
# ---------------------------------------------------------------------------

# ===== backend/app/services/match_or_build.py ==============================
"""Intake orchestrator: match published flows first, gate generic build behind
the account's enabled categories (spec §3). Match runs BEFORE the category gate
so an authored flow is never blocked by category settings (Finding 4)."""
import logging
import re
from typing import Any, Optional
from uuid import UUID

from sqlalchemy.ext.asyncio import AsyncSession

from app.core.ai_provider import get_ai_provider
from app.core.config import settings
from app.services import flow_matching_engine
from app.services.l1_category_service import (
    DEFAULT_L1_CATEGORIES, get_enabled_categories, is_category_enabled,
)
from app.services.llm_utils import parse_llm_json

logger = logging.getLogger(__name__)

MATCH_THRESHOLD = 0.75  # spec §5.3
SUGGEST_THRESHOLD = 0.60  # spec §5.3

# System prompt for classify(); the allowed keys are appended once at import.
# NOTE(review): the example JSON value is an empty string here; if a
# placeholder was intended inside the quotes, confirm against the spec.
_CLASSIFY_PROMPT = (
    "Classify the IT support problem into exactly one of these category keys, "
    "or 'unknown'. Return JSON {\"category\":\"\"} only.\nKEYS: "
    + ", ".join(DEFAULT_L1_CATEGORIES)
)


async def classify(problem_text: str) -> str:
    """Map a problem to a category key via a short model call; keyword fallback.

    The model is asked (via ``_CLASSIFY_PROMPT``) for a JSON object with a
    ``category`` field. If that value is one of ``DEFAULT_L1_CATEGORIES`` it is
    returned; any other value yields ``"unknown"`` without running the
    fallback.

    If anything in the model path raises, the error is logged at WARNING level
    and a keyword fallback runs instead: the text is lower-cased, each category
    key is split on ``"_"``, and the first category (in iteration order of
    ``DEFAULT_L1_CATEGORIES``) with any token present as a whole word wins.

    Args:
        problem_text: Free-text description of the problem.

    Returns:
        A key from ``DEFAULT_L1_CATEGORIES`` or the literal ``"unknown"``.
    """
    try:
        provider = get_ai_provider(settings.get_model_for_action("l1_classify"))
        raw, _, _ = await provider.generate_json(
            system_prompt=_CLASSIFY_PROMPT,
            messages=[{"role": "user", "content": problem_text}],
            max_tokens=64,
        )
        cat = parse_llm_json(raw).get("category", "unknown")
        return cat if cat in DEFAULT_L1_CATEGORIES else "unknown"
    except Exception as e:  # noqa: BLE001 — fall back, never hard-fail intake
        logger.warning("classify model call failed (%s); keyword fallback", e)

    text = problem_text.lower()
    for cat in DEFAULT_L1_CATEGORIES:
        # \b...\b keeps short tokens from matching inside longer words.
        # Empty tokens (from a leading/trailing/double "_") are skipped:
        # r"\b\b" would otherwise match any text that contains a word.
        if any(
            re.search(rf"\b{re.escape(tok)}\b", text)
            for tok in cat.split("_")
            if tok
        ):
            return cat
    return "unknown"


async def match_or_build(
    account_id: UUID,
    problem_text: str,
    problem_domain: Optional[str],
    ticket_ref: str,  # passed through for caller/session use; not consumed here (Task 10)
    *,
    db: AsyncSession,
    force_build: bool = False,
) -> dict[str, Any]:
    """Decide whether intake runs an existing flow, suggests one, or builds.

    Unless ``force_build`` is set, published flows are matched first and the
    best-scoring hit is compared against the thresholds. Only when no hit
    reaches ``SUGGEST_THRESHOLD`` (or matching was skipped) is the problem
    classified and checked against the account's enabled categories.

    Args:
        account_id: Account whose flows and enabled categories are consulted.
        problem_text: Free-text problem; used for matching and classification.
        problem_domain: Forwarded unchanged to ``find_matches``.
        ticket_ref: Accepted for the caller's benefit; not read here.
        db: Session forwarded to the matching engine and category service.
        force_build: When true, skip flow matching; the category gate still
            applies.

    Returns:
        One of:
        ``{"outcome": "matched", "flow_id": str, "session_kind": "flow"}``
        when the best score is at least ``MATCH_THRESHOLD``;
        ``{"outcome": "suggest", "near_miss": {"flow_id", "flow_name",
        "score"}, "can_build": True}`` when it is at least
        ``SUGGEST_THRESHOLD``;
        ``{"outcome": "out_of_scope", "category": str}`` when the classified
        category is not enabled;
        ``{"outcome": "build", "session_kind": "ai_build", "category": str}``
        otherwise.
    """
    if not force_build:
        hits = await flow_matching_engine.find_matches(
            problem_text, problem_domain, account_id, db)
        # `hits or ()` covers both an empty result and None; default=None
        # makes max() return None instead of raising on an empty iterable.
        best = max(hits or (), key=lambda h: h["score"], default=None)
        # find_matches returns tree_id as a UUID object; normalize the public
        # contract to str so callers can re-parse with UUID(...) without TypeError.
        if best and best["score"] >= MATCH_THRESHOLD:
            return {
                "outcome": "matched",
                "flow_id": str(best["tree_id"]),
                "session_kind": "flow",
            }
        if best and best["score"] >= SUGGEST_THRESHOLD:
            return {
                "outcome": "suggest",
                "near_miss": {
                    "flow_id": str(best["tree_id"]),
                    "flow_name": best["tree_name"],
                    "score": best["score"],
                },
                "can_build": True,
            }

    category = await classify(problem_text)
    enabled = await get_enabled_categories(account_id, db)
    if not is_category_enabled(category, enabled):
        return {"outcome": "out_of_scope", "category": category}
    return {"outcome": "build", "session_kind": "ai_build", "category": category}


# ===== backend/tests/test_match_or_build.py ================================
import uuid
from unittest.mock import AsyncMock, patch

import pytest

from app.services import match_or_build as mob


def _hit(score, name="X"):
    """Build one find_matches-style hit.

    tree_id is a real UUID object (not a str) so the str() normalization in
    match_or_build is actually exercised by these tests.
    """
    return {"tree_id": uuid.uuid4(), "tree_name": name, "score": score}


@pytest.mark.asyncio
async def test_match_wins_before_category_gate():
    """A strong published-flow match returns 'matched' even if category disabled."""
    hit = _hit(0.9, "VPN")
    with patch.object(mob.flow_matching_engine, "find_matches",
                      new=AsyncMock(return_value=[hit])), \
            patch.object(mob, "get_enabled_categories",
                         new=AsyncMock(return_value=[])):
        res = await mob.match_or_build(
            uuid.uuid4(), "vpn down", None, "t1", db=AsyncMock(), force_build=False)
    assert res["outcome"] == "matched"
    assert res["session_kind"] == "flow"
    # UUID in, str out: the public contract is a re-parseable string.
    assert res["flow_id"] == str(hit["tree_id"])
    assert isinstance(res["flow_id"], str)


@pytest.mark.asyncio
async def test_suggest_band():
    hit = _hit(0.66)
    with patch.object(mob.flow_matching_engine, "find_matches",
                      new=AsyncMock(return_value=[hit])):
        res = await mob.match_or_build(
            uuid.uuid4(), "p", None, "t1", db=AsyncMock(), force_build=False)
    assert res["outcome"] == "suggest"
    assert res["near_miss"]["flow_name"] == "X"
    assert "flow_id" in res["near_miss"] and isinstance(res["near_miss"]["flow_id"], str)
    assert res["near_miss"]["flow_id"] == str(hit["tree_id"])
    assert res["near_miss"]["score"] == 0.66
    assert res["can_build"] is True


@pytest.mark.asyncio
async def test_best_hit_is_highest_score():
    """With several hits, the highest-scoring one decides the outcome."""
    weak, strong = _hit(0.61, "weak"), _hit(0.8, "strong")
    with patch.object(mob.flow_matching_engine, "find_matches",
                      new=AsyncMock(return_value=[weak, strong])):
        res = await mob.match_or_build(
            uuid.uuid4(), "p", None, "t1", db=AsyncMock(), force_build=False)
    assert res["outcome"] == "matched"
    assert res["flow_id"] == str(strong["tree_id"])


@pytest.mark.asyncio
async def test_out_of_scope_when_category_disabled_on_build_path():
    with patch.object(mob.flow_matching_engine, "find_matches",
                      new=AsyncMock(return_value=[])), \
            patch.object(mob, "classify", new=AsyncMock(return_value="printer")), \
            patch.object(mob, "get_enabled_categories",
                         new=AsyncMock(return_value=["vpn_connect"])):
        res = await mob.match_or_build(
            uuid.uuid4(), "printer jam", None, "t1", db=AsyncMock(), force_build=False)
    assert res["outcome"] == "out_of_scope"
    assert res["category"] == "printer"


@pytest.mark.asyncio
async def test_build_when_enabled_and_no_match():
    with patch.object(mob.flow_matching_engine, "find_matches",
                      new=AsyncMock(return_value=[])), \
            patch.object(mob, "classify", new=AsyncMock(return_value="printer")), \
            patch.object(mob, "get_enabled_categories",
                         new=AsyncMock(return_value=["printer"])):
        res = await mob.match_or_build(
            uuid.uuid4(), "printer jam", None, "t1", db=AsyncMock(), force_build=False)
    assert res["outcome"] == "build"
    assert res["session_kind"] == "ai_build"


@pytest.mark.asyncio
async def test_force_build_skips_match_but_still_gates():
    fm = AsyncMock(return_value=[_hit(0.99)])
    with patch.object(mob.flow_matching_engine, "find_matches", new=fm), \
            patch.object(mob, "classify", new=AsyncMock(return_value="printer")), \
            patch.object(mob, "get_enabled_categories",
                         new=AsyncMock(return_value=["printer"])):
        res = await mob.match_or_build(
            uuid.uuid4(), "p", None, "t1", db=AsyncMock(), force_build=True)
    fm.assert_not_called()
    assert res["outcome"] == "build"


@pytest.mark.asyncio
async def test_force_build_out_of_scope_when_category_disabled():
    """force_build bypasses matching only; a disabled category still blocks."""
    fm = AsyncMock(return_value=[_hit(0.99)])
    with patch.object(mob.flow_matching_engine, "find_matches", new=fm), \
            patch.object(mob, "classify", new=AsyncMock(return_value="printer")), \
            patch.object(mob, "get_enabled_categories",
                         new=AsyncMock(return_value=["vpn_connect"])):
        res = await mob.match_or_build(
            uuid.uuid4(), "p", None, "t1", db=AsyncMock(), force_build=True)
    fm.assert_not_called()
    assert res["outcome"] == "out_of_scope"


@pytest.mark.asyncio
async def test_score_exactly_match_threshold_is_matched():
    with patch.object(mob.flow_matching_engine, "find_matches",
                      new=AsyncMock(return_value=[_hit(0.75)])):
        res = await mob.match_or_build(
            uuid.uuid4(), "p", None, "t1", db=AsyncMock(), force_build=False)
    assert res["outcome"] == "matched"


@pytest.mark.asyncio
async def test_score_exactly_suggest_threshold_is_suggest():
    with patch.object(mob.flow_matching_engine, "find_matches",
                      new=AsyncMock(return_value=[_hit(0.60)])):
        res = await mob.match_or_build(
            uuid.uuid4(), "p", None, "t1", db=AsyncMock(), force_build=False)
    assert res["outcome"] == "suggest"


@pytest.mark.asyncio
async def test_score_below_suggest_falls_through_to_build_path():
    with patch.object(mob.flow_matching_engine, "find_matches",
                      new=AsyncMock(return_value=[_hit(0.4)])), \
            patch.object(mob, "classify", new=AsyncMock(return_value="printer")), \
            patch.object(mob, "get_enabled_categories",
                         new=AsyncMock(return_value=["printer"])):
        res = await mob.match_or_build(
            uuid.uuid4(), "printer", None, "t1", db=AsyncMock(), force_build=False)
    assert res["outcome"] == "build"


@pytest.mark.asyncio
async def test_classify_keyword_fallback_matches_word():
    with patch.object(mob, "get_ai_provider", side_effect=RuntimeError("model down")):
        cat = await mob.classify("the printer is jammed")
    assert cat == "printer"


@pytest.mark.asyncio
async def test_classify_keyword_fallback_no_substring_false_match():
    # "have" must NOT match teams_zoom_av via the 'av' token; no real category word present
    with patch.object(mob, "get_ai_provider", side_effect=RuntimeError("model down")):
        cat = await mob.classify("i have a general question")
    assert cat == "unknown"