refactor: consolidate LLM JSON parsing into shared llm_utils module

Extracted the duplicated _strip_markdown_fences / _parse_llm_json functions from 7 files into app/services/llm_utils.py.

Two shared functions:
- strip_markdown_fences(): fence stripping only
- parse_llm_json(): fence stripping + JSON parse + error logging

Files updated: flowpilot_engine, knowledge_flywheel, session_to_flow_service, ai_tree_generator_service, ai_fix_service, ai_chat_service, kb_conversion_service

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 03:25:25 +00:00
parent 0a77215fac
commit 10cf5f45eb
8 changed files with 53 additions and 72 deletions
--- a/backend/app/services/flowpilot_engine.py
+++ b/backend/app/services/flowpilot_engine.py
@@ -17,6 +17,7 @@ from sqlalchemy.orm import selectinload

 from app.core.ai_provider import get_ai_provider
 from app.core.config import settings
+from app.services.llm_utils import parse_llm_json
 from app.services.notification_service import notify
 from app.models.ai_session import AISession
 from app.models.ai_session_step import AISessionStep
@@ -108,22 +109,10 @@ def _confidence_to_tier(confidence: float) -> str:
 def _parse_structured_output(raw_text: str) -> dict[str, Any]:
    """Parse and validate structured JSON from LLM response.

-    Handles common LLM quirks: markdown fences, trailing commas, etc.
+    Uses shared parse_llm_json for fence stripping and JSON parsing,
+    then validates FlowPilot-specific output shape.
    """
-    text = raw_text.strip()
-
-    # Strip markdown code fences if present
-    if text.startswith("```"):
-        lines = text.split("\n")
-        # Remove first line (```json or ```) and last line (```)
-        lines = [l for l in lines if not l.strip().startswith("```")]
-        text = "\n".join(lines).strip()
-
-    try:
-        data = json.loads(text)
-    except json.JSONDecodeError as e:
-        logger.warning("Failed to parse LLM JSON output: %s — raw: %.200s", e, text)
-        raise ValueError(f"Invalid JSON from LLM: {e}") from e
+    data = parse_llm_json(raw_text)

    if not isinstance(data, dict) or "type" not in data:
        raise ValueError("LLM response missing required 'type' field")