From 10cf5f45ebe757ca1d69a63efaf39d3d5156f3d0 Mon Sep 17 00:00:00 2001
From: chihlasm <michael@resolutionflow.com>
Date: Sat, 21 Mar 2026 03:25:25 +0000
Subject: [PATCH] refactor: consolidate LLM JSON parsing into shared llm_utils
 module

Extract the near-duplicate _strip_markdown_fences / _parse_llm_json helpers
from 7 files into app/services/llm_utils.py, which exposes two shared
functions:
- strip_markdown_fences(): fence stripping only
- parse_llm_json(): fence stripping + JSON parse + error logging

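Files updated: flowpilot_engine, knowledge_flywheel, session_to_flow_service,
ai_tree_generator_service, ai_fix_service, ai_chat_service, kb_conversion_service

Behavior note: the shared strip_markdown_fences() uses the line-filtering
implementation that flowpilot_engine and knowledge_flywheel already had
(when the text starts with a fence, every line starting with a fence is
dropped). The regex-based copies it replaces (ai_chat_service,
ai_fix_service, ai_tree_generator_service, kb_conversion_service,
session_to_flow_service) handled fences differently, so this is not a pure
no-op for those callers. session_to_flow_service now raises
"Invalid JSON from LLM: ..." instead of "AI returned invalid JSON: ..." and
logs a warning on parse failure.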

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/app/core/ai_chat_service.py           |  8 +---
 backend/app/core/ai_fix_service.py            |  6 +--
 backend/app/core/ai_tree_generator_service.py | 10 +----
 backend/app/core/kb_conversion_service.py     |  8 +---
 backend/app/services/flowpilot_engine.py      | 19 ++-------
 backend/app/services/knowledge_flywheel.py    | 20 ++--------
 backend/app/services/llm_utils.py             | 39 +++++++++++++++++++
 .../app/services/session_to_flow_service.py   | 15 +------
 8 files changed, 53 insertions(+), 72 deletions(-)
 create mode 100644 backend/app/services/llm_utils.py

diff --git a/backend/app/core/ai_chat_service.py b/backend/app/core/ai_chat_service.py
index c4c4ed75..8e1eab64 100644
--- a/backend/app/core/ai_chat_service.py
+++ b/backend/app/core/ai_chat_service.py
@@ -275,13 +275,7 @@ def _build_system_prompt(flow_type: str) -> str:
         return f"{ROLE_PERSONA}\n\n{flow_context}\n\n{SCHEMA_CONTEXT}\n\n{INTERVIEW_PROTOCOL}\n\n{RESPONSE_FORMAT}"
 
 
-def _strip_markdown_fences(text: str) -> str:
-    """Strip markdown code fences if the model wrapped its JSON response."""
-    text = text.strip()
-    match = re.match(r"^```(?:json)?\s*([\s\S]*?)```$", text)
-    if match:
-        return match.group(1).strip()
-    return text
+from app.services.llm_utils import strip_markdown_fences as _strip_markdown_fences
 
 
 def _parse_delta(response: str) -> dict | None:
diff --git a/backend/app/core/ai_fix_service.py b/backend/app/core/ai_fix_service.py
index 02350a15..56325386 100644
--- a/backend/app/core/ai_fix_service.py
+++ b/backend/app/core/ai_fix_service.py
@@ -86,11 +86,7 @@ def _serialize_tree_outline(
     return "\n".join(lines)
 
 
-def _strip_markdown_fences(text: str) -> str:
-    """Strip ```json...``` fences from AI response."""
-    return re.sub(r"^```(?:json)?\s*\n?", "", text.strip(), flags=re.MULTILINE).rstrip(
-        "`"
-    ).strip()
+from app.services.llm_utils import strip_markdown_fences as _strip_markdown_fences
 
 
 def _replace_node_in_tree(
diff --git a/backend/app/core/ai_tree_generator_service.py b/backend/app/core/ai_tree_generator_service.py
index bf560874..2463068f 100644
--- a/backend/app/core/ai_tree_generator_service.py
+++ b/backend/app/core/ai_tree_generator_service.py
@@ -13,6 +13,8 @@ import re
 import uuid
 from typing import Any
 
+from app.services.llm_utils import strip_markdown_fences as _strip_markdown_fences
+
 from app.core.ai_provider import get_ai_provider
 from app.core.config import settings
 from app.core.ai_tree_validator import validate_generated_tree, count_tree_stats
@@ -111,14 +113,6 @@ Return a corrected full JSON object only. No markdown, no prose, no code fences.
 Fix ALL listed errors while maintaining the same troubleshooting/procedural logic."""
 
 
-def _strip_markdown_fences(text: str) -> str:
-    """Strip markdown code fences if the model wrapped its JSON response."""
-    text = text.strip()
-    match = re.match(r"^```(?:json)?\s*([\s\S]*?)```$", text)
-    if match:
-        return match.group(1).strip()
-    return text
-
 
 
 def _estimate_cost(input_tokens: int, output_tokens: int) -> float:
diff --git a/backend/app/core/kb_conversion_service.py b/backend/app/core/kb_conversion_service.py
index ba28fdd2..9bb9edf8 100644
--- a/backend/app/core/kb_conversion_service.py
+++ b/backend/app/core/kb_conversion_service.py
@@ -24,13 +24,7 @@ COST_PER_INPUT_TOKEN = 3.0 / 1_000_000
 COST_PER_OUTPUT_TOKEN = 15.0 / 1_000_000
 
 
-def _strip_markdown_fences(text: str) -> str:
-    """Strip markdown code fences if the model wrapped its JSON response."""
-    text = text.strip()
-    match = re.match(r"^```(?:json)?\s*([\s\S]*?)```$", text)
-    if match:
-        return match.group(1).strip()
-    return text
+from app.services.llm_utils import strip_markdown_fences as _strip_markdown_fences
 
 
 def _try_repair_json(text: str) -> dict | None:
diff --git a/backend/app/services/flowpilot_engine.py b/backend/app/services/flowpilot_engine.py
index 40dbfe87..1164d2b2 100644
--- a/backend/app/services/flowpilot_engine.py
+++ b/backend/app/services/flowpilot_engine.py
@@ -17,6 +17,7 @@ from sqlalchemy.orm import selectinload
 
 from app.core.ai_provider import get_ai_provider
 from app.core.config import settings
+from app.services.llm_utils import parse_llm_json
 from app.services.notification_service import notify
 from app.models.ai_session import AISession
 from app.models.ai_session_step import AISessionStep
@@ -108,22 +109,10 @@ def _confidence_to_tier(confidence: float) -> str:
 def _parse_structured_output(raw_text: str) -> dict[str, Any]:
     """Parse and validate structured JSON from LLM response.
 
-    Handles common LLM quirks: markdown fences, trailing commas, etc.
+    Uses shared parse_llm_json for fence stripping and JSON parsing,
+    then validates FlowPilot-specific output shape.
     """
-    text = raw_text.strip()
-
-    # Strip markdown code fences if present
-    if text.startswith("```"):
-        lines = text.split("\n")
-        # Remove first line (```json or ```) and last line (```)
-        lines = [l for l in lines if not l.strip().startswith("```")]
-        text = "\n".join(lines).strip()
-
-    try:
-        data = json.loads(text)
-    except json.JSONDecodeError as e:
-        logger.warning("Failed to parse LLM JSON output: %s — raw: %.200s", e, text)
-        raise ValueError(f"Invalid JSON from LLM: {e}") from e
+    data = parse_llm_json(raw_text)
 
     if not isinstance(data, dict) or "type" not in data:
         raise ValueError("LLM response missing required 'type' field")
diff --git a/backend/app/services/knowledge_flywheel.py b/backend/app/services/knowledge_flywheel.py
index 7aef8427..36f23cad 100644
--- a/backend/app/services/knowledge_flywheel.py
+++ b/backend/app/services/knowledge_flywheel.py
@@ -20,6 +20,7 @@ from sqlalchemy.orm import selectinload
 
 from app.core.ai_provider import get_ai_provider
 from app.core.config import settings
+from app.services.llm_utils import parse_llm_json
 from app.services.notification_service import notify
 from app.models.ai_session import AISession
 from app.models.ai_session_step import AISessionStep
@@ -316,7 +317,7 @@ async def _propose_new_flow(session: AISession, db: AsyncSession) -> None:
             max_tokens=4096,
         )
 
-        parsed = _parse_llm_json(raw_response)
+        parsed = parse_llm_json(raw_response)
     except Exception as e:
         logger.warning("Knowledge Flywheel LLM call failed for session %s: %s", session.id, e)
         return
@@ -407,7 +408,7 @@ async def _propose_enhancement(session: AISession, db: AsyncSession) -> None:
             max_tokens=4096,
         )
 
-        parsed = _parse_llm_json(raw_response)
+        parsed = parse_llm_json(raw_response)
     except Exception as e:
         logger.warning("Knowledge Flywheel enhancement LLM call failed for session %s: %s", session.id, e)
         return
@@ -451,18 +452,3 @@ async def _propose_enhancement(session: AISession, db: AsyncSession) -> None:
     )
 
 
-def _parse_llm_json(raw_text: str) -> dict[str, Any]:
-    """Parse JSON from LLM response, handling common quirks."""
-    text = raw_text.strip()
-
-    # Strip markdown code fences if present
-    if text.startswith("```"):
-        lines = text.split("\n")
-        lines = [line for line in lines if not line.strip().startswith("```")]
-        text = "\n".join(lines).strip()
-
-    try:
-        return json.loads(text)
-    except json.JSONDecodeError as e:
-        logger.warning("Knowledge Flywheel JSON parse failed: %s — raw: %.300s", e, text)
-        raise ValueError(f"Invalid JSON from LLM: {e}") from e
diff --git a/backend/app/services/llm_utils.py b/backend/app/services/llm_utils.py
new file mode 100644
index 00000000..15f8ac71
--- /dev/null
+++ b/backend/app/services/llm_utils.py
@@ -0,0 +1,39 @@
+"""Shared utilities for parsing LLM responses."""
+
+import json
+import logging
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+def strip_markdown_fences(text: str) -> str:
+    """Strip markdown code fences from LLM output, returning raw content.
+
+    Use this when you need just the stripping without JSON parsing
+    (e.g., when the caller has its own error handling for json.loads).
+    """
+    text = text.strip()
+    if text.startswith("```"):
+        lines = text.split("\n")
+        lines = [line for line in lines if not line.strip().startswith("```")]
+        text = "\n".join(lines).strip()
+    return text
+
+
+def parse_llm_json(raw_text: str) -> dict[str, Any]:
+    """Parse JSON from LLM response, handling common quirks.
+
+    Strips markdown code fences (```json ... ``` or ``` ... ```) if present,
+    then parses the remaining text as JSON.
+
+    Raises:
+        ValueError: If the text is not valid JSON after fence stripping.
+    """
+    text = strip_markdown_fences(raw_text)
+
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError as e:
+        logger.warning("LLM JSON parse failed: %s — raw: %.300s", e, text)
+        raise ValueError(f"Invalid JSON from LLM: {e}") from e
diff --git a/backend/app/services/session_to_flow_service.py b/backend/app/services/session_to_flow_service.py
index 4911e9dc..626d3711 100644
--- a/backend/app/services/session_to_flow_service.py
+++ b/backend/app/services/session_to_flow_service.py
@@ -5,7 +5,6 @@ flow with fallback branches, powered by AI.
 """
 import json
 import logging
-import re
 import uuid
 from typing import Any, Optional
 from uuid import UUID
@@ -16,6 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.core.ai_provider import get_ai_provider
 from app.core.config import settings
 from app.core.ai_tree_validator import validate_generated_procedural_steps
+from app.services.llm_utils import parse_llm_json
 from app.models.session import Session
 from app.models.tree import Tree
 
@@ -80,13 +80,6 @@ Rules:
 """
 
 
-def _strip_markdown_fences(text: str) -> str:
-    """Strip markdown code fences if the model wrapped its JSON response."""
-    text = text.strip()
-    match = re.match(r"^```(?:json)?\s*([\s\S]*?)```$", text)
-    if match:
-        return match.group(1).strip()
-    return text
 
 
 def _build_session_context(session: Session, tree: Optional[Tree]) -> str:
@@ -222,11 +215,7 @@ async def generate_flow_from_session(
     )
 
     # Strip markdown fences and parse JSON
-    raw_text = _strip_markdown_fences(raw_text)
-    try:
-        generated = json.loads(raw_text)
-    except json.JSONDecodeError as e:
-        raise ValueError(f"AI returned invalid JSON: {e}") from e
+    generated = parse_llm_json(raw_text)
 
     # Validate the generated steps
     val_errors = validate_generated_procedural_steps(generated)