All checks were successful
Mirror to GitHub / mirror (push) Successful in 10s
The "AI parrots example content from system prompt" bug bit us twice in one day across two different prompt sites. Patching individual prompts is treating the symptom; this commit makes the rule structural. Audit + sanitize: - assistant_chat_service.ASSISTANT_SYSTEM_PROMPT — already cleaned in prior commits, but the [FORK] schema still had literal "Brief reason" / "Short name" / "One sentence" placeholders. Replaced with <angle-bracket> placeholders. Anti-parrot rule itself rewritten to describe the failure mode abstractly instead of naming "jsmith" so the rule no longer trips the guardrail (and so the model doesn't see "jsmith" as a token at all). - ai_chat_service.py — removed three concrete-example offenders: "Get-Service ADSync" command literal, the "DC01 server_name" intake form payload (in two places), and the inline interview demos using "Azure AD Sync failures" / "Exchange Online mailbox migration". Replaced with technology-neutral schema descriptions. - ai_tree_generator_service.BRANCH_DETAIL_SYSTEM_PROMPT — replaced the fully-fleshed DNS troubleshooting tree (with literal Dnscache / ipconfig / google.com / Start-Service) with a placeholder schema showing only ID-linkage shape. - kb_conversion_service.PROCEDURAL_SYSTEM_PROMPT — replaced the worked Server Manager + DC01 example payload with a placeholder schema. Guardrail (tests/test_prompt_anti_parrot.py): - Imports every module under app/services/ and app/core/ and walks every uppercase string constant ending in _PROMPT, _SCHEMA, _PROTOCOL, _FORMAT, or _CONTEXT. - test 1: known-leaked-token list (jsmith, DC01, ADSync, Dnscache, google.com, "Outlook keeps", "Teams drops") must not appear in any prompt constant. Add to the list when a new leak shows up in prod — the list IS the audit trail. - test 2: marker blocks ([QUESTIONS], [ACTIONS], [SUGGEST_FIX], etc.) must contain placeholders only. 
Distinguishes JSON keys (followed by ':', allowed) from JSON values (followed by ',' / ']' / '}', must be <placeholder>); allows pipe-separated enum types (text|password|select) and a small set of fixed enum values (question, diagnostic_check, decision, action, ...). Verified by feeding the test a known-bad block — caught it correctly. Documented the rule in CLAUDE.md → AI / FlowPilot lessons, naming the test as the enforcement point so future contributors know how to extend it (add to the known-leaked list when a new leak surfaces). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
563 lines
20 KiB
Python
563 lines
20 KiB
Python
"""KB Accelerator AI conversion service.
|
|
|
|
Converts extracted KB article text into ResolutionFlow tree structures
|
|
using the Anthropic API (via the shared AI provider layer).
|
|
"""
|
|
import json
|
|
import logging
|
|
import re
|
|
import time
|
|
from typing import Any
|
|
from uuid import UUID
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.core.ai_provider import get_ai_provider
|
|
from app.core.ai_quota_service import record_ai_usage, get_user_plan
|
|
from app.core.config import settings
|
|
from app.models.kb_import import KBImport, KBImportNode
|
|
|
|
logger = logging.getLogger(__name__)

# Cost estimation (Sonnet pricing)
# Per-token rates, each written as <price per one million tokens> / 1_000_000.
# NOTE(review): the model is chosen at runtime through
# settings.get_model_for_action("kb_convert"), while these rates are fixed —
# confirm they match whichever model that routing returns.
COST_PER_INPUT_TOKEN = 3.0 / 1_000_000
COST_PER_OUTPUT_TOKEN = 15.0 / 1_000_000
|
|
|
|
|
|
from app.services.llm_utils import strip_markdown_fences as _strip_markdown_fences
|
|
|
|
|
|
def _try_repair_json(text: str) -> dict | None:
|
|
"""Attempt to repair common JSON issues from AI responses.
|
|
|
|
Handles: trailing commas, unclosed brackets/braces, truncated responses.
|
|
Returns parsed dict on success, None on failure.
|
|
"""
|
|
# Strip trailing commas before closing brackets/braces
|
|
repaired = re.sub(r",\s*([}\]])", r"\1", text)
|
|
|
|
# Try parsing after comma cleanup
|
|
try:
|
|
return json.loads(repaired)
|
|
except json.JSONDecodeError:
|
|
pass
|
|
|
|
# Try closing unclosed brackets/braces (truncated response)
|
|
# Count open vs close brackets
|
|
open_braces = repaired.count("{") - repaired.count("}")
|
|
open_brackets = repaired.count("[") - repaired.count("]")
|
|
|
|
if open_braces > 0 or open_brackets > 0:
|
|
# Remove any trailing partial key-value pair or string
|
|
# Find the last complete value (ends with }, ], ", number, true, false, null)
|
|
truncated = repaired.rstrip()
|
|
# Strip trailing partial string or key
|
|
truncated = re.sub(r',\s*"[^"]*$', "", truncated) # trailing "partial_key
|
|
truncated = re.sub(r',\s*$', "", truncated) # trailing comma
|
|
|
|
# Close remaining brackets/braces
|
|
truncated += "]" * max(0, open_brackets)
|
|
truncated += "}" * max(0, open_braces)
|
|
|
|
# Re-strip trailing commas that may have appeared
|
|
truncated = re.sub(r",\s*([}\]])", r"\1", truncated)
|
|
|
|
try:
|
|
return json.loads(truncated)
|
|
except json.JSONDecodeError:
|
|
pass
|
|
|
|
return None
|
|
|
|
|
|
def _estimate_cost(input_tokens: int, output_tokens: int) -> float:
    """Return the estimated cost of one call from its token counts.

    Each count is multiplied by its module-level per-token rate and the two
    products are summed.
    """
    input_cost = input_tokens * COST_PER_INPUT_TOKEN
    output_cost = output_tokens * COST_PER_OUTPUT_TOKEN
    return input_cost + output_cost
|
|
|
|
|
|
# ── System Prompts ──
|
|
|
|
# The output schema below deliberately contains placeholders only (no worked
# example): concrete sample content in a system prompt can be copied verbatim
# into the model's answer. Only the field layout and the way ids link nodes
# together are shown.
TROUBLESHOOTING_SYSTEM_PROMPT = """You are an MSP documentation specialist for ResolutionFlow. Your task is to convert a knowledge base article into an interactive troubleshooting decision tree.

Analyze the article and produce a JSON array of nodes that form a troubleshooting flow. Each node represents either a diagnostic question (decision point) or a resolution (solution).

## Node Types

- **question**: A diagnostic question with multiple answer options. Each option leads to another node.
- **resolution**: A terminal node with the solution/fix text.
- **action**: An instruction step that leads to the next node via next_node_id.
- **warning**: A caution or important note.

## Output Format

Return a JSON object with this SHAPE (DO NOT copy the placeholders below
verbatim — fill each field with content derived from the actual KB article
the engineer attached, NOT from this schema):
```json
{
  "title": "<flow title derived from the article>",
  "description": "<brief description of what this flow troubleshoots>",
  "nodes": [
    {
      "id": "<unique-kebab-case-id>",
      "type": "question",
      "question": "<diagnostic question derived from the article>",
      "options": [
        {"label": "<answer option>", "next_node_id": "<id of the node this answer leads to>"},
        {"label": "<another answer option>", "next_node_id": "<id of the node this answer leads to>"}
      ],
      "confidence": <float 0.0-1.0>,
      "source_excerpt": "<the verbatim text from the article this node was derived from>"
    },
    {
      "id": "<unique-kebab-case-id>",
      "type": "action",
      "question": "<instruction text derived from the article>",
      "next_node_id": "<id of the node that follows this step>",
      "confidence": <float 0.0-1.0>,
      "source_excerpt": "<the verbatim text from the article this node was derived from>"
    },
    {
      "id": "<unique-kebab-case-id>",
      "type": "resolution",
      "question": "<solution text derived from the article>",
      "confidence": <float 0.0-1.0>,
      "source_excerpt": "<the verbatim text from the article this node was derived from>"
    }
  ]
}
```

## Rules

1. Every node MUST have a unique `id` (descriptive kebab-case).
2. Every node MUST have a `confidence` score between 0.0 and 1.0.
3. Every node MUST have a `source_excerpt` — the exact text from the source article it was derived from.
4. The first node is the root of the decision tree.
5. All `next_node_id` and option `next_node_id` references must point to existing node IDs.
6. Detect implicit branching logic (e.g., "If X, do Y; otherwise Z") and create decision nodes.
7. Produce at least 3 nodes. Maximum 100 nodes.
8. Use high confidence (0.9+) for directly stated steps, medium (0.7-0.89) for reasonable inferences, low (<0.7) for significant interpretation.
9. Return ONLY valid JSON — no markdown fences, no explanation text."""
|
|
|
|
PROCEDURAL_SYSTEM_PROMPT = """You are an MSP documentation specialist for ResolutionFlow. Your task is to convert a knowledge base article into a procedural (step-by-step) flow.
|
|
|
|
Analyze the article and produce a JSON object with sequential steps and detected variables.
|
|
|
|
## Step Types
|
|
|
|
- **step**: A regular instruction step.
|
|
- **section_header**: A section divider/title (no action, just organizational).
|
|
- **warning**: A caution or important note that should be highlighted.
|
|
|
|
## Variable Detection
|
|
|
|
Identify values that would change between executions (server names, IPs, usernames, domains, etc.) and replace them with `[VAR:variable_name]` tokens. Also produce an intake_form that captures these variables before execution.
|
|
|
|
## Output Format
|
|
|
|
Return a JSON object with this SHAPE (DO NOT copy the placeholders below
|
|
verbatim — fill each field with content derived from the actual KB article
|
|
the engineer attached, NOT from this schema):
|
|
```json
|
|
{
|
|
"title": "<procedure title derived from the article>",
|
|
"description": "<brief description of what this procedure accomplishes>",
|
|
"steps": [
|
|
{
|
|
"id": "<unique-kebab-case-id>",
|
|
"type": "step|warning|section_header",
|
|
"content": "<step body — may include [VAR:<your_variable>] interpolation>",
|
|
"confidence": <float 0.0-1.0>,
|
|
"source_excerpt": "<the verbatim sentence/phrase from the article that this step came from>"
|
|
}
|
|
],
|
|
"intake_form": [
|
|
{
|
|
"variable_name": "<snake_case_name fitting THIS procedure>",
|
|
"label": "<Human Label>",
|
|
"field_type": "text|password|select|textarea|number|boolean",
|
|
"required": true|false,
|
|
"display_order": <integer>
|
|
}
|
|
]
|
|
}
|
|
```
|
|
|
|
## Variable Type Mapping
|
|
|
|
- IP addresses → field_type: "text", variable like `ip_address`
|
|
- Server/computer names → field_type: "text", variable like `server_name`
|
|
- Domain names → field_type: "text", variable like `domain_name`
|
|
- Usernames/email → field_type: "text", variable like `username`
|
|
- Port numbers → field_type: "number", variable like `port`
|
|
|
|
## Rules
|
|
|
|
1. Every step MUST have a unique `id` (descriptive kebab-case).
|
|
2. Every step MUST have a `confidence` score between 0.0 and 1.0.
|
|
3. Every step MUST have a `source_excerpt` — the exact text from the source article.
|
|
4. Preserve the original step ordering from the article.
|
|
5. Detect ALL instance-specific values and replace with `[VAR:name]` tokens.
|
|
6. Generate an intake_form entry for each unique variable detected.
|
|
7. Produce at least 2 steps. Maximum 100 steps.
|
|
8. Use high confidence (0.9+) for directly stated steps, medium (0.7-0.89) for inferences, low (<0.7) for significant interpretation.
|
|
9. Return ONLY valid JSON — no markdown fences, no explanation text."""
|
|
|
|
|
|
def _build_user_message(
|
|
source_text: str,
|
|
source_metadata: dict[str, Any] | None,
|
|
source_filename: str | None,
|
|
) -> str:
|
|
"""Build the user message containing the extracted text and metadata."""
|
|
parts = []
|
|
|
|
if source_filename:
|
|
parts.append(f"Source file: {source_filename}")
|
|
|
|
if source_metadata:
|
|
headings = source_metadata.get("headings", [])
|
|
if headings:
|
|
heading_text = ", ".join(
|
|
f"H{h['level']}: {h['text']}" for h in headings[:20]
|
|
)
|
|
parts.append(f"Detected headings: {heading_text}")
|
|
|
|
lists = source_metadata.get("lists", [])
|
|
if lists:
|
|
parts.append(f"Detected {len(lists)} list(s) in the document.")
|
|
|
|
tables = source_metadata.get("tables", [])
|
|
if tables:
|
|
parts.append(f"Detected {len(tables)} table(s) in the document.")
|
|
|
|
parts.append(f"\n--- ARTICLE CONTENT ---\n\n{source_text}")
|
|
|
|
return "\n".join(parts)
|
|
|
|
|
|
def _parse_troubleshooting_response(
    data: dict[str, Any],
    kb_import_id: UUID,
) -> tuple[list[KBImportNode], str, str | None]:
    """Parse AI response into KBImportNode records for troubleshooting flows.

    Returns (nodes, title, description).

    Each raw node becomes one KBImportNode, in the order the AI returned them.
    A node's parent is the node whose option or ``next_node_id`` points at it
    (the last such reference wins). References that would close a loop, and
    references to the first node, are ignored so the parent links always form
    a forest rooted at the first node.

    Raises:
        ValueError: if there are no nodes, a node is not a JSON object, or two
            nodes share the same id.
    """
    title = data.get("title", "Imported Troubleshooting Flow")
    description = data.get("description")
    raw_nodes = data.get("nodes", [])

    if not raw_nodes:
        raise ValueError("AI returned no nodes")

    import uuid as uuid_mod

    # Resolve each node's string id exactly once so that every later step
    # (parent tracing, UUID lookup, record creation) agrees on the key, also
    # for nodes the AI returned without an "id".
    node_keys: list[str] = []
    node_id_map: dict[str, uuid_mod.UUID] = {}
    for order, raw_node in enumerate(raw_nodes):
        if not isinstance(raw_node, dict):
            raise ValueError(f"AI returned a non-object node at position {order}")
        nid = raw_node.get("id", f"node-{order}")
        if nid in node_id_map:
            # Two records would otherwise be created with the same primary key.
            raise ValueError(f"AI returned duplicate node id: {nid!r}")
        node_keys.append(nid)
        node_id_map[nid] = uuid_mod.uuid4()

    # First node is root (no parent). For others, trace via options/next_node_id.
    root_id = node_keys[0]
    node_id_to_parent: dict[str, str | None] = dict.fromkeys(node_keys)

    def _would_cycle(child: str, parent: str) -> bool:
        """Check if setting child's parent to parent creates a cycle."""
        visited: set[str] = set()
        cur: str | None = parent
        # Walk upward from the prospective parent; meeting the child on the
        # way means the child is already one of the parent's ancestors.
        while cur is not None:
            if cur == child:
                return True
            if cur in visited:
                break
            visited.add(cur)
            cur = node_id_to_parent.get(cur)
        return False

    def _link(parent_id: str, child_id: Any) -> None:
        """Record parent_id as child_id's parent when that is safe to do."""
        if not child_id or child_id not in node_id_map:
            return  # missing or dangling reference
        if child_id == root_id:
            return  # the root never gets a parent
        if _would_cycle(child_id, parent_id):
            return  # back-edge (e.g. "retry from step X")
        node_id_to_parent[child_id] = parent_id

    for nid, raw_node in zip(node_keys, raw_nodes):
        # Options point to children
        for opt in raw_node.get("options") or []:
            _link(nid, opt.get("next_node_id"))
        # next_node_id points to child
        _link(nid, raw_node.get("next_node_id"))

    # Create import node records preserving order
    nodes: list[KBImportNode] = []
    for order, (nid, raw_node) in enumerate(zip(node_keys, raw_nodes)):
        node_type = raw_node.get("type", "question")
        if node_type == "decision":
            # "decision" is treated as a synonym for "question".
            node_type = "question"

        parent_str_id = node_id_to_parent.get(nid)
        parent_uuid = node_id_map[parent_str_id] if parent_str_id is not None else None

        # Build content JSONB
        content: dict[str, Any] = {
            "original_id": nid,
            "question": raw_node.get("question", ""),
        }
        if raw_node.get("options"):
            content["options"] = raw_node["options"]
        if raw_node.get("next_node_id"):
            content["next_node_id"] = raw_node["next_node_id"]

        import_node = KBImportNode(
            id=node_id_map[nid],
            kb_import_id=kb_import_id,
            node_order=order,
            node_type=node_type,
            content=content,
            parent_node_id=parent_uuid,
            source_excerpt=raw_node.get("source_excerpt"),
            confidence_score=float(raw_node.get("confidence", 0.5)),
            user_edited=False,
            user_approved=False,
        )
        nodes.append(import_node)

    return nodes, title, description
|
|
|
|
|
|
def _parse_procedural_response(
    data: dict[str, Any],
    kb_import_id: UUID,
) -> tuple[list[KBImportNode], str, str | None, list[dict[str, Any]] | None]:
    """Turn a procedural AI payload into ordered KBImportNode records.

    Returns (nodes, title, description, intake_form).

    Steps keep the order the AI returned them in and never get a parent.
    Unknown step types fall back to "step". Raises ValueError when the payload
    contains no steps.
    """
    import uuid as uuid_mod

    steps = data.get("steps", [])
    if not steps:
        raise ValueError("AI returned no steps")

    known_types = ("step", "section_header", "warning")

    def _to_node(position: int, step: dict[str, Any]) -> KBImportNode:
        """Build the import record for the step at ``position``."""
        body: dict[str, Any] = {
            "original_id": step.get("id", f"step-{position}"),
            "content": step.get("content", ""),
        }
        declared = step.get("type", "step")
        return KBImportNode(
            id=uuid_mod.uuid4(),
            kb_import_id=kb_import_id,
            node_order=position,
            node_type=declared if declared in known_types else "step",
            content=body,
            parent_node_id=None,  # Procedural flows are linear
            source_excerpt=step.get("source_excerpt"),
            confidence_score=float(step.get("confidence", 0.5)),
            user_edited=False,
            user_approved=False,
        )

    nodes = [_to_node(position, step) for position, step in enumerate(steps)]

    return (
        nodes,
        data.get("title", "Imported Procedure"),
        data.get("description"),
        data.get("intake_form"),
    )
|
|
|
|
|
|
async def convert_document(
    kb_import: KBImport,
    db: AsyncSession,
) -> list[KBImportNode]:
    """Run AI conversion on an extracted KB article.

    Creates KBImportNode records and updates the kb_import status.
    Returns the created nodes.

    On failure (provider error, unparseable JSON, or a payload of the wrong
    shape) the import is marked ``"failed"`` with an ``error_message`` and an
    empty list is returned; on success it is marked ``"ready"`` and the parsed
    title/description/node count (plus any intake form) are stored under
    ``source_metadata``. This function itself only flushes the session.

    Args:
        kb_import: The import record holding the extracted article text,
            metadata, and target type.
        db: Session used to persist nodes and status updates.
    """
    start_time = time.monotonic()

    # Select system prompt based on target type
    if kb_import.target_type == "troubleshooting":
        system_prompt = TROUBLESHOOTING_SYSTEM_PROMPT
    else:
        system_prompt = PROCEDURAL_SYSTEM_PROMPT

    user_message = _build_user_message(
        source_text=kb_import.source_text,
        source_metadata=kb_import.source_metadata,
        source_filename=kb_import.source_filename,
    )

    # Get AI provider with model routing
    model = settings.get_model_for_action("kb_convert")
    provider = get_ai_provider(model=model)

    try:
        raw_text, input_tokens, output_tokens = await provider.generate_json(
            system_prompt=[
                {"type": "text", "text": system_prompt},
                # cacheable: one of two stable constants (TROUBLESHOOTING_SYSTEM_PROMPT
                # or PROCEDURAL_SYSTEM_PROMPT) selected by target_type. Each
                # variant caches independently by text content.
            ],
            messages=[{"role": "user", "content": user_message}],
            max_tokens=16384,
        )
    except Exception as e:
        # Boundary with the provider layer: any failure marks the import failed.
        logger.error("AI conversion failed for kb_import=%s: %s", kb_import.id, e)
        kb_import.status = "failed"
        kb_import.error_message = f"AI processing error: {str(e)}"
        kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
        await db.flush()

        # Record failed usage
        plan = await get_user_plan(kb_import.account_id, db)
        await record_ai_usage(
            user_id=kb_import.created_by,
            account_id=kb_import.account_id,
            conversation_id=None,
            generation_type="kb_convert",
            tier=plan,
            input_tokens=0,
            output_tokens=0,
            estimated_cost=0.0,
            succeeded=False,
            counts_toward_quota=False,
            error_code="ai_error",
            extra_data={"kb_import_id": str(kb_import.id)},
            db=db,
        )
        return []

    # NOTE(review): the two failure paths below store the token counts on the
    # import but, unlike the provider-failure path above, do not call
    # record_ai_usage — confirm whether that is intentional.

    # Parse JSON response
    raw_text = _strip_markdown_fences(raw_text)
    try:
        data = json.loads(raw_text)
    except json.JSONDecodeError as e:
        # Attempt JSON repair before giving up
        data = _try_repair_json(raw_text)
        if data is None:
            logger.error(
                "KB conversion JSON parse failed for kb_import=%s (%d chars). "
                "Parse error: %s. Raw response (first 2000 chars): %s",
                kb_import.id, len(raw_text), e, raw_text[:2000],
            )
            kb_import.status = "failed"
            kb_import.error_message = (
                "AI response could not be parsed as valid JSON. "
                "This can happen with very long articles — try again or simplify the article."
            )
            kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
            kb_import.ai_tokens_input = input_tokens
            kb_import.ai_tokens_output = output_tokens
            await db.flush()
            return []

        logger.info(
            "KB conversion JSON repaired for kb_import=%s (%d chars)",
            kb_import.id, len(raw_text),
        )

    # Parse into nodes based on target type
    try:
        if not isinstance(data, dict):
            # Valid JSON of the wrong shape (e.g. a bare array) cannot be
            # parsed into nodes; route it through the normal failure path.
            raise TypeError(f"expected a JSON object, got {type(data).__name__}")

        intake_form = None
        if kb_import.target_type == "troubleshooting":
            nodes, title, description = _parse_troubleshooting_response(
                data, kb_import.id
            )
        else:
            nodes, title, description, intake_form = _parse_procedural_response(
                data, kb_import.id
            )
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # AttributeError covers nested values of the wrong type (e.g. a node
        # that is a string rather than an object).
        logger.error("KB node parsing failed for kb_import=%s: %s", kb_import.id, e)
        kb_import.status = "failed"
        kb_import.error_message = f"Failed to parse AI response: {e}"
        kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
        kb_import.ai_tokens_input = input_tokens
        kb_import.ai_tokens_output = output_tokens
        await db.flush()
        return []

    # Persist nodes — insert roots first to satisfy parent_node_id FK,
    # then children in subsequent passes until all are inserted.
    remaining = list(nodes)
    inserted_ids: set[Any] = set()
    while remaining:
        batch = [
            n for n in remaining
            if n.parent_node_id is None or n.parent_node_id in inserted_ids
        ]
        if not batch:
            # Circular reference or orphan — force insert remaining to surface the real error
            for n in remaining:
                db.add(n)
            break
        for n in batch:
            db.add(n)
            inserted_ids.add(n.id)
        await db.flush()
        remaining = [n for n in remaining if n.id not in inserted_ids]

    # Update import record
    elapsed_ms = int((time.monotonic() - start_time) * 1000)
    confidence_scores = [n.confidence_score for n in nodes]
    avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.0

    kb_import.status = "ready"
    kb_import.confidence_avg = avg_confidence
    kb_import.processing_time_ms = elapsed_ms
    kb_import.ai_tokens_input = input_tokens
    kb_import.ai_tokens_output = output_tokens

    # Store parsed metadata for commit phase.
    # Build a new dict and assign it rather than mutating the existing one:
    # in-place changes to a JSON column value are only persisted when the
    # column uses mutation tracking, whereas assignment is always detected.
    updated_metadata: dict[str, Any] = dict(kb_import.source_metadata or {})
    updated_metadata["_conversion"] = {
        "title": title,
        "description": description,
        "node_count": len(nodes),
    }
    if intake_form:
        updated_metadata["_intake_form"] = intake_form
    kb_import.source_metadata = updated_metadata

    await db.flush()

    # Record successful usage
    plan = await get_user_plan(kb_import.account_id, db)
    cost = _estimate_cost(input_tokens, output_tokens)
    await record_ai_usage(
        user_id=kb_import.created_by,
        account_id=kb_import.account_id,
        conversation_id=None,
        generation_type="kb_convert",
        tier=plan,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        estimated_cost=cost,
        succeeded=True,
        counts_toward_quota=True,
        error_code=None,
        extra_data={"kb_import_id": str(kb_import.id), "node_count": len(nodes)},
        db=db,
    )

    logger.info(
        "KB conversion complete: import=%s, nodes=%d, confidence=%.2f, time=%dms, tokens=%d/%d",
        kb_import.id, len(nodes), avg_confidence, elapsed_ms, input_tokens, output_tokens,
    )

    return nodes
|