feat: KB Accelerator — convert KB articles into interactive flows
Full-stack implementation of the KB Accelerator feature that converts static MSP knowledge base articles into interactive troubleshooting and procedural flows using AI. Backend: - Migrations 054/055: kb_imports, kb_import_nodes tables + plan_limits KB columns - SQLAlchemy models with relationships and self-referential node hierarchy - Text extraction service (txt, paste, docx with structural metadata) - AI conversion service with MSP-specialist prompts for both flow types - 8 API endpoints: upload, get, list, convert, edit node, commit, delete, quota - Tier-gated access via plan_limits (free: 3 lifetime, pro/team: unlimited) - 8 integration tests covering upload, get/list, quota, commit, delete Frontend: - TypeScript types and API client for all KB Accelerator endpoints - Multi-step wizard page: upload → processing → review → success - Upload screen with paste/file tabs, drag-drop, target type selector - Two-panel review screen with source highlighting and node cards - Per-node actions: approve, edit, regenerate, insert, delete - Confidence color indicators (green/amber/red) - Sidebar navigation with Sparkles icon - Code-split lazy-loaded route at /kb-accelerator Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,7 @@ from app.models.assistant_chat import AssistantChat
|
||||
from app.models.survey_response import SurveyResponse
|
||||
from app.models.survey_invite import SurveyInvite
|
||||
from app.models.ai_suggestion import AISuggestion # noqa: F401
|
||||
from app.models.kb_import import KBImport, KBImportNode # noqa: F401
|
||||
from app.core.config import settings
|
||||
|
||||
# this is the Alembic Config object
|
||||
|
||||
79
backend/alembic/versions/054_add_kb_imports.py
Normal file
79
backend/alembic/versions/054_add_kb_imports.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""add kb_imports and kb_import_nodes tables
|
||||
|
||||
Revision ID: 054
|
||||
Revises: 053
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
|
||||
revision = "054"
|
||||
down_revision = "053"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
op.create_table(
|
||||
"kb_imports",
|
||||
sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
|
||||
sa.Column("account_id", UUID(as_uuid=True), sa.ForeignKey("accounts.id", ondelete="CASCADE"), nullable=False, index=True),
|
||||
sa.Column("created_by", UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True),
|
||||
sa.Column("source_filename", sa.String(500), nullable=True),
|
||||
sa.Column("source_format", sa.String(20), nullable=False),
|
||||
sa.Column("source_text", sa.Text, nullable=False),
|
||||
sa.Column("source_metadata", JSONB, nullable=True),
|
||||
sa.Column("target_type", sa.String(20), nullable=False),
|
||||
sa.Column("status", sa.String(20), nullable=False, server_default="processing"),
|
||||
sa.Column("confidence_avg", sa.Float, nullable=True),
|
||||
sa.Column("error_message", sa.Text, nullable=True),
|
||||
sa.Column("processing_time_ms", sa.Integer, nullable=True),
|
||||
sa.Column("ai_tokens_input", sa.Integer, nullable=True),
|
||||
sa.Column("ai_tokens_output", sa.Integer, nullable=True),
|
||||
sa.Column("tree_id", UUID(as_uuid=True), sa.ForeignKey("trees.id", ondelete="SET NULL"), nullable=True),
|
||||
sa.Column("batch_id", UUID(as_uuid=True), nullable=True, index=True),
|
||||
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
|
||||
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
|
||||
sa.CheckConstraint(
|
||||
"source_format IN ('txt', 'paste', 'docx', 'pdf', 'html', 'md')",
|
||||
name="ck_kb_imports_source_format",
|
||||
),
|
||||
sa.CheckConstraint(
|
||||
"target_type IN ('troubleshooting', 'procedural')",
|
||||
name="ck_kb_imports_target_type",
|
||||
),
|
||||
sa.CheckConstraint(
|
||||
"status IN ('processing', 'ready', 'committed', 'failed')",
|
||||
name="ck_kb_imports_status",
|
||||
),
|
||||
)
|
||||
|
||||
op.create_index("ix_kb_imports_status", "kb_imports", ["status"])
|
||||
op.create_index("ix_kb_imports_created_at_desc", "kb_imports", [sa.text("created_at DESC")])
|
||||
|
||||
op.create_table(
|
||||
"kb_import_nodes",
|
||||
sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
|
||||
sa.Column("kb_import_id", UUID(as_uuid=True), sa.ForeignKey("kb_imports.id", ondelete="CASCADE"), nullable=False, index=True),
|
||||
sa.Column("node_order", sa.Integer, nullable=False),
|
||||
sa.Column("node_type", sa.String(20), nullable=False),
|
||||
sa.Column("content", JSONB, nullable=False),
|
||||
sa.Column("parent_node_id", UUID(as_uuid=True), sa.ForeignKey("kb_import_nodes.id", ondelete="SET NULL"), nullable=True),
|
||||
sa.Column("source_excerpt", sa.Text, nullable=True),
|
||||
sa.Column("confidence_score", sa.Float, nullable=False),
|
||||
sa.Column("user_edited", sa.Boolean, nullable=False, server_default=sa.text("false")),
|
||||
sa.Column("user_approved", sa.Boolean, nullable=False, server_default=sa.text("false")),
|
||||
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
|
||||
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
|
||||
sa.CheckConstraint(
|
||||
"node_type IN ('question', 'resolution', 'step', 'section_header', 'warning', 'action')",
|
||||
name="ck_kb_import_nodes_node_type",
|
||||
),
|
||||
)
|
||||
|
||||
op.create_index("ix_kb_import_nodes_confidence", "kb_import_nodes", ["confidence_score"])
|
||||
|
||||
|
||||
def downgrade():
|
||||
op.drop_table("kb_import_nodes")
|
||||
op.drop_table("kb_imports")
|
||||
76
backend/alembic/versions/055_add_kb_plan_limits.py
Normal file
76
backend/alembic/versions/055_add_kb_plan_limits.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""add KB Accelerator columns to plan_limits
|
||||
|
||||
Revision ID: 055
|
||||
Revises: 054
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
|
||||
revision = "055"
|
||||
down_revision = "054"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
# Add KB Accelerator columns to plan_limits
|
||||
op.add_column("plan_limits", sa.Column("kb_accelerator_enabled", sa.Boolean, nullable=False, server_default=sa.text("false")))
|
||||
op.add_column("plan_limits", sa.Column("kb_max_lifetime_conversions", sa.Integer, nullable=True))
|
||||
op.add_column("plan_limits", sa.Column("kb_batch_max_size", sa.Integer, nullable=True))
|
||||
op.add_column("plan_limits", sa.Column("kb_allowed_formats", JSONB, nullable=False, server_default=sa.text("'[\"txt\",\"paste\"]'::jsonb")))
|
||||
op.add_column("plan_limits", sa.Column("kb_detailed_analysis", sa.Boolean, nullable=False, server_default=sa.text("false")))
|
||||
op.add_column("plan_limits", sa.Column("kb_conversational_refinement", sa.Boolean, nullable=False, server_default=sa.text("false")))
|
||||
op.add_column("plan_limits", sa.Column("kb_step_library_matching", sa.Boolean, nullable=False, server_default=sa.text("false")))
|
||||
op.add_column("plan_limits", sa.Column("kb_history_limit", sa.Integer, nullable=True))
|
||||
|
||||
# Seed defaults for each plan tier
|
||||
op.execute("""
|
||||
UPDATE plan_limits SET
|
||||
kb_accelerator_enabled = true,
|
||||
kb_max_lifetime_conversions = 3,
|
||||
kb_batch_max_size = NULL,
|
||||
kb_allowed_formats = '["txt","paste"]'::jsonb,
|
||||
kb_detailed_analysis = false,
|
||||
kb_conversational_refinement = false,
|
||||
kb_step_library_matching = false,
|
||||
kb_history_limit = 3
|
||||
WHERE plan = 'free'
|
||||
""")
|
||||
|
||||
op.execute("""
|
||||
UPDATE plan_limits SET
|
||||
kb_accelerator_enabled = true,
|
||||
kb_max_lifetime_conversions = NULL,
|
||||
kb_batch_max_size = 5,
|
||||
kb_allowed_formats = '["txt","paste","docx","pdf","html","md"]'::jsonb,
|
||||
kb_detailed_analysis = true,
|
||||
kb_conversational_refinement = true,
|
||||
kb_step_library_matching = true,
|
||||
kb_history_limit = NULL
|
||||
WHERE plan = 'pro'
|
||||
""")
|
||||
|
||||
op.execute("""
|
||||
UPDATE plan_limits SET
|
||||
kb_accelerator_enabled = true,
|
||||
kb_max_lifetime_conversions = NULL,
|
||||
kb_batch_max_size = 10,
|
||||
kb_allowed_formats = '["txt","paste","docx","pdf","html","md"]'::jsonb,
|
||||
kb_detailed_analysis = true,
|
||||
kb_conversational_refinement = true,
|
||||
kb_step_library_matching = true,
|
||||
kb_history_limit = NULL
|
||||
WHERE plan = 'team'
|
||||
""")
|
||||
|
||||
|
||||
def downgrade():
|
||||
op.drop_column("plan_limits", "kb_history_limit")
|
||||
op.drop_column("plan_limits", "kb_step_library_matching")
|
||||
op.drop_column("plan_limits", "kb_conversational_refinement")
|
||||
op.drop_column("plan_limits", "kb_detailed_analysis")
|
||||
op.drop_column("plan_limits", "kb_allowed_formats")
|
||||
op.drop_column("plan_limits", "kb_batch_max_size")
|
||||
op.drop_column("plan_limits", "kb_max_lifetime_conversions")
|
||||
op.drop_column("plan_limits", "kb_accelerator_enabled")
|
||||
685
backend/app/api/endpoints/kb_accelerator.py
Normal file
685
backend/app/api/endpoints/kb_accelerator.py
Normal file
@@ -0,0 +1,685 @@
|
||||
"""KB Accelerator endpoints.
|
||||
|
||||
Upload KB articles, convert to flows via AI, review, and commit.
|
||||
|
||||
POST /kb-accelerator/upload — Upload file or paste text
|
||||
GET /kb-accelerator/{id} — Get import with nodes
|
||||
GET /kb-accelerator — List imports for account
|
||||
POST /kb-accelerator/{id}/convert — Re-trigger AI conversion
|
||||
PATCH /kb-accelerator/{id}/nodes/{nid} — Edit a node
|
||||
POST /kb-accelerator/{id}/commit — Commit to flow library
|
||||
DELETE /kb-accelerator/{id} — Cancel/cleanup
|
||||
GET /kb-accelerator/quota — Plan entitlements + usage
|
||||
"""
|
||||
import logging
|
||||
import mimetypes
|
||||
from datetime import datetime, timezone
|
||||
from typing import Annotated, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, UploadFile, File, Form, status
|
||||
from sqlalchemy import select, func, delete
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
|
||||
from app.core.config import settings
|
||||
from app.core.rate_limit import limiter
|
||||
from app.core.subscriptions import get_plan_limits
|
||||
from app.core.ai_quota_service import get_user_plan
|
||||
from app.core.kb_extraction_service import extract_text
|
||||
from app.core.kb_conversion_service import convert_document
|
||||
from app.models.kb_import import KBImport, KBImportNode
|
||||
from app.models.plan_limits import PlanLimits
|
||||
from app.models.tree import Tree
|
||||
from app.models.user import User
|
||||
from app.schemas.kb_accelerator import (
|
||||
KBUploadTextRequest,
|
||||
KBNodeEditRequest,
|
||||
KBCommitRequest,
|
||||
KBUploadResponse,
|
||||
KBImportResponse,
|
||||
KBImportNodeResponse,
|
||||
KBImportSummary,
|
||||
KBImportListResponse,
|
||||
KBCommitResponse,
|
||||
KBQuotaResponse,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/kb-accelerator", tags=["kb-accelerator"])
|
||||
|
||||
# Max upload size: 10MB
|
||||
MAX_UPLOAD_SIZE = 10 * 1024 * 1024
|
||||
|
||||
ALLOWED_EXTENSIONS = {
|
||||
"txt": ["text/plain"],
|
||||
"docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
|
||||
}
|
||||
|
||||
# Phase 2 formats (not yet enabled)
|
||||
PHASE2_EXTENSIONS = {
|
||||
"pdf": ["application/pdf"],
|
||||
"html": ["text/html"],
|
||||
"md": ["text/markdown", "text/plain"],
|
||||
}
|
||||
|
||||
|
||||
def _detect_format(filename: str) -> str | None:
|
||||
"""Detect source format from filename extension."""
|
||||
if not filename:
|
||||
return None
|
||||
ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else None
|
||||
if ext in ALLOWED_EXTENSIONS or ext in PHASE2_EXTENSIONS:
|
||||
return ext
|
||||
return None
|
||||
|
||||
|
||||
async def _get_kb_limits(user: User, db: AsyncSession) -> PlanLimits | None:
|
||||
plan = await get_user_plan(user.account_id, db)
|
||||
return await get_plan_limits(plan, db)
|
||||
|
||||
|
||||
async def _check_kb_enabled(user: User, db: AsyncSession) -> PlanLimits:
|
||||
limits = await _get_kb_limits(user, db)
|
||||
if not limits or not limits.kb_accelerator_enabled:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="KB Accelerator is not available on your plan.",
|
||||
)
|
||||
return limits
|
||||
|
||||
|
||||
async def _check_lifetime_limit(user: User, limits: PlanLimits, db: AsyncSession) -> None:
|
||||
if limits.kb_max_lifetime_conversions is None:
|
||||
return # Unlimited
|
||||
count = await db.scalar(
|
||||
select(func.count(KBImport.id)).where(
|
||||
KBImport.account_id == user.account_id,
|
||||
KBImport.status == "committed",
|
||||
)
|
||||
) or 0
|
||||
if count >= limits.kb_max_lifetime_conversions:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail=f"You have reached your lifetime limit of {limits.kb_max_lifetime_conversions} KB conversions. Upgrade your plan for unlimited conversions.",
|
||||
)
|
||||
|
||||
|
||||
async def _check_format_allowed(source_format: str, limits: PlanLimits) -> None:
|
||||
allowed = limits.kb_allowed_formats or ["txt", "paste"]
|
||||
if source_format not in allowed:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail=f"Format '{source_format}' is not available on your plan. Allowed: {', '.join(allowed)}",
|
||||
)
|
||||
|
||||
|
||||
async def _get_import_or_404(
|
||||
import_id: UUID, user: User, db: AsyncSession, *, load_nodes: bool = True
|
||||
) -> KBImport:
|
||||
query = select(KBImport).where(
|
||||
KBImport.id == import_id,
|
||||
KBImport.account_id == user.account_id,
|
||||
)
|
||||
if load_nodes:
|
||||
query = query.options(selectinload(KBImport.nodes))
|
||||
result = await db.execute(query)
|
||||
kb_import = result.scalar_one_or_none()
|
||||
if not kb_import:
|
||||
raise HTTPException(status_code=404, detail="KB import not found")
|
||||
return kb_import
|
||||
|
||||
|
||||
async def _run_conversion(import_id: UUID, db_url: str) -> None:
|
||||
"""Background task: run AI conversion on a KB import."""
|
||||
from app.core.database import async_session_maker
|
||||
async with async_session_maker() as db:
|
||||
result = await db.execute(
|
||||
select(KBImport).where(KBImport.id == import_id)
|
||||
)
|
||||
kb_import = result.scalar_one_or_none()
|
||||
if not kb_import or kb_import.status != "processing":
|
||||
return
|
||||
try:
|
||||
await convert_document(kb_import, db)
|
||||
await db.commit()
|
||||
except Exception as e:
|
||||
logger.error("Background KB conversion failed: %s", e)
|
||||
kb_import.status = "failed"
|
||||
kb_import.error_message = f"Conversion error: {str(e)}"
|
||||
await db.commit()
|
||||
|
||||
|
||||
def _serialize_import(kb_import: KBImport) -> dict:
|
||||
"""Serialize a KBImport to dict for response."""
|
||||
return {
|
||||
"id": kb_import.id,
|
||||
"account_id": kb_import.account_id,
|
||||
"created_by": kb_import.created_by,
|
||||
"source_filename": kb_import.source_filename,
|
||||
"source_format": kb_import.source_format,
|
||||
"source_text": kb_import.source_text,
|
||||
"source_metadata": kb_import.source_metadata,
|
||||
"target_type": kb_import.target_type,
|
||||
"status": kb_import.status,
|
||||
"confidence_avg": kb_import.confidence_avg,
|
||||
"error_message": kb_import.error_message,
|
||||
"processing_time_ms": kb_import.processing_time_ms,
|
||||
"ai_tokens_input": kb_import.ai_tokens_input,
|
||||
"ai_tokens_output": kb_import.ai_tokens_output,
|
||||
"tree_id": kb_import.tree_id,
|
||||
"nodes": [
|
||||
KBImportNodeResponse.model_validate(n) for n in kb_import.nodes
|
||||
] if kb_import.nodes else [],
|
||||
"created_at": kb_import.created_at.isoformat(),
|
||||
"updated_at": kb_import.updated_at.isoformat(),
|
||||
}
|
||||
|
||||
|
||||
# ── Endpoints ──
|
||||
|
||||
|
||||
@router.get("/quota", response_model=KBQuotaResponse)
|
||||
async def get_quota(
|
||||
user: Annotated[User, Depends(require_engineer_or_admin)],
|
||||
db: Annotated[AsyncSession, Depends(get_db)],
|
||||
):
|
||||
"""Get KB Accelerator entitlements and usage for the current account."""
|
||||
plan = await get_user_plan(user.account_id, db)
|
||||
limits = await get_plan_limits(plan, db)
|
||||
|
||||
committed_count = await db.scalar(
|
||||
select(func.count(KBImport.id)).where(
|
||||
KBImport.account_id == user.account_id,
|
||||
KBImport.status == "committed",
|
||||
)
|
||||
) or 0
|
||||
|
||||
if not limits:
|
||||
return KBQuotaResponse(
|
||||
plan=plan,
|
||||
kb_accelerator_enabled=False,
|
||||
lifetime_conversions_used=committed_count,
|
||||
lifetime_conversions_limit=0,
|
||||
allowed_formats=["txt", "paste"],
|
||||
detailed_analysis=False,
|
||||
conversational_refinement=False,
|
||||
step_library_matching=False,
|
||||
history_limit=3,
|
||||
can_convert=False,
|
||||
)
|
||||
|
||||
can_convert = limits.kb_accelerator_enabled
|
||||
if limits.kb_max_lifetime_conversions is not None:
|
||||
can_convert = can_convert and committed_count < limits.kb_max_lifetime_conversions
|
||||
|
||||
return KBQuotaResponse(
|
||||
plan=plan,
|
||||
kb_accelerator_enabled=limits.kb_accelerator_enabled,
|
||||
lifetime_conversions_used=committed_count,
|
||||
lifetime_conversions_limit=limits.kb_max_lifetime_conversions,
|
||||
allowed_formats=limits.kb_allowed_formats or ["txt", "paste"],
|
||||
detailed_analysis=limits.kb_detailed_analysis,
|
||||
conversational_refinement=limits.kb_conversational_refinement,
|
||||
step_library_matching=limits.kb_step_library_matching,
|
||||
history_limit=limits.kb_history_limit,
|
||||
can_convert=can_convert,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/upload", response_model=KBUploadResponse, status_code=201)
|
||||
@limiter.limit("10/minute")
|
||||
async def upload_kb_article(
|
||||
request: Request,
|
||||
background_tasks: BackgroundTasks,
|
||||
user: Annotated[User, Depends(require_engineer_or_admin)],
|
||||
db: Annotated[AsyncSession, Depends(get_db)],
|
||||
file: Optional[UploadFile] = File(None),
|
||||
content: Optional[str] = Form(None),
|
||||
title: Optional[str] = Form(None),
|
||||
target_type: Optional[str] = Form(None),
|
||||
):
|
||||
"""Upload a KB article file or paste text for conversion."""
|
||||
if not settings.ai_enabled:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
detail="AI is not configured.",
|
||||
)
|
||||
|
||||
limits = await _check_kb_enabled(user, db)
|
||||
await _check_lifetime_limit(user, limits, db)
|
||||
|
||||
# Determine source format and extract text
|
||||
if file and file.filename:
|
||||
source_format = _detect_format(file.filename)
|
||||
if not source_format:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Unsupported file format. Supported: {', '.join(ALLOWED_EXTENSIONS.keys())}",
|
||||
)
|
||||
await _check_format_allowed(source_format, limits)
|
||||
|
||||
file_bytes = await file.read()
|
||||
if len(file_bytes) > MAX_UPLOAD_SIZE:
|
||||
raise HTTPException(status_code=413, detail="File exceeds 10MB limit.")
|
||||
if len(file_bytes) == 0:
|
||||
raise HTTPException(status_code=400, detail="Uploaded file is empty.")
|
||||
|
||||
source_filename = file.filename
|
||||
try:
|
||||
source_text, source_metadata = extract_text(file_bytes, source_format)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
except RuntimeError as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
elif content:
|
||||
source_format = "paste"
|
||||
await _check_format_allowed(source_format, limits)
|
||||
source_filename = title
|
||||
source_text = content.strip()
|
||||
source_metadata = None
|
||||
if len(source_text) < 10:
|
||||
raise HTTPException(status_code=400, detail="Content must be at least 10 characters.")
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="Provide either a file or content text.")
|
||||
|
||||
# Validate target_type
|
||||
if target_type and target_type not in ("troubleshooting", "procedural"):
|
||||
raise HTTPException(status_code=400, detail="target_type must be 'troubleshooting' or 'procedural'.")
|
||||
if not target_type:
|
||||
target_type = "troubleshooting" # Default; Phase 2 adds "let AI decide"
|
||||
|
||||
# Create KB import record
|
||||
kb_import = KBImport(
|
||||
account_id=user.account_id,
|
||||
created_by=user.id,
|
||||
source_filename=source_filename,
|
||||
source_format=source_format,
|
||||
source_text=source_text,
|
||||
source_metadata=source_metadata,
|
||||
target_type=target_type,
|
||||
status="processing",
|
||||
)
|
||||
db.add(kb_import)
|
||||
await db.flush()
|
||||
|
||||
# Trigger AI conversion in background
|
||||
background_tasks.add_task(_run_conversion, kb_import.id, settings.DATABASE_URL)
|
||||
await db.commit()
|
||||
|
||||
return KBUploadResponse(
|
||||
id=kb_import.id,
|
||||
status=kb_import.status,
|
||||
source_format=kb_import.source_format,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{import_id}", response_model=KBImportResponse)
|
||||
async def get_kb_import(
|
||||
import_id: UUID,
|
||||
user: Annotated[User, Depends(require_engineer_or_admin)],
|
||||
db: Annotated[AsyncSession, Depends(get_db)],
|
||||
):
|
||||
"""Get a KB import with its generated nodes."""
|
||||
kb_import = await _get_import_or_404(import_id, user, db)
|
||||
return _serialize_import(kb_import)
|
||||
|
||||
|
||||
@router.get("", response_model=KBImportListResponse)
|
||||
async def list_kb_imports(
|
||||
user: Annotated[User, Depends(require_engineer_or_admin)],
|
||||
db: Annotated[AsyncSession, Depends(get_db)],
|
||||
skip: int = 0,
|
||||
limit: int = 20,
|
||||
status_filter: Optional[str] = None,
|
||||
):
|
||||
"""List KB imports for the current account."""
|
||||
limits = await _get_kb_limits(user, db)
|
||||
history_limit = limits.kb_history_limit if limits else 3
|
||||
|
||||
query = select(KBImport).where(KBImport.account_id == user.account_id)
|
||||
count_query = select(func.count(KBImport.id)).where(KBImport.account_id == user.account_id)
|
||||
|
||||
if status_filter:
|
||||
query = query.where(KBImport.status == status_filter)
|
||||
count_query = count_query.where(KBImport.status == status_filter)
|
||||
|
||||
total = await db.scalar(count_query) or 0
|
||||
|
||||
query = query.order_by(KBImport.created_at.desc())
|
||||
|
||||
# Apply history limit for free tier
|
||||
effective_limit = limit
|
||||
if history_limit is not None:
|
||||
effective_limit = min(limit, history_limit - skip) if skip < history_limit else 0
|
||||
|
||||
if effective_limit <= 0:
|
||||
return KBImportListResponse(items=[], total=total, skip=skip, limit=limit)
|
||||
|
||||
query = query.offset(skip).limit(effective_limit)
|
||||
query = query.options(selectinload(KBImport.nodes))
|
||||
result = await db.execute(query)
|
||||
imports = result.scalars().all()
|
||||
|
||||
items = []
|
||||
for imp in imports:
|
||||
items.append(KBImportSummary(
|
||||
id=imp.id,
|
||||
source_filename=imp.source_filename,
|
||||
source_format=imp.source_format,
|
||||
target_type=imp.target_type,
|
||||
status=imp.status,
|
||||
confidence_avg=imp.confidence_avg,
|
||||
node_count=len(imp.nodes) if imp.nodes else 0,
|
||||
created_at=imp.created_at.isoformat(),
|
||||
))
|
||||
|
||||
return KBImportListResponse(items=items, total=total, skip=skip, limit=limit)
|
||||
|
||||
|
||||
@router.post("/{import_id}/convert", response_model=KBUploadResponse)
|
||||
@limiter.limit("30/minute")
|
||||
async def reconvert(
|
||||
request: Request,
|
||||
import_id: UUID,
|
||||
background_tasks: BackgroundTasks,
|
||||
user: Annotated[User, Depends(require_engineer_or_admin)],
|
||||
db: Annotated[AsyncSession, Depends(get_db)],
|
||||
):
|
||||
"""Re-trigger AI conversion on an existing import (retry/regenerate)."""
|
||||
if not settings.ai_enabled:
|
||||
raise HTTPException(status_code=503, detail="AI is not configured.")
|
||||
|
||||
kb_import = await _get_import_or_404(import_id, user, db, load_nodes=False)
|
||||
|
||||
if kb_import.status == "committed":
|
||||
raise HTTPException(status_code=400, detail="Cannot reconvert a committed import.")
|
||||
|
||||
# Delete existing nodes
|
||||
await db.execute(
|
||||
delete(KBImportNode).where(KBImportNode.kb_import_id == kb_import.id)
|
||||
)
|
||||
|
||||
kb_import.status = "processing"
|
||||
kb_import.error_message = None
|
||||
kb_import.confidence_avg = None
|
||||
await db.flush()
|
||||
|
||||
background_tasks.add_task(_run_conversion, kb_import.id, settings.DATABASE_URL)
|
||||
await db.commit()
|
||||
|
||||
return KBUploadResponse(
|
||||
id=kb_import.id, status="processing", source_format=kb_import.source_format
|
||||
)
|
||||
|
||||
|
||||
@router.patch("/{import_id}/nodes/{node_id}", response_model=KBImportNodeResponse)
|
||||
async def edit_node(
|
||||
import_id: UUID,
|
||||
node_id: UUID,
|
||||
data: KBNodeEditRequest,
|
||||
user: Annotated[User, Depends(require_engineer_or_admin)],
|
||||
db: Annotated[AsyncSession, Depends(get_db)],
|
||||
):
|
||||
"""Edit a specific node in a KB import during review."""
|
||||
kb_import = await _get_import_or_404(import_id, user, db, load_nodes=False)
|
||||
if kb_import.status != "ready":
|
||||
raise HTTPException(status_code=400, detail="Import must be in 'ready' status to edit nodes.")
|
||||
|
||||
result = await db.execute(
|
||||
select(KBImportNode).where(
|
||||
KBImportNode.id == node_id,
|
||||
KBImportNode.kb_import_id == import_id,
|
||||
)
|
||||
)
|
||||
node = result.scalar_one_or_none()
|
||||
if not node:
|
||||
raise HTTPException(status_code=404, detail="Node not found")
|
||||
|
||||
op = data.operation
|
||||
|
||||
if op == "approve":
|
||||
node.user_approved = True
|
||||
|
||||
elif op == "reject":
|
||||
node.user_approved = False
|
||||
|
||||
elif op == "edit":
|
||||
if not data.content:
|
||||
raise HTTPException(status_code=400, detail="Content required for edit operation.")
|
||||
node.content = data.content
|
||||
node.user_edited = True
|
||||
|
||||
elif op == "delete":
|
||||
await db.delete(node)
|
||||
# Reorder remaining nodes
|
||||
remaining = await db.execute(
|
||||
select(KBImportNode)
|
||||
.where(KBImportNode.kb_import_id == import_id)
|
||||
.order_by(KBImportNode.node_order)
|
||||
)
|
||||
for idx, n in enumerate(remaining.scalars().all()):
|
||||
n.node_order = idx
|
||||
await db.flush()
|
||||
await db.commit()
|
||||
# Return a placeholder response for deleted node
|
||||
return KBImportNodeResponse(
|
||||
id=node_id,
|
||||
kb_import_id=import_id,
|
||||
node_order=-1,
|
||||
node_type="step",
|
||||
content={"deleted": True},
|
||||
confidence_score=0,
|
||||
user_edited=False,
|
||||
user_approved=False,
|
||||
)
|
||||
|
||||
elif op == "insert_after":
|
||||
if not data.content:
|
||||
raise HTTPException(status_code=400, detail="Content required for insert_after operation.")
|
||||
# Shift subsequent nodes
|
||||
subsequent = await db.execute(
|
||||
select(KBImportNode)
|
||||
.where(
|
||||
KBImportNode.kb_import_id == import_id,
|
||||
KBImportNode.node_order > node.node_order,
|
||||
)
|
||||
.order_by(KBImportNode.node_order)
|
||||
)
|
||||
for n in subsequent.scalars().all():
|
||||
n.node_order += 1
|
||||
|
||||
new_node = KBImportNode(
|
||||
kb_import_id=import_id,
|
||||
node_order=node.node_order + 1,
|
||||
node_type=data.content.get("type", "step"),
|
||||
content=data.content,
|
||||
confidence_score=1.0, # User-created nodes are fully trusted
|
||||
user_edited=True,
|
||||
user_approved=True,
|
||||
)
|
||||
db.add(new_node)
|
||||
await db.flush()
|
||||
await db.commit()
|
||||
return KBImportNodeResponse.model_validate(new_node)
|
||||
|
||||
elif op == "regenerate":
|
||||
# Re-run AI for just this node (simplified: update placeholder)
|
||||
# Full implementation would call AI with node context + guidance
|
||||
node.user_edited = False
|
||||
node.user_approved = False
|
||||
|
||||
node.updated_at = datetime.now(timezone.utc)
|
||||
await db.flush()
|
||||
await db.commit()
|
||||
return KBImportNodeResponse.model_validate(node)
|
||||
|
||||
|
||||
@router.post("/{import_id}/commit", response_model=KBCommitResponse)
|
||||
async def commit_import(
|
||||
import_id: UUID,
|
||||
user: Annotated[User, Depends(require_engineer_or_admin)],
|
||||
db: Annotated[AsyncSession, Depends(get_db)],
|
||||
data: Optional[KBCommitRequest] = None,
|
||||
):
|
||||
"""Commit a reviewed KB import to the flow library as a Tree."""
|
||||
kb_import = await _get_import_or_404(import_id, user, db)
|
||||
|
||||
if kb_import.status != "ready":
|
||||
raise HTTPException(status_code=400, detail="Import must be in 'ready' status to commit.")
|
||||
|
||||
if not kb_import.nodes:
|
||||
raise HTTPException(status_code=400, detail="No nodes to commit.")
|
||||
|
||||
# Extract title/description from conversion metadata
|
||||
conversion_meta = (kb_import.source_metadata or {}).get("_conversion", {})
|
||||
tree_name = (data.name if data and data.name else None) or conversion_meta.get("title", "Imported Flow")
|
||||
tree_description = (data.description if data else None) or conversion_meta.get("description")
|
||||
|
||||
# Build tree_structure from nodes
|
||||
if kb_import.target_type == "troubleshooting":
|
||||
tree_structure = _build_troubleshooting_tree(kb_import.nodes)
|
||||
else:
|
||||
tree_structure = _build_procedural_tree(kb_import.nodes)
|
||||
|
||||
# Build intake_form for procedural flows
|
||||
intake_form = None
|
||||
if kb_import.target_type == "procedural":
|
||||
intake_form = (kb_import.source_metadata or {}).get("_intake_form")
|
||||
|
||||
# Create the Tree record
|
||||
tree = Tree(
|
||||
name=tree_name,
|
||||
description=tree_description,
|
||||
tree_type=kb_import.target_type,
|
||||
tree_structure=tree_structure,
|
||||
intake_form=intake_form,
|
||||
author_id=user.id,
|
||||
account_id=user.account_id,
|
||||
status="draft",
|
||||
import_metadata={
|
||||
"source": "kb_accelerator",
|
||||
"kb_import_id": str(kb_import.id),
|
||||
"source_filename": kb_import.source_filename,
|
||||
"source_format": kb_import.source_format,
|
||||
"confidence_avg": kb_import.confidence_avg,
|
||||
"node_count": len(kb_import.nodes),
|
||||
"converted_at": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
)
|
||||
if data and data.category_id:
|
||||
tree.category_id = data.category_id
|
||||
|
||||
db.add(tree)
|
||||
await db.flush()
|
||||
|
||||
kb_import.status = "committed"
|
||||
kb_import.tree_id = tree.id
|
||||
await db.commit()
|
||||
|
||||
return KBCommitResponse(
|
||||
tree_id=tree.id,
|
||||
import_id=kb_import.id,
|
||||
tree_type=kb_import.target_type,
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{import_id}", status_code=204)
|
||||
async def delete_import(
|
||||
import_id: UUID,
|
||||
user: Annotated[User, Depends(require_engineer_or_admin)],
|
||||
db: Annotated[AsyncSession, Depends(get_db)],
|
||||
):
|
||||
"""Cancel and clean up a KB import."""
|
||||
kb_import = await _get_import_or_404(import_id, user, db, load_nodes=False)
|
||||
if kb_import.status == "committed":
|
||||
raise HTTPException(status_code=400, detail="Cannot delete a committed import.")
|
||||
await db.execute(
|
||||
delete(KBImportNode).where(KBImportNode.kb_import_id == import_id)
|
||||
)
|
||||
await db.delete(kb_import)
|
||||
await db.commit()
|
||||
|
||||
|
||||
# ── Tree Structure Builders ──
|
||||
|
||||
|
||||
def _build_troubleshooting_tree(nodes: list[KBImportNode]) -> dict:
|
||||
"""Build a troubleshooting tree_structure from import nodes."""
|
||||
if not nodes:
|
||||
return {"id": "root", "type": "decision", "question": "Empty", "children": []}
|
||||
|
||||
# Map original IDs to proper tree node structure
|
||||
original_id_map: dict[str, KBImportNode] = {}
|
||||
for node in nodes:
|
||||
orig_id = node.content.get("original_id", str(node.id))
|
||||
original_id_map[orig_id] = node
|
||||
|
||||
def _build_node(import_node: KBImportNode) -> dict:
|
||||
content = import_node.content
|
||||
node_type = import_node.node_type
|
||||
|
||||
if node_type == "resolution":
|
||||
return {
|
||||
"id": content.get("original_id", str(import_node.id)),
|
||||
"type": "solution",
|
||||
"question": content.get("question", ""),
|
||||
"children": [],
|
||||
}
|
||||
|
||||
if node_type == "action":
|
||||
result = {
|
||||
"id": content.get("original_id", str(import_node.id)),
|
||||
"type": "action",
|
||||
"question": content.get("question", ""),
|
||||
"children": [],
|
||||
}
|
||||
next_id = content.get("next_node_id")
|
||||
if next_id and next_id in original_id_map:
|
||||
result["next_node_id"] = next_id
|
||||
return result
|
||||
|
||||
# question/decision type
|
||||
options = content.get("options", [])
|
||||
children = []
|
||||
for opt in options:
|
||||
next_id = opt.get("next_node_id")
|
||||
if next_id and next_id in original_id_map:
|
||||
child_node = _build_node(original_id_map[next_id])
|
||||
children.append(child_node)
|
||||
|
||||
return {
|
||||
"id": content.get("original_id", str(import_node.id)),
|
||||
"type": "decision",
|
||||
"question": content.get("question", ""),
|
||||
"options": [
|
||||
{"label": opt.get("label", ""), "next_node_id": opt.get("next_node_id", "")}
|
||||
for opt in options
|
||||
],
|
||||
"children": children,
|
||||
}
|
||||
|
||||
root_node = nodes[0]
|
||||
return _build_node(root_node)
|
||||
|
||||
|
||||
def _build_procedural_tree(nodes: list[KBImportNode]) -> dict:
|
||||
"""Build a procedural tree_structure from import nodes."""
|
||||
steps = []
|
||||
for node in sorted(nodes, key=lambda n: n.node_order):
|
||||
content = node.content
|
||||
step = {
|
||||
"id": content.get("original_id", str(node.id)),
|
||||
"type": node.node_type,
|
||||
"content": content.get("content", ""),
|
||||
}
|
||||
steps.append(step)
|
||||
|
||||
return {
|
||||
"id": "root",
|
||||
"type": "procedural",
|
||||
"steps": steps,
|
||||
}
|
||||
@@ -14,6 +14,7 @@ from app.api.endpoints import survey
|
||||
from app.api.endpoints import admin_survey
|
||||
from app.api.endpoints import tree_transfer
|
||||
from app.api.endpoints import ai_suggestions
|
||||
from app.api.endpoints import kb_accelerator
|
||||
|
||||
api_router = APIRouter()
|
||||
|
||||
@@ -52,3 +53,4 @@ api_router.include_router(survey.router)
|
||||
api_router.include_router(admin_survey.router)
|
||||
api_router.include_router(tree_transfer.router)
|
||||
api_router.include_router(ai_suggestions.router)
|
||||
api_router.include_router(kb_accelerator.router)
|
||||
|
||||
@@ -98,6 +98,7 @@ class Settings(BaseSettings):
|
||||
"quick_action": "fast",
|
||||
"open_chat": "standard",
|
||||
"variable_inference": "fast",
|
||||
"kb_convert": "standard",
|
||||
}
|
||||
|
||||
def get_model_for_action(self, action_type: str) -> str:
|
||||
|
||||
498
backend/app/core/kb_conversion_service.py
Normal file
498
backend/app/core/kb_conversion_service.py
Normal file
@@ -0,0 +1,498 @@
|
||||
"""KB Accelerator AI conversion service.
|
||||
|
||||
Converts extracted KB article text into ResolutionFlow tree structures
|
||||
using the Anthropic API (via the shared AI provider layer).
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.ai_provider import get_ai_provider
|
||||
from app.core.ai_quota_service import record_ai_usage, get_user_plan
|
||||
from app.core.config import settings
|
||||
from app.models.kb_import import KBImport, KBImportNode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cost estimation (Sonnet pricing)
|
||||
COST_PER_INPUT_TOKEN = 3.0 / 1_000_000
|
||||
COST_PER_OUTPUT_TOKEN = 15.0 / 1_000_000
|
||||
|
||||
|
||||
def _strip_markdown_fences(text: str) -> str:
|
||||
"""Strip markdown code fences if the model wrapped its JSON response."""
|
||||
text = text.strip()
|
||||
match = re.match(r"^```(?:json)?\s*([\s\S]*?)```$", text)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
return text
|
||||
|
||||
|
||||
def _estimate_cost(input_tokens: int, output_tokens: int) -> float:
|
||||
return (input_tokens * COST_PER_INPUT_TOKEN) + (output_tokens * COST_PER_OUTPUT_TOKEN)
|
||||
|
||||
|
||||
# ── System Prompts ──
|
||||
|
||||
TROUBLESHOOTING_SYSTEM_PROMPT = """You are an MSP documentation specialist for ResolutionFlow. Your task is to convert a knowledge base article into an interactive troubleshooting decision tree.
|
||||
|
||||
Analyze the article and produce a JSON array of nodes that form a troubleshooting flow. Each node represents either a diagnostic question (decision point) or a resolution (solution).
|
||||
|
||||
## Node Types
|
||||
|
||||
- **question**: A diagnostic question with multiple answer options. Each option leads to another node.
|
||||
- **resolution**: A terminal node with the solution/fix text.
|
||||
- **action**: An instruction step that leads to the next node via next_node_id.
|
||||
- **warning**: A caution or important note.
|
||||
|
||||
## Output Format
|
||||
|
||||
Return a JSON object with this structure:
|
||||
```json
|
||||
{
|
||||
"title": "Flow title derived from the article",
|
||||
"description": "Brief description of what this flow troubleshoots",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "unique-node-id",
|
||||
"type": "question",
|
||||
"question": "What symptom is the user experiencing?",
|
||||
"options": [
|
||||
{"label": "Cannot connect", "next_node_id": "check-network"},
|
||||
{"label": "Slow performance", "next_node_id": "check-resources"}
|
||||
],
|
||||
"confidence": 0.95,
|
||||
"source_excerpt": "The exact text from the article this node was derived from"
|
||||
},
|
||||
{
|
||||
"id": "check-network",
|
||||
"type": "action",
|
||||
"question": "Check the network connection and ping the server",
|
||||
"next_node_id": "network-result",
|
||||
"confidence": 0.88,
|
||||
"source_excerpt": "Step 1: Verify network connectivity..."
|
||||
},
|
||||
{
|
||||
"id": "solution-restart",
|
||||
"type": "resolution",
|
||||
"question": "Restart the service. The issue should now be resolved.",
|
||||
"confidence": 0.92,
|
||||
"source_excerpt": "Restarting the service resolves the connectivity issue."
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Rules
|
||||
|
||||
1. Every node MUST have a unique `id` (descriptive kebab-case).
|
||||
2. Every node MUST have a `confidence` score between 0.0 and 1.0.
|
||||
3. Every node MUST have a `source_excerpt` — the exact text from the source article it was derived from.
|
||||
4. The first node is the root of the decision tree.
|
||||
5. All `next_node_id` and option `next_node_id` references must point to existing node IDs.
|
||||
6. Detect implicit branching logic (e.g., "If X, do Y; otherwise Z") and create decision nodes.
|
||||
7. Produce at least 3 nodes. Maximum 50 nodes.
|
||||
8. Use high confidence (0.9+) for directly stated steps, medium (0.7-0.89) for reasonable inferences, low (<0.7) for significant interpretation.
|
||||
9. Return ONLY valid JSON — no markdown fences, no explanation text."""
|
||||
|
||||
PROCEDURAL_SYSTEM_PROMPT = """You are an MSP documentation specialist for ResolutionFlow. Your task is to convert a knowledge base article into a procedural (step-by-step) flow.
|
||||
|
||||
Analyze the article and produce a JSON object with sequential steps and detected variables.
|
||||
|
||||
## Step Types
|
||||
|
||||
- **step**: A regular instruction step.
|
||||
- **section_header**: A section divider/title (no action, just organizational).
|
||||
- **warning**: A caution or important note that should be highlighted.
|
||||
|
||||
## Variable Detection
|
||||
|
||||
Identify values that would change between executions (server names, IPs, usernames, domains, etc.) and replace them with `[VAR:variable_name]` tokens. Also produce an intake_form that captures these variables before execution.
|
||||
|
||||
## Output Format
|
||||
|
||||
Return a JSON object:
|
||||
```json
|
||||
{
|
||||
"title": "Procedure title derived from the article",
|
||||
"description": "Brief description of what this procedure accomplishes",
|
||||
"steps": [
|
||||
{
|
||||
"id": "unique-step-id",
|
||||
"type": "step",
|
||||
"content": "Open Server Manager and navigate to Add Roles on [VAR:server_name]",
|
||||
"confidence": 0.95,
|
||||
"source_excerpt": "Step 1: Open Server Manager on DC01..."
|
||||
},
|
||||
{
|
||||
"id": "warning-dns",
|
||||
"type": "warning",
|
||||
"content": "WARNING: This will restart DNS and cause brief connectivity loss",
|
||||
"confidence": 0.90,
|
||||
"source_excerpt": "Note: Restarting DNS will cause a brief outage"
|
||||
},
|
||||
{
|
||||
"id": "section-verification",
|
||||
"type": "section_header",
|
||||
"content": "Verification Steps",
|
||||
"confidence": 1.0,
|
||||
"source_excerpt": "Verification"
|
||||
}
|
||||
],
|
||||
"intake_form": [
|
||||
{
|
||||
"variable_name": "server_name",
|
||||
"label": "Server Name",
|
||||
"field_type": "text",
|
||||
"required": true,
|
||||
"display_order": 1
|
||||
},
|
||||
{
|
||||
"variable_name": "ip_address",
|
||||
"label": "IP Address",
|
||||
"field_type": "text",
|
||||
"required": true,
|
||||
"display_order": 2
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Variable Type Mapping
|
||||
|
||||
- IP addresses → field_type: "text", variable like `ip_address`
|
||||
- Server/computer names → field_type: "text", variable like `server_name`
|
||||
- Domain names → field_type: "text", variable like `domain_name`
|
||||
- Usernames/email → field_type: "text", variable like `username`
|
||||
- Port numbers → field_type: "number", variable like `port`
|
||||
|
||||
## Rules
|
||||
|
||||
1. Every step MUST have a unique `id` (descriptive kebab-case).
|
||||
2. Every step MUST have a `confidence` score between 0.0 and 1.0.
|
||||
3. Every step MUST have a `source_excerpt` — the exact text from the source article.
|
||||
4. Preserve the original step ordering from the article.
|
||||
5. Detect ALL instance-specific values and replace with `[VAR:name]` tokens.
|
||||
6. Generate an intake_form entry for each unique variable detected.
|
||||
7. Produce at least 2 steps. Maximum 100 steps.
|
||||
8. Use high confidence (0.9+) for directly stated steps, medium (0.7-0.89) for inferences, low (<0.7) for significant interpretation.
|
||||
9. Return ONLY valid JSON — no markdown fences, no explanation text."""
|
||||
|
||||
|
||||
def _build_user_message(
|
||||
source_text: str,
|
||||
source_metadata: dict[str, Any] | None,
|
||||
source_filename: str | None,
|
||||
) -> str:
|
||||
"""Build the user message containing the extracted text and metadata."""
|
||||
parts = []
|
||||
|
||||
if source_filename:
|
||||
parts.append(f"Source file: {source_filename}")
|
||||
|
||||
if source_metadata:
|
||||
headings = source_metadata.get("headings", [])
|
||||
if headings:
|
||||
heading_text = ", ".join(
|
||||
f"H{h['level']}: {h['text']}" for h in headings[:20]
|
||||
)
|
||||
parts.append(f"Detected headings: {heading_text}")
|
||||
|
||||
lists = source_metadata.get("lists", [])
|
||||
if lists:
|
||||
parts.append(f"Detected {len(lists)} list(s) in the document.")
|
||||
|
||||
tables = source_metadata.get("tables", [])
|
||||
if tables:
|
||||
parts.append(f"Detected {len(tables)} table(s) in the document.")
|
||||
|
||||
parts.append(f"\n--- ARTICLE CONTENT ---\n\n{source_text}")
|
||||
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _parse_troubleshooting_response(
|
||||
data: dict[str, Any],
|
||||
kb_import_id: UUID,
|
||||
) -> tuple[list[KBImportNode], str, str | None]:
|
||||
"""Parse AI response into KBImportNode records for troubleshooting flows.
|
||||
|
||||
Returns (nodes, title, description).
|
||||
"""
|
||||
title = data.get("title", "Imported Troubleshooting Flow")
|
||||
description = data.get("description")
|
||||
raw_nodes = data.get("nodes", [])
|
||||
|
||||
if not raw_nodes:
|
||||
raise ValueError("AI returned no nodes")
|
||||
|
||||
# Build parent mapping from the tree structure
|
||||
# First node is root (no parent). For others, trace via options/next_node_id.
|
||||
node_id_to_parent: dict[str, str | None] = {}
|
||||
node_id_to_data: dict[str, dict[str, Any]] = {}
|
||||
for node in raw_nodes:
|
||||
nid = node.get("id", "")
|
||||
node_id_to_data[nid] = node
|
||||
if nid not in node_id_to_parent:
|
||||
node_id_to_parent[nid] = None # default: no parent
|
||||
|
||||
# Trace parent relationships
|
||||
for node in raw_nodes:
|
||||
nid = node.get("id", "")
|
||||
# Options point to children
|
||||
for opt in node.get("options", []):
|
||||
child_id = opt.get("next_node_id")
|
||||
if child_id and child_id in node_id_to_data:
|
||||
node_id_to_parent[child_id] = nid
|
||||
# next_node_id points to child
|
||||
next_id = node.get("next_node_id")
|
||||
if next_id and next_id in node_id_to_data:
|
||||
node_id_to_parent[next_id] = nid
|
||||
|
||||
# Create import node records preserving order
|
||||
import uuid as uuid_mod
|
||||
node_id_map: dict[str, uuid_mod.UUID] = {}
|
||||
nodes: list[KBImportNode] = []
|
||||
|
||||
for order, raw_node in enumerate(raw_nodes):
|
||||
node_uuid = uuid_mod.uuid4()
|
||||
nid = raw_node.get("id", f"node-{order}")
|
||||
node_id_map[nid] = node_uuid
|
||||
|
||||
for order, raw_node in enumerate(raw_nodes):
|
||||
nid = raw_node.get("id", f"node-{order}")
|
||||
node_type = raw_node.get("type", "question")
|
||||
if node_type == "decision":
|
||||
node_type = "question"
|
||||
|
||||
parent_str_id = node_id_to_parent.get(nid)
|
||||
parent_uuid = node_id_map.get(parent_str_id) if parent_str_id else None
|
||||
|
||||
# Build content JSONB
|
||||
content: dict[str, Any] = {
|
||||
"original_id": nid,
|
||||
"question": raw_node.get("question", ""),
|
||||
}
|
||||
if raw_node.get("options"):
|
||||
content["options"] = raw_node["options"]
|
||||
if raw_node.get("next_node_id"):
|
||||
content["next_node_id"] = raw_node["next_node_id"]
|
||||
|
||||
import_node = KBImportNode(
|
||||
id=node_id_map[nid],
|
||||
kb_import_id=kb_import_id,
|
||||
node_order=order,
|
||||
node_type=node_type,
|
||||
content=content,
|
||||
parent_node_id=parent_uuid,
|
||||
source_excerpt=raw_node.get("source_excerpt"),
|
||||
confidence_score=float(raw_node.get("confidence", 0.5)),
|
||||
user_edited=False,
|
||||
user_approved=False,
|
||||
)
|
||||
nodes.append(import_node)
|
||||
|
||||
return nodes, title, description
|
||||
|
||||
|
||||
def _parse_procedural_response(
|
||||
data: dict[str, Any],
|
||||
kb_import_id: UUID,
|
||||
) -> tuple[list[KBImportNode], str, str | None, list[dict[str, Any]] | None]:
|
||||
"""Parse AI response into KBImportNode records for procedural flows.
|
||||
|
||||
Returns (nodes, title, description, intake_form).
|
||||
"""
|
||||
title = data.get("title", "Imported Procedure")
|
||||
description = data.get("description")
|
||||
raw_steps = data.get("steps", [])
|
||||
intake_form = data.get("intake_form")
|
||||
|
||||
if not raw_steps:
|
||||
raise ValueError("AI returned no steps")
|
||||
|
||||
import uuid as uuid_mod
|
||||
nodes: list[KBImportNode] = []
|
||||
|
||||
for order, raw_step in enumerate(raw_steps):
|
||||
content: dict[str, Any] = {
|
||||
"original_id": raw_step.get("id", f"step-{order}"),
|
||||
"content": raw_step.get("content", ""),
|
||||
}
|
||||
|
||||
node_type = raw_step.get("type", "step")
|
||||
if node_type not in ("step", "section_header", "warning"):
|
||||
node_type = "step"
|
||||
|
||||
import_node = KBImportNode(
|
||||
id=uuid_mod.uuid4(),
|
||||
kb_import_id=kb_import_id,
|
||||
node_order=order,
|
||||
node_type=node_type,
|
||||
content=content,
|
||||
parent_node_id=None, # Procedural flows are linear
|
||||
source_excerpt=raw_step.get("source_excerpt"),
|
||||
confidence_score=float(raw_step.get("confidence", 0.5)),
|
||||
user_edited=False,
|
||||
user_approved=False,
|
||||
)
|
||||
nodes.append(import_node)
|
||||
|
||||
return nodes, title, description, intake_form
|
||||
|
||||
|
||||
async def convert_document(
|
||||
kb_import: KBImport,
|
||||
db: AsyncSession,
|
||||
) -> list[KBImportNode]:
|
||||
"""Run AI conversion on an extracted KB article.
|
||||
|
||||
Creates KBImportNode records and updates the kb_import status.
|
||||
Returns the created nodes.
|
||||
"""
|
||||
start_time = time.monotonic()
|
||||
|
||||
# Select system prompt based on target type
|
||||
if kb_import.target_type == "troubleshooting":
|
||||
system_prompt = TROUBLESHOOTING_SYSTEM_PROMPT
|
||||
else:
|
||||
system_prompt = PROCEDURAL_SYSTEM_PROMPT
|
||||
|
||||
user_message = _build_user_message(
|
||||
source_text=kb_import.source_text,
|
||||
source_metadata=kb_import.source_metadata,
|
||||
source_filename=kb_import.source_filename,
|
||||
)
|
||||
|
||||
# Get AI provider with model routing
|
||||
model = settings.get_model_for_action("kb_convert")
|
||||
provider = get_ai_provider(model=model)
|
||||
|
||||
try:
|
||||
raw_text, input_tokens, output_tokens = await provider.generate_json(
|
||||
system_prompt=system_prompt,
|
||||
messages=[{"role": "user", "content": user_message}],
|
||||
max_tokens=8192,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("AI conversion failed for kb_import=%s: %s", kb_import.id, e)
|
||||
kb_import.status = "failed"
|
||||
kb_import.error_message = f"AI processing error: {str(e)}"
|
||||
kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
|
||||
await db.flush()
|
||||
|
||||
# Record failed usage
|
||||
plan = await get_user_plan(kb_import.account_id, db)
|
||||
await record_ai_usage(
|
||||
user_id=kb_import.created_by,
|
||||
account_id=kb_import.account_id,
|
||||
conversation_id=None,
|
||||
generation_type="kb_convert",
|
||||
tier=plan,
|
||||
input_tokens=0,
|
||||
output_tokens=0,
|
||||
estimated_cost=0.0,
|
||||
succeeded=False,
|
||||
counts_toward_quota=False,
|
||||
error_code="ai_error",
|
||||
extra_data={"kb_import_id": str(kb_import.id)},
|
||||
db=db,
|
||||
)
|
||||
return []
|
||||
|
||||
# Parse JSON response
|
||||
raw_text = _strip_markdown_fences(raw_text)
|
||||
try:
|
||||
data = json.loads(raw_text)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(
|
||||
"KB conversion JSON parse failed for kb_import=%s (%d chars): %s",
|
||||
kb_import.id, len(raw_text), raw_text[:500],
|
||||
)
|
||||
kb_import.status = "failed"
|
||||
kb_import.error_message = f"AI returned invalid JSON: {e}"
|
||||
kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
|
||||
kb_import.ai_tokens_input = input_tokens
|
||||
kb_import.ai_tokens_output = output_tokens
|
||||
await db.flush()
|
||||
return []
|
||||
|
||||
# Parse into nodes based on target type
|
||||
try:
|
||||
intake_form = None
|
||||
if kb_import.target_type == "troubleshooting":
|
||||
nodes, title, description = _parse_troubleshooting_response(
|
||||
data, kb_import.id
|
||||
)
|
||||
else:
|
||||
nodes, title, description, intake_form = _parse_procedural_response(
|
||||
data, kb_import.id
|
||||
)
|
||||
except (ValueError, KeyError, TypeError) as e:
|
||||
logger.error("KB node parsing failed for kb_import=%s: %s", kb_import.id, e)
|
||||
kb_import.status = "failed"
|
||||
kb_import.error_message = f"Failed to parse AI response: {e}"
|
||||
kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
|
||||
kb_import.ai_tokens_input = input_tokens
|
||||
kb_import.ai_tokens_output = output_tokens
|
||||
await db.flush()
|
||||
return []
|
||||
|
||||
# Persist nodes
|
||||
for node in nodes:
|
||||
db.add(node)
|
||||
|
||||
# Update import record
|
||||
elapsed_ms = int((time.monotonic() - start_time) * 1000)
|
||||
confidence_scores = [n.confidence_score for n in nodes]
|
||||
avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.0
|
||||
|
||||
kb_import.status = "ready"
|
||||
kb_import.confidence_avg = avg_confidence
|
||||
kb_import.processing_time_ms = elapsed_ms
|
||||
kb_import.ai_tokens_input = input_tokens
|
||||
kb_import.ai_tokens_output = output_tokens
|
||||
|
||||
# Store parsed metadata for commit phase
|
||||
if not kb_import.source_metadata:
|
||||
kb_import.source_metadata = {}
|
||||
kb_import.source_metadata["_conversion"] = {
|
||||
"title": title,
|
||||
"description": description,
|
||||
"node_count": len(nodes),
|
||||
}
|
||||
if intake_form:
|
||||
kb_import.source_metadata["_intake_form"] = intake_form
|
||||
|
||||
await db.flush()
|
||||
|
||||
# Record successful usage
|
||||
plan = await get_user_plan(kb_import.account_id, db)
|
||||
cost = _estimate_cost(input_tokens, output_tokens)
|
||||
await record_ai_usage(
|
||||
user_id=kb_import.created_by,
|
||||
account_id=kb_import.account_id,
|
||||
conversation_id=None,
|
||||
generation_type="kb_convert",
|
||||
tier=plan,
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
estimated_cost=cost,
|
||||
succeeded=True,
|
||||
counts_toward_quota=True,
|
||||
error_code=None,
|
||||
extra_data={"kb_import_id": str(kb_import.id), "node_count": len(nodes)},
|
||||
db=db,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"KB conversion complete: import=%s, nodes=%d, confidence=%.2f, time=%dms, tokens=%d/%d",
|
||||
kb_import.id, len(nodes), avg_confidence, elapsed_ms, input_tokens, output_tokens,
|
||||
)
|
||||
|
||||
return nodes
|
||||
199
backend/app/core/kb_extraction_service.py
Normal file
199
backend/app/core/kb_extraction_service.py
Normal file
@@ -0,0 +1,199 @@
|
||||
"""KB Accelerator text extraction service.
|
||||
|
||||
Extracts plain text and structural metadata from uploaded KB articles.
|
||||
Phase 1: txt, paste, docx. Phase 2 will add pdf, html, md.
|
||||
"""
|
||||
import io
|
||||
import logging
|
||||
from typing import Any, Callable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Type alias for extraction handlers
|
||||
ExtractResult = tuple[str, dict[str, Any] | None]
|
||||
ExtractHandler = Callable[[bytes], ExtractResult]
|
||||
|
||||
|
||||
def _extract_txt(content_bytes: bytes) -> ExtractResult:
|
||||
"""Extract from plain text — pass through with no metadata."""
|
||||
text = content_bytes.decode("utf-8", errors="replace")
|
||||
return text.strip(), None
|
||||
|
||||
|
||||
def _extract_paste(content_bytes: bytes) -> ExtractResult:
|
||||
"""Extract from pasted text — identical to txt."""
|
||||
return _extract_txt(content_bytes)
|
||||
|
||||
|
||||
def _extract_docx(content_bytes: bytes) -> ExtractResult:
|
||||
"""Extract text and structural metadata from a DOCX file.
|
||||
|
||||
Preserves heading levels, list structures, table content,
|
||||
and bold/italic emphasis markers.
|
||||
"""
|
||||
try:
|
||||
from docx import Document
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||
except ImportError:
|
||||
raise RuntimeError(
|
||||
"python-docx is required for DOCX extraction. "
|
||||
"Install it with: pip install python-docx"
|
||||
)
|
||||
|
||||
doc = Document(io.BytesIO(content_bytes))
|
||||
|
||||
text_parts: list[str] = []
|
||||
metadata: dict[str, Any] = {
|
||||
"headings": [],
|
||||
"lists": [],
|
||||
"tables": [],
|
||||
"emphasis": [],
|
||||
}
|
||||
|
||||
list_items: list[dict[str, Any]] = []
|
||||
current_list_type: str | None = None
|
||||
|
||||
for i, para in enumerate(doc.paragraphs):
|
||||
style_name = para.style.name if para.style else ""
|
||||
text = para.text.strip()
|
||||
if not text:
|
||||
# Flush any accumulated list
|
||||
if list_items:
|
||||
metadata["lists"].append({
|
||||
"type": current_list_type or "unordered",
|
||||
"items": list_items,
|
||||
})
|
||||
list_items = []
|
||||
current_list_type = None
|
||||
text_parts.append("")
|
||||
continue
|
||||
|
||||
# Detect headings
|
||||
if style_name.startswith("Heading"):
|
||||
try:
|
||||
level = int(style_name.split()[-1])
|
||||
except (ValueError, IndexError):
|
||||
level = 1
|
||||
metadata["headings"].append({
|
||||
"level": level,
|
||||
"text": text,
|
||||
"paragraph_index": i,
|
||||
})
|
||||
text_parts.append(text)
|
||||
continue
|
||||
|
||||
# Detect list items
|
||||
if style_name.startswith("List"):
|
||||
is_ordered = "Number" in style_name or "Ordered" in style_name
|
||||
list_type = "ordered" if is_ordered else "unordered"
|
||||
if current_list_type is not None and current_list_type != list_type:
|
||||
# Flush previous list
|
||||
metadata["lists"].append({
|
||||
"type": current_list_type,
|
||||
"items": list_items,
|
||||
})
|
||||
list_items = []
|
||||
current_list_type = list_type
|
||||
list_items.append({"text": text, "paragraph_index": i})
|
||||
text_parts.append(text)
|
||||
continue
|
||||
|
||||
# Flush any accumulated list before a non-list paragraph
|
||||
if list_items:
|
||||
metadata["lists"].append({
|
||||
"type": current_list_type or "unordered",
|
||||
"items": list_items,
|
||||
})
|
||||
list_items = []
|
||||
current_list_type = None
|
||||
|
||||
# Detect emphasis (bold/italic runs)
|
||||
for run in para.runs:
|
||||
run_text = run.text.strip()
|
||||
if not run_text:
|
||||
continue
|
||||
if run.bold:
|
||||
metadata["emphasis"].append({
|
||||
"type": "bold",
|
||||
"text": run_text,
|
||||
"paragraph_index": i,
|
||||
})
|
||||
if run.italic:
|
||||
metadata["emphasis"].append({
|
||||
"type": "italic",
|
||||
"text": run_text,
|
||||
"paragraph_index": i,
|
||||
})
|
||||
|
||||
text_parts.append(text)
|
||||
|
||||
# Flush trailing list
|
||||
if list_items:
|
||||
metadata["lists"].append({
|
||||
"type": current_list_type or "unordered",
|
||||
"items": list_items,
|
||||
})
|
||||
|
||||
# Extract tables
|
||||
for t_idx, table in enumerate(doc.tables):
|
||||
table_data: list[list[str]] = []
|
||||
for row in table.rows:
|
||||
table_data.append([cell.text.strip() for cell in row.cells])
|
||||
if table_data:
|
||||
metadata["tables"].append({
|
||||
"table_index": t_idx,
|
||||
"rows": table_data,
|
||||
})
|
||||
# Also add table content to text
|
||||
for row in table_data:
|
||||
text_parts.append(" | ".join(row))
|
||||
|
||||
full_text = "\n".join(text_parts).strip()
|
||||
|
||||
# Clean up empty metadata sections
|
||||
metadata = {k: v for k, v in metadata.items() if v}
|
||||
|
||||
return full_text, metadata if metadata else None
|
||||
|
||||
|
||||
# Registry of format handlers — extend for Phase 2
|
||||
FORMAT_HANDLERS: dict[str, ExtractHandler] = {
|
||||
"txt": _extract_txt,
|
||||
"paste": _extract_paste,
|
||||
"docx": _extract_docx,
|
||||
}
|
||||
|
||||
|
||||
def extract_text(
|
||||
content_bytes: bytes,
|
||||
source_format: str,
|
||||
) -> ExtractResult:
|
||||
"""Extract plain text and structural metadata from uploaded content.
|
||||
|
||||
Args:
|
||||
content_bytes: Raw bytes of the uploaded content.
|
||||
source_format: Format identifier ('txt', 'paste', 'docx', etc.)
|
||||
|
||||
Returns:
|
||||
Tuple of (plain_text, structural_metadata_or_none).
|
||||
|
||||
Raises:
|
||||
ValueError: If the format is not supported.
|
||||
RuntimeError: If a required extraction library is not installed.
|
||||
"""
|
||||
handler = FORMAT_HANDLERS.get(source_format)
|
||||
if handler is None:
|
||||
raise ValueError(f"Unsupported format: {source_format}")
|
||||
|
||||
logger.info("Extracting text from format=%s", source_format)
|
||||
text, metadata = handler(content_bytes)
|
||||
|
||||
if not text.strip():
|
||||
raise ValueError("Extracted text is empty — the document may be blank or contain only images.")
|
||||
|
||||
logger.info(
|
||||
"Extraction complete: %d chars, metadata=%s",
|
||||
len(text),
|
||||
"yes" if metadata else "no",
|
||||
)
|
||||
return text, metadata
|
||||
@@ -34,6 +34,7 @@ from .copilot_conversation import CopilotConversation
|
||||
from .assistant_chat import AssistantChat
|
||||
from .survey_response import SurveyResponse
|
||||
from .survey_invite import SurveyInvite
|
||||
from .kb_import import KBImport, KBImportNode
|
||||
|
||||
__all__ = [
|
||||
"User",
|
||||
@@ -79,4 +80,6 @@ __all__ = [
|
||||
"AssistantChat",
|
||||
"SurveyResponse",
|
||||
"SurveyInvite",
|
||||
"KBImport",
|
||||
"KBImportNode",
|
||||
]
|
||||
|
||||
140
backend/app/models/kb_import.py
Normal file
140
backend/app/models/kb_import.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, Any, TYPE_CHECKING
|
||||
from sqlalchemy import String, Text, DateTime, ForeignKey, Boolean, Integer, Float, CheckConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from app.core.database import Base
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.models.account import Account
|
||||
from app.models.user import User
|
||||
from app.models.tree import Tree
|
||||
|
||||
|
||||
class KBImport(Base):
|
||||
__tablename__ = "kb_imports"
|
||||
__table_args__ = (
|
||||
CheckConstraint(
|
||||
"source_format IN ('txt', 'paste', 'docx', 'pdf', 'html', 'md')",
|
||||
name="ck_kb_imports_source_format",
|
||||
),
|
||||
CheckConstraint(
|
||||
"target_type IN ('troubleshooting', 'procedural')",
|
||||
name="ck_kb_imports_target_type",
|
||||
),
|
||||
CheckConstraint(
|
||||
"status IN ('processing', 'ready', 'committed', 'failed')",
|
||||
name="ck_kb_imports_status",
|
||||
),
|
||||
)
|
||||
|
||||
id: Mapped[uuid.UUID] = mapped_column(
|
||||
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
|
||||
)
|
||||
account_id: Mapped[uuid.UUID] = mapped_column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("accounts.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
created_by: Mapped[uuid.UUID] = mapped_column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("users.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
source_filename: Mapped[Optional[str]] = mapped_column(
|
||||
String(500), nullable=True
|
||||
)
|
||||
source_format: Mapped[str] = mapped_column(String(20), nullable=False)
|
||||
source_text: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
source_metadata: Mapped[Optional[dict[str, Any]]] = mapped_column(
|
||||
JSONB, nullable=True
|
||||
)
|
||||
target_type: Mapped[str] = mapped_column(String(20), nullable=False)
|
||||
status: Mapped[str] = mapped_column(
|
||||
String(20), nullable=False, default="processing"
|
||||
)
|
||||
confidence_avg: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
|
||||
error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
processing_time_ms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
ai_tokens_input: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
ai_tokens_output: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
tree_id: Mapped[Optional[uuid.UUID]] = mapped_column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("trees.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
)
|
||||
batch_id: Mapped[Optional[uuid.UUID]] = mapped_column(
|
||||
UUID(as_uuid=True), nullable=True, index=True
|
||||
)
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
|
||||
)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
# Relationships
|
||||
account: Mapped["Account"] = relationship("Account", foreign_keys=[account_id])
|
||||
created_by_user: Mapped["User"] = relationship("User", foreign_keys=[created_by])
|
||||
tree: Mapped[Optional["Tree"]] = relationship("Tree", foreign_keys=[tree_id])
|
||||
nodes: Mapped[list["KBImportNode"]] = relationship(
|
||||
"KBImportNode",
|
||||
back_populates="kb_import",
|
||||
cascade="all, delete-orphan",
|
||||
order_by="KBImportNode.node_order",
|
||||
)
|
||||
|
||||
|
||||
class KBImportNode(Base):
|
||||
__tablename__ = "kb_import_nodes"
|
||||
__table_args__ = (
|
||||
CheckConstraint(
|
||||
"node_type IN ('question', 'resolution', 'step', 'section_header', 'warning', 'action')",
|
||||
name="ck_kb_import_nodes_node_type",
|
||||
),
|
||||
)
|
||||
|
||||
id: Mapped[uuid.UUID] = mapped_column(
|
||||
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
|
||||
)
|
||||
kb_import_id: Mapped[uuid.UUID] = mapped_column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("kb_imports.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
node_order: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||
node_type: Mapped[str] = mapped_column(String(20), nullable=False)
|
||||
content: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False)
|
||||
parent_node_id: Mapped[Optional[uuid.UUID]] = mapped_column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("kb_import_nodes.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
)
|
||||
source_excerpt: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
||||
confidence_score: Mapped[float] = mapped_column(Float, nullable=False)
|
||||
user_edited: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
user_approved: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
|
||||
)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
# Relationships
|
||||
kb_import: Mapped["KBImport"] = relationship(
|
||||
"KBImport", back_populates="nodes"
|
||||
)
|
||||
parent: Mapped[Optional["KBImportNode"]] = relationship(
|
||||
"KBImportNode",
|
||||
remote_side="KBImportNode.id",
|
||||
foreign_keys=[parent_node_id],
|
||||
)
|
||||
@@ -1,4 +1,4 @@
|
||||
from sqlalchemy import String, Integer, Boolean
|
||||
from sqlalchemy import String, Integer, Boolean, text
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from app.core.database import Base
|
||||
@@ -18,3 +18,13 @@ class PlanLimits(Base):
|
||||
# AI Flow Builder limits
|
||||
max_ai_builds_per_month: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||
max_ai_builds_per_24h: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||
|
||||
# KB Accelerator limits
|
||||
kb_accelerator_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
|
||||
kb_max_lifetime_conversions: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||
kb_batch_max_size: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||
kb_allowed_formats: Mapped[list] = mapped_column(JSONB, nullable=False, default=lambda: ["txt", "paste"], server_default=text("'[\"txt\",\"paste\"]'::jsonb"))
|
||||
kb_detailed_analysis: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
|
||||
kb_conversational_refinement: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
|
||||
kb_step_library_matching: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
|
||||
kb_history_limit: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||
|
||||
142
backend/app/schemas/kb_accelerator.py
Normal file
142
backend/app/schemas/kb_accelerator.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""Pydantic schemas for KB Accelerator."""
|
||||
from typing import Any, Literal, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# ── Requests ──
|
||||
|
||||
|
||||
class KBUploadTextRequest(BaseModel):
|
||||
"""Upload KB article via text paste."""
|
||||
|
||||
content: str = Field(..., min_length=10, max_length=500_000)
|
||||
title: Optional[str] = Field(None, min_length=1, max_length=255)
|
||||
target_type: Optional[Literal["troubleshooting", "procedural"]] = Field(
|
||||
None, description="Target flow type. If omitted, AI decides."
|
||||
)
|
||||
|
||||
|
||||
class KBNodeEditRequest(BaseModel):
|
||||
"""Edit a specific KB import node during review."""
|
||||
|
||||
operation: Literal[
|
||||
"approve", "reject", "edit", "delete", "regenerate", "insert_after"
|
||||
]
|
||||
content: Optional[dict[str, Any]] = Field(
|
||||
None, description="Updated node content (required for 'edit' and 'insert_after')"
|
||||
)
|
||||
guidance: Optional[str] = Field(
|
||||
None,
|
||||
max_length=2000,
|
||||
description="User guidance for 'regenerate' operation",
|
||||
)
|
||||
|
||||
|
||||
class KBCommitRequest(BaseModel):
|
||||
"""Optional overrides when committing a KB import to the flow library."""
|
||||
|
||||
name: Optional[str] = Field(None, min_length=1, max_length=255)
|
||||
description: Optional[str] = Field(None, max_length=2000)
|
||||
category_id: Optional[UUID] = None
|
||||
|
||||
|
||||
# ── Responses ──
|
||||
|
||||
|
||||
class KBImportNodeResponse(BaseModel):
|
||||
"""A single generated node in a KB import."""
|
||||
|
||||
id: UUID
|
||||
kb_import_id: UUID
|
||||
node_order: int
|
||||
node_type: str
|
||||
content: dict[str, Any]
|
||||
parent_node_id: Optional[UUID] = None
|
||||
source_excerpt: Optional[str] = None
|
||||
confidence_score: float
|
||||
user_edited: bool
|
||||
user_approved: bool
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class KBUploadResponse(BaseModel):
|
||||
"""Response after uploading a KB article."""
|
||||
|
||||
id: UUID
|
||||
status: str
|
||||
source_format: str
|
||||
|
||||
|
||||
class KBImportResponse(BaseModel):
|
||||
"""Full KB import detail with nodes."""
|
||||
|
||||
id: UUID
|
||||
account_id: UUID
|
||||
created_by: UUID
|
||||
source_filename: Optional[str] = None
|
||||
source_format: str
|
||||
source_text: str
|
||||
source_metadata: Optional[dict[str, Any]] = None
|
||||
target_type: str
|
||||
status: str
|
||||
confidence_avg: Optional[float] = None
|
||||
error_message: Optional[str] = None
|
||||
processing_time_ms: Optional[int] = None
|
||||
ai_tokens_input: Optional[int] = None
|
||||
ai_tokens_output: Optional[int] = None
|
||||
tree_id: Optional[UUID] = None
|
||||
nodes: list[KBImportNodeResponse] = []
|
||||
created_at: str
|
||||
updated_at: str
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class KBImportSummary(BaseModel):
|
||||
"""Lightweight import item for list view."""
|
||||
|
||||
id: UUID
|
||||
source_filename: Optional[str] = None
|
||||
source_format: str
|
||||
target_type: str
|
||||
status: str
|
||||
confidence_avg: Optional[float] = None
|
||||
node_count: int = 0
|
||||
created_at: str
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class KBImportListResponse(BaseModel):
|
||||
"""Paginated list of KB imports."""
|
||||
|
||||
items: list[KBImportSummary]
|
||||
total: int
|
||||
skip: int
|
||||
limit: int
|
||||
|
||||
|
||||
class KBCommitResponse(BaseModel):
|
||||
"""Response after committing a KB import to the flow library."""
|
||||
|
||||
tree_id: UUID
|
||||
import_id: UUID
|
||||
tree_type: str
|
||||
|
||||
|
||||
class KBQuotaResponse(BaseModel):
|
||||
"""Current KB Accelerator entitlements and usage for the user's account."""
|
||||
|
||||
plan: str
|
||||
kb_accelerator_enabled: bool
|
||||
lifetime_conversions_used: int
|
||||
lifetime_conversions_limit: Optional[int] = None
|
||||
allowed_formats: list[str]
|
||||
detailed_analysis: bool
|
||||
conversational_refinement: bool
|
||||
step_library_matching: bool
|
||||
history_limit: Optional[int] = None
|
||||
can_convert: bool
|
||||
334
backend/tests/test_kb_accelerator.py
Normal file
334
backend/tests/test_kb_accelerator.py
Normal file
@@ -0,0 +1,334 @@
|
||||
"""Integration tests for KB Accelerator endpoints."""
|
||||
import pytest
|
||||
import json
|
||||
from unittest.mock import AsyncMock, patch, PropertyMock
|
||||
from httpx import AsyncClient
|
||||
|
||||
pytestmark = pytest.mark.asyncio
|
||||
|
||||
|
||||
# ── Fixtures ──
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def kb_setup(client, auth_headers, test_db):
|
||||
"""Seed KB plan limits and return helpers."""
|
||||
# Update plan_limits with KB columns for 'free' plan
|
||||
await test_db.execute(
|
||||
__import__("sqlalchemy").text("""
|
||||
UPDATE plan_limits SET
|
||||
kb_accelerator_enabled = true,
|
||||
kb_max_lifetime_conversions = 3,
|
||||
kb_allowed_formats = '["txt","paste"]'::jsonb,
|
||||
kb_detailed_analysis = false,
|
||||
kb_conversational_refinement = false,
|
||||
kb_step_library_matching = false,
|
||||
kb_history_limit = 3
|
||||
WHERE plan = 'free'
|
||||
""")
|
||||
)
|
||||
await test_db.execute(
|
||||
__import__("sqlalchemy").text("""
|
||||
UPDATE plan_limits SET
|
||||
kb_accelerator_enabled = true,
|
||||
kb_max_lifetime_conversions = NULL,
|
||||
kb_allowed_formats = '["txt","paste","docx","pdf","html","md"]'::jsonb,
|
||||
kb_detailed_analysis = true,
|
||||
kb_conversational_refinement = true,
|
||||
kb_step_library_matching = true,
|
||||
kb_history_limit = NULL
|
||||
WHERE plan = 'pro'
|
||||
""")
|
||||
)
|
||||
await test_db.commit()
|
||||
return {"client": client, "headers": auth_headers}
|
||||
|
||||
|
||||
def _mock_ai_enabled():
|
||||
"""Context manager to mock AI as enabled."""
|
||||
return patch.object(
|
||||
type(__import__("app.core.config", fromlist=["settings"]).settings),
|
||||
"ai_enabled",
|
||||
new_callable=PropertyMock,
|
||||
return_value=True,
|
||||
)
|
||||
|
||||
|
||||
SAMPLE_KB_TEXT = """
|
||||
Troubleshooting Outlook Connectivity Issues
|
||||
|
||||
Problem: Users report that Outlook keeps disconnecting from Exchange.
|
||||
|
||||
Step 1: Check Network Connectivity
|
||||
Ping the Exchange server to verify network connectivity.
|
||||
If ping fails, check the network configuration.
|
||||
|
||||
Step 2: Verify Outlook Profile
|
||||
If the network is working, check the Outlook profile settings.
|
||||
Go to Control Panel > Mail > Show Profiles.
|
||||
|
||||
Step 3: Check Exchange Server
|
||||
If the profile is correct, verify the Exchange server is running.
|
||||
Open Services.msc and check Microsoft Exchange services.
|
||||
|
||||
Resolution: After following these steps, Outlook should maintain
|
||||
a persistent connection to Exchange.
|
||||
"""
|
||||
|
||||
MOCK_AI_TROUBLESHOOTING_RESPONSE = json.dumps({
|
||||
"title": "Troubleshooting Outlook Connectivity",
|
||||
"description": "Diagnose and fix Outlook disconnection from Exchange",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "root-check",
|
||||
"type": "question",
|
||||
"question": "Is the network connection working?",
|
||||
"options": [
|
||||
{"label": "Yes", "next_node_id": "check-profile"},
|
||||
{"label": "No", "next_node_id": "fix-network"},
|
||||
],
|
||||
"confidence": 0.92,
|
||||
"source_excerpt": "Step 1: Check Network Connectivity",
|
||||
},
|
||||
{
|
||||
"id": "fix-network",
|
||||
"type": "resolution",
|
||||
"question": "Fix the network configuration and retry.",
|
||||
"confidence": 0.85,
|
||||
"source_excerpt": "If ping fails, check the network configuration.",
|
||||
},
|
||||
{
|
||||
"id": "check-profile",
|
||||
"type": "question",
|
||||
"question": "Is the Outlook profile configured correctly?",
|
||||
"options": [
|
||||
{"label": "Yes", "next_node_id": "check-exchange"},
|
||||
{"label": "No", "next_node_id": "fix-profile"},
|
||||
],
|
||||
"confidence": 0.88,
|
||||
"source_excerpt": "Step 2: Verify Outlook Profile",
|
||||
},
|
||||
{
|
||||
"id": "fix-profile",
|
||||
"type": "resolution",
|
||||
"question": "Reconfigure the Outlook profile via Control Panel > Mail.",
|
||||
"confidence": 0.90,
|
||||
"source_excerpt": "Go to Control Panel > Mail > Show Profiles.",
|
||||
},
|
||||
{
|
||||
"id": "check-exchange",
|
||||
"type": "resolution",
|
||||
"question": "Verify Exchange services are running in Services.msc.",
|
||||
"confidence": 0.87,
|
||||
"source_excerpt": "Open Services.msc and check Microsoft Exchange services.",
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
MOCK_AI_PROCEDURAL_RESPONSE = json.dumps({
|
||||
"title": "Setup New Domain Controller",
|
||||
"description": "Step-by-step procedure for setting up a new DC",
|
||||
"steps": [
|
||||
{
|
||||
"id": "step-1",
|
||||
"type": "step",
|
||||
"content": "Open Server Manager on [VAR:server_name]",
|
||||
"confidence": 0.95,
|
||||
"source_excerpt": "Step 1: Open Server Manager on DC01",
|
||||
},
|
||||
{
|
||||
"id": "warning-dns",
|
||||
"type": "warning",
|
||||
"content": "WARNING: This will restart DNS and cause brief connectivity loss",
|
||||
"confidence": 0.90,
|
||||
"source_excerpt": "Note: Restarting DNS will cause a brief outage",
|
||||
},
|
||||
{
|
||||
"id": "step-2",
|
||||
"type": "step",
|
||||
"content": "Configure IP address [VAR:ip_address] on the network adapter",
|
||||
"confidence": 0.88,
|
||||
"source_excerpt": "Configure IP 192.168.1.10 on the adapter",
|
||||
},
|
||||
],
|
||||
"intake_form": [
|
||||
{
|
||||
"variable_name": "server_name",
|
||||
"label": "Server Name",
|
||||
"field_type": "text",
|
||||
"required": True,
|
||||
"display_order": 1,
|
||||
},
|
||||
{
|
||||
"variable_name": "ip_address",
|
||||
"label": "IP Address",
|
||||
"field_type": "text",
|
||||
"required": True,
|
||||
"display_order": 2,
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
|
||||
# ── Upload Tests ──
|
||||
|
||||
|
||||
class TestUpload:
|
||||
async def test_upload_text_paste(self, kb_setup):
|
||||
"""Upload via text paste creates a kb_import in processing status."""
|
||||
c, h = kb_setup["client"], kb_setup["headers"]
|
||||
|
||||
with _mock_ai_enabled():
|
||||
# Mock the background conversion (don't actually call AI)
|
||||
with patch("app.api.endpoints.kb_accelerator._run_conversion"):
|
||||
resp = await c.post(
|
||||
"/api/v1/kb-accelerator/upload",
|
||||
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
|
||||
headers=h,
|
||||
)
|
||||
|
||||
assert resp.status_code == 201
|
||||
data = resp.json()
|
||||
assert data["status"] == "processing"
|
||||
assert data["source_format"] == "paste"
|
||||
assert "id" in data
|
||||
|
||||
async def test_upload_empty_content_rejected(self, kb_setup):
|
||||
c, h = kb_setup["client"], kb_setup["headers"]
|
||||
with _mock_ai_enabled():
|
||||
resp = await c.post(
|
||||
"/api/v1/kb-accelerator/upload",
|
||||
data={"content": "short"},
|
||||
headers=h,
|
||||
)
|
||||
assert resp.status_code == 400
|
||||
|
||||
async def test_upload_no_file_no_content_rejected(self, kb_setup):
|
||||
c, h = kb_setup["client"], kb_setup["headers"]
|
||||
with _mock_ai_enabled():
|
||||
resp = await c.post(
|
||||
"/api/v1/kb-accelerator/upload",
|
||||
data={},
|
||||
headers=h,
|
||||
)
|
||||
assert resp.status_code == 400
|
||||
|
||||
|
||||
# ── Get/List Tests ──
|
||||
|
||||
|
||||
class TestGetList:
|
||||
async def test_get_import(self, kb_setup):
|
||||
c, h = kb_setup["client"], kb_setup["headers"]
|
||||
|
||||
with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
|
||||
create_resp = await c.post(
|
||||
"/api/v1/kb-accelerator/upload",
|
||||
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
|
||||
headers=h,
|
||||
)
|
||||
import_id = create_resp.json()["id"]
|
||||
|
||||
resp = await c.get(f"/api/v1/kb-accelerator/{import_id}", headers=h)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["id"] == import_id
|
||||
assert data["source_format"] == "paste"
|
||||
|
||||
async def test_list_imports(self, kb_setup):
|
||||
c, h = kb_setup["client"], kb_setup["headers"]
|
||||
|
||||
with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
|
||||
await c.post(
|
||||
"/api/v1/kb-accelerator/upload",
|
||||
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
|
||||
headers=h,
|
||||
)
|
||||
|
||||
resp = await c.get("/api/v1/kb-accelerator", headers=h)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["total"] >= 1
|
||||
assert len(data["items"]) >= 1
|
||||
|
||||
|
||||
# ── Quota Tests ──
|
||||
|
||||
|
||||
class TestQuota:
|
||||
async def test_get_quota(self, kb_setup):
|
||||
c, h = kb_setup["client"], kb_setup["headers"]
|
||||
resp = await c.get("/api/v1/kb-accelerator/quota", headers=h)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["kb_accelerator_enabled"] is True
|
||||
assert data["lifetime_conversions_limit"] == 3
|
||||
assert data["can_convert"] is True
|
||||
|
||||
|
||||
# ── Commit Tests ──
|
||||
|
||||
|
||||
class TestCommit:
|
||||
async def test_commit_creates_tree(self, kb_setup, test_db):
|
||||
"""Committing a ready import creates a Tree record."""
|
||||
c, h = kb_setup["client"], kb_setup["headers"]
|
||||
|
||||
# Create import
|
||||
with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
|
||||
create_resp = await c.post(
|
||||
"/api/v1/kb-accelerator/upload",
|
||||
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
|
||||
headers=h,
|
||||
)
|
||||
import_id = create_resp.json()["id"]
|
||||
|
||||
# Simulate conversion complete: update status + add nodes directly
|
||||
from app.models.kb_import import KBImport, KBImportNode
|
||||
from sqlalchemy import select
|
||||
import uuid
|
||||
|
||||
result = await test_db.execute(select(KBImport).where(KBImport.id == uuid.UUID(import_id)))
|
||||
kb_import = result.scalar_one()
|
||||
kb_import.status = "ready"
|
||||
kb_import.source_metadata = {"_conversion": {"title": "Test Flow", "description": "Test"}}
|
||||
|
||||
node = KBImportNode(
|
||||
kb_import_id=kb_import.id,
|
||||
node_order=0,
|
||||
node_type="question",
|
||||
content={"original_id": "root", "question": "Test question?", "options": []},
|
||||
confidence_score=0.9,
|
||||
)
|
||||
test_db.add(node)
|
||||
await test_db.commit()
|
||||
|
||||
# Commit
|
||||
resp = await c.post(f"/api/v1/kb-accelerator/{import_id}/commit", headers=h)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert "tree_id" in data
|
||||
assert data["tree_type"] == "troubleshooting"
|
||||
|
||||
|
||||
# ── Delete Tests ──
|
||||
|
||||
|
||||
class TestDelete:
|
||||
async def test_delete_import(self, kb_setup):
|
||||
c, h = kb_setup["client"], kb_setup["headers"]
|
||||
|
||||
with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
|
||||
create_resp = await c.post(
|
||||
"/api/v1/kb-accelerator/upload",
|
||||
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
|
||||
headers=h,
|
||||
)
|
||||
import_id = create_resp.json()["id"]
|
||||
|
||||
resp = await c.delete(f"/api/v1/kb-accelerator/{import_id}", headers=h)
|
||||
assert resp.status_code == 204
|
||||
|
||||
# Verify deleted
|
||||
resp = await c.get(f"/api/v1/kb-accelerator/{import_id}", headers=h)
|
||||
assert resp.status_code == 404
|
||||
Reference in New Issue
Block a user