feat: KB Accelerator — convert KB articles into interactive flows

Full-stack implementation of the KB Accelerator feature that converts
static MSP knowledge base articles into interactive troubleshooting
and procedural flows using AI.

Backend:
- Migrations 054/055: kb_imports, kb_import_nodes tables + plan_limits KB columns
- SQLAlchemy models with relationships and self-referential node hierarchy
- Text extraction service (txt, paste, docx with structural metadata)
- AI conversion service with MSP-specialist prompts for both flow types
- 8 API endpoints: upload, get, list, convert, edit node, commit, delete, quota
- Tier-gated access via plan_limits (free: 3 lifetime, pro/team: unlimited)
- 8 integration tests covering upload, get/list, quota, commit, delete

Frontend:
- TypeScript types and API client for all KB Accelerator endpoints
- Multi-step wizard page: upload → processing → review → success
- Upload screen with paste/file tabs, drag-drop, target type selector
- Two-panel review screen with source highlighting and node cards
- Per-node actions: approve, edit, regenerate, insert, delete
- Confidence color indicators (green/amber/red)
- Sidebar navigation with Sparkles icon
- Code-split lazy-loaded route at /kb-accelerator

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Michael Chihlas
2026-03-10 20:56:28 -04:00
parent c65aa4f0b7
commit 71ff4a8c35
27 changed files with 4426 additions and 2 deletions

View File

@@ -17,6 +17,7 @@ from app.models.assistant_chat import AssistantChat
from app.models.survey_response import SurveyResponse
from app.models.survey_invite import SurveyInvite
from app.models.ai_suggestion import AISuggestion # noqa: F401
from app.models.kb_import import KBImport, KBImportNode # noqa: F401
from app.core.config import settings
# this is the Alembic Config object

View File

@@ -0,0 +1,79 @@
"""add kb_imports and kb_import_nodes tables
Revision ID: 054
Revises: 053
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID, JSONB
revision = "054"
down_revision = "053"
branch_labels = None
depends_on = None
def upgrade():
op.create_table(
"kb_imports",
sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
sa.Column("account_id", UUID(as_uuid=True), sa.ForeignKey("accounts.id", ondelete="CASCADE"), nullable=False, index=True),
sa.Column("created_by", UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True),
sa.Column("source_filename", sa.String(500), nullable=True),
sa.Column("source_format", sa.String(20), nullable=False),
sa.Column("source_text", sa.Text, nullable=False),
sa.Column("source_metadata", JSONB, nullable=True),
sa.Column("target_type", sa.String(20), nullable=False),
sa.Column("status", sa.String(20), nullable=False, server_default="processing"),
sa.Column("confidence_avg", sa.Float, nullable=True),
sa.Column("error_message", sa.Text, nullable=True),
sa.Column("processing_time_ms", sa.Integer, nullable=True),
sa.Column("ai_tokens_input", sa.Integer, nullable=True),
sa.Column("ai_tokens_output", sa.Integer, nullable=True),
sa.Column("tree_id", UUID(as_uuid=True), sa.ForeignKey("trees.id", ondelete="SET NULL"), nullable=True),
sa.Column("batch_id", UUID(as_uuid=True), nullable=True, index=True),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
sa.CheckConstraint(
"source_format IN ('txt', 'paste', 'docx', 'pdf', 'html', 'md')",
name="ck_kb_imports_source_format",
),
sa.CheckConstraint(
"target_type IN ('troubleshooting', 'procedural')",
name="ck_kb_imports_target_type",
),
sa.CheckConstraint(
"status IN ('processing', 'ready', 'committed', 'failed')",
name="ck_kb_imports_status",
),
)
op.create_index("ix_kb_imports_status", "kb_imports", ["status"])
op.create_index("ix_kb_imports_created_at_desc", "kb_imports", [sa.text("created_at DESC")])
op.create_table(
"kb_import_nodes",
sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
sa.Column("kb_import_id", UUID(as_uuid=True), sa.ForeignKey("kb_imports.id", ondelete="CASCADE"), nullable=False, index=True),
sa.Column("node_order", sa.Integer, nullable=False),
sa.Column("node_type", sa.String(20), nullable=False),
sa.Column("content", JSONB, nullable=False),
sa.Column("parent_node_id", UUID(as_uuid=True), sa.ForeignKey("kb_import_nodes.id", ondelete="SET NULL"), nullable=True),
sa.Column("source_excerpt", sa.Text, nullable=True),
sa.Column("confidence_score", sa.Float, nullable=False),
sa.Column("user_edited", sa.Boolean, nullable=False, server_default=sa.text("false")),
sa.Column("user_approved", sa.Boolean, nullable=False, server_default=sa.text("false")),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
sa.CheckConstraint(
"node_type IN ('question', 'resolution', 'step', 'section_header', 'warning', 'action')",
name="ck_kb_import_nodes_node_type",
),
)
op.create_index("ix_kb_import_nodes_confidence", "kb_import_nodes", ["confidence_score"])
def downgrade():
op.drop_table("kb_import_nodes")
op.drop_table("kb_imports")

View File

@@ -0,0 +1,76 @@
"""add KB Accelerator columns to plan_limits
Revision ID: 055
Revises: 054
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
revision = "055"
down_revision = "054"
branch_labels = None
depends_on = None
def upgrade():
# Add KB Accelerator columns to plan_limits
op.add_column("plan_limits", sa.Column("kb_accelerator_enabled", sa.Boolean, nullable=False, server_default=sa.text("false")))
op.add_column("plan_limits", sa.Column("kb_max_lifetime_conversions", sa.Integer, nullable=True))
op.add_column("plan_limits", sa.Column("kb_batch_max_size", sa.Integer, nullable=True))
op.add_column("plan_limits", sa.Column("kb_allowed_formats", JSONB, nullable=False, server_default=sa.text("'[\"txt\",\"paste\"]'::jsonb")))
op.add_column("plan_limits", sa.Column("kb_detailed_analysis", sa.Boolean, nullable=False, server_default=sa.text("false")))
op.add_column("plan_limits", sa.Column("kb_conversational_refinement", sa.Boolean, nullable=False, server_default=sa.text("false")))
op.add_column("plan_limits", sa.Column("kb_step_library_matching", sa.Boolean, nullable=False, server_default=sa.text("false")))
op.add_column("plan_limits", sa.Column("kb_history_limit", sa.Integer, nullable=True))
# Seed defaults for each plan tier
op.execute("""
UPDATE plan_limits SET
kb_accelerator_enabled = true,
kb_max_lifetime_conversions = 3,
kb_batch_max_size = NULL,
kb_allowed_formats = '["txt","paste"]'::jsonb,
kb_detailed_analysis = false,
kb_conversational_refinement = false,
kb_step_library_matching = false,
kb_history_limit = 3
WHERE plan = 'free'
""")
op.execute("""
UPDATE plan_limits SET
kb_accelerator_enabled = true,
kb_max_lifetime_conversions = NULL,
kb_batch_max_size = 5,
kb_allowed_formats = '["txt","paste","docx","pdf","html","md"]'::jsonb,
kb_detailed_analysis = true,
kb_conversational_refinement = true,
kb_step_library_matching = true,
kb_history_limit = NULL
WHERE plan = 'pro'
""")
op.execute("""
UPDATE plan_limits SET
kb_accelerator_enabled = true,
kb_max_lifetime_conversions = NULL,
kb_batch_max_size = 10,
kb_allowed_formats = '["txt","paste","docx","pdf","html","md"]'::jsonb,
kb_detailed_analysis = true,
kb_conversational_refinement = true,
kb_step_library_matching = true,
kb_history_limit = NULL
WHERE plan = 'team'
""")
def downgrade():
op.drop_column("plan_limits", "kb_history_limit")
op.drop_column("plan_limits", "kb_step_library_matching")
op.drop_column("plan_limits", "kb_conversational_refinement")
op.drop_column("plan_limits", "kb_detailed_analysis")
op.drop_column("plan_limits", "kb_allowed_formats")
op.drop_column("plan_limits", "kb_batch_max_size")
op.drop_column("plan_limits", "kb_max_lifetime_conversions")
op.drop_column("plan_limits", "kb_accelerator_enabled")

View File

@@ -0,0 +1,685 @@
"""KB Accelerator endpoints.
Upload KB articles, convert to flows via AI, review, and commit.
POST /kb-accelerator/upload — Upload file or paste text
GET /kb-accelerator/{id} — Get import with nodes
GET /kb-accelerator — List imports for account
POST /kb-accelerator/{id}/convert — Re-trigger AI conversion
PATCH /kb-accelerator/{id}/nodes/{nid} — Edit a node
POST /kb-accelerator/{id}/commit — Commit to flow library
DELETE /kb-accelerator/{id} — Cancel/cleanup
GET /kb-accelerator/quota — Plan entitlements + usage
"""
import logging
import mimetypes
from datetime import datetime, timezone
from typing import Annotated, Optional
from uuid import UUID
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, UploadFile, File, Form, status
from sqlalchemy import select, func, delete
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
from app.core.config import settings
from app.core.rate_limit import limiter
from app.core.subscriptions import get_plan_limits
from app.core.ai_quota_service import get_user_plan
from app.core.kb_extraction_service import extract_text
from app.core.kb_conversion_service import convert_document
from app.models.kb_import import KBImport, KBImportNode
from app.models.plan_limits import PlanLimits
from app.models.tree import Tree
from app.models.user import User
from app.schemas.kb_accelerator import (
KBUploadTextRequest,
KBNodeEditRequest,
KBCommitRequest,
KBUploadResponse,
KBImportResponse,
KBImportNodeResponse,
KBImportSummary,
KBImportListResponse,
KBCommitResponse,
KBQuotaResponse,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/kb-accelerator", tags=["kb-accelerator"])
# Max upload size: 10MB
MAX_UPLOAD_SIZE = 10 * 1024 * 1024
ALLOWED_EXTENSIONS = {
"txt": ["text/plain"],
"docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
}
# Phase 2 formats (not yet enabled)
PHASE2_EXTENSIONS = {
"pdf": ["application/pdf"],
"html": ["text/html"],
"md": ["text/markdown", "text/plain"],
}
def _detect_format(filename: str) -> str | None:
"""Detect source format from filename extension."""
if not filename:
return None
ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else None
if ext in ALLOWED_EXTENSIONS or ext in PHASE2_EXTENSIONS:
return ext
return None
async def _get_kb_limits(user: User, db: AsyncSession) -> PlanLimits | None:
plan = await get_user_plan(user.account_id, db)
return await get_plan_limits(plan, db)
async def _check_kb_enabled(user: User, db: AsyncSession) -> PlanLimits:
limits = await _get_kb_limits(user, db)
if not limits or not limits.kb_accelerator_enabled:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="KB Accelerator is not available on your plan.",
)
return limits
async def _check_lifetime_limit(user: User, limits: PlanLimits, db: AsyncSession) -> None:
if limits.kb_max_lifetime_conversions is None:
return # Unlimited
count = await db.scalar(
select(func.count(KBImport.id)).where(
KBImport.account_id == user.account_id,
KBImport.status == "committed",
)
) or 0
if count >= limits.kb_max_lifetime_conversions:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail=f"You have reached your lifetime limit of {limits.kb_max_lifetime_conversions} KB conversions. Upgrade your plan for unlimited conversions.",
)
async def _check_format_allowed(source_format: str, limits: PlanLimits) -> None:
allowed = limits.kb_allowed_formats or ["txt", "paste"]
if source_format not in allowed:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail=f"Format '{source_format}' is not available on your plan. Allowed: {', '.join(allowed)}",
)
async def _get_import_or_404(
import_id: UUID, user: User, db: AsyncSession, *, load_nodes: bool = True
) -> KBImport:
query = select(KBImport).where(
KBImport.id == import_id,
KBImport.account_id == user.account_id,
)
if load_nodes:
query = query.options(selectinload(KBImport.nodes))
result = await db.execute(query)
kb_import = result.scalar_one_or_none()
if not kb_import:
raise HTTPException(status_code=404, detail="KB import not found")
return kb_import
async def _run_conversion(import_id: UUID, db_url: str) -> None:
"""Background task: run AI conversion on a KB import."""
from app.core.database import async_session_maker
async with async_session_maker() as db:
result = await db.execute(
select(KBImport).where(KBImport.id == import_id)
)
kb_import = result.scalar_one_or_none()
if not kb_import or kb_import.status != "processing":
return
try:
await convert_document(kb_import, db)
await db.commit()
except Exception as e:
logger.error("Background KB conversion failed: %s", e)
kb_import.status = "failed"
kb_import.error_message = f"Conversion error: {str(e)}"
await db.commit()
def _serialize_import(kb_import: KBImport) -> dict:
"""Serialize a KBImport to dict for response."""
return {
"id": kb_import.id,
"account_id": kb_import.account_id,
"created_by": kb_import.created_by,
"source_filename": kb_import.source_filename,
"source_format": kb_import.source_format,
"source_text": kb_import.source_text,
"source_metadata": kb_import.source_metadata,
"target_type": kb_import.target_type,
"status": kb_import.status,
"confidence_avg": kb_import.confidence_avg,
"error_message": kb_import.error_message,
"processing_time_ms": kb_import.processing_time_ms,
"ai_tokens_input": kb_import.ai_tokens_input,
"ai_tokens_output": kb_import.ai_tokens_output,
"tree_id": kb_import.tree_id,
"nodes": [
KBImportNodeResponse.model_validate(n) for n in kb_import.nodes
] if kb_import.nodes else [],
"created_at": kb_import.created_at.isoformat(),
"updated_at": kb_import.updated_at.isoformat(),
}
# ── Endpoints ──
@router.get("/quota", response_model=KBQuotaResponse)
async def get_quota(
user: Annotated[User, Depends(require_engineer_or_admin)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Get KB Accelerator entitlements and usage for the current account."""
plan = await get_user_plan(user.account_id, db)
limits = await get_plan_limits(plan, db)
committed_count = await db.scalar(
select(func.count(KBImport.id)).where(
KBImport.account_id == user.account_id,
KBImport.status == "committed",
)
) or 0
if not limits:
return KBQuotaResponse(
plan=plan,
kb_accelerator_enabled=False,
lifetime_conversions_used=committed_count,
lifetime_conversions_limit=0,
allowed_formats=["txt", "paste"],
detailed_analysis=False,
conversational_refinement=False,
step_library_matching=False,
history_limit=3,
can_convert=False,
)
can_convert = limits.kb_accelerator_enabled
if limits.kb_max_lifetime_conversions is not None:
can_convert = can_convert and committed_count < limits.kb_max_lifetime_conversions
return KBQuotaResponse(
plan=plan,
kb_accelerator_enabled=limits.kb_accelerator_enabled,
lifetime_conversions_used=committed_count,
lifetime_conversions_limit=limits.kb_max_lifetime_conversions,
allowed_formats=limits.kb_allowed_formats or ["txt", "paste"],
detailed_analysis=limits.kb_detailed_analysis,
conversational_refinement=limits.kb_conversational_refinement,
step_library_matching=limits.kb_step_library_matching,
history_limit=limits.kb_history_limit,
can_convert=can_convert,
)
@router.post("/upload", response_model=KBUploadResponse, status_code=201)
@limiter.limit("10/minute")
async def upload_kb_article(
request: Request,
background_tasks: BackgroundTasks,
user: Annotated[User, Depends(require_engineer_or_admin)],
db: Annotated[AsyncSession, Depends(get_db)],
file: Optional[UploadFile] = File(None),
content: Optional[str] = Form(None),
title: Optional[str] = Form(None),
target_type: Optional[str] = Form(None),
):
"""Upload a KB article file or paste text for conversion."""
if not settings.ai_enabled:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="AI is not configured.",
)
limits = await _check_kb_enabled(user, db)
await _check_lifetime_limit(user, limits, db)
# Determine source format and extract text
if file and file.filename:
source_format = _detect_format(file.filename)
if not source_format:
raise HTTPException(
status_code=400,
detail=f"Unsupported file format. Supported: {', '.join(ALLOWED_EXTENSIONS.keys())}",
)
await _check_format_allowed(source_format, limits)
file_bytes = await file.read()
if len(file_bytes) > MAX_UPLOAD_SIZE:
raise HTTPException(status_code=413, detail="File exceeds 10MB limit.")
if len(file_bytes) == 0:
raise HTTPException(status_code=400, detail="Uploaded file is empty.")
source_filename = file.filename
try:
source_text, source_metadata = extract_text(file_bytes, source_format)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except RuntimeError as e:
raise HTTPException(status_code=500, detail=str(e))
elif content:
source_format = "paste"
await _check_format_allowed(source_format, limits)
source_filename = title
source_text = content.strip()
source_metadata = None
if len(source_text) < 10:
raise HTTPException(status_code=400, detail="Content must be at least 10 characters.")
else:
raise HTTPException(status_code=400, detail="Provide either a file or content text.")
# Validate target_type
if target_type and target_type not in ("troubleshooting", "procedural"):
raise HTTPException(status_code=400, detail="target_type must be 'troubleshooting' or 'procedural'.")
if not target_type:
target_type = "troubleshooting" # Default; Phase 2 adds "let AI decide"
# Create KB import record
kb_import = KBImport(
account_id=user.account_id,
created_by=user.id,
source_filename=source_filename,
source_format=source_format,
source_text=source_text,
source_metadata=source_metadata,
target_type=target_type,
status="processing",
)
db.add(kb_import)
await db.flush()
# Trigger AI conversion in background
background_tasks.add_task(_run_conversion, kb_import.id, settings.DATABASE_URL)
await db.commit()
return KBUploadResponse(
id=kb_import.id,
status=kb_import.status,
source_format=kb_import.source_format,
)
@router.get("/{import_id}", response_model=KBImportResponse)
async def get_kb_import(
import_id: UUID,
user: Annotated[User, Depends(require_engineer_or_admin)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Get a KB import with its generated nodes."""
kb_import = await _get_import_or_404(import_id, user, db)
return _serialize_import(kb_import)
@router.get("", response_model=KBImportListResponse)
async def list_kb_imports(
user: Annotated[User, Depends(require_engineer_or_admin)],
db: Annotated[AsyncSession, Depends(get_db)],
skip: int = 0,
limit: int = 20,
status_filter: Optional[str] = None,
):
"""List KB imports for the current account."""
limits = await _get_kb_limits(user, db)
history_limit = limits.kb_history_limit if limits else 3
query = select(KBImport).where(KBImport.account_id == user.account_id)
count_query = select(func.count(KBImport.id)).where(KBImport.account_id == user.account_id)
if status_filter:
query = query.where(KBImport.status == status_filter)
count_query = count_query.where(KBImport.status == status_filter)
total = await db.scalar(count_query) or 0
query = query.order_by(KBImport.created_at.desc())
# Apply history limit for free tier
effective_limit = limit
if history_limit is not None:
effective_limit = min(limit, history_limit - skip) if skip < history_limit else 0
if effective_limit <= 0:
return KBImportListResponse(items=[], total=total, skip=skip, limit=limit)
query = query.offset(skip).limit(effective_limit)
query = query.options(selectinload(KBImport.nodes))
result = await db.execute(query)
imports = result.scalars().all()
items = []
for imp in imports:
items.append(KBImportSummary(
id=imp.id,
source_filename=imp.source_filename,
source_format=imp.source_format,
target_type=imp.target_type,
status=imp.status,
confidence_avg=imp.confidence_avg,
node_count=len(imp.nodes) if imp.nodes else 0,
created_at=imp.created_at.isoformat(),
))
return KBImportListResponse(items=items, total=total, skip=skip, limit=limit)
@router.post("/{import_id}/convert", response_model=KBUploadResponse)
@limiter.limit("30/minute")
async def reconvert(
request: Request,
import_id: UUID,
background_tasks: BackgroundTasks,
user: Annotated[User, Depends(require_engineer_or_admin)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Re-trigger AI conversion on an existing import (retry/regenerate)."""
if not settings.ai_enabled:
raise HTTPException(status_code=503, detail="AI is not configured.")
kb_import = await _get_import_or_404(import_id, user, db, load_nodes=False)
if kb_import.status == "committed":
raise HTTPException(status_code=400, detail="Cannot reconvert a committed import.")
# Delete existing nodes
await db.execute(
delete(KBImportNode).where(KBImportNode.kb_import_id == kb_import.id)
)
kb_import.status = "processing"
kb_import.error_message = None
kb_import.confidence_avg = None
await db.flush()
background_tasks.add_task(_run_conversion, kb_import.id, settings.DATABASE_URL)
await db.commit()
return KBUploadResponse(
id=kb_import.id, status="processing", source_format=kb_import.source_format
)
@router.patch("/{import_id}/nodes/{node_id}", response_model=KBImportNodeResponse)
async def edit_node(
import_id: UUID,
node_id: UUID,
data: KBNodeEditRequest,
user: Annotated[User, Depends(require_engineer_or_admin)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Edit a specific node in a KB import during review."""
kb_import = await _get_import_or_404(import_id, user, db, load_nodes=False)
if kb_import.status != "ready":
raise HTTPException(status_code=400, detail="Import must be in 'ready' status to edit nodes.")
result = await db.execute(
select(KBImportNode).where(
KBImportNode.id == node_id,
KBImportNode.kb_import_id == import_id,
)
)
node = result.scalar_one_or_none()
if not node:
raise HTTPException(status_code=404, detail="Node not found")
op = data.operation
if op == "approve":
node.user_approved = True
elif op == "reject":
node.user_approved = False
elif op == "edit":
if not data.content:
raise HTTPException(status_code=400, detail="Content required for edit operation.")
node.content = data.content
node.user_edited = True
elif op == "delete":
await db.delete(node)
# Reorder remaining nodes
remaining = await db.execute(
select(KBImportNode)
.where(KBImportNode.kb_import_id == import_id)
.order_by(KBImportNode.node_order)
)
for idx, n in enumerate(remaining.scalars().all()):
n.node_order = idx
await db.flush()
await db.commit()
# Return a placeholder response for deleted node
return KBImportNodeResponse(
id=node_id,
kb_import_id=import_id,
node_order=-1,
node_type="step",
content={"deleted": True},
confidence_score=0,
user_edited=False,
user_approved=False,
)
elif op == "insert_after":
if not data.content:
raise HTTPException(status_code=400, detail="Content required for insert_after operation.")
# Shift subsequent nodes
subsequent = await db.execute(
select(KBImportNode)
.where(
KBImportNode.kb_import_id == import_id,
KBImportNode.node_order > node.node_order,
)
.order_by(KBImportNode.node_order)
)
for n in subsequent.scalars().all():
n.node_order += 1
new_node = KBImportNode(
kb_import_id=import_id,
node_order=node.node_order + 1,
node_type=data.content.get("type", "step"),
content=data.content,
confidence_score=1.0, # User-created nodes are fully trusted
user_edited=True,
user_approved=True,
)
db.add(new_node)
await db.flush()
await db.commit()
return KBImportNodeResponse.model_validate(new_node)
elif op == "regenerate":
# Re-run AI for just this node (simplified: update placeholder)
# Full implementation would call AI with node context + guidance
node.user_edited = False
node.user_approved = False
node.updated_at = datetime.now(timezone.utc)
await db.flush()
await db.commit()
return KBImportNodeResponse.model_validate(node)
@router.post("/{import_id}/commit", response_model=KBCommitResponse)
async def commit_import(
import_id: UUID,
user: Annotated[User, Depends(require_engineer_or_admin)],
db: Annotated[AsyncSession, Depends(get_db)],
data: Optional[KBCommitRequest] = None,
):
"""Commit a reviewed KB import to the flow library as a Tree."""
kb_import = await _get_import_or_404(import_id, user, db)
if kb_import.status != "ready":
raise HTTPException(status_code=400, detail="Import must be in 'ready' status to commit.")
if not kb_import.nodes:
raise HTTPException(status_code=400, detail="No nodes to commit.")
# Extract title/description from conversion metadata
conversion_meta = (kb_import.source_metadata or {}).get("_conversion", {})
tree_name = (data.name if data and data.name else None) or conversion_meta.get("title", "Imported Flow")
tree_description = (data.description if data else None) or conversion_meta.get("description")
# Build tree_structure from nodes
if kb_import.target_type == "troubleshooting":
tree_structure = _build_troubleshooting_tree(kb_import.nodes)
else:
tree_structure = _build_procedural_tree(kb_import.nodes)
# Build intake_form for procedural flows
intake_form = None
if kb_import.target_type == "procedural":
intake_form = (kb_import.source_metadata or {}).get("_intake_form")
# Create the Tree record
tree = Tree(
name=tree_name,
description=tree_description,
tree_type=kb_import.target_type,
tree_structure=tree_structure,
intake_form=intake_form,
author_id=user.id,
account_id=user.account_id,
status="draft",
import_metadata={
"source": "kb_accelerator",
"kb_import_id": str(kb_import.id),
"source_filename": kb_import.source_filename,
"source_format": kb_import.source_format,
"confidence_avg": kb_import.confidence_avg,
"node_count": len(kb_import.nodes),
"converted_at": datetime.now(timezone.utc).isoformat(),
},
)
if data and data.category_id:
tree.category_id = data.category_id
db.add(tree)
await db.flush()
kb_import.status = "committed"
kb_import.tree_id = tree.id
await db.commit()
return KBCommitResponse(
tree_id=tree.id,
import_id=kb_import.id,
tree_type=kb_import.target_type,
)
@router.delete("/{import_id}", status_code=204)
async def delete_import(
import_id: UUID,
user: Annotated[User, Depends(require_engineer_or_admin)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Cancel and clean up a KB import."""
kb_import = await _get_import_or_404(import_id, user, db, load_nodes=False)
if kb_import.status == "committed":
raise HTTPException(status_code=400, detail="Cannot delete a committed import.")
await db.execute(
delete(KBImportNode).where(KBImportNode.kb_import_id == import_id)
)
await db.delete(kb_import)
await db.commit()
# ── Tree Structure Builders ──
def _build_troubleshooting_tree(nodes: list[KBImportNode]) -> dict:
"""Build a troubleshooting tree_structure from import nodes."""
if not nodes:
return {"id": "root", "type": "decision", "question": "Empty", "children": []}
# Map original IDs to proper tree node structure
original_id_map: dict[str, KBImportNode] = {}
for node in nodes:
orig_id = node.content.get("original_id", str(node.id))
original_id_map[orig_id] = node
def _build_node(import_node: KBImportNode) -> dict:
content = import_node.content
node_type = import_node.node_type
if node_type == "resolution":
return {
"id": content.get("original_id", str(import_node.id)),
"type": "solution",
"question": content.get("question", ""),
"children": [],
}
if node_type == "action":
result = {
"id": content.get("original_id", str(import_node.id)),
"type": "action",
"question": content.get("question", ""),
"children": [],
}
next_id = content.get("next_node_id")
if next_id and next_id in original_id_map:
result["next_node_id"] = next_id
return result
# question/decision type
options = content.get("options", [])
children = []
for opt in options:
next_id = opt.get("next_node_id")
if next_id and next_id in original_id_map:
child_node = _build_node(original_id_map[next_id])
children.append(child_node)
return {
"id": content.get("original_id", str(import_node.id)),
"type": "decision",
"question": content.get("question", ""),
"options": [
{"label": opt.get("label", ""), "next_node_id": opt.get("next_node_id", "")}
for opt in options
],
"children": children,
}
root_node = nodes[0]
return _build_node(root_node)
def _build_procedural_tree(nodes: list[KBImportNode]) -> dict:
"""Build a procedural tree_structure from import nodes."""
steps = []
for node in sorted(nodes, key=lambda n: n.node_order):
content = node.content
step = {
"id": content.get("original_id", str(node.id)),
"type": node.node_type,
"content": content.get("content", ""),
}
steps.append(step)
return {
"id": "root",
"type": "procedural",
"steps": steps,
}

View File

@@ -14,6 +14,7 @@ from app.api.endpoints import survey
from app.api.endpoints import admin_survey
from app.api.endpoints import tree_transfer
from app.api.endpoints import ai_suggestions
from app.api.endpoints import kb_accelerator
api_router = APIRouter()
@@ -52,3 +53,4 @@ api_router.include_router(survey.router)
api_router.include_router(admin_survey.router)
api_router.include_router(tree_transfer.router)
api_router.include_router(ai_suggestions.router)
api_router.include_router(kb_accelerator.router)

View File

@@ -98,6 +98,7 @@ class Settings(BaseSettings):
"quick_action": "fast",
"open_chat": "standard",
"variable_inference": "fast",
"kb_convert": "standard",
}
def get_model_for_action(self, action_type: str) -> str:

View File

@@ -0,0 +1,498 @@
"""KB Accelerator AI conversion service.
Converts extracted KB article text into ResolutionFlow tree structures
using the Anthropic API (via the shared AI provider layer).
"""
import json
import logging
import re
import time
from typing import Any
from uuid import UUID
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.ai_provider import get_ai_provider
from app.core.ai_quota_service import record_ai_usage, get_user_plan
from app.core.config import settings
from app.models.kb_import import KBImport, KBImportNode
logger = logging.getLogger(__name__)
# Cost estimation (Sonnet pricing)
COST_PER_INPUT_TOKEN = 3.0 / 1_000_000
COST_PER_OUTPUT_TOKEN = 15.0 / 1_000_000
def _strip_markdown_fences(text: str) -> str:
"""Strip markdown code fences if the model wrapped its JSON response."""
text = text.strip()
match = re.match(r"^```(?:json)?\s*([\s\S]*?)```$", text)
if match:
return match.group(1).strip()
return text
def _estimate_cost(input_tokens: int, output_tokens: int) -> float:
return (input_tokens * COST_PER_INPUT_TOKEN) + (output_tokens * COST_PER_OUTPUT_TOKEN)
# ── System Prompts ──
TROUBLESHOOTING_SYSTEM_PROMPT = """You are an MSP documentation specialist for ResolutionFlow. Your task is to convert a knowledge base article into an interactive troubleshooting decision tree.
Analyze the article and produce a JSON array of nodes that form a troubleshooting flow. Each node represents either a diagnostic question (decision point) or a resolution (solution).
## Node Types
- **question**: A diagnostic question with multiple answer options. Each option leads to another node.
- **resolution**: A terminal node with the solution/fix text.
- **action**: An instruction step that leads to the next node via next_node_id.
- **warning**: A caution or important note.
## Output Format
Return a JSON object with this structure:
```json
{
"title": "Flow title derived from the article",
"description": "Brief description of what this flow troubleshoots",
"nodes": [
{
"id": "unique-node-id",
"type": "question",
"question": "What symptom is the user experiencing?",
"options": [
{"label": "Cannot connect", "next_node_id": "check-network"},
{"label": "Slow performance", "next_node_id": "check-resources"}
],
"confidence": 0.95,
"source_excerpt": "The exact text from the article this node was derived from"
},
{
"id": "check-network",
"type": "action",
"question": "Check the network connection and ping the server",
"next_node_id": "network-result",
"confidence": 0.88,
"source_excerpt": "Step 1: Verify network connectivity..."
},
{
"id": "solution-restart",
"type": "resolution",
"question": "Restart the service. The issue should now be resolved.",
"confidence": 0.92,
"source_excerpt": "Restarting the service resolves the connectivity issue."
}
]
}
```
## Rules
1. Every node MUST have a unique `id` (descriptive kebab-case).
2. Every node MUST have a `confidence` score between 0.0 and 1.0.
3. Every node MUST have a `source_excerpt` — the exact text from the source article it was derived from.
4. The first node is the root of the decision tree.
5. All `next_node_id` and option `next_node_id` references must point to existing node IDs.
6. Detect implicit branching logic (e.g., "If X, do Y; otherwise Z") and create decision nodes.
7. Produce at least 3 nodes. Maximum 50 nodes.
8. Use high confidence (0.9+) for directly stated steps, medium (0.7-0.89) for reasonable inferences, low (<0.7) for significant interpretation.
9. Return ONLY valid JSON — no markdown fences, no explanation text."""
PROCEDURAL_SYSTEM_PROMPT = """You are an MSP documentation specialist for ResolutionFlow. Your task is to convert a knowledge base article into a procedural (step-by-step) flow.
Analyze the article and produce a JSON object with sequential steps and detected variables.
## Step Types
- **step**: A regular instruction step.
- **section_header**: A section divider/title (no action, just organizational).
- **warning**: A caution or important note that should be highlighted.
## Variable Detection
Identify values that would change between executions (server names, IPs, usernames, domains, etc.) and replace them with `[VAR:variable_name]` tokens. Also produce an intake_form that captures these variables before execution.
## Output Format
Return a JSON object:
```json
{
"title": "Procedure title derived from the article",
"description": "Brief description of what this procedure accomplishes",
"steps": [
{
"id": "unique-step-id",
"type": "step",
"content": "Open Server Manager and navigate to Add Roles on [VAR:server_name]",
"confidence": 0.95,
"source_excerpt": "Step 1: Open Server Manager on DC01..."
},
{
"id": "warning-dns",
"type": "warning",
"content": "WARNING: This will restart DNS and cause brief connectivity loss",
"confidence": 0.90,
"source_excerpt": "Note: Restarting DNS will cause a brief outage"
},
{
"id": "section-verification",
"type": "section_header",
"content": "Verification Steps",
"confidence": 1.0,
"source_excerpt": "Verification"
}
],
"intake_form": [
{
"variable_name": "server_name",
"label": "Server Name",
"field_type": "text",
"required": true,
"display_order": 1
},
{
"variable_name": "ip_address",
"label": "IP Address",
"field_type": "text",
"required": true,
"display_order": 2
}
]
}
```
## Variable Type Mapping
- IP addresses → field_type: "text", variable like `ip_address`
- Server/computer names → field_type: "text", variable like `server_name`
- Domain names → field_type: "text", variable like `domain_name`
- Usernames/email → field_type: "text", variable like `username`
- Port numbers → field_type: "number", variable like `port`
## Rules
1. Every step MUST have a unique `id` (descriptive kebab-case).
2. Every step MUST have a `confidence` score between 0.0 and 1.0.
3. Every step MUST have a `source_excerpt` — the exact text from the source article.
4. Preserve the original step ordering from the article.
5. Detect ALL instance-specific values and replace with `[VAR:name]` tokens.
6. Generate an intake_form entry for each unique variable detected.
7. Produce at least 2 steps. Maximum 100 steps.
8. Use high confidence (0.9+) for directly stated steps, medium (0.7-0.89) for inferences, low (<0.7) for significant interpretation.
9. Return ONLY valid JSON — no markdown fences, no explanation text."""
def _build_user_message(
source_text: str,
source_metadata: dict[str, Any] | None,
source_filename: str | None,
) -> str:
"""Build the user message containing the extracted text and metadata."""
parts = []
if source_filename:
parts.append(f"Source file: {source_filename}")
if source_metadata:
headings = source_metadata.get("headings", [])
if headings:
heading_text = ", ".join(
f"H{h['level']}: {h['text']}" for h in headings[:20]
)
parts.append(f"Detected headings: {heading_text}")
lists = source_metadata.get("lists", [])
if lists:
parts.append(f"Detected {len(lists)} list(s) in the document.")
tables = source_metadata.get("tables", [])
if tables:
parts.append(f"Detected {len(tables)} table(s) in the document.")
parts.append(f"\n--- ARTICLE CONTENT ---\n\n{source_text}")
return "\n".join(parts)
def _parse_troubleshooting_response(
data: dict[str, Any],
kb_import_id: UUID,
) -> tuple[list[KBImportNode], str, str | None]:
"""Parse AI response into KBImportNode records for troubleshooting flows.
Returns (nodes, title, description).
"""
title = data.get("title", "Imported Troubleshooting Flow")
description = data.get("description")
raw_nodes = data.get("nodes", [])
if not raw_nodes:
raise ValueError("AI returned no nodes")
# Build parent mapping from the tree structure
# First node is root (no parent). For others, trace via options/next_node_id.
node_id_to_parent: dict[str, str | None] = {}
node_id_to_data: dict[str, dict[str, Any]] = {}
for node in raw_nodes:
nid = node.get("id", "")
node_id_to_data[nid] = node
if nid not in node_id_to_parent:
node_id_to_parent[nid] = None # default: no parent
# Trace parent relationships
for node in raw_nodes:
nid = node.get("id", "")
# Options point to children
for opt in node.get("options", []):
child_id = opt.get("next_node_id")
if child_id and child_id in node_id_to_data:
node_id_to_parent[child_id] = nid
# next_node_id points to child
next_id = node.get("next_node_id")
if next_id and next_id in node_id_to_data:
node_id_to_parent[next_id] = nid
# Create import node records preserving order
import uuid as uuid_mod
node_id_map: dict[str, uuid_mod.UUID] = {}
nodes: list[KBImportNode] = []
for order, raw_node in enumerate(raw_nodes):
node_uuid = uuid_mod.uuid4()
nid = raw_node.get("id", f"node-{order}")
node_id_map[nid] = node_uuid
for order, raw_node in enumerate(raw_nodes):
nid = raw_node.get("id", f"node-{order}")
node_type = raw_node.get("type", "question")
if node_type == "decision":
node_type = "question"
parent_str_id = node_id_to_parent.get(nid)
parent_uuid = node_id_map.get(parent_str_id) if parent_str_id else None
# Build content JSONB
content: dict[str, Any] = {
"original_id": nid,
"question": raw_node.get("question", ""),
}
if raw_node.get("options"):
content["options"] = raw_node["options"]
if raw_node.get("next_node_id"):
content["next_node_id"] = raw_node["next_node_id"]
import_node = KBImportNode(
id=node_id_map[nid],
kb_import_id=kb_import_id,
node_order=order,
node_type=node_type,
content=content,
parent_node_id=parent_uuid,
source_excerpt=raw_node.get("source_excerpt"),
confidence_score=float(raw_node.get("confidence", 0.5)),
user_edited=False,
user_approved=False,
)
nodes.append(import_node)
return nodes, title, description
def _parse_procedural_response(
data: dict[str, Any],
kb_import_id: UUID,
) -> tuple[list[KBImportNode], str, str | None, list[dict[str, Any]] | None]:
"""Parse AI response into KBImportNode records for procedural flows.
Returns (nodes, title, description, intake_form).
"""
title = data.get("title", "Imported Procedure")
description = data.get("description")
raw_steps = data.get("steps", [])
intake_form = data.get("intake_form")
if not raw_steps:
raise ValueError("AI returned no steps")
import uuid as uuid_mod
nodes: list[KBImportNode] = []
for order, raw_step in enumerate(raw_steps):
content: dict[str, Any] = {
"original_id": raw_step.get("id", f"step-{order}"),
"content": raw_step.get("content", ""),
}
node_type = raw_step.get("type", "step")
if node_type not in ("step", "section_header", "warning"):
node_type = "step"
import_node = KBImportNode(
id=uuid_mod.uuid4(),
kb_import_id=kb_import_id,
node_order=order,
node_type=node_type,
content=content,
parent_node_id=None, # Procedural flows are linear
source_excerpt=raw_step.get("source_excerpt"),
confidence_score=float(raw_step.get("confidence", 0.5)),
user_edited=False,
user_approved=False,
)
nodes.append(import_node)
return nodes, title, description, intake_form
async def convert_document(
kb_import: KBImport,
db: AsyncSession,
) -> list[KBImportNode]:
"""Run AI conversion on an extracted KB article.
Creates KBImportNode records and updates the kb_import status.
Returns the created nodes.
"""
start_time = time.monotonic()
# Select system prompt based on target type
if kb_import.target_type == "troubleshooting":
system_prompt = TROUBLESHOOTING_SYSTEM_PROMPT
else:
system_prompt = PROCEDURAL_SYSTEM_PROMPT
user_message = _build_user_message(
source_text=kb_import.source_text,
source_metadata=kb_import.source_metadata,
source_filename=kb_import.source_filename,
)
# Get AI provider with model routing
model = settings.get_model_for_action("kb_convert")
provider = get_ai_provider(model=model)
try:
raw_text, input_tokens, output_tokens = await provider.generate_json(
system_prompt=system_prompt,
messages=[{"role": "user", "content": user_message}],
max_tokens=8192,
)
except Exception as e:
logger.error("AI conversion failed for kb_import=%s: %s", kb_import.id, e)
kb_import.status = "failed"
kb_import.error_message = f"AI processing error: {str(e)}"
kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
await db.flush()
# Record failed usage
plan = await get_user_plan(kb_import.account_id, db)
await record_ai_usage(
user_id=kb_import.created_by,
account_id=kb_import.account_id,
conversation_id=None,
generation_type="kb_convert",
tier=plan,
input_tokens=0,
output_tokens=0,
estimated_cost=0.0,
succeeded=False,
counts_toward_quota=False,
error_code="ai_error",
extra_data={"kb_import_id": str(kb_import.id)},
db=db,
)
return []
# Parse JSON response
raw_text = _strip_markdown_fences(raw_text)
try:
data = json.loads(raw_text)
except json.JSONDecodeError as e:
logger.error(
"KB conversion JSON parse failed for kb_import=%s (%d chars): %s",
kb_import.id, len(raw_text), raw_text[:500],
)
kb_import.status = "failed"
kb_import.error_message = f"AI returned invalid JSON: {e}"
kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
kb_import.ai_tokens_input = input_tokens
kb_import.ai_tokens_output = output_tokens
await db.flush()
return []
# Parse into nodes based on target type
try:
intake_form = None
if kb_import.target_type == "troubleshooting":
nodes, title, description = _parse_troubleshooting_response(
data, kb_import.id
)
else:
nodes, title, description, intake_form = _parse_procedural_response(
data, kb_import.id
)
except (ValueError, KeyError, TypeError) as e:
logger.error("KB node parsing failed for kb_import=%s: %s", kb_import.id, e)
kb_import.status = "failed"
kb_import.error_message = f"Failed to parse AI response: {e}"
kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
kb_import.ai_tokens_input = input_tokens
kb_import.ai_tokens_output = output_tokens
await db.flush()
return []
# Persist nodes
for node in nodes:
db.add(node)
# Update import record
elapsed_ms = int((time.monotonic() - start_time) * 1000)
confidence_scores = [n.confidence_score for n in nodes]
avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.0
kb_import.status = "ready"
kb_import.confidence_avg = avg_confidence
kb_import.processing_time_ms = elapsed_ms
kb_import.ai_tokens_input = input_tokens
kb_import.ai_tokens_output = output_tokens
# Store parsed metadata for commit phase
if not kb_import.source_metadata:
kb_import.source_metadata = {}
kb_import.source_metadata["_conversion"] = {
"title": title,
"description": description,
"node_count": len(nodes),
}
if intake_form:
kb_import.source_metadata["_intake_form"] = intake_form
await db.flush()
# Record successful usage
plan = await get_user_plan(kb_import.account_id, db)
cost = _estimate_cost(input_tokens, output_tokens)
await record_ai_usage(
user_id=kb_import.created_by,
account_id=kb_import.account_id,
conversation_id=None,
generation_type="kb_convert",
tier=plan,
input_tokens=input_tokens,
output_tokens=output_tokens,
estimated_cost=cost,
succeeded=True,
counts_toward_quota=True,
error_code=None,
extra_data={"kb_import_id": str(kb_import.id), "node_count": len(nodes)},
db=db,
)
logger.info(
"KB conversion complete: import=%s, nodes=%d, confidence=%.2f, time=%dms, tokens=%d/%d",
kb_import.id, len(nodes), avg_confidence, elapsed_ms, input_tokens, output_tokens,
)
return nodes

View File

@@ -0,0 +1,199 @@
"""KB Accelerator text extraction service.
Extracts plain text and structural metadata from uploaded KB articles.
Phase 1: txt, paste, docx. Phase 2 will add pdf, html, md.
"""
import io
import logging
from typing import Any, Callable
logger = logging.getLogger(__name__)
# Type alias for extraction handlers
ExtractResult = tuple[str, dict[str, Any] | None]
ExtractHandler = Callable[[bytes], ExtractResult]
def _extract_txt(content_bytes: bytes) -> ExtractResult:
"""Extract from plain text — pass through with no metadata."""
text = content_bytes.decode("utf-8", errors="replace")
return text.strip(), None
def _extract_paste(content_bytes: bytes) -> ExtractResult:
"""Extract from pasted text — identical to txt."""
return _extract_txt(content_bytes)
def _extract_docx(content_bytes: bytes) -> ExtractResult:
"""Extract text and structural metadata from a DOCX file.
Preserves heading levels, list structures, table content,
and bold/italic emphasis markers.
"""
try:
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
except ImportError:
raise RuntimeError(
"python-docx is required for DOCX extraction. "
"Install it with: pip install python-docx"
)
doc = Document(io.BytesIO(content_bytes))
text_parts: list[str] = []
metadata: dict[str, Any] = {
"headings": [],
"lists": [],
"tables": [],
"emphasis": [],
}
list_items: list[dict[str, Any]] = []
current_list_type: str | None = None
for i, para in enumerate(doc.paragraphs):
style_name = para.style.name if para.style else ""
text = para.text.strip()
if not text:
# Flush any accumulated list
if list_items:
metadata["lists"].append({
"type": current_list_type or "unordered",
"items": list_items,
})
list_items = []
current_list_type = None
text_parts.append("")
continue
# Detect headings
if style_name.startswith("Heading"):
try:
level = int(style_name.split()[-1])
except (ValueError, IndexError):
level = 1
metadata["headings"].append({
"level": level,
"text": text,
"paragraph_index": i,
})
text_parts.append(text)
continue
# Detect list items
if style_name.startswith("List"):
is_ordered = "Number" in style_name or "Ordered" in style_name
list_type = "ordered" if is_ordered else "unordered"
if current_list_type is not None and current_list_type != list_type:
# Flush previous list
metadata["lists"].append({
"type": current_list_type,
"items": list_items,
})
list_items = []
current_list_type = list_type
list_items.append({"text": text, "paragraph_index": i})
text_parts.append(text)
continue
# Flush any accumulated list before a non-list paragraph
if list_items:
metadata["lists"].append({
"type": current_list_type or "unordered",
"items": list_items,
})
list_items = []
current_list_type = None
# Detect emphasis (bold/italic runs)
for run in para.runs:
run_text = run.text.strip()
if not run_text:
continue
if run.bold:
metadata["emphasis"].append({
"type": "bold",
"text": run_text,
"paragraph_index": i,
})
if run.italic:
metadata["emphasis"].append({
"type": "italic",
"text": run_text,
"paragraph_index": i,
})
text_parts.append(text)
# Flush trailing list
if list_items:
metadata["lists"].append({
"type": current_list_type or "unordered",
"items": list_items,
})
# Extract tables
for t_idx, table in enumerate(doc.tables):
table_data: list[list[str]] = []
for row in table.rows:
table_data.append([cell.text.strip() for cell in row.cells])
if table_data:
metadata["tables"].append({
"table_index": t_idx,
"rows": table_data,
})
# Also add table content to text
for row in table_data:
text_parts.append(" | ".join(row))
full_text = "\n".join(text_parts).strip()
# Clean up empty metadata sections
metadata = {k: v for k, v in metadata.items() if v}
return full_text, metadata if metadata else None
# Registry of format handlers — extend for Phase 2
FORMAT_HANDLERS: dict[str, ExtractHandler] = {
"txt": _extract_txt,
"paste": _extract_paste,
"docx": _extract_docx,
}
def extract_text(
content_bytes: bytes,
source_format: str,
) -> ExtractResult:
"""Extract plain text and structural metadata from uploaded content.
Args:
content_bytes: Raw bytes of the uploaded content.
source_format: Format identifier ('txt', 'paste', 'docx', etc.)
Returns:
Tuple of (plain_text, structural_metadata_or_none).
Raises:
ValueError: If the format is not supported.
RuntimeError: If a required extraction library is not installed.
"""
handler = FORMAT_HANDLERS.get(source_format)
if handler is None:
raise ValueError(f"Unsupported format: {source_format}")
logger.info("Extracting text from format=%s", source_format)
text, metadata = handler(content_bytes)
if not text.strip():
raise ValueError("Extracted text is empty — the document may be blank or contain only images.")
logger.info(
"Extraction complete: %d chars, metadata=%s",
len(text),
"yes" if metadata else "no",
)
return text, metadata

View File

@@ -34,6 +34,7 @@ from .copilot_conversation import CopilotConversation
from .assistant_chat import AssistantChat
from .survey_response import SurveyResponse
from .survey_invite import SurveyInvite
from .kb_import import KBImport, KBImportNode
__all__ = [
"User",
@@ -79,4 +80,6 @@ __all__ = [
"AssistantChat",
"SurveyResponse",
"SurveyInvite",
"KBImport",
"KBImportNode",
]

View File

@@ -0,0 +1,140 @@
import uuid
from datetime import datetime, timezone
from typing import Optional, Any, TYPE_CHECKING
from sqlalchemy import String, Text, DateTime, ForeignKey, Boolean, Integer, Float, CheckConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
from sqlalchemy.dialects.postgresql import UUID, JSONB
from app.core.database import Base
if TYPE_CHECKING:
from app.models.account import Account
from app.models.user import User
from app.models.tree import Tree
class KBImport(Base):
__tablename__ = "kb_imports"
__table_args__ = (
CheckConstraint(
"source_format IN ('txt', 'paste', 'docx', 'pdf', 'html', 'md')",
name="ck_kb_imports_source_format",
),
CheckConstraint(
"target_type IN ('troubleshooting', 'procedural')",
name="ck_kb_imports_target_type",
),
CheckConstraint(
"status IN ('processing', 'ready', 'committed', 'failed')",
name="ck_kb_imports_status",
),
)
id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
)
account_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("accounts.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
created_by: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("users.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
source_filename: Mapped[Optional[str]] = mapped_column(
String(500), nullable=True
)
source_format: Mapped[str] = mapped_column(String(20), nullable=False)
source_text: Mapped[str] = mapped_column(Text, nullable=False)
source_metadata: Mapped[Optional[dict[str, Any]]] = mapped_column(
JSONB, nullable=True
)
target_type: Mapped[str] = mapped_column(String(20), nullable=False)
status: Mapped[str] = mapped_column(
String(20), nullable=False, default="processing"
)
confidence_avg: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
processing_time_ms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
ai_tokens_input: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
ai_tokens_output: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
tree_id: Mapped[Optional[uuid.UUID]] = mapped_column(
UUID(as_uuid=True),
ForeignKey("trees.id", ondelete="SET NULL"),
nullable=True,
)
batch_id: Mapped[Optional[uuid.UUID]] = mapped_column(
UUID(as_uuid=True), nullable=True, index=True
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc),
)
# Relationships
account: Mapped["Account"] = relationship("Account", foreign_keys=[account_id])
created_by_user: Mapped["User"] = relationship("User", foreign_keys=[created_by])
tree: Mapped[Optional["Tree"]] = relationship("Tree", foreign_keys=[tree_id])
nodes: Mapped[list["KBImportNode"]] = relationship(
"KBImportNode",
back_populates="kb_import",
cascade="all, delete-orphan",
order_by="KBImportNode.node_order",
)
class KBImportNode(Base):
__tablename__ = "kb_import_nodes"
__table_args__ = (
CheckConstraint(
"node_type IN ('question', 'resolution', 'step', 'section_header', 'warning', 'action')",
name="ck_kb_import_nodes_node_type",
),
)
id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
)
kb_import_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("kb_imports.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
node_order: Mapped[int] = mapped_column(Integer, nullable=False)
node_type: Mapped[str] = mapped_column(String(20), nullable=False)
content: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False)
parent_node_id: Mapped[Optional[uuid.UUID]] = mapped_column(
UUID(as_uuid=True),
ForeignKey("kb_import_nodes.id", ondelete="SET NULL"),
nullable=True,
)
source_excerpt: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
confidence_score: Mapped[float] = mapped_column(Float, nullable=False)
user_edited: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
user_approved: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc),
)
# Relationships
kb_import: Mapped["KBImport"] = relationship(
"KBImport", back_populates="nodes"
)
parent: Mapped[Optional["KBImportNode"]] = relationship(
"KBImportNode",
remote_side="KBImportNode.id",
foreign_keys=[parent_node_id],
)

View File

@@ -1,4 +1,4 @@
from sqlalchemy import String, Integer, Boolean
from sqlalchemy import String, Integer, Boolean, text
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import JSONB
from app.core.database import Base
@@ -18,3 +18,13 @@ class PlanLimits(Base):
# AI Flow Builder limits
max_ai_builds_per_month: Mapped[int | None] = mapped_column(Integer, nullable=True)
max_ai_builds_per_24h: Mapped[int | None] = mapped_column(Integer, nullable=True)
# KB Accelerator limits
kb_accelerator_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
kb_max_lifetime_conversions: Mapped[int | None] = mapped_column(Integer, nullable=True)
kb_batch_max_size: Mapped[int | None] = mapped_column(Integer, nullable=True)
kb_allowed_formats: Mapped[list] = mapped_column(JSONB, nullable=False, default=lambda: ["txt", "paste"], server_default=text("'[\"txt\",\"paste\"]'::jsonb"))
kb_detailed_analysis: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
kb_conversational_refinement: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
kb_step_library_matching: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
kb_history_limit: Mapped[int | None] = mapped_column(Integer, nullable=True)

View File

@@ -0,0 +1,142 @@
"""Pydantic schemas for KB Accelerator."""
from typing import Any, Literal, Optional
from uuid import UUID
from pydantic import BaseModel, Field
# ── Requests ──
class KBUploadTextRequest(BaseModel):
"""Upload KB article via text paste."""
content: str = Field(..., min_length=10, max_length=500_000)
title: Optional[str] = Field(None, min_length=1, max_length=255)
target_type: Optional[Literal["troubleshooting", "procedural"]] = Field(
None, description="Target flow type. If omitted, AI decides."
)
class KBNodeEditRequest(BaseModel):
"""Edit a specific KB import node during review."""
operation: Literal[
"approve", "reject", "edit", "delete", "regenerate", "insert_after"
]
content: Optional[dict[str, Any]] = Field(
None, description="Updated node content (required for 'edit' and 'insert_after')"
)
guidance: Optional[str] = Field(
None,
max_length=2000,
description="User guidance for 'regenerate' operation",
)
class KBCommitRequest(BaseModel):
"""Optional overrides when committing a KB import to the flow library."""
name: Optional[str] = Field(None, min_length=1, max_length=255)
description: Optional[str] = Field(None, max_length=2000)
category_id: Optional[UUID] = None
# ── Responses ──
class KBImportNodeResponse(BaseModel):
"""A single generated node in a KB import."""
id: UUID
kb_import_id: UUID
node_order: int
node_type: str
content: dict[str, Any]
parent_node_id: Optional[UUID] = None
source_excerpt: Optional[str] = None
confidence_score: float
user_edited: bool
user_approved: bool
model_config = {"from_attributes": True}
class KBUploadResponse(BaseModel):
"""Response after uploading a KB article."""
id: UUID
status: str
source_format: str
class KBImportResponse(BaseModel):
"""Full KB import detail with nodes."""
id: UUID
account_id: UUID
created_by: UUID
source_filename: Optional[str] = None
source_format: str
source_text: str
source_metadata: Optional[dict[str, Any]] = None
target_type: str
status: str
confidence_avg: Optional[float] = None
error_message: Optional[str] = None
processing_time_ms: Optional[int] = None
ai_tokens_input: Optional[int] = None
ai_tokens_output: Optional[int] = None
tree_id: Optional[UUID] = None
nodes: list[KBImportNodeResponse] = []
created_at: str
updated_at: str
model_config = {"from_attributes": True}
class KBImportSummary(BaseModel):
"""Lightweight import item for list view."""
id: UUID
source_filename: Optional[str] = None
source_format: str
target_type: str
status: str
confidence_avg: Optional[float] = None
node_count: int = 0
created_at: str
model_config = {"from_attributes": True}
class KBImportListResponse(BaseModel):
"""Paginated list of KB imports."""
items: list[KBImportSummary]
total: int
skip: int
limit: int
class KBCommitResponse(BaseModel):
"""Response after committing a KB import to the flow library."""
tree_id: UUID
import_id: UUID
tree_type: str
class KBQuotaResponse(BaseModel):
"""Current KB Accelerator entitlements and usage for the user's account."""
plan: str
kb_accelerator_enabled: bool
lifetime_conversions_used: int
lifetime_conversions_limit: Optional[int] = None
allowed_formats: list[str]
detailed_analysis: bool
conversational_refinement: bool
step_library_matching: bool
history_limit: Optional[int] = None
can_convert: bool

View File

@@ -0,0 +1,334 @@
"""Integration tests for KB Accelerator endpoints."""
import pytest
import json
from unittest.mock import AsyncMock, patch, PropertyMock
from httpx import AsyncClient
pytestmark = pytest.mark.asyncio
# ── Fixtures ──
@pytest.fixture
async def kb_setup(client, auth_headers, test_db):
"""Seed KB plan limits and return helpers."""
# Update plan_limits with KB columns for 'free' plan
await test_db.execute(
__import__("sqlalchemy").text("""
UPDATE plan_limits SET
kb_accelerator_enabled = true,
kb_max_lifetime_conversions = 3,
kb_allowed_formats = '["txt","paste"]'::jsonb,
kb_detailed_analysis = false,
kb_conversational_refinement = false,
kb_step_library_matching = false,
kb_history_limit = 3
WHERE plan = 'free'
""")
)
await test_db.execute(
__import__("sqlalchemy").text("""
UPDATE plan_limits SET
kb_accelerator_enabled = true,
kb_max_lifetime_conversions = NULL,
kb_allowed_formats = '["txt","paste","docx","pdf","html","md"]'::jsonb,
kb_detailed_analysis = true,
kb_conversational_refinement = true,
kb_step_library_matching = true,
kb_history_limit = NULL
WHERE plan = 'pro'
""")
)
await test_db.commit()
return {"client": client, "headers": auth_headers}
def _mock_ai_enabled():
"""Context manager to mock AI as enabled."""
return patch.object(
type(__import__("app.core.config", fromlist=["settings"]).settings),
"ai_enabled",
new_callable=PropertyMock,
return_value=True,
)
SAMPLE_KB_TEXT = """
Troubleshooting Outlook Connectivity Issues
Problem: Users report that Outlook keeps disconnecting from Exchange.
Step 1: Check Network Connectivity
Ping the Exchange server to verify network connectivity.
If ping fails, check the network configuration.
Step 2: Verify Outlook Profile
If the network is working, check the Outlook profile settings.
Go to Control Panel > Mail > Show Profiles.
Step 3: Check Exchange Server
If the profile is correct, verify the Exchange server is running.
Open Services.msc and check Microsoft Exchange services.
Resolution: After following these steps, Outlook should maintain
a persistent connection to Exchange.
"""
MOCK_AI_TROUBLESHOOTING_RESPONSE = json.dumps({
"title": "Troubleshooting Outlook Connectivity",
"description": "Diagnose and fix Outlook disconnection from Exchange",
"nodes": [
{
"id": "root-check",
"type": "question",
"question": "Is the network connection working?",
"options": [
{"label": "Yes", "next_node_id": "check-profile"},
{"label": "No", "next_node_id": "fix-network"},
],
"confidence": 0.92,
"source_excerpt": "Step 1: Check Network Connectivity",
},
{
"id": "fix-network",
"type": "resolution",
"question": "Fix the network configuration and retry.",
"confidence": 0.85,
"source_excerpt": "If ping fails, check the network configuration.",
},
{
"id": "check-profile",
"type": "question",
"question": "Is the Outlook profile configured correctly?",
"options": [
{"label": "Yes", "next_node_id": "check-exchange"},
{"label": "No", "next_node_id": "fix-profile"},
],
"confidence": 0.88,
"source_excerpt": "Step 2: Verify Outlook Profile",
},
{
"id": "fix-profile",
"type": "resolution",
"question": "Reconfigure the Outlook profile via Control Panel > Mail.",
"confidence": 0.90,
"source_excerpt": "Go to Control Panel > Mail > Show Profiles.",
},
{
"id": "check-exchange",
"type": "resolution",
"question": "Verify Exchange services are running in Services.msc.",
"confidence": 0.87,
"source_excerpt": "Open Services.msc and check Microsoft Exchange services.",
},
],
})
MOCK_AI_PROCEDURAL_RESPONSE = json.dumps({
"title": "Setup New Domain Controller",
"description": "Step-by-step procedure for setting up a new DC",
"steps": [
{
"id": "step-1",
"type": "step",
"content": "Open Server Manager on [VAR:server_name]",
"confidence": 0.95,
"source_excerpt": "Step 1: Open Server Manager on DC01",
},
{
"id": "warning-dns",
"type": "warning",
"content": "WARNING: This will restart DNS and cause brief connectivity loss",
"confidence": 0.90,
"source_excerpt": "Note: Restarting DNS will cause a brief outage",
},
{
"id": "step-2",
"type": "step",
"content": "Configure IP address [VAR:ip_address] on the network adapter",
"confidence": 0.88,
"source_excerpt": "Configure IP 192.168.1.10 on the adapter",
},
],
"intake_form": [
{
"variable_name": "server_name",
"label": "Server Name",
"field_type": "text",
"required": True,
"display_order": 1,
},
{
"variable_name": "ip_address",
"label": "IP Address",
"field_type": "text",
"required": True,
"display_order": 2,
},
],
})
# ── Upload Tests ──
class TestUpload:
async def test_upload_text_paste(self, kb_setup):
"""Upload via text paste creates a kb_import in processing status."""
c, h = kb_setup["client"], kb_setup["headers"]
with _mock_ai_enabled():
# Mock the background conversion (don't actually call AI)
with patch("app.api.endpoints.kb_accelerator._run_conversion"):
resp = await c.post(
"/api/v1/kb-accelerator/upload",
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
headers=h,
)
assert resp.status_code == 201
data = resp.json()
assert data["status"] == "processing"
assert data["source_format"] == "paste"
assert "id" in data
async def test_upload_empty_content_rejected(self, kb_setup):
c, h = kb_setup["client"], kb_setup["headers"]
with _mock_ai_enabled():
resp = await c.post(
"/api/v1/kb-accelerator/upload",
data={"content": "short"},
headers=h,
)
assert resp.status_code == 400
async def test_upload_no_file_no_content_rejected(self, kb_setup):
c, h = kb_setup["client"], kb_setup["headers"]
with _mock_ai_enabled():
resp = await c.post(
"/api/v1/kb-accelerator/upload",
data={},
headers=h,
)
assert resp.status_code == 400
# ── Get/List Tests ──
class TestGetList:
async def test_get_import(self, kb_setup):
c, h = kb_setup["client"], kb_setup["headers"]
with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
create_resp = await c.post(
"/api/v1/kb-accelerator/upload",
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
headers=h,
)
import_id = create_resp.json()["id"]
resp = await c.get(f"/api/v1/kb-accelerator/{import_id}", headers=h)
assert resp.status_code == 200
data = resp.json()
assert data["id"] == import_id
assert data["source_format"] == "paste"
async def test_list_imports(self, kb_setup):
c, h = kb_setup["client"], kb_setup["headers"]
with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
await c.post(
"/api/v1/kb-accelerator/upload",
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
headers=h,
)
resp = await c.get("/api/v1/kb-accelerator", headers=h)
assert resp.status_code == 200
data = resp.json()
assert data["total"] >= 1
assert len(data["items"]) >= 1
# ── Quota Tests ──
class TestQuota:
async def test_get_quota(self, kb_setup):
c, h = kb_setup["client"], kb_setup["headers"]
resp = await c.get("/api/v1/kb-accelerator/quota", headers=h)
assert resp.status_code == 200
data = resp.json()
assert data["kb_accelerator_enabled"] is True
assert data["lifetime_conversions_limit"] == 3
assert data["can_convert"] is True
# ── Commit Tests ──
class TestCommit:
async def test_commit_creates_tree(self, kb_setup, test_db):
"""Committing a ready import creates a Tree record."""
c, h = kb_setup["client"], kb_setup["headers"]
# Create import
with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
create_resp = await c.post(
"/api/v1/kb-accelerator/upload",
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
headers=h,
)
import_id = create_resp.json()["id"]
# Simulate conversion complete: update status + add nodes directly
from app.models.kb_import import KBImport, KBImportNode
from sqlalchemy import select
import uuid
result = await test_db.execute(select(KBImport).where(KBImport.id == uuid.UUID(import_id)))
kb_import = result.scalar_one()
kb_import.status = "ready"
kb_import.source_metadata = {"_conversion": {"title": "Test Flow", "description": "Test"}}
node = KBImportNode(
kb_import_id=kb_import.id,
node_order=0,
node_type="question",
content={"original_id": "root", "question": "Test question?", "options": []},
confidence_score=0.9,
)
test_db.add(node)
await test_db.commit()
# Commit
resp = await c.post(f"/api/v1/kb-accelerator/{import_id}/commit", headers=h)
assert resp.status_code == 200
data = resp.json()
assert "tree_id" in data
assert data["tree_type"] == "troubleshooting"
# ── Delete Tests ──
class TestDelete:
async def test_delete_import(self, kb_setup):
c, h = kb_setup["client"], kb_setup["headers"]
with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
create_resp = await c.post(
"/api/v1/kb-accelerator/upload",
data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
headers=h,
)
import_id = create_resp.json()["id"]
resp = await c.delete(f"/api/v1/kb-accelerator/{import_id}", headers=h)
assert resp.status_code == 204
# Verify deleted
resp = await c.get(f"/api/v1/kb-accelerator/{import_id}", headers=h)
assert resp.status_code == 404