feat: KB Accelerator — convert KB articles into interactive flows

Full-stack implementation of the KB Accelerator feature that converts
static MSP knowledge base articles into interactive troubleshooting
and procedural flows using AI.

Backend:
- Migrations 054/055: kb_imports, kb_import_nodes tables + plan_limits KB columns
- SQLAlchemy models with relationships and self-referential node hierarchy
- Text extraction service (txt, paste, docx with structural metadata)
- AI conversion service with MSP-specialist prompts for both flow types
- 8 API endpoints: upload, get, list, convert, edit node, commit, delete, quota
- Tier-gated access via plan_limits (free: 3 lifetime, pro/team: unlimited)
- 8 integration tests covering upload, get/list, quota, commit, delete

Frontend:
- TypeScript types and API client for all KB Accelerator endpoints
- Multi-step wizard page: upload → processing → review → success
- Upload screen with paste/file tabs, drag-drop, target type selector
- Two-panel review screen with source highlighting and node cards
- Per-node actions: approve, edit, regenerate, insert, delete
- Confidence color indicators (green/amber/red)
- Sidebar navigation with Sparkles icon
- Code-split lazy-loaded route at /kb-accelerator

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Michael Chihlas
2026-03-10 20:56:28 -04:00
parent c65aa4f0b7
commit 71ff4a8c35
27 changed files with 4426 additions and 2 deletions

View File

@@ -17,6 +17,7 @@ from app.models.assistant_chat import AssistantChat
from app.models.survey_response import SurveyResponse
from app.models.survey_invite import SurveyInvite
from app.models.ai_suggestion import AISuggestion # noqa: F401
from app.models.kb_import import KBImport, KBImportNode # noqa: F401
from app.core.config import settings
# this is the Alembic Config object

View File

@@ -0,0 +1,79 @@
"""add kb_imports and kb_import_nodes tables
Revision ID: 054
Revises: 053
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID, JSONB
revision = "054"
down_revision = "053"
branch_labels = None
depends_on = None
def upgrade():
    """Create the ``kb_imports`` and ``kb_import_nodes`` tables plus their indexes."""
    # One row per uploaded or pasted KB article. Rows are removed together with
    # the owning account or creating user (ON DELETE CASCADE), while deleting the
    # linked tree only clears ``tree_id`` (ON DELETE SET NULL).
    # NOTE(review): gen_random_uuid() is built in from PostgreSQL 13; older
    # servers need the pgcrypto extension — confirm the deployment target.
    op.create_table(
        "kb_imports",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("account_id", UUID(as_uuid=True), sa.ForeignKey("accounts.id", ondelete="CASCADE"), nullable=False, index=True),
        sa.Column("created_by", UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True),
        sa.Column("source_filename", sa.String(500), nullable=True),
        sa.Column("source_format", sa.String(20), nullable=False),
        sa.Column("source_text", sa.Text, nullable=False),
        sa.Column("source_metadata", JSONB, nullable=True),
        sa.Column("target_type", sa.String(20), nullable=False),
        # New rows start in 'processing' unless the caller says otherwise.
        sa.Column("status", sa.String(20), nullable=False, server_default="processing"),
        sa.Column("confidence_avg", sa.Float, nullable=True),
        sa.Column("error_message", sa.Text, nullable=True),
        sa.Column("processing_time_ms", sa.Integer, nullable=True),
        sa.Column("ai_tokens_input", sa.Integer, nullable=True),
        sa.Column("ai_tokens_output", sa.Integer, nullable=True),
        sa.Column("tree_id", UUID(as_uuid=True), sa.ForeignKey("trees.id", ondelete="SET NULL"), nullable=True),
        sa.Column("batch_id", UUID(as_uuid=True), nullable=True, index=True),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        # The string columns above are closed enumerations enforced by the database.
        sa.CheckConstraint(
            "source_format IN ('txt', 'paste', 'docx', 'pdf', 'html', 'md')",
            name="ck_kb_imports_source_format",
        ),
        sa.CheckConstraint(
            "target_type IN ('troubleshooting', 'procedural')",
            name="ck_kb_imports_target_type",
        ),
        sa.CheckConstraint(
            "status IN ('processing', 'ready', 'committed', 'failed')",
            name="ck_kb_imports_status",
        ),
    )
    # Extra indexes for filtering by status and for newest-first listings.
    op.create_index("ix_kb_imports_status", "kb_imports", ["status"])
    op.create_index("ix_kb_imports_created_at_desc", "kb_imports", [sa.text("created_at DESC")])
    # Nodes generated for an import. They are deleted with the import
    # (ON DELETE CASCADE); ``parent_node_id`` is a self-reference that is
    # cleared when the parent node row is deleted (ON DELETE SET NULL).
    op.create_table(
        "kb_import_nodes",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("kb_import_id", UUID(as_uuid=True), sa.ForeignKey("kb_imports.id", ondelete="CASCADE"), nullable=False, index=True),
        sa.Column("node_order", sa.Integer, nullable=False),
        sa.Column("node_type", sa.String(20), nullable=False),
        sa.Column("content", JSONB, nullable=False),
        sa.Column("parent_node_id", UUID(as_uuid=True), sa.ForeignKey("kb_import_nodes.id", ondelete="SET NULL"), nullable=True),
        sa.Column("source_excerpt", sa.Text, nullable=True),
        sa.Column("confidence_score", sa.Float, nullable=False),
        sa.Column("user_edited", sa.Boolean, nullable=False, server_default=sa.text("false")),
        sa.Column("user_approved", sa.Boolean, nullable=False, server_default=sa.text("false")),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.CheckConstraint(
            "node_type IN ('question', 'resolution', 'step', 'section_header', 'warning', 'action')",
            name="ck_kb_import_nodes_node_type",
        ),
    )
    op.create_index("ix_kb_import_nodes_confidence", "kb_import_nodes", ["confidence_score"])
def downgrade():
    """Drop both KB import tables, child table first."""
    # kb_import_nodes holds a foreign key to kb_imports, so it is dropped first.
    for table_name in ("kb_import_nodes", "kb_imports"):
        op.drop_table(table_name)

View File

@@ -0,0 +1,76 @@
"""add KB Accelerator columns to plan_limits
Revision ID: 055
Revises: 054
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
revision = "055"
down_revision = "054"
branch_labels = None
depends_on = None
def upgrade():
    """Add the KB Accelerator entitlement columns to ``plan_limits`` and seed them per plan."""
    # Add KB Accelerator columns to plan_limits.
    # NOT NULL columns carry a server default so existing rows stay valid;
    # nullable integer columns use NULL (the endpoints treat NULL as "no limit").
    op.add_column("plan_limits", sa.Column("kb_accelerator_enabled", sa.Boolean, nullable=False, server_default=sa.text("false")))
    op.add_column("plan_limits", sa.Column("kb_max_lifetime_conversions", sa.Integer, nullable=True))
    op.add_column("plan_limits", sa.Column("kb_batch_max_size", sa.Integer, nullable=True))
    op.add_column("plan_limits", sa.Column("kb_allowed_formats", JSONB, nullable=False, server_default=sa.text("'[\"txt\",\"paste\"]'::jsonb")))
    op.add_column("plan_limits", sa.Column("kb_detailed_analysis", sa.Boolean, nullable=False, server_default=sa.text("false")))
    op.add_column("plan_limits", sa.Column("kb_conversational_refinement", sa.Boolean, nullable=False, server_default=sa.text("false")))
    op.add_column("plan_limits", sa.Column("kb_step_library_matching", sa.Boolean, nullable=False, server_default=sa.text("false")))
    op.add_column("plan_limits", sa.Column("kb_history_limit", sa.Integer, nullable=True))
    # Seed defaults for each plan tier.
    # Free: feature enabled, 3 lifetime conversions, txt/paste only, history capped at 3.
    op.execute("""
UPDATE plan_limits SET
kb_accelerator_enabled = true,
kb_max_lifetime_conversions = 3,
kb_batch_max_size = NULL,
kb_allowed_formats = '["txt","paste"]'::jsonb,
kb_detailed_analysis = false,
kb_conversational_refinement = false,
kb_step_library_matching = false,
kb_history_limit = 3
WHERE plan = 'free'
""")
    # Pro: no lifetime or history cap, every format, batches of up to 5.
    op.execute("""
UPDATE plan_limits SET
kb_accelerator_enabled = true,
kb_max_lifetime_conversions = NULL,
kb_batch_max_size = 5,
kb_allowed_formats = '["txt","paste","docx","pdf","html","md"]'::jsonb,
kb_detailed_analysis = true,
kb_conversational_refinement = true,
kb_step_library_matching = true,
kb_history_limit = NULL
WHERE plan = 'pro'
""")
    # Team: same entitlements as pro, with batches of up to 10.
    op.execute("""
UPDATE plan_limits SET
kb_accelerator_enabled = true,
kb_max_lifetime_conversions = NULL,
kb_batch_max_size = 10,
kb_allowed_formats = '["txt","paste","docx","pdf","html","md"]'::jsonb,
kb_detailed_analysis = true,
kb_conversational_refinement = true,
kb_step_library_matching = true,
kb_history_limit = NULL
WHERE plan = 'team'
""")
def downgrade():
    """Remove every KB Accelerator column from ``plan_limits``."""
    # Dropped in the reverse of the order in which upgrade() added them.
    kb_columns = (
        "kb_history_limit",
        "kb_step_library_matching",
        "kb_conversational_refinement",
        "kb_detailed_analysis",
        "kb_allowed_formats",
        "kb_batch_max_size",
        "kb_max_lifetime_conversions",
        "kb_accelerator_enabled",
    )
    for column_name in kb_columns:
        op.drop_column("plan_limits", column_name)

View File

@@ -0,0 +1,685 @@
"""KB Accelerator endpoints.
Upload KB articles, convert to flows via AI, review, and commit.
POST /kb-accelerator/upload — Upload file or paste text
GET /kb-accelerator/{id} — Get import with nodes
GET /kb-accelerator — List imports for account
POST /kb-accelerator/{id}/convert — Re-trigger AI conversion
PATCH /kb-accelerator/{id}/nodes/{nid} — Edit a node
POST /kb-accelerator/{id}/commit — Commit to flow library
DELETE /kb-accelerator/{id} — Cancel/cleanup
GET /kb-accelerator/quota — Plan entitlements + usage
"""
import logging
import mimetypes
from datetime import datetime, timezone
from typing import Annotated, Optional
from uuid import UUID
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, UploadFile, File, Form, status
from sqlalchemy import select, func, delete
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
from app.core.config import settings
from app.core.rate_limit import limiter
from app.core.subscriptions import get_plan_limits
from app.core.ai_quota_service import get_user_plan
from app.core.kb_extraction_service import extract_text
from app.core.kb_conversion_service import convert_document
from app.models.kb_import import KBImport, KBImportNode
from app.models.plan_limits import PlanLimits
from app.models.tree import Tree
from app.models.user import User
from app.schemas.kb_accelerator import (
KBUploadTextRequest,
KBNodeEditRequest,
KBCommitRequest,
KBUploadResponse,
KBImportResponse,
KBImportNodeResponse,
KBImportSummary,
KBImportListResponse,
KBCommitResponse,
KBQuotaResponse,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Every route declared in this module is served under /kb-accelerator.
router = APIRouter(prefix="/kb-accelerator", tags=["kb-accelerator"])
# Max upload size: 10MB
MAX_UPLOAD_SIZE = 10 * 1024 * 1024
# File extension -> MIME types for the formats accepted today.
# NOTE(review): in the code visible here only the keys are consulted
# (extension detection and the "unsupported format" message); the MIME lists
# are not checked against the upload's content type.
ALLOWED_EXTENSIONS = {
    "txt": ["text/plain"],
    "docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
}
# Phase 2 formats (not yet enabled)
# NOTE(review): _detect_format still recognises these extensions, so rejection
# depends on the plan's allowed formats and on the extraction step.
PHASE2_EXTENSIONS = {
    "pdf": ["application/pdf"],
    "html": ["text/html"],
    "md": ["text/markdown", "text/plain"],
}
def _detect_format(filename: str) -> str | None:
    """Return the lower-cased extension of *filename* if it is a known format.

    A name that is empty, has no dot, or ends in an extension listed in neither
    ``ALLOWED_EXTENSIONS`` nor ``PHASE2_EXTENSIONS`` yields ``None``.
    """
    if not filename or "." not in filename:
        return None
    # Only the text after the last dot counts as the extension.
    _, _, suffix = filename.rpartition(".")
    suffix = suffix.lower()
    recognised = suffix in ALLOWED_EXTENSIONS or suffix in PHASE2_EXTENSIONS
    return suffix if recognised else None
async def _get_kb_limits(user: User, db: AsyncSession) -> PlanLimits | None:
    """Look up the plan of the user's account and return that plan's limits row."""
    account_plan = await get_user_plan(user.account_id, db)
    plan_limits = await get_plan_limits(account_plan, db)
    return plan_limits
async def _check_kb_enabled(user: User, db: AsyncSession) -> PlanLimits:
    """Return the account's plan limits, or raise 403 when KB Accelerator is off.

    Raises:
        HTTPException: 403 if no limits row exists or the feature flag is false.
    """
    plan_limits = await _get_kb_limits(user, db)
    if plan_limits and plan_limits.kb_accelerator_enabled:
        return plan_limits
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="KB Accelerator is not available on your plan.",
    )
async def _check_lifetime_limit(user: User, limits: PlanLimits, db: AsyncSession) -> None:
    """Raise 403 once the account has used up its lifetime conversion allowance.

    Usage is the number of the account's imports whose status is ``committed``.
    A ``None`` allowance means unlimited and skips the check.
    """
    allowance = limits.kb_max_lifetime_conversions
    if allowance is None:
        return  # Unlimited
    committed_stmt = select(func.count(KBImport.id)).where(
        KBImport.account_id == user.account_id,
        KBImport.status == "committed",
    )
    committed = await db.scalar(committed_stmt) or 0
    if committed < allowance:
        return
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail=f"You have reached your lifetime limit of {limits.kb_max_lifetime_conversions} KB conversions. Upgrade your plan for unlimited conversions.",
    )
async def _check_format_allowed(source_format: str, limits: PlanLimits) -> None:
allowed = limits.kb_allowed_formats or ["txt", "paste"]
if source_format not in allowed:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail=f"Format '{source_format}' is not available on your plan. Allowed: {', '.join(allowed)}",
)
async def _get_import_or_404(
    import_id: UUID, user: User, db: AsyncSession, *, load_nodes: bool = True
) -> KBImport:
    """Fetch one of the caller's account's KB imports, or raise 404.

    The lookup is scoped to ``user.account_id``, so an import belonging to a
    different account is reported as not found. With ``load_nodes`` the
    import's nodes are eagerly loaded in the same call.
    """
    stmt = select(KBImport).where(
        KBImport.id == import_id,
        KBImport.account_id == user.account_id,
    )
    if load_nodes:
        stmt = stmt.options(selectinload(KBImport.nodes))
    found = (await db.execute(stmt)).scalar_one_or_none()
    if found:
        return found
    raise HTTPException(status_code=404, detail="KB import not found")
async def _run_conversion(import_id: UUID, db_url: str) -> None:
    """Background task: run AI conversion on a KB import.

    Opens its own database session, loads the import and, if it is still in
    ``processing`` status, runs ``convert_document`` on it. Any exception is
    logged with its traceback and recorded on the import as a ``failed``
    status plus an error message.

    Args:
        import_id: Primary key of the import to convert.
        db_url: Unused; kept so existing ``add_task`` callers keep working.
    """
    from app.core.database import async_session_maker

    async with async_session_maker() as db:
        result = await db.execute(
            select(KBImport).where(KBImport.id == import_id)
        )
        kb_import = result.scalar_one_or_none()
        if not kb_import or kb_import.status != "processing":
            return
        try:
            await convert_document(kb_import, db)
            await db.commit()
        except Exception as e:
            logger.exception("Background KB conversion failed: %s", e)
            # A failed flush/commit leaves the session's transaction unusable,
            # so discard it (and any half-written nodes) before recording the
            # failure; otherwise the commit below can itself raise.
            await db.rollback()
            # Rollback expires loaded state; re-fetch instead of touching the
            # stale instance. The row may also have been deleted meanwhile.
            failed_import = await db.get(KBImport, import_id)
            if failed_import is None:
                return
            failed_import.status = "failed"
            failed_import.error_message = f"Conversion error: {str(e)}"
            await db.commit()
def _serialize_import(kb_import: KBImport) -> dict:
"""Serialize a KBImport to dict for response."""
return {
"id": kb_import.id,
"account_id": kb_import.account_id,
"created_by": kb_import.created_by,
"source_filename": kb_import.source_filename,
"source_format": kb_import.source_format,
"source_text": kb_import.source_text,
"source_metadata": kb_import.source_metadata,
"target_type": kb_import.target_type,
"status": kb_import.status,
"confidence_avg": kb_import.confidence_avg,
"error_message": kb_import.error_message,
"processing_time_ms": kb_import.processing_time_ms,
"ai_tokens_input": kb_import.ai_tokens_input,
"ai_tokens_output": kb_import.ai_tokens_output,
"tree_id": kb_import.tree_id,
"nodes": [
KBImportNodeResponse.model_validate(n) for n in kb_import.nodes
] if kb_import.nodes else [],
"created_at": kb_import.created_at.isoformat(),
"updated_at": kb_import.updated_at.isoformat(),
}
# ── Endpoints ──
@router.get("/quota", response_model=KBQuotaResponse)
async def get_quota(
    user: Annotated[User, Depends(require_engineer_or_admin)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Report the account's KB Accelerator entitlements and current usage.

    Usage is the number of the account's imports in ``committed`` status. When
    the plan has no limits row, a disabled quota with conservative defaults is
    returned instead of an error.
    """
    plan = await get_user_plan(user.account_id, db)
    limits = await get_plan_limits(plan, db)
    usage_stmt = select(func.count(KBImport.id)).where(
        KBImport.account_id == user.account_id,
        KBImport.status == "committed",
    )
    used = await db.scalar(usage_stmt) or 0

    if not limits:
        return KBQuotaResponse(
            plan=plan,
            kb_accelerator_enabled=False,
            lifetime_conversions_used=used,
            lifetime_conversions_limit=0,
            allowed_formats=["txt", "paste"],
            detailed_analysis=False,
            conversational_refinement=False,
            step_library_matching=False,
            history_limit=3,
            can_convert=False,
        )

    # A None cap means unlimited, so only a numeric cap can block conversion.
    cap = limits.kb_max_lifetime_conversions
    under_cap = cap is None or used < cap
    return KBQuotaResponse(
        plan=plan,
        kb_accelerator_enabled=limits.kb_accelerator_enabled,
        lifetime_conversions_used=used,
        lifetime_conversions_limit=cap,
        allowed_formats=limits.kb_allowed_formats or ["txt", "paste"],
        detailed_analysis=limits.kb_detailed_analysis,
        conversational_refinement=limits.kb_conversational_refinement,
        step_library_matching=limits.kb_step_library_matching,
        history_limit=limits.kb_history_limit,
        can_convert=limits.kb_accelerator_enabled and under_cap,
    )
@router.post("/upload", response_model=KBUploadResponse, status_code=201)
@limiter.limit("10/minute")
async def upload_kb_article(
    request: Request,
    background_tasks: BackgroundTasks,
    user: Annotated[User, Depends(require_engineer_or_admin)],
    db: Annotated[AsyncSession, Depends(get_db)],
    file: Optional[UploadFile] = File(None),
    content: Optional[str] = Form(None),
    title: Optional[str] = Form(None),
    target_type: Optional[str] = Form(None),
):
    """Upload a KB article file or paste text for conversion.

    Exactly one source is used: an uploaded ``file`` takes precedence over
    pasted ``content``. The import row is stored in ``processing`` status and
    the AI conversion is scheduled as a background task.

    Raises:
        HTTPException: 503 when AI is not configured; 403 when the plan
            disallows the feature, the format, or further conversions; 400 for
            unsupported/empty/too-short input or an invalid ``target_type``;
            413 for files over the size limit; 500 when extraction fails
            internally.
    """
    if not settings.ai_enabled:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="AI is not configured.",
        )
    limits = await _check_kb_enabled(user, db)
    await _check_lifetime_limit(user, limits, db)
    # Determine source format and extract text
    if file and file.filename:
        source_format = _detect_format(file.filename)
        if not source_format:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported file format. Supported: {', '.join(ALLOWED_EXTENSIONS.keys())}",
            )
        await _check_format_allowed(source_format, limits)
        # Read at most one byte past the limit: enough to detect an oversized
        # upload without copying an arbitrarily large body into memory.
        file_bytes = await file.read(MAX_UPLOAD_SIZE + 1)
        if len(file_bytes) > MAX_UPLOAD_SIZE:
            raise HTTPException(status_code=413, detail="File exceeds 10MB limit.")
        if len(file_bytes) == 0:
            raise HTTPException(status_code=400, detail="Uploaded file is empty.")
        source_filename = file.filename
        try:
            source_text, source_metadata = extract_text(file_bytes, source_format)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
        except RuntimeError as e:
            raise HTTPException(status_code=500, detail=str(e)) from e
    elif content:
        source_format = "paste"
        await _check_format_allowed(source_format, limits)
        source_filename = title
        source_text = content.strip()
        source_metadata = None
        if len(source_text) < 10:
            raise HTTPException(status_code=400, detail="Content must be at least 10 characters.")
    else:
        raise HTTPException(status_code=400, detail="Provide either a file or content text.")
    # Validate target_type
    if target_type and target_type not in ("troubleshooting", "procedural"):
        raise HTTPException(status_code=400, detail="target_type must be 'troubleshooting' or 'procedural'.")
    if not target_type:
        target_type = "troubleshooting"  # Default; Phase 2 adds "let AI decide"
    # Create KB import record
    kb_import = KBImport(
        account_id=user.account_id,
        created_by=user.id,
        source_filename=source_filename,
        source_format=source_format,
        source_text=source_text,
        source_metadata=source_metadata,
        target_type=target_type,
        status="processing",
    )
    db.add(kb_import)
    # Flush so the generated primary key is available for the background task.
    await db.flush()
    # Trigger AI conversion in background
    background_tasks.add_task(_run_conversion, kb_import.id, settings.DATABASE_URL)
    await db.commit()
    return KBUploadResponse(
        id=kb_import.id,
        status=kb_import.status,
        source_format=kb_import.source_format,
    )
@router.get("/{import_id}", response_model=KBImportResponse)
async def get_kb_import(
    import_id: UUID,
    user: Annotated[User, Depends(require_engineer_or_admin)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Return one KB import of the caller's account together with its nodes."""
    return _serialize_import(await _get_import_or_404(import_id, user, db))
@router.get("", response_model=KBImportListResponse)
async def list_kb_imports(
    user: Annotated[User, Depends(require_engineer_or_admin)],
    db: Annotated[AsyncSession, Depends(get_db)],
    skip: int = 0,
    limit: int = 20,
    status_filter: Optional[str] = None,
):
    """List KB imports for the current account, newest first.

    Args:
        skip: Number of rows to skip; negative values are treated as 0.
        limit: Maximum rows to return; negative values are treated as 0.
        status_filter: When given, only imports with this status are listed
            and counted.

    Plans with a history limit only expose their most recent
    ``kb_history_limit`` imports; ``total`` still counts every matching import.
    """
    # Negative values would reach OFFSET/LIMIT and fail in the database, which
    # surfaces as a 500; clamp them instead.
    skip = max(skip, 0)
    limit = max(limit, 0)
    limits = await _get_kb_limits(user, db)
    history_limit = limits.kb_history_limit if limits else 3
    query = select(KBImport).where(KBImport.account_id == user.account_id)
    count_query = select(func.count(KBImport.id)).where(KBImport.account_id == user.account_id)
    if status_filter:
        query = query.where(KBImport.status == status_filter)
        count_query = count_query.where(KBImport.status == status_filter)
    total = await db.scalar(count_query) or 0
    query = query.order_by(KBImport.created_at.desc())
    # Apply history limit for free tier
    effective_limit = limit
    if history_limit is not None:
        effective_limit = min(limit, history_limit - skip) if skip < history_limit else 0
    if effective_limit <= 0:
        # Nothing can be returned for this window, so skip the row query.
        return KBImportListResponse(items=[], total=total, skip=skip, limit=limit)
    query = query.offset(skip).limit(effective_limit)
    # Nodes are loaded only so node_count can be reported per import.
    query = query.options(selectinload(KBImport.nodes))
    result = await db.execute(query)
    items = [
        KBImportSummary(
            id=imp.id,
            source_filename=imp.source_filename,
            source_format=imp.source_format,
            target_type=imp.target_type,
            status=imp.status,
            confidence_avg=imp.confidence_avg,
            node_count=len(imp.nodes) if imp.nodes else 0,
            created_at=imp.created_at.isoformat(),
        )
        for imp in result.scalars().all()
    ]
    return KBImportListResponse(items=items, total=total, skip=skip, limit=limit)
@router.post("/{import_id}/convert", response_model=KBUploadResponse)
@limiter.limit("30/minute")
async def reconvert(
    request: Request,
    import_id: UUID,
    background_tasks: BackgroundTasks,
    user: Annotated[User, Depends(require_engineer_or_admin)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Discard an import's nodes and schedule a fresh AI conversion.

    Committed imports cannot be reconverted. The import goes back to
    ``processing`` with its previous error message and average confidence
    cleared.
    """
    if not settings.ai_enabled:
        raise HTTPException(status_code=503, detail="AI is not configured.")
    kb_import = await _get_import_or_404(import_id, user, db, load_nodes=False)
    if kb_import.status == "committed":
        raise HTTPException(status_code=400, detail="Cannot reconvert a committed import.")

    # Delete existing nodes
    purge_nodes = delete(KBImportNode).where(KBImportNode.kb_import_id == kb_import.id)
    await db.execute(purge_nodes)

    # Reset the import to a clean "processing" state.
    kb_import.status = "processing"
    kb_import.error_message = None
    kb_import.confidence_avg = None
    await db.flush()

    background_tasks.add_task(_run_conversion, kb_import.id, settings.DATABASE_URL)
    await db.commit()
    return KBUploadResponse(
        id=kb_import.id,
        status="processing",
        source_format=kb_import.source_format,
    )
@router.patch("/{import_id}/nodes/{node_id}", response_model=KBImportNodeResponse)
async def edit_node(
    import_id: UUID,
    node_id: UUID,
    data: KBNodeEditRequest,
    user: Annotated[User, Depends(require_engineer_or_admin)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Edit a specific node in a KB import during review.

    Supported ``data.operation`` values:

    * ``approve`` / ``reject`` — set or clear ``user_approved``.
    * ``edit`` — replace the node content and mark it user-edited.
    * ``delete`` — remove the node, renumber the rest from 0 and return a
      placeholder response (``node_order=-1``, ``content={"deleted": True}``).
    * ``insert_after`` — insert a new, pre-approved node right after this one
      and return the new node.
    * ``regenerate`` — clear the edited/approved flags (no AI call yet).

    Raises:
        HTTPException: 404 if the import or node is not found; 400 if the
            import is not ``ready``, required content is missing, the inserted
            node type is invalid, or the operation is unknown.
    """
    # Mirrors the ck_kb_import_nodes_node_type CHECK constraint so a bad type is
    # reported as a 400 instead of failing at flush time.
    valid_node_types = ("question", "resolution", "step", "section_header", "warning", "action")

    kb_import = await _get_import_or_404(import_id, user, db, load_nodes=False)
    if kb_import.status != "ready":
        raise HTTPException(status_code=400, detail="Import must be in 'ready' status to edit nodes.")
    result = await db.execute(
        select(KBImportNode).where(
            KBImportNode.id == node_id,
            KBImportNode.kb_import_id == import_id,
        )
    )
    node = result.scalar_one_or_none()
    if not node:
        raise HTTPException(status_code=404, detail="Node not found")
    op = data.operation
    if op == "approve":
        node.user_approved = True
    elif op == "reject":
        node.user_approved = False
    elif op == "edit":
        if not data.content:
            raise HTTPException(status_code=400, detail="Content required for edit operation.")
        node.content = data.content
        node.user_edited = True
    elif op == "delete":
        await db.delete(node)
        # Reorder remaining nodes
        remaining = await db.execute(
            select(KBImportNode)
            .where(KBImportNode.kb_import_id == import_id)
            .order_by(KBImportNode.node_order)
        )
        for idx, n in enumerate(remaining.scalars().all()):
            n.node_order = idx
        await db.flush()
        await db.commit()
        # Return a placeholder response for deleted node
        return KBImportNodeResponse(
            id=node_id,
            kb_import_id=import_id,
            node_order=-1,
            node_type="step",
            content={"deleted": True},
            confidence_score=0,
            user_edited=False,
            user_approved=False,
        )
    elif op == "insert_after":
        if not data.content:
            raise HTTPException(status_code=400, detail="Content required for insert_after operation.")
        new_node_type = data.content.get("type", "step")
        if new_node_type not in valid_node_types:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid node type. Allowed: {', '.join(valid_node_types)}",
            )
        # Shift subsequent nodes
        subsequent = await db.execute(
            select(KBImportNode)
            .where(
                KBImportNode.kb_import_id == import_id,
                KBImportNode.node_order > node.node_order,
            )
            .order_by(KBImportNode.node_order)
        )
        for n in subsequent.scalars().all():
            n.node_order += 1
        new_node = KBImportNode(
            kb_import_id=import_id,
            node_order=node.node_order + 1,
            node_type=new_node_type,
            content=data.content,
            confidence_score=1.0,  # User-created nodes are fully trusted
            user_edited=True,
            user_approved=True,
        )
        db.add(new_node)
        await db.flush()
        await db.commit()
        return KBImportNodeResponse.model_validate(new_node)
    elif op == "regenerate":
        # Re-run AI for just this node (simplified: update placeholder)
        # Full implementation would call AI with node context + guidance
        node.user_edited = False
        node.user_approved = False
        node.updated_at = datetime.now(timezone.utc)
    else:
        # Without this branch an unrecognised operation would commit nothing
        # and still answer 200 as if it had succeeded.
        raise HTTPException(status_code=400, detail=f"Unsupported operation '{op}'.")
    await db.flush()
    await db.commit()
    return KBImportNodeResponse.model_validate(node)
@router.post("/{import_id}/commit", response_model=KBCommitResponse)
async def commit_import(
    import_id: UUID,
    user: Annotated[User, Depends(require_engineer_or_admin)],
    db: Annotated[AsyncSession, Depends(get_db)],
    data: Optional[KBCommitRequest] = None,
):
    """Commit a reviewed KB import to the flow library as a Tree.

    A draft ``Tree`` is built from the import's nodes, and the import is
    marked ``committed`` and linked to it. Name and description come from the
    request when given, otherwise from the stored conversion metadata.

    Raises:
        HTTPException: 404 if the import is not found; 400 if it is not
            ``ready`` or has no nodes; 403 if the plan disallows the feature
            or the lifetime conversion limit has been reached.
    """
    kb_import = await _get_import_or_404(import_id, user, db)
    if kb_import.status != "ready":
        raise HTTPException(status_code=400, detail="Import must be in 'ready' status to commit.")
    if not kb_import.nodes:
        raise HTTPException(status_code=400, detail="No nodes to commit.")
    # The lifetime limit counts committed imports, so it has to be enforced
    # here as well: the upload-time check alone lets an account stack up any
    # number of drafts and then commit them all.
    limits = await _check_kb_enabled(user, db)
    await _check_lifetime_limit(user, limits, db)
    # Extract title/description from conversion metadata. ``or {}`` also covers
    # keys that are present but null.
    source_metadata = kb_import.source_metadata or {}
    conversion_meta = source_metadata.get("_conversion") or {}
    requested_name = data.name if data and data.name else None
    tree_name = requested_name or conversion_meta.get("title") or "Imported Flow"
    tree_description = (data.description if data else None) or conversion_meta.get("description")
    # Build tree_structure from nodes
    if kb_import.target_type == "troubleshooting":
        tree_structure = _build_troubleshooting_tree(kb_import.nodes)
    else:
        tree_structure = _build_procedural_tree(kb_import.nodes)
    # Build intake_form for procedural flows
    intake_form = None
    if kb_import.target_type == "procedural":
        intake_form = source_metadata.get("_intake_form")
    # Create the Tree record
    tree = Tree(
        name=tree_name,
        description=tree_description,
        tree_type=kb_import.target_type,
        tree_structure=tree_structure,
        intake_form=intake_form,
        author_id=user.id,
        account_id=user.account_id,
        status="draft",
        import_metadata={
            "source": "kb_accelerator",
            "kb_import_id": str(kb_import.id),
            "source_filename": kb_import.source_filename,
            "source_format": kb_import.source_format,
            "confidence_avg": kb_import.confidence_avg,
            "node_count": len(kb_import.nodes),
            "converted_at": datetime.now(timezone.utc).isoformat(),
        },
    )
    if data and data.category_id:
        tree.category_id = data.category_id
    db.add(tree)
    # Flush so tree.id is populated before it is linked from the import.
    await db.flush()
    kb_import.status = "committed"
    kb_import.tree_id = tree.id
    await db.commit()
    return KBCommitResponse(
        tree_id=tree.id,
        import_id=kb_import.id,
        tree_type=kb_import.target_type,
    )
@router.delete("/{import_id}", status_code=204)
async def delete_import(
    import_id: UUID,
    user: Annotated[User, Depends(require_engineer_or_admin)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Delete an uncommitted KB import and all of its nodes.

    Raises:
        HTTPException: 404 if the import is not found, 400 if it has already
            been committed.
    """
    target = await _get_import_or_404(import_id, user, db, load_nodes=False)
    if target.status == "committed":
        raise HTTPException(status_code=400, detail="Cannot delete a committed import.")
    # Remove the nodes explicitly, then the import row itself.
    purge_nodes = delete(KBImportNode).where(KBImportNode.kb_import_id == import_id)
    await db.execute(purge_nodes)
    await db.delete(target)
    await db.commit()
# ── Tree Structure Builders ──
def _build_troubleshooting_tree(nodes: list[KBImportNode]) -> dict:
"""Build a troubleshooting tree_structure from import nodes."""
if not nodes:
return {"id": "root", "type": "decision", "question": "Empty", "children": []}
# Map original IDs to proper tree node structure
original_id_map: dict[str, KBImportNode] = {}
for node in nodes:
orig_id = node.content.get("original_id", str(node.id))
original_id_map[orig_id] = node
def _build_node(import_node: KBImportNode) -> dict:
content = import_node.content
node_type = import_node.node_type
if node_type == "resolution":
return {
"id": content.get("original_id", str(import_node.id)),
"type": "solution",
"question": content.get("question", ""),
"children": [],
}
if node_type == "action":
result = {
"id": content.get("original_id", str(import_node.id)),
"type": "action",
"question": content.get("question", ""),
"children": [],
}
next_id = content.get("next_node_id")
if next_id and next_id in original_id_map:
result["next_node_id"] = next_id
return result
# question/decision type
options = content.get("options", [])
children = []
for opt in options:
next_id = opt.get("next_node_id")
if next_id and next_id in original_id_map:
child_node = _build_node(original_id_map[next_id])
children.append(child_node)
return {
"id": content.get("original_id", str(import_node.id)),
"type": "decision",
"question": content.get("question", ""),
"options": [
{"label": opt.get("label", ""), "next_node_id": opt.get("next_node_id", "")}
for opt in options
],
"children": children,
}
root_node = nodes[0]
return _build_node(root_node)
def _build_procedural_tree(nodes: list[KBImportNode]) -> dict:
"""Build a procedural tree_structure from import nodes."""
steps = []
for node in sorted(nodes, key=lambda n: n.node_order):
content = node.content
step = {
"id": content.get("original_id", str(node.id)),
"type": node.node_type,
"content": content.get("content", ""),
}
steps.append(step)
return {
"id": "root",
"type": "procedural",
"steps": steps,
}

View File

@@ -14,6 +14,7 @@ from app.api.endpoints import survey
from app.api.endpoints import admin_survey
from app.api.endpoints import tree_transfer
from app.api.endpoints import ai_suggestions
from app.api.endpoints import kb_accelerator
api_router = APIRouter()
@@ -52,3 +53,4 @@ api_router.include_router(survey.router)
api_router.include_router(admin_survey.router)
api_router.include_router(tree_transfer.router)
api_router.include_router(ai_suggestions.router)
api_router.include_router(kb_accelerator.router)

View File

@@ -98,6 +98,7 @@ class Settings(BaseSettings):
"quick_action": "fast",
"open_chat": "standard",
"variable_inference": "fast",
"kb_convert": "standard",
}
def get_model_for_action(self, action_type: str) -> str:

View File

@@ -0,0 +1,498 @@
"""KB Accelerator AI conversion service.
Converts extracted KB article text into ResolutionFlow tree structures
using the Anthropic API (via the shared AI provider layer).
"""
import json
import logging
import re
import time
from typing import Any
from uuid import UUID
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.ai_provider import get_ai_provider
from app.core.ai_quota_service import record_ai_usage, get_user_plan
from app.core.config import settings
from app.models.kb_import import KBImport, KBImportNode
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Cost estimation (Sonnet pricing)
# Expressed per single token: 3.0 and 15.0 per one million input and output
# tokens respectively. NOTE(review): presumably US dollars — confirm, and
# re-check against current pricing whenever the configured model changes.
COST_PER_INPUT_TOKEN = 3.0 / 1_000_000
COST_PER_OUTPUT_TOKEN = 15.0 / 1_000_000
def _strip_markdown_fences(text: str) -> str:
"""Strip markdown code fences if the model wrapped its JSON response."""
text = text.strip()
match = re.match(r"^```(?:json)?\s*([\s\S]*?)```$", text)
if match:
return match.group(1).strip()
return text
def _estimate_cost(input_tokens: int, output_tokens: int) -> float:
    """Estimate the cost of one AI call from its input and output token counts.

    Each count is multiplied by its module-level per-token rate and the two
    products are summed.
    """
    input_cost = input_tokens * COST_PER_INPUT_TOKEN
    output_cost = output_tokens * COST_PER_OUTPUT_TOKEN
    return input_cost + output_cost
# ── System Prompts ──
# System prompt for target_type == "troubleshooting".
# The response parser reads `title`, `description` and `nodes` from a top-level JSON
# object, so the prompt must consistently ask for an object (not a bare array).
TROUBLESHOOTING_SYSTEM_PROMPT = """You are an MSP documentation specialist for ResolutionFlow. Your task is to convert a knowledge base article into an interactive troubleshooting decision tree.
Analyze the article and produce a JSON object whose `nodes` array forms a troubleshooting flow. Each node represents either a diagnostic question (decision point) or a resolution (solution).
## Node Types
- **question**: A diagnostic question with multiple answer options. Each option leads to another node.
- **resolution**: A terminal node with the solution/fix text.
- **action**: An instruction step that leads to the next node via next_node_id.
- **warning**: A caution or important note.
## Output Format
Return a JSON object with this structure:
```json
{
"title": "Flow title derived from the article",
"description": "Brief description of what this flow troubleshoots",
"nodes": [
{
"id": "unique-node-id",
"type": "question",
"question": "What symptom is the user experiencing?",
"options": [
{"label": "Cannot connect", "next_node_id": "check-network"},
{"label": "Slow performance", "next_node_id": "check-resources"}
],
"confidence": 0.95,
"source_excerpt": "The exact text from the article this node was derived from"
},
{
"id": "check-network",
"type": "action",
"question": "Check the network connection and ping the server",
"next_node_id": "network-result",
"confidence": 0.88,
"source_excerpt": "Step 1: Verify network connectivity..."
},
{
"id": "solution-restart",
"type": "resolution",
"question": "Restart the service. The issue should now be resolved.",
"confidence": 0.92,
"source_excerpt": "Restarting the service resolves the connectivity issue."
}
]
}
```
## Rules
1. Every node MUST have a unique `id` (descriptive kebab-case).
2. Every node MUST have a `confidence` score between 0.0 and 1.0.
3. Every node MUST have a `source_excerpt` — the exact text from the source article it was derived from.
4. The first node is the root of the decision tree.
5. All `next_node_id` and option `next_node_id` references must point to existing node IDs.
6. Detect implicit branching logic (e.g., "If X, do Y; otherwise Z") and create decision nodes.
7. Produce at least 3 nodes. Maximum 50 nodes.
8. Use high confidence (0.9+) for directly stated steps, medium (0.7-0.89) for reasonable inferences, low (<0.7) for significant interpretation.
9. Return ONLY valid JSON — no markdown fences, no explanation text."""
# System prompt used for every target_type other than "troubleshooting".
# It asks for a top-level JSON object with `title`, `description`, `steps` and
# `intake_form`; instance-specific values are to be replaced by [VAR:name] tokens.
PROCEDURAL_SYSTEM_PROMPT = """You are an MSP documentation specialist for ResolutionFlow. Your task is to convert a knowledge base article into a procedural (step-by-step) flow.
Analyze the article and produce a JSON object with sequential steps and detected variables.
## Step Types
- **step**: A regular instruction step.
- **section_header**: A section divider/title (no action, just organizational).
- **warning**: A caution or important note that should be highlighted.
## Variable Detection
Identify values that would change between executions (server names, IPs, usernames, domains, etc.) and replace them with `[VAR:variable_name]` tokens. Also produce an intake_form that captures these variables before execution.
## Output Format
Return a JSON object:
```json
{
"title": "Procedure title derived from the article",
"description": "Brief description of what this procedure accomplishes",
"steps": [
{
"id": "unique-step-id",
"type": "step",
"content": "Open Server Manager and navigate to Add Roles on [VAR:server_name]",
"confidence": 0.95,
"source_excerpt": "Step 1: Open Server Manager on DC01..."
},
{
"id": "warning-dns",
"type": "warning",
"content": "WARNING: This will restart DNS and cause brief connectivity loss",
"confidence": 0.90,
"source_excerpt": "Note: Restarting DNS will cause a brief outage"
},
{
"id": "section-verification",
"type": "section_header",
"content": "Verification Steps",
"confidence": 1.0,
"source_excerpt": "Verification"
}
],
"intake_form": [
{
"variable_name": "server_name",
"label": "Server Name",
"field_type": "text",
"required": true,
"display_order": 1
},
{
"variable_name": "ip_address",
"label": "IP Address",
"field_type": "text",
"required": true,
"display_order": 2
}
]
}
```
## Variable Type Mapping
- IP addresses → field_type: "text", variable like `ip_address`
- Server/computer names → field_type: "text", variable like `server_name`
- Domain names → field_type: "text", variable like `domain_name`
- Usernames/email → field_type: "text", variable like `username`
- Port numbers → field_type: "number", variable like `port`
## Rules
1. Every step MUST have a unique `id` (descriptive kebab-case).
2. Every step MUST have a `confidence` score between 0.0 and 1.0.
3. Every step MUST have a `source_excerpt` — the exact text from the source article.
4. Preserve the original step ordering from the article.
5. Detect ALL instance-specific values and replace with `[VAR:name]` tokens.
6. Generate an intake_form entry for each unique variable detected.
7. Produce at least 2 steps. Maximum 100 steps.
8. Use high confidence (0.9+) for directly stated steps, medium (0.7-0.89) for inferences, low (<0.7) for significant interpretation.
9. Return ONLY valid JSON — no markdown fences, no explanation text."""
def _build_user_message(
source_text: str,
source_metadata: dict[str, Any] | None,
source_filename: str | None,
) -> str:
"""Build the user message containing the extracted text and metadata."""
parts = []
if source_filename:
parts.append(f"Source file: {source_filename}")
if source_metadata:
headings = source_metadata.get("headings", [])
if headings:
heading_text = ", ".join(
f"H{h['level']}: {h['text']}" for h in headings[:20]
)
parts.append(f"Detected headings: {heading_text}")
lists = source_metadata.get("lists", [])
if lists:
parts.append(f"Detected {len(lists)} list(s) in the document.")
tables = source_metadata.get("tables", [])
if tables:
parts.append(f"Detected {len(tables)} table(s) in the document.")
parts.append(f"\n--- ARTICLE CONTENT ---\n\n{source_text}")
return "\n".join(parts)
def _parse_troubleshooting_response(
    data: dict[str, Any],
    kb_import_id: UUID,
) -> tuple[list[KBImportNode], str, str | None]:
    """Parse AI response into KBImportNode records for troubleshooting flows.

    Each entry of ``data["nodes"]`` becomes one ``KBImportNode`` (not yet added
    to any session), in the order the model returned them. A node's parent is
    the first node, in response order, that references it through an option's
    ``next_node_id`` or its own ``next_node_id``. The first node is the root
    and never receives a parent; self-references and references that would
    make a node its own ancestor are ignored, so parent links always form a
    forest even when the flow itself loops back.

    Args:
        data: Decoded JSON object returned by the model.
        kb_import_id: Id of the owning import, stamped on every node.

    Returns (nodes, title, description).

    Raises:
        ValueError: If there are no nodes, the nodes are not a list of
            objects, or two nodes share the same id.
    """
    # Local import: this module only imports the ``UUID`` class from ``uuid``.
    import uuid as uuid_mod

    # Values accepted by the ck_kb_import_nodes_node_type CHECK constraint on
    # KBImportNode; anything else would only fail later, at flush time.
    allowed_types = frozenset(
        {"question", "resolution", "step", "section_header", "warning", "action"}
    )

    title = data.get("title", "Imported Troubleshooting Flow")
    description = data.get("description")
    raw_nodes = data.get("nodes", [])
    if not raw_nodes:
        raise ValueError("AI returned no nodes")
    if not isinstance(raw_nodes, list) or not all(
        isinstance(raw_node, dict) for raw_node in raw_nodes
    ):
        raise ValueError("AI returned malformed nodes (expected a list of objects)")

    # Resolve every node's id exactly once so the parent mapping and the record
    # creation below agree on the fallback used for a missing id.
    node_ids: list[Any] = []
    node_id_map: dict[Any, uuid_mod.UUID] = {}
    for order, raw_node in enumerate(raw_nodes):
        nid = raw_node.get("id")
        if nid is None:
            nid = f"node-{order}"
        if nid in node_id_map:
            # Two records would otherwise share one primary key.
            raise ValueError(f"AI returned duplicate node id: {nid!r}")
        node_id_map[nid] = uuid_mod.uuid4()
        node_ids.append(nid)

    root_id = node_ids[0]
    parent_of: dict[Any, Any] = {}

    def _is_self_or_ancestor(candidate: Any, start: Any) -> bool:
        """Return True if *candidate* is *start* or one of its current ancestors."""
        current = start
        while current is not None:
            if current == candidate:
                return True
            current = parent_of.get(current)
        return False

    # Trace parent relationships: options and next_node_id both point at children.
    for nid, raw_node in zip(node_ids, raw_nodes):
        targets = [
            opt.get("next_node_id")
            for opt in (raw_node.get("options") or [])
            if isinstance(opt, dict)
        ]
        targets.append(raw_node.get("next_node_id"))
        for child_id in targets:
            if not child_id or child_id not in node_id_map:
                continue  # dangling or absent reference
            if child_id == root_id or child_id in parent_of:
                continue  # root stays parentless; first referrer wins
            if _is_self_or_ancestor(child_id, nid):
                continue  # back-edge: would create a cycle of parents
            parent_of[child_id] = nid

    # Create import node records preserving response order.
    nodes: list[KBImportNode] = []
    for order, (nid, raw_node) in enumerate(zip(node_ids, raw_nodes)):
        node_type = raw_node.get("type", "question")
        # "decision" is an accepted alias; any other value the table would
        # reject falls back to the same default used for a missing type.
        if not isinstance(node_type, str) or node_type not in allowed_types:
            node_type = "question"

        parent_str_id = parent_of.get(nid)
        parent_uuid = node_id_map[parent_str_id] if parent_str_id is not None else None

        # Build content JSONB
        content: dict[str, Any] = {
            "original_id": nid,
            "question": raw_node.get("question", ""),
        }
        if raw_node.get("options"):
            content["options"] = raw_node["options"]
        if raw_node.get("next_node_id"):
            content["next_node_id"] = raw_node["next_node_id"]

        nodes.append(
            KBImportNode(
                id=node_id_map[nid],
                kb_import_id=kb_import_id,
                node_order=order,
                node_type=node_type,
                content=content,
                parent_node_id=parent_uuid,
                source_excerpt=raw_node.get("source_excerpt"),
                confidence_score=float(raw_node.get("confidence", 0.5)),
                user_edited=False,
                user_approved=False,
            )
        )
    return nodes, title, description
def _parse_procedural_response(
    data: dict[str, Any],
    kb_import_id: UUID,
) -> tuple[list[KBImportNode], str, str | None, list[dict[str, Any]] | None]:
    """Turn the model's procedural JSON into an ordered list of KBImportNode rows.

    Every entry of ``data["steps"]`` yields one parentless node whose
    ``node_order`` is its position in the list. Step types other than
    ``step``, ``section_header`` and ``warning`` are stored as ``step``.

    Returns (nodes, title, description, intake_form).

    Raises:
        ValueError: If the response contains no steps.
    """
    import uuid as uuid_mod

    raw_steps = data.get("steps", [])
    if not raw_steps:
        raise ValueError("AI returned no steps")

    permitted_types = ("step", "section_header", "warning")

    def _to_node(position: int, raw_step: dict[str, Any]) -> KBImportNode:
        """Build the import node for the step at *position*."""
        payload: dict[str, Any] = {
            "original_id": raw_step.get("id", f"step-{position}"),
            "content": raw_step.get("content", ""),
        }
        declared_type = raw_step.get("type", "step")
        return KBImportNode(
            id=uuid_mod.uuid4(),
            kb_import_id=kb_import_id,
            node_order=position,
            node_type=declared_type if declared_type in permitted_types else "step",
            content=payload,
            parent_node_id=None,  # Procedural flows are linear
            source_excerpt=raw_step.get("source_excerpt"),
            confidence_score=float(raw_step.get("confidence", 0.5)),
            user_edited=False,
            user_approved=False,
        )

    nodes: list[KBImportNode] = [
        _to_node(position, raw_step) for position, raw_step in enumerate(raw_steps)
    ]
    return (
        nodes,
        data.get("title", "Imported Procedure"),
        data.get("description"),
        data.get("intake_form"),
    )
def _mark_conversion_failed(
    kb_import: KBImport,
    error_message: str,
    start_time: float,
    input_tokens: int | None = None,
    output_tokens: int | None = None,
) -> None:
    """Stamp *kb_import* as failed with a message, elapsed time and token counts."""
    kb_import.status = "failed"
    kb_import.error_message = error_message
    kb_import.processing_time_ms = int((time.monotonic() - start_time) * 1000)
    # Token counts are only known once the provider call has returned.
    if input_tokens is not None:
        kb_import.ai_tokens_input = input_tokens
    if output_tokens is not None:
        kb_import.ai_tokens_output = output_tokens


async def convert_document(
    kb_import: KBImport,
    db: AsyncSession,
) -> list[KBImportNode]:
    """Run AI conversion on an extracted KB article.

    Sends the article to the model selected for the ``kb_convert`` action,
    parses the JSON reply into ``KBImportNode`` records, adds them to *db* and
    updates *kb_import* (status, average confidence, timing, token counts and
    the ``_conversion`` / ``_intake_form`` entries of ``source_metadata``).
    Changes are flushed but not committed.

    Failures (provider error, invalid JSON, unusable structure) do not raise:
    the import is marked ``failed`` with an error message and an empty list is
    returned.

    Args:
        kb_import: The import row holding the extracted source text.
        db: Session used to persist nodes and usage records.

    Returns:
        The created nodes, or ``[]`` if the conversion failed.
    """
    start_time = time.monotonic()

    # Select system prompt based on target type
    if kb_import.target_type == "troubleshooting":
        system_prompt = TROUBLESHOOTING_SYSTEM_PROMPT
    else:
        system_prompt = PROCEDURAL_SYSTEM_PROMPT

    user_message = _build_user_message(
        source_text=kb_import.source_text,
        source_metadata=kb_import.source_metadata,
        source_filename=kb_import.source_filename,
    )

    # Get AI provider with model routing
    model = settings.get_model_for_action("kb_convert")
    provider = get_ai_provider(model=model)

    try:
        raw_text, input_tokens, output_tokens = await provider.generate_json(
            system_prompt=system_prompt,
            messages=[{"role": "user", "content": user_message}],
            max_tokens=8192,
        )
    except Exception as e:
        # Boundary handler: any provider failure becomes a "failed" import.
        logger.exception("AI conversion failed for kb_import=%s: %s", kb_import.id, e)
        _mark_conversion_failed(kb_import, f"AI processing error: {e}", start_time)
        await db.flush()
        # Record failed usage
        plan = await get_user_plan(kb_import.account_id, db)
        await record_ai_usage(
            user_id=kb_import.created_by,
            account_id=kb_import.account_id,
            conversation_id=None,
            generation_type="kb_convert",
            tier=plan,
            input_tokens=0,
            output_tokens=0,
            estimated_cost=0.0,
            succeeded=False,
            counts_toward_quota=False,
            error_code="ai_error",
            extra_data={"kb_import_id": str(kb_import.id)},
            db=db,
        )
        return []

    # Parse JSON response
    # NOTE(review): the two parse-failure paths below consumed tokens but write no
    # record_ai_usage row, unlike the provider-error path — confirm that is intended.
    raw_text = _strip_markdown_fences(raw_text)
    try:
        data = json.loads(raw_text)
    except json.JSONDecodeError as e:
        logger.error(
            "KB conversion JSON parse failed for kb_import=%s (%d chars): %s",
            kb_import.id, len(raw_text), raw_text[:500],
        )
        _mark_conversion_failed(
            kb_import, f"AI returned invalid JSON: {e}", start_time,
            input_tokens, output_tokens,
        )
        await db.flush()
        return []

    # Parse into nodes based on target type
    try:
        # Valid JSON that is not an object (e.g. a bare array) has no `.get`;
        # reject it here so the import is marked failed instead of raising.
        if not isinstance(data, dict):
            raise ValueError("expected a JSON object at the top level")
        intake_form = None
        if kb_import.target_type == "troubleshooting":
            nodes, title, description = _parse_troubleshooting_response(
                data, kb_import.id
            )
        else:
            nodes, title, description, intake_form = _parse_procedural_response(
                data, kb_import.id
            )
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        logger.error("KB node parsing failed for kb_import=%s: %s", kb_import.id, e)
        _mark_conversion_failed(
            kb_import, f"Failed to parse AI response: {e}", start_time,
            input_tokens, output_tokens,
        )
        await db.flush()
        return []

    # Persist nodes
    for node in nodes:
        db.add(node)

    # Update import record
    elapsed_ms = int((time.monotonic() - start_time) * 1000)
    confidence_scores = [n.confidence_score for n in nodes]
    avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.0
    kb_import.status = "ready"
    kb_import.confidence_avg = avg_confidence
    kb_import.processing_time_ms = elapsed_ms
    kb_import.ai_tokens_input = input_tokens
    kb_import.ai_tokens_output = output_tokens

    # Store parsed metadata for commit phase.
    # source_metadata is a plain JSONB column, and SQLAlchemy only notices a
    # change to such a column when the attribute is reassigned, not when the
    # existing dict is mutated in place. Build a fresh dict and assign it so the
    # new keys are persisted even when extraction metadata already existed.
    updated_metadata: dict[str, Any] = dict(kb_import.source_metadata or {})
    updated_metadata["_conversion"] = {
        "title": title,
        "description": description,
        "node_count": len(nodes),
    }
    if intake_form:
        updated_metadata["_intake_form"] = intake_form
    kb_import.source_metadata = updated_metadata
    await db.flush()

    # Record successful usage
    plan = await get_user_plan(kb_import.account_id, db)
    cost = _estimate_cost(input_tokens, output_tokens)
    await record_ai_usage(
        user_id=kb_import.created_by,
        account_id=kb_import.account_id,
        conversation_id=None,
        generation_type="kb_convert",
        tier=plan,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        estimated_cost=cost,
        succeeded=True,
        counts_toward_quota=True,
        error_code=None,
        extra_data={"kb_import_id": str(kb_import.id), "node_count": len(nodes)},
        db=db,
    )
    logger.info(
        "KB conversion complete: import=%s, nodes=%d, confidence=%.2f, time=%dms, tokens=%d/%d",
        kb_import.id, len(nodes), avg_confidence, elapsed_ms, input_tokens, output_tokens,
    )
    return nodes

View File

@@ -0,0 +1,199 @@
"""KB Accelerator text extraction service.
Extracts plain text and structural metadata from uploaded KB articles.
Phase 1: txt, paste, docx. Phase 2 will add pdf, html, md.
"""
import io
import logging
from typing import Any, Callable
logger = logging.getLogger(__name__)
# Type alias for extraction handlers
ExtractResult = tuple[str, dict[str, Any] | None]
ExtractHandler = Callable[[bytes], ExtractResult]
def _extract_txt(content_bytes: bytes) -> ExtractResult:
"""Extract from plain text — pass through with no metadata."""
text = content_bytes.decode("utf-8", errors="replace")
return text.strip(), None
def _extract_paste(content_bytes: bytes) -> ExtractResult:
    """Extract pasted text by delegating to the plain-text handler."""
    extracted = _extract_txt(content_bytes)
    return extracted
def _extract_docx(content_bytes: bytes) -> ExtractResult:
"""Extract text and structural metadata from a DOCX file.
Preserves heading levels, list structures, table content,
and bold/italic emphasis markers.
"""
try:
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
except ImportError:
raise RuntimeError(
"python-docx is required for DOCX extraction. "
"Install it with: pip install python-docx"
)
doc = Document(io.BytesIO(content_bytes))
text_parts: list[str] = []
metadata: dict[str, Any] = {
"headings": [],
"lists": [],
"tables": [],
"emphasis": [],
}
list_items: list[dict[str, Any]] = []
current_list_type: str | None = None
for i, para in enumerate(doc.paragraphs):
style_name = para.style.name if para.style else ""
text = para.text.strip()
if not text:
# Flush any accumulated list
if list_items:
metadata["lists"].append({
"type": current_list_type or "unordered",
"items": list_items,
})
list_items = []
current_list_type = None
text_parts.append("")
continue
# Detect headings
if style_name.startswith("Heading"):
try:
level = int(style_name.split()[-1])
except (ValueError, IndexError):
level = 1
metadata["headings"].append({
"level": level,
"text": text,
"paragraph_index": i,
})
text_parts.append(text)
continue
# Detect list items
if style_name.startswith("List"):
is_ordered = "Number" in style_name or "Ordered" in style_name
list_type = "ordered" if is_ordered else "unordered"
if current_list_type is not None and current_list_type != list_type:
# Flush previous list
metadata["lists"].append({
"type": current_list_type,
"items": list_items,
})
list_items = []
current_list_type = list_type
list_items.append({"text": text, "paragraph_index": i})
text_parts.append(text)
continue
# Flush any accumulated list before a non-list paragraph
if list_items:
metadata["lists"].append({
"type": current_list_type or "unordered",
"items": list_items,
})
list_items = []
current_list_type = None
# Detect emphasis (bold/italic runs)
for run in para.runs:
run_text = run.text.strip()
if not run_text:
continue
if run.bold:
metadata["emphasis"].append({
"type": "bold",
"text": run_text,
"paragraph_index": i,
})
if run.italic:
metadata["emphasis"].append({
"type": "italic",
"text": run_text,
"paragraph_index": i,
})
text_parts.append(text)
# Flush trailing list
if list_items:
metadata["lists"].append({
"type": current_list_type or "unordered",
"items": list_items,
})
# Extract tables
for t_idx, table in enumerate(doc.tables):
table_data: list[list[str]] = []
for row in table.rows:
table_data.append([cell.text.strip() for cell in row.cells])
if table_data:
metadata["tables"].append({
"table_index": t_idx,
"rows": table_data,
})
# Also add table content to text
for row in table_data:
text_parts.append(" | ".join(row))
full_text = "\n".join(text_parts).strip()
# Clean up empty metadata sections
metadata = {k: v for k, v in metadata.items() if v}
return full_text, metadata if metadata else None
# Registry of format handlers — extend for Phase 2
# Maps a source_format identifier to the function that turns raw bytes into
# (plain_text, metadata_or_None). extract_text() rejects any format without an
# entry here, so adding a format means adding one handler and one key.
FORMAT_HANDLERS: dict[str, ExtractHandler] = {
    "txt": _extract_txt,
    "paste": _extract_paste,
    "docx": _extract_docx,
}
def extract_text(
    content_bytes: bytes,
    source_format: str,
) -> ExtractResult:
    """Dispatch uploaded content to the handler registered for its format.

    Args:
        content_bytes: Raw bytes of the uploaded content.
        source_format: Format identifier; must be a key of FORMAT_HANDLERS.

    Returns:
        Tuple of (plain_text, structural_metadata_or_none) as produced by
        the handler.

    Raises:
        ValueError: If the format has no registered handler, or the handler
            produced only whitespace.
        RuntimeError: If a required extraction library is not installed.
    """
    extract = FORMAT_HANDLERS.get(source_format)
    if extract is None:
        raise ValueError(f"Unsupported format: {source_format}")

    logger.info("Extracting text from format=%s", source_format)
    text, metadata = extract(content_bytes)
    if text.strip() == "":
        raise ValueError("Extracted text is empty — the document may be blank or contain only images.")

    has_metadata = "yes" if metadata else "no"
    logger.info("Extraction complete: %d chars, metadata=%s", len(text), has_metadata)
    return text, metadata

View File

@@ -34,6 +34,7 @@ from .copilot_conversation import CopilotConversation
from .assistant_chat import AssistantChat
from .survey_response import SurveyResponse
from .survey_invite import SurveyInvite
from .kb_import import KBImport, KBImportNode
__all__ = [
"User",
@@ -79,4 +80,6 @@ __all__ = [
"AssistantChat",
"SurveyResponse",
"SurveyInvite",
"KBImport",
"KBImportNode",
]

View File

@@ -0,0 +1,140 @@
import uuid
from datetime import datetime, timezone
from typing import Optional, Any, TYPE_CHECKING
from sqlalchemy import String, Text, DateTime, ForeignKey, Boolean, Integer, Float, CheckConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
from sqlalchemy.dialects.postgresql import UUID, JSONB
from app.core.database import Base
if TYPE_CHECKING:
from app.models.account import Account
from app.models.user import User
from app.models.tree import Tree
class KBImport(Base):
    """One uploaded KB article and the state of its conversion into a flow.

    Holds the extracted source text, the requested target flow type, the
    conversion outcome (status, error, timing, token counts, average node
    confidence) and the generated ``KBImportNode`` rows.
    """

    __tablename__ = "kb_imports"
    # Database-level guards on the three enumerated string columns.
    __table_args__ = (
        CheckConstraint(
            "source_format IN ('txt', 'paste', 'docx', 'pdf', 'html', 'md')",
            name="ck_kb_imports_source_format",
        ),
        CheckConstraint(
            "target_type IN ('troubleshooting', 'procedural')",
            name="ck_kb_imports_target_type",
        ),
        CheckConstraint(
            "status IN ('processing', 'ready', 'committed', 'failed')",
            name="ck_kb_imports_status",
        ),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    # Owning account; the import row is deleted with the account.
    account_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("accounts.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Uploading user; the import row is deleted with the user.
    created_by: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    source_filename: Mapped[Optional[str]] = mapped_column(
        String(500), nullable=True
    )
    source_format: Mapped[str] = mapped_column(String(20), nullable=False)
    source_text: Mapped[str] = mapped_column(Text, nullable=False)
    # Plain JSONB with no mutation-tracking wrapper applied here.
    # NOTE(review): in-place edits to this dict are presumably not detected by
    # the ORM — assign a new dict when changing it; confirm against the
    # SQLAlchemy mutation-tracking documentation.
    source_metadata: Mapped[Optional[dict[str, Any]]] = mapped_column(
        JSONB, nullable=True
    )
    target_type: Mapped[str] = mapped_column(String(20), nullable=False)
    # New rows start in "processing"; allowed values are listed in
    # ck_kb_imports_status above.
    status: Mapped[str] = mapped_column(
        String(20), nullable=False, default="processing"
    )
    confidence_avg: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    processing_time_ms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
    ai_tokens_input: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
    ai_tokens_output: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
    # Optional link to a trees row; reset to NULL if that tree is deleted.
    # NOTE(review): presumably the flow created when the import is committed — confirm.
    tree_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("trees.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Indexed but not a foreign key.
    # NOTE(review): presumably groups imports uploaded together — TODO confirm.
    batch_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True), nullable=True, index=True
    )
    # Timestamps are timezone-aware UTC values generated in Python.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )

    # Relationships
    account: Mapped["Account"] = relationship("Account", foreign_keys=[account_id])
    created_by_user: Mapped["User"] = relationship("User", foreign_keys=[created_by])
    tree: Mapped[Optional["Tree"]] = relationship("Tree", foreign_keys=[tree_id])
    # Generated nodes, loaded in node_order; removing a node from this
    # collection or deleting the import deletes the node (delete-orphan cascade).
    nodes: Mapped[list["KBImportNode"]] = relationship(
        "KBImportNode",
        back_populates="kb_import",
        cascade="all, delete-orphan",
        order_by="KBImportNode.node_order",
    )
class KBImportNode(Base):
    """One AI-generated node (question, step, warning, ...) of a KB import.

    Nodes belong to a single ``KBImport``, are ordered by ``node_order`` and
    may point at another node of the same table as their parent.
    """

    __tablename__ = "kb_import_nodes"
    # The only node_type values the table accepts.
    __table_args__ = (
        CheckConstraint(
            "node_type IN ('question', 'resolution', 'step', 'section_header', 'warning', 'action')",
            name="ck_kb_import_nodes_node_type",
        ),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    # Owning import; nodes are deleted with it.
    kb_import_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("kb_imports.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    node_order: Mapped[int] = mapped_column(Integer, nullable=False)
    node_type: Mapped[str] = mapped_column(String(20), nullable=False)
    # Node payload as JSON; the key set is decided by whoever creates the node.
    content: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False)
    # Self-referential parent link; reset to NULL if the parent node is deleted.
    parent_node_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("kb_import_nodes.id", ondelete="SET NULL"),
        nullable=True,
    )
    source_excerpt: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    confidence_score: Mapped[float] = mapped_column(Float, nullable=False)
    # Review flags; both start out False.
    user_edited: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    user_approved: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    # Timestamps are timezone-aware UTC values generated in Python.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )

    # Relationships
    kb_import: Mapped["KBImport"] = relationship(
        "KBImport", back_populates="nodes"
    )
    # Many-to-one side of the self-reference (remote_side is this table's id).
    parent: Mapped[Optional["KBImportNode"]] = relationship(
        "KBImportNode",
        remote_side="KBImportNode.id",
        foreign_keys=[parent_node_id],
    )

View File

@@ -1,4 +1,4 @@
from sqlalchemy import String, Integer, Boolean
from sqlalchemy import String, Integer, Boolean, text
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import JSONB
from app.core.database import Base
@@ -18,3 +18,13 @@ class PlanLimits(Base):
# AI Flow Builder limits
max_ai_builds_per_month: Mapped[int | None] = mapped_column(Integer, nullable=True)
max_ai_builds_per_24h: Mapped[int | None] = mapped_column(Integer, nullable=True)
# KB Accelerator limits
kb_accelerator_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
kb_max_lifetime_conversions: Mapped[int | None] = mapped_column(Integer, nullable=True)
kb_batch_max_size: Mapped[int | None] = mapped_column(Integer, nullable=True)
kb_allowed_formats: Mapped[list] = mapped_column(JSONB, nullable=False, default=lambda: ["txt", "paste"], server_default=text("'[\"txt\",\"paste\"]'::jsonb"))
kb_detailed_analysis: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
kb_conversational_refinement: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
kb_step_library_matching: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
kb_history_limit: Mapped[int | None] = mapped_column(Integer, nullable=True)

View File

@@ -0,0 +1,142 @@
"""Pydantic schemas for KB Accelerator."""
from typing import Any, Literal, Optional
from uuid import UUID
from pydantic import BaseModel, Field
# ── Requests ──
class KBUploadTextRequest(BaseModel):
    """Upload KB article via text paste."""

    # Article body: 10 to 500,000 characters.
    content: str = Field(..., min_length=10, max_length=500_000)
    title: Optional[str] = Field(None, min_length=1, max_length=255)
    # NOTE(review): the description promises an AI-chosen type when omitted —
    # confirm the upload endpoint actually implements that.
    target_type: Optional[Literal["troubleshooting", "procedural"]] = Field(
        None, description="Target flow type. If omitted, AI decides."
    )
class KBNodeEditRequest(BaseModel):
    """Edit a specific KB import node during review."""

    # Which review action to apply to the node.
    operation: Literal[
        "approve", "reject", "edit", "delete", "regenerate", "insert_after"
    ]
    # NOTE(review): described as required for 'edit' and 'insert_after', but this
    # schema does not enforce it — presumably the endpoint checks; confirm.
    content: Optional[dict[str, Any]] = Field(
        None, description="Updated node content (required for 'edit' and 'insert_after')"
    )
    guidance: Optional[str] = Field(
        None,
        max_length=2000,
        description="User guidance for 'regenerate' operation",
    )
class KBCommitRequest(BaseModel):
    """Optional overrides when committing a KB import to the flow library."""

    # Every field is optional; an omitted field means "no override".
    name: Optional[str] = Field(None, min_length=1, max_length=255)
    description: Optional[str] = Field(None, max_length=2000)
    category_id: Optional[UUID] = None
# ── Responses ──
class KBImportNodeResponse(BaseModel):
    """A single generated node in a KB import."""

    id: UUID
    kb_import_id: UUID
    node_order: int
    node_type: str
    content: dict[str, Any]
    parent_node_id: Optional[UUID] = None
    source_excerpt: Optional[str] = None
    confidence_score: float
    user_edited: bool
    user_approved: bool
    # Allows building this schema from attribute-bearing objects (e.g. ORM rows).
    model_config = {"from_attributes": True}
class KBUploadResponse(BaseModel):
    """Response after uploading a KB article."""

    # Identifier of the created import, its current status and detected format.
    id: UUID
    status: str
    source_format: str
class KBImportResponse(BaseModel):
    """Full KB import detail with nodes."""

    id: UUID
    account_id: UUID
    created_by: UUID
    source_filename: Optional[str] = None
    source_format: str
    source_text: str
    source_metadata: Optional[dict[str, Any]] = None
    target_type: str
    status: str
    confidence_avg: Optional[float] = None
    error_message: Optional[str] = None
    processing_time_ms: Optional[int] = None
    ai_tokens_input: Optional[int] = None
    ai_tokens_output: Optional[int] = None
    tree_id: Optional[UUID] = None
    nodes: list[KBImportNodeResponse] = []
    # NOTE(review): declared as str while the ORM columns are datetimes —
    # presumably the endpoint formats them before building this schema; confirm
    # that validating an ORM row directly is not relied on.
    created_at: str
    updated_at: str
    # Allows building this schema from attribute-bearing objects (e.g. ORM rows).
    model_config = {"from_attributes": True}
class KBImportSummary(BaseModel):
    """Lightweight import item for list view."""

    id: UUID
    source_filename: Optional[str] = None
    source_format: str
    target_type: str
    status: str
    confidence_avg: Optional[float] = None
    # Defaults to 0 when the caller does not supply a count.
    node_count: int = 0
    # NOTE(review): str here vs. datetime on the ORM model — confirm the caller
    # formats the timestamp before constructing this schema.
    created_at: str
    # Allows building this schema from attribute-bearing objects (e.g. ORM rows).
    model_config = {"from_attributes": True}
class KBImportListResponse(BaseModel):
    """Paginated list of KB imports."""

    # One page of summaries plus the paging values that accompany it.
    # NOTE(review): `total` is presumably the count across all pages — confirm.
    items: list[KBImportSummary]
    total: int
    skip: int
    limit: int
class KBCommitResponse(BaseModel):
    """Response after committing a KB import to the flow library."""

    # Ids of the resulting tree and of the import it came from, plus the tree's type.
    tree_id: UUID
    import_id: UUID
    tree_type: str
class KBQuotaResponse(BaseModel):
    """Current KB Accelerator entitlements and usage for the user's account."""

    plan: str
    kb_accelerator_enabled: bool
    lifetime_conversions_used: int
    # NOTE(review): None presumably means "unlimited" for this and history_limit — confirm.
    lifetime_conversions_limit: Optional[int] = None
    allowed_formats: list[str]
    detailed_analysis: bool
    conversational_refinement: bool
    step_library_matching: bool
    history_limit: Optional[int] = None
    can_convert: bool

View File

@@ -0,0 +1,334 @@
"""Integration tests for KB Accelerator endpoints."""
import pytest
import json
from unittest.mock import AsyncMock, patch, PropertyMock
from httpx import AsyncClient
pytestmark = pytest.mark.asyncio
# ── Fixtures ──
@pytest.fixture
async def kb_setup(client, auth_headers, test_db):
    """Seed KB plan limits and return helpers.

    Sets the KB Accelerator columns of the existing ``free`` and ``pro``
    ``plan_limits`` rows (free: enabled, 3 lifetime conversions, txt/paste
    only; pro: enabled, no conversion cap, all formats and extras), commits,
    and returns ``{"client": ..., "headers": ...}`` for the tests to use.
    """
    # Local import: this test module does not import sqlalchemy at the top.
    from sqlalchemy import text

    # Update plan_limits with KB columns for 'free' plan
    await test_db.execute(
        text("""
            UPDATE plan_limits SET
            kb_accelerator_enabled = true,
            kb_max_lifetime_conversions = 3,
            kb_allowed_formats = '["txt","paste"]'::jsonb,
            kb_detailed_analysis = false,
            kb_conversational_refinement = false,
            kb_step_library_matching = false,
            kb_history_limit = 3
            WHERE plan = 'free'
        """)
    )
    # ...and for the 'pro' plan
    await test_db.execute(
        text("""
            UPDATE plan_limits SET
            kb_accelerator_enabled = true,
            kb_max_lifetime_conversions = NULL,
            kb_allowed_formats = '["txt","paste","docx","pdf","html","md"]'::jsonb,
            kb_detailed_analysis = true,
            kb_conversational_refinement = true,
            kb_step_library_matching = true,
            kb_history_limit = NULL
            WHERE plan = 'pro'
        """)
    )
    await test_db.commit()
    return {"client": client, "headers": auth_headers}
def _mock_ai_enabled():
    """Return a patcher that makes ``settings.ai_enabled`` read as True.

    The property is patched on the settings object's class with a
    ``PropertyMock``; use the result as a context manager (``with ...:``).
    """
    # Local import keeps the application settings out of module import time.
    from app.core.config import settings

    return patch.object(
        type(settings),
        "ai_enabled",
        new_callable=PropertyMock,
        return_value=True,
    )
# Sample KB article used as the pasted upload body in the tests below.
SAMPLE_KB_TEXT = """
Troubleshooting Outlook Connectivity Issues
Problem: Users report that Outlook keeps disconnecting from Exchange.
Step 1: Check Network Connectivity
Ping the Exchange server to verify network connectivity.
If ping fails, check the network configuration.
Step 2: Verify Outlook Profile
If the network is working, check the Outlook profile settings.
Go to Control Panel > Mail > Show Profiles.
Step 3: Check Exchange Server
If the profile is correct, verify the Exchange server is running.
Open Services.msc and check Microsoft Exchange services.
Resolution: After following these steps, Outlook should maintain
a persistent connection to Exchange.
"""
# JSON text shaped like a troubleshooting conversion result: a root question
# whose options lead to two branches, five nodes in total, every reference
# pointing at a node that exists in the list.
MOCK_AI_TROUBLESHOOTING_RESPONSE = json.dumps({
    "title": "Troubleshooting Outlook Connectivity",
    "description": "Diagnose and fix Outlook disconnection from Exchange",
    "nodes": [
        {
            "id": "root-check",
            "type": "question",
            "question": "Is the network connection working?",
            "options": [
                {"label": "Yes", "next_node_id": "check-profile"},
                {"label": "No", "next_node_id": "fix-network"},
            ],
            "confidence": 0.92,
            "source_excerpt": "Step 1: Check Network Connectivity",
        },
        {
            "id": "fix-network",
            "type": "resolution",
            "question": "Fix the network configuration and retry.",
            "confidence": 0.85,
            "source_excerpt": "If ping fails, check the network configuration.",
        },
        {
            "id": "check-profile",
            "type": "question",
            "question": "Is the Outlook profile configured correctly?",
            "options": [
                {"label": "Yes", "next_node_id": "check-exchange"},
                {"label": "No", "next_node_id": "fix-profile"},
            ],
            "confidence": 0.88,
            "source_excerpt": "Step 2: Verify Outlook Profile",
        },
        {
            "id": "fix-profile",
            "type": "resolution",
            "question": "Reconfigure the Outlook profile via Control Panel > Mail.",
            "confidence": 0.90,
            "source_excerpt": "Go to Control Panel > Mail > Show Profiles.",
        },
        {
            "id": "check-exchange",
            "type": "resolution",
            "question": "Verify Exchange services are running in Services.msc.",
            "confidence": 0.87,
            "source_excerpt": "Open Services.msc and check Microsoft Exchange services.",
        },
    ],
})
# JSON text shaped like a procedural conversion result: three steps (one of
# them a warning) using [VAR:...] tokens, plus an intake form with one field
# per variable.
MOCK_AI_PROCEDURAL_RESPONSE = json.dumps({
    "title": "Setup New Domain Controller",
    "description": "Step-by-step procedure for setting up a new DC",
    "steps": [
        {
            "id": "step-1",
            "type": "step",
            "content": "Open Server Manager on [VAR:server_name]",
            "confidence": 0.95,
            "source_excerpt": "Step 1: Open Server Manager on DC01",
        },
        {
            "id": "warning-dns",
            "type": "warning",
            "content": "WARNING: This will restart DNS and cause brief connectivity loss",
            "confidence": 0.90,
            "source_excerpt": "Note: Restarting DNS will cause a brief outage",
        },
        {
            "id": "step-2",
            "type": "step",
            "content": "Configure IP address [VAR:ip_address] on the network adapter",
            "confidence": 0.88,
            "source_excerpt": "Configure IP 192.168.1.10 on the adapter",
        },
    ],
    "intake_form": [
        {
            "variable_name": "server_name",
            "label": "Server Name",
            "field_type": "text",
            "required": True,
            "display_order": 1,
        },
        {
            "variable_name": "ip_address",
            "label": "IP Address",
            "field_type": "text",
            "required": True,
            "display_order": 2,
        },
    ],
})
# ── Upload Tests ──


class TestUpload:
    """Upload endpoint: one accepted paste upload and two rejected requests."""

    async def test_upload_text_paste(self, kb_setup):
        """Upload via text paste creates a kb_import in processing status."""
        client, headers = kb_setup["client"], kb_setup["headers"]
        form = {"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"}

        # Mock the background conversion (don't actually call AI)
        with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
            response = await client.post(
                "/api/v1/kb-accelerator/upload",
                data=form,
                headers=headers,
            )

        assert response.status_code == 201
        body = response.json()
        assert body["status"] == "processing"
        assert body["source_format"] == "paste"
        assert "id" in body

    async def test_upload_empty_content_rejected(self, kb_setup):
        """A paste whose content is just "short" is rejected with HTTP 400."""
        client, headers = kb_setup["client"], kb_setup["headers"]

        with _mock_ai_enabled():
            response = await client.post(
                "/api/v1/kb-accelerator/upload",
                data={"content": "short"},
                headers=headers,
            )

        assert response.status_code == 400

    async def test_upload_no_file_no_content_rejected(self, kb_setup):
        """A request carrying neither a file nor content is rejected with HTTP 400."""
        client, headers = kb_setup["client"], kb_setup["headers"]

        with _mock_ai_enabled():
            response = await client.post(
                "/api/v1/kb-accelerator/upload",
                data={},
                headers=headers,
            )

        assert response.status_code == 400
# ── Get/List Tests ──


class TestGetList:
    """Read endpoints: fetch a single import by id and list imports."""

    async def test_get_import(self, kb_setup):
        """A freshly uploaded import can be fetched by its id."""
        c, h = kb_setup["client"], kb_setup["headers"]

        # Create the import with AI enabled and the background conversion stubbed.
        with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
            create_resp = await c.post(
                "/api/v1/kb-accelerator/upload",
                data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
                headers=h,
            )
        # Fail here if setup broke, rather than with a KeyError on "id" below.
        assert create_resp.status_code == 201
        import_id = create_resp.json()["id"]

        resp = await c.get(f"/api/v1/kb-accelerator/{import_id}", headers=h)
        assert resp.status_code == 200
        data = resp.json()
        assert data["id"] == import_id
        assert data["source_format"] == "paste"

    async def test_list_imports(self, kb_setup):
        """After one upload, the list endpoint reports at least one import."""
        c, h = kb_setup["client"], kb_setup["headers"]

        with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
            create_resp = await c.post(
                "/api/v1/kb-accelerator/upload",
                data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
                headers=h,
            )
        # Without this check a failed upload would look like a list-endpoint bug.
        assert create_resp.status_code == 201

        resp = await c.get("/api/v1/kb-accelerator", headers=h)
        assert resp.status_code == 200
        data = resp.json()
        assert data["total"] >= 1
        assert len(data["items"]) >= 1
# ── Quota Tests ──


class TestQuota:
    """Quota endpoint for the account provided by the kb_setup fixture."""

    async def test_get_quota(self, kb_setup):
        """Quota reports the feature enabled, a lifetime limit of 3, and can_convert."""
        response = await kb_setup["client"].get(
            "/api/v1/kb-accelerator/quota",
            headers=kb_setup["headers"],
        )

        assert response.status_code == 200
        quota = response.json()
        assert quota["kb_accelerator_enabled"] is True
        assert quota["lifetime_conversions_limit"] == 3
        assert quota["can_convert"] is True
# ── Commit Tests ──


class TestCommit:
    """Commit endpoint: turning a reviewed import into a tree."""

    async def test_commit_creates_tree(self, kb_setup, test_db):
        """Committing a ready import returns a tree_id and the troubleshooting tree_type."""
        import uuid

        from sqlalchemy import select

        from app.models.kb_import import KBImport, KBImportNode

        c, h = kb_setup["client"], kb_setup["headers"]

        # Create import
        with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
            create_resp = await c.post(
                "/api/v1/kb-accelerator/upload",
                data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
                headers=h,
            )
        # Fail here if setup broke, rather than with a KeyError on "id" below.
        assert create_resp.status_code == 201
        import_id = create_resp.json()["id"]

        # Simulate conversion complete: update status + add nodes directly,
        # since the real background conversion was patched out above.
        result = await test_db.execute(
            select(KBImport).where(KBImport.id == uuid.UUID(import_id))
        )
        kb_import = result.scalar_one()
        kb_import.status = "ready"
        kb_import.source_metadata = {"_conversion": {"title": "Test Flow", "description": "Test"}}
        node = KBImportNode(
            kb_import_id=kb_import.id,
            node_order=0,
            node_type="question",
            content={"original_id": "root", "question": "Test question?", "options": []},
            confidence_score=0.9,
        )
        test_db.add(node)
        await test_db.commit()

        # Commit
        resp = await c.post(f"/api/v1/kb-accelerator/{import_id}/commit", headers=h)
        assert resp.status_code == 200
        data = resp.json()
        assert "tree_id" in data
        assert data["tree_type"] == "troubleshooting"
# ── Delete Tests ──


class TestDelete:
    """Delete endpoint: removing an import makes it unreachable."""

    async def test_delete_import(self, kb_setup):
        """Deleting an import returns 204 and a later GET for it returns 404."""
        c, h = kb_setup["client"], kb_setup["headers"]

        with _mock_ai_enabled(), patch("app.api.endpoints.kb_accelerator._run_conversion"):
            create_resp = await c.post(
                "/api/v1/kb-accelerator/upload",
                data={"content": SAMPLE_KB_TEXT, "target_type": "troubleshooting"},
                headers=h,
            )
        # Fail here if setup broke, rather than with a KeyError on "id" below.
        assert create_resp.status_code == 201
        import_id = create_resp.json()["id"]

        resp = await c.delete(f"/api/v1/kb-accelerator/{import_id}", headers=h)
        assert resp.status_code == 204

        # Verify deleted
        resp = await c.get(f"/api/v1/kb-accelerator/{import_id}", headers=h)
        assert resp.status_code == 404

View File

@@ -0,0 +1,520 @@
# RESOLUTIONFLOW — KB Accelerator
## Feature Design Document
*Transform static KB articles into interactive troubleshooting and procedural flows with AI-powered document analysis.*
| Field | Value |
|-------|-------|
| **Document** | KB Accelerator — Feature Design & Architecture |
| **Version** | 1.0 — Draft |
| **Date** | March 2026 |
| **Author** | ResolutionFlow LLC |
| **Status** | Design Phase |
---
## Table of Contents
1. Executive Summary
2. Problem Statement & Market Opportunity
3. Feature Overview
4. System Architecture
5. AI Processing Pipeline
6. Data Model
7. API Design
8. Frontend Design
9. Supported Input Formats
10. Conversion Intelligence
11. Pricing & Tier Integration
12. Build Phases & Roadmap
13. Risk Analysis
14. Success Metrics
---
## 1. Executive Summary
KB Accelerator is a new feature for ResolutionFlow that allows MSP teams to upload their existing knowledge base articles and automatically convert them into interactive troubleshooting flows and procedural flows. This solves the cold-start adoption problem, transforms passive documentation into active troubleshooting tools, and delivers immediate value from day one.
> **Core Value Proposition**
>
> MSPs have years of institutional knowledge trapped in static Word docs, PDFs, and wiki articles that nobody reads mid-ticket. KB Accelerator transforms that content into the interactive, branching flows that ResolutionFlow is built around — turning dead documentation into living troubleshooting intelligence.
### Key Capabilities
- Upload KB articles in multiple formats (DOCX, PDF, HTML, Markdown, plain text, copy-paste)
- AI-powered analysis that detects sequential steps, decision points, prerequisites, and resolution outcomes
- Automatic mapping to ResolutionFlow's existing tree schema (troubleshooting flows) and procedural schema (procedure flows)
- Intelligent detection of implicit branching logic buried in prose documentation
- Draft flow output that lands directly in the flow editor for human review and refinement
- Confidence scoring on each generated node so users know where AI interpretation needs attention
- Batch import capability for migrating entire KB libraries
### Strategic Impact
| Impact Area | Description |
|---|---|
| **Adoption** | Eliminates cold-start problem. New users get a library of draft flows on day one instead of building from scratch. |
| **Retention** | Users who import existing KB articles are investing their institutional knowledge into the platform, increasing switching costs. |
| **Revenue** | Pro/Team-gated feature that directly justifies subscription pricing. AI processing costs are per-conversion, aligning expense with usage. |
| **Differentiation** | No competing MSP documentation tool offers AI-powered conversion from static docs to interactive decision trees. |
| **Thesis Validation** | Proves the core ResolutionFlow thesis: documentation and troubleshooting should be the same activity. |
---
## 2. Problem Statement & Market Opportunity
### 2.1 The KB Problem in MSPs
Every MSP has knowledge base articles. They live in ConnectWise, IT Glue, Hudu, SharePoint, Confluence, or simply as Word documents on a shared drive. These articles represent years of accumulated troubleshooting experience and process documentation. The problem is that this content is fundamentally passive. It exists as prose that a technician has to read, interpret, and mentally convert into action steps while simultaneously working a live ticket.
The result is predictable: technicians don't read the KB articles. They ask a senior engineer instead, or they fumble through the issue on their own. The documentation exists but delivers no value because the format doesn't match the workflow.
### 2.2 The Cold-Start Problem
ResolutionFlow solves the format problem by making documentation interactive. But it introduces a new problem: a new ResolutionFlow customer has zero flows. Building troubleshooting trees from scratch is time-consuming. The customer has to invest significant effort before they see value, and most MSPs don't have that patience.
> **The Gap**
>
> MSPs have the knowledge (in KB articles). ResolutionFlow has the format (interactive flows). KB Accelerator bridges the gap by converting one into the other automatically.
### 2.3 Competitive Landscape
No MSP-focused tool currently offers AI-powered conversion from static documentation to interactive troubleshooting workflows. IT Glue and Hudu offer structured documentation but no interactive execution. ConnectWise's KB is search-based and static. This is a genuine whitespace opportunity.
---
## 3. Feature Overview
### 3.1 User Journey
1. User navigates to KB Accelerator from the ResolutionFlow dashboard (dedicated tab or sidebar action).
2. User uploads a file (DOCX, PDF, HTML, MD, TXT) or pastes raw text content directly.
3. System analyzes the document structure and displays a preview of detected elements: title, problem statement, steps, decision points, prerequisites, and resolution outcomes.
4. User selects target flow type: Troubleshooting Flow (branching decision tree) or Procedure Flow (linear steps with optional intake form).
5. AI processing pipeline generates a draft flow mapped to ResolutionFlow's schema.
6. User reviews the draft in a side-by-side view: original document on the left, generated flow preview on the right.
7. User can accept, edit, or regenerate individual nodes before finalizing.
8. Finalized flow is saved and appears in the user's flow library, ready for use or further editing in the standard flow editor.
### 3.2 Two Conversion Modes
**Troubleshooting Flow Conversion**
Best for: diagnostic articles, if/then troubleshooting guides, articles with multiple resolution paths.
- AI identifies the root question or symptom being diagnosed
- Decision nodes are created at each branching point ("if X, try Y; otherwise try Z")
- Resolution nodes capture final outcomes and fix instructions
- The branching tree maps to the existing node/option schema in the trees table
**Procedure Flow Conversion**
Best for: step-by-step guides, setup procedures, onboarding checklists, runbooks.
- AI extracts sequential steps in order
- Variable placeholders are detected (server names, IPs, usernames) and mapped to `[VAR:name]` tokens
- An intake form schema is auto-generated from detected variables
- Steps are enriched with detected warnings, time estimates, and verification checks
- Output uses the procedural `tree_type` with the `intake_form` JSONB schema from migration 035
---
## 4. System Architecture
### 4.1 High-Level Architecture
KB Accelerator integrates into the existing ResolutionFlow stack without introducing new infrastructure. It leverages the FastAPI backend for orchestration, the existing AI service (same infrastructure as the assistant chat) for document analysis, and outputs directly into the existing tree/node schema.
**Processing Pipeline Overview**
```
1. UPLOAD        →  2. EXTRACT       →  3. ANALYZE         →  4. GENERATE        →  5. REVIEW
File upload or      Text extraction     AI analysis of        Map to tree/          Side-by-side
text paste via      from DOCX/PDF/      structure, steps,     node schema or        review, edit,
API endpoint        HTML/MD/TXT         decision points       procedure schema      and finalize
```
### 4.2 Component Responsibilities
| Component | Responsibility | Integration Point |
|---|---|---|
| **Upload Service** | File validation, format detection, size limits, virus scanning hook | FastAPI endpoint, S3/local temp storage |
| **Extraction Service** | Convert uploaded files to normalized plain text with structural metadata | python-docx, PyMuPDF, BeautifulSoup, markdown-it |
| **AI Analysis Service** | Prompt engineering pipeline that identifies document structure and converts to flow schema | Anthropic API (Claude), existing AI service infrastructure |
| **Flow Generator** | Maps AI analysis output to tree/node database records with proper relationships | SQLAlchemy models, existing tree/node CRUD services |
| **Review UI** | Side-by-side document vs. flow preview with per-node editing | React frontend, existing flow editor components |
| **Batch Processor** | Queue-based processing for multi-article imports | Celery/Redis or async FastAPI background tasks |
---
## 5. AI Processing Pipeline
The AI pipeline is the core intelligence of KB Accelerator. It operates in two phases: structural analysis (understanding the document) and flow generation (converting that understanding into a ResolutionFlow-compatible schema). This two-phase approach allows for human review between analysis and generation.
### 5.1 Phase 1: Document Analysis
The first AI call analyzes the extracted text and returns a structured JSON document describing what was found. The prompt is carefully engineered to identify MSP-specific patterns.
**Analysis Prompt Strategy**
- System prompt establishes the AI as an MSP documentation specialist that understands IT troubleshooting workflows
- The extracted text is provided with any structural metadata (headings, lists, numbered steps) preserved
- AI is instructed to return a strict JSON schema identifying: document type, title, problem statement, prerequisites, sequential steps, decision points, resolution outcomes, and detected variables
**Detection Targets**
| Element | What AI Looks For | Example in KB Article |
|---|---|---|
| **Document Type** | Whether the article is diagnostic (troubleshooting) or procedural (step-by-step) | *"Troubleshooting Outlook connectivity" vs "Setting up a new domain controller"* |
| **Problem Statement** | The root issue or task being addressed | *"Users report that Outlook keeps disconnecting from Exchange"* |
| **Prerequisites** | Things that must be true before starting | *"Ensure you have Domain Admin credentials and the server is on the network"* |
| **Sequential Steps** | Ordered instructions that must happen in sequence | *"Step 1: Open Server Manager. Step 2: Add Roles and Features..."* |
| **Decision Points** | Conditional logic, if/then/else branches | *"If the user is on Windows 10, check the registry. On Windows 11, go to Settings..."* |
| **Variables** | Instance-specific values that change per execution | *Server names, IP addresses, usernames, license types, domain names* |
| **Warnings/Cautions** | Risk indicators or critical notes | *"WARNING: This will restart the DNS service and cause brief connectivity loss"* |
| **Resolution Outcomes** | End states that indicate the problem is solved | *"Outlook should now maintain a persistent connection to Exchange"* |
| **Verification Steps** | How to confirm a step or procedure worked | *"Run nslookup to verify DNS resolution is working correctly"* |
### 5.2 Phase 2: Flow Generation
The second AI call takes the structured analysis from Phase 1 and generates the actual flow structure, mapped directly to ResolutionFlow's schema. The output format differs based on the target flow type.
**Troubleshooting Flow Output**
- Root node with the problem statement as the question text
- Decision nodes with options array matching the existing node schema (question, options with label and next_node_id)
- Resolution nodes at leaf positions with solution text and tags from the six-dimension tagging system
- Each node includes a `confidence_score` (0.0–1.0) indicating how certain the AI is about the mapping
**Procedure Flow Output**
- Ordered steps array with rich metadata (type, content, warnings, time estimates, verification checks)
- Auto-generated `intake_form` schema from detected variables, with field types inferred (text for names, ip_address for IPs, select for known enumerations)
- `[VAR:name]` tokens injected into step content wherever variables were detected
- Section headers generated from logical groupings in the source document
### 5.3 Confidence Scoring
Every generated node includes a confidence score that communicates how certain the AI is about its interpretation. This is critical for the review step — it tells the user exactly where to focus their attention.
| Score Range | Label | UI Indicator | Meaning |
|---|---|---|---|
| **0.9 – 1.0** | High Confidence | Green left accent | Direct mapping from explicit steps or clear logic in the source |
| **0.7 – 0.89** | Medium Confidence | Amber left accent | Reasonable inference, but some ambiguity in the source material |
| **0.5 – 0.69** | Low Confidence | Red left accent | Significant interpretation required; user should carefully review |
| **< 0.5** | Needs Review | Red left accent + flag icon | AI made a best guess but recommends manual editing |
> **Design Note: Left Accent Border Pattern**
>
> The confidence indicators use the left accent border pattern established in the ResolutionFlow design system. This provides visual consistency with step status indicators and documentation callouts already in the UI.
---
## 6. Data Model
KB Accelerator introduces two new database tables and extends the existing tree model. All new tables follow the existing migration pattern and use the same base model infrastructure.
### 6.1 New Table: `kb_imports`
| Column | Type | Nullable | Description |
|---|---|---|---|
| **id** | UUID | No | Primary key (gen_random_uuid) |
| **organization_id** | UUID FK | No | Foreign key to organizations table |
| **created_by** | UUID FK | No | Foreign key to users table (who initiated the import) |
| **source_filename** | VARCHAR(500) | Yes | Original filename if file upload (null for text paste) |
| **source_format** | VARCHAR(20) | No | Enum: docx, pdf, html, md, txt, paste |
| **source_text** | TEXT | No | Extracted plain text content from the source document |
| **source_metadata** | JSONB | Yes | Structural metadata from extraction (headings, lists, etc.) |
| **analysis_result** | JSONB | Yes | Phase 1 AI analysis output (detected elements) |
| **target_type** | VARCHAR(20) | No | Enum: troubleshooting, procedural |
| **generated_flow** | JSONB | Yes | Phase 2 AI generation output (flow schema before commit) |
| **tree_id** | UUID FK | Yes | Foreign key to trees table (set after user finalizes) |
| **status** | VARCHAR(20) | No | Enum: uploaded, extracting, analyzing, reviewed, generating, completed, failed |
| **confidence_avg** | FLOAT | Yes | Average confidence score across all generated nodes |
| **error_message** | TEXT | Yes | Error details if status = failed |
| **processing_time_ms** | INTEGER | Yes | Total processing time in milliseconds |
| **created_at** | TIMESTAMPTZ | No | Auto-set on creation |
| **updated_at** | TIMESTAMPTZ | No | Auto-updated on modification |
### 6.2 New Table: `kb_import_nodes`
Stores individual generated nodes/steps before the user commits them to the actual tree. This allows per-node editing during the review phase without touching the live flow data.
| Column | Type | Nullable | Description |
|---|---|---|---|
| **id** | UUID | No | Primary key |
| **kb_import_id** | UUID FK | No | Foreign key to kb_imports |
| **node_order** | INTEGER | No | Position in the generated flow (0-indexed) |
| **node_type** | VARCHAR(20) | No | Enum: question, resolution, step, section_header, warning |
| **content** | JSONB | No | Node content (question text, step text, options, etc.) |
| **source_excerpt** | TEXT | Yes | The specific text from the source document that this node was derived from |
| **confidence_score** | FLOAT | No | AI confidence in this node's accuracy (0.0–1.0) |
| **user_edited** | BOOLEAN | No | Whether the user manually modified this node during review |
| **user_approved** | BOOLEAN | No | Whether the user explicitly approved this node |
### 6.3 Tree Model Extension
The existing trees table gets one new nullable column to link back to the import that created it. This enables analytics and provenance tracking.
**New column:** `kb_import_id` (UUID FK, nullable) — references `kb_imports.id`. Null for manually-created trees.
---
## 7. API Design
All KB Accelerator endpoints live under the `/api/v1/kb-accelerator` prefix and follow existing authentication, organization scoping, and error handling patterns.
### 7.1 Endpoints
| Method | Endpoint | Description |
|---|---|---|
| **POST** | `/api/v1/kb-accelerator/upload` | Upload a file or submit pasted text. Returns kb_import_id and starts extraction. |
| **GET** | `/api/v1/kb-accelerator/{id}` | Get import status, analysis results, and generated flow data. |
| **GET** | `/api/v1/kb-accelerator` | List all imports for the current organization with pagination and status filter. |
| **POST** | `/api/v1/kb-accelerator/{id}/analyze` | Trigger Phase 1 AI analysis on extracted text. Async — poll status via GET. |
| **POST** | `/api/v1/kb-accelerator/{id}/generate` | Trigger Phase 2 flow generation from analysis results. Requires target_type. |
| **PATCH** | `/api/v1/kb-accelerator/{id}/nodes/{node_id}` | Edit a specific generated node during review (content, approve, reject). |
| **POST** | `/api/v1/kb-accelerator/{id}/commit` | Finalize the import: create actual tree and node records from generated data. |
| **DELETE** | `/api/v1/kb-accelerator/{id}` | Cancel and clean up an in-progress or abandoned import. |
| **POST** | `/api/v1/kb-accelerator/batch` | Submit multiple files for batch processing. Returns array of kb_import_ids. |
### 7.2 Upload Endpoint Detail
**POST /api/v1/kb-accelerator/upload**
Accepts multipart/form-data for file uploads or application/json for text paste. Validates file size (max 10MB) and format, then performs basic content extraction before returning.
**Request Body (File Upload)**
- **file**: UploadFile (required) — the KB article file
- **target_type**: string (optional) — "troubleshooting" or "procedural" (can be set later)
**Request Body (Text Paste)**
- **content**: string (required) — raw text content
- **title**: string (optional) — suggested title for the import
- **target_type**: string (optional) — "troubleshooting" or "procedural"
**Response (201 Created)**
- **id**: UUID — the new kb_import record ID
- **status**: "uploaded" or "extracting" (extraction may start immediately)
- **source_format**: detected format of the uploaded content
---
## 8. Frontend Design
### 8.1 Entry Points
KB Accelerator is accessible from two locations in the existing UI to maximize discoverability:
- **Dashboard action button:** a prominent "Import KB Article" button in the flow library header, next to "Create New Flow"
- **Sidebar navigation:** dedicated "KB Accelerator" item in the main navigation with a sparkle/lightning icon to communicate AI-powered functionality
### 8.2 Upload Screen
Clean, focused upload interface with two input modes:
- Drag-and-drop zone for file uploads with format badges showing supported types (DOCX, PDF, HTML, MD, TXT)
- Text paste tab with a full-width textarea and title field for direct content entry
- Target type selector (Troubleshooting Flow / Procedure Flow) with visual cards showing the difference
- "Let AI decide" option for target type that uses the analysis phase to recommend the best fit
### 8.3 Analysis Preview Screen
After Phase 1 analysis completes, the user sees a breakdown of what the AI detected:
- Document title and detected type with AI recommendation badge
- Detected elements displayed as color-coded cards: steps (blue), decision points (amber), warnings (red), variables (green), resolutions (emerald)
- Source text excerpts linked to each detected element so the user can see exactly what triggered the detection
- "Proceed to Generation" and "Re-analyze" action buttons
### 8.4 Review Screen (Core Experience)
The review screen is the most important UI in KB Accelerator. It's where the user validates AI output and builds trust in the system.
**Layout: Two-Panel Side-by-Side**
- Left panel: Original document text with detected elements highlighted inline (color-matched to the generated nodes)
- Right panel: Generated flow preview showing the tree structure (for troubleshooting) or step list (for procedures)
- Clicking a node in the right panel highlights its source excerpt in the left panel, and vice versa
- Each node shows its confidence score via the left accent border pattern (green/amber/red)
**Per-Node Actions**
- **Approve** (checkmark): Marks the node as reviewed and accepted
- **Edit** (pencil): Opens inline editing for the node's content, question text, options, etc.
- **Regenerate** (refresh): Re-runs AI generation for just this node with optional user guidance
- **Delete** (trash): Removes the node from the generated flow
- **Add Node** (plus): Insert a manual node between existing ones
**Bulk Actions**
- "Approve All High Confidence" — one-click approval for all nodes scoring 0.9+
- "Commit to Library" — finalizes the flow and creates the actual tree record
> **UI Principle**
>
> The review screen should feel like a code review, not a form. The user is reviewing AI-generated work with the power to accept, modify, or reject each piece. The side-by-side layout with source attribution builds trust by showing the AI's reasoning.
---
## 9. Supported Input Formats
| Format | Library | Structure Preserved | Notes |
|---|---|---|---|
| **DOCX** | python-docx | Headings, lists, tables, bold/italic emphasis | Most common format for MSP KB articles in SharePoint and shared drives |
| **PDF** | PyMuPDF (fitz) | Text extraction with layout awareness, headings via font size | Second most common; handles scanned docs with OCR fallback via Tesseract |
| **HTML** | BeautifulSoup | Full semantic structure (h1-h6, ul/ol, tables, code blocks) | Covers Confluence, IT Glue, and web-based KB exports |
| **Markdown** | markdown-it | Headings, lists, code blocks, emphasis, links | Common in developer-oriented documentation and GitHub repos |
| **Plain Text** | Built-in | Line breaks and indentation only; AI infers structure | Lowest fidelity but important for copy-paste and email-sourced docs |
| **Paste** | Built-in | None — AI infers all structure from content | Zero-friction entry point for quick conversions |
> **Extraction Quality Hierarchy**
>
> DOCX and HTML provide the richest structural metadata, giving the AI the most to work with. PDF extraction is good but lossy (formatting information is approximate). Plain text and paste require the AI to infer all structure from content alone, which reduces confidence scores but still produces usable output for well-written articles.
---
## 10. Conversion Intelligence
This section details the specific AI patterns and heuristics used to convert different types of KB content into flows. This is where KB Accelerator's real value lives — the ability to interpret messy, inconsistent MSP documentation and produce structured, actionable flows.
### 10.1 Detecting Implicit Branch Logic
The hardest challenge is identifying decision points that aren't explicitly written as if/then statements. MSP KB articles often bury branching logic in prose.
**Pattern Examples**
| KB Article Text | AI Interpretation |
|---|---|
| *"For Windows 10 machines, navigate to Settings > Update. For Windows 11, go to Settings > Windows Update."* | Decision node: "What Windows version?" with two branches leading to different step sequences |
| *"If the issue persists after restarting the service, escalate to Tier 2."* | Decision node after the restart step: "Did the restart resolve the issue?" with Yes (resolution) and No (escalation) paths |
| *"Note: Domain-joined computers use Group Policy. Workgroup computers need manual configuration."* | Decision node: "Is the computer domain-joined?" with two parallel procedure paths |
| *"Try clearing the DNS cache first. If that doesn't work, check the hosts file."* | Sequential diagnostic flow: DNS cache clear > verification > hosts file check, with early exit if first step resolves |
### 10.2 Variable Detection
For procedure flow conversion, the AI identifies values that would change between executions and maps them to `[VAR:name]` tokens with appropriate intake form field types.
| Detected Pattern | Variable Name | Form Field Type | Example Value |
|---|---|---|---|
| IP addresses (192.168.x.x) | `[VAR:ip_address]` | ip_address | 192.168.1.10 |
| Server/computer names | `[VAR:server_name]` | text | DC01 |
| Domain names | `[VAR:domain_name]` | text | contoso.local |
| Usernames/email | `[VAR:username]` | text | jsmith@contoso.com |
| License types | `[VAR:license_type]` | select (enum) | E3, E5, F1 |
| OU paths | `[VAR:ou_path]` | text | OU=Users,DC=contoso,DC=local |
| Port numbers | `[VAR:port]` | number | 443 |
| Subnet masks | `[VAR:subnet_mask]` | ip_address | 255.255.255.0 |
---
## 11. Pricing & Tier Integration
KB Accelerator is a premium feature that justifies Pro and Team subscription pricing. The AI processing has a real per-conversion cost (Anthropic API usage), so tiering aligns expense with revenue.
| Capability | Free | Pro ($19/mo) | Team ($15/user/mo) |
|---|---|---|---|
| **Single article import** | 3 lifetime conversions | Unlimited | Unlimited |
| **Batch import** | Not available | Up to 10 articles | Up to 50 articles |
| **Text paste** | Included in 3 conversions | Unlimited | Unlimited |
| **Target type selection** | AI decides only | Manual + AI | Manual + AI |
| **Review & edit** | Basic (approve/reject) | Full (edit, regenerate, add) | Full + team review |
| **Confidence scoring** | Shown | Shown + filter/sort | Shown + filter/sort |
| **Import history** | Last 3 only | Full history | Full history + audit log |
| **Supported formats** | TXT and paste only | All formats | All formats |
> **Free Tier Strategy**
>
> The free tier offers 3 lifetime conversions with limited formats. This is enough for a user to experience the value and see KB Accelerator work on their actual documentation. Restricting the free tier to TXT and paste also keeps free-tier infrastructure costs down, because those inputs do not need the DOCX, PDF, HTML, or Markdown extraction libraries.
---
## 12. Build Phases & Roadmap
### Phase 1: Foundation (Weeks 1–3)
Core pipeline with single-article import and basic review.
- Database migrations: `kb_imports` and `kb_import_nodes` tables, tree model extension
- Upload endpoint with text paste and TXT file support
- Text extraction service (plain text only in Phase 1)
- AI analysis prompt engineering and Phase 1 pipeline
- AI generation prompt engineering and Phase 2 pipeline (troubleshooting flow output only)
- Basic review UI: list view of generated nodes with approve/reject
- Commit endpoint that creates actual tree/node records
### Phase 2: Rich Formats & Procedures (Weeks 4–6)
Full format support and procedure flow conversion.
- DOCX extraction via python-docx with structural metadata
- PDF extraction via PyMuPDF with layout analysis
- HTML extraction via BeautifulSoup
- Markdown extraction via markdown-it
- Procedure flow generation with variable detection and intake form generation
- Side-by-side review UI with source-to-node linking
- Per-node editing and regeneration in the review screen
- Confidence scoring visualization with left accent borders
### Phase 3: Polish & Scale (Weeks 7–9)
Batch import, UX refinement, and tier enforcement.
- Batch upload endpoint and queue processing
- Import history dashboard with status tracking
- Tier gating enforcement (free tier limits, format restrictions)
- "Approve All High Confidence" bulk action
- Analytics: conversion success rate, average confidence, most-used source formats
- Drag-and-drop file upload zone with format badges
- "Let AI decide" target type recommendation
### Phase 4: Advanced Intelligence (Future)
Stretch goals and post-launch enhancements.
- OCR fallback for scanned PDFs via Tesseract
- Multi-article correlation: detect when multiple KB articles describe the same issue from different angles and suggest merging
- Incremental re-import: detect when a source KB article has been updated and suggest flow updates
- ConnectWise/IT Glue/Hudu direct API integration for pulling articles without manual export
- Tag auto-assignment using the six-dimension tagging system based on article content analysis
- Template marketplace: share anonymized, high-confidence converted flows with the ResolutionFlow community
---
## 13. Risk Analysis
| Risk | Severity | Impact | Mitigation |
|---|---|---|---|
| **Poor quality KB input** | Medium | AI produces low-confidence flows that require extensive manual editing, reducing perceived value | Confidence scoring sets expectations. "Needs Review" flags prevent silent bad output. Free tier lets users test before committing. |
| **AI hallucination in flow logic** | High | Generated flows contain incorrect troubleshooting paths that could lead technicians astray | Mandatory review step before commit. Source attribution shows exact text the AI based each node on. No auto-publish. |
| **API cost overruns** | Medium | High usage of AI analysis burns through API budget faster than subscription revenue covers | Per-conversion cost tracking. Tier limits on batch size. Prompt optimization to minimize token usage. |
| **Extraction library maintenance** | Low | python-docx, PyMuPDF, etc. may have breaking changes or security issues | Pin versions. Phase 1 starts with text-only to defer library dependency. Each format is an independent module. |
| **User trust gap** | High | Users don't trust AI-generated flows and abandon the feature after trying it once | Side-by-side source view builds trust. Confidence scoring is transparent. Start with high-quality conversion on well-structured articles to build initial trust. |
| **Scope creep** | Medium | Feature grows to include direct PSA integration, real-time sync, and other complex functionality before core is proven | Phased roadmap with clear scope boundaries. Phase 4 items are explicitly deferred until post-launch data validates demand. |
---
## 14. Success Metrics
These KPIs determine whether KB Accelerator is delivering value to users and justifying its development investment.
| Metric | Target | Why It Matters |
|---|---|---|
| **Conversion completion rate** | > 70% | Percentage of started imports that reach "committed" status. Below 70% suggests the review step is too burdensome or quality is too low. |
| **Average confidence score** | > 0.75 | Across all generated nodes. Indicates the AI pipeline is producing reliably accurate output. |
| **Time from upload to commit** | < 10 minutes | The full cycle should feel fast. If users are spending 30+ minutes editing, the AI isn't saving enough time. |
| **Free-to-Pro conversion rate** | > 15% | Users who use their 3 free conversions and then upgrade. This validates that experiencing the feature drives subscription revenue. |
| **Repeat usage (Pro/Team)** | > 3 imports/month | Users who import once and never again didn't find sustained value. Repeat usage indicates the feature is part of their workflow. |
| **Node edit rate** | < 30% | Percentage of generated nodes that users edit before committing. Lower is better — means AI output is usable as-is. |
| **Imported flow usage rate** | > 50% | Percentage of committed flows that get used in actual troubleshooting sessions within 30 days. Unused flows mean the conversion produced shelfware. |
---
*End of Document*
ResolutionFlow LLC — March 2026

View File

@@ -0,0 +1,628 @@
# KB Accelerator — Merged Implementation Plan
## Document Context
| Field | Value |
|-------|-------|
| **Document** | KB Accelerator — Merged Implementation Plan |
| **Version** | 1.0 |
| **Date** | March 2026 |
| **Status** | Approved for Implementation |
| **Source Plans** | Claude Code design review + Codex implementation plan |
| **Design Doc** | `docs/plans/KB-Accelerator-Design-Document.md` |
This plan merges the best elements of two independent implementation plans produced by Claude Code and Codex against the KB Accelerator design document. Where the plans conflicted, explicit decisions were made and are documented below.
---
## 1. Summary of Decisions
### Agreed by Both Plans (Carry Forward As-Is)
- Dedicated KB Accelerator frontend experience — own route (`/kb-accelerator`), own sidebar nav item, own screens
- `account_id` tenancy everywhere — all design doc references to "organization" map to existing `account_id`
- Text + paste + DOCX in Phase 1; PDF, HTML, Markdown in Phase 2
- Both flow types (troubleshooting + procedural) supported from Phase 1
- Single-phase AI conversion by default; optional detailed analysis for Pro/Team
- 3 lifetime conversions for free tier, enforced per account (not per user)
- Hard server-side tier enforcement via PlanLimits columns
- Store extracted text + metadata only — raw uploaded files are not persisted
- File validation + pluggable scan hook interface (no-op default, AV integration ready)
- Per-node review actions: approve, edit, delete, regenerate, insert, plus bulk approve
- Side-by-side two-panel review UI with confidence indicators (green/amber/red left accent borders)
- `import_metadata` JSONB on trees table for provenance — no new FK column on trees
- HTTP polling for progress tracking (no SSE, no WebSockets)
- Multipart `files[]` + shared options for batch upload request shape (Phase 3)
- Auto-advance pipeline: upload → extraction → AI conversion → land on review screen (no manual stage gates)
- Auto-commit as draft for batch imports (Phase 3)
- Feature-flagged analysis preview screen (Pro/Team only)
- Basic shared visibility for Team tier (view/read, not collaborative editing)
- Sidebar nav item + "Import KB Article" CTA in flow library header
### Conflict Resolutions
| Decision | Chosen Approach | Rationale |
|---|---|---|
| **AI Infrastructure** | **Codex: Dedicated KB module** consuming shared AI service layer (model routing, token tracking, quota). NOT coupled to `AIChatSession`. | A KB import is a document conversion, not a chat session. Coupling to `AIChatSession` muddies analytics, session history, and data model semantics. Using shared AI *services* without coupling to the AI *data model* is the right separation. |
| **Per-node staging** | **Codex: Dedicated `kb_import_nodes` table** with proper columns for confidence, source excerpt, approval status. | Queryable (e.g., "all nodes below 0.7 confidence across imports"), normalized, clean PATCH semantics. Avoids the `_kb_meta` JSONB prefix hack which is fragile and risks junk data in production trees if stripping is missed. |
| **Batch import** | **Claude Code: Defer to Phase 3.** | Core single-article conversion must be validated first. Batch adds queue management, partial failure handling, and batch status UI — significant complexity for a feature nobody has requested yet. |
| **Conversational refinement** | **Claude Code's idea, Codex's architecture. Defer to Phase 2.** Built as a scoped chat panel in the review screen, NOT coupled to `AIChatSession`. | High-value feature, but Phase 1 must nail the core loop (upload → convert → review → commit). Refinement panel in Phase 2 uses a dedicated KB chat endpoint scoped to the import context. |
| **Step Library matching** | **Defer to Phase 2.** | Same reasoning — nail the core loop first, then layer on matching. |
| **Status values** | **Claude Code: Simplified to 4** — `processing`, `ready`, `committed`, `failed`. | With single-phase AI and auto-advance, granular statuses (uploaded, extracting, analyzing, generating, reviewed) add complexity without user value. |
---
## 2. Architecture Overview
### Backend: Dedicated KB Module + Shared AI Services
KB Accelerator is a self-contained backend module with its own tables, endpoints, services, and business logic. It does NOT create or depend on `AIChatSession` records.
When AI processing is needed, the KB module calls the existing shared AI service layer:
- **Model routing** via `get_model_for_action()` — add `kb_convert` and `kb_analyze` to `ACTION_MODEL_MAP`
- **Token tracking** via existing token counting utilities
- **Quota enforcement** via `ai_quota_service` (`check_ai_quota`, `record_ai_usage`)
- **Cost tracking** via existing cost recording patterns
- **Anthropic API calls** via existing `AsyncAnthropic` client patterns
The KB module owns its own prompt engineering, extraction logic, pipeline orchestration, and data persistence.
### Frontend: Dedicated KB Accelerator Experience
The frontend is a standalone multi-step wizard UI under `/kb-accelerator`. Users never see "AI Chat" branding or feel like they've left KB Accelerator. The conversational refinement panel (Phase 2) is visually integrated into the KB review screen — it reuses `EditorAIPanel` component internals but is branded and scoped to the KB context.
### Processing Pipeline
```
User uploads file/paste
┌─────────────────┐
│ 1. UPLOAD │ Validate format, size, tier permissions
│ & EXTRACT │ Extract text + structural metadata
└────────┬────────┘
┌─────────────────┐
│ 2. CONVERT │ Single AI call → tree structure + confidence scores
│ (AI) │ OR two-phase (Pro/Team optional): analyze → generate
└────────┬────────┘
┌─────────────────┐
│ 3. REVIEW │ Side-by-side UI, per-node actions, edit/approve/delete
│ (User) │ + Conversational refinement panel (Phase 2)
└────────┬────────┘
┌─────────────────┐
│ 4. COMMIT │ Create Tree record, set import_metadata, strip staging data
│ │ Step Library match suggestions (Phase 2)
└─────────────────┘
```
---
## 3. Data Model
### New Table: `kb_imports` (Migration 054)
| Column | Type | Nullable | Description |
|---|---|---|---|
| `id` | UUID PK | No | Primary key (`gen_random_uuid()`) |
| `account_id` | UUID FK → accounts | No | Tenancy scoping |
| `created_by` | UUID FK → users | No | Who initiated the import |
| `source_filename` | VARCHAR(500) | Yes | Original filename (null for paste) |
| `source_format` | VARCHAR(20) | No | Enum: `txt`, `paste`, `docx` (Phase 1); `pdf`, `html`, `md` (Phase 2) |
| `source_text` | TEXT | No | Extracted plain text content |
| `source_metadata` | JSONB | Yes | Structural metadata from extraction (headings, lists, emphasis) |
| `target_type` | VARCHAR(20) | No | Enum: `troubleshooting`, `procedural` |
| `status` | VARCHAR(20) | No | Enum: `processing`, `ready`, `committed`, `failed` |
| `confidence_avg` | FLOAT | Yes | Average confidence across all generated nodes |
| `error_message` | TEXT | Yes | Error details if status = `failed` |
| `processing_time_ms` | INTEGER | Yes | Total processing time in milliseconds |
| `ai_tokens_input` | INTEGER | Yes | Total input tokens used for AI processing |
| `ai_tokens_output` | INTEGER | Yes | Total output tokens used for AI processing |
| `tree_id` | UUID FK → trees | Yes | Set after user commits (null until then) |
| `batch_id` | UUID | Yes | Groups batch imports together (Phase 3) |
| `created_at` | TIMESTAMPTZ | No | Auto-set on creation |
| `updated_at` | TIMESTAMPTZ | No | Auto-updated on modification |
**Indexes:** `account_id`, `status`, `batch_id`, `created_by`, `created_at DESC`.
### New Table: `kb_import_nodes` (Migration 054)
Stores individual generated nodes/steps during the review phase. Each row represents one node in the AI-generated flow before the user commits it to an actual tree.
| Column | Type | Nullable | Description |
|---|---|---|---|
| `id` | UUID PK | No | Primary key |
| `kb_import_id` | UUID FK → kb_imports | No | Parent import |
| `node_order` | INTEGER | No | Position in the generated flow (0-indexed) |
| `node_type` | VARCHAR(20) | No | Enum: `question`, `resolution`, `step`, `section_header`, `warning` |
| `content` | JSONB | No | Node content (question text, step text, options array, etc.) |
| `parent_node_id` | UUID FK → kb_import_nodes | Yes | Parent node (for tree structure) |
| `source_excerpt` | TEXT | Yes | Exact text from source document this node was derived from |
| `confidence_score` | FLOAT | No | AI confidence in this node's accuracy (0.0–1.0) |
| `user_edited` | BOOLEAN | No | Default `false`. Set `true` when user modifies content |
| `user_approved` | BOOLEAN | No | Default `false`. Set `true` when user explicitly approves |
| `created_at` | TIMESTAMPTZ | No | Auto-set on creation |
| `updated_at` | TIMESTAMPTZ | No | Auto-updated on modification |
**Indexes:** `kb_import_id`, `confidence_score`.
### Tree `import_metadata` JSONB Schema (Set on Commit)
When a user commits a KB Accelerator flow, the resulting tree's `import_metadata` column is populated:
```json
{
"source": "kb_accelerator",
"kb_import_id": "uuid-here",
"source_filename": "Exchange-Troubleshooting.docx",
"source_format": "docx",
"confidence_avg": 0.85,
"node_count": 12,
"converted_at": "2026-03-10T14:30:00Z"
}
```
### PlanLimits Extensions
Add the following columns to the existing `plan_limits` table (and corresponding `account_limit_overrides`, admin schemas, subscription schemas, and frontend types):
| Column | Type | Description |
|---|---|---|
| `kb_accelerator_enabled` | BOOLEAN | Whether KB Accelerator is available on this plan |
| `kb_max_lifetime_conversions` | INTEGER, nullable | Lifetime cap (null = unlimited). Free = 3. |
| `kb_batch_max_size` | INTEGER, nullable | Max files per batch upload (null = disabled). Phase 3. |
| `kb_allowed_formats` | JSONB | Array of allowed format strings. Free = `["txt", "paste"]`. Pro/Team = all. |
| `kb_detailed_analysis` | BOOLEAN | Whether optional two-phase analysis is available |
| `kb_conversational_refinement` | BOOLEAN | Whether AI refinement panel is available (Phase 2) |
| `kb_step_library_matching` | BOOLEAN | Whether Step Library matching is available (Phase 2) |
| `kb_history_limit` | INTEGER, nullable | Max visible import history entries (null = unlimited). Free = 3. |
**Seed defaults:**
| Plan | enabled | lifetime_cap | batch_max | formats | detailed_analysis | refinement | step_matching | history_limit |
|---|---|---|---|---|---|---|---|---|
| **Free** | true | 3 | null | `["txt", "paste"]` | false | false | false | 3 |
| **Pro** | true | null | 5 | `["txt", "paste", "docx", "pdf", "html", "md"]` | true | true | true | null |
| **Team** | true | null | 10 | `["txt", "paste", "docx", "pdf", "html", "md"]` | true | true | true | null |
---
## 4. API Design
All endpoints under `/api/v1/kb-accelerator`. All require authentication. All records scoped to `account_id`. Role enforcement: `require_engineer_or_admin`.
### Endpoints
| Method | Endpoint | Description | Phase |
|---|---|---|---|
| `POST` | `/upload` | Upload file or paste text. Creates `kb_import`, starts extraction, triggers auto-convert. Returns `kb_import_id`. | 1 |
| `GET` | `/{id}` | Get import status, source text preview, generated nodes, confidence stats. | 1 |
| `GET` | `/` | List imports for current account. Pagination + status filter. Respects `kb_history_limit`. | 1 |
| `POST` | `/{id}/convert` | Manually trigger or re-trigger AI conversion. For retry/regeneration scenarios. | 1 |
| `PATCH` | `/{id}/nodes/{node_id}` | Edit a specific node. Operations: `approve`, `reject`, `edit`, `delete`, `regenerate`, `insert_after`. | 1 |
| `POST` | `/{id}/commit` | Finalize: create Tree record from reviewed nodes, populate `import_metadata`, update status to `committed`. | 1 |
| `DELETE` | `/{id}` | Cancel and clean up an in-progress or abandoned import. | 1 |
| `GET` | `/quota` | Return current plan KB entitlements, usage counts, and UI flags (detailed_analysis, refinement, etc.). | 1 |
| `POST` | `/{id}/analyze` | (Pro/Team) Trigger detailed two-phase analysis before generation. | 2 |
| `POST` | `/{id}/refine` | Send a refinement message scoped to this import's context. Returns updated nodes. | 2 |
| `POST` | `/batch` | Submit multiple files. Returns `batch_id` + array of `kb_import_id`s. | 3 |
| `GET` | `/batch/{batch_id}` | Get grouped batch status and per-import outcomes. | 3 |
| `GET` | `/metrics` | KPI dashboard data: conversion rate, avg confidence, format usage, etc. | 3 |
### Upload Endpoint Detail
**`POST /api/v1/kb-accelerator/upload`**
Accepts `multipart/form-data` (file upload) or `application/json` (text paste).
**Request — File Upload:**
- `file`: UploadFile (required) — the KB article file
- `target_type`: string (optional) — `"troubleshooting"` or `"procedural"`. If omitted, AI decides.
**Request — Text Paste:**
- `content`: string (required) — raw text content
- `title`: string (optional) — suggested title
- `target_type`: string (optional)
**Validation:**
- Max file size: 10MB
- Format whitelist: `.txt`, `.docx` (Phase 1); `.pdf`, `.html`, `.md` (Phase 2)
- MIME type verification (content matches extension)
- Tier format check against `kb_allowed_formats`
- Lifetime conversion count check against `kb_max_lifetime_conversions`
**Response (201 Created):**
```json
{
"id": "uuid",
"status": "processing",
"source_format": "docx"
}
```
**Pipeline behavior:** After successful upload and extraction, the auto-convert pipeline triggers immediately. Frontend polls `GET /{id}` until status changes from `processing` to `ready` (or `failed`).
### Node Edit Endpoint Detail
**`PATCH /api/v1/kb-accelerator/{id}/nodes/{node_id}`**
Supports a union of operations:
- **`approve`**: Sets `user_approved = true`
- **`reject`**: Sets `user_approved = false`
- **`edit`**: Updates `content` JSONB, sets `user_edited = true`
- **`delete`**: Removes the node, reorders remaining nodes
- **`regenerate`**: Re-runs AI generation for this single node with optional user guidance text. Uses shared AI service.
- **`insert_after`**: Creates a new node after this one, shifts `node_order` for subsequent nodes
### Commit Endpoint Detail
**`POST /api/v1/kb-accelerator/{id}/commit`**
1. Validate all nodes are reviewed (or allow commit with unreviewed nodes — user's choice)
2. Build `tree_structure` JSONB from `kb_import_nodes` rows
3. Create Tree record with appropriate `tree_type` (`troubleshooting` or `procedural`)
4. For procedural flows: include generated `intake_form` schema from detected variables
5. Set `import_metadata` JSONB with provenance data
6. Update `kb_import.status` to `committed`, set `kb_import.tree_id`
7. Run best-effort RAG indexing on the new tree
8. Record audit event
**Batch behavior (Phase 3):** Successful batch items auto-commit as draft trees. Failed items retain `failed` status with error details.
---
## 5. AI Pipeline
### Single-Phase Conversion (Default)
One AI call that takes extracted text and returns a complete tree structure.
**System Prompt establishes:**
- AI role as MSP documentation specialist
- Target flow type (troubleshooting or procedural)
- ResolutionFlow tree schema with examples (reuse patterns from `ai_chat_service.py`)
- Confidence scoring instructions (0.0–1.0 per node with criteria)
- Source excerpt attribution requirement (every node must cite its source text)
- Variable detection instructions for procedural flows (`[VAR:name]` tokens)
**User message contains:**
- Extracted text with structural metadata (headings, lists, emphasis markers)
- Source filename and format for context
**Expected response:** Strict JSON matching the structure needed to populate `kb_import_nodes` rows, including `node_type`, `content`, `confidence_score`, `source_excerpt`, and parent-child relationships.
**Model routing:** Add `kb_convert` to `ACTION_MODEL_MAP` → maps to Sonnet (standard tier).
**Token tracking:** Record `ai_tokens_input` and `ai_tokens_output` on the `kb_import` record. Also call `record_ai_usage` for quota/cost tracking through the shared service.
### Two-Phase Analysis + Generation (Optional, Pro/Team)
**Phase 1 — Analysis:** AI returns structured JSON of detected elements (document type, problem statement, prerequisites, sequential steps, decision points, variables, warnings, resolutions, verification steps). Stored in `kb_import.source_metadata` or a dedicated analysis column.
**Phase 2 — Generation:** Takes Phase 1 analysis + original text → generates tree structure (same output as single-phase).
**Model routing:** Add `kb_analyze` to `ACTION_MODEL_MAP`.
### Confidence Scoring
| Score Range | Label | UI Indicator |
|---|---|---|
| 0.9 – 1.0 | High Confidence | Green left accent border |
| 0.7 – 0.89 | Medium Confidence | Amber left accent border |
| 0.5 – 0.69 | Low Confidence | Red left accent border |
| < 0.5 | Needs Review | Red left accent border + flag icon |
### Procedural Flow: Variable Detection
For procedural target type, the AI identifies instance-specific values and maps them to `[VAR:name]` tokens:
| Pattern | Variable Name | Form Field Type |
|---|---|---|
| IP addresses | `[VAR:ip_address]` | ip_address |
| Server/computer names | `[VAR:server_name]` | text |
| Domain names | `[VAR:domain_name]` | text |
| Usernames/email | `[VAR:username]` | text |
| License types | `[VAR:license_type]` | select |
| OU paths | `[VAR:ou_path]` | text |
| Port numbers | `[VAR:port]` | number |
| Subnet masks | `[VAR:subnet_mask]` | ip_address |
An `intake_form` JSONB schema is auto-generated from detected variables and stored on the committed tree.
---
## 6. Frontend Design
### Route: `/kb-accelerator`
Multi-step wizard with 3-4 screens, all within the existing app shell (sidebar + topbar). Uses the current design system: dark theme, cyan brand color, glass morphism, IBM Plex Sans / Bricolage Grotesque / JetBrains Mono fonts.
### Screen 1: Upload
- Drag-and-drop zone for files with format badges (DOCX, TXT in Phase 1)
- Tab switch to "Paste Text" with full-width textarea + title field
- Target type selector: two visual cards (Troubleshooting Flow / Procedure Flow) + "Let AI decide" option
- Primary action: "Convert" button (`bg-gradient-brand`)
- Pro/Team users see additional "Detailed Analysis" button alongside "Convert"
- Container: `.glass-card-static`
- Tier gating: free users see format restrictions and remaining conversion count
### Screen 2: Analysis Preview (Phase 2, Pro/Team Only, Feature-Flagged)
- Shows detected elements as color-coded cards: steps (blue), decision points (amber), warnings (red), variables (green), resolutions (emerald)
- Source text excerpts linked to each detection
- "Proceed to Generation" and "Re-analyze" action buttons
- Only accessible when user clicks "Detailed Analysis" on the upload screen
### Screen 3: Review (Core Experience)
**Two-Panel Side-by-Side Layout:**
- **Left panel:** Original document text with detected elements highlighted inline (color-matched to generated nodes)
- **Right panel:** Generated flow preview — tree visualization for troubleshooting, step list for procedures
- Clicking a node in the right panel highlights its source excerpt in the left panel, and vice versa
- Each node shows confidence score via left accent border pattern (green/amber/red)
**Per-Node Actions:**
- **Approve** (checkmark): Sets `user_approved = true`
- **Edit** (pencil): Opens inline editing for content, question text, options
- **Regenerate** (refresh): Re-runs AI for just this node with optional guidance
- **Delete** (trash): Removes node from generated flow
- **Add Node** (plus): Insert a manual node after this one
**Bulk Actions:**
- "Approve All High Confidence" — one-click approval for all nodes scoring ≥ 0.9
- "Commit to Library" — finalizes the flow
**AI Refinement Panel (Phase 2):** Slide-in panel on the review screen for conversational refinement. User types natural language instructions ("Add a warning about DNS propagation after step 4", "Split this decision point"). Scoped to the KB import context — NOT the general FlowPilot chat. Reuses `EditorAIPanel` component internals with KB-specific branding.
**Step Library Suggestions (Phase 2):** For procedural flows, matched steps show a "Link to Library" badge. Clicking shows the library step content and lets the user swap the generated step for the library step.
### Screen 4: Success
- Confirmation with link to the new flow in the editor
- "Convert Another" button
- Stats: average confidence score, node count, processing time
### Navigation
- **Sidebar:** "KB Accelerator" nav item with sparkle/lightning icon
- **Flow library header:** "Import KB Article" button next to "Create New Flow"
---
## 7. Tier Gating
| Capability | Free | Pro ($19/mo) | Team ($15/user/mo) |
|---|---|---|---|
| **Conversions** | 3 lifetime (account-wide) | Unlimited | Unlimited |
| **Formats** | TXT + paste only | All formats | All formats |
| **Target type selection** | AI decides only | Manual + AI | Manual + AI |
| **Detailed analysis** | No | Yes | Yes |
| **Conversational refinement** | No | Yes (Phase 2) | Yes (Phase 2) |
| **Step Library matching** | No | Yes (Phase 2) | Yes (Phase 2) |
| **Review actions** | Approve / Edit / Delete | Full (+ regenerate, insert, bulk approve) | Full (+ regenerate, insert, bulk approve) |
| **Import history** | Last 3 only | Full history | Full history + audit log |
| **Batch import** | No | Up to 5 articles (Phase 3) | Up to 10 articles (Phase 3) |
| **Team visibility** | N/A | N/A | Shared read access to imports |
**Enforcement:** Hard server-side checks on every endpoint. Check `subscription.plan` → `PlanLimits` columns. Free tier lifetime count = `COUNT(*) FROM kb_imports WHERE account_id = ? AND status = 'committed'`.
---
## 8. Build Phases
### Phase 1: Core Pipeline (Target: 2–3 Weeks)
The goal is a complete, working single-article conversion loop for text, paste, and DOCX inputs producing both troubleshooting and procedural flows.
**Backend:**
- Migration 054: `kb_imports` and `kb_import_nodes` tables
- Migration 055: `PlanLimits` KB Accelerator columns + seed defaults
- Upload endpoint — text, paste, DOCX extraction (python-docx)
- Single-phase AI conversion — prompt engineering, structured JSON parsing, node creation
- Node edit endpoint — approve, reject, edit, delete, regenerate, insert_after
- Commit endpoint — create Tree, set `import_metadata`, strip staging data, RAG indexing
- List/get import endpoints with pagination and status filter
- Quota endpoint — return plan entitlements and usage counts
- Delete/cancel endpoint
- Hard tier gating — format checks, lifetime conversion count, review action restrictions
- Add `kb_convert` to `ACTION_MODEL_MAP`
- Extraction service module (TXT, paste, DOCX) with pluggable architecture for Phase 2 formats
- Upload validation service — extension, MIME, size, pluggable scan hook (no-op default)
**Frontend:**
- Upload screen — drag-drop zone, paste tab, target type cards, "Let AI decide"
- Review screen — two-panel layout, confidence indicators, per-node actions, source highlighting
- Success screen — confirmation, stats, "Convert Another"
- Sidebar nav item + flow library CTA button
- KB Accelerator API client module (`kbAccelerator.ts`)
- TypeScript types (`kbAccelerator.ts`)
- HTTP polling for processing status
- Tier gating UI — format restrictions shown, remaining conversions shown, upgrade prompts for locked features
**Both flow types** (troubleshooting + procedural) supported from Phase 1 start.
### Phase 2: Rich Formats & Refinement (Target: 2–3 Weeks)
Layer on additional formats, the power-user analysis preview, conversational refinement, and Step Library matching.
**Backend:**
- PDF extraction via PyMuPDF with extraction preview/correction endpoint (user verifies extracted text before AI processing)
- HTML extraction via BeautifulSoup
- Markdown extraction via markdown-it-py
- Detailed analysis endpoint — two-phase AI (analyze → generate), Pro/Team gated
- Conversational refinement endpoint — scoped chat for the KB import context, uses shared AI service, NOT `AIChatSession`
- Step Library matching service — compare generated procedural steps against user's Step Library (text similarity or pgvector embeddings)
- Add `kb_analyze` and `kb_refine` to `ACTION_MODEL_MAP`
**Frontend:**
- PDF extraction preview screen — shows extracted text, highlights potential issues, user can edit before AI processing
- Analysis preview screen — feature-flagged for Pro/Team, shows detected elements as color-coded cards
- AI refinement slide-in panel on review screen — reuses `EditorAIPanel` internals with KB branding
- Step Library match suggestions — "Link to Library" badges on matched procedural steps
- "Approve All High Confidence" bulk action button
### Phase 3: Scale & Polish (Future)
Batch import, history dashboard, and analytics.
**Backend:**
- Batch upload endpoint — multipart `files[]` + shared options, returns `batch_id` + import IDs
- Batch status endpoint
- FastAPI background jobs for batch processing (DB-based job queue)
- Auto-commit as draft for successful batch items
- Import history dashboard endpoint
- Metrics/analytics endpoint — conversion rate, avg confidence, format usage, time trends
**Frontend:**
- Batch upload UI — multi-file drag-drop with per-file status indicators
- Batch results view — shows auto-committed drafts and failed items
- Import history dashboard with filters and search
- Analytics visualizations (conversion trends, confidence distributions)
---
## 9. Files to Create and Modify
### New Files
| File | Purpose |
|---|---|
| `backend/alembic/versions/054_add_kb_imports.py` | Migration: `kb_imports` + `kb_import_nodes` tables |
| `backend/alembic/versions/055_add_kb_plan_limits.py` | Migration: PlanLimits KB columns + seed defaults |
| `backend/app/models/kb_import.py` | SQLAlchemy models: `KBImport`, `KBImportNode` |
| `backend/app/schemas/kb_accelerator.py` | Pydantic schemas: request/response DTOs |
| `backend/app/api/endpoints/kb_accelerator.py` | API endpoints |
| `backend/app/core/kb_extraction_service.py` | Text extraction (TXT, paste, DOCX; extensible for Phase 2 formats) |
| `backend/app/core/kb_conversion_service.py` | AI prompt orchestration, JSON parsing, node creation |
| `backend/tests/test_kb_accelerator.py` | Integration tests |
| `frontend/src/api/kbAccelerator.ts` | API client module |
| `frontend/src/types/kbAccelerator.ts` | TypeScript types |
| `frontend/src/pages/KBAcceleratorPage.tsx` | Main page (multi-step wizard) |
| `frontend/src/components/kb-accelerator/UploadScreen.tsx` | Upload UI component |
| `frontend/src/components/kb-accelerator/ReviewScreen.tsx` | Two-panel review UI component |
| `frontend/src/components/kb-accelerator/SuccessScreen.tsx` | Post-commit confirmation component |
| `frontend/src/components/kb-accelerator/NodeCard.tsx` | Individual node display with confidence + actions |
| `frontend/src/components/kb-accelerator/SourcePanel.tsx` | Left panel: source text with highlights |
### Modified Files
| File | Change |
|---|---|
| `backend/app/models/__init__.py` | Import `KBImport`, `KBImportNode` |
| `backend/alembic/env.py` | Import KB models for migration detection |
| `backend/app/api/router.py` | Register `kb_accelerator` router |
| `backend/app/core/config.py` | Add `kb_convert` (Phase 1), `kb_analyze`, `kb_refine` (Phase 2) to `ACTION_MODEL_MAP` |
| `backend/app/models/plan_limits.py` | Add KB Accelerator limit columns |
| `frontend/src/router.tsx` | Add `/kb-accelerator` route |
| `frontend/src/components/layout/AppLayout.tsx` or `Sidebar.tsx` | Add KB Accelerator sidebar nav item |
| `frontend/src/types/index.ts` | Export KB Accelerator types |
| `frontend/src/api/index.ts` | Export KB Accelerator API client |
### Existing Files Reused (Not Modified)
| File | What's Reused |
|---|---|
| `backend/app/core/ai_chat_service.py` | Prompt patterns, structured output parsing examples |
| `backend/app/core/ai_quota_service.py` | `check_ai_quota()`, `record_ai_usage()` |
| `backend/app/core/ai_provider_service.py` | `get_model_for_action()`, Anthropic client patterns |
| `frontend/src/components/tree-editor/EditorAIPanel.tsx` | Component internals reused for refinement panel (Phase 2) |
---
## 10. Test Plan
### Backend Integration Tests
**Upload & Extraction:**
- Upload text/paste → verify `kb_import` created with status `processing`
- Upload DOCX → verify extraction produces `source_text` and `source_metadata`
- Upload unsupported format → verify 400 rejection
- Upload exceeding 10MB → verify 413 rejection
- Upload DOCX on free tier → verify 403 (format not in plan)
- Upload when lifetime limit reached → verify 403 with upgrade message
**AI Conversion:**
- Convert troubleshooting article → verify `kb_import_nodes` created with correct types, confidence scores, source excerpts
- Convert procedural article → verify step nodes created with `[VAR:name]` tokens and `intake_form` data
- Convert with AI failure → verify status set to `failed` with error message
- Verify token counts recorded on `kb_import`
- Verify `record_ai_usage` called through shared service
**Node Review Actions:**
- Approve node → verify `user_approved = true`
- Edit node → verify `content` updated, `user_edited = true`
- Delete node → verify removed, `node_order` resequenced
- Regenerate node → verify AI called, node content replaced, new confidence score
- Insert after → verify new node created with correct `node_order`, subsequent nodes shifted
**Commit:**
- Commit troubleshooting import → verify Tree created with correct `tree_type`, `tree_structure`, `import_metadata`
- Commit procedural import → verify Tree created with `intake_form` populated
- Verify `kb_import.status` = `committed`, `tree_id` set
- Verify committed tree appears in flow library
- Verify RAG indexing triggered (best-effort)
**Tier Enforcement:**
- Free tier: 4th conversion rejected (account-scoped lifetime count)
- Free tier: DOCX upload rejected, paste accepted
- Pro tier: unlimited conversions, all formats accepted
- Team tier: other account members can view import (shared visibility)
### Frontend Tests
- Upload flow: file drag-drop, paste, target type selection, validation messages
- Polling: status transitions from `processing` to `ready`
- Review screen: node display, confidence colors, source highlighting, click-to-highlight linking
- Node actions: inline edit, approve, delete — optimistic UI updates
- Commit flow: success screen, link to editor works
- Tier gating: free tier sees upgrade prompts, format restrictions shown, conversion count displayed
### E2E Smoke Test
1. Paste a sample KB article text
2. Select "Troubleshooting Flow"
3. Click "Convert"
4. Wait for processing → land on review screen
5. Verify nodes displayed with confidence indicators
6. Edit one low-confidence node
7. Approve all high-confidence nodes
8. Click "Commit to Library"
9. Verify flow appears in library
10. Open flow in tree editor — verify structure is correct
---
## 11. Success Metrics (Post-Launch)
| Metric | Target | Why It Matters |
|---|---|---|
| **Conversion completion rate** | > 70% | Imports reaching `committed` status. Below 70% = review too burdensome or quality too low. |
| **Average confidence score** | > 0.75 | Across all generated nodes. Indicates AI pipeline accuracy. |
| **Time from upload to commit** | < 10 minutes | Full cycle should feel fast. 30+ minutes = AI not saving enough time. |
| **Free-to-Pro conversion rate** | > 15% | Users who exhaust 3 free conversions then upgrade. Validates feature drives revenue. |
| **Repeat usage (Pro/Team)** | > 3 imports/month | Sustained usage indicates feature is part of workflow, not a one-time novelty. |
| **Node edit rate** | < 30% | Percentage of nodes edited before commit. Lower = AI output more usable as-is. |
| **Imported flow usage rate** | > 50% | Committed flows used in sessions within 30 days. Low = conversion producing shelfware. |
---
*End of Plan*
*ResolutionFlow LLC — March 2026*

View File

@@ -20,3 +20,4 @@ export { default as aiBuilderApi } from './aiBuilder'
export { copilotApi } from './copilot'
export { assistantChatApi } from './assistantChat'
export { flowTransferApi } from './flowTransfer'
export { kbAcceleratorApi } from './kbAccelerator'

View File

@@ -0,0 +1,76 @@
import apiClient from './client'
import type {
KBUploadResponse,
KBImport,
KBImportListResponse,
KBImportNode,
KBCommitResponse,
KBQuotaResponse,
KBListParams,
KBNodeEditRequest,
KBCommitRequest,
} from '@/types/kbAccelerator'
/**
 * POST a multipart form to the upload endpoint and unwrap the response body.
 * Shared by the pasted-text and file upload paths, which differ only in the
 * fields they put on the form.
 */
async function postUploadForm(form: FormData): Promise<KBUploadResponse> {
  const { data: body } = await apiClient.post<KBUploadResponse>('/kb-accelerator/upload', form, {
    headers: { 'Content-Type': 'multipart/form-data' },
  })
  return body
}

/**
 * Client for the KB Accelerator endpoints. Every method resolves to the
 * response body (or nothing for `delete`); request errors are not caught here
 * and propagate to the caller.
 */
export const kbAcceleratorApi = {
  /** Upload pasted article text. `title` and `target_type` are only sent when truthy. */
  async uploadText(data: { content: string; title?: string; target_type?: string }): Promise<KBUploadResponse> {
    const form = new FormData()
    form.append('content', data.content)
    if (data.title) form.append('title', data.title)
    if (data.target_type) form.append('target_type', data.target_type)
    return postUploadForm(form)
  },

  /** Upload a file. `target_type` is only sent when `targetType` is truthy. */
  async uploadFile(file: File, targetType?: string): Promise<KBUploadResponse> {
    const form = new FormData()
    form.append('file', file)
    if (targetType) form.append('target_type', targetType)
    return postUploadForm(form)
  },

  /** Fetch a single import by id. */
  async get(id: string): Promise<KBImport> {
    const { data: body } = await apiClient.get<KBImport>(`/kb-accelerator/${id}`)
    return body
  },

  /** List imports; `params` are forwarded as query parameters. */
  async list(params?: KBListParams): Promise<KBImportListResponse> {
    const { data: body } = await apiClient.get<KBImportListResponse>('/kb-accelerator', { params })
    return body
  },

  /** Call the convert endpoint for an import. */
  async convert(id: string): Promise<{ status: string }> {
    const { data: body } = await apiClient.post<{ status: string }>(`/kb-accelerator/${id}/convert`)
    return body
  },

  /** Apply an edit operation to one node of an import and return the node from the response. */
  async editNode(importId: string, nodeId: string, data: KBNodeEditRequest): Promise<KBImportNode> {
    const url = `/kb-accelerator/${importId}/nodes/${nodeId}`
    const { data: body } = await apiClient.patch<KBImportNode>(url, data)
    return body
  },

  /** Commit an import; `data` is sent as the request body when provided. */
  async commit(id: string, data?: KBCommitRequest): Promise<KBCommitResponse> {
    const { data: body } = await apiClient.post<KBCommitResponse>(`/kb-accelerator/${id}/commit`, data)
    return body
  },

  /** Delete an import. */
  async delete(id: string): Promise<void> {
    await apiClient.delete(`/kb-accelerator/${id}`)
  },

  /** Fetch the caller's KB Accelerator quota. */
  async getQuota(): Promise<KBQuotaResponse> {
    const { data: body } = await apiClient.get<KBQuotaResponse>('/kb-accelerator/quota')
    return body
  },
}

export default kbAcceleratorApi

View File

@@ -0,0 +1,201 @@
import { useState } from 'react'
import { Check, X, Pencil, Trash2, RotateCcw, Plus, ChevronDown, ChevronUp } from 'lucide-react'
import { cn } from '@/lib/utils'
import type { KBImportNode, KBNodeEditRequest } from '@/types/kbAccelerator'
/** Props for NodeCard, the per-node card on the review screen. */
interface NodeCardProps {
// The generated node this card renders.
node: KBImportNode
// Called with this node's id for every card action (approve, reject, edit,
// regenerate, insert_after, delete). The card awaits the returned promise
// and disables its action buttons until it settles.
onEdit: (nodeId: string, data: KBNodeEditRequest) => Promise<void>
// Called with the node's source excerpt on mouse enter and with null on mouse leave.
onHighlight: (excerpt: string | null) => void
}
/**
 * Border colour class for a confidence score:
 * emerald at 0.85 and above, amber at 0.65 and above, rose otherwise.
 */
function confidenceColor(score: number): string {
  return score >= 0.85
    ? 'border-emerald-400/40'
    : score >= 0.65
      ? 'border-amber-400/40'
      : 'border-rose-500/40'
}
/**
 * Human-readable band for a confidence score:
 * "High" at 0.85 and above, "Medium" at 0.65 and above, "Low" otherwise.
 */
function confidenceLabel(score: number): string {
  const isHigh = score >= 0.85
  if (isHigh) return 'High'
  const isMedium = score >= 0.65
  return isMedium ? 'Medium' : 'Low'
}
/**
 * Text colour class for a confidence score, using the same bands as the
 * border colour: emerald (>= 0.85), amber (>= 0.65), rose otherwise.
 */
function confidenceTextColor(score: number): string {
  // Start from the lowest band and upgrade; later checks win.
  let textClass = 'text-rose-500'
  if (score >= 0.65) textClass = 'text-amber-400'
  if (score >= 0.85) textClass = 'text-emerald-400'
  return textClass
}
/**
 * Card for one generated node on the review screen.
 *
 * Shows the node type, confidence band, approved/edited badges and the node
 * text, with actions to approve/unapprove, edit inline, regenerate, insert a
 * node after this one, and delete. Every action is delegated to `onEdit`;
 * while that promise is pending the action buttons are disabled. Hovering the
 * card reports the node's source excerpt through `onHighlight`.
 *
 * Errors from `onEdit` are not caught here: `busy` is reset and the rejection
 * propagates, so a failed save leaves the editor open.
 */
export function NodeCard({ node, onEdit, onHighlight }: NodeCardProps) {
  const [expanded, setExpanded] = useState(false)
  const [editMode, setEditMode] = useState(false)
  const [editContent, setEditContent] = useState('')
  const [busy, setBusy] = useState(false)

  const question = (node.content?.question as string) ?? ''
  const options = (node.content?.options as Array<{ label: string; next_node_id?: string }>) ?? []
  const stepContent = (node.content?.content as string) ?? question
  // `??` only covers null/undefined, so an empty-string `content` still needs the `||` fallback.
  const displayText = stepContent || question

  const handleAction = async (operation: KBNodeEditRequest['operation'], extra?: Partial<KBNodeEditRequest>) => {
    setBusy(true)
    try {
      await onEdit(node.id, { operation, ...extra })
      // Only reached on success, so a failed save keeps the editor open.
      if (operation === 'edit') setEditMode(false)
    } finally {
      setBusy(false)
    }
  }

  const startEdit = () => {
    setEditContent(displayText)
    setEditMode(true)
  }

  return (
    <div
      className={cn(
        'glass-card-static border-l-4 p-4 transition-all',
        confidenceColor(node.confidence_score),
        node.user_approved && 'opacity-75',
      )}
      onMouseEnter={() => onHighlight(node.source_excerpt)}
      onMouseLeave={() => onHighlight(null)}
    >
      {/* Header */}
      <div className="flex items-start justify-between gap-3">
        <div className="flex-1 min-w-0">
          <div className="flex items-center gap-2 mb-1">
            <span className="font-label text-[0.625rem] uppercase tracking-[0.1em] text-muted-foreground">
              {node.node_type}
            </span>
            <span className={cn('font-label text-[0.625rem]', confidenceTextColor(node.confidence_score))}>
              {confidenceLabel(node.confidence_score)} ({Math.round(node.confidence_score * 100)}%)
            </span>
            {node.user_approved && (
              <span className="font-label text-[0.625rem] text-emerald-400">Approved</span>
            )}
            {node.user_edited && (
              <span className="font-label text-[0.625rem] text-blue-400">Edited</span>
            )}
          </div>
          {editMode ? (
            <div className="space-y-2">
              <textarea
                aria-label="Node content"
                value={editContent}
                onChange={e => setEditContent(e.target.value)}
                className="w-full rounded-md border border-border bg-card px-3 py-2 text-sm text-foreground focus:border-primary/30 focus:outline-hidden"
                rows={3}
              />
              <div className="flex gap-2">
                <button
                  type="button"
                  onClick={() => handleAction('edit', { content: { ...node.content, question: editContent, content: editContent } })}
                  disabled={busy}
                  className="px-3 py-1.5 text-xs font-medium rounded-md bg-gradient-brand text-[#101114] hover:opacity-90"
                >
                  Save
                </button>
                <button
                  type="button"
                  onClick={() => setEditMode(false)}
                  // A pending save closes the editor itself on success; block Cancel meanwhile.
                  disabled={busy}
                  className="px-3 py-1.5 text-xs font-medium rounded-md bg-[rgba(255,255,255,0.04)] border border-[rgba(255,255,255,0.06)] text-foreground"
                >
                  Cancel
                </button>
              </div>
            </div>
          ) : (
            <p className="text-sm text-foreground">{displayText}</p>
          )}
        </div>
        {/* Actions: icon-only buttons, so each carries an aria-label as its accessible name */}
        {!editMode && (
          <div className="flex items-center gap-1 shrink-0">
            {!node.user_approved && (
              <button
                type="button"
                onClick={() => handleAction('approve')}
                disabled={busy}
                className="p-1.5 rounded-md text-muted-foreground hover:text-emerald-400 hover:bg-emerald-400/10 transition-colors"
                title="Approve"
                aria-label="Approve"
              >
                <Check size={14} />
              </button>
            )}
            {node.user_approved && (
              <button
                type="button"
                onClick={() => handleAction('reject')}
                disabled={busy}
                className="p-1.5 rounded-md text-muted-foreground hover:text-amber-400 hover:bg-amber-400/10 transition-colors"
                title="Unapprove"
                aria-label="Unapprove"
              >
                <X size={14} />
              </button>
            )}
            <button
              type="button"
              onClick={startEdit}
              disabled={busy}
              className="p-1.5 rounded-md text-muted-foreground hover:text-blue-400 hover:bg-blue-400/10 transition-colors"
              title="Edit"
              aria-label="Edit"
            >
              <Pencil size={14} />
            </button>
            <button
              type="button"
              onClick={() => handleAction('regenerate')}
              disabled={busy}
              className="p-1.5 rounded-md text-muted-foreground hover:text-primary hover:bg-primary/10 transition-colors"
              title="Regenerate"
              aria-label="Regenerate"
            >
              <RotateCcw size={14} />
            </button>
            <button
              type="button"
              onClick={() => handleAction('insert_after', { content: { question: 'New node', type: node.node_type } })}
              disabled={busy}
              className="p-1.5 rounded-md text-muted-foreground hover:text-primary hover:bg-primary/10 transition-colors"
              title="Insert after"
              aria-label="Insert after"
            >
              <Plus size={14} />
            </button>
            <button
              type="button"
              onClick={() => handleAction('delete')}
              disabled={busy}
              className="p-1.5 rounded-md text-muted-foreground hover:text-rose-500 hover:bg-rose-500/10 transition-colors"
              title="Delete"
              aria-label="Delete"
            >
              <Trash2 size={14} />
            </button>
          </div>
        )}
      </div>
      {/* Options (troubleshooting) */}
      {options.length > 0 && (
        <div className="mt-3">
          <button
            type="button"
            onClick={() => setExpanded(!expanded)}
            aria-expanded={expanded}
            className="flex items-center gap-1 text-xs text-muted-foreground hover:text-foreground"
          >
            {expanded ? <ChevronUp size={12} /> : <ChevronDown size={12} />}
            {options.length} option{options.length !== 1 ? 's' : ''}
          </button>
          {expanded && (
            <div className="mt-2 space-y-1 pl-3 border-l border-border">
              {options.map((opt, i) => (
                <p key={i} className="text-xs text-muted-foreground">
                  {opt.label} {opt.next_node_id && <span className="text-[#5a6170]"> {opt.next_node_id}</span>}
                </p>
              ))}
            </div>
          )}
        </div>
      )}
      {/* Source excerpt */}
      {node.source_excerpt && (
        <p className="mt-2 text-xs text-[#5a6170] italic truncate" title={node.source_excerpt}>
          Source: {node.source_excerpt}
        </p>
      )}
    </div>
  )
}

View File

@@ -0,0 +1,130 @@
import { useState } from 'react'
import { CheckCircle2, AlertTriangle, BarChart3 } from 'lucide-react'
import { cn } from '@/lib/utils'
import { NodeCard } from './NodeCard'
import { SourcePanel } from './SourcePanel'
import type { KBImport, KBNodeEditRequest, KBCommitRequest } from '@/types/kbAccelerator'
/** Props for ReviewScreen, the two-panel review step. */
interface ReviewScreenProps {
// The import under review; its nodes, source text and metadata are rendered.
kbImport: KBImport
// Passed through to every NodeCard as its `onEdit` handler.
onEditNode: (nodeId: string, data: KBNodeEditRequest) => Promise<void>
// Invoked by the "Commit to Library" button (currently called with no arguments).
onCommit: (data?: KBCommitRequest) => Promise<void>
// Invoked by the "Discard" button.
onDiscard: () => Promise<void>
// While true both footer buttons are disabled and the commit button reads "Committing...".
loading: boolean
}
/**
 * Review step: source document on the left, generated nodes on the right,
 * summary stats in the header and Discard / Commit actions in the footer.
 * Hovering a node card highlights its excerpt in the source panel.
 */
export function ReviewScreen({ kbImport, onEditNode, onCommit, onDiscard, loading }: ReviewScreenProps) {
  const [highlightExcerpt, setHighlightExcerpt] = useState<string | null>(null)

  const nodes = kbImport.nodes

  // Tally both header counters in one pass over the nodes.
  let approvedCount = 0
  let lowConfidenceCount = 0
  for (const n of nodes) {
    if (n.user_approved) approvedCount += 1
    if (n.confidence_score < 0.65) lowConfidenceCount += 1
  }

  const avgConfidence = kbImport.confidence_avg ?? 0

  // Title and description are read from the `_conversion` entry of the source metadata.
  const metadata = kbImport.source_metadata as Record<string, unknown> | null
  const conversion = metadata?._conversion as Record<string, unknown> | undefined
  const flowTitle = (conversion?.title as string) ?? 'Untitled Flow'
  const flowDescription = (conversion?.description as string) ?? ''

  const flowTypeLabel = kbImport.target_type === 'troubleshooting' ? 'Troubleshooting' : 'Project'
  const commitDisabled = loading || nodes.length === 0
  const commitLabel = loading ? 'Committing...' : 'Commit to Library'

  return (
    <div className="flex flex-col h-full gap-4">
      {/* Header */}
      <div className="glass-card-static p-4 flex flex-wrap items-center justify-between gap-3">
        <div>
          <h2 className="text-lg font-heading font-semibold text-foreground">{flowTitle}</h2>
          {flowDescription && (
            <p className="text-sm text-muted-foreground mt-0.5">{flowDescription}</p>
          )}
        </div>
        <div className="flex items-center gap-4">
          <div className="flex items-center gap-2 text-sm">
            <BarChart3 size={14} className="text-muted-foreground" />
            <span className="text-muted-foreground">
              Avg confidence: <span className="text-foreground font-medium">{Math.round(avgConfidence * 100)}%</span>
            </span>
          </div>
          <div className="flex items-center gap-2 text-sm">
            <CheckCircle2 size={14} className="text-emerald-400" />
            <span className="text-muted-foreground">
              {approvedCount}/{nodes.length} approved
            </span>
          </div>
          {lowConfidenceCount > 0 && (
            <div className="flex items-center gap-2 text-sm">
              <AlertTriangle size={14} className="text-amber-400" />
              <span className="text-amber-400">
                {lowConfidenceCount} low confidence
              </span>
            </div>
          )}
        </div>
      </div>
      {/* Two-panel layout */}
      <div className="flex-1 grid grid-cols-1 lg:grid-cols-2 gap-4 min-h-0">
        {/* Source panel */}
        <div className="overflow-hidden">
          <SourcePanel
            sourceText={kbImport.source_text}
            sourceFormat={kbImport.source_format}
            highlightExcerpt={highlightExcerpt}
          />
        </div>
        {/* Nodes panel */}
        <div className="flex flex-col glass-card-static overflow-hidden">
          <div className="flex items-center gap-2 px-4 py-3 border-b" style={{ borderColor: 'var(--glass-border)' }}>
            <span className="text-sm font-medium text-foreground">Generated Flow</span>
            <span className="font-label text-[0.625rem] uppercase tracking-[0.1em] text-muted-foreground ml-auto">
              {flowTypeLabel}
            </span>
          </div>
          <div className="flex-1 overflow-y-auto p-4 space-y-3">
            {nodes.map(node => (
              <NodeCard
                key={node.id}
                node={node}
                onEdit={onEditNode}
                onHighlight={setHighlightExcerpt}
              />
            ))}
            {nodes.length === 0 && (
              <p className="text-sm text-muted-foreground text-center py-8">
                No nodes generated. Try converting again.
              </p>
            )}
          </div>
        </div>
      </div>
      {/* Actions */}
      <div className="flex items-center justify-between gap-3">
        <button
          onClick={onDiscard}
          disabled={loading}
          className={cn(
            'px-4 py-2.5 rounded-[10px] text-sm font-medium transition-colors',
            'bg-[rgba(255,255,255,0.04)] border border-[rgba(255,255,255,0.06)] text-muted-foreground',
            'hover:text-foreground hover:border-[rgba(255,255,255,0.12)]',
            'disabled:opacity-50 disabled:cursor-not-allowed'
          )}
        >
          Discard
        </button>
        <button
          onClick={() => onCommit()}
          disabled={commitDisabled}
          className={cn(
            'flex items-center gap-2 px-6 py-2.5 rounded-[10px] text-sm font-semibold transition-all',
            'bg-gradient-brand text-[#101114] shadow-lg shadow-primary/20',
            'hover:opacity-90 active:scale-[0.97]',
            'disabled:opacity-50 disabled:cursor-not-allowed'
          )}
        >
          <CheckCircle2 size={16} />
          {commitLabel}
        </button>
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,42 @@
import { useMemo } from 'react'
import { FileText } from 'lucide-react'
/** Props for SourcePanel. */
interface SourcePanelProps {
// Full source text, rendered verbatim inside a <pre>.
sourceText: string
// Format tag shown in the panel header.
sourceFormat: string
// Excerpt to highlight; only highlighted when it occurs verbatim in sourceText.
highlightExcerpt: string | null
}
/**
 * Source document panel. Renders the source text and, when `highlightExcerpt`
 * is non-empty and found verbatim in the text, wraps its first occurrence in
 * a <mark>.
 */
export function SourcePanel({ sourceText, sourceFormat, highlightExcerpt }: SourcePanelProps) {
  const renderedText = useMemo(() => {
    const plain = <span>{sourceText}</span>
    if (!highlightExcerpt) return plain

    // One search gives both "is it present" and "where".
    const start = sourceText.indexOf(highlightExcerpt)
    if (start === -1) return plain

    const end = start + highlightExcerpt.length
    return (
      <>
        <span>{sourceText.slice(0, start)}</span>
        <mark className="bg-primary/20 text-foreground rounded px-0.5">{highlightExcerpt}</mark>
        <span>{sourceText.slice(end)}</span>
      </>
    )
  }, [sourceText, highlightExcerpt])

  return (
    <div className="glass-card-static flex flex-col h-full">
      <div className="flex items-center gap-2 px-4 py-3 border-b" style={{ borderColor: 'var(--glass-border)' }}>
        <FileText size={16} className="text-muted-foreground" />
        <span className="text-sm font-medium text-foreground">Source Document</span>
        <span className="font-label text-[0.625rem] uppercase tracking-[0.1em] text-muted-foreground ml-auto">
          {sourceFormat}
        </span>
      </div>
      <div className="flex-1 overflow-y-auto p-4">
        <pre className="text-sm text-muted-foreground whitespace-pre-wrap font-sans leading-relaxed">
          {renderedText}
        </pre>
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,56 @@
import { CheckCircle2, ArrowRight, Plus } from 'lucide-react'
import { cn } from '@/lib/utils'
import type { KBCommitResponse } from '@/types/kbAccelerator'
/** Props for SuccessScreen, shown after a successful commit. */
interface SuccessScreenProps {
// Commit response; only `tree_type` is read here, to word the confirmation message.
result: KBCommitResponse
// Invoked by the "View Flow" button.
onViewFlow: () => void
// Invoked by the "Convert Another" button.
onConvertAnother: () => void
}
/**
 * Confirmation step shown after a commit: a success message naming the flow
 * kind, plus "View Flow" and "Convert Another" buttons.
 */
export function SuccessScreen({ result, onViewFlow, onConvertAnother }: SuccessScreenProps) {
  // Any tree type other than 'troubleshooting' is worded as a project flow.
  const flowKind = result.tree_type === 'troubleshooting' ? 'troubleshooting' : 'project'

  const viewFlowClasses = cn(
    'flex items-center gap-2 px-6 py-2.5 rounded-[10px] text-sm font-semibold transition-all',
    'bg-gradient-brand text-[#101114] shadow-lg shadow-primary/20',
    'hover:opacity-90 active:scale-[0.97]'
  )
  const convertAnotherClasses = cn(
    'flex items-center gap-2 px-6 py-2.5 rounded-[10px] text-sm font-medium transition-colors',
    'bg-[rgba(255,255,255,0.04)] border border-[rgba(255,255,255,0.06)] text-foreground',
    'hover:border-[rgba(255,255,255,0.12)]'
  )

  return (
    <div className="max-w-lg mx-auto text-center space-y-6 py-12">
      <div className="flex justify-center">
        <div className="w-16 h-16 rounded-full bg-emerald-400/10 flex items-center justify-center">
          <CheckCircle2 size={32} className="text-emerald-400" />
        </div>
      </div>
      <div>
        <h2 className="text-xl font-heading font-semibold text-foreground">
          Flow Created Successfully
        </h2>
        <p className="text-sm text-muted-foreground mt-2">
          Your KB article has been converted into a {flowKind} flow
          and added to your library.
        </p>
      </div>
      <div className="flex flex-col sm:flex-row items-center justify-center gap-3">
        <button onClick={onViewFlow} className={viewFlowClasses}>
          View Flow
          <ArrowRight size={16} />
        </button>
        <button onClick={onConvertAnother} className={convertAnotherClasses}>
          <Plus size={16} />
          Convert Another
        </button>
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,269 @@
import { useState, useCallback, useRef } from 'react'
import { Upload, FileText, ClipboardPaste, FileUp, Sparkles, AlertCircle } from 'lucide-react'
import { cn } from '@/lib/utils'
import { Textarea } from '@/components/ui/Textarea'
import { Input } from '@/components/ui/Input'
import type { KBQuotaResponse } from '@/types/kbAccelerator'
// Flow type choices offered by the upload form; 'auto' is the form's initial selection.
type TargetType = 'troubleshooting' | 'procedural' | 'auto'
/** Props for UploadScreen, the first wizard step. */
interface UploadScreenProps {
// Quota for the current account, or null when not available; when null the
// form falls back to the 'txt' and 'paste' formats and hides the quota banner.
quota: KBQuotaResponse | null
// Called on submit in paste mode with the raw content, title and chosen target type.
onSubmitText: (content: string, title: string, targetType: TargetType) => void
// Called on submit in file mode with the selected file and chosen target type.
onSubmitFile: (file: File, targetType: TargetType) => void
// While true, submit is disabled and the button reads "Converting...".
loading: boolean
}
// Cards rendered by the "Target Flow Type" selector, in display order.
// `value` is what gets stored in the form's targetType state and handed to the submit callbacks.
const TARGET_TYPES = [
{
value: 'troubleshooting' as const,
label: 'Troubleshooting Flow',
description: 'Decision tree with diagnostic questions and resolutions',
},
{
// The 'procedural' value is presented to users as "Project Flow".
value: 'procedural' as const,
label: 'Project Flow',
description: 'Step-by-step procedure with warnings and variables',
},
{
value: 'auto' as const,
label: 'Let AI Decide',
description: 'AI will determine the best flow type from your content',
},
]
// Display names for format keys in the "Supported:" hint of the file drop zone.
// Keys missing from this map are shown upper-cased by the component instead.
const FORMAT_LABELS: Record<string, string> = {
txt: 'TXT',
paste: 'Paste',
docx: 'DOCX',
pdf: 'PDF',
html: 'HTML',
md: 'Markdown',
}
/** Formats assumed while quota is unavailable. */
const DEFAULT_ALLOWED_FORMATS = ['txt', 'paste']

/** File formats the user may upload; 'paste' is an input mode, not a file format. */
function uploadableFormats(quota: KBQuotaResponse | null): string[] {
  return (quota?.allowed_formats ?? DEFAULT_ALLOWED_FORMATS).filter(f => f !== 'paste')
}

/** Lower-cased extension of a file name without the dot, or '' when there is none. */
function fileExtension(name: string): string {
  const dot = name.lastIndexOf('.')
  return dot === -1 ? '' : name.slice(dot + 1).toLowerCase()
}

/**
 * Upload step of the wizard: paste text or pick/drop a file, choose a target
 * flow type, and submit.
 *
 * - Paste mode requires at least 10 non-whitespace-trimmed characters.
 * - File mode requires a selected file whose extension is one of the allowed
 *   formats; this is enforced for drag-and-drop too, which the input's
 *   `accept` attribute does not cover.
 * - Submit is disabled while `loading` or when the quota says `can_convert`
 *   is false.
 */
export function UploadScreen({ quota, onSubmitText, onSubmitFile, loading }: UploadScreenProps) {
  const [mode, setMode] = useState<'paste' | 'file'>('paste')
  const [content, setContent] = useState('')
  const [title, setTitle] = useState('')
  const [targetType, setTargetType] = useState<TargetType>('auto')
  const [file, setFile] = useState<File | null>(null)
  const [fileError, setFileError] = useState<string | null>(null)
  const [dragOver, setDragOver] = useState(false)
  const fileInputRef = useRef<HTMLInputElement>(null)

  const fileFormats = uploadableFormats(quota)

  const canSubmit = mode === 'paste'
    ? content.trim().length >= 10
    : file !== null

  // A limit of 0 is a real limit, so test for null/undefined rather than truthiness,
  // and never show a negative remaining count.
  const conversionsLabel = (() => {
    if (!quota) return ''
    const limit = quota.lifetime_conversions_limit
    if (limit == null) return 'Unlimited conversions'
    return `${Math.max(0, limit - quota.lifetime_conversions_used)} conversions remaining`
  })()

  const handleSubmit = () => {
    if (loading) return
    if (mode === 'paste') {
      onSubmitText(content, title, targetType)
    } else if (file) {
      onSubmitFile(file, targetType)
    }
  }

  // Accept a picked or dropped file only if its extension is an allowed format.
  // A rejected file leaves any previously selected file in place.
  const chooseFile = useCallback((candidate: File) => {
    const formats = uploadableFormats(quota)
    const ext = fileExtension(candidate.name)
    if (!formats.some(f => f.toLowerCase() === ext)) {
      const supported = formats.map(f => FORMAT_LABELS[f] || f.toUpperCase()).join(', ')
      setFileError(`Unsupported file type. Supported: ${supported}`)
      return
    }
    setFileError(null)
    setFile(candidate)
    setMode('file')
  }, [quota])

  const handleDrop = useCallback((e: React.DragEvent) => {
    e.preventDefault()
    setDragOver(false)
    const droppedFile = e.dataTransfer.files[0]
    if (droppedFile) chooseFile(droppedFile)
  }, [chooseFile])

  const handleFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
    const selected = e.target.files?.[0]
    // Clear the input so choosing the same file again (e.g. after "Remove") still fires onChange.
    e.target.value = ''
    if (selected) chooseFile(selected)
  }

  const clearFile = () => {
    setFile(null)
    setFileError(null)
  }

  return (
    <div className="max-w-3xl mx-auto space-y-6">
      {/* Quota info */}
      {quota && (
        <div className="glass-card-static p-4 flex items-center justify-between">
          <div className="flex items-center gap-3">
            <Sparkles size={18} className="text-primary" />
            <div>
              <p className="text-sm font-medium text-foreground">
                {conversionsLabel}
              </p>
              <p className="text-xs text-muted-foreground">
                {quota.plan.charAt(0).toUpperCase() + quota.plan.slice(1)} plan
              </p>
            </div>
          </div>
          {!quota.can_convert && (
            <div className="flex items-center gap-2 text-amber-400 text-sm">
              <AlertCircle size={16} />
              <span>Conversion limit reached</span>
            </div>
          )}
        </div>
      )}
      {/* Mode toggle */}
      <div className="flex gap-2">
        <button
          type="button"
          onClick={() => setMode('paste')}
          className={cn(
            'flex items-center gap-2 px-4 py-2 rounded-[10px] text-sm font-medium transition-colors',
            mode === 'paste'
              ? 'bg-primary/10 text-foreground border border-primary/30'
              : 'bg-[rgba(255,255,255,0.04)] border border-[rgba(255,255,255,0.06)] text-muted-foreground hover:text-foreground hover:border-[rgba(255,255,255,0.12)]'
          )}
        >
          <ClipboardPaste size={16} />
          Paste Text
        </button>
        {fileFormats.length > 0 && (
          <button
            type="button"
            onClick={() => setMode('file')}
            className={cn(
              'flex items-center gap-2 px-4 py-2 rounded-[10px] text-sm font-medium transition-colors',
              mode === 'file'
                ? 'bg-primary/10 text-foreground border border-primary/30'
                : 'bg-[rgba(255,255,255,0.04)] border border-[rgba(255,255,255,0.06)] text-muted-foreground hover:text-foreground hover:border-[rgba(255,255,255,0.12)]'
            )}
          >
            <FileUp size={16} />
            Upload File
          </button>
        )}
      </div>
      {/* Content area */}
      <div className="glass-card-static p-5 space-y-4">
        {mode === 'paste' ? (
          <>
            <div>
              <label htmlFor="kb-title" className="block font-label text-[0.625rem] uppercase tracking-[0.1em] text-muted-foreground mb-1.5">
                Title (optional)
              </label>
              <Input
                id="kb-title"
                value={title}
                onChange={e => setTitle(e.target.value)}
                placeholder="e.g., Outlook Connectivity Troubleshooting"
                maxLength={255}
              />
            </div>
            <div>
              <label htmlFor="kb-content" className="block font-label text-[0.625rem] uppercase tracking-[0.1em] text-muted-foreground mb-1.5">
                KB Article Content
              </label>
              <Textarea
                id="kb-content"
                value={content}
                onChange={e => setContent(e.target.value)}
                placeholder="Paste your KB article text here..."
                rows={12}
                maxLength={500000}
              />
              <p className="mt-1 text-xs text-muted-foreground">
                {content.length.toLocaleString()} / 500,000 characters
              </p>
            </div>
          </>
        ) : (
          <>
            <div
              onDragOver={e => { e.preventDefault(); setDragOver(true) }}
              onDragLeave={() => setDragOver(false)}
              onDrop={handleDrop}
              onClick={() => fileInputRef.current?.click()}
              className={cn(
                'flex flex-col items-center justify-center gap-3 p-10 rounded-xl border-2 border-dashed cursor-pointer transition-colors',
                dragOver
                  ? 'border-primary/50 bg-primary/5'
                  : 'border-[rgba(255,255,255,0.08)] hover:border-[rgba(255,255,255,0.15)]'
              )}
            >
              {file ? (
                <>
                  <FileText size={32} className="text-primary" />
                  <div className="text-center">
                    <p className="text-sm font-medium text-foreground">{file.name}</p>
                    <p className="text-xs text-muted-foreground mt-1">
                      {(file.size / 1024).toFixed(1)} KB
                    </p>
                    <button
                      type="button"
                      // Stop the click from bubbling to the drop zone, which would reopen the picker.
                      onClick={e => { e.stopPropagation(); clearFile() }}
                      className="mt-2 text-xs text-primary hover:underline"
                    >
                      Remove
                    </button>
                  </div>
                </>
              ) : (
                <>
                  <Upload size={32} className="text-muted-foreground" />
                  <div className="text-center">
                    <p className="text-sm text-foreground">
                      Drop a file here or <span className="text-primary">browse</span>
                    </p>
                    <p className="text-xs text-muted-foreground mt-1">
                      Supported: {fileFormats.map(f => FORMAT_LABELS[f] || f.toUpperCase()).join(', ')}
                    </p>
                  </div>
                </>
              )}
              <input
                ref={fileInputRef}
                type="file"
                className="hidden"
                accept={fileFormats.map(f => `.${f}`).join(',')}
                onChange={handleFileSelect}
              />
            </div>
            {fileError && (
              <p role="alert" className="flex items-center gap-2 text-xs text-amber-400">
                <AlertCircle size={14} />
                <span>{fileError}</span>
              </p>
            )}
          </>
        )}
      </div>
      {/* Target type selector */}
      <div>
        <p className="font-label text-[0.625rem] uppercase tracking-[0.1em] text-muted-foreground mb-2">
          Target Flow Type
        </p>
        <div className="grid grid-cols-1 sm:grid-cols-3 gap-3">
          {TARGET_TYPES.map(t => (
            <button
              type="button"
              key={t.value}
              onClick={() => setTargetType(t.value)}
              aria-pressed={targetType === t.value}
              className={cn(
                'glass-card-static p-4 text-left transition-all',
                targetType === t.value
                  ? 'border-primary/30 bg-primary/5'
                  : 'hover:border-[rgba(255,255,255,0.12)]'
              )}
            >
              <p className="text-sm font-medium text-foreground">{t.label}</p>
              <p className="text-xs text-muted-foreground mt-1">{t.description}</p>
            </button>
          ))}
        </div>
      </div>
      {/* Submit */}
      <button
        type="button"
        onClick={handleSubmit}
        disabled={!canSubmit || loading || (quota != null && !quota.can_convert)}
        className={cn(
          'w-full flex items-center justify-center gap-2 px-6 py-3 rounded-[10px] text-sm font-semibold transition-all',
          'bg-gradient-brand text-[#101114] shadow-lg shadow-primary/20',
          'hover:opacity-90 active:scale-[0.97]',
          'disabled:opacity-50 disabled:cursor-not-allowed'
        )}
      >
        <Sparkles size={16} />
        {loading ? 'Converting...' : 'Convert to Flow'}
      </button>
    </div>
  )
}

View File

@@ -1,5 +1,5 @@
import { useEffect, useState } from 'react'
import { LayoutGrid, Box, PenLine, Clock, FileText, Bookmark, BarChart3, Settings, PanelLeftClose, PanelLeftOpen, MessageSquareText, BotMessageSquare, BookOpen } from 'lucide-react'
import { LayoutGrid, Box, PenLine, Clock, FileText, Bookmark, BarChart3, Settings, PanelLeftClose, PanelLeftOpen, MessageSquareText, BotMessageSquare, BookOpen, Sparkles } from 'lucide-react'
import { cn } from '@/lib/utils'
import { useUserPreferencesStore } from '@/store/userPreferencesStore'
import { usePinnedFlowsStore } from '@/store/pinnedFlowsStore'
@@ -83,6 +83,7 @@ export function Sidebar() {
<NavItem href="/shares" icon={FileText} label="Exports" collapsed />
<NavItem href="/assistant" icon={BotMessageSquare} label="AI Assistant" collapsed />
<NavItem href="/step-library" icon={Bookmark} label="Step Library" collapsed />
<NavItem href="/kb-accelerator" icon={Sparkles} label="KB Accelerator" collapsed />
<NavItem href="/analytics" icon={BarChart3} label="Analytics" collapsed />
<NavItem href="/guides" icon={BookOpen} label="User Guides" collapsed />
<NavItem href="/feedback" icon={MessageSquareText} label="Feedback" collapsed />
@@ -115,6 +116,7 @@ export function Sidebar() {
<NavItem href="/shares" icon={FileText} label="Exports" />
<NavItem href="/assistant" icon={BotMessageSquare} label="AI Assistant" />
<NavItem href="/step-library" icon={Bookmark} label="Step Library" />
<NavItem href="/kb-accelerator" icon={Sparkles} label="KB Accelerator" />
<NavItem href="/analytics" icon={BarChart3} label="Analytics" />
</div>
</>

View File

@@ -0,0 +1,205 @@
import { useState, useEffect, useCallback, useRef } from 'react'
import { useNavigate } from 'react-router-dom'
import { Sparkles, Loader2 } from 'lucide-react'
import { toast } from '@/lib/toast'
import { kbAcceleratorApi } from '@/api'
import { UploadScreen } from '@/components/kb-accelerator/UploadScreen'
import { ReviewScreen } from '@/components/kb-accelerator/ReviewScreen'
import { SuccessScreen } from '@/components/kb-accelerator/SuccessScreen'
import { getTreeEditorPath } from '@/lib/routing'
import type { KBImport, KBQuotaResponse, KBCommitResponse, KBNodeEditRequest } from '@/types/kbAccelerator'
type Phase = 'upload' | 'processing' | 'review' | 'success'
type TargetType = 'troubleshooting' | 'procedural' | 'auto'

/** Delay between status polls while an import is processing. */
const POLL_INTERVAL_MS = 2000
/** Consecutive failed polls tolerated before giving up (about 20s at the interval above). */
const MAX_POLL_FAILURES = 10

/**
 * Extract the `response.data.detail` message from an API error.
 * Falls back when the detail is missing, empty, or not a string.
 */
function errorDetail(err: unknown, fallback: string): string {
  const detail = (err as { response?: { data?: { detail?: unknown } } })?.response?.data?.detail
  return typeof detail === 'string' && detail ? detail : fallback
}

/**
 * KB Accelerator wizard page: upload -> processing -> review -> success.
 *
 * After an upload succeeds the page polls the import until its status is
 * 'ready' (moves to review) or 'failed' (shows the error and returns to
 * upload). Polling also stops, with an error toast, after
 * MAX_POLL_FAILURES consecutive request failures so the user is never left
 * on an endless spinner.
 */
export default function KBAcceleratorPage() {
  const navigate = useNavigate()
  const [phase, setPhase] = useState<Phase>('upload')
  const [quota, setQuota] = useState<KBQuotaResponse | null>(null)
  const [importId, setImportId] = useState<string | null>(null)
  const [kbImport, setKbImport] = useState<KBImport | null>(null)
  const [commitResult, setCommitResult] = useState<KBCommitResponse | null>(null)
  const [loading, setLoading] = useState(false)
  const pollRef = useRef<ReturnType<typeof setInterval> | null>(null)

  // Quota is informational; a failed fetch just leaves the previous value in place.
  const refreshQuota = useCallback(() => {
    kbAcceleratorApi.getQuota().then(setQuota).catch(() => {})
  }, [])

  // Load quota on mount
  useEffect(() => {
    refreshQuota()
  }, [refreshQuota])

  const stopPolling = useCallback(() => {
    if (pollRef.current) {
      clearInterval(pollRef.current)
      pollRef.current = null
    }
  }, [])

  // Poll for processing status
  const startPolling = useCallback((id: string) => {
    stopPolling()
    let inFlight = false
    let failures = 0

    const timer = setInterval(async () => {
      // Skip this tick if the previous request has not settled yet.
      if (inFlight) return
      inFlight = true
      try {
        const data = await kbAcceleratorApi.get(id)
        // Ignore responses that arrive after polling was stopped or restarted.
        if (pollRef.current !== timer) return
        failures = 0
        if (data.status === 'ready') {
          stopPolling()
          setKbImport(data)
          setPhase('review')
        } else if (data.status === 'failed') {
          stopPolling()
          toast.error(data.error_message || 'Conversion failed')
          setPhase('upload')
        }
      } catch {
        if (pollRef.current !== timer) return
        // Tolerate transient errors, but not an unbroken run of them.
        failures += 1
        if (failures >= MAX_POLL_FAILURES) {
          stopPolling()
          toast.error('Lost contact with the server while converting. Please try again.')
          setPhase('upload')
        }
      } finally {
        inFlight = false
      }
    }, POLL_INTERVAL_MS)

    pollRef.current = timer
  }, [stopPolling])

  // Cleanup polling on unmount
  useEffect(() => stopPolling, [stopPolling])

  // Shared upload flow: run the upload, then move to processing and start polling.
  const beginImport = async (upload: () => Promise<{ id: string }>) => {
    setLoading(true)
    try {
      const resp = await upload()
      setImportId(resp.id)
      setPhase('processing')
      startPolling(resp.id)
    } catch (err: unknown) {
      toast.error(errorDetail(err, 'Upload failed'))
    } finally {
      setLoading(false)
    }
  }

  // 'auto' is a UI-only choice: the target type is simply omitted from the request.
  const handleSubmitText = (content: string, title: string, targetType: TargetType) =>
    beginImport(() =>
      kbAcceleratorApi.uploadText({
        content,
        title: title || undefined,
        target_type: targetType === 'auto' ? undefined : targetType,
      }),
    )

  const handleSubmitFile = (file: File, targetType: TargetType) =>
    beginImport(() => kbAcceleratorApi.uploadFile(file, targetType === 'auto' ? undefined : targetType))

  const handleEditNode = async (nodeId: string, data: KBNodeEditRequest) => {
    if (!importId) return
    const updatedNode = await kbAcceleratorApi.editNode(importId, nodeId, data).catch((err: unknown) => {
      // Tell the user, then rethrow so the calling card can keep its editor open.
      toast.error(errorDetail(err, 'Failed to update node'))
      throw err
    })
    setKbImport(prev => {
      if (!prev) return prev
      if (data.operation === 'delete') {
        return { ...prev, nodes: prev.nodes.filter(n => n.id !== nodeId) }
      }
      if (data.operation === 'insert_after') {
        const idx = prev.nodes.findIndex(n => n.id === nodeId)
        const newNodes = [...prev.nodes]
        // If the anchor node is no longer in the list, append instead of inserting at the front.
        const insertAt = idx === -1 ? newNodes.length : idx + 1
        newNodes.splice(insertAt, 0, updatedNode)
        return { ...prev, nodes: newNodes }
      }
      return {
        ...prev,
        nodes: prev.nodes.map(n => n.id === updatedNode.id ? updatedNode : n),
      }
    })
  }

  // Accepts the optional commit payload allowed by ReviewScreen's onCommit prop and forwards it.
  const handleCommit = async (data?: Parameters<typeof kbAcceleratorApi.commit>[1]) => {
    if (!importId) return
    setLoading(true)
    try {
      const result = await kbAcceleratorApi.commit(importId, data)
      setCommitResult(result)
      setPhase('success')
      // Refresh quota
      refreshQuota()
    } catch (err: unknown) {
      toast.error(errorDetail(err, 'Commit failed'))
    } finally {
      setLoading(false)
    }
  }

  const resetWizard = () => {
    stopPolling()
    setPhase('upload')
    setImportId(null)
    setKbImport(null)
    setCommitResult(null)
    setLoading(false)
  }

  const handleDiscard = async () => {
    if (!importId) return
    setLoading(true)
    try {
      await kbAcceleratorApi.delete(importId)
      resetWizard()
    } catch {
      toast.error('Failed to discard')
    } finally {
      setLoading(false)
    }
  }

  return (
    <div className="flex flex-col h-full p-6">
      {/* Page title */}
      <div className="flex items-center gap-3 mb-6">
        <Sparkles size={24} className="text-primary" />
        <h1 className="text-2xl font-heading font-bold text-foreground">KB Accelerator</h1>
      </div>
      {/* Phases */}
      {phase === 'upload' && (
        <UploadScreen
          quota={quota}
          onSubmitText={handleSubmitText}
          onSubmitFile={handleSubmitFile}
          loading={loading}
        />
      )}
      {phase === 'processing' && (
        <div className="flex-1 flex flex-col items-center justify-center gap-4">
          <Loader2 size={40} className="text-primary animate-spin" />
          <div className="text-center">
            <p className="text-lg font-heading font-semibold text-foreground">
              Converting your KB article...
            </p>
            <p className="text-sm text-muted-foreground mt-1">
              AI is analyzing your content and generating an interactive flow.
            </p>
          </div>
        </div>
      )}
      {phase === 'review' && kbImport && (
        <ReviewScreen
          kbImport={kbImport}
          onEditNode={handleEditNode}
          onCommit={handleCommit}
          onDiscard={handleDiscard}
          loading={loading}
        />
      )}
      {phase === 'success' && commitResult && (
        <SuccessScreen
          result={commitResult}
          onViewFlow={() => {
            const path = getTreeEditorPath(commitResult.tree_id, commitResult.tree_type as 'troubleshooting' | 'procedural')
            navigate(path)
          }}
          onConvertAnother={resetWizard}
        />
      )}
    </div>
  )
}

View File

@@ -41,6 +41,7 @@ const MyAnalyticsPage = lazy(() => import('@/pages/MyAnalyticsPage'))
const FeedbackPage = lazy(() => import('@/pages/FeedbackPage'))
const StepLibraryPage = lazy(() => import('@/pages/StepLibraryPage'))
const AssistantChatPage = lazy(() => import('@/pages/AssistantChatPage'))
const KBAcceleratorPage = lazy(() => import('@/pages/KBAcceleratorPage'))
const GuidesHubPage = lazy(() => import('@/pages/GuidesHubPage'))
const GuideDetailPage = lazy(() => import('@/pages/GuideDetailPage'))
const AccountSettingsPage = lazy(() => import('@/pages/AccountSettingsPage'))
@@ -153,6 +154,7 @@ export const router = sentryCreateBrowserRouter([
{ path: 'analytics/me', element: page(MyAnalyticsPage) },
{ path: 'feedback', element: page(FeedbackPage) },
{ path: 'step-library', element: page(StepLibraryPage) },
{ path: 'kb-accelerator', element: page(KBAcceleratorPage) },
{ path: 'assistant', element: page(AssistantChatPage) },
{ path: 'guides', element: page(GuidesHubPage) },
{ path: 'guides/:slug', element: page(GuideDetailPage) },

View File

@@ -73,3 +73,17 @@ export type {
ContextMenuPosition,
SuggestionMarker,
} from './editor-ai'
export type {
KBUploadTextRequest,
KBNodeEditRequest,
KBCommitRequest,
KBListParams,
KBImportNode,
KBUploadResponse,
KBImport,
KBImportSummary,
KBImportListResponse,
KBCommitResponse,
KBQuotaResponse,
} from './kbAccelerator'

View File

@@ -0,0 +1,108 @@
/**
* KB Accelerator types — converts KB articles into interactive flows.
*/
// ── Requests ──
/**
 * Request body for uploading KB article text.
 *
 * - `content`: the article text (required).
 * - `title`: optional title for the article.
 * - `target_type`: optional flow type to produce; restricted to
 *   'troubleshooting' or 'procedural'.
 *
 * NOTE(review): the value used when `target_type` is omitted is decided
 * outside this file — confirm against the upload endpoint.
 */
export interface KBUploadTextRequest {
content: string
title?: string
target_type?: 'troubleshooting' | 'procedural'
}
/**
 * Request body for a single per-node edit operation on an import.
 *
 * - `operation`: which action to apply; one of the six literals below.
 * - `content`: optional free-form object payload.
 * - `guidance`: optional free-text string.
 *
 * NOTE(review): presumably `content` accompanies 'edit' / 'insert_after'
 * and `guidance` accompanies 'regenerate' — the type does not enforce any
 * pairing, so verify against the edit-node endpoint.
 */
export interface KBNodeEditRequest {
operation: 'approve' | 'reject' | 'edit' | 'delete' | 'regenerate' | 'insert_after'
content?: Record<string, unknown>
guidance?: string
}
/**
 * Request body for committing a reviewed import.
 *
 * All fields are optional: `name`, `description` and `category_id` are
 * plain strings.
 *
 * NOTE(review): what the server substitutes for omitted fields is not
 * visible here — confirm against the commit endpoint.
 */
export interface KBCommitRequest {
name?: string
description?: string
category_id?: string
}
/**
 * Optional parameters for listing imports.
 *
 * - `skip` / `limit`: numeric paging values (the list response carries
 *   fields with the same names).
 * - `status`: optional string filter.
 *
 * NOTE(review): the accepted `status` values are not constrained by this
 * type — confirm the allowed set with the backend.
 */
export interface KBListParams {
skip?: number
limit?: number
status?: string
}
// ── Responses ──
/**
 * A single node belonging to a KB import.
 *
 * - `id`: identifier of this node.
 * - `kb_import_id`: identifier of the owning import.
 * - `node_order`: numeric position of the node.
 * - `node_type`: node kind, typed as an unconstrained string.
 * - `content`: free-form object payload for the node.
 * - `parent_node_id`: identifier of the parent node, or null when absent.
 * - `source_excerpt`: excerpt of the source text, or null when absent.
 * - `confidence_score`: numeric confidence for this node.
 * - `user_edited` / `user_approved`: boolean review flags.
 *
 * NOTE(review): the scale of `confidence_score` (e.g. 0–1 vs 0–100) and
 * the set of `node_type` values are not visible here — confirm with the
 * backend schema.
 */
export interface KBImportNode {
id: string
kb_import_id: string
node_order: number
node_type: string
content: Record<string, unknown>
parent_node_id: string | null
source_excerpt: string | null
confidence_score: number
user_edited: boolean
user_approved: boolean
}
/**
 * Response returned for an upload.
 *
 * - `id`: identifier of the created import.
 * - `status`: status of the import, typed as an unconstrained string.
 * - `source_format`: format of the uploaded source, typed as a string.
 */
export interface KBUploadResponse {
id: string
status: string
source_format: string
}
/**
 * Full representation of a KB import, including its nodes.
 *
 * Fields typed `| null` may be absent on a given import:
 * `source_filename`, `source_metadata`, `confidence_avg`, `error_message`,
 * `processing_time_ms`, `ai_tokens_input`, `ai_tokens_output` and `tree_id`.
 *
 * - `account_id` / `created_by`: identifiers of the owning account and
 *   the creator.
 * - `source_text`: the source article text (always present).
 * - `target_type` / `status`: typed as unconstrained strings.
 * - `nodes`: the import's nodes as `KBImportNode` objects.
 * - `created_at` / `updated_at`: timestamps carried as strings.
 *
 * NOTE(review): `tree_id` presumably stays null until the import is
 * committed, and the timestamp strings are presumably ISO 8601 — neither
 * is established by this file; confirm with the backend schema.
 */
export interface KBImport {
id: string
account_id: string
created_by: string
source_filename: string | null
source_format: string
source_text: string
source_metadata: Record<string, unknown> | null
target_type: string
status: string
confidence_avg: number | null
error_message: string | null
processing_time_ms: number | null
ai_tokens_input: number | null
ai_tokens_output: number | null
tree_id: string | null
nodes: KBImportNode[]
created_at: string
updated_at: string
}
/**
 * Condensed view of an import, used as the item type of
 * `KBImportListResponse`.
 *
 * Carries a subset of the `KBImport` field names plus `node_count`
 * (a number) in place of the full `nodes` array. `source_filename` and
 * `confidence_avg` may be null.
 */
export interface KBImportSummary {
id: string
source_filename: string | null
source_format: string
target_type: string
status: string
confidence_avg: number | null
node_count: number
created_at: string
}
/**
 * Paged list response for imports.
 *
 * - `items`: the import summaries on this page.
 * - `total`: a numeric count.
 * - `skip` / `limit`: numeric paging values, mirroring `KBListParams`.
 *
 * NOTE(review): `total` is presumably the count of all matching imports
 * rather than `items.length` — confirm with the list endpoint.
 */
export interface KBImportListResponse {
items: KBImportSummary[]
total: number
skip: number
limit: number
}
/**
 * Response returned after committing an import.
 *
 * - `tree_id`: identifier of the resulting tree.
 * - `import_id`: identifier of the import that was committed.
 * - `tree_type`: type of the resulting tree, typed as a plain string.
 *
 * NOTE(review): the KB Accelerator page casts `tree_type` to
 * 'troubleshooting' | 'procedural' before building the editor path.
 * Consider narrowing this field to that union if the backend guarantees
 * only those two values.
 */
export interface KBCommitResponse {
tree_id: string
import_id: string
tree_type: string
}
/**
 * Quota and feature-availability information for the KB Accelerator.
 *
 * - `plan`: plan name as a string.
 * - `kb_accelerator_enabled`: whether the feature is enabled.
 * - `lifetime_conversions_used`: number of conversions used so far.
 * - `lifetime_conversions_limit`: numeric limit, or null.
 * - `allowed_formats`: list of permitted source formats.
 * - `detailed_analysis`, `conversational_refinement`,
 *   `step_library_matching`: boolean feature flags.
 * - `history_limit`: numeric limit, or null.
 * - `can_convert`: boolean indicating whether a conversion is allowed.
 *
 * NOTE(review): null for `lifetime_conversions_limit` / `history_limit`
 * presumably means "unlimited" — this file does not establish that;
 * confirm with the quota endpoint before relying on it.
 */
export interface KBQuotaResponse {
plan: string
kb_accelerator_enabled: boolean
lifetime_conversions_used: number
lifetime_conversions_limit: number | null
allowed_formats: string[]
detailed_analysis: boolean
conversational_refinement: boolean
step_library_matching: boolean
history_limit: number | null
can_convert: boolean
}