feat: add AI assistant with in-session copilot and standalone chat with RAG

Implements a three-phase AI assistant feature:
- Phase 0: RAG infrastructure with pgvector embeddings, Voyage AI integration, a tree chunking service, and semantic search over the team's flow library
- Phase 1: In-session copilot panel during flow navigation with contextual AI help, current-step awareness, and suggested related flows
- Phase 2: Standalone AI chat page with persistent conversation history, pin/delete, and configurable account-level retention policies

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 01:36:36 -05:00
parent 41cb7956cb
commit 1aa60dada2
44 changed files with 3080 additions and 14 deletions
--- a/backend/app/services/assistant_chat_service.py
+++ b/backend/app/services/assistant_chat_service.py
@@ -0,0 +1,152 @@
+"""Standalone AI assistant chat service with RAG context.
+
+Provides persistent conversation history for general IT questions
+with semantic search over the team's flow library.
+"""
+import logging
+from typing import Optional, Any
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.ai_provider import get_ai_provider
+from app.models.assistant_chat import AssistantChat
+from app.services import rag_service
+
+logger = logging.getLogger(__name__)
+
+# Base system prompt for the standalone assistant. send_message() appends the
+# formatted RAG section (when the search returned anything) to this text
+# before handing it to the AI provider.
+ASSISTANT_SYSTEM_PROMPT = """You are a Senior Systems and Network Engineer with 15+ years of experience working in Managed Service Provider (MSP) environments. You specialize in:
+- Windows Server, Active Directory, Group Policy, and Hybrid Identity (Entra ID)
+- Networking (TCP/IP, DNS, DHCP, VPN, firewall troubleshooting, Cisco/Fortinet)
+- Virtualization (VMware, Hyper-V) and cloud platforms (Azure, AWS, M365)
+- Endpoint management, RMM tools, and PSA platforms (ConnectWise, Datto, Kaseya)
+- PowerShell scripting and automation
+
+When answering:
+- Be direct and actionable — MSP engineers need fast, practical answers
+- Include specific commands, paths, and config values when relevant
+- Mention potential risks or gotchas before suggesting changes
+- If a relevant troubleshooting flow exists in the team's library, reference it
+- Keep responses concise but thorough — prefer bullet points and code blocks
+- Format code with proper markdown code blocks
+"""
+
+
+def _build_rag_context(rag_results: list[dict[str, Any]]) -> str:
+    """Format RAG results into a system prompt section."""
+    if not rag_results:
+        return ""
+
+    parts = ["\n--- RELEVANT FLOWS FROM TEAM LIBRARY ---"]
+    for r in rag_results[:5]:
+        parts.append(f"- [{r['tree_type']}] {r['tree_name']}: {r['chunk_text'][:200]}")
+
+    return "\n".join(parts)
+
+
+def _extract_suggested_flows(rag_results: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Extract unique suggested flows from RAG results."""
+    seen: set[str] = set()
+    suggestions = []
+    for r in rag_results:
+        tid = r["tree_id"]
+        if tid in seen or r["similarity"] < 0.3:
+            continue
+        seen.add(tid)
+        suggestions.append({
+            "tree_id": tid,
+            "tree_name": r["tree_name"],
+            "tree_type": r["tree_type"],
+            "relevance_snippet": r["chunk_text"][:150],
+        })
+    return suggestions[:3]
+
+
+def _auto_title(message: str) -> str:
+    """Generate a short title from the first user message."""
+    title = message.strip()[:100]
+    if len(message) > 100:
+        title = title.rsplit(" ", 1)[0] + "..."
+    return title
+
+
+async def create_chat(
+    user_id: UUID,
+    account_id: UUID,
+    db: AsyncSession,
+) -> AssistantChat:
+    """Create a new empty chat."""
+    chat = AssistantChat(
+        user_id=user_id,
+        account_id=account_id,
+        messages=[],
+    )
+    db.add(chat)
+    await db.flush()
+    return chat
+
+
+async def send_message(
+    chat_id: UUID,
+    user_id: UUID,
+    account_id: UUID,
+    message: str,
+    db: AsyncSession,
+) -> tuple[str, list[dict[str, Any]], AssistantChat]:
+    """Send a user message and get AI response.
+
+    Returns (ai_content, suggested_flows, chat).
+    """
+    result = await db.execute(
+        select(AssistantChat).where(
+            AssistantChat.id == chat_id,
+            AssistantChat.user_id == user_id,
+        )
+    )
+    chat = result.scalar_one_or_none()
+    if not chat:
+        raise ValueError("Chat not found")
+
+    # Auto-title from first message
+    if chat.message_count == 0:
+        chat.title = _auto_title(message)
+
+    # RAG search
+    rag_results = await rag_service.search(
+        query=message,
+        account_id=account_id,
+        db=db,
+        limit=8,
+    )
+
+    # Build system prompt
+    system_prompt = ASSISTANT_SYSTEM_PROMPT + _build_rag_context(rag_results)
+
+    # Build messages for AI
+    ai_messages = []
+    for msg in chat.messages:
+        if msg["role"] in ("user", "assistant"):
+            ai_messages.append({"role": msg["role"], "content": msg["content"]})
+    ai_messages.append({"role": "user", "content": message})
+
+    # Call AI
+    provider = get_ai_provider()
+    ai_content, input_tokens, output_tokens = await provider.generate_text(
+        system_prompt=system_prompt,
+        messages=ai_messages,
+        max_tokens=4096,
+    )
+
+    # Update chat
+    msgs = list(chat.messages)
+    msgs.append({"role": "user", "content": message})
+    msgs.append({"role": "assistant", "content": ai_content})
+    chat.messages = msgs
+    chat.message_count += 2
+    chat.total_input_tokens += input_tokens
+    chat.total_output_tokens += output_tokens
+
+    suggested_flows = _extract_suggested_flows(rag_results)
+
+    return ai_content, suggested_flows, chat