Implements three-phase AI assistant feature: - Phase 0: RAG infrastructure with pgvector embeddings, Voyage AI integration, tree chunking service, and semantic search over team's flow library - Phase 1: In-session copilot panel during flow navigation with contextual AI help, current step awareness, and suggested related flows - Phase 2: Standalone AI chat page with persistent conversation history, pin/delete, and configurable retention policies (account-level) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
153 lines
4.7 KiB
Python
153 lines
4.7 KiB
Python
"""Standalone AI assistant chat service with RAG context.
|
|
|
|
Provides persistent conversation history for general IT questions
|
|
with semantic search over the team's flow library.
|
|
"""
|
|
import logging
|
|
from typing import Optional, Any
|
|
from uuid import UUID
|
|
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.core.ai_provider import get_ai_provider
|
|
from app.models.assistant_chat import AssistantChat
|
|
from app.services import rag_service
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Base system prompt for the standalone assistant. send_message() concatenates
# the output of _build_rag_context() onto the end of this string before the
# AI provider is called, so the text here is always the start of the prompt.
ASSISTANT_SYSTEM_PROMPT = """You are a Senior Systems and Network Engineer with 15+ years of experience working in Managed Service Provider (MSP) environments. You specialize in:
|
|
- Windows Server, Active Directory, Group Policy, and Hybrid Identity (Entra ID)
|
|
- Networking (TCP/IP, DNS, DHCP, VPN, firewall troubleshooting, Cisco/Fortinet)
|
|
- Virtualization (VMware, Hyper-V) and cloud platforms (Azure, AWS, M365)
|
|
- Endpoint management, RMM tools, and PSA platforms (ConnectWise, Datto, Kaseya)
|
|
- PowerShell scripting and automation
|
|
|
|
When answering:
|
|
- Be direct and actionable — MSP engineers need fast, practical answers
|
|
- Include specific commands, paths, and config values when relevant
|
|
- Mention potential risks or gotchas before suggesting changes
|
|
- If a relevant troubleshooting flow exists in the team's library, reference it
|
|
- Keep responses concise but thorough — prefer bullet points and code blocks
|
|
- Format code with proper markdown code blocks
|
|
"""
|
|
|
|
|
|
def _build_rag_context(rag_results: list[dict[str, Any]]) -> str:
|
|
"""Format RAG results into a system prompt section."""
|
|
if not rag_results:
|
|
return ""
|
|
|
|
parts = ["\n--- RELEVANT FLOWS FROM TEAM LIBRARY ---"]
|
|
for r in rag_results[:5]:
|
|
parts.append(f"- [{r['tree_type']}] {r['tree_name']}: {r['chunk_text'][:200]}")
|
|
|
|
return "\n".join(parts)
|
|
|
|
|
|
def _extract_suggested_flows(rag_results: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
"""Extract unique suggested flows from RAG results."""
|
|
seen: set[str] = set()
|
|
suggestions = []
|
|
for r in rag_results:
|
|
tid = r["tree_id"]
|
|
if tid in seen or r["similarity"] < 0.3:
|
|
continue
|
|
seen.add(tid)
|
|
suggestions.append({
|
|
"tree_id": tid,
|
|
"tree_name": r["tree_name"],
|
|
"tree_type": r["tree_type"],
|
|
"relevance_snippet": r["chunk_text"][:150],
|
|
})
|
|
return suggestions[:3]
|
|
|
|
|
|
def _auto_title(message: str) -> str:
|
|
"""Generate a short title from the first user message."""
|
|
title = message.strip()[:100]
|
|
if len(message) > 100:
|
|
title = title.rsplit(" ", 1)[0] + "..."
|
|
return title
|
|
|
|
|
|
async def create_chat(
    user_id: UUID,
    account_id: UUID,
    db: AsyncSession,
) -> AssistantChat:
    """Create a new chat with no messages for the given user and account.

    The chat is added to the session and the session is flushed; this
    function does not commit.

    Returns:
        The newly created AssistantChat.
    """
    new_chat = AssistantChat(user_id=user_id, account_id=account_id, messages=[])
    db.add(new_chat)
    await db.flush()
    return new_chat
|
|
|
|
|
|
async def send_message(
    chat_id: UUID,
    user_id: UUID,
    account_id: UUID,
    message: str,
    db: AsyncSession,
) -> tuple[str, list[dict[str, Any]], AssistantChat]:
    """Add a user message to a chat and record the AI's reply.

    Loads the chat matching ``chat_id`` and ``user_id``, runs
    ``rag_service.search`` for the message (passing ``account_id``), calls
    the AI provider with the stored user/assistant turns plus the new
    message, then stores both new turns and the token counts on the chat.

    Returns:
        (ai_content, suggested_flows, chat)

    Raises:
        ValueError: if no chat matches ``chat_id`` and ``user_id``.
    """
    lookup = select(AssistantChat).where(
        AssistantChat.id == chat_id,
        AssistantChat.user_id == user_id,
    )
    chat = (await db.execute(lookup)).scalar_one_or_none()
    if not chat:
        raise ValueError("Chat not found")

    # The first message of a chat also becomes its title.
    if chat.message_count == 0:
        chat.title = _auto_title(message)

    # Retrieve related library content for this message.
    rag_results = await rag_service.search(
        query=message,
        account_id=account_id,
        db=db,
        limit=8,
    )

    # System prompt = fixed persona + retrieved context section.
    system_prompt = ASSISTANT_SYSTEM_PROMPT + _build_rag_context(rag_results)

    # Replay only user/assistant turns, reduced to role + content, then
    # append the new user message.
    conversation = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in chat.messages
        if turn["role"] in ("user", "assistant")
    ]
    conversation.append({"role": "user", "content": message})

    provider = get_ai_provider()
    ai_content, input_tokens, output_tokens = await provider.generate_text(
        system_prompt=system_prompt,
        messages=conversation,
        max_tokens=4096,
    )

    # Assign a fresh list instead of appending in place.
    # NOTE(review): presumably so the ORM notices the change on the
    # messages column — confirm against the AssistantChat model.
    chat.messages = [
        *chat.messages,
        {"role": "user", "content": message},
        {"role": "assistant", "content": ai_content},
    ]
    chat.message_count += 2
    chat.total_input_tokens += input_tokens
    chat.total_output_tokens += output_tokens

    return ai_content, _extract_suggested_flows(rag_results), chat
|