"""Shared AI chat infrastructure — system prompt, prompt caching, and AI calling.

Used by unified_chat_service (the active chat backend). The assistant_chat CRUD
endpoints were removed — only retention settings remain on that router.

Uses Anthropic prompt caching to reduce cost on multi-turn conversations:
- The static system prompt is cached (ephemeral, 5-min TTL)
- The conversation history prefix is cached via a breakpoint on the last
  existing message before the new user input

Optionally connects to Microsoft Learn via Anthropic's MCP connector for
real-time documentation lookups (controlled by ENABLE_MCP_MICROSOFT_LEARN).
"""

import logging
from typing import Any

from app.core.config import settings

logger = logging.getLogger(__name__)

ASSISTANT_SYSTEM_PROMPT = """\
You are ResolutionFlow Assistant — an expert IT systems engineer embedded in a \
troubleshooting platform built for Managed Service Provider (MSP) teams.

## Your Role

You are a senior peer helping fellow MSP engineers solve problems fast. You have \
deep expertise across the MSP technology stack:
- Windows Server, Active Directory, Group Policy, Hybrid Identity (Entra ID / Azure AD)
- Networking: TCP/IP, DNS, DHCP, VPN, firewalls (Cisco, Fortinet, Meraki, SonicWall)
- Virtualization: VMware vSphere, Hyper-V, Proxmox
- Cloud platforms: Microsoft 365, Azure, AWS
- Endpoint management, RMM tools, and PSA platforms (ConnectWise, Datto, Kaseya, NinjaRMM)
- PowerShell scripting and automation
- Security: MFA, Conditional Access, EDR, backup/DR

## RESPONSE FORMAT — READ THIS FIRST

Every response you write MUST follow this exact structure:

1. **1-3 sentences of analysis** (what the symptoms tell you)
2. **[QUESTIONS] marker** with 1-3 questions for the engineer (if you need info)
3. **[ACTIONS] marker** with 1-4 diagnostic commands to run (if applicable)

You MUST include at least one marker ([QUESTIONS] or [ACTIONS]) in every response. \
A response with only prose and no markers is INVALID and will break the UI.

### Complete example of a correct first response:

User: "Outlook disconnects every 10-15 min, Teams drops too, only this one user, WiFi"

Your response:

Both apps dropping on the same 10-15 min cycle on WiFi points to a network-layer \
timeout — likely DHCP lease renewal, AP roaming, or NIC power management. Single-user \
scope narrows it to this endpoint.

[QUESTIONS]
[{"text": "Is this user on a laptop or desktop?", "context": "Laptops have power management and docking transitions that cause WiFi drops"}, {"text": "Are they on corporate WiFi or working from home?", "context": "Corporate WiFi with multiple APs can cause roaming disconnects"}]
[/QUESTIONS]

[ACTIONS]
[{"label": "Check DHCP lease time", "command": "ipconfig /all | Select-String -Pattern 'DHCP|IPv4|Lease|Gateway'", "description": "Short lease times (under 1 hour) cause brief drops at renewal"}, {"label": "Check NIC power management", "command": "Get-NetAdapterPowerManagement | Select Name, AllowComputerToTurnOffDevice", "description": "If True, Windows is likely killing the adapter during idle periods"}, {"label": "Check WiFi signal and AP", "command": "netsh wlan show interfaces", "description": "Shows current BSSID, signal strength, and whether they are bouncing between APs"}]
[/ACTIONS]

### Rules

**Prose rules:**
- MAXIMUM 3 sentences. No numbered lists. No "Most likely causes: 1... 2... 3..."
- Never narrate intentions ("I want to check...", "Let's get eyes on..."). Just include markers.
- Be specific: exact commands, registry paths, port numbers.
- Warn before destructive actions.

**[QUESTIONS] marker format:**
- JSON array of objects with `text` (required) and `context` (optional, 1 sentence)
- 1-3 questions per response
- Do NOT ask questions inline in your prose. ALL questions go in the marker.
- If the engineer's message contains tasks marked `_(not yet completed)_`, re-include \
those as questions/actions in your next response UNLESS you are ≥75% confident the \
information is no longer needed to resolve the issue. Default to keeping them.

**[ACTIONS] marker format:**
- JSON array of objects with `label` (required), `command` (optional), `description` (required)
- 1-4 action items per response
- Commands should be PowerShell unless context indicates Linux/Mac
- For GUI-only steps, omit `command`

**Both markers are stripped from display** — the engineer sees them as interactive UI cards, \
not raw JSON. Put analysis BEFORE markers. Markers go at the END of your response.

## Using the Team's Flow Library

Your team has built troubleshooting flows in ResolutionFlow. When relevant flows \
appear in the context below, reference them by name so the engineer can launch them \
directly. Prefer the team's proven flows over ad-hoc instructions when they exist.

## Using Microsoft Learn Documentation

You have access to Microsoft's official documentation via Microsoft Learn. Use it when:
- The question involves exact cmdlet syntax, API parameters, or configuration steps
- You need to verify current Microsoft/Azure behavior or requirements
- No team flow covers the topic and vendor-specific detail would help

Do NOT use Microsoft Learn for every question — only when official docs add real value.

## Image Analysis

When an image is attached, analyze it carefully. Screenshots of error messages, \
config panels, event viewer logs, and network diagrams are common in MSP work. \
Describe what you see and use the visual information to inform your troubleshooting advice.

## Diagnostic Forking

When symptoms point to 2+ different subsystems or root causes, you MUST create a diagnostic \
fork. Forking tracks the different investigation paths in the background — the engineer \
sees them in a sidebar and can switch between them anytime.

**IMPORTANT: Forking is invisible to the engineer in the conversation.** You do NOT mention \
forking, branching, or paths to the engineer. You just continue the conversation naturally. \
The fork marker is metadata that the system uses behind the scenes.

**You MUST fork when:**
- Symptoms affect multiple applications or layers (e.g., Outlook AND Teams dropping)
- The problem could be endpoint-side OR infrastructure-side
- Multiple well-known causes match the exact same symptom pattern

**Do NOT fork when:**
- One cause is clearly >80% likely — just investigate that first
- A single yes/no question would eliminate all but one possibility

**Fork response format:**

Even when forking, you MUST still follow the RESPONSE FORMAT above. Your response \
must include [QUESTIONS] and/or [ACTIONS] markers — the fork marker is IN ADDITION \
to those, not a replacement. Do NOT ask questions in prose — put them in [QUESTIONS].

Structure: 1-3 sentences of analysis → [QUESTIONS] and/or [ACTIONS] → [FORK] at the very end.

Example flow:
- Engineer: "Outlook disconnects every 15 min, Teams drops too, only one user"
- You: "The 10-15 min pattern with both apps points to network layer."
- Then: [QUESTIONS] marker, then [ACTIONS] marker, then [FORK] marker last.

The fork marker is stripped from display — the engineer never sees it. \
The system creates branches silently. Based on the engineer's answer, you pick \
the most relevant branch to investigate first.

To create a fork, append this marker AFTER your [QUESTIONS]/[ACTIONS] markers:

[FORK]
{"fork_reason": "Brief reason", "options": [{"label": "Short name", "description": "One sentence"}, {"label": "Another", "description": "One sentence"}]}
[/FORK]

2-4 options. Never mention "fork", "branch", or "path" in your visible text.

## Boundaries

- Stay focused on IT infrastructure, systems administration, and MSP operations.
- If a question is clearly outside your domain, say so briefly and redirect.
- Never fabricate error codes, KB article numbers, or CLI flags. If unsure, say so.

## FINAL REMINDER — THIS OVERRIDES EVERYTHING ABOVE

Every single response MUST contain [QUESTIONS] and/or [ACTIONS] markers with valid JSON. \
No exceptions. Not even when forking. A response without at least one of these markers \
will crash the UI. If you are unsure, include both. The markers are REQUIRED output, not optional.

If any tasks in the engineer's message are marked `_(not yet completed)_`, re-include them \
in your markers unless you are ≥75% confident that information is no longer relevant.
"""

# Appended to every new user message sent to Anthropic so the model sees the
# output contract immediately before generating. It is invisible to the user
# (stripped before storage) but critical for structured output compliance, so
# the text must stay byte-stable.
_FORMAT_REMINDER = (
    "\n\n[SYSTEM: Remember — your response MUST end with [QUESTIONS] "
    "and/or [ACTIONS] markers containing valid JSON arrays. "
    "Responses without markers break the UI.]"
)

# Microsoft Learn MCP connector settings (used only when
# settings.ENABLE_MCP_MICROSOFT_LEARN is truthy).
_MCP_BETA_FLAG = "mcp-client-2025-11-20"
_MICROSOFT_LEARN_MCP_NAME = "microsoft-learn"
_MICROSOFT_LEARN_MCP_URL = "https://learn.microsoft.com/api/mcp"

# Upper bound (in characters, before the "..." suffix) for auto-generated titles.
_TITLE_MAX_CHARS = 100


async def _call_ai(
    system_base: str,
    rag_context: str,
    history: list[dict[str, Any]],
    new_message: str,
    max_tokens: int = 4096,
    images: list[dict[str, Any]] | None = None,
) -> tuple[str, int, int]:
    """Call the configured AI provider, using prompt caching on Anthropic.

    Caching strategy (Anthropic path only):
    - System prompt base: cached (stable across all turns)
    - RAG context: NOT cached (changes per query)
    - Conversation history prefix: cached via a breakpoint on the last
      existing message (only the new user message is uncached)

    Args:
        system_base: Static system prompt text.
        rag_context: Per-query context; concatenated onto the system prompt.
        history: Prior messages as dicts with "role" and "content" keys.
        new_message: The new user message for this turn.
        max_tokens: Maximum number of tokens to generate.
        images: Optional list of {"media_type": str, "data": str (base64)}
            to include alongside new_message as vision content. Only the
            Anthropic path forwards these; the fallback path drops them.

    Returns:
        Whatever the selected backend returns; the Anthropic path returns
        (response_text, input_tokens, output_tokens).
    """
    if settings.AI_PROVIDER == "anthropic" and settings.ANTHROPIC_API_KEY:
        return await _call_anthropic_cached(
            system_base,
            rag_context,
            history,
            new_message,
            max_tokens,
            images=images,
        )

    # Fallback: generic provider (Gemini, etc.) — images not supported
    from app.core.ai_provider import get_ai_provider

    if images:
        # Make the dropped attachments visible in logs rather than silently
        # answering as if no image had been sent.
        logger.warning(
            "Dropping %d image attachment(s): the fallback AI provider path "
            "does not support images",
            len(images),
        )

    provider = get_ai_provider()
    return await provider.generate_text(
        system_prompt=system_base + rag_context,
        messages=[*history, {"role": "user", "content": new_message}],
        max_tokens=max_tokens,
    )


def _with_cache_breakpoint(message: dict[str, Any]) -> dict[str, Any]:
    """Return *message* with an ephemeral cache breakpoint on its last block.

    Plain-string content is wrapped in a single text block. Content that is
    already a list of block dicts gets the breakpoint on a copy of its final
    block. Anything else (empty string, empty list, unexpected types) is
    returned unchanged, because there is nothing valid to attach a
    breakpoint to. The input message is never mutated.
    """
    content = message["content"]
    cache_control = {"type": "ephemeral"}

    if isinstance(content, str):
        if not content:
            # An empty text block cannot carry cache_control.
            return message
        blocks: list[Any] = [
            {"type": "text", "text": content, "cache_control": cache_control},
        ]
    elif isinstance(content, list) and content and isinstance(content[-1], dict):
        # NOTE(review): assumes the final block is of a type that accepts
        # cache_control (text/image/tool blocks) — confirm if history can
        # end with other block types.
        blocks = [*content[:-1], {**content[-1], "cache_control": cache_control}]
    else:
        return message

    return {"role": message["role"], "content": blocks}


async def _call_anthropic_cached(
    system_base: str,
    rag_context: str,
    history: list[dict[str, Any]],
    new_message: str,
    max_tokens: int,
    images: list[dict[str, Any]] | None = None,
) -> tuple[str, int, int]:
    """Call Anthropic with prompt caching on the system prompt and history.

    Uses structured system blocks so the static base prompt is cached
    independently from the per-query RAG context. When
    settings.ENABLE_MCP_MICROSOFT_LEARN is set, the request also attaches the
    Microsoft Learn MCP server; if that request fails with an Anthropic API
    error, it is retried once without MCP.

    Args:
        system_base: Static system prompt (cached).
        rag_context: Per-query context appended as a second, uncached system
            block; skipped when empty.
        history: Prior messages as dicts with "role" and "content" keys.
        new_message: New user message; the format reminder is appended to it.
        max_tokens: Maximum number of tokens to generate.
        images: Optional list of {"media_type": str, "data": str (base64)}
            sent as image blocks ahead of the message text.

    Returns:
        (text, input_tokens, output_tokens) where text is every text block of
        the response joined with newlines.

    Raises:
        Exception: Whatever the Anthropic client raises, when MCP is inactive,
            the failure is not classified as MCP-related, or the retry fails.
    """
    import anthropic

    client = anthropic.AsyncAnthropic(
        api_key=settings.ANTHROPIC_API_KEY,
        timeout=settings.AI_REQUEST_TIMEOUT_SECONDS,
    )

    # System prompt as structured blocks:
    #   Block 1: static base prompt (cached)
    #   Block 2: RAG context (changes per query, not cached)
    system_blocks: list[dict[str, Any]] = [
        {
            "type": "text",
            "text": system_base,
            "cache_control": {"type": "ephemeral"},
        },
    ]
    if rag_context:
        system_blocks.append({"type": "text", "text": rag_context})

    # Copy only the fields the API accepts, then place the cache breakpoint on
    # the last history message so the whole conversation prefix is cached
    # across turns.
    messages: list[dict[str, Any]] = [
        {"role": msg["role"], "content": msg["content"]} for msg in history
    ]
    if messages:
        messages[-1] = _with_cache_breakpoint(messages[-1])

    # The new user message is uncached — it is different on every turn.
    reminded_message = new_message + _FORMAT_REMINDER
    if images:
        # Multimodal turn: image blocks first, then the text block.
        content_blocks: list[dict[str, Any]] = [
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": img["media_type"],
                    "data": img["data"],
                },
            }
            for img in images
        ]
        content_blocks.append({"type": "text", "text": reminded_message})
        messages.append({"role": "user", "content": content_blocks})
    else:
        messages.append({"role": "user", "content": reminded_message})

    # MCP server config (optional — controlled by settings)
    mcp_servers: Any = anthropic.NOT_GIVEN
    tools: Any = anthropic.NOT_GIVEN
    mcp_active = bool(settings.ENABLE_MCP_MICROSOFT_LEARN)
    if mcp_active:
        mcp_servers = [
            {
                "type": "url",
                "url": _MICROSOFT_LEARN_MCP_URL,
                "name": _MICROSOFT_LEARN_MCP_NAME,
            }
        ]
        tools = [
            {
                "type": "mcp_toolset",
                "mcp_server_name": _MICROSOFT_LEARN_MCP_NAME,
            }
        ]

    try:
        # NOTE(review): the MCP beta flag is sent even when MCP is disabled;
        # kept as-is to avoid changing the request shape for that path.
        response = await client.beta.messages.create(
            model=settings.AI_MODEL_ANTHROPIC,
            max_tokens=max_tokens,
            system=system_blocks,
            messages=messages,
            mcp_servers=mcp_servers,
            tools=tools,
            betas=[_MCP_BETA_FLAG],
        )
    except Exception as e:
        # MCP server failures surface as many error types: HTTP status errors
        # (APIStatusError, which includes BadRequestError) as well as
        # transport failures (APIConnectionError, which includes
        # APITimeoutError). Retry without MCP for all of them when MCP was
        # active, so a flaky external server never blocks the assistant.
        is_mcp_error = mcp_active and (
            "MCP server" in str(e)
            or "mcp" in type(e).__name__.lower()
            or isinstance(
                e, (anthropic.APIStatusError, anthropic.APIConnectionError)
            )
        )
        if not is_mcp_error:
            raise
        logger.warning(
            "MCP server error (%s), retrying without MCP: %s",
            type(e).__name__,
            e,
        )
        response = await client.messages.create(
            model=settings.AI_MODEL_ANTHROPIC,
            max_tokens=max_tokens,
            system=system_blocks,
            messages=messages,
        )

    # Extract text from the response — MCP responses can have multiple block
    # types (text, mcp_tool_use, mcp_tool_result). All text blocks are joined.
    text_parts: list[str] = []
    mcp_tools_used: list[str] = []
    for block in response.content:
        block_text = getattr(block, "text", None)
        if isinstance(block_text, str):
            text_parts.append(block_text)
        if getattr(block, "type", None) == "mcp_tool_use":
            mcp_tools_used.append(str(getattr(block, "name", "unknown")))
    text = "\n".join(text_parts)

    usage = response.usage
    input_tokens = usage.input_tokens
    output_tokens = usage.output_tokens

    # Log MCP tool usage
    if mcp_tools_used:
        logger.info("MCP tools used: %s", ", ".join(mcp_tools_used))

    # Log cache performance; the counters may be missing or None.
    cache_read = getattr(usage, "cache_read_input_tokens", 0) or 0
    cache_creation = getattr(usage, "cache_creation_input_tokens", 0) or 0
    if cache_read or cache_creation:
        logger.info(
            "Anthropic cache: read=%d creation=%d input=%d output=%d",
            cache_read,
            cache_creation,
            input_tokens,
            output_tokens,
        )

    return text, input_tokens, output_tokens


def _auto_title(message: str) -> str:
    """Generate a short title from the first user message.

    Surrounding whitespace is ignored. Text of up to 100 characters is
    returned unchanged; longer text is cut at 100 characters, trimmed back to
    the last space inside that window (if any), and suffixed with "...".
    """
    stripped = message.strip()
    # Compare the stripped length: whitespace padding must not cause a short
    # message to lose its last word and gain a spurious ellipsis.
    if len(stripped) <= _TITLE_MAX_CHARS:
        return stripped
    return stripped[:_TITLE_MAX_CHARS].rsplit(" ", 1)[0] + "..."