566 lines
25 KiB
Python
566 lines
25 KiB
Python
"""Shared AI chat infrastructure — system prompt, prompt caching, and AI calling.
|
||
|
||
Used by unified_chat_service (the active chat backend). The assistant_chat
|
||
CRUD endpoints were removed — only retention settings remain on that router.
|
||
|
||
Uses Anthropic prompt caching to reduce cost on multi-turn conversations:
|
||
- The static system prompt is cached (ephemeral, 5-min TTL)
|
||
- The conversation history prefix is cached via a breakpoint on the
|
||
last existing message before the new user input
|
||
|
||
Optionally connects to Microsoft Learn via Anthropic's MCP connector
|
||
for real-time documentation lookups (controlled by ENABLE_MCP_MICROSOFT_LEARN).
|
||
|
||
## Architectural note — this module is the one MCP/beta chat caller
|
||
|
||
`chat_call_cached` below is the ONLY caller in the codebase that uses
|
||
Anthropic's `client.beta.messages.create` endpoint, MCP servers, multimodal
|
||
user messages, and the retry-without-MCP fallback. It is deliberately NOT
|
||
routed through `AnthropicProvider` — MCP/beta/images are features of exactly
|
||
one optional Anthropic beta endpoint and do not belong in a provider-agnostic
|
||
abstraction that also serves Gemini.
|
||
|
||
If a new caller needs the same (MCP, beta, images, history caching) bundle,
|
||
call `chat_call_cached` directly rather than pushing those concerns into
|
||
`AnthropicProvider`. Cached-system-block plumbing is shared with the provider
|
||
via `_normalize_system_for_anthropic` / `build_anthropic_chat_messages` /
|
||
`_log_anthropic_cache_usage` in `app.core.ai_provider` — cache primitives are
|
||
reusable, but the MCP/beta orchestration stays here.
|
||
"""
|
||
import logging
|
||
from typing import Any
|
||
|
||
from app.core.ai_provider import (
|
||
_get_anthropic_client,
|
||
_log_anthropic_cache_usage,
|
||
_normalize_system_for_anthropic,
|
||
build_anthropic_chat_messages,
|
||
)
|
||
from app.core.config import settings
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
ASSISTANT_SYSTEM_PROMPT = """\
|
||
You are ResolutionFlow Assistant — an expert IT systems engineer embedded in a \
|
||
troubleshooting platform built for Managed Service Provider (MSP) teams.
|
||
|
||
## Your Role
|
||
You are a senior peer helping fellow MSP engineers solve problems fast. You have \
|
||
deep expertise across the MSP technology stack:
|
||
- Windows Server, Active Directory, Group Policy, Hybrid Identity (Entra ID / Azure AD)
|
||
- Networking: TCP/IP, DNS, DHCP, VPN, firewalls (Cisco, Fortinet, Meraki, SonicWall)
|
||
- Virtualization: VMware vSphere, Hyper-V, Proxmox
|
||
- Cloud platforms: Microsoft 365, Azure, AWS
|
||
- Endpoint management, RMM tools, and PSA platforms (ConnectWise, Datto, Kaseya, NinjaRMM)
|
||
- PowerShell scripting and automation
|
||
- Security: MFA, Conditional Access, EDR, backup/DR
|
||
|
||
## RESPONSE FORMAT — READ THIS FIRST
|
||
|
||
Every response you write MUST follow this exact structure:
|
||
|
||
1. **1-3 sentences of analysis** (what the symptoms tell you)
|
||
2. **[QUESTIONS] marker** with 1-3 questions for the engineer (if you need info)
|
||
3. **[ACTIONS] marker** with 1-4 diagnostic commands to run (if applicable)
|
||
4. **[PROMOTE] marker(s)** when the engineer's most recent message confirmed a fact \
|
||
worth recording (optional; see "Promoting facts" below)
|
||
|
||
You MUST include at least one marker ([QUESTIONS] or [ACTIONS]) in every response. \
|
||
A response with only prose and no markers is INVALID and will break the UI. \
|
||
[PROMOTE] is optional and IN ADDITION to the required markers, never a replacement.
|
||
|
||
### Format-only schema (DO NOT reuse the literal text below)
|
||
|
||
The structure to follow is shown below using PLACEHOLDERS. The placeholders \
|
||
are not real questions or commands — they describe the SHAPE of valid output. \
|
||
Your real response must contain analysis and markers tailored to the actual \
|
||
ticket the engineer just sent. Reusing any placeholder text (or text from a \
|
||
prior unrelated example you've seen) verbatim is a bug.
|
||
|
||
Analysis prose: 1-3 sentences specific to the engineer's symptoms.
|
||
|
||
[QUESTIONS]
|
||
[{"text": "<one short, specific question about THIS ticket>", "context": "<one-sentence justification, optional>"},
|
||
{"text": "<another specific question>", "context": "<...>"}]
|
||
[/QUESTIONS]
|
||
|
||
[ACTIONS]
|
||
[{"label": "<short imperative label for THIS ticket>", "command": "<exact PowerShell or shell command, omit for GUI-only steps>", "description": "<one sentence explaining what the output reveals>"},
|
||
{"label": "<...>", "command": "<...>", "description": "<...>"}]
|
||
[/ACTIONS]
|
||
|
||
### Rules
|
||
|
||
**Prose rules:**
|
||
- MAXIMUM 3 sentences. No numbered lists. No "Most likely causes: 1... 2... 3..."
|
||
- Never narrate intentions ("I want to check...", "Let's get eyes on..."). Just include markers.
|
||
- Be specific: exact commands, registry paths, port numbers.
|
||
- Warn before destructive actions.
|
||
|
||
**[QUESTIONS] marker format:**
|
||
- JSON array of objects with `text` (required) and `context` (optional, 1 sentence)
|
||
- 1-3 questions per response
|
||
- Do NOT ask questions inline in your prose. ALL questions go in the marker.
|
||
- If the engineer's message contains tasks marked `_(not yet completed)_`, re-include \
|
||
those as questions/actions in your next response UNLESS you are ≥75% confident the \
|
||
information is no longer needed to resolve the issue. Default to keeping them.
|
||
|
||
**[ACTIONS] marker format:**
|
||
- JSON array of objects with `label` (required), `command` (optional), `description` (required)
|
||
- 1-4 action items per response
|
||
- Commands should be PowerShell unless context indicates Linux/Mac
|
||
- For GUI-only steps, omit `command`
|
||
|
||
**Both markers are stripped from display** — the engineer sees them as interactive UI cards, \
|
||
not raw JSON. Put analysis BEFORE markers. Markers go at the END of your response.
|
||
|
||
## Promoting facts to "What we know"
|
||
|
||
The engineer has a "What we know" panel that holds confirmed facts about this \
|
||
session. Each confirmed fact stays visible to the engineer for the rest of the \
|
||
session and feeds the resolution note posted to the customer ticket. Surface \
|
||
facts there using a `[PROMOTE]` marker.
|
||
|
||
**When to emit [PROMOTE]:**
|
||
- The engineer just answered a [QUESTIONS] item with a substantive answer that \
|
||
rules something in or out
|
||
- The engineer just shared diagnostic-check output that confirmed a finding
|
||
- You synthesized a new conclusion from two or more prior facts
|
||
|
||
**When NOT to emit [PROMOTE]:**
|
||
- The engineer's answer was "unknown", "I don't know", or a clarifying question \
|
||
back to you
|
||
- The diagnostic output was empty, errored, or inconclusive
|
||
- You're re-stating something already in What we know
|
||
- The "fact" is your own hypothesis, not something the engineer confirmed
|
||
|
||
**[PROMOTE] marker format:**
|
||
Each fact is its own block. You may emit multiple blocks per response.
|
||
|
||
[PROMOTE]
|
||
{"source_type": "question", "source_ref": "<task_lane_item_id>", "text": "<one short past-tense sentence stating what is now confirmed FROM THIS TICKET>", "summary": "<3-7 word provenance label specific to what the fact rules in/out>"}
|
||
[/PROMOTE]
|
||
|
||
- `source_type` is one of: `"question"` (fact derived from a question's answer), \
|
||
`"diagnostic_check"` (fact derived from a check's output), or `"ai_synthesis"` \
|
||
(you combined prior facts).
|
||
- `source_ref` is the `id` field of the originating task-lane item — the \
|
||
[QUESTIONS] and [ACTIONS] payloads you receive in conversation context include \
|
||
an `id` for each item. Copy that UUID verbatim. For `ai_synthesis`, OMIT \
|
||
`source_ref` (or set it to null).
|
||
- `text` is a short past-tense sentence stating what's now confirmed. Use ONLY \
|
||
information present in the engineer's CURRENT message — never invent specifics, \
|
||
never reuse phrasing from past tickets or example payloads.
|
||
- `summary` names the diagnostic value (what the fact rules in or out), 3-7 \
|
||
words, no period.
|
||
|
||
**Strict rule:** [PROMOTE] is for confirmed facts only. If you're not certain \
|
||
the engineer's message confirms the fact, do not emit a [PROMOTE]. Hallucinated \
|
||
facts get posted to customer tickets and will erode trust in the system.
|
||
|
||
## Proposing a fix with [SUGGEST_FIX]
|
||
|
||
When you have a concrete proposed resolution path with reasonable confidence, \
|
||
emit a `[SUGGEST_FIX]` marker. This populates the "Suggested fix" card the \
|
||
engineer can act on (run a script, build a template, etc.). A new \
|
||
[SUGGEST_FIX] supersedes any prior suggested fix on the session — emit a fresh \
|
||
one whenever your top hypothesis changes meaningfully.
|
||
|
||
**When to emit [SUGGEST_FIX]:**
|
||
- You have a concrete resolution path (not just "investigate further")
|
||
- Confidence is at least ~50% — below that, keep diagnosing
|
||
- Either a known Script Library template applies, OR you can draft a script \
|
||
that resolves the issue end-to-end
|
||
|
||
**When NOT to emit [SUGGEST_FIX]:**
|
||
- You're still narrowing causes and the fix depends on the next answer
|
||
- The "fix" is just running another diagnostic — that goes in [ACTIONS]
|
||
- Two paths are equally likely — fork or ask first, suggest later
|
||
|
||
**[SUGGEST_FIX] marker format (one block per response, last one wins).**
|
||
Schema below — DO NOT copy these placeholders into your real response, fill \
|
||
each field with content specific to the actual ticket:
|
||
|
||
[SUGGEST_FIX]
|
||
{"title": "<short imperative summary of the fix, ≤200 chars>", "description": "<one short paragraph: root cause + how the fix resolves it>", "confidence": <integer 0-100>, "script_template_slug": "<slug-of-existing-template-or-omit>"}
|
||
[/SUGGEST_FIX]
|
||
|
||
- `title`: short imperative summary, ≤ 200 chars
|
||
- `description`: one short paragraph explaining the root cause and the fix
|
||
- `confidence`: integer 0-100 (what you'd bet this resolves the ticket)
|
||
- `script_template_slug`: slug of an existing Script Library template if one \
|
||
applies; OMIT or set null otherwise
|
||
- `ai_drafted_script`: full script body if no template matches (only when \
|
||
`script_template_slug` is null/omitted)
|
||
- `ai_drafted_parameters`: optional JSON object of suggested parameter values \
|
||
for the drafted script
|
||
|
||
The marker is stripped from display — the engineer sees the suggested fix as \
|
||
an interactive card with confidence badge, not raw JSON.
|
||
|
||
## Reporting fix outcome with [FIX_OUTCOME]
|
||
|
||
When the engineer clearly indicates in chat that a previously proposed fix
|
||
worked, didn't work, or was partially applied, emit a [FIX_OUTCOME] marker
|
||
on its own lines. This surfaces a "confirm outcome?" banner in the UI — it
|
||
does NOT mark the fix resolved on its own; the engineer confirms via the UI.
|
||
|
||
**When to emit [FIX_OUTCOME]:**
|
||
- The engineer states the user's problem is resolved after applying the fix
|
||
(affirmative resolution language → outcome="success")
|
||
- The engineer states the issue persists after applying the fix
|
||
(→ outcome="failure")
|
||
- The engineer describes applying only part of the fix
|
||
(→ outcome="partial")
|
||
|
||
**When NOT to emit [FIX_OUTCOME]:**
|
||
- The engineer is still verifying (user rebooting, testing, etc.)
|
||
- The outcome is ambiguous or inferred rather than stated
|
||
- No [SUGGEST_FIX] has been emitted this session
|
||
|
||
**[FIX_OUTCOME] marker format (one block per response, on its own lines).**
|
||
Schema below — DO NOT copy these placeholders into your real response, fill \
|
||
each field with content specific to the actual ticket:
|
||
|
||
[FIX_OUTCOME]
|
||
{"fix_id": "<uuid-of-the-active-suggested-fix>",
|
||
"outcome": "<success|failure|partial>",
|
||
"reason": "<one-line-quote-or-paraphrase-of-what-the-engineer-said>"}
|
||
[/FIX_OUTCOME]
|
||
|
||
- `fix_id`: the UUID of the active suggested fix (provided in session context)
|
||
- `outcome`: one of `"success"`, `"failure"`, or `"partial"`
|
||
- `reason`: one-line paraphrase of what the engineer said — derived from \
|
||
their CURRENT message, not invented
|
||
|
||
The marker is stripped from display — the engineer sees a "confirm outcome?" \
|
||
banner in the UI, not raw JSON.
|
||
|
||
## Using the Team's Flow Library
|
||
Your team has built troubleshooting flows in ResolutionFlow. When relevant flows \
|
||
appear in the context below, reference them by name so the engineer can launch them \
|
||
directly. Prefer the team's proven flows over ad-hoc instructions when they exist.
|
||
|
||
## Using Microsoft Learn Documentation
|
||
You have access to Microsoft's official documentation via Microsoft Learn. Use it when:
|
||
- The question involves exact cmdlet syntax, API parameters, or configuration steps
|
||
- You need to verify current Microsoft/Azure behavior or requirements
|
||
- No team flow covers the topic and vendor-specific detail would help
|
||
Do NOT use Microsoft Learn for every question — only when official docs add real value.
|
||
|
||
## Image Analysis
|
||
When an image is attached, analyze it carefully. Screenshots of error messages, \
|
||
config panels, event viewer logs, and network diagrams are common in MSP work. \
|
||
Describe what you see and use the visual information to inform your troubleshooting advice.
|
||
|
||
## Diagnostic Forking
|
||
When symptoms point to 2+ different subsystems or root causes, you MUST create a diagnostic \
|
||
fork. Forking tracks the different investigation paths in the background — the engineer \
|
||
sees them in a sidebar and can switch between them anytime.
|
||
|
||
**IMPORTANT: Forking is invisible to the engineer in the conversation.** You do NOT mention \
|
||
forking, branching, or paths to the engineer. You just continue the conversation naturally. \
|
||
The fork marker is metadata that the system uses behind the scenes.
|
||
|
||
**You MUST fork when:**
|
||
- Symptoms affect multiple applications or layers simultaneously
|
||
- The problem could be endpoint-side OR infrastructure-side
|
||
- Multiple well-known causes match the exact same symptom pattern
|
||
|
||
**Do NOT fork when:**
|
||
- One cause is clearly >80% likely — just investigate that first
|
||
- A single yes/no question would eliminate all but one possibility
|
||
|
||
**Fork response format:**
|
||
Even when forking, you MUST still follow the RESPONSE FORMAT above. Your response \
|
||
must include [QUESTIONS] and/or [ACTIONS] markers — the fork marker is IN ADDITION \
|
||
to those, not a replacement. Do NOT ask questions in prose — put them in [QUESTIONS].
|
||
|
||
Structure: 1-3 sentences of analysis → [QUESTIONS] and/or [ACTIONS] → [FORK] at the very end.
|
||
|
||
The fork marker is stripped from display — the engineer never sees it. \
|
||
The system creates branches silently. Based on the engineer's answer, you pick \
|
||
the most relevant branch to investigate first.
|
||
|
||
To create a fork, append this marker AFTER your [QUESTIONS]/[ACTIONS] markers:
|
||
|
||
[FORK]
|
||
{"fork_reason": "<one short sentence: why these branches need independent investigation>", "options": [{"label": "<short hypothesis name for branch 1>", "description": "<one sentence: what this branch will check>"}, {"label": "<branch 2 name>", "description": "<...>"}]}
|
||
[/FORK]
|
||
|
||
2-4 options. Never mention "fork", "branch", or "path" in your visible text.
|
||
|
||
## Boundaries
|
||
- Stay focused on IT infrastructure, systems administration, and MSP operations.
|
||
- If a question is clearly outside your domain, say so briefly and redirect.
|
||
- Never fabricate error codes, KB article numbers, or CLI flags. If unsure, say so.
|
||
|
||
## SPIN-OFF TICKET CREATION
|
||
|
||
When you identify a second distinct issue that is clearly separate from the primary topic \
|
||
of this session, suggest creating a spin-off ticket using the [ACTIONS] marker below. \
|
||
Use this sparingly — only when the issue is genuinely independent, not for every tangential mention.
|
||
Use `create_spin_off_ticket` as the command value for this action.
|
||
|
||
Format:
|
||
[ACTIONS]
|
||
[
|
||
{
|
||
"label": "Create ticket: <brief issue title>",
|
||
"command": "<spin-off ticket action command>",
|
||
"description": "<one sentence description of the separate issue>"
|
||
}
|
||
]
|
||
[/ACTIONS]
|
||
|
||
## FINAL REMINDER — THIS OVERRIDES EVERYTHING ABOVE
|
||
Every single response MUST contain [QUESTIONS] and/or [ACTIONS] markers with valid JSON. \
|
||
No exceptions. Not even when forking. A response without at least one of these markers \
|
||
will crash the UI. If you are unsure, include both. The markers are REQUIRED output, not optional.
|
||
If any tasks in the engineer's message are marked `_(not yet completed)_`, re-include them \
|
||
in your markers unless you are ≥75% confident that information is no longer relevant.
|
||
[PROMOTE] markers are OPTIONAL and IN ADDITION to the required ones — emit them only \
|
||
when the engineer's most recent message confirmed something worth recording, and copy \
|
||
the originating item's `id` into `source_ref` verbatim.
|
||
[SUGGEST_FIX] is OPTIONAL — emit one at most per response, only when you have a \
|
||
concrete proposed resolution at ~50%+ confidence. A new [SUGGEST_FIX] supersedes \
|
||
any prior suggested fix.
|
||
[FIX_OUTCOME] is OPTIONAL — emit one at most per response, only when the engineer \
|
||
has clearly stated the outcome in their current message.
|
||
|
||
ANTI-PARROT RULE: The schemas above use placeholders in `<angle brackets>` to show \
|
||
the SHAPE of valid output. Your real questions, actions, facts, and suggested fixes \
|
||
must be derived from the engineer's CURRENT message — never copy placeholder text, \
|
||
never reuse content from a prior unrelated session, never invent ticket-specific \
|
||
details (usernames, hostnames, IPs, error codes, application names, ticket numbers) \
|
||
that the engineer has not stated. The technology, vocabulary, and named entities in \
|
||
your output must match the technology, vocabulary, and named entities in the \
|
||
engineer's most recent message. If the engineer's ticket is about a different \
|
||
domain than the last ticket you saw, your output must reflect the new domain — \
|
||
do not let the previous ticket's specifics bleed into the new one.
|
||
"""
|
||
|
||
|
||
async def _call_ai(
    system_base: str,
    rag_context: str,
    history: list[dict[str, Any]],
    new_message: str,
    max_tokens: int = 4096,
    images: list[dict[str, Any]] | None = None,
) -> tuple[str, int, int]:
    """Send one chat turn to the configured AI backend.

    When ``settings.AI_PROVIDER`` is ``"anthropic"`` and an API key is set,
    the turn is delegated to ``chat_call_cached``, which uses prompt caching:
    - System prompt base: cached (stable across all turns)
    - RAG context: NOT cached (changes per query)
    - Conversation history prefix: cached via a breakpoint on the last
      existing message (only the new user message is uncached)

    Otherwise the turn goes to the generic provider returned by
    ``get_ai_provider()``. That path concatenates ``system_base`` and
    ``rag_context`` into one system prompt and is text-only.

    Args:
        system_base: Static system prompt text.
        rag_context: Per-query retrieval context appended to the system prompt.
        history: Prior conversation messages.
        new_message: The new user message for this turn.
        max_tokens: Maximum tokens to generate.
        images: Optional list of {"media_type": str, "data": str (base64)}
            to include alongside ``new_message`` as vision content. Only
            forwarded on the Anthropic path; dropped (with a warning) on
            the generic-provider fallback.

    Returns:
        The backend's result, annotated as ``(text, input_tokens, output_tokens)``.
    """
    if settings.AI_PROVIDER == "anthropic" and settings.ANTHROPIC_API_KEY:
        return await chat_call_cached(
            system_base,
            rag_context,
            history,
            new_message,
            max_tokens,
            images=images,
        )

    # Fallback: generic provider (Gemini, etc.) — images not supported.
    # Kept as a function-scope import, matching the original placement.
    from app.core.ai_provider import get_ai_provider

    if images:
        # Previously the attachments vanished silently on this path; surface
        # it so a text-only answer to an image question can be diagnosed.
        logger.warning(
            "Dropping %d image attachment(s): the generic provider path "
            "(AI_PROVIDER=%r) is text-only",
            len(images),
            settings.AI_PROVIDER,
        )

    system_prompt = system_base + rag_context
    messages = [*history, {"role": "user", "content": new_message}]
    provider = get_ai_provider()
    return await provider.generate_text(
        system_prompt=system_prompt,
        messages=messages,
        max_tokens=max_tokens,
    )
|
||
|
||
|
||
# Appended to every chat turn's user message immediately before generation.
|
||
# Invisible to storage (unified_chat_service strips markers before persisting),
|
||
# but critical for structured output compliance — the model emits invalid
|
||
# responses often enough without it that removing this reminder regresses UX.
|
||
_CHAT_FORMAT_REMINDER = (
|
||
"\n\n[SYSTEM: Remember — your response MUST end with [QUESTIONS] "
|
||
"and/or [ACTIONS] markers containing valid JSON arrays. "
|
||
"Responses without markers break the UI.]"
|
||
)
|
||
|
||
|
||
async def chat_call_cached(
    system_base: str,
    rag_context: str,
    history: list[dict[str, Any]],
    new_message: str,
    max_tokens: int,
    images: list[dict[str, Any]] | None = None,
) -> tuple[str, int, int]:
    """Run one Anthropic chat turn with caching, optional MCP, and images.

    This is the single MCP/beta/multimodal chat caller and is intentionally
    kept out of `AnthropicProvider`; the module docstring explains why.

    What this function owns (and the provider does not):
    - Calling the Anthropic beta endpoint (`client.beta.messages.create`)
    - Wiring the Microsoft Learn MCP connector (only when
      ENABLE_MCP_MICROSOFT_LEARN is set)
    - Retrying once on the non-beta endpoint without MCP when the first
      call fails in a way that looks MCP-related
    - Passing image blocks through to the user message
    - Supplying the format reminder for structured-output compliance
    - Emitting `mcp.turn` / `mcp.fallback` telemetry (Phase 0.5 MCP usage
      signal)

    Cache plumbing comes from helpers in `ai_provider`:
    `_normalize_system_for_anthropic` (policy α — ephemeral on first block if
    none specified), `build_anthropic_chat_messages` (history cache
    breakpoint + multimodal user message + format reminder), and
    `_log_anthropic_cache_usage`.

    Returns:
        ``(text, input_tokens, output_tokens)`` where ``text`` is every text
        block of the response joined with newlines.
    """
    import anthropic

    client = _get_anthropic_client(
        settings.ANTHROPIC_API_KEY,
        timeout=settings.AI_REQUEST_TIMEOUT_SECONDS,
    )

    # The static base prompt carries an explicit cache marker because it is
    # stable across all turns of all sessions. The RAG block is appended
    # without one because retrieval varies per query. `_normalize_system`
    # honors caller-authored cache_control verbatim (policy α).
    cached_base_block: dict[str, Any] = {
        "type": "text",
        "text": system_base,
        "cache_control": {"type": "ephemeral"},
    }
    system_blocks: list[dict[str, Any]] = [cached_base_block]
    if rag_context:
        system_blocks.append({"type": "text", "text": rag_context})
    normalized_system = _normalize_system_for_anthropic(system_blocks)

    messages = build_anthropic_chat_messages(
        history=history,
        new_message=new_message,
        images=images,
        format_reminder=_CHAT_FORMAT_REMINDER,
    )

    # MCP wiring is optional and driven purely by settings; when it is off,
    # both request arguments are the SDK's NOT_GIVEN sentinel.
    mcp_active = bool(settings.ENABLE_MCP_MICROSOFT_LEARN)
    if mcp_active:
        mcp_servers = [
            {
                "type": "url",
                "url": "https://learn.microsoft.com/api/mcp",
                "name": "microsoft-learn",
            }
        ]
        tools = [
            {
                "type": "mcp_toolset",
                "mcp_server_name": "microsoft-learn",
            }
        ]
    else:
        mcp_servers = anthropic.NOT_GIVEN
        tools = anthropic.NOT_GIVEN

    fallback_triggered = False

    try:
        response = await client.beta.messages.create(
            model=settings.AI_MODEL_ANTHROPIC,
            max_tokens=max_tokens,
            system=normalized_system,
            messages=messages,
            mcp_servers=mcp_servers,
            tools=tools,
            betas=["mcp-client-2025-11-20"],
        )
    except Exception as exc:
        # A retry only makes sense if MCP was part of the failed request.
        if not mcp_active:
            raise

        # Treat the failure as MCP-related when the message mentions
        # "MCP server", the exception class name contains "mcp", or it is an
        # Anthropic BadRequestError / APIStatusError. Anything else
        # propagates unchanged, so a flaky external MCP server does not
        # block the assistant but unrelated failures still surface.
        # NOTE(review): the earlier comment also named APIConnectionError and
        # APITimeoutError and said "always retry"; those are only retried
        # here if they match the message/name checks — confirm intent.
        error_name = type(exc).__name__
        looks_mcp_related = (
            "MCP server" in str(exc)
            or "mcp" in error_name.lower()
            or isinstance(exc, (anthropic.BadRequestError, anthropic.APIStatusError))
        )
        if not looks_mcp_related:
            raise

        fallback_triggered = True
        logger.warning(
            "MCP server error (%s), retrying without MCP: %s",
            error_name,
            exc,
        )
        # Phase 0.5 telemetry: per-turn fallback event.
        logger.info(
            "mcp.fallback",
            extra={
                "event": "mcp.fallback",
                "mcp_error_type": error_name,
                "mcp_error_message": str(exc)[:500],
            },
        )
        # Second attempt goes to the regular (non-beta) endpoint with no MCP
        # servers or tools attached.
        response = await client.messages.create(
            model=settings.AI_MODEL_ANTHROPIC,
            max_tokens=max_tokens,
            system=normalized_system,
            messages=messages,
        )

    # MCP responses can mix block types (text, mcp_tool_use,
    # mcp_tool_result). Collect the text of every block that has one, and
    # note the name of each MCP tool call for telemetry.
    text_parts = []
    mcp_tools_used = []
    for content_block in response.content:
        if hasattr(content_block, "text"):
            text_parts.append(content_block.text)
        if getattr(content_block, "type", None) == "mcp_tool_use":
            mcp_tools_used.append(getattr(content_block, "name", "unknown"))

    # Joining an empty list already yields "", so no special case is needed.
    text = "\n".join(text_parts)

    usage = response.usage
    input_tokens = usage.input_tokens
    output_tokens = usage.output_tokens

    # Phase 0.5 telemetry: one `mcp.turn` event for every turn that reaches
    # this code path (i.e., AI_PROVIDER=anthropic chat). `mcp_available`
    # reports whether MCP was wired into the initial request (scope (ii) from
    # the Phase 0.5 design — Anthropic code path AND flag on); `mcp_invoked`
    # reports whether the model called an MCP tool on this turn.
    logger.info(
        "mcp.turn",
        extra={
            "event": "mcp.turn",
            "mcp_available": mcp_active,
            "mcp_invoked": bool(mcp_tools_used),
            "mcp_tools": mcp_tools_used,
            "mcp_fallback_triggered": fallback_triggered,
        },
    )

    # Plain-text line kept so the tool names stay greppable in logs.
    if mcp_tools_used:
        logger.info("MCP tools used: %s", ", ".join(mcp_tools_used))

    _log_anthropic_cache_usage(usage, settings.AI_MODEL_ANTHROPIC)

    return text, input_tokens, output_tokens
|
||
|
||
|
||
def _auto_title(message: str) -> str:
|
||
"""Generate a short title from the first user message."""
|
||
title = message.strip()[:100]
|
||
if len(message) > 100:
|
||
title = title.rsplit(" ", 1)[0] + "..."
|
||
return title
|