From 0fbc1e0a57a50d8ff3e93bb4271c54b4834f348d Mon Sep 17 00:00:00 2001 From: Michael Chihlas Date: Fri, 17 Apr 2026 15:57:13 +0000 Subject: [PATCH] feat(telemetry): add MCP per-turn structured-log telemetry (Phase 0.5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emits structured `mcp.turn` log events on every Anthropic-path chat turn, capturing whether MCP was wired in (mcp_available), whether the model actually invoked an MCP tool (mcp_invoked), which tool names fired, and whether the silent retry-without-MCP fallback was triggered. Adds a separate `mcp.fallback` event with error type/message for fallback occurrences. Establishes baseline data for deciding whether MCP investment is earning its keep before Phase 2+ expands the product footprint. Scope: the one MCP-using code path (`_call_anthropic_cached`) — not a general instrumentation layer. No new dependencies, no schema changes, no behavior change. Standard library `logging` is the sink; PostHog is not wired on the backend. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../app/services/assistant_chat_service.py | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/backend/app/services/assistant_chat_service.py b/backend/app/services/assistant_chat_service.py index 184fd744..84190e55 100644 --- a/backend/app/services/assistant_chat_service.py +++ b/backend/app/services/assistant_chat_service.py @@ -304,6 +304,7 @@ async def _call_anthropic_cached( ] _mcp_active = mcp_servers is not anthropic.NOT_GIVEN + _mcp_fallback_triggered = False try: response = await client.beta.messages.create( @@ -326,10 +327,20 @@ async def _call_anthropic_cached( or isinstance(e, (anthropic.BadRequestError, anthropic.APIStatusError)) ) if _is_mcp_error: + _mcp_fallback_triggered = True logger.warning( "MCP server error (%s), retrying without MCP: %s", type(e).__name__, e, ) + # Phase 0.5 telemetry: per-turn fallback event. + logger.info( + "mcp.fallback", + extra={ + "event": "mcp.fallback", + "mcp_error_type": type(e).__name__, + "mcp_error_message": str(e)[:500], + }, + ) response = await client.messages.create( model=settings.AI_MODEL_ANTHROPIC, max_tokens=max_tokens, @@ -355,7 +366,23 @@ async def _call_anthropic_cached( input_tokens = usage.input_tokens output_tokens = usage.output_tokens - # Log MCP tool usage + # Phase 0.5 telemetry: per-turn MCP event. Emitted for every turn that + # reached this code path (i.e., AI_PROVIDER=anthropic chat). `mcp_available` + # reflects whether MCP was actually wired into the request (scope (ii) from + # the Phase 0.5 design — Anthropic code path AND flag on). `mcp_invoked` + # reflects whether the model chose to call an MCP tool on this turn. + logger.info( + "mcp.turn", + extra={ + "event": "mcp.turn", + "mcp_available": _mcp_active, + "mcp_invoked": bool(mcp_tools_used), + "mcp_tools": mcp_tools_used, + "mcp_fallback_triggered": _mcp_fallback_triggered, + }, + ) + + # Human-readable log retained for grep-based inspection. if mcp_tools_used: logger.info("MCP tools used: %s", ", ".join(mcp_tools_used))