feat(telemetry): add MCP per-turn structured-log telemetry (Phase 0.5)

Emits a structured `mcp.turn` log event on every Anthropic-path chat turn, capturing whether MCP was wired in (mcp_available), whether the model actually invoked an MCP tool (mcp_invoked), which tool names fired, and whether the silent retry-without-MCP fallback was triggered. Adds a separate `mcp.fallback` event carrying the error type and message (truncated to 500 characters) whenever the fallback occurs. Establishes baseline data for deciding whether MCP investment is earning its keep before Phase 2+ expands the product footprint. Scope: the one MCP-using code path (`_call_anthropic_cached`) — not a general instrumentation layer. No new dependencies, no schema changes, and no behavior change beyond the added log records. Standard library `logging` is the sink; PostHog is not wired on the backend. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 15:57:13 +00:00
parent 46291f30b9
commit 0fbc1e0a57
1 changed file with 28 additions and 1 deletion
--- a/backend/app/services/assistant_chat_service.py
+++ b/backend/app/services/assistant_chat_service.py
@@ -304,6 +304,7 @@ async def _call_anthropic_cached(
        ]

    _mcp_active = mcp_servers is not anthropic.NOT_GIVEN
+    _mcp_fallback_triggered = False

    try:
        response = await client.beta.messages.create(
@@ -326,10 +327,20 @@ async def _call_anthropic_cached(
            or isinstance(e, (anthropic.BadRequestError, anthropic.APIStatusError))
        )
        if _is_mcp_error:
+            _mcp_fallback_triggered = True
            logger.warning(
                "MCP server error (%s), retrying without MCP: %s",
                type(e).__name__, e,
            )
+            # Phase 0.5 telemetry: per-turn fallback event.
+            logger.info(
+                "mcp.fallback",
+                extra={
+                    "event": "mcp.fallback",
+                    "mcp_error_type": type(e).__name__,
+                    "mcp_error_message": str(e)[:500],
+                },
+            )
            response = await client.messages.create(
                model=settings.AI_MODEL_ANTHROPIC,
                max_tokens=max_tokens,
@@ -355,7 +366,23 @@ async def _call_anthropic_cached(
    input_tokens = usage.input_tokens
    output_tokens = usage.output_tokens

-    # Log MCP tool usage
+    # Phase 0.5 telemetry: per-turn MCP event. Emitted for every turn that
+    # reached this code path (i.e., AI_PROVIDER=anthropic chat). `mcp_available`
+    # reflects whether MCP was actually wired into the request (scope (ii) from
+    # the Phase 0.5 design — Anthropic code path AND flag on). `mcp_invoked`
+    # reflects whether the model chose to call an MCP tool on this turn.
+    logger.info(
+        "mcp.turn",
+        extra={
+            "event": "mcp.turn",
+            "mcp_available": _mcp_active,
+            "mcp_invoked": bool(mcp_tools_used),
+            "mcp_tools": mcp_tools_used,
+            "mcp_fallback_triggered": _mcp_fallback_triggered,
+        },
+    )
+
+    # Human-readable log retained for grep-based inspection.
    if mcp_tools_used:
        logger.info("MCP tools used: %s", ", ".join(mcp_tools_used))