perf: singleton AsyncAnthropic client to avoid per-call connection setup

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 23:02:07 +00:00
parent af2a41830c
commit c6772c6607
1 changed files with 19 additions and 7 deletions
--- a/backend/app/core/ai_provider.py
+++ b/backend/app/core/ai_provider.py
@@ -165,6 +165,24 @@ class GeminiProvider(AIProvider):
        return text, input_tokens, output_tokens
 # Process-wide cache of AsyncAnthropic clients — avoids creating new HTTP
 # connections per call. Keys are "<sha256 hex of api key>:<timeout>" strings
 # built by _get_anthropic_client, so raw API keys are never stored as keys.
 _anthropic_clients: dict[str, "anthropic.AsyncAnthropic"] = {}
 def _get_anthropic_client(api_key: str, timeout: int = 45) -> "anthropic.AsyncAnthropic":
    """Return a cached AsyncAnthropic client, creating one on first use.

    Clients are cached per (api_key, timeout) pair. The cache key is derived
    from a digest of the *entire* API key: keying on a short prefix would let
    two different keys that share that prefix reuse each other's client, and
    therefore each other's credentials.

    Args:
        api_key: Anthropic API key the client authenticates with.
        timeout: Request timeout handed to the SDK client (default 45).

    Returns:
        The shared client for this key/timeout pair, built with
        ``max_retries=1``.
    """
    # Function-scope imports keep the SDK optional until a client is needed.
    import hashlib
    import anthropic

    # Hash the full key so distinct keys can never collide on a common prefix
    # and the secret itself is not kept around as a dictionary key.
    key_digest = hashlib.sha256(api_key.encode("utf-8")).hexdigest()
    cache_key = f"{key_digest}:{timeout}"

    client = _anthropic_clients.get(cache_key)
    if client is None:
        client = anthropic.AsyncAnthropic(
            api_key=api_key,
            timeout=timeout,
            max_retries=1,
        )
        _anthropic_clients[cache_key] = client
    return client
 class AnthropicProvider(AIProvider):
    """Anthropic Claude provider using the anthropic SDK."""
@@ -179,13 +197,7 @@ class AnthropicProvider(AIProvider):
        messages: list[dict[str, str]],
        max_tokens: int = 4096,
    ) -> tuple[str, int, int]:
-        import anthropic
+        client = _get_anthropic_client(self._api_key, self._timeout)
-        client = anthropic.AsyncAnthropic(
-            api_key=self._api_key,
-            timeout=self._timeout,
-            max_retries=1,
-        )
        response = await client.messages.create(
            model=self._model,