# Origin: patch c6772c6607fb384cfeee2d52639be846c0eec241
#   "perf: singleton AsyncAnthropic client to avoid per-call connection setup"
#   target file: backend/app/core/ai_provider.py
# Call site changed by the same patch (inside AnthropicProvider):
#   client = _get_anthropic_client(self._api_key, self._timeout)

# Module-level client cache — avoids creating new HTTP connections per call.
# Keys are "<sha256 hex digest of the API key>:<timeout>".
_anthropic_clients: dict[str, "anthropic.AsyncAnthropic"] = {}


def _get_anthropic_client(api_key: str, timeout: int = 45) -> "anthropic.AsyncAnthropic":
    """Return a cached AsyncAnthropic client, creating one if needed.

    Clients are cached per (API key, timeout) pair. The cache key is built
    from a digest of the *entire* API key: keying on a short prefix would
    let two different keys that share that prefix resolve to the same cached
    client, so requests would be sent with the wrong credentials.

    Args:
        api_key: API key passed to ``anthropic.AsyncAnthropic``.
        timeout: Request timeout passed to ``anthropic.AsyncAnthropic``.

    Returns:
        The cached client for this (api_key, timeout) pair; a new client is
        constructed with ``max_retries=1`` on first use.
    """
    import hashlib

    import anthropic

    # Hash the full key so distinct keys never collide, while keeping the raw
    # secret out of the dict keys (e.g. if the cache is ever logged or dumped).
    key_digest = hashlib.sha256(api_key.encode("utf-8")).hexdigest()
    cache_key = f"{key_digest}:{timeout}"

    # NOTE(review): cached clients are reused by every caller in the process;
    # presumably all calls run on a single event loop — confirm.
    client = _anthropic_clients.get(cache_key)
    if client is None:
        client = anthropic.AsyncAnthropic(
            api_key=api_key,
            timeout=timeout,
            max_retries=1,
        )
        _anthropic_clients[cache_key] = client
    return client