diff --git a/backend/app/core/ai_provider.py b/backend/app/core/ai_provider.py
index 4e38cceb..7453ec9c 100644
--- a/backend/app/core/ai_provider.py
+++ b/backend/app/core/ai_provider.py
@@ -165,6 +165,41 @@ class GeminiProvider(AIProvider):
         return text, input_tokens, output_tokens
 
 
+# Singleton client cache — avoids creating new HTTP connections per call.
+# Keyed on the full (api_key, timeout) pair: a truncated key prefix is not
+# unique (keys from one vendor typically share a fixed prefix), so it could
+# hand one caller a client authenticated with another caller's key.
+_anthropic_clients: dict[tuple[str, int], "anthropic.AsyncAnthropic"] = {}
+
+
+def _get_anthropic_client(api_key: str, timeout: int = 45) -> "anthropic.AsyncAnthropic":
+    """Return a cached AsyncAnthropic client, creating one if needed.
+
+    Clients are cached per distinct ``(api_key, timeout)`` pair, so repeated
+    calls with the same settings reuse one client instead of building a new
+    one each time.
+
+    Args:
+        api_key: API key the client is constructed with.
+        timeout: Request timeout forwarded to the client.
+
+    Returns:
+        The cached client for this key/timeout pair.
+    """
+    import anthropic
+
+    cache_key = (api_key, timeout)
+    client = _anthropic_clients.get(cache_key)
+    if client is None:
+        client = anthropic.AsyncAnthropic(
+            api_key=api_key,
+            timeout=timeout,
+            max_retries=1,
+        )
+        _anthropic_clients[cache_key] = client
+    return client
+
+
 class AnthropicProvider(AIProvider):
     """Anthropic Claude provider using the anthropic SDK."""
 
@@ -179,13 +214,7 @@ class AnthropicProvider(AIProvider):
         messages: list[dict[str, str]],
         max_tokens: int = 4096,
     ) -> tuple[str, int, int]:
-        import anthropic
-
-        client = anthropic.AsyncAnthropic(
-            api_key=self._api_key,
-            timeout=self._timeout,
-            max_retries=1,
-        )
+        client = _get_anthropic_client(self._api_key, self._timeout)
 
         response = await client.messages.create(
             model=self._model,