perf: singleton AsyncAnthropic client to avoid per-call connection setup

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
chihlasm
2026-03-28 23:02:07 +00:00
parent af2a41830c
commit c6772c6607

View File

@@ -165,6 +165,24 @@ class GeminiProvider(AIProvider):
return text, input_tokens, output_tokens
# Singleton client cache — avoids creating new HTTP connections per call.
# Keys have the form "<sha256 hex digest of api_key>:<timeout>": the digest
# covers the WHOLE key, so two different API keys can never share a client,
# and the raw secret is not used as a dictionary key.
_anthropic_clients: dict[str, "anthropic.AsyncAnthropic"] = {}


def _get_anthropic_client(api_key: str, timeout: int = 45) -> "anthropic.AsyncAnthropic":
    """Return a cached AsyncAnthropic client, creating one if needed.

    One client is kept per distinct (api_key, timeout) pair so repeated calls
    reuse the same client object instead of constructing a new one each time.

    Args:
        api_key: Anthropic API key the client authenticates with.
        timeout: Request timeout passed to the client constructor.

    Returns:
        The cached ``anthropic.AsyncAnthropic`` instance for this key/timeout.
    """
    import hashlib

    import anthropic

    # A key prefix is NOT a safe cache key: Anthropic API keys share a common
    # leading prefix, so slicing the first few characters makes every key
    # collide and hands callers a client bound to someone else's credentials.
    key_digest = hashlib.sha256(api_key.encode("utf-8")).hexdigest()
    cache_key = f"{key_digest}:{timeout}"

    client = _anthropic_clients.get(cache_key)
    if client is None:
        client = anthropic.AsyncAnthropic(
            api_key=api_key,
            timeout=timeout,
            max_retries=1,
        )
        _anthropic_clients[cache_key] = client
    return client
class AnthropicProvider(AIProvider):
"""Anthropic Claude provider using the anthropic SDK."""
@@ -179,13 +197,7 @@ class AnthropicProvider(AIProvider):
messages: list[dict[str, str]],
max_tokens: int = 4096,
) -> tuple[str, int, int]:
import anthropic
client = anthropic.AsyncAnthropic(
api_key=self._api_key,
timeout=self._timeout,
max_retries=1,
)
client = _get_anthropic_client(self._api_key, self._timeout)
response = await client.messages.create(
model=self._model,