diff --git a/backend/app/core/ai_provider.py b/backend/app/core/ai_provider.py
index 4e38cceb..7453ec9c 100644
--- a/backend/app/core/ai_provider.py
+++ b/backend/app/core/ai_provider.py
@@ -165,6 +165,24 @@ class GeminiProvider(AIProvider):
         return text, input_tokens, output_tokens
 
 
+# Module-level cache of AsyncAnthropic clients, one per cache key built in _get_anthropic_client — avoids creating new HTTP connections per call
+_anthropic_clients: dict[str, "anthropic.AsyncAnthropic"] = {}
+
+
+def _get_anthropic_client(api_key: str, timeout: int = 45) -> "anthropic.AsyncAnthropic":
+    """Return the cached AsyncAnthropic client for (api_key, timeout), creating it if needed."""
+    import anthropic
+
+    # Key on the whole API key, not a prefix: an 8-character prefix is not unique
+    # (keys typically share a vendor prefix), so callers could get each other's client.
+    cache_key = f"{api_key}:{timeout}"
+    if cache_key not in _anthropic_clients:
+        _anthropic_clients[cache_key] = anthropic.AsyncAnthropic(
+            api_key=api_key, timeout=timeout, max_retries=1
+        )
+    return _anthropic_clients[cache_key]
+
+
 class AnthropicProvider(AIProvider):
     """Anthropic Claude provider using the anthropic SDK."""
 
@@ -179,13 +197,7 @@ class AnthropicProvider(AIProvider):
         messages: list[dict[str, str]],
         max_tokens: int = 4096,
     ) -> tuple[str, int, int]:
-        import anthropic
-
-        client = anthropic.AsyncAnthropic(
-            api_key=self._api_key,
-            timeout=self._timeout,
-            max_retries=1,
-        )
+        client = _get_anthropic_client(self._api_key, self._timeout)
 
         response = await client.messages.create(
             model=self._model,