Merge branch 'main' into feat/l1-workspace
# Conflicts: # frontend/src/router.tsx
This commit is contained in:
@@ -96,7 +96,8 @@ class TestAnthropicProvider:
|
||||
)
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.content = [MagicMock(text='{"result": "ok"}')]
|
||||
mock_response.content = [MagicMock(type="text", text='{"result": "ok"}')]
|
||||
mock_response.stop_reason = "end_turn"
|
||||
mock_response.usage = MagicMock(input_tokens=100, output_tokens=50)
|
||||
|
||||
mock_client = AsyncMock()
|
||||
@@ -120,6 +121,170 @@ class TestAnthropicProvider:
|
||||
messages=[{"role": "user", "content": "Hello"}],
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generate_json_skips_non_text_blocks(self):
|
||||
"""A leading non-text block (e.g. thinking) is skipped; the first
|
||||
text block's text is returned instead of content[0].text."""
|
||||
from app.core import ai_provider
|
||||
|
||||
ai_provider._anthropic_clients.clear()
|
||||
|
||||
provider = AnthropicProvider(
|
||||
api_key="skip-key", model="claude-sonnet-4-6", timeout=31
|
||||
)
|
||||
|
||||
thinking_block = MagicMock(type="thinking", thinking="hmm...")
|
||||
text_block = MagicMock(type="text", text='{"ok": 1}')
|
||||
mock_response = MagicMock()
|
||||
mock_response.content = [thinking_block, text_block]
|
||||
mock_response.stop_reason = "end_turn"
|
||||
mock_response.usage = MagicMock(input_tokens=10, output_tokens=5)
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.messages.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
with patch("anthropic.AsyncAnthropic", return_value=mock_client):
|
||||
text, _, _ = await provider.generate_json(
|
||||
system_prompt="You are a helper.",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
)
|
||||
|
||||
assert text == '{"ok": 1}'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generate_json_raises_when_no_text_block(self):
|
||||
"""A response with no text block (e.g. a bare refusal) raises a clear
|
||||
error instead of returning a non-text block's attributes."""
|
||||
from app.core import ai_provider
|
||||
|
||||
ai_provider._anthropic_clients.clear()
|
||||
|
||||
provider = AnthropicProvider(
|
||||
api_key="empty-key", model="claude-sonnet-4-6", timeout=32
|
||||
)
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.content = [MagicMock(type="thinking", thinking="...")]
|
||||
mock_response.stop_reason = "refusal"
|
||||
mock_response.usage = MagicMock(input_tokens=10, output_tokens=0)
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.messages.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
with patch("anthropic.AsyncAnthropic", return_value=mock_client):
|
||||
with pytest.raises(ValueError, match="no text block"):
|
||||
await provider.generate_json(
|
||||
system_prompt="You are a helper.",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generate_json_logs_warning_on_truncation(self, caplog):
|
||||
"""When stop_reason is max_tokens, a warning is logged (truncation
|
||||
signal) and the partial text is still returned."""
|
||||
import logging
|
||||
|
||||
from app.core import ai_provider
|
||||
|
||||
ai_provider._anthropic_clients.clear()
|
||||
|
||||
provider = AnthropicProvider(
|
||||
api_key="trunc-key", model="claude-sonnet-4-6", timeout=33
|
||||
)
|
||||
|
||||
text_block = MagicMock(type="text", text='{"partial": tr')
|
||||
mock_response = MagicMock()
|
||||
mock_response.content = [text_block]
|
||||
mock_response.stop_reason = "max_tokens"
|
||||
mock_response.usage = MagicMock(input_tokens=10, output_tokens=4096)
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.messages.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
with patch("anthropic.AsyncAnthropic", return_value=mock_client):
|
||||
with caplog.at_level(logging.WARNING, logger="app.core.ai_provider"):
|
||||
text, _, _ = await provider.generate_json(
|
||||
system_prompt="You are a helper.",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
)
|
||||
|
||||
assert text == '{"partial": tr'
|
||||
truncation_records = [
|
||||
r for r in caplog.records if getattr(r, "stop_reason", None) == "max_tokens"
|
||||
]
|
||||
assert truncation_records, "expected a warning record for max_tokens truncation"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generate_json_passes_output_config_when_schema_given(self):
|
||||
"""When a JSON schema is supplied, it is forwarded as
|
||||
output_config.format so the API constrains the response shape."""
|
||||
from app.core import ai_provider
|
||||
|
||||
ai_provider._anthropic_clients.clear()
|
||||
|
||||
provider = AnthropicProvider(
|
||||
api_key="schema-key", model="claude-sonnet-4-6", timeout=34
|
||||
)
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.content = [MagicMock(type="text", text='{"title": "x"}')]
|
||||
mock_response.stop_reason = "end_turn"
|
||||
mock_response.usage = MagicMock(input_tokens=10, output_tokens=5)
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.messages.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
"properties": {"title": {"type": "string"}},
|
||||
"required": ["title"],
|
||||
"additionalProperties": False,
|
||||
}
|
||||
|
||||
with patch("anthropic.AsyncAnthropic", return_value=mock_client):
|
||||
await provider.generate_json(
|
||||
system_prompt="You are a helper.",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
max_tokens=512,
|
||||
schema=schema,
|
||||
)
|
||||
|
||||
mock_client.messages.create.assert_called_once_with(
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=512,
|
||||
system="You are a helper.",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
output_config={"format": {"type": "json_schema", "schema": schema}},
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generate_json_no_output_config_when_schema_none(self):
|
||||
"""With no schema, output_config is not sent (backward compatible)."""
|
||||
from app.core import ai_provider
|
||||
|
||||
ai_provider._anthropic_clients.clear()
|
||||
|
||||
provider = AnthropicProvider(
|
||||
api_key="noschema-key", model="claude-sonnet-4-6", timeout=35
|
||||
)
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.content = [MagicMock(type="text", text="{}")]
|
||||
mock_response.stop_reason = "end_turn"
|
||||
mock_response.usage = MagicMock(input_tokens=1, output_tokens=1)
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.messages.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
with patch("anthropic.AsyncAnthropic", return_value=mock_client):
|
||||
await provider.generate_json(
|
||||
system_prompt="You are a helper.",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
)
|
||||
|
||||
_, call_kwargs = mock_client.messages.create.call_args
|
||||
assert "output_config" not in call_kwargs
|
||||
|
||||
|
||||
class TestGeminiProvider:
|
||||
"""Tests for GeminiProvider.generate_json."""
|
||||
@@ -174,6 +339,48 @@ class TestGeminiProvider:
|
||||
|
||||
mock_client.aio.models.generate_content.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generate_json_accepts_and_ignores_schema(self):
|
||||
"""Gemini accepts the schema kwarg (interface parity) and still
|
||||
returns JSON; it does not error on the param."""
|
||||
provider = GeminiProvider(api_key="test-key", model="gemini-2.5-flash")
|
||||
|
||||
mock_usage = MagicMock()
|
||||
mock_usage.prompt_token_count = 5
|
||||
mock_usage.candidates_token_count = 3
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.text = '{"answer": 1}'
|
||||
mock_response.usage_metadata = mock_usage
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.aio.models.generate_content = AsyncMock(return_value=mock_response)
|
||||
|
||||
mock_genai_module = MagicMock()
|
||||
mock_genai_module.Client.return_value = mock_client
|
||||
|
||||
mock_types = MagicMock()
|
||||
mock_types.Content.side_effect = lambda **kw: kw
|
||||
mock_types.Part.side_effect = lambda **kw: kw
|
||||
mock_types.GenerateContentConfig.side_effect = lambda **kw: kw
|
||||
|
||||
mock_google = MagicMock()
|
||||
mock_google.genai = mock_genai_module
|
||||
mock_genai_module.types = mock_types
|
||||
|
||||
with patch.dict(sys.modules, {
|
||||
"google": mock_google,
|
||||
"google.genai": mock_genai_module,
|
||||
"google.genai.types": mock_types,
|
||||
}):
|
||||
text, _, _ = await provider.generate_json(
|
||||
system_prompt="Generate JSON.",
|
||||
messages=[{"role": "user", "content": "data"}],
|
||||
schema={"type": "object"},
|
||||
)
|
||||
|
||||
assert text == '{"answer": 1}'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generate_json_handles_none_usage(self):
|
||||
"""Token counts default to 0 when usage_metadata attributes are None."""
|
||||
|
||||
104
backend/tests/test_kb_conversion_schema.py
Normal file
104
backend/tests/test_kb_conversion_schema.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""Tests for the structured-output JSON schemas used by KB conversion.
|
||||
|
||||
These validate that the schemas are well-formed against the Anthropic
|
||||
structured-output limits (every object carries additionalProperties: false,
|
||||
`required` is a subset of declared properties, no numeric/length constraints)
|
||||
and that the target_type -> schema selector returns the right shape. They do
|
||||
NOT exercise the live API — constrained decoding must be smoke-tested against
|
||||
a real model before AI_KB_CONVERT_STRUCTURED_OUTPUT is enabled in production.
|
||||
"""
|
||||
|
||||
from app.core.kb_conversion_service import (
|
||||
PROCEDURAL_SCHEMA,
|
||||
TROUBLESHOOTING_SCHEMA,
|
||||
_schema_for_target_type,
|
||||
)
|
||||
|
||||
# Constraints disallowed by Anthropic structured outputs (must be absent so the
|
||||
# API does not reject the schema or silently strip them).
|
||||
_DISALLOWED_KEYS = {
|
||||
"minimum",
|
||||
"maximum",
|
||||
"multipleOf",
|
||||
"minLength",
|
||||
"maxLength",
|
||||
"minItems",
|
||||
"maxItems",
|
||||
}
|
||||
|
||||
|
||||
def _assert_well_formed(schema: dict) -> None:
|
||||
"""Recursively assert a JSON schema obeys the structured-output limits."""
|
||||
if schema.get("type") == "object":
|
||||
assert schema.get("additionalProperties") is False, (
|
||||
f"object schema missing additionalProperties: false: {schema}"
|
||||
)
|
||||
props = schema.get("properties", {})
|
||||
required = set(schema.get("required", []))
|
||||
assert required <= set(props), (
|
||||
f"required keys not all declared as properties: {required - set(props)}"
|
||||
)
|
||||
for sub in props.values():
|
||||
_assert_well_formed(sub)
|
||||
elif schema.get("type") == "array":
|
||||
_assert_well_formed(schema["items"])
|
||||
|
||||
assert not (_DISALLOWED_KEYS & set(schema)), (
|
||||
f"schema uses unsupported constraint(s): {_DISALLOWED_KEYS & set(schema)}"
|
||||
)
|
||||
|
||||
|
||||
class TestStructuredOutputSchemas:
|
||||
def test_troubleshooting_schema_is_well_formed(self):
|
||||
_assert_well_formed(TROUBLESHOOTING_SCHEMA)
|
||||
|
||||
def test_procedural_schema_is_well_formed(self):
|
||||
_assert_well_formed(PROCEDURAL_SCHEMA)
|
||||
|
||||
def test_troubleshooting_schema_top_level_shape(self):
|
||||
props = TROUBLESHOOTING_SCHEMA["properties"]
|
||||
assert set(props) >= {"title", "description", "nodes"}
|
||||
node = props["nodes"]["items"]
|
||||
# Every field the troubleshooting prompt may emit must be modelled,
|
||||
# else additionalProperties: false makes them impossible to produce.
|
||||
assert set(node["properties"]) >= {
|
||||
"id",
|
||||
"type",
|
||||
"question",
|
||||
"options",
|
||||
"next_node_id",
|
||||
"confidence",
|
||||
"source_excerpt",
|
||||
}
|
||||
|
||||
def test_procedural_schema_top_level_shape(self):
|
||||
props = PROCEDURAL_SCHEMA["properties"]
|
||||
assert set(props) >= {"title", "description", "steps", "intake_form"}
|
||||
step = props["steps"]["items"]
|
||||
assert set(step["properties"]) >= {
|
||||
"id",
|
||||
"type",
|
||||
"content",
|
||||
"confidence",
|
||||
"source_excerpt",
|
||||
}
|
||||
intake = props["intake_form"]["items"]
|
||||
assert set(intake["properties"]) >= {
|
||||
"variable_name",
|
||||
"label",
|
||||
"field_type",
|
||||
"required",
|
||||
"display_order",
|
||||
}
|
||||
|
||||
|
||||
class TestSchemaSelector:
|
||||
def test_returns_troubleshooting_schema(self):
|
||||
assert _schema_for_target_type("troubleshooting") is TROUBLESHOOTING_SCHEMA
|
||||
|
||||
def test_returns_procedural_schema_for_procedural(self):
|
||||
assert _schema_for_target_type("procedural") is PROCEDURAL_SCHEMA
|
||||
|
||||
def test_defaults_to_procedural_for_unknown(self):
|
||||
# convert_document treats any non-"troubleshooting" target as procedural.
|
||||
assert _schema_for_target_type("something-else") is PROCEDURAL_SCHEMA
|
||||
Reference in New Issue
Block a user