"""Tests for the structured-output JSON schemas used by KB conversion.

These validate that the schemas are well-formed against the Anthropic
structured-output limits (every object carries additionalProperties: false,
`required` is a subset of declared properties, no numeric/length constraints)
and that the target_type -> schema selector returns the right shape. They do
NOT exercise the live API — constrained decoding must be smoke-tested against
a real model before AI_KB_CONVERT_STRUCTURED_OUTPUT is enabled in production.
"""

from app.core.kb_conversion_service import (
    PROCEDURAL_SCHEMA,
    TROUBLESHOOTING_SCHEMA,
    _schema_for_target_type,
)

# Constraints disallowed by Anthropic structured outputs (must be absent so the
# API does not reject the schema or silently strip them).
_DISALLOWED_KEYS = {
    "minimum",
    "maximum",
    "multipleOf",
    "minLength",
    "maxLength",
    "minItems",
    "maxItems",
}

# Keywords whose value is a list of subschemas.
_SUBSCHEMA_LIST_KEYS = ("anyOf", "allOf", "oneOf")

# Keywords whose value maps a name to a reusable subschema.
_SUBSCHEMA_MAP_KEYS = ("$defs", "definitions")


def _declared_types(schema: dict) -> set:
    """Return the JSON types a schema declares via its ``type`` keyword.

    JSON Schema lets ``type`` be a single string or a list of strings (for
    example ``["object", "null"]`` for a nullable object); both forms are
    normalised to a set. A schema without ``type`` yields an empty set.
    """
    declared = schema.get("type")
    if declared is None:
        return set()
    if isinstance(declared, str):
        return {declared}
    return set(declared)


def _assert_well_formed(schema: dict) -> None:
    """Recursively assert a JSON schema obeys the structured-output limits.

    Checks performed on ``schema`` and on every subschema reachable through
    ``properties``, ``items``, ``anyOf``/``allOf``/``oneOf`` and
    ``$defs``/``definitions``:

    * a schema whose ``type`` includes ``"object"`` sets
      ``additionalProperties`` to ``False`` and lists only declared
      properties in ``required``;
    * a schema whose ``type`` includes ``"array"`` declares ``items``;
    * none of the keys in ``_DISALLOWED_KEYS`` is present.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    types = _declared_types(schema)

    if "object" in types:
        assert schema.get("additionalProperties") is False, (
            f"object schema missing additionalProperties: false: {schema}"
        )
        props = schema.get("properties", {})
        required = set(schema.get("required", []))
        assert required <= set(props), (
            f"required keys not all declared as properties: {required - set(props)}"
        )
        for sub in props.values():
            _assert_well_formed(sub)

    if "array" in types:
        # Fail with a readable message instead of a bare KeyError.
        assert "items" in schema, f"array schema missing items: {schema}"
        _assert_well_formed(schema["items"])

    # Object schemas can also hide inside composition keywords and shared
    # definitions; walk them so nested objects cannot escape the checks.
    for key in _SUBSCHEMA_LIST_KEYS:
        for sub in schema.get(key, []):
            _assert_well_formed(sub)
    for key in _SUBSCHEMA_MAP_KEYS:
        for sub in schema.get(key, {}).values():
            _assert_well_formed(sub)

    assert not (_DISALLOWED_KEYS & set(schema)), (
        f"schema uses unsupported constraint(s): {_DISALLOWED_KEYS & set(schema)}"
    )


class TestStructuredOutputSchemas:
    """Well-formedness and top-level shape checks for both conversion schemas."""

    def test_troubleshooting_schema_is_well_formed(self):
        _assert_well_formed(TROUBLESHOOTING_SCHEMA)

    def test_procedural_schema_is_well_formed(self):
        _assert_well_formed(PROCEDURAL_SCHEMA)

    def test_troubleshooting_schema_top_level_shape(self):
        props = TROUBLESHOOTING_SCHEMA["properties"]
        assert set(props) >= {"title", "description", "nodes"}
        node = props["nodes"]["items"]
        # Every field the troubleshooting prompt may emit must be modelled,
        # else additionalProperties: false makes them impossible to produce.
        assert set(node["properties"]) >= {
            "id",
            "type",
            "question",
            "options",
            "next_node_id",
            "confidence",
            "source_excerpt",
        }

    def test_procedural_schema_top_level_shape(self):
        props = PROCEDURAL_SCHEMA["properties"]
        assert set(props) >= {"title", "description", "steps", "intake_form"}
        step = props["steps"]["items"]
        assert set(step["properties"]) >= {
            "id",
            "type",
            "content",
            "confidence",
            "source_excerpt",
        }
        intake = props["intake_form"]["items"]
        assert set(intake["properties"]) >= {
            "variable_name",
            "label",
            "field_type",
            "required",
            "display_order",
        }


class TestSchemaSelector:
    """Checks that ``_schema_for_target_type`` maps target types to schemas."""

    def test_returns_troubleshooting_schema(self):
        assert _schema_for_target_type("troubleshooting") is TROUBLESHOOTING_SCHEMA

    def test_returns_procedural_schema_for_procedural(self):
        assert _schema_for_target_type("procedural") is PROCEDURAL_SCHEMA

    def test_defaults_to_procedural_for_unknown(self):
        # convert_document treats any non-"troubleshooting" target as procedural.
        assert _schema_for_target_type("something-else") is PROCEDURAL_SCHEMA