# resolutionflow/backend/tests/test_kb_conversion_schema.py

"""Tests for the structured-output JSON schemas used by KB conversion.

These validate that the schemas are well-formed against the Anthropic
structured-output limits (every object carries additionalProperties: false,
`required` is a subset of declared properties, no numeric/length constraints)
and that the target_type -> schema selector returns the right shape. They do
NOT exercise the live API — constrained decoding must be smoke-tested against
a real model before AI_KB_CONVERT_STRUCTURED_OUTPUT is enabled in production.
"""

from app.core.kb_conversion_service import (
    PROCEDURAL_SCHEMA,
    TROUBLESHOOTING_SCHEMA,
    _schema_for_target_type,
)

# JSON-schema constraint keywords that Anthropic structured outputs disallow.
# The schemas must not contain any of them, otherwise the API either rejects
# the schema or silently strips the constraint.
_DISALLOWED_KEYS = (
    {"minimum", "maximum", "multipleOf"}  # numeric bounds / step
    | {"minLength", "maxLength"}  # string length
    | {"minItems", "maxItems"}  # array size
)


def _assert_well_formed(schema: dict) -> None:
    """Recursively assert a JSON schema obeys the structured-output limits.

    Checked at this node and at every nested sub-schema:

    * each object sets ``additionalProperties`` to ``False`` and lists only
      declared properties in ``required``;
    * each array declares an ``items`` schema;
    * none of the keywords in ``_DISALLOWED_KEYS`` appear.

    Sub-schemas are followed through ``properties``, ``items``,
    ``anyOf``/``allOf``/``oneOf`` and ``$defs``/``definitions``, so an object
    nested inside a union or a shared definition is not silently skipped.

    Args:
        schema: A JSON schema (or sub-schema) as a plain dict.

    Raises:
        AssertionError: If any rule is violated; the message names the
            offending schema or keys.
    """
    declared = schema.get("type")
    # ``type`` may be a single name or a list such as ["object", "null"].
    if isinstance(declared, (list, tuple)):
        type_names = set(declared)
    else:
        type_names = {declared}

    if "object" in type_names:
        assert schema.get("additionalProperties") is False, (
            f"object schema missing additionalProperties: false: {schema}"
        )
        props = schema.get("properties", {})
        required = set(schema.get("required", []))
        assert required <= set(props), (
            f"required keys not all declared as properties: {required - set(props)}"
        )
        # Recurse into the property *schemas* only: property names are free
        # text and must not be matched against the disallowed keywords.
        for sub in props.values():
            _assert_well_formed(sub)

    if "array" in type_names:
        items = schema.get("items")
        assert isinstance(items, dict), (
            f"array schema missing an items schema: {schema}"
        )
        _assert_well_formed(items)

    # Unions / intersections carry their sub-schemas in a list.
    for keyword in ("anyOf", "allOf", "oneOf"):
        for sub in schema.get(keyword, []):
            _assert_well_formed(sub)

    # Shared definitions carry their sub-schemas in a name -> schema mapping.
    for keyword in ("$defs", "definitions"):
        for sub in schema.get(keyword, {}).values():
            _assert_well_formed(sub)

    assert not (_DISALLOWED_KEYS & set(schema)), (
        f"schema uses unsupported constraint(s): {_DISALLOWED_KEYS & set(schema)}"
    )


class TestStructuredOutputSchemas:
    """Well-formedness and shape checks for both conversion schemas."""

    def test_troubleshooting_schema_is_well_formed(self):
        """The troubleshooting schema passes the recursive limit checks."""
        _assert_well_formed(TROUBLESHOOTING_SCHEMA)

    def test_procedural_schema_is_well_formed(self):
        """The procedural schema passes the recursive limit checks."""
        _assert_well_formed(PROCEDURAL_SCHEMA)

    def test_troubleshooting_schema_top_level_shape(self):
        """Top-level and per-node fields of the troubleshooting schema exist."""
        top_level = TROUBLESHOOTING_SCHEMA["properties"]
        assert {"title", "description", "nodes"}.issubset(top_level)

        # With additionalProperties: false, any field the troubleshooting
        # prompt may emit is impossible to produce unless it is modelled here.
        expected_node_fields = {
            "id",
            "type",
            "question",
            "options",
            "next_node_id",
            "confidence",
            "source_excerpt",
        }
        node_schema = top_level["nodes"]["items"]
        assert expected_node_fields.issubset(node_schema["properties"])

    def test_procedural_schema_top_level_shape(self):
        """Top-level, per-step and intake-form fields of the procedural schema exist."""
        top_level = PROCEDURAL_SCHEMA["properties"]
        assert {"title", "description", "steps", "intake_form"}.issubset(top_level)

        expected_step_fields = {
            "id",
            "type",
            "content",
            "confidence",
            "source_excerpt",
        }
        step_schema = top_level["steps"]["items"]
        assert expected_step_fields.issubset(step_schema["properties"])

        expected_intake_fields = {
            "variable_name",
            "label",
            "field_type",
            "required",
            "display_order",
        }
        intake_schema = top_level["intake_form"]["items"]
        assert expected_intake_fields.issubset(intake_schema["properties"])


class TestSchemaSelector:
    """Checks which schema object ``_schema_for_target_type`` hands back."""

    def test_returns_troubleshooting_schema(self):
        """The "troubleshooting" target yields the troubleshooting schema itself."""
        chosen = _schema_for_target_type("troubleshooting")
        assert chosen is TROUBLESHOOTING_SCHEMA

    def test_returns_procedural_schema_for_procedural(self):
        """The "procedural" target yields the procedural schema itself."""
        chosen = _schema_for_target_type("procedural")
        assert chosen is PROCEDURAL_SCHEMA

    def test_defaults_to_procedural_for_unknown(self):
        """An unrecognised target falls back to the procedural schema."""
        # convert_document treats any non-"troubleshooting" target as procedural.
        chosen = _schema_for_target_type("something-else")
        assert chosen is PROCEDURAL_SCHEMA