Files
resolutionflow/backend/tests/test_fix_outcome_endpoint.py
Michael Chihlas 362c7b1d79 fix(pilot): outcome-aware Resolve/Escalate previews
Issue #1 from phase-8-review-issues.md. Cache invalidation alone isn't
enough — previews were also omitting outcome fields from the LLM bundle,
so a fresh regenerate still couldn't distinguish proposed / failed /
partial / success.

- PATCH /outcome now bumps ai_sessions.state_version (matches
  record_decision's existing pattern).
- Resolution-note + escalation-package bundles now include status,
  applied_at, verified_at, partial_notes, failure_reason on the active fix.
- Generator prompts prescribe outcome-aware phrasing (closure language
  for success; what-we've-tried + next-steps for failed/partial).
- New end-to-end test asserts the regenerated preview reflects the
  recorded outcome, not just that the cache key changed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 22:04:56 -04:00

330 lines
12 KiB
Python

"""Integration tests for PATCH /ai-sessions/{sid}/suggested-fixes/{fid}/outcome.
Fixture style follows test_session_suggested_fixes_api.py:
client, test_user, auth_headers, test_db
"""
from __future__ import annotations
from unittest.mock import AsyncMock, call, patch
import pytest
from httpx import AsyncClient
from sqlalchemy import select
from app.api.endpoints.session_suggested_fixes import _clear_preview_cache_for_tests
from app.models.ai_session import AISession
from app.models.session_suggested_fix import SessionSuggestedFix
@pytest.fixture(autouse=True)
def _isolate_preview_cache():
    """Run the preview-cache reset helper before and after every test.

    ``autouse=True`` applies this to each test in the module without the
    test having to request it.
    """
    # Setup phase: reset before the test body runs.
    _clear_preview_cache_for_tests()

    yield

    # Teardown phase: reset again once the test has finished.
    _clear_preview_cache_for_tests()
# ── shared helper ────────────────────────────────────────────────────────────
async def _make_session_with_fix(test_db, user) -> tuple[str, str]:
    """Persist an AISession plus one SessionSuggestedFix attached to it.

    The session is owned by the user/account found under
    ``user["user_data"]``. The fix is created without an explicit status,
    so it carries whatever the model defaults to (the tests in this module
    treat it as a fix with no outcome recorded yet).

    Returns:
        ``(session_id, fix_id)``, both converted to ``str``.
    """
    owner = user["user_data"]

    ai_session = AISession(
        user_id=owner["id"],
        account_id=owner["account_id"],
        session_type="chat",
        intake_type="free_text",
        intake_content={"text": "outcome test"},
        status="active",
        confidence_tier="discovery",
        conversation_messages=[],
    )
    test_db.add(ai_session)
    # Flush before building the fix: the fix constructor reads
    # ai_session.id and ai_session.account_id.
    await test_db.flush()

    suggested_fix = SessionSuggestedFix(
        session_id=ai_session.id,
        account_id=ai_session.account_id,
        title="Reset credential cache",
        description="Clear stale credentials from the domain cache.",
        confidence_pct=82,
    )
    test_db.add(suggested_fix)
    await test_db.commit()
    await test_db.refresh(suggested_fix)

    return str(ai_session.id), str(suggested_fix.id)
# ── tests ────────────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_patch_outcome_marks_success(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """PATCH with ``applied_success`` returns 200, that status, and a ``verified_at``."""
    session_id, fix_id = await _make_session_with_fix(test_db, test_user)
    outcome_url = (
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome"
    )

    response = await client.patch(
        outcome_url,
        json={"outcome": "applied_success"},
        headers=auth_headers,
    )

    # Include the response text so a failure shows the server's error body.
    assert response.status_code == 200, response.text
    payload = response.json()
    assert payload["status"] == "applied_success"
    assert payload["verified_at"] is not None
@pytest.mark.asyncio
async def test_patch_outcome_partial_requires_notes(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """``applied_partial`` sent without notes is rejected with 400 and mentions notes."""
    session_id, fix_id = await _make_session_with_fix(test_db, test_user)
    outcome_url = (
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome"
    )

    response = await client.patch(
        outcome_url,
        json={"outcome": "applied_partial"},
        headers=auth_headers,
    )

    assert response.status_code == 400
    # Case-insensitive check that the error body refers to the missing notes.
    error_text = response.text.lower()
    assert "notes" in error_text
@pytest.mark.asyncio
async def test_partial_to_success_allowed(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """A fix recorded as ``applied_partial`` can afterwards be moved to ``applied_success``."""
    session_id, fix_id = await _make_session_with_fix(test_db, test_user)
    outcome_url = (
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome"
    )

    # Step 1: record a partial outcome (notes supplied).
    partial_response = await client.patch(
        outcome_url,
        json={"outcome": "applied_partial", "notes": "ran cred clear only"},
        headers=auth_headers,
    )
    assert partial_response.status_code == 200, partial_response.text

    # Step 2: upgrade the same fix to success.
    success_response = await client.patch(
        outcome_url,
        json={"outcome": "applied_success"},
        headers=auth_headers,
    )
    assert success_response.status_code == 200
    assert success_response.json()["status"] == "applied_success"
@pytest.mark.asyncio
async def test_terminal_outcome_is_locked(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """Once ``applied_failed`` is recorded, a later ``applied_success`` gets 409."""
    session_id, fix_id = await _make_session_with_fix(test_db, test_user)
    outcome_url = (
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome"
    )

    # The first outcome is accepted.
    failed_response = await client.patch(
        outcome_url,
        json={"outcome": "applied_failed", "notes": "no change"},
        headers=auth_headers,
    )
    assert failed_response.status_code == 200

    # A second, different outcome on the same fix is refused.
    retry_response = await client.patch(
        outcome_url,
        json={"outcome": "applied_success"},
        headers=auth_headers,
    )
    assert retry_response.status_code == 409
@pytest.mark.asyncio
async def test_partial_notes_can_be_updated(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """Sending ``applied_partial`` twice replaces ``partial_notes`` with the newer text."""
    session_id, fix_id = await _make_session_with_fix(test_db, test_user)
    outcome_url = (
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome"
    )

    first_response = await client.patch(
        outcome_url,
        headers=auth_headers,
        json={"outcome": "applied_partial", "notes": "ran cred clear only"},
    )
    assert first_response.status_code == 200
    assert first_response.json()["partial_notes"] == "ran cred clear only"

    # Same outcome again, different notes: the response must echo the new notes.
    second_response = await client.patch(
        outcome_url,
        headers=auth_headers,
        json={
            "outcome": "applied_partial",
            "notes": "also finished the rebuild; not verified yet",
        },
    )
    assert second_response.status_code == 200
    assert (
        second_response.json()["partial_notes"]
        == "also finished the rebuild; not verified yet"
    )
@pytest.mark.asyncio
async def test_dismissed_sets_no_timestamps(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """A ``dismissed`` outcome leaves both ``applied_at`` and ``verified_at`` null."""
    session_id, fix_id = await _make_session_with_fix(test_db, test_user)
    outcome_url = (
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome"
    )

    response = await client.patch(
        outcome_url,
        headers=auth_headers,
        json={"outcome": "dismissed"},
    )

    assert response.status_code == 200
    payload = response.json()
    assert payload["status"] == "dismissed"
    # Neither timestamp may be set for a dismissed fix.
    assert payload["applied_at"] is None
    assert payload["verified_at"] is None
@pytest.mark.asyncio
async def test_applied_at_auto_stamped_on_first_outcome(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """The first ``applied_success`` outcome comes back with ``applied_at`` populated.

    The request body carries no timestamp, so any value in the response was
    set by the server.
    """
    session_id, fix_id = await _make_session_with_fix(test_db, test_user)
    outcome_url = (
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome"
    )

    response = await client.patch(
        outcome_url,
        headers=auth_headers,
        json={"outcome": "applied_success"},
    )

    assert response.status_code == 200
    payload = response.json()
    assert payload["applied_at"] is not None
    assert payload["verified_at"] is not None
@pytest.mark.asyncio
async def test_failed_outcome_stores_notes_as_failure_reason(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """For ``applied_failed`` the notes land in ``failure_reason``; ``partial_notes`` stays null."""
    session_id, fix_id = await _make_session_with_fix(test_db, test_user)
    outcome_url = (
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome"
    )

    response = await client.patch(
        outcome_url,
        headers=auth_headers,
        json={"outcome": "applied_failed", "notes": "user reports no change"},
    )

    assert response.status_code == 200
    payload = response.json()
    assert payload["failure_reason"] == "user reports no change"
    assert payload["partial_notes"] is None
# ── state_version bump ────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_outcome_patch_bumps_state_version(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """A successful PATCH /outcome raises ``AISession.state_version`` by exactly one."""
    from uuid import UUID

    session_id, fix_id = await _make_session_with_fix(test_db, test_user)

    # Read the version straight from the database before touching the API.
    lookup = select(AISession).where(AISession.id == UUID(session_id))
    session_row = (await test_db.execute(lookup)).scalar_one()
    version_before = session_row.state_version

    response = await client.patch(
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome",
        headers=auth_headers,
        json={"outcome": "applied_success"},
    )
    assert response.status_code == 200

    # Reload the row so the comparison uses the stored value, not the
    # attribute values loaded before the request.
    await test_db.refresh(session_row)
    assert session_row.state_version == version_before + 1, (
        "Outcome patch must bump state_version so preview cache is invalidated"
    )
# ── outcome propagation into preview bundle ───────────────────────────────────
@pytest.mark.asyncio
async def test_resolution_note_preview_reflects_outcome_after_patch(
    client: AsyncClient, test_user, auth_headers, test_db
):
    """End-to-end: a preview regenerated after an outcome reflects that outcome.

    Flow:
      1. Request a resolution-note preview before any outcome is recorded.
      2. PATCH an ``applied_failed`` outcome with a distinctive reason.
      3. Request the preview again.

    Asserted: both previews are cache misses, ``state_version`` grew between
    them, the markdown differs, and the user message handed to the LLM for
    the second preview contains both ``applied_failed`` and the reason text.

    The LLM provider is replaced with a stub so the test is deterministic.
    The stub records the first message's content and echoes it back inside
    the returned markdown, so the captured content shows what the generator
    actually sent.
    """
    session_id, fix_id = await _make_session_with_fix(test_db, test_user)
    distinct_failure_reason = "DISTINCT-FAILURE-REASON-XYZZY-42"

    # User-message content of every stubbed LLM call, in call order.
    calls_made: list[str] = []

    async def fake_generate_text(system_prompt, messages, max_tokens):
        """Stub for ``generate_text``: record the prompt and echo it back."""
        user_content = messages[0]["content"]
        calls_made.append(user_content)
        # Return markdown that includes the user-message bundle verbatim so we
        # can assert the bundle shape without inspecting mock internals.
        return (
            f"## Problem\ntest\n\n## What we confirmed\n(none)\n\n"
            f"## Root cause\ntest\n\n## Resolution\nBUNDLE_CONTENT={user_content}",
            100,
            50,
        )

    fake_provider = AsyncMock()
    fake_provider.generate_text = AsyncMock(side_effect=fake_generate_text)

    preview_url = f"/api/v1/ai-sessions/{session_id}/resolution-note/preview"
    outcome_url = (
        f"/api/v1/ai-sessions/{session_id}/suggested-fixes/{fix_id}/outcome"
    )

    with patch(
        "app.services.resolution_note_generator.get_ai_provider",
        return_value=fake_provider,
    ):
        # Preview A — before any outcome recorded (status = "proposed").
        r_a = await client.post(preview_url, headers=auth_headers)
        assert r_a.status_code == 200
        markdown_a = r_a.json()["markdown"]
        version_a = r_a.json()["state_version"]
        assert r_a.json()["from_cache"] is False

        # Record an applied_failed outcome with a distinctive reason.
        r_patch = await client.patch(
            outcome_url,
            json={"outcome": "applied_failed", "notes": distinct_failure_reason},
            headers=auth_headers,
        )
        assert r_patch.status_code == 200

        # Preview B — must be a cache miss because state_version changed.
        r_b = await client.post(preview_url, headers=auth_headers)
        assert r_b.status_code == 200
        markdown_b = r_b.json()["markdown"]
        version_b = r_b.json()["state_version"]
        assert r_b.json()["from_cache"] is False, (
            "Preview after outcome patch must be a cache miss (state_version changed)"
        )

    # State version increased between the two previews.
    # The separator keeps the two numbers readable in the failure message.
    assert version_b > version_a, (
        f"state_version should have increased; got {version_a} -> {version_b}"
    )

    # Markdown differs between the two previews.
    assert markdown_a != markdown_b, (
        "Regenerated preview after outcome patch should differ from pre-outcome preview"
    )

    # The bundle passed to the LLM for preview B includes the outcome fields.
    assert len(calls_made) == 2, (
        f"Expected 2 LLM calls (one per preview); got {len(calls_made)}"
    )
    bundle_b = calls_made[1]
    assert "applied_failed" in bundle_b, (
        "Bundle for second preview should include 'Outcome status: applied_failed'"
    )
    assert distinct_failure_reason in bundle_b, (
        "Bundle for second preview should include the failure_reason text"
    )