First half of the WebSocket/SSE push slice. Paused mid-flight to hand
the branch to Codex for outside-voice review before stacking more
commits on top. See .ai/HANDOFF.md for the full pause context + what
to look at.
What's here:
- backend/app/core/escalation_bus.py — module-level singleton in-memory
pub/sub keyed by account_id. asyncio.Queue per subscriber with
64-event maxsize and drop-on-full semantics. Designed to be swappable
for Redis pub/sub when Railway scales past single-replica.
- backend/app/api/endpoints/session_handoffs.py — GET
/api/v1/ai-sessions/escalations/stream SSE endpoint. Auth via
require_engineer_or_admin. 25s heartbeat. Account-scoped subscribe
bound to current_user.account_id.
- backend/app/services/handoff_manager.py — dispatch_escalation_notifications
now publishes a `handoff_created` event to the bus BEFORE the email
fan-out, in a try/except so a bus failure can't block email delivery.
- backend/tests/test_escalation_bus.py — 7 unit tests, all green
standalone (0.14s). Cross-tenant isolation, drop-on-full, no-subscribers.
- backend/tests/test_handoff_manager.py — +1 dispatcher integration test
(publishes to bus, payload shape).
- backend/tests/test_session_handoffs_api.py — +2 endpoint tests (viewer
blocked, ready event handshake).
[gstack-context]
Decisions:
- SSE over WebSocket (one-way, browser EventSource semantics, fewer
moving parts behind Railway proxy)
- In-memory bus over Redis for v1 pilot (3 MSPs, single replica)
- Drop-on-full subscriber queue rather than back-pressure publishers
- Bus publish ahead of email send, both wrapped in try/except so
neither can break handoff creation
- Frontend will be a fetch-based ReadableStream reader matching the
existing streamDocumentation pattern, not native EventSource
(custom-header auth)
Remaining (post-Codex):
- Frontend SSE subscription in EscalationQueue.tsx (slide-in,
reconnect, tab-title flash, prefers-reduced-motion)
- Magic-moment handoff-context screen
- Re-run the full backend test suite to verify the SSE +
dispatcher integration tests (bus units already green standalone)
Tried:
- Running the full test suite repeatedly without xdist; the per-test
DROP SCHEMA + recreate fixture made the wall-clock time prohibitive when
multiple stale runs collided on the same Postgres test schema.
Resolution: run with pytest-xdist (`-n auto`) next time.
[/gstack-context]
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
107 lines
3.4 KiB
Python
107 lines
3.4 KiB
Python
"""Unit tests for the in-memory escalation pub/sub bus."""
|
|
import asyncio
|
|
from uuid import uuid4
|
|
|
|
import pytest
|
|
|
|
from app.core.escalation_bus import EscalationBus
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_publish_with_no_subscribers_returns_zero():
    """Publishing for an account with no subscribers reports zero deliveries."""
    event_bus = EscalationBus()
    account_id = uuid4()
    payload = {"type": "handoff_created"}

    assert await event_bus.publish(account_id, payload) == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_subscribe_then_publish_delivers_event():
    """A lone subscriber receives the exact payload that was published."""
    event_bus = EscalationBus()
    account_id = uuid4()
    inbox = await event_bus.subscribe(account_id)
    try:
        delivered_count = await event_bus.publish(
            account_id, {"type": "handoff_created", "id": "x"}
        )
        assert delivered_count == 1

        # Bounded wait so a missing delivery fails the test instead of hanging.
        received = await asyncio.wait_for(inbox.get(), timeout=1.0)
        assert received == {"type": "handoff_created", "id": "x"}
    finally:
        await event_bus.unsubscribe(account_id, inbox)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_two_subscribers_same_account_both_receive():
    """Two subscribers on the same account each get their own copy of an event."""
    event_bus = EscalationBus()
    account_id = uuid4()
    first_inbox = await event_bus.subscribe(account_id)
    second_inbox = await event_bus.subscribe(account_id)
    try:
        assert await event_bus.publish(account_id, {"type": "x"}) == 2

        first_event = await asyncio.wait_for(first_inbox.get(), timeout=1.0)
        second_event = await asyncio.wait_for(second_inbox.get(), timeout=1.0)
        assert first_event == second_event
        assert second_event == {"type": "x"}
    finally:
        await event_bus.unsubscribe(account_id, first_inbox)
        await event_bus.unsubscribe(account_id, second_inbox)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_subscriber_in_other_account_does_not_receive():
    """Events published for one account never reach another account's queue.

    Cross-tenant isolation is the whole point of the bus, so it is checked
    directly here.
    """
    event_bus = EscalationBus()
    publisher_account = uuid4()
    bystander_account = uuid4()
    publisher_inbox = await event_bus.subscribe(publisher_account)
    bystander_inbox = await event_bus.subscribe(bystander_account)
    try:
        assert await event_bus.publish(publisher_account, {"type": "x"}) == 1

        received = await asyncio.wait_for(publisher_inbox.get(), timeout=1.0)
        assert received == {"type": "x"}

        # The other tenant's queue has to stay empty: reading it must time out.
        with pytest.raises(asyncio.TimeoutError):
            await asyncio.wait_for(bystander_inbox.get(), timeout=0.1)
    finally:
        await event_bus.unsubscribe(publisher_account, publisher_inbox)
        await event_bus.unsubscribe(bystander_account, bystander_inbox)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_unsubscribe_drops_subscriber_count_to_zero():
    """subscriber_count goes 1 -> 0 across a subscribe/unsubscribe pair."""
    event_bus = EscalationBus()
    account_id = uuid4()

    inbox = await event_bus.subscribe(account_id)
    count_while_subscribed = event_bus.subscriber_count(account_id)
    assert count_while_subscribed == 1

    await event_bus.unsubscribe(account_id, inbox)
    count_after_unsubscribe = event_bus.subscriber_count(account_id)
    assert count_after_unsubscribe == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_publish_drops_events_when_subscriber_queue_is_full():
    """A stuck subscriber must not back-pressure publishers.

    Publishes one more event than the subscriber queue can hold (maxsize is
    64) without ever consuming, then checks that every publish returned
    promptly and that the queue ended up exactly at capacity.
    """
    bus = EscalationBus()
    account = uuid4()
    queue = await bus.subscribe(account)
    try:
        # Stuff the queue past capacity (maxsize is 64) without consuming.
        for _ in range(65):
            # Bound each publish: if publish ever blocked on a full queue,
            # the test should fail with a timeout rather than hang the run.
            await asyncio.wait_for(
                bus.publish(account, {"type": "x"}), timeout=1.0
            )
        # The queue must sit exactly at capacity. A looser "<= 64" check
        # would also pass if every event had been dropped.
        assert queue.qsize() == 64
        # Reaching this point means the overflow publish didn't raise —
        # the surplus event was dropped silently.
    finally:
        await bus.unsubscribe(account, queue)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_unsubscribe_unknown_queue_is_noop():
    """Unsubscribing an account/queue pair that was never registered is safe.

    Defensive check: cleanup in ``finally`` blocks relies on this not raising.
    """
    event_bus = EscalationBus()
    unknown_account = uuid4()
    unregistered_queue: asyncio.Queue = asyncio.Queue()

    # The test passes simply by this call completing without an exception.
    await event_bus.unsubscribe(unknown_account, unregistered_queue)
|