fix(billing): make Stripe webhook idempotency atomic so failed handlers can retry

Previously `apply_subscription_event` committed the StripeEvent idempotency
row before invoking the handler, then the handlers each committed their own
mutations. If a handler raised mid-flight (transient DB error, network blip,
race), the idempotency mark was already persisted — Stripe's retry would hit
the IntegrityError branch and silently return False, and the subscription
state would permanently desync from Stripe.

Switch to a single atomic transaction:
- Insert the StripeEvent + flush (catch IntegrityError on duplicate event_id).
- Run the handler.
- Commit on success; roll back the entire transaction on failure and re-raise.

Drop the four `db.commit()` calls inside `_handle_*` so the outer caller owns
commit. The webhook endpoint already lets exceptions propagate, so a 500
response now correctly tells Stripe to retry.

Tests: three new regression cases in test_stripe_webhook_handler.py covering
handler failure (no idempotency mark persisted), retry-after-failure success,
and duplicate-event-id skip.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-07 01:36:13 -04:00
parent 3630dd5a80
commit 06200fabb1
2 changed files with 213 additions and 22 deletions

View File

@@ -210,28 +210,44 @@ class BillingService:
) -> bool:
"""Idempotent. Returns True if the event was applied; False if it had
already been processed (idempotent ack). The webhook handler returns 200
either way."""
either way.
Atomic: the StripeEvent idempotency mark and the handler's state
mutations are committed in a single transaction. If the handler raises
the entire transaction (idempotency mark + partial mutations) is rolled
back, so a Stripe retry will re-run the handler. Without this, a
handler that fails mid-flight would leave the StripeEvent row persisted
and silently desync subscription state from Stripe.
"""
db.add(StripeEvent(
id=event_id,
event_type=event_type,
payload_excerpt=_excerpt(payload),
))
try:
db.add(StripeEvent(
id=event_id,
event_type=event_type,
payload_excerpt=_excerpt(payload),
))
await db.commit()
await db.flush()
except IntegrityError:
# Duplicate event_id — already processed (or in flight). Ack with False.
await db.rollback()
return False
if event_type == "checkout.session.completed":
await _handle_checkout_completed(db, payload)
elif event_type == "customer.subscription.updated":
await _handle_subscription_updated(db, payload)
elif event_type == "customer.subscription.deleted":
await _handle_subscription_deleted(db, payload)
elif event_type == "invoice.payment_failed":
await _handle_payment_failed(db, payload)
elif event_type == "invoice.payment_succeeded":
await _handle_payment_succeeded(db, payload)
try:
if event_type == "checkout.session.completed":
await _handle_checkout_completed(db, payload)
elif event_type == "customer.subscription.updated":
await _handle_subscription_updated(db, payload)
elif event_type == "customer.subscription.deleted":
await _handle_subscription_deleted(db, payload)
elif event_type == "invoice.payment_failed":
await _handle_payment_failed(db, payload)
elif event_type == "invoice.payment_succeeded":
await _handle_payment_succeeded(db, payload)
await db.commit()
except Exception:
# Roll back the StripeEvent insert + any partial handler mutations
# so Stripe's retry can re-run cleanly.
await db.rollback()
raise
return True
@@ -282,7 +298,7 @@ async def _handle_checkout_completed(db: AsyncSession, payload: dict):
)).scalar_one_or_none()
if pb is not None:
sub.plan = pb.plan
await db.commit()
# No commit — apply_subscription_event commits once for the full event.
async def _handle_subscription_updated(db: AsyncSession, payload: dict):
@@ -297,7 +313,7 @@ async def _handle_subscription_updated(db: AsyncSession, payload: dict):
sub.current_period_end = datetime.fromtimestamp(obj["current_period_end"], tz=timezone.utc)
sub.cancel_at_period_end = obj.get("cancel_at_period_end", False)
sub.seat_limit = obj["items"]["data"][0]["quantity"]
await db.commit()
# No commit — apply_subscription_event commits once for the full event.
async def _handle_subscription_deleted(db: AsyncSession, payload: dict):
@@ -308,7 +324,7 @@ async def _handle_subscription_deleted(db: AsyncSession, payload: dict):
if sub is None:
return
sub.status = "canceled"
await db.commit()
# No commit — apply_subscription_event commits once for the full event.
async def _handle_payment_failed(db: AsyncSession, payload: dict):
@@ -322,7 +338,7 @@ async def _handle_payment_failed(db: AsyncSession, payload: dict):
if sub is None:
return
sub.status = "past_due"
await db.commit()
# No commit — apply_subscription_event commits once for the full event.
async def _handle_payment_succeeded(db: AsyncSession, payload: dict):
@@ -337,4 +353,4 @@ async def _handle_payment_succeeded(db: AsyncSession, payload: dict):
return
if sub.status == "past_due":
sub.status = "active"
await db.commit()
# No commit — apply_subscription_event commits once for the full event.