fix: prevent InFailedSQLTransactionError in session creation

Root cause: a SQL statement that failed during embedding generation aborted the
enclosing DB transaction. The except block caught the Python exception but never
rolled back, so the transaction stayed in a failed state and the database
rejected every later statement issued on it. Subsequent queries
(_record_usage → subscription lookup) then failed with
InFailedSQLTransactionError.

Fixes:
- session_embedding_service: use begin_nested() savepoint so failures don't
  poison the parent transaction
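- ai_sessions.py: add db.rollback() before _record_usage in all 3 error
  handlers (create, respond, pickup) to recover from broken transactions, and
  wrap the usage recording in try/except so a failure there is only logged
  and the 502 response is still returned to the client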

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-20 04:36:12 +00:00
parent 2ed8a2af15
commit eed771cb27
2 changed files with 74 additions and 61 deletions

View File

@@ -139,6 +139,9 @@ async def create_session(
) )
except Exception as e: except Exception as e:
logger.exception("FlowPilot session start failed: %s", e) logger.exception("FlowPilot session start failed: %s", e)
# Rollback the failed transaction before attempting usage recording
await db.rollback()
try:
await _record_usage( await _record_usage(
current_user, db, current_user, db,
generation_type="flowpilot_start", generation_type="flowpilot_start",
@@ -146,6 +149,8 @@ async def create_session(
succeeded=False, error_code=type(e).__name__, succeeded=False, error_code=type(e).__name__,
) )
await db.commit() await db.commit()
except Exception:
logger.warning("Failed to record usage after session start failure", exc_info=True)
raise HTTPException( raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY, status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"AI provider error ({type(e).__name__}). Please try again.", detail=f"AI provider error ({type(e).__name__}). Please try again.",
@@ -193,6 +198,8 @@ async def respond_to_step(
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=str(e)) raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=str(e))
except Exception as e: except Exception as e:
logger.exception("FlowPilot response failed: %s", e) logger.exception("FlowPilot response failed: %s", e)
await db.rollback()
try:
await _record_usage( await _record_usage(
current_user, db, current_user, db,
generation_type="flowpilot_respond", generation_type="flowpilot_respond",
@@ -202,6 +209,8 @@ async def respond_to_step(
error_code=type(e).__name__, error_code=type(e).__name__,
) )
await db.commit() await db.commit()
except Exception:
logger.warning("Failed to record usage after response failure", exc_info=True)
raise HTTPException( raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY, status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"AI provider error ({type(e).__name__}). Please try again.", detail=f"AI provider error ({type(e).__name__}). Please try again.",
@@ -387,6 +396,8 @@ async def pickup_session(
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=str(e)) raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=str(e))
except Exception as e: except Exception as e:
logger.exception("FlowPilot pickup failed: %s", e) logger.exception("FlowPilot pickup failed: %s", e)
await db.rollback()
try:
await _record_usage( await _record_usage(
current_user, db, current_user, db,
generation_type="flowpilot_pickup", generation_type="flowpilot_pickup",
@@ -396,6 +407,8 @@ async def pickup_session(
error_code=type(e).__name__, error_code=type(e).__name__,
) )
await db.commit() await db.commit()
except Exception:
logger.warning("Failed to record usage after pickup failure", exc_info=True)
raise HTTPException( raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY, status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"AI provider error ({type(e).__name__}). Please try again.", detail=f"AI provider error ({type(e).__name__}). Please try again.",

View File

@@ -54,6 +54,8 @@ async def generate_session_embedding(session_id: UUID, db: AsyncSession) -> None
embedding_str = "[" + ",".join(str(v) for v in embedding_vector) + "]" embedding_str = "[" + ",".join(str(v) for v in embedding_vector) + "]"
# Use a savepoint so failures don't poison the parent transaction
async with db.begin_nested():
# Check for existing embedding # Check for existing embedding
existing = await db.execute( existing = await db.execute(
select(AISessionEmbedding).where( select(AISessionEmbedding).where(
@@ -90,8 +92,6 @@ async def generate_session_embedding(session_id: UUID, db: AsyncSession) -> None
"embedding": embedding_str, "embedding": embedding_str,
}, },
) )
await db.flush()
except Exception: except Exception:
logger.warning( logger.warning(
"Failed to generate embedding for session %s", session_id, exc_info=True "Failed to generate embedding for session %s", session_id, exc_info=True