From 92fa3bc6ab54fd3dcfe1b181a4d4387427973428 Mon Sep 17 00:00:00 2001 From: Michael Chihlas Date: Wed, 13 May 2026 15:52:21 -0400 Subject: [PATCH] feat(auth): add session policy settings + account columns + migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First commit in the session-expiration-policy series (see docs/plans/2026-05-13-session-expiration-policy.md). No behavior change yet — this lays the schema + settings groundwork only. - Settings: SESSION_IDLE_MINUTES_DEFAULT=4320 (3d), SESSION_ABSOLUTE_MINUTES_DEFAULT=20160 (14d), plus MIN/MAX bounds so account overrides have envelopes (15min..30d idle, 1h..90d absolute). - accounts table: nullable session_idle_minutes and session_absolute_minutes columns (NULL = use system default), plus a CHECK constraint that rejects idle > absolute when both are set. Partial-override validation lives at the app layer because the DB cannot read Settings. Subsequent commits will: distinguish idle vs invalid-token expiry on the wire, embed auth_time/idle_max/abs_max in refresh JWTs, enforce the absolute cap in /auth/refresh, add the owner-only policy + bulk-revoke endpoints, and surface everything in an AccountSecurity settings page with a session-expiry toast. 
Co-Authored-By: Claude Opus 4.7 --- ..._add_session_policy_columns_to_accounts.py | 72 +++ backend/app/core/config.py | 13 + backend/app/models/account.py | 6 + .../2026-05-13-session-expiration-policy.md | 435 ++++++++++++++++++ 4 files changed, 526 insertions(+) create mode 100644 backend/alembic/versions/b269a1add160_add_session_policy_columns_to_accounts.py create mode 100644 docs/plans/2026-05-13-session-expiration-policy.md diff --git a/backend/alembic/versions/b269a1add160_add_session_policy_columns_to_accounts.py b/backend/alembic/versions/b269a1add160_add_session_policy_columns_to_accounts.py new file mode 100644 index 00000000..366baecd --- /dev/null +++ b/backend/alembic/versions/b269a1add160_add_session_policy_columns_to_accounts.py @@ -0,0 +1,72 @@ +"""add_session_policy_columns_to_accounts + +Revision ID: b269a1add160 +Revises: 4ce3e594cb87 +Create Date: 2026-05-13 19:50:51.343777 + +Adds per-account session-policy overrides. NULL on either column means +"use the system default from Settings.SESSION_*_MINUTES_DEFAULT." The +CHECK constraint is defense-in-depth for the both-set case; the partial- +override case (one NULL, one set) is validated at the app layer because +the DB cannot see Settings. + +See docs/plans/2026-05-13-session-expiration-policy.md for full design. +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +revision: str = 'b269a1add160' +down_revision: Union[str, None] = '4ce3e594cb87' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + 'accounts', + sa.Column( + 'session_idle_minutes', + sa.Integer(), + nullable=True, + comment=( + 'Account override for idle session window in minutes. ' + 'NULL = use Settings.SESSION_IDLE_MINUTES_DEFAULT.' 
+ ), + ), + ) + op.add_column( + 'accounts', + sa.Column( + 'session_absolute_minutes', + sa.Integer(), + nullable=True, + comment=( + 'Account override for absolute session lifetime in minutes. ' + 'NULL = use Settings.SESSION_ABSOLUTE_MINUTES_DEFAULT.' + ), + ), + ) + op.create_check_constraint( + 'session_idle_le_absolute_when_both_set', + 'accounts', + '(' + 'session_idle_minutes IS NULL ' + 'OR session_absolute_minutes IS NULL ' + 'OR session_idle_minutes <= session_absolute_minutes' + ')', + ) + op.execute( + "COMMENT ON CONSTRAINT session_idle_le_absolute_when_both_set ON accounts IS " + "'Defense in depth: catches idle > absolute when both are overridden. " + "Partial-override case (one NULL, one set) is validated at the app layer " + "against current system defaults, since the DB cannot see Settings.'" + ) + + +def downgrade() -> None: + op.drop_constraint('session_idle_le_absolute_when_both_set', 'accounts', type_='check') + op.drop_column('accounts', 'session_absolute_minutes') + op.drop_column('accounts', 'session_idle_minutes') diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 9c5bd838..d1582265 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -69,6 +69,19 @@ class Settings(BaseSettings): ACCESS_TOKEN_EXPIRE_MINUTES: int = 5 REFRESH_TOKEN_EXPIRE_DAYS: int = 7 + # Session policy — see docs/plans/2026-05-13-session-expiration-policy.md + # Refresh tokens enforce two windows: idle (between rotations) and absolute + # (from original login). Defaults can be overridden per-account, bounded by + # the MIN/MAX values below. Values are minutes everywhere except inside the + # refresh JWT, where idle_max/abs_max are stored as seconds for direct + # Unix-time math. 
+ SESSION_IDLE_MINUTES_DEFAULT: int = 4320 # 3 days + SESSION_ABSOLUTE_MINUTES_DEFAULT: int = 20160 # 14 days + SESSION_IDLE_MINUTES_MIN: int = 15 + SESSION_IDLE_MINUTES_MAX: int = 43200 # 30 days + SESSION_ABSOLUTE_MINUTES_MIN: int = 60 # 1 hour + SESSION_ABSOLUTE_MINUTES_MAX: int = 129600 # 90 days + # Security BCRYPT_ROUNDS: int = 12 diff --git a/backend/app/models/account.py b/backend/app/models/account.py index aa2c5750..b036d20f 100644 --- a/backend/app/models/account.py +++ b/backend/app/models/account.py @@ -44,6 +44,12 @@ class Account(Base): Integer, nullable=True, default=100, server_default="100" ) + # Session policy override (NULL = use Settings.SESSION_*_MINUTES_DEFAULT). + # Validated at the app layer because the DB cannot see Settings; a DB + # CHECK constraint covers the both-set case only. + session_idle_minutes: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + session_absolute_minutes: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + # Custom branding (Task 9) branding_logo_url: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) branding_primary_color: Mapped[Optional[str]] = mapped_column(String(7), nullable=True) # hex like #06b6d4 diff --git a/docs/plans/2026-05-13-session-expiration-policy.md b/docs/plans/2026-05-13-session-expiration-policy.md new file mode 100644 index 00000000..4431bd4e --- /dev/null +++ b/docs/plans/2026-05-13-session-expiration-policy.md @@ -0,0 +1,435 @@ +# Session Expiration Policy — Design & Implementation Plan + +**Date:** 2026-05-13 +**Owner:** Michael Chihlas +**Status:** Draft — pending review +**Related issue:** none yet (file after plan approval) + +--- + +## 1. Problem + +Today, once a user logs in to ResolutionFlow, they effectively stay logged in forever: + +- Access token: 5 minutes — fine. +- Refresh token: 7 days, with JTI rotation. Every `/auth/refresh` mints a fresh 7-day window and revokes the old JTI. 
+- Frontend stores both in `localStorage`; Axios interceptor silently refreshes on every 401. + +Net effect: a **sliding 7-day session with no absolute cap**. As long as a user opens the app at least once a week, the refresh token rolls forward indefinitely. There is no enforced re-authentication, no idle-timeout cap, no maximum session lifetime — and no per-account control for MSP owners whose customers may demand stricter security. + +This was acceptable for pilot but is **not acceptable for self-serve launch**: + +- MSP buyers' SOC2 / cyber-insurance auditors routinely require enforced session timeouts. +- A stolen device with an unlocked browser hands an attacker indefinite access. +- Owners of paying accounts expect to be able to set policy for their members. + +## 2. Goals + +1. **System-level absolute cap** — no session can exceed N days regardless of activity. +2. **Idle cap** — sessions inactive for N days must require re-login. +3. **Per-account owner override** — account owners can tighten or (within sysadmin-imposed ceilings) loosen the policy for their account. +4. **Graceful UX** — users get warned before forced re-login; rotation continues to be silent within the active window. +5. **Backward-compatible rollout** — existing refresh tokens are grandfathered for one rotation, not invalidated at deploy. + +## 3. Non-goals + +- Multi-device session management (revoke individual devices). Tracked separately; out of scope here. +- "Remember this device" / trusted device list. Out of scope. +- Per-user (vs per-account) overrides. Out of scope. +- Re-auth on sensitive action (step-up auth). Out of scope. +- Annual review of session policy (analytics dashboards). Out of scope. + +## 4. Design + +### 4.1 Two windows, both enforced + +| Window | Default | Meaning | +|---|---|---| +| **Idle** | 3 days | Maximum time between `/auth/refresh` calls. Rotation extends this window. | +| **Absolute** | 14 days | Hard cap from original login (`auth_time`). 
Rotation does **not** extend this. | + +The shorter of the two governs: a token is valid only if `now < min(idle_exp, auth_time + absolute_max)`. + +### 4.2 JWT payload changes + +Refresh-token JWT today (`backend/app/core/security.py:36`): +```json +{ "sub": "<user_id>", "type": "refresh", "jti": "<jti>", "exp": <unix_seconds> } +``` + +New refresh-token JWT: +```json +{ + "sub": "<user_id>", + "type": "refresh", + "jti": "<jti>", + "exp": <unix_seconds>, // unchanged semantics, now = idle window + "auth_time": <unix_seconds>, // original login (Unix seconds); NOT reset on rotation + "idle_max": <seconds>, // captured at login (account policy snapshot, seconds) + "abs_max": <seconds> // captured at login (account policy snapshot, seconds) +} +``` + +**Unit convention (single source of truth):** + +| Surface | Unit | Why | +|---|---|---| +| `Settings.SESSION_*_MINUTES`, `accounts.session_*_minutes`, PATCH `/accounts/me/security` request/response, frontend form inputs | **minutes** | Human-readable, matches the column names, what owners actually edit | +| `idle_max`, `abs_max` inside the refresh JWT, `auth_time` | **seconds (Unix)** | Lets `auth_time + abs_max` be direct Unix math against `int(time.time())` with no conversion at check time | +| `idle_expires_at`, `absolute_expires_at` on API responses, `useAuthSessionExpiry` hook | **ISO 8601 UTC strings** | Matches the rest of the API surface (`DateTime(timezone=True)` everywhere) | + +`resolve_session_policy(account)` (see §4.4) returns minutes; the `_mint_session_tokens` helper multiplies by 60 once when stamping the JWT. That's the only place the conversion happens. + +Why snapshot `idle_max`/`abs_max` into the JWT instead of looking up the account policy on every refresh? Two reasons: + +- Refresh path stays DB-cheap (one query, not two). +- If an owner tightens the policy after a user has logged in, the user's existing session continues under the policy in effect at login — fairer UX, matches what Okta and Microsoft do. New logins pick up the tightened policy. 
+ +Counter-consideration: if an owner *loosens* policy, existing sessions stay tight until next login. Acceptable; users won't notice. The owner-tightens case (security event) is the one that matters, and the owner-only bulk session revocation endpoint covers that scenario (see §4.11). + +### 4.3 Per-account policy storage + +New columns on `accounts`: + +| Column | Type | Nullable | Meaning | +|---|---|---|---| +| `session_idle_minutes` | `Integer` | yes | NULL = use system default | +| `session_absolute_minutes` | `Integer` | yes | NULL = use system default | + +Minutes (not days) so admins can configure shorter windows for high-security tenants if needed. Stored as Integer to match existing pattern; conversion to `timedelta` happens at use site. + +System-imposed bounds (in `Settings`, environment-overridable): + +| Setting | Default | Floor | Ceiling | +|---|---|---|---| +| `SESSION_IDLE_MINUTES_DEFAULT` | 4320 (3d) | n/a | n/a | +| `SESSION_ABSOLUTE_MINUTES_DEFAULT` | 20160 (14d) | n/a | n/a | +| `SESSION_IDLE_MINUTES_MIN` | 15 | hard floor | account override cannot go below | +| `SESSION_IDLE_MINUTES_MAX` | 43200 (30d) | account override cannot go above | | +| `SESSION_ABSOLUTE_MINUTES_MIN` | 60 (1h) | hard floor | | +| `SESSION_ABSOLUTE_MINUTES_MAX` | 129600 (90d) | account override cannot go above | | + +Plus invariant: an account's *effective* idle window must not exceed its *effective* absolute window. Enforcement is layered: + +- **App-level (PATCH endpoint, authoritative):** before writing the row, resolve both effective values (`override ?? system_default`) and reject when effective idle > effective absolute. This is the only place that knows the current system defaults, so it's the only place that can catch a partial-override hole like `session_idle_minutes=43200, session_absolute_minutes=NULL` when the system absolute default is 20160. 
+- **DB CHECK constraint (defense in depth, narrower):** `session_idle_minutes IS NULL OR session_absolute_minutes IS NULL OR session_idle_minutes <= session_absolute_minutes`. This only catches the both-set case; the partial-override case is intentionally outside the DB's reach because the DB can't see `Settings`. Document this in a comment on the constraint. + +Alternative considered: require both columns to be NULL or both set (XOR-with-NULL). Rejected because it forces an owner who only wants to override idle to also re-declare the absolute window, which leaks the system default into account data and makes the system default harder to evolve later. + +### 4.4 Resolution function + +```python +# backend/app/core/security.py +def resolve_session_policy(account: Account) -> tuple[int, int]: + """Return (idle_minutes, absolute_minutes) for an account, applying defaults.""" + idle = account.session_idle_minutes or settings.SESSION_IDLE_MINUTES_DEFAULT + abs_ = account.session_absolute_minutes or settings.SESSION_ABSOLUTE_MINUTES_DEFAULT + return idle, abs_ +``` + +Called once at each of the four token-issuing entry points listed in §4.6 (`/auth/login`, `/auth/login/json`, `/auth/google/callback`, `/auth/microsoft/callback`) and snapshotted into the JWT via `_mint_session_tokens`. Not called on `/auth/refresh` — that path carries forward the existing snapshot. + +### 4.5 Refresh endpoint changes + +`POST /auth/refresh` (`backend/app/api/endpoints/auth.py:377`) currently: +1. Decodes refresh JWT (via `get_refresh_token_payload` dep). +2. Atomically revokes old JTI (`UPDATE … SET revoked_at=now() WHERE token_hash=? AND revoked_at IS NULL RETURNING …`). +3. Mints new refresh + access tokens with same `sub`. + +New algorithm (precise): + +1. Decode refresh JWT (idle expiry already surfaced as `session_expired_idle` by `decode_refresh_token_strict`; see §4.10). +2. **NEW:** load `user` and `user.account` by `sub` from the decoded payload. 
Needed before any legacy-token handling because the grandfather path needs to read the account's current policy. If the user is missing or inactive, return 401 with `detail="invalid_refresh_token"` (existing behavior, unchanged). +3. **NEW (grandfather path):** if `auth_time` is missing from the payload (legacy token issued before this PR), treat it as `now()` and snapshot the loaded account's current policy via `resolve_session_policy(account)` into `idle_max`/`abs_max`. One free rotation under the new policy. +4. **NEW:** compute `absolute_deadline = auth_time + abs_max` (both in Unix seconds). Compare with `now >= absolute_deadline`, not `>` — a token whose deadline equals `now()` is expired, not valid. +5. **Atomically revoke the JTI regardless of outcome** (single UPDATE, same statement as today). This consumes the token whether or not the absolute check passes — so an absolute-expired token cannot be replayed forever; a second attempt finds the row already `revoked_at IS NOT NULL` and falls through to the existing "invalid or revoked refresh token" 401. +6. If the atomic UPDATE matched zero rows (already revoked): 401 with `detail="invalid_refresh_token"`. +7. If `now >= absolute_deadline`: 401 with `detail="session_expired_absolute"`. (The row is already revoked from step 5.) +8. Otherwise mint new tokens, **carrying forward `auth_time`, `idle_max`, `abs_max` unchanged** from the old token (or freshly snapshotted if grandfathered in step 3). + +Helper contract: `_refresh_session_tokens(payload, user, account, db) -> Token`. Takes the validated decoded payload plus the already-loaded user/account so it doesn't re-query. Returns the same `Token` shape as `_mint_session_tokens` (with the two new ISO expiry fields). Distinct from `_mint_session_tokens` because the refresh path carries claims forward instead of resolving policy. 
+ +Idle expiry is handled earlier in the chain: `get_refresh_token_payload` calls `decode_token`, which returns `None` for any JWT past `exp` — that's the existing 401 path. See §4.10 for distinguishing idle expiry from generic invalid-token errors in the response. + +### 4.6 Login endpoints + +Token-issuing endpoints that need the snapshot logic (verified against the codebase): + +| Endpoint | File:line | Response model | +|---|---|---| +| `POST /auth/login` (form-encoded, OAuth2PasswordRequestForm) | `backend/app/api/endpoints/auth.py:303` | `Token` | +| `POST /auth/login/json` (JSON body — what the frontend actually calls) | `backend/app/api/endpoints/auth.py:342` | `Token` | +| `POST /auth/google/callback` | `backend/app/api/endpoints/oauth.py:174` | `OAuthCallbackResponse` | +| `POST /auth/microsoft/callback` | `backend/app/api/endpoints/oauth.py:204` | `OAuthCallbackResponse` | +| `POST /auth/refresh` | `backend/app/api/endpoints/auth.py:377` | `Token` | + +`POST /auth/register` (`auth.py:92`) returns `UserResponse` and **does not auto-login** — the frontend follows up with a separate call to `/auth/login/json`. No token-minting changes needed in `/register` itself; the subsequent `/login/json` call will pick up the new claims naturally. + +Each of the four token-issuing endpoints (login, login/json, both OAuth callbacks) calls `create_refresh_token` with the extra claims. Wrap in a helper `_mint_session_tokens(user, account, db) -> Token` (or `OAuthCallbackResponse` — see §4.10 on shared response fields) to avoid drift across four sites. `/auth/refresh` uses a variant that carries forward existing claims instead of re-snapshotting policy. 
+ +### 4.7 Account security endpoint + +New endpoint module: `backend/app/api/endpoints/account_security.py` + +``` +GET /accounts/me/security → returns {idle_minutes, absolute_minutes, effective_idle_minutes, effective_absolute_minutes, system_min/max bounds} +PATCH /accounts/me/security → owner only; validates bounds + invariant; writes account row +``` + +`require_account_owner` from `app/api/deps.py:189` enforces ownership. Returns the *effective* values (after defaults applied) so the frontend doesn't have to know about NULL semantics. + +### 4.8 Frontend changes + +**Response-field naming (single scheme, used everywhere):** + +Both `Token` (`/auth/login`, `/auth/login/json`, `/auth/refresh`) and `OAuthCallbackResponse` (`/auth/google/callback`, `/auth/microsoft/callback`) gain two new fields: + +| Field | Type | Source | +|---|---|---| +| `idle_expires_at` | ISO 8601 UTC string | derived from refresh JWT `exp` | +| `absolute_expires_at` | ISO 8601 UTC string | derived from refresh JWT `auth_time + abs_max` | + +ISO strings (not Unix ints) for consistency with the rest of the API surface, which uses `DateTime(timezone=True)` everywhere. Frontend parses with `new Date(...)`. + +**New hook:** `frontend/src/hooks/useAuthSessionExpiry.ts` +- Reads `idleExpiresAt` and `absoluteExpiresAt` from `authStore`. +- Returns `{ idleExpiresAt, absoluteExpiresAt, warning, reason }` where `warning ∈ {"none", "soon", "now"}` and `reason ∈ {"idle", "absolute"}` indicating which window is closer. +- "soon" fires at T-5min on whichever window comes first. +- Pairs with a top-of-app `<SessionExpiryToast />` mounted in `AppLayout.tsx`. + +**Modified:** `frontend/src/api/client.ts` interceptor +- On 401 with `detail="session_expired_absolute"` **or** `detail="session_expired_idle"`: **skip the refresh attempt**, flush tokens, redirect to `/login?reason=session_expired`. (Both surfaces go through the same banner — users don't need to distinguish the two.) 
+- On 401 with `detail="invalid_refresh_token"` or any other detail: current behavior (drop to `/login` without the reason banner). +- Existing access-token-expired flow (transparent `/auth/refresh`) unchanged. + +**Modified:** `frontend/src/store/authStore.ts` +- `setTokens(token: Token)` (`authStore.ts:140`) is the single token-persistence path used by both `login()` and the OAuth flow. Extend the `Token` type with `idle_expires_at` + `absolute_expires_at`; `setTokens` writes them to store + localStorage alongside the access/refresh tokens. No new action. +- The Axios refresh interceptor (`api/client.ts:139`) destructures `access_token, refresh_token` today — extend to read the two new fields and call `setTokens` so refreshed sessions update their expiry metadata. +- **Legacy-state migration:** on store rehydrate, if tokens exist but `idle_expires_at` / `absolute_expires_at` are missing from localStorage, leave them `null` and let the next `/auth/refresh` populate them via response fields. The hook treats `null` as "unknown — don't warn yet." No forced logout for pre-deploy localStorage. + +**Modified:** `frontend/src/pages/OAuthCallbackPage.tsx` +- The `setTokens({...})` call at `OAuthCallbackPage.tsx:102` currently passes `{access_token, refresh_token, token_type}` from the `OAuthCallbackResponse`. Add `idle_expires_at` and `absolute_expires_at` to the spread so OAuth-issued sessions get the same expiry metadata as password logins. + +**New page:** `frontend/src/pages/account/AccountSecuritySettingsPage.tsx` +- Lives under existing `/account` routing with `requireRoleOwner` style guard. +- Two preset tiers — **Strict (3d/14d)** and **Standard (7d/30d)** — plus a **Custom** tier with two numeric inputs (idle/absolute in minutes, per the unit convention in §4.2, so sub-day windows down to the 15-minute idle floor can be entered). +- Hint copy showing the system min/max from the GET response. +- Save → PATCH → toast. +- Below the form, an info line: *"Policy changes apply to new logins. Existing sessions continue under the policy in effect at their login time. 
To force-logout existing sessions, use the actions below."* +- A separate "**Active sessions**" section with two actions (see §4.11): + - **Sign out everyone except me** (secondary button) — revokes other users' sessions in this account, leaves the caller signed in. + - **Sign out everyone, including me** (destructive-style button) — revokes all sessions for the account; the caller is immediately redirected to `/login`. Confirmation modal required. + +**Modified:** `AccountSettingsPage.tsx` +- Add a "Session Security" link card to the existing grid (owner-only visibility). + +**New login page banner:** when `?reason=session_expired` is present, show a calm info banner: "Your session ended for security. Please sign in again." (No alarm UI, just clarity. Same banner for both idle and absolute expiry; the user doesn't need to learn the distinction.) + +### 4.9 Migration + +`alembic revision -m "add session policy columns to accounts"` (manual, per Lesson 77). + +```sql +ALTER TABLE accounts + ADD COLUMN session_idle_minutes INTEGER, + ADD COLUMN session_absolute_minutes INTEGER, + ADD CONSTRAINT session_idle_le_absolute_when_both_set + CHECK (session_idle_minutes IS NULL + OR session_absolute_minutes IS NULL + OR session_idle_minutes <= session_absolute_minutes); + +COMMENT ON CONSTRAINT session_idle_le_absolute_when_both_set ON accounts IS + 'Defense in depth: catches idle > absolute when both are overridden. ' + 'The partial-override case (one NULL, one set) is validated at the app layer ' + 'against current system defaults, since the DB cannot see Settings.'; +``` + +No backfill: NULL is the intended state for "use system default." + +Confirm: `accounts` is in the global-tables list per PROJECT_CONTEXT.md, so the migration does **not** add RLS predicates. Verified — `accounts` is explicitly named there. 
+ +### 4.10 Error-detail taxonomy + +`/auth/refresh` returns 401 with one of these `detail` values, so the frontend can distinguish UX paths: + +| `detail` | When | Frontend action | +|---|---|---| +| `session_expired_idle` | refresh JWT past `exp` (idle window elapsed) | flush tokens, redirect `/login?reason=session_expired` | +| `session_expired_absolute` | refresh JWT alive, but `now >= auth_time + abs_max` | flush tokens, redirect `/login?reason=session_expired` | +| `invalid_refresh_token` | JTI not in DB, already revoked, signature bad, type mismatch | flush tokens, redirect `/login` (no banner) | + +Implementation note: `decode_token` currently swallows `JWTError` and returns `None`, so idle expiry is indistinguishable from a signature failure at the dep level. Fix by switching `get_refresh_token_payload` (or adding a sibling) to call `jwt.decode` directly and catch `ExpiredSignatureError` separately from generic `JWTError`. Idle-expired tokens raise the former; map that to `session_expired_idle`. All other JWT errors map to `invalid_refresh_token`. + +### 4.11 Bulk session revocation (kill-all-sessions) + +**Endpoint:** `POST /accounts/me/security/revoke-sessions`, owner-only via `require_account_owner`. + +**Request body:** +```json +{ "scope": "all" | "others" } +``` +Default `"all"` if body omitted. `"others"` excludes the calling user's own refresh tokens (so the owner stays signed in); `"all"` includes them. + +**Response:** +```json +{ "revoked_count": <int> } +``` + +**Behavior:** +- Single SQL UPDATE: `refresh_tokens.revoked_at = now()` for rows where `user_id IN (SELECT id FROM users WHERE account_id = :caller_account_id)` AND `revoked_at IS NULL`. If `scope="others"`, also AND `user_id != caller.id`. +- All affected users' next `/auth/refresh` matches zero rows in the atomic revoke (§4.5 step 5) → 401 `invalid_refresh_token` → redirect to `/login` (no banner — the user was signed out by an admin, not by expiry; the plain `/login` redirect is honest UX). 
+- Caller's access token is not revoked (we don't track access JTIs by design); it dies naturally on its 5-minute timer. For `scope="all"`, the frontend handles UX by clearing localStorage and redirecting to `/login` after the response — so the stale access token simply isn't used. Accept the 5-minute window where the caller's access token could in theory still hit endpoints; this matches the existing logout flow and is consistent with the threat model (the action is "kick everyone out," not "instantly invalidate every credential"). + +**Audit:** writes one `account.sessions_revoked_bulk` event with `{actor_user_id, account_id, scope, revoked_count}`. + +**Out of scope:** distinguishing `session_revoked_by_admin` from `invalid_refresh_token` on the wire for affected users. Doing so requires tracking the revocation reason per `refresh_tokens` row (new column). Not worth the complexity right now — the affected user just sees they're logged out, same as if they'd been logged out for any other reason. Revisit if pilots ask for it. + +**Why not also per-user-device revoke?** Refresh tokens today don't carry device/user-agent metadata; the unit of granularity is "all of user X's active sessions" (which is most of what people want anyway — e.g., I lost my laptop). The endpoint is account-scoped because that's the owner-control story we're shipping. Per-user device list is a follow-up if/when needed (§9). + +## 5. Backward compatibility + +### 5.1 Existing refresh tokens (no `auth_time` claim) + +On first `/auth/refresh` after deploy: +- Backend detects missing `auth_time`, treats current time as `auth_time`, snapshots current account policy. +- User effectively gets one free 14-day absolute window starting at first post-deploy refresh. + +Trade-off vs forcing universal re-login on deploy: +- ✅ Zero deploy-day support burden (no pilots flood Slack with "I got logged out"). +- ❌ Users with active sessions see no enforcement for up to 14 days. 
+ +Given the user base is small (pilot phase) and the bigger goal is *new* signups have a secure default, the friendly path wins. + +### 5.2 If we ever need to invalidate everyone + +`SECRET_KEY` rotation kills all existing tokens. Documented in `DEV-ENV.md` but not part of this PR. + +## 6. Test plan + +Backend (`backend/tests/test_session_policy.py` — new file, unless noted): + +1. **Default policy applied** — login without account override → JWT has `idle_max=259200`, `abs_max=1209600` (seconds; 3d/14d). Account/settings columns are minutes (4320/20160); the helper multiplies by 60 when stamping. +2. **Account override honored** — owner PATCHes `session_idle_minutes=60`, `session_absolute_minutes=240` → next login JWT has `idle_max=3600`, `abs_max=14400` (seconds). +3. **Override bounds enforced** — PATCH idle below `SESSION_IDLE_MINUTES_MIN` → 422; PATCH absolute above `SESSION_ABSOLUTE_MINUTES_MAX` → 422. +4. **Invariant enforced (both-set)** — PATCH idle=300, absolute=120 → 422. +5. **Invariant enforced (partial override)** — system default absolute=20160; PATCH idle=43200 with absolute=NULL → 422 (effective idle > effective absolute, app-layer check). +6. **DB constraint catches both-set inversion** — direct SQL `UPDATE accounts SET session_idle_minutes=300, session_absolute_minutes=120` rolls back with `CheckViolation`. +7. **Non-owner cannot PATCH** — engineer/viewer get 403. +8. **Refresh respects absolute cap (boundary)** — set `auth_time = now - abs_max` exactly → refresh 401 with `session_expired_absolute` (deadline check is `>=`, not `>`). +9. **Absolute-expired token is consumed** — attempt #1 returns `session_expired_absolute`; attempt #2 with the same token returns `invalid_refresh_token` (row was revoked atomically in #1, cannot be replayed). +10. **Refresh extends idle but not absolute** — rotate twice within `abs_max`; both succeed; `auth_time` unchanged across rotations. +11. 
**Idle expiry (boundary)** — set refresh `exp = now` → 401 with `session_expired_idle` (not generic `invalid_refresh_token`). +12. **Grandfather path** — legacy refresh token without `auth_time`/`idle_max`/`abs_max` → one successful rotation; new JWT has all three claims, `auth_time≈now()`. +13. **Tightening after login doesn't affect existing sessions** — login under policy A, owner tightens to policy B, refresh succeeds under A's snapshot. +14. **`/auth/login/json` carries new claims and response fields** — JWT decode shows `auth_time`/`idle_max`/`abs_max`; response body has `idle_expires_at` + `absolute_expires_at` as ISO strings. +15. **OAuth callback responses include expiry fields** — `/auth/google/callback` and `/auth/microsoft/callback` `OAuthCallbackResponse` bodies have both `idle_expires_at` and `absolute_expires_at`. Mock the Google/Microsoft token-exchange step; assert on the final response shape. +16. **Policy update writes audit row** — PATCH `/accounts/me/security` emits one `account.session_policy_update` audit event with `actor_user_id`, `account_id`, and a payload of `{old: {...}, new: {...}, effective_old: {...}, effective_new: {...}}`. Verify via the existing audit-log query in `core/audit.py`. +17. **Bulk revoke scope=all** — seed three active refresh tokens for two users in the account (one token for the caller, two for one other user). POST `/accounts/me/security/revoke-sessions` with `{"scope": "all"}` → `revoked_count=3`; caller's own refresh token is now revoked too. Both users' next `/auth/refresh` → 401 `invalid_refresh_token`. +18. **Bulk revoke scope=others** — same seed. POST with `{"scope": "others"}` → `revoked_count=2` (caller's token survives). Caller's `/auth/refresh` still succeeds; the other user's `/auth/refresh` → 401 `invalid_refresh_token`. +19. **Bulk revoke is account-scoped** — seed tokens for users in account A and account B. Owner of A POSTs revoke → `revoked_count` reflects only A's tokens; B's tokens remain active. +20. 
**Bulk revoke is owner-only** — engineer/viewer POST → 403; super_admin POST against `/me` works only if they own an account (the endpoint is `/me`, not `/{account_id}`). +21. **Bulk revoke writes audit row** — `account.sessions_revoked_bulk` with `{actor_user_id, account_id, scope, revoked_count}`. +22. **Bulk revoke is idempotent** — second immediate POST returns `revoked_count=0` (no already-revoked rows are double-stamped). + +Frontend (`frontend/src/__tests__/` or colocated `*.test.tsx`): + +- `useAuthSessionExpiry` returns `"soon"` within 5min of whichever of `idleExpiresAt`/`absoluteExpiresAt` comes first; `reason` field indicates which. +- Axios interceptor on 401 with `session_expired_absolute` redirects to `/login?reason=session_expired` instead of attempting refresh. +- Axios interceptor on 401 with `session_expired_idle` does the same. +- Axios interceptor on 401 with `invalid_refresh_token` redirects to `/login` *without* the reason banner. +- `authStore` rehydrate handles legacy localStorage shape (no `idleExpiresAt`/`absoluteExpiresAt`) without throwing or forced logout; hook treats `null` as "no warning." + +Manual: +- Log in as `owner@`, set **Custom (idle=60 min, absolute=240 min)** under Account → Session Security, log out, log in as `engineer@` (same account), decode the refresh JWT in localStorage, confirm `idle_max=3600` and `abs_max=14400` (seconds — the configured minutes × 60). +- Confirm the existing `useSessionTimer` (troubleshooting-flow timer) is unaffected by the new hook. +- Pre-deploy localStorage path: install build, log in to capture token, deploy session-policy build, refresh page — confirm no forced logout and that the next `/auth/refresh` populates the new fields. + +## 7. Rollout + +1. Land migration + backend changes behind no flag (the absolute cap is the whole point — flagging it defeats the purpose). +2. Default policy is Strict (3d/14d) for new accounts. 
Existing pilot accounts get NULL → defaults; a pilot account's policy can be loosened manually — by its owner via the new endpoint, or by an operator via direct SQL — if friction emerges. +3. After deploy, watch Sentry for spikes in `session_expired_absolute` 401s (expected: tiny — only legacy tokens approaching the 14-day mark hit this) and unexpected refresh failures. +4. Announce in pilot Slack: "We added session expiration. You'll be asked to log in again at least once every 2 weeks. Account owners can adjust under Account → Session Security." + +## 8. Files touched + +### Backend +- `backend/app/core/config.py` — new `SESSION_*` settings (defaults + min/max bounds). +- `backend/app/core/security.py` — `create_refresh_token` signature change (accepts `auth_time`/`idle_max`/`abs_max`), `resolve_session_policy(account)` helper, `decode_refresh_token_strict()` that distinguishes `ExpiredSignatureError` from generic `JWTError`. +- `backend/app/api/deps.py` — update `get_refresh_token_payload` to surface idle-expiry as `session_expired_idle` instead of collapsing into a generic 401. +- `backend/app/api/endpoints/auth.py` — refresh-endpoint logic (atomic-revoke-then-check-absolute), `_mint_session_tokens(user, account, db) -> Token` helper, login + login/json call sites. +- `backend/app/api/endpoints/oauth.py` — both callbacks call `_mint_session_tokens`; `OAuthCallbackResponse` gains the two new fields. +- `backend/app/schemas/token.py` — `Token` (`token.py:5`) adds `idle_expires_at` + `absolute_expires_at` (ISO strings). +- `backend/app/schemas/oauth.py` — `OAuthCallbackResponse` adds the same two fields. +- `backend/app/api/endpoints/account_security.py` — NEW (~130 lines: GET/PATCH for policy + POST `/revoke-sessions`, audit logging for both mutations). +- `backend/app/api/router.py` — register new router. +- `backend/app/models/account.py` — two new columns + DB CHECK constraint. 
+- `backend/app/schemas/account_security.py` — NEW (request/response: policy GET/PATCH with effective + bounds; `RevokeSessionsRequest` + `RevokeSessionsResponse`). +- `backend/app/core/audit.py` — add `account.session_policy_update` and `account.sessions_revoked_bulk` event types (or use the existing generic emitter if it accepts free-form types — verify during impl). +- `backend/alembic/versions/<rev>_add_session_policy_columns_to_accounts.py` — NEW (manual; per Lesson 77, never `--rev-id`). +- `backend/tests/test_session_policy.py` — NEW. + +### Frontend +- `frontend/src/api/client.ts` — interceptor branches on both `session_expired_idle` and `session_expired_absolute` (same redirect target `/login?reason=session_expired`); also propagates new expiry fields from successful `/auth/refresh` responses into `setTokens`. +- `frontend/src/api/auth.ts` — `Token` type adds the two new ISO fields. +- `frontend/src/store/authStore.ts` — `setTokens` persists the new expiry fields (no new action). +- `frontend/src/pages/OAuthCallbackPage.tsx` — pass `idle_expires_at` + `absolute_expires_at` through `setTokens({...})` at line 102. +- `frontend/src/hooks/useAuthSessionExpiry.ts` — NEW. +- `frontend/src/components/common/SessionExpiryToast.tsx` — NEW. +- `frontend/src/components/layout/AppLayout.tsx` — mount toast. +- `frontend/src/pages/account/AccountSecuritySettingsPage.tsx` — NEW (policy form + Active Sessions section with two revoke buttons + confirmation modal). +- `frontend/src/pages/AccountSettingsPage.tsx` — add link card. +- `frontend/src/router.tsx` — register route. +- `frontend/src/pages/LoginPage.tsx` — `?reason=session_expired` banner. + +### Docs +- `.ai/DECISIONS.md` — entry for the 3d/14d default + per-account-override architecture. +- `CURRENT-STATE.md` — add session policy to "auth surface" summary. + +Approximately 600 LoC across backend + frontend, plus tests. + +## 9. 
Resolved decisions & follow-ups + +Decisions baked into this plan (not open questions): + +- **Audit logging is required.** PATCH `/accounts/me/security` writes one `account.session_policy_update` audit event; POST `/revoke-sessions` writes `account.sessions_revoked_bulk`. Security-relevant by definition. Covered in §6 tests #16 and #21 and §8 backend file list. +- **Presets are Strict and Standard only**, plus Custom. No "Loose" preset; owners who want a loose policy can use Custom and own the choice explicitly. +- **Tightening policy mid-session does NOT force-logout existing sessions** — but owners *can* force it via the bulk-revoke endpoint in §4.11. Existing sessions continue under the policy snapshot they were issued under unless explicitly revoked. The Account Security page surfaces this in copy (§4.8). +- **Bulk revoke is account-scoped, two-mode (`all` / `others`).** Per-user device lists are out of scope (§4.11). + +Follow-up issues to file after this plan is approved (not blocking this PR): + +1. **Super-admin global lock with UI** — today, env-var ceilings cover this. File an issue to expose `SESSION_*_MAX` as a sysadmin-editable setting if/when a customer asks. +2. **Per-user device list + per-device revoke** — refresh tokens would gain `user_agent` + `ip` + `last_used_at` columns; a new "Active devices" page would let users self-revoke individual sessions. File only if a real ask arrives. The account-wide bulk revoke covers the breach-response use case in the meantime. +3. **Per-user (not per-account) policy** — out of scope. File only if a real ask arrives. + +## 10. Sequence of commits + +1. `feat(auth): add session policy settings + account columns + migration` (settings + model + migration + DB CHECK; no behavior change yet). +2. `feat(auth): distinguish idle expiry from invalid refresh tokens` (`decode_refresh_token_strict`, `session_expired_idle` detail, test #11). Lands the error-detail taxonomy from §4.10 before anything depends on it. +3. 
`feat(auth): embed auth_time/idle_max/abs_max in refresh tokens` (`security.py` + `_mint_session_tokens` helper called from `/auth/login`, `/auth/login/json`, both OAuth callbacks; `Token` and `OAuthCallbackResponse` gain `idle_expires_at` + `absolute_expires_at`). Refresh still doesn't enforce absolute cap yet. +4. `feat(auth): enforce absolute session cap in /auth/refresh` (atomic-revoke-then-check, `session_expired_absolute` detail, grandfather logic, tests #8–#13). +5. `feat(api): add GET/PATCH /accounts/me/security endpoint` (router, schemas, owner gate, bounds + partial-override invariant validation, audit logging on PATCH). +6. `feat(api): add POST /accounts/me/security/revoke-sessions` (bulk-revoke endpoint with `scope=all|others`, single-UPDATE implementation, audit logging, tests #17–#22). +7. `feat(ui): handle session_expired_{idle,absolute} in axios interceptor + authStore` (new fields persisted, legacy-state migration, redirect to `/login?reason=session_expired`). +8. `feat(ui): add AccountSecuritySettingsPage + AppLayout toast + login banner` (Strict/Standard/Custom presets, Active Sessions section with two revoke buttons + confirmation modal, `useAuthSessionExpiry`, expiry-soon toast, `?reason=session_expired` banner). +9. `docs: add decision entry + update CURRENT-STATE auth surface` (`.ai/DECISIONS.md`, `CURRENT-STATE.md`). + +Each commit independently passes `pytest --override-ini="addopts="` and `npm run build`. The two backend behavior gates (#2 and #4) ship behind no flag — they're the point of the work — but they're sequenced so any rollback is a single commit. + +--- + +**Review checklist before implementation:** + +- [x] Defaults confirmed: 3d idle / 14d absolute. +- [x] Per-account override approved. +- [x] Grandfather strategy (one free rotation) approved vs hard cutover. +- [x] Error-detail taxonomy approved (idle vs absolute distinct on the wire; same UX in the frontend). +- [x] Audit logging is a requirement, not optional. 
+- [x] Loose preset dropped; Strict / Standard / Custom only. +- [x] ISO timestamps (not Unix ints) for `idle_expires_at` / `absolute_expires_at` everywhere. +- [x] DB CHECK constraint scope documented; partial-override case validated app-side. +- [ ] System bounds in §4.3 acceptable as specified (15min idle floor, 1h absolute floor, 30d idle ceiling, 90d absolute ceiling). +- [ ] Final approval on commit sequence in §10. +- [ ] No conflict with Phase O cutover sequencing (this can ship before OR after EIN/Stripe lands; independent path). +- [ ] File the kill-all-sessions follow-up issue per §9 before implementation begins, so the Account Security page can link to it (or leave the support-contact copy in place). NOTE: §9 as written lists no kill-all-sessions follow-up — account-wide bulk revoke is now in scope (§4.11; commit 6 in §10) — so confirm whether this item is stale or should instead reference the per-user device-list follow-up (§9, item 2).