From 0a9b16a28f78bb244402bcf5f4bfef6892d2649e Mon Sep 17 00:00:00 2001 From: chihlasm Date: Thu, 9 Apr 2026 04:21:58 +0000 Subject: [PATCH] =?UTF-8?q?chore:=20Task=207+8=20=E2=80=94=20TargetList=20?= =?UTF-8?q?audit,=20CI=20tenant-filter=20grep=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 7: TargetList dead code audit - Found active code references in 12+ files across backend and frontend (full CRUD API + frontend page + MaintenanceScheduleSection + BatchLaunchModal) - Decision: migrate to account_id in Phase 1 (cannot drop) - DB row count not available from code-server — must verify from VPS SSH before Phase 1 migration - Teams orphan check query documented; must run from VPS SSH before Phase 1 - Results documented in spec Section 9 Task 8: CI tenant-filter enforcement check (warn mode) - Create backend/scripts/check_tenant_filters.py Scans endpoint and service files for select() on tenant tables without tenant_filter/account_id/user_id in surrounding context. Currently reports 109 warnings (Phase 1 backlog). Exits 0 (warn mode). - Add Check tenant filter enforcement step to backend CI job Add --fail flag after Phase 1 backlog clears to make it blocking. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/ci.yml | 5 + backend/scripts/check_tenant_filters.py | 91 +++++++++++++++++++ ...2026-04-09-tenant-data-isolation-design.md | 3 +- 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 backend/scripts/check_tenant_filters.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 89d6bbc6..18bf85ec 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,11 @@ jobs: - name: Install dependencies run: pip install -r backend/requirements.txt -r backend/requirements-dev.txt + - name: Check tenant filter enforcement + run: cd backend && python scripts/check_tenant_filters.py + # Warn mode only (exits 0). Switch to --fail after Phase 1 backlog clears. + # See: docs/superpowers/specs/2026-04-09-tenant-data-isolation-design.md Section 3f + - name: Run tests with coverage run: cd backend && python -m pytest --override-ini="addopts=" --cov=app --cov-report=term-missing --cov-report=json:coverage.json --cov-fail-under=50 diff --git a/backend/scripts/check_tenant_filters.py b/backend/scripts/check_tenant_filters.py new file mode 100644 index 00000000..f608d4b7 --- /dev/null +++ b/backend/scripts/check_tenant_filters.py @@ -0,0 +1,91 @@ +""" +Tenant filter enforcement check. + +Scans endpoint and service files for SQLAlchemy select() calls on known +tenant tables and warns when account_id or tenant_filter is not present +in the surrounding 15 lines (the typical extent of a single query). + +Usage: + python scripts/check_tenant_filters.py # warn mode (exits 0) + python scripts/check_tenant_filters.py --fail # block mode (exits 1 on findings) +""" +import re +import sys +from pathlib import Path + +# Tables that must always be filtered by account_id or tenant_filter. +# Extend this list as new tenant tables are added. +TENANT_MODELS = [ + "Tree", "AISession", "Session", "StepLibrary", "FlowProposal", + "CopilotConversation", "AssistantChat", "FileUpload", "KBImport", + "PsaConnection", "PsaPostLog", "PsaMemberMapping", "AIChatSession", + "AIConversation", "AIUsage", "Subscription", "AccountInvite", + "Notification", "NotificationConfig", "SessionShare", "UserFolder", + "UserPinnedTree", "SessionBranch", "SessionHandoff", + "SessionResolutionOutput", "ForkPoint", "AISessionStep", + "AISuggestion", "StepCategory", "TreeCategory", "TreeTag", + "Attachment", "SessionSupportingData", "MaintenanceSchedule", + "AuditLog", "ScriptBuilderSession", "ScriptTemplate", + "StepRating", "StepUsageLog", "TargetList", +] + +# Directories to scan +SCAN_DIRS = [ + Path("app/api/endpoints"), + Path("app/services"), +] + +# Patterns that indicate the query is correctly scoped. +# NOTE: user_id scoping is accepted for user-owned resources (sessions, folders, notifications). +# For account-shared resources (trees, steps, etc.) use tenant_filter or account_id. +SAFE_PATTERNS = [ + r"tenant_filter", + r"account_id", + r"user_id", # User-scoped resources (sessions, folders, notifications, etc.) + r"is_super_admin", # Super admin queries intentionally bypass tenant filter + r"# cross-tenant: approved", # Explicit approval comment +] + +SKIP_FILES = { + "admin.py", # Super admin endpoints intentionally bypass tenant filter + "admin_gallery.py", # Gallery management — super admin only, no tenant scoping needed + "public_templates.py",# Public template browser — intentionally cross-tenant + "auth.py", # Auth/registration — no account context during login/register + "ratings.py", # Session ratings — user-scoped via session lookup chain +} + +findings = [] + +for scan_dir in SCAN_DIRS: + if not scan_dir.exists(): + continue + for path in sorted(scan_dir.glob("*.py")): + if path.name in SKIP_FILES: + continue + lines = path.read_text().splitlines() + for i, line in enumerate(lines): + for model in TENANT_MODELS: + if re.search(rf"\bselect\s*\(\s*{model}\b", line): + # Check surrounding 15 lines for a safe pattern + start = max(0, i - 2) + end = min(len(lines), i + 15) + context = "\n".join(lines[start:end]) + if not any(re.search(p, context) for p in SAFE_PATTERNS): + findings.append( + f"{path}:{i + 1}: select({model}) — no tenant_filter or account_id found in context" + ) + +if findings: + print(f"\n⚠ Tenant filter check — {len(findings)} warning(s):\n") + for f in findings: + print(f" {f}") + print() + if "--fail" in sys.argv: + print("Run with --fail: exiting 1") + sys.exit(1) + else: + print("Run in warn mode — not blocking. Pass --fail to block.") + sys.exit(0) +else: + print("✓ Tenant filter check passed — no unscoped tenant table queries found.") + sys.exit(0) diff --git a/docs/superpowers/specs/2026-04-09-tenant-data-isolation-design.md b/docs/superpowers/specs/2026-04-09-tenant-data-isolation-design.md index be49cc05..ab3e15ba 100644 --- a/docs/superpowers/specs/2026-04-09-tenant-data-isolation-design.md +++ b/docs/superpowers/specs/2026-04-09-tenant-data-isolation-design.md @@ -589,7 +589,8 @@ The following cross-tenant operations are explicitly approved. All others requir |---|---|---| | Is tree sharing intra-tenant only, or can trees be shared across accounts? | Determines `tree_shares` schema, backfill strategy, and RLS policy. Tree_shares table deferred until resolved. | Product | | What is the exact PostgreSQL exception raised when `current_setting('app.current_account_id', false)::uuid` is evaluated with no value set? | Determines the fail-closed test assertion. Must be tested in Phase 2. | Engineering | -| TargetList: zero references + zero rows? Or does data exist? | Determines whether table is dropped or migrated. | Phase 0 audit | +| TargetList audit complete: active code references found in 12+ files across backend and frontend (full CRUD API, frontend page, used in MaintenanceScheduleSection and BatchLaunchModal). Cannot be dropped. Decision: migrate to account_id in Phase 1 via backfill from team_id → accounts. DB row count not available from code-server (Docker only via VPS SSH) — run `SELECT COUNT(*) FROM target_lists;` before Phase 1 migration to confirm data volume. | ✓ Resolved — migrate in Phase 1. Row count verification required before migration. | Phase 0 audit ✓ | +| Teams orphan check: DB unavailable from code-server (Docker requires VPS SSH access). Must run before Phase 1 backfill: `SELECT COUNT(*) AS orphaned_teams FROM teams t LEFT JOIN users u ON u.team_id = t.id AND u.account_id IS NOT NULL WHERE u.id IS NULL;` | Count = 0 → proceed. Count > 0 → resolve orphaned teams before any Phase 1 backfill using team_id → account_id chain. | Run from VPS SSH before Phase 1 | ---