fix(prod): harden production configuration for launch
All checks were successful
Mirror to GitHub / mirror (push) Successful in 5s
All checks were successful
Mirror to GitHub / mirror (push) Successful in 5s
- Gate Swagger/ReDoc/OpenAPI behind DEBUG (no public API schema in prod)
- Sentry send_default_pii only in dev (no auth headers/bodies in events)
- Remove alembic from Dockerfile CMD (releaseCommand owns migrations; CMD copy raced across replicas/restarts)
- Decouple rate limiting from DEBUG via RATE_LIMIT_ENABLED (PR envs with DEBUG=true were unlimited); tests disable the live limiter in conftest
- max_instances=1 on the 4 scheduler jobs missing it
- Boot-time failure when SELF_SERVE_ENABLED without RESEND_API_KEY/ANTHROPIC_API_KEY/FRONTEND_URL
- Reject localhost OAUTH_REDIRECT_BASE outside DEBUG
- pool_pre_ping + pool_recycle on the app engine
- Frontend: DEV-gate stale-async console.warn; document VITE_SELF_SERVE_ENABLED fallback semantics in Dockerfile

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -39,4 +39,6 @@ STRIPE_WEBHOOK_SECRET=whsec_
|
|||||||
# global flag for specific users — used for prod test-mode validation
|
# global flag for specific users — used for prod test-mode validation
|
||||||
# before the public flip. Empty by default.
|
# before the public flip. Empty by default.
|
||||||
SELF_SERVE_ENABLED=false
|
SELF_SERVE_ENABLED=false
|
||||||
INTERNAL_TESTER_EMAILS=
|
INTERNAL_TESTER_EMAILS=
|
||||||
|
# Rate limiting (decoupled from DEBUG; set to true in PR/staging/prod)
|
||||||
|
RATE_LIMIT_ENABLED=false
|
||||||
|
|||||||
@@ -24,5 +24,6 @@ COPY . .
|
|||||||
# Expose port (Railway uses PORT env variable)
|
# Expose port (Railway uses PORT env variable)
|
||||||
EXPOSE 8000
|
EXPOSE 8000
|
||||||
|
|
||||||
# Run migrations then start the application
|
# Migrations run exclusively via Railway releaseCommand (scripts/release) —
|
||||||
CMD alembic upgrade head && uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8000}
|
# running them here too would race across replicas/restarts.
|
||||||
|
CMD uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8000}
|
||||||
|
|||||||
@@ -85,6 +85,10 @@ class Settings(BaseSettings):
|
|||||||
# Security
|
# Security
|
||||||
BCRYPT_ROUNDS: int = 12
|
BCRYPT_ROUNDS: int = 12
|
||||||
|
|
||||||
|
# Rate limiting — independent of DEBUG so PR/staging envs running with
|
||||||
|
# DEBUG=true still rate-limit auth and AI endpoints.
|
||||||
|
RATE_LIMIT_ENABLED: bool = True
|
||||||
|
|
||||||
# Security Headers
|
# Security Headers
|
||||||
CSP_REPORT_ONLY: bool = True # Set False to enforce CSP
|
CSP_REPORT_ONLY: bool = True # Set False to enforce CSP
|
||||||
CSP_EXTRA_SCRIPT_SOURCES: list[str] = [] # Additional script-src domains
|
CSP_EXTRA_SCRIPT_SOURCES: list[str] = [] # Additional script-src domains
|
||||||
@@ -255,6 +259,18 @@ class Settings(BaseSettings):
|
|||||||
MS_CLIENT_SECRET: Optional[str] = None
|
MS_CLIENT_SECRET: Optional[str] = None
|
||||||
OAUTH_REDIRECT_BASE: str = "http://localhost:5173"
|
OAUTH_REDIRECT_BASE: str = "http://localhost:5173"
|
||||||
|
|
||||||
|
@field_validator("OAUTH_REDIRECT_BASE", mode="after")
@classmethod
def reject_localhost_redirect_in_production(cls, v: str, info) -> str:
    """Reject a loopback OAUTH_REDIRECT_BASE unless DEBUG is enabled.

    An OAuth code exchange against a localhost redirect_uri is always a
    misconfiguration outside DEBUG, so fail at boot rather than at the
    first sign-in.

    The check inspects the parsed hostname rather than a string prefix, so
    it also catches ``https://localhost``, ``127.0.0.1``, ``[::1]``,
    ``0.0.0.0`` and ``*.localhost``, and does not trip on real hosts whose
    name merely begins with "localhost".

    Args:
        v: The configured redirect base URL.
        info: Validation info; ``info.data`` is read for the ``DEBUG``
            value. If ``DEBUG`` is not present there it is treated as
            False, i.e. the strict check applies.

    Returns:
        ``v`` unchanged when it is acceptable.

    Raises:
        ValueError: If DEBUG is falsy and ``v`` points at a loopback or
            unspecified host.
    """
    # Function-scope imports keep this block self-contained.
    from ipaddress import ip_address
    from urllib.parse import urlsplit

    if info.data.get("DEBUG", False):
        return v

    host = (urlsplit(v).hostname or "").lower()
    is_local = host == "localhost" or host.endswith(".localhost")
    if not is_local:
        try:
            addr = ip_address(host)
        except ValueError:
            # Not an IP literal (ordinary DNS name or empty) — not loopback.
            addr = None
        if addr is not None:
            is_local = addr.is_loopback or addr.is_unspecified

    if is_local:
        raise ValueError(
            "OAUTH_REDIRECT_BASE must be set to the public frontend URL in production"
        )
    return v
|
||||||
|
|
||||||
# Monitoring
|
# Monitoring
|
||||||
SENTRY_DSN: Optional[str] = None
|
SENTRY_DSN: Optional[str] = None
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,10 @@ from app.core.tenant_context import register_tenant_listener
|
|||||||
engine = create_async_engine(
|
engine = create_async_engine(
|
||||||
settings.DATABASE_URL,
|
settings.DATABASE_URL,
|
||||||
echo=settings.DEBUG,
|
echo=settings.DEBUG,
|
||||||
future=True
|
future=True,
|
||||||
|
# Detect connections dropped by DB restarts/maintenance instead of failing requests
|
||||||
|
pool_pre_ping=True,
|
||||||
|
pool_recycle=1800,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create async session factory
|
# Create async session factory
|
||||||
|
|||||||
@@ -3,4 +3,4 @@ from slowapi.util import get_remote_address
|
|||||||
|
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
|
|
||||||
limiter = Limiter(key_func=get_remote_address, enabled=not settings.DEBUG)
|
limiter = Limiter(key_func=get_remote_address, enabled=settings.RATE_LIMIT_ENABLED)
|
||||||
|
|||||||
@@ -15,7 +15,9 @@ if settings.SENTRY_DSN:
|
|||||||
sentry_sdk.init(
|
sentry_sdk.init(
|
||||||
dsn=settings.SENTRY_DSN,
|
dsn=settings.SENTRY_DSN,
|
||||||
environment="development" if settings.DEBUG else "production",
|
environment="development" if settings.DEBUG else "production",
|
||||||
send_default_pii=True,
|
# PII (headers, bodies, IPs) only in dev — prod events must not capture
|
||||||
|
# auth tokens or customer data from a multi-tenant MSP product.
|
||||||
|
send_default_pii=settings.DEBUG,
|
||||||
traces_sample_rate=1.0 if settings.DEBUG else 0.2,
|
traces_sample_rate=1.0 if settings.DEBUG else 0.2,
|
||||||
# Profiling — included in free plan
|
# Profiling — included in free plan
|
||||||
profiles_sample_rate=1.0 if settings.DEBUG else 0.2,
|
profiles_sample_rate=1.0 if settings.DEBUG else 0.2,
|
||||||
@@ -151,6 +153,21 @@ async def lifespan(app: FastAPI):
|
|||||||
logger.info("Starting ResolutionFlow API server...")
|
logger.info("Starting ResolutionFlow API server...")
|
||||||
logger.info(f"Environment: {'Development' if settings.DEBUG else 'Production'}")
|
logger.info(f"Environment: {'Development' if settings.DEBUG else 'Production'}")
|
||||||
logger.info(f"ALLOW_RAILWAY_ORIGINS: {settings.ALLOW_RAILWAY_ORIGINS}")
|
logger.info(f"ALLOW_RAILWAY_ORIGINS: {settings.ALLOW_RAILWAY_ORIGINS}")
|
||||||
|
|
||||||
|
# Self-serve signup is broken without these — fail loudly at boot, not at the
|
||||||
|
# first customer's signup attempt.
|
||||||
|
if settings.SELF_SERVE_ENABLED and not settings.DEBUG:
|
||||||
|
missing = [
|
||||||
|
name for name, value in (
|
||||||
|
("RESEND_API_KEY", settings.RESEND_API_KEY),
|
||||||
|
("ANTHROPIC_API_KEY", settings.ANTHROPIC_API_KEY),
|
||||||
|
("FRONTEND_URL", settings.FRONTEND_URL),
|
||||||
|
) if not value
|
||||||
|
]
|
||||||
|
if missing:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"SELF_SERVE_ENABLED=true but required settings are unset: {', '.join(missing)}"
|
||||||
|
)
|
||||||
# Note: In production, use Alembic migrations instead of init_db
|
# Note: In production, use Alembic migrations instead of init_db
|
||||||
# await init_db()
|
# await init_db()
|
||||||
|
|
||||||
@@ -170,6 +187,7 @@ async def lifespan(app: FastAPI):
|
|||||||
hours=1,
|
hours=1,
|
||||||
id="cleanup_ai_conversations",
|
id="cleanup_ai_conversations",
|
||||||
replace_existing=True,
|
replace_existing=True,
|
||||||
|
max_instances=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Chat retention cleanup (daily)
|
# Chat retention cleanup (daily)
|
||||||
@@ -179,6 +197,7 @@ async def lifespan(app: FastAPI):
|
|||||||
hours=24,
|
hours=24,
|
||||||
id="cleanup_expired_chats",
|
id="cleanup_expired_chats",
|
||||||
replace_existing=True,
|
replace_existing=True,
|
||||||
|
max_instances=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Auto-archive stale AI chat sessions (daily at 3 AM)
|
# Auto-archive stale AI chat sessions (daily at 3 AM)
|
||||||
@@ -188,6 +207,7 @@ async def lifespan(app: FastAPI):
|
|||||||
hour=3,
|
hour=3,
|
||||||
id="archive_stale_ai_sessions",
|
id="archive_stale_ai_sessions",
|
||||||
replace_existing=True,
|
replace_existing=True,
|
||||||
|
max_instances=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
# PSA push retry (every 5 minutes)
|
# PSA push retry (every 5 minutes)
|
||||||
@@ -198,6 +218,7 @@ async def lifespan(app: FastAPI):
|
|||||||
minutes=5,
|
minutes=5,
|
||||||
id="psa_push_retry",
|
id="psa_push_retry",
|
||||||
replace_existing=True,
|
replace_existing=True,
|
||||||
|
max_instances=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Knowledge Flywheel analysis (every 5 minutes)
|
# Knowledge Flywheel analysis (every 5 minutes)
|
||||||
@@ -252,9 +273,10 @@ app = FastAPI(
|
|||||||
title=settings.APP_NAME,
|
title=settings.APP_NAME,
|
||||||
description="ResolutionFlow - Take the path MOST traveled. Guided troubleshooting with automatic documentation.",
|
description="ResolutionFlow - Take the path MOST traveled. Guided troubleshooting with automatic documentation.",
|
||||||
version="1.0.0",
|
version="1.0.0",
|
||||||
docs_url="/api/docs",
|
# Interactive docs + schema are dev-only; prod must not expose the full API surface.
|
||||||
redoc_url="/api/redoc",
|
docs_url="/api/docs" if settings.DEBUG else None,
|
||||||
openapi_url="/api/openapi.json",
|
redoc_url="/api/redoc" if settings.DEBUG else None,
|
||||||
|
openapi_url="/api/openapi.json" if settings.DEBUG else None,
|
||||||
lifespan=lifespan
|
lifespan=lifespan
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -29,6 +29,12 @@ from app import models as _models # noqa: F401
|
|||||||
# Disable invite code requirement for tests
|
# Disable invite code requirement for tests
|
||||||
settings.REQUIRE_INVITE_CODE = False
|
settings.REQUIRE_INVITE_CODE = False
|
||||||
|
|
||||||
|
# Disable rate limiting in tests — auth-heavy suites would trip the
|
||||||
|
# per-minute login/register limits. The limiter is constructed at import
|
||||||
|
# time, so flip the live instance rather than the setting.
|
||||||
|
from app.core.rate_limit import limiter as _limiter # noqa: E402
|
||||||
|
_limiter.enabled = False
|
||||||
|
|
||||||
# Test database URL — NEVER reuse DATABASE_URL. The test_db fixture does
|
# Test database URL — NEVER reuse DATABASE_URL. The test_db fixture does
|
||||||
# `DROP SCHEMA public CASCADE` on every test; if DATABASE_URL (which normally
|
# `DROP SCHEMA public CASCADE` on every test; if DATABASE_URL (which normally
|
||||||
# points at the dev/prod DB) leaked into this value, running `pytest tests/`
|
# points at the dev/prod DB) leaked into this value, running `pytest tests/`
|
||||||
|
|||||||
@@ -13,6 +13,10 @@ RUN npm ci
|
|||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
# Build arguments (set at build time)
|
# Build arguments (set at build time)
|
||||||
|
# NOTE: VITE_SELF_SERVE_ENABLED is only a network-error fallback — the live
|
||||||
|
# toggle is the backend /config/public flag; rebuilding with a new value here
|
||||||
|
# does NOT flip self-serve. VITE_STRIPE_PUBLISHABLE_KEY is currently unused
|
||||||
|
# (checkout is fully backend-driven); kept for a future client-side Stripe.js.
|
||||||
ARG VITE_API_URL
|
ARG VITE_API_URL
|
||||||
ARG VITE_SENTRY_DSN
|
ARG VITE_SENTRY_DSN
|
||||||
ARG VITE_PUBLIC_POSTHOG_KEY
|
ARG VITE_PUBLIC_POSTHOG_KEY
|
||||||
|
|||||||
@@ -266,11 +266,13 @@ export default function AssistantChatPage() {
|
|||||||
const loadedChatIdsRef = useRef<Set<string>>(new Set())
|
const loadedChatIdsRef = useRef<Set<string>>(new Set())
|
||||||
const guardCurrentChat = useCallback((expectedChatId: string, source: string) => {
|
const guardCurrentChat = useCallback((expectedChatId: string, source: string) => {
|
||||||
if (currentChatRef.current === expectedChatId) return true
|
if (currentChatRef.current === expectedChatId) return true
|
||||||
console.warn('[AssistantChat] Discarded stale async result', {
|
if (import.meta.env.DEV) {
|
||||||
source,
|
console.warn('[AssistantChat] Discarded stale async result', {
|
||||||
expectedChatId,
|
source,
|
||||||
currentChatId: currentChatRef.current,
|
expectedChatId,
|
||||||
})
|
currentChatId: currentChatRef.current,
|
||||||
|
})
|
||||||
|
}
|
||||||
return false
|
return false
|
||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user