feat: add sensitive data redaction to export (Phase C)
Server-side regex redaction masks IPs, emails, bearer/API tokens, and UNC paths in exported session content. Redaction runs post-generation and post-variable-resolution with fail-closed error handling. Frontend gets a "Mask Sensitive Data" toggle in the export preview modal with a summary of what was redacted. 24 unit tests passing, frontend build clean. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -314,12 +314,33 @@ async def export_session(
|
||||
from app.services.variable_service import resolve_variables
|
||||
content = resolve_variables(content, session_vars)
|
||||
|
||||
# Phase C: Apply redaction AFTER generation and variable resolution
|
||||
redaction_summary = None
|
||||
if export_options.redaction_mode == "mask":
|
||||
from app.services.redaction_service import apply_redaction_to_text, format_redaction_footer
|
||||
try:
|
||||
content, redaction_summary = apply_redaction_to_text(content)
|
||||
footer = format_redaction_footer(redaction_summary)
|
||||
if footer:
|
||||
content += footer
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Redaction processing failed"
|
||||
)
|
||||
|
||||
# Only mark as exported if session is completed
|
||||
if session.completed_at:
|
||||
session.exported = True
|
||||
await db.commit()
|
||||
|
||||
return PlainTextResponse(content=content, media_type=media_type)
|
||||
# Build response with redaction headers
|
||||
import json
|
||||
headers = {"X-Redaction-Mode": export_options.redaction_mode}
|
||||
if redaction_summary is not None:
|
||||
headers["X-Redaction-Summary"] = json.dumps(redaction_summary.to_dict())
|
||||
|
||||
return PlainTextResponse(content=content, media_type=media_type, headers=headers)
|
||||
|
||||
|
||||
# --- Save Session as Tree ---
|
||||
|
||||
@@ -59,6 +59,7 @@ if settings.ALLOW_RAILWAY_ORIGINS:
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
expose_headers=["X-Redaction-Mode", "X-Redaction-Summary"],
|
||||
)
|
||||
else:
|
||||
app.add_middleware(
|
||||
@@ -67,6 +68,7 @@ else:
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
expose_headers=["X-Redaction-Mode", "X-Redaction-Summary"],
|
||||
)
|
||||
|
||||
# Include API router
|
||||
|
||||
@@ -92,6 +92,8 @@ class SessionExport(BaseModel):
|
||||
# Phase B
|
||||
include_summary: bool = False
|
||||
detail_level: Literal["standard", "full"] = "standard"
|
||||
# Phase C
|
||||
redaction_mode: Literal["none", "mask"] = "none"
|
||||
|
||||
|
||||
class SessionComplete(BaseModel):
|
||||
|
||||
113
backend/app/services/redaction_service.py
Normal file
113
backend/app/services/redaction_service.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Sensitive data redaction service for export content.
|
||||
|
||||
Applies regex-based pattern matching to mask IPs, emails, tokens, and UNC paths.
|
||||
Redaction is non-persistent and request-scoped — database records are never mutated.
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable
|
||||
|
||||
|
||||
@dataclass
class RedactionSummary:
    """Per-category tally of the replacements made during one redaction run.

    Every counter defaults to zero, so a freshly constructed instance
    represents "nothing was redacted".
    """

    ips: int = 0        # IP address replacements
    emails: int = 0     # e-mail address replacements
    tokens: int = 0     # bearer / long-token replacements
    unc_paths: int = 0  # UNC path replacements

    @property
    def total(self) -> int:
        """Return the combined count across all four categories."""
        return sum((self.ips, self.emails, self.tokens, self.unc_paths))

    def to_dict(self) -> dict:
        """Return the counters plus ``total`` as a plain dict.

        Keys appear in the order ips, emails, tokens, unc_paths, total.
        """
        counters = {
            name: getattr(self, name)
            for name in ("ips", "emails", "tokens", "unc_paths")
        }
        counters["total"] = self.total
        return counters
|
||||
|
||||
|
||||
# --- Compiled patterns (module-level, not per-request) ---
|
||||
# Order matters: more specific/longer patterns first to prevent partial matches.
|
||||
|
||||
_PATTERNS: list[tuple[re.Pattern, str, str]] = [
|
||||
# 1. Bearer tokens (before general token detection)
|
||||
(
|
||||
re.compile(r"Bearer\s+[A-Za-z0-9._\-]+", re.ASCII),
|
||||
"[TOKEN REDACTED]",
|
||||
"tokens",
|
||||
),
|
||||
# 2. API key / long hex-base64 strings (32+ chars of hex/base64 characters)
|
||||
(
|
||||
re.compile(r"\b[A-Za-z0-9+/=_\-]{32,}\b", re.ASCII),
|
||||
"[TOKEN REDACTED]",
|
||||
"tokens",
|
||||
),
|
||||
# 3. UNC paths (\\server\share)
|
||||
(
|
||||
re.compile(r"\\\\[\w.\-]+\\[\w$.\-]+"),
|
||||
"[UNC PATH REDACTED]",
|
||||
"unc_paths",
|
||||
),
|
||||
# 4. Email addresses
|
||||
(
|
||||
re.compile(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b"),
|
||||
"[EMAIL REDACTED]",
|
||||
"emails",
|
||||
),
|
||||
# 5. IPv6 (before IPv4 to avoid partial matches on mixed notation)
|
||||
(
|
||||
re.compile(r"\b(?:[0-9a-fA-F]{1,4}:){2,7}[0-9a-fA-F]{1,4}\b"),
|
||||
"[IP REDACTED]",
|
||||
"ips",
|
||||
),
|
||||
# 6. IPv4
|
||||
(
|
||||
re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"),
|
||||
"[IP REDACTED]",
|
||||
"ips",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def apply_redaction_to_text(content: str) -> tuple[str, RedactionSummary]:
    """Mask sensitive values in *content* and report how many were masked.

    Each entry of ``_PATTERNS`` is applied in list order with ``subn``,
    which substitutes and counts in a single pass per pattern; the hit
    count is added to the summary counter named by that entry.

    Falsy input (e.g. an empty string) is returned unchanged together with
    an all-zero summary.

    Returns a ``(redacted_content, summary)`` tuple.
    """
    summary = RedactionSummary()
    if not content:
        return content, summary

    redacted = content
    for regex, placeholder, counter_name in _PATTERNS:
        redacted, hits = regex.subn(placeholder, redacted)
        if hits > 0:
            # Several patterns can feed the same counter, so accumulate.
            setattr(summary, counter_name, getattr(summary, counter_name) + hits)

    return redacted, summary
|
||||
|
||||
|
||||
def format_redaction_footer(summary: RedactionSummary) -> str:
    """Render a one-line, human-readable footer describing what was redacted.

    Returns an empty string when ``summary.total`` is zero. Otherwise the
    footer starts with a newline and lists each category with a positive
    count, pluralised, in the order IPs, emails, tokens, UNC paths.
    """
    if summary.total == 0:
        return ""

    # (count, singular label) pairs in display order.
    labelled_counts = (
        (summary.ips, "IP"),
        (summary.emails, "email"),
        (summary.tokens, "token"),
        (summary.unc_paths, "UNC path"),
    )
    pieces = [
        f"{count} {label}{'' if count == 1 else 's'}"
        for count, label in labelled_counts
        if count > 0
    ]
    return "\n--- Redacted: " + ", ".join(pieces) + " ---"
|
||||
Reference in New Issue
Block a user