feat: add sensitive data redaction to export (Phase C)
Server-side regex redaction masks IPs, emails, bearer/API tokens, and UNC paths in exported session content. Redaction runs post-generation and post-variable-resolution with fail-closed error handling. Frontend gets a "Mask Sensitive Data" toggle in the export preview modal with a summary of what was redacted. 24 unit tests passing, frontend build clean. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
113
backend/app/services/redaction_service.py
Normal file
113
backend/app/services/redaction_service.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Sensitive data redaction service for export content.
|
||||
|
||||
Applies regex-based pattern matching to mask IPs, emails, tokens, and UNC paths.
|
||||
Redaction is non-persistent and request-scoped — database records are never mutated.
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable
|
||||
|
||||
|
||||
@dataclass
class RedactionSummary:
    """Per-category tally of the replacements made during one redaction pass.

    Each counter defaults to zero; ``total`` is derived from the four
    counters rather than stored.
    """

    # Replacements recorded under the "ips" category.
    ips: int = 0
    # Replacements recorded under the "emails" category.
    emails: int = 0
    # Replacements recorded under the "tokens" category.
    tokens: int = 0
    # Replacements recorded under the "unc_paths" category.
    unc_paths: int = 0

    @property
    def total(self) -> int:
        """Return the sum of all four category counters."""
        return sum((self.ips, self.emails, self.tokens, self.unc_paths))

    def to_dict(self) -> dict:
        """Return the counters plus the derived ``total`` as a plain dict.

        Keys appear in the order: ips, emails, tokens, unc_paths, total.
        """
        counters = {
            name: getattr(self, name)
            for name in ("ips", "emails", "tokens", "unc_paths")
        }
        counters["total"] = self.total
        return counters
|
||||
|
||||
|
||||
# --- Compiled patterns (module-level, not per-request) ---
# Each entry is (compiled pattern, replacement text, RedactionSummary counter).
# Order matters: patterns are applied one after another, so more
# specific/longer patterns come first to prevent partial matches.
#
# Redaction deliberately errs on the side of over-matching: masking something
# harmless is preferable to leaking a secret.

_PATTERNS: list[tuple[re.Pattern[str], str, str]] = [
    # 1. Bearer tokens (before general token detection).
    #    The credential alphabet follows the RFC 6750 "b64token" grammar:
    #    ALPHA / DIGIT / "-" / "." / "_" / "~" / "+" / "/" followed by "="
    #    padding. Without "+", "/", "~" and "=" a base64 credential was cut at
    #    its first such character and the remainder leaked into the export.
    (
        re.compile(r"Bearer\s+[A-Za-z0-9._~+/\-]+=*", re.ASCII),
        "[TOKEN REDACTED]",
        "tokens",
    ),
    # 2. API key / long hex-base64 strings (32+ chars of hex/base64 characters).
    #    "\b" cannot sit after a trailing "=", so up to two base64 padding
    #    characters are consumed explicitly after the boundary instead of
    #    being left behind next to the placeholder.
    (
        re.compile(r"\b[A-Za-z0-9+/=_\-]{32,}\b={0,2}", re.ASCII),
        "[TOKEN REDACTED]",
        "tokens",
    ),
    # 3. UNC paths (\\server\share)
    (
        re.compile(r"\\\\[\w.\-]+\\[\w$.\-]+"),
        "[UNC PATH REDACTED]",
        "unc_paths",
    ),
    # 4. Email addresses
    (
        re.compile(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b"),
        "[EMAIL REDACTED]",
        "emails",
    ),
    # 5. IPv6 (before IPv4 to avoid partial matches on mixed notation).
    #    The "::" alternatives are tried first so a compressed address is
    #    masked as a single unit; the last alternative is the original
    #    uncompressed pattern, kept verbatim so nothing that used to be
    #    redacted stops being redacted.
    #    NOTE: the uncompressed alternative still matches any 3-8 colon-
    #    separated hex groups, which includes HH:MM:SS timestamps and MAC
    #    addresses (over-redaction, counted as IPs).
    (
        re.compile(
            r"""
            # a) leading groups, "::", optional tail: fe80::1, 2001:db8::
            (?<![\w:])
            (?:[0-9A-Fa-f]{1,4}:){1,7}:
            (?:
                (?:[0-9A-Fa-f]{1,4}:){0,5}
                (?:(?:\d{1,3}\.){3}\d{1,3}|[0-9A-Fa-f]{1,4})
            )?
            (?![\w:])
            |
            # b) address starting with "::": ::1, ::ffff:192.0.2.1
            (?<![\w:])
            ::
            (?:[0-9A-Fa-f]{1,4}:){0,6}
            (?:(?:\d{1,3}\.){3}\d{1,3}|[0-9A-Fa-f]{1,4})
            (?![\w:])
            |
            # c) uncompressed colon-separated hex groups (original pattern)
            \b(?:[0-9a-fA-F]{1,4}:){2,7}[0-9a-fA-F]{1,4}\b
            """,
            re.VERBOSE,
        ),
        "[IP REDACTED]",
        "ips",
    ),
    # 6. IPv4
    (
        re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"),
        "[IP REDACTED]",
        "ips",
    ),
]
|
||||
|
||||
|
||||
def apply_redaction_to_text(content: str) -> tuple[str, RedactionSummary]:
    """Mask every match of the module's redaction patterns in *content*.

    The patterns in ``_PATTERNS`` are applied one after another, in list
    order, each via ``subn`` so the replacement and the match count come from
    the same pass. Each count is added to the summary counter named by the
    pattern's category.

    Empty (falsy) content is returned untouched with an all-zero summary.

    Returns:
        A ``(redacted_content, summary)`` tuple.
    """
    tally = RedactionSummary()
    if not content:
        return content, tally

    redacted = content
    for regex, placeholder, bucket in _PATTERNS:
        redacted, hits = regex.subn(placeholder, redacted)
        if hits <= 0:
            continue
        # The category string doubles as the attribute name on the summary.
        setattr(tally, bucket, getattr(tally, bucket) + hits)

    return redacted, tally
|
||||
|
||||
|
||||
def format_redaction_footer(summary: RedactionSummary) -> str:
    """Render a one-line, human-readable footer describing what was redacted.

    Returns an empty string when ``summary.total`` is zero. Otherwise the
    footer lists every category with a positive count, in the fixed order
    IPs, emails, tokens, UNC paths, pluralising each label when its count is
    not exactly one. The returned text starts with a newline.
    """
    if summary.total == 0:
        return ""

    labelled_counts = (
        (summary.ips, "IP"),
        (summary.emails, "email"),
        (summary.tokens, "token"),
        (summary.unc_paths, "UNC path"),
    )
    fragments = [
        f"{count} {label}{'s' if count != 1 else ''}"
        for count, label in labelled_counts
        if count > 0
    ]

    return f"\n--- Redacted: {', '.join(fragments)} ---"
|
||||
Reference in New Issue
Block a user