"""Tests for sensitive data redaction service.""" import pytest from app.services.redaction_service import ( apply_redaction_to_text, format_redaction_footer, RedactionSummary, ) class TestIPv4Redaction: def test_single_ipv4(self): text = "Server at 192.168.1.100 is down" result, summary = apply_redaction_to_text(text) assert result == "Server at [IP REDACTED] is down" assert summary.ips == 1 def test_multiple_ipv4(self): text = "Route from 10.0.0.1 to 172.16.0.5" result, summary = apply_redaction_to_text(text) assert "[IP REDACTED]" in result assert "10.0.0.1" not in result assert "172.16.0.5" not in result assert summary.ips == 2 def test_ipv4_at_boundaries(self): text = "10.0.0.1\n192.168.1.1" result, summary = apply_redaction_to_text(text) assert summary.ips == 2 assert "10.0.0.1" not in result class TestIPv6Redaction: def test_full_ipv6(self): text = "Address: 2001:0db8:85a3:0000:0000:8a2e:0370:7334" result, summary = apply_redaction_to_text(text) assert result == "Address: [IP REDACTED]" assert summary.ips == 1 def test_abbreviated_ipv6(self): text = "fe80:1234:abcd:5678:9abc" result, summary = apply_redaction_to_text(text) assert "[IP REDACTED]" in result assert summary.ips == 1 class TestEmailRedaction: def test_simple_email(self): text = "Contact admin@company.com for help" result, summary = apply_redaction_to_text(text) assert result == "Contact [EMAIL REDACTED] for help" assert summary.emails == 1 def test_complex_email(self): text = "Send to john.doe+tag@sub.domain.co.uk" result, summary = apply_redaction_to_text(text) assert "[EMAIL REDACTED]" in result assert summary.emails == 1 def test_multiple_emails(self): text = "From user@a.com to admin@b.org" result, summary = apply_redaction_to_text(text) assert summary.emails == 2 assert "user@a.com" not in result class TestTokenRedaction: def test_bearer_token(self): text = "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.payload.sig" result, summary = apply_redaction_to_text(text) assert "Bearer" not in result or "[TOKEN REDACTED]" in result assert summary.tokens >= 1 def test_long_api_key(self): text = "API key: a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4" result, summary = apply_redaction_to_text(text) assert "[TOKEN REDACTED]" in result assert summary.tokens >= 1 def test_short_string_not_matched(self): text = "Short code: abc123" result, summary = apply_redaction_to_text(text) assert "abc123" in result # Too short to be a token class TestUNCPathRedaction: def test_simple_unc(self): text = r"Map drive to \\fileserver\shared" result, summary = apply_redaction_to_text(text) assert result == "Map drive to [UNC PATH REDACTED]" assert summary.unc_paths == 1 def test_unc_with_dollar_share(self): text = r"Access \\server01\C$" result, summary = apply_redaction_to_text(text) assert "[UNC PATH REDACTED]" in result assert summary.unc_paths == 1 class TestMixedContent: def test_multiple_pattern_types(self): text = ( "Server 192.168.1.1 has user admin@corp.com " r"and share \\filesvr\data" ) result, summary = apply_redaction_to_text(text) assert "192.168.1.1" not in result assert "admin@corp.com" not in result assert r"\\filesvr\data" not in result assert summary.ips == 1 assert summary.emails == 1 assert summary.unc_paths == 1 def test_no_sensitive_data(self): text = "Everything is working fine. No issues found." result, summary = apply_redaction_to_text(text) assert result == text assert summary.total == 0 class TestEdgeCases: def test_empty_string(self): result, summary = apply_redaction_to_text("") assert result == "" assert summary.total == 0 def test_idempotency(self): """Already-redacted content should not produce extra matches.""" text = "Server at [IP REDACTED] and [EMAIL REDACTED]" result, summary = apply_redaction_to_text(text) assert result == text assert summary.total == 0 def test_redaction_then_re_redaction(self): """Running redaction twice produces the same output.""" text = "Contact admin@test.com at 10.0.0.1" first_pass, _ = apply_redaction_to_text(text) second_pass, summary2 = apply_redaction_to_text(first_pass) assert first_pass == second_pass assert summary2.total == 0 class TestRedactionSummary: def test_total_calculation(self): s = RedactionSummary(ips=2, emails=1, tokens=3, unc_paths=1) assert s.total == 7 def test_to_dict(self): s = RedactionSummary(ips=1, emails=2, tokens=0, unc_paths=0) d = s.to_dict() assert d == {"ips": 1, "emails": 2, "tokens": 0, "unc_paths": 0, "total": 3} class TestRedactionFooter: def test_no_matches(self): assert format_redaction_footer(RedactionSummary()) == "" def test_single_category(self): footer = format_redaction_footer(RedactionSummary(ips=3)) assert footer == "\n--- Redacted: 3 IPs ---" def test_multiple_categories(self): footer = format_redaction_footer(RedactionSummary(ips=1, emails=2, tokens=1)) assert "1 IP" in footer assert "2 emails" in footer assert "1 token" in footer def test_singular_forms(self): footer = format_redaction_footer(RedactionSummary(ips=1, emails=1)) assert "1 IP," in footer or "1 IP ---" in footer assert "1 email" in footer