diff --git a/.wick.yaml.example b/.wick.yaml.example index ff22871..56e8aa0 100644 --- a/.wick.yaml.example +++ b/.wick.yaml.example @@ -1,7 +1,7 @@ # Wick project configuration # Place this file as .wick.yaml in your project root. -# Redaction style: "redacted" (default), "stars", or a custom string. +# Redaction style: "redacted" (default), "stars", "hash", or a custom string. # style: redacted # Custom detection patterns (in addition to built-in secrets and PII). @@ -11,3 +11,31 @@ # - name: internal-hostname # regex: "\\w+\\.internal\\.acme\\.com" # replacement: "[INTERNAL-HOST]" + +# Allowlist: known-safe values that should never be redacted. +# Use exact strings or set regex: true for regular expressions. +# allowlist: +# - pattern: "AKIAIOSFODNN7EXAMPLE" +# reason: "AWS documentation example key" +# - pattern: "test@example\\.com" +# regex: true +# reason: "Test fixture email" +# - pattern: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" +# reason: "AWS documentation example secret" + +# Blocklist: patterns that are always redacted, even if not in built-in rules. +# blocklist: +# - pattern: "ACME-INTERNAL-[A-Z0-9]+" +# category: "custom" +# reason: "Internal project codes" +# - pattern: "\\w+\\.corp\\.acme\\.com" +# category: "custom" +# reason: "Internal hostnames" + +# Load additional Gitleaks-compatible rules from a TOML file. +# rules_file: "./my-rules.toml" + +# Disable specific built-in rules by ID (use wick --report to see rule IDs). 
+# disable_rules: +# - "generic-api-key" +# - "email" diff --git a/allowlist_test.go b/allowlist_test.go new file mode 100644 index 0000000..fb3f01d --- /dev/null +++ b/allowlist_test.go @@ -0,0 +1,101 @@ +package wick + +import ( + "strings" + "testing" + + "github.com/krypsis-io/wick/detect" +) + +func TestWithAllowlist_SuppressesValue(t *testing.T) { + input := "Contact admin@acme.com for help" + output, report, err := Redact(input, WithAllowlist([]detect.AllowlistEntry{ + {Pattern: "admin@acme.com", Reason: "test fixture"}, + })) + if err != nil { + t.Fatalf("Redact: %v", err) + } + if strings.Contains(output, "[REDACTED]") { + t.Errorf("allowlisted value should not be redacted: %s", output) + } + if output != input { + t.Errorf("output should equal input when all findings are allowlisted: %s", output) + } + if report.Total != 0 { + t.Errorf("expected 0 findings, got %d", report.Total) + } +} + +func TestWithAllowlist_Regex(t *testing.T) { + input := "Email: test@fixture.com" + output, _, err := Redact(input, WithAllowlist([]detect.AllowlistEntry{ + {Pattern: `test@.*\.com`, Regex: true}, + })) + if err != nil { + t.Fatalf("Redact: %v", err) + } + if strings.Contains(output, "[REDACTED]") { + t.Errorf("regex-allowlisted value should not be redacted: %s", output) + } +} + +func TestWithAllowlist_OnlyAllowlistedSuppressed(t *testing.T) { + input := "safe@example.com and danger@corp.com both here" + _, report, err := Redact(input, WithAllowlist([]detect.AllowlistEntry{ + {Pattern: "safe@example.com"}, + })) + if err != nil { + t.Fatalf("Redact: %v", err) + } + for _, f := range report.Findings { + if f.Value == "safe@example.com" { + t.Errorf("safe@example.com should be suppressed by allowlist") + } + } + found := false + for _, f := range report.Findings { + if f.Value == "danger@corp.com" { + found = true + } + } + if !found { + t.Error("danger@corp.com should still be detected") + } +} + +func TestWithBlocklist_AlwaysRedacts(t *testing.T) { + // Custom value 
not in any built-in rule. + input := "Project code: ACME-INTERNAL-ABC123" + output, report, err := Redact(input, WithBlocklist([]detect.CustomPattern{ + {Name: "internal-code", Regex: `ACME-INTERNAL-[A-Z0-9]+`}, + })) + if err != nil { + t.Fatalf("Redact: %v", err) + } + if strings.Contains(output, "ACME-INTERNAL-ABC123") { + t.Errorf("blocklisted value should be redacted: %s", output) + } + if report.Total == 0 { + t.Error("expected at least 1 finding from blocklist") + } +} + +func TestWithBlocklist_CombinedWithBuiltins(t *testing.T) { + // Blocklist adds to built-in detection, not replaces it. + input := "Email: admin@acme.com, Code: ACME-INTERNAL-XYZ" + output, report, err := Redact(input, WithBlocklist([]detect.CustomPattern{ + {Name: "internal-code", Regex: `ACME-INTERNAL-[A-Z]+`}, + })) + if err != nil { + t.Fatalf("Redact: %v", err) + } + if strings.Contains(output, "admin@acme.com") { + t.Errorf("email should still be detected: %s", output) + } + if strings.Contains(output, "ACME-INTERNAL-XYZ") { + t.Errorf("blocklisted value should be redacted: %s", output) + } + if report.Total < 2 { + t.Errorf("expected at least 2 findings, got %d", report.Total) + } +} diff --git a/dehydrate_test.go b/dehydrate_test.go new file mode 100644 index 0000000..1a58a4c --- /dev/null +++ b/dehydrate_test.go @@ -0,0 +1,195 @@ +package wick + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestGenerateKey(t *testing.T) { + k1, err := GenerateKey() + if err != nil { + t.Fatalf("GenerateKey: %v", err) + } + k2, err := GenerateKey() + if err != nil { + t.Fatalf("GenerateKey: %v", err) + } + if k1 == k2 { + t.Error("two GenerateKey calls should not produce the same key") + } +} + +func TestDecodeKey_Valid(t *testing.T) { + encoded, _ := GenerateKey() + key, err := DecodeKey(encoded) + if err != nil { + t.Fatalf("DecodeKey: %v", err) + } + if len(key) != 32 { + t.Errorf("expected 32-byte key, got %d", len(key)) + } +} + +func TestDecodeKey_Invalid(t *testing.T) 
{ + if _, err := DecodeKey("not-base64!!!"); err == nil { + t.Error("expected error for invalid base64") + } + // Valid base64 but wrong length (16 bytes = 24 base64 chars). + if _, err := DecodeKey("AAAAAAAAAAAAAAAAAAAAAA=="); err == nil { + t.Error("expected error for wrong key length") + } +} + +func TestDehydrate_Basic(t *testing.T) { + input := "Contact admin@acme.com from 10.0.1.42" + key, _ := GenerateKey() + keyBytes, _ := DecodeKey(key) + + redacted, tm, err := Dehydrate(input, keyBytes) + if err != nil { + t.Fatalf("Dehydrate: %v", err) + } + + if strings.Contains(redacted, "admin@acme.com") { + t.Errorf("email should be redacted: %s", redacted) + } + if strings.Contains(redacted, "10.0.1.42") { + t.Errorf("IP should be redacted: %s", redacted) + } + if len(tm.entries) == 0 { + t.Error("expected non-empty token map") + } +} + +func TestRoundTrip(t *testing.T) { + input := "Contact admin@acme.com from 10.0.1.42 — key: AKIAZ5GMHYJKLMNOPQRS" + key, _ := GenerateKey() + keyBytes, _ := DecodeKey(key) + + redacted, tm, err := Dehydrate(input, keyBytes) + if err != nil { + t.Fatalf("Dehydrate: %v", err) + } + + restored, err := Rehydrate(redacted, tm) + if err != nil { + t.Fatalf("Rehydrate: %v", err) + } + + if restored != input { + t.Errorf("round-trip failed:\n input: %q\n restored: %q", input, restored) + } +} + +func TestRoundTrip_RepeatedValue(t *testing.T) { + input := "admin@acme.com is the admin. Contact admin@acme.com." 
+ key, _ := GenerateKey() + keyBytes, _ := DecodeKey(key) + + redacted, tm, err := Dehydrate(input, keyBytes) + if err != nil { + t.Fatalf("Dehydrate: %v", err) + } + + restored, err := Rehydrate(redacted, tm) + if err != nil { + t.Fatalf("Rehydrate: %v", err) + } + + if restored != input { + t.Errorf("round-trip with repeated value failed:\n input: %q\n restored: %q", input, restored) + } +} + +func TestSaveAndLoadTokenMap(t *testing.T) { + input := "Contact admin@acme.com from 10.0.1.42" + key, _ := GenerateKey() + keyBytes, _ := DecodeKey(key) + + _, tm, err := Dehydrate(input, keyBytes) + if err != nil { + t.Fatalf("Dehydrate: %v", err) + } + + tmpFile := filepath.Join(t.TempDir(), "tokens.enc") + if err := SaveTokenMap(tm, keyBytes, tmpFile); err != nil { + t.Fatalf("SaveTokenMap: %v", err) + } + + loaded, err := LoadTokenMap(keyBytes, tmpFile) + if err != nil { + t.Fatalf("LoadTokenMap: %v", err) + } + + if len(loaded.entries) != len(tm.entries) { + t.Errorf("loaded %d entries, want %d", len(loaded.entries), len(tm.entries)) + } +} + +func TestLoadTokenMap_WrongKey(t *testing.T) { + input := "admin@acme.com" + key, _ := GenerateKey() + keyBytes, _ := DecodeKey(key) + + _, tm, _ := Dehydrate(input, keyBytes) + tmpFile := filepath.Join(t.TempDir(), "tokens.enc") + _ = SaveTokenMap(tm, keyBytes, tmpFile) + + wrongKey, _ := GenerateKey() + wrongKeyBytes, _ := DecodeKey(wrongKey) + if _, err := LoadTokenMap(wrongKeyBytes, tmpFile); err == nil { + t.Error("expected error when decrypting with wrong key") + } +} + +func TestLoadTokenMap_MissingFile(t *testing.T) { + key, _ := GenerateKey() + keyBytes, _ := DecodeKey(key) + if _, err := LoadTokenMap(keyBytes, "/nonexistent/path.enc"); err == nil { + t.Error("expected error for missing file") + } +} + +func TestSaveTokenMap_FilePermissions(t *testing.T) { + input := "admin@acme.com" + key, _ := GenerateKey() + keyBytes, _ := DecodeKey(key) + _, tm, _ := Dehydrate(input, keyBytes) + + tmpFile := 
filepath.Join(t.TempDir(), "tokens.enc") + if err := SaveTokenMap(tm, keyBytes, tmpFile); err != nil { + t.Fatalf("SaveTokenMap: %v", err) + } + + info, err := os.Stat(tmpFile) + if err != nil { + t.Fatalf("stat: %v", err) + } + if info.Mode().Perm() != 0o600 { + t.Errorf("expected file mode 0600, got %o", info.Mode().Perm()) + } +} + +func TestRehydrate_NoFindings(t *testing.T) { + input := "nothing sensitive here" + key, _ := GenerateKey() + keyBytes, _ := DecodeKey(key) + + redacted, tm, err := Dehydrate(input, keyBytes) + if err != nil { + t.Fatalf("Dehydrate: %v", err) + } + if redacted != input { + t.Errorf("no-findings input should pass through unchanged: %q", redacted) + } + + restored, err := Rehydrate(redacted, tm) + if err != nil { + t.Fatalf("Rehydrate: %v", err) + } + if restored != input { + t.Errorf("round-trip with no findings failed: %q", restored) + } +} diff --git a/detect/allowlist.go b/detect/allowlist.go new file mode 100644 index 0000000..95d6b8c --- /dev/null +++ b/detect/allowlist.go @@ -0,0 +1,68 @@ +package detect + +import ( + "regexp" + "strings" +) + +// AllowlistEntry defines a known-safe value that should not be redacted. +// Pattern is treated as a literal string by default; set Regex to true to +// treat it as a regular expression. File globs are not evaluated here — +// callers with file-path context are responsible for pre-filtering. +type AllowlistEntry struct { + Pattern string `yaml:"pattern"` + Regex bool `yaml:"regex,omitempty"` + Reason string `yaml:"reason,omitempty"` +} + +type compiledAllowlistEntry struct { + literal string + re *regexp.Regexp +} + +// SetAllowlist compiles and stores project-level allowlist entries. Findings +// whose Value matches any entry will be suppressed from Detect output. 
+func (d *Detector) SetAllowlist(entries []AllowlistEntry) error { + compiled := make([]compiledAllowlistEntry, 0, len(entries)) + for _, e := range entries { + if e.Regex { + re, err := regexp.Compile(e.Pattern) + if err != nil { + return err + } + compiled = append(compiled, compiledAllowlistEntry{re: re}) + } else { + compiled = append(compiled, compiledAllowlistEntry{literal: e.Pattern}) + } + } + d.allowlist = compiled + return nil +} + +// isProjectAllowed returns true if value matches any compiled allowlist entry. +func (d *Detector) isProjectAllowed(value string) bool { + for _, e := range d.allowlist { + if e.re != nil { + if e.re.MatchString(value) { + return true + } + } else if strings.EqualFold(e.literal, value) { + return true + } + } + return false +} + +// filterAllowed removes findings that match the project-level allowlist. +func (d *Detector) filterAllowed(findings []Finding) []Finding { + if len(d.allowlist) == 0 { + return findings + } + out := findings[:0] + for _, f := range findings { + if !d.isProjectAllowed(f.Value) { + out = append(out, f) + } + } + return out +} diff --git a/detect/allowlist_test.go b/detect/allowlist_test.go new file mode 100644 index 0000000..f91a3b8 --- /dev/null +++ b/detect/allowlist_test.go @@ -0,0 +1,113 @@ +package detect + +import ( + "testing" +) + +func TestAllowlist_ExactMatch(t *testing.T) { + d, _ := New() + _ = d.SetAllowlist([]AllowlistEntry{ + {Pattern: "admin@acme.com", Reason: "test fixture"}, + }) + + findings := d.Detect("Contact admin@acme.com for help") + for _, f := range findings { + if f.Value == "admin@acme.com" { + t.Errorf("allowlisted value should be suppressed, got finding: %+v", f) + } + } +} + +func TestAllowlist_ExactMatch_CaseInsensitive(t *testing.T) { + d, _ := New() + _ = d.SetAllowlist([]AllowlistEntry{ + {Pattern: "Admin@Acme.Com"}, + }) + + findings := d.Detect("Contact admin@acme.com for help") + for _, f := range findings { + if f.Value == "admin@acme.com" { + 
t.Errorf("case-insensitive allowlist should suppress finding: %+v", f) + } + } +} + +func TestAllowlist_RegexMatch(t *testing.T) { + d, _ := New() + _ = d.SetAllowlist([]AllowlistEntry{ + {Pattern: `test@.*\.com`, Regex: true, Reason: "test emails"}, + }) + + findings := d.Detect("Email: test@example.com") + for _, f := range findings { + if f.RuleID == "email" { + t.Errorf("regex-allowlisted email should be suppressed: %+v", f) + } + } +} + +func TestAllowlist_DoesNotSuppressOthers(t *testing.T) { + d, _ := New() + _ = d.SetAllowlist([]AllowlistEntry{ + {Pattern: "safe@example.com"}, + }) + + findings := d.Detect("safe@example.com and admin@acme.com both appear") + foundSafe, foundOther := false, false + for _, f := range findings { + if f.Value == "safe@example.com" { + foundSafe = true + } + if f.Value == "admin@acme.com" { + foundOther = true + } + } + if foundSafe { + t.Error("safe@example.com should be allowlisted and suppressed") + } + if !foundOther { + t.Error("admin@acme.com should still be detected") + } +} + +func TestAllowlist_InvalidRegex(t *testing.T) { + d, _ := New() + err := d.SetAllowlist([]AllowlistEntry{ + {Pattern: `[invalid`, Regex: true}, + }) + if err == nil { + t.Error("expected error for invalid regex in allowlist") + } +} + +func TestAllowlist_Empty(t *testing.T) { + d, _ := New() + _ = d.SetAllowlist(nil) + + // Should still detect normally. + findings := d.Detect("admin@acme.com") + found := false + for _, f := range findings { + if f.RuleID == "email" { + found = true + } + } + if !found { + t.Error("empty allowlist should not suppress normal detection") + } +} + +func TestAllowlist_AWSExampleKey(t *testing.T) { + // Common preset: suppress the AWS documentation example key. 
+ d, _ := New() + _ = d.SetAllowlist([]AllowlistEntry{ + {Pattern: "AKIAIOSFODNN7EXAMPLE", Reason: "AWS docs example"}, + }) + + findings := d.Detect("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE") + for _, f := range findings { + if f.Value == "AKIAIOSFODNN7EXAMPLE" { + t.Errorf("AWS example key should be allowlisted: %+v", f) + } + } +} diff --git a/internal/detect/custom.go b/detect/custom.go similarity index 91% rename from internal/detect/custom.go rename to detect/custom.go index 5a10e94..292955f 100644 --- a/internal/detect/custom.go +++ b/detect/custom.go @@ -9,7 +9,7 @@ import ( type CustomPattern struct { Name string `yaml:"name"` Regex string `yaml:"regex"` - Replacement string `yaml:"replacement,omitempty"` // TODO: wire per-pattern replacements into redact.Redact + Replacement string `yaml:"replacement,omitempty"` // Applied via redact.PerRule when set } type compiledCustom struct { diff --git a/detect/detection_config_test.go b/detect/detection_config_test.go new file mode 100644 index 0000000..6016715 --- /dev/null +++ b/detect/detection_config_test.go @@ -0,0 +1,93 @@ +package detect + +import ( + "os" + "path/filepath" + "testing" +) + +func TestDisableRules(t *testing.T) { + d, _ := New() + + // Email should be detected before disabling. + findings := d.Detect("admin@acme.com") + found := false + for _, f := range findings { + if f.RuleID == "email" { + found = true + } + } + if !found { + t.Fatal("email should be detected before disabling") + } + + // Disable PII email rule. + d.DisableRules([]string{"email"}) + findings = d.Detect("admin@acme.com") + for _, f := range findings { + if f.RuleID == "email" { + t.Errorf("email rule should be disabled, got finding: %+v", f) + } + } +} + +func TestDisableRules_UnknownID(t *testing.T) { + d, _ := New() + // Should not panic on unknown rule IDs. 
+ d.DisableRules([]string{"nonexistent-rule-id"}) +} + +func TestDisableRules_Empty(t *testing.T) { + d, _ := New() + d.DisableRules(nil) + // Should still detect normally. + findings := d.Detect("admin@acme.com") + found := false + for _, f := range findings { + if f.RuleID == "email" { + found = true + } + } + if !found { + t.Error("empty disable list should not affect detection") + } +} + +func TestAddRulesFile(t *testing.T) { + toml := ` +title = "test rules" + +[[rules]] +id = "test-custom-token" +description = "Test custom token" +regex = '''TEST-[A-Z0-9]{8}''' +keywords = ["test"] +` + tmp := filepath.Join(t.TempDir(), "rules.toml") + if err := os.WriteFile(tmp, []byte(toml), 0o600); err != nil { + t.Fatalf("write temp rules: %v", err) + } + + d, _ := New() + if err := d.AddRulesFile(tmp); err != nil { + t.Fatalf("AddRulesFile: %v", err) + } + + findings := d.Detect("token: TEST-ABCD1234") + found := false + for _, f := range findings { + if f.RuleID == "test-custom-token" { + found = true + } + } + if !found { + t.Errorf("custom rule from file not matched, findings: %+v", findings) + } +} + +func TestAddRulesFile_Missing(t *testing.T) { + d, _ := New() + if err := d.AddRulesFile("/nonexistent/path.toml"); err == nil { + t.Error("expected error for missing rules file") + } +} diff --git a/internal/detect/detector.go b/detect/detector.go similarity index 73% rename from internal/detect/detector.go rename to detect/detector.go index aa1025b..f6b0880 100644 --- a/internal/detect/detector.go +++ b/detect/detector.go @@ -2,6 +2,8 @@ package detect import ( + "fmt" + "os" "strings" ) @@ -9,6 +11,8 @@ import ( type Detector struct { secretRules []SecretRule customPatterns []compiledCustom + allowlist []compiledAllowlistEntry + disabledPII map[string]bool } // New creates a Detector with the embedded Gitleaks patterns and built-in PII rules. 
@@ -20,6 +24,48 @@ func New() (*Detector, error) { return &Detector{secretRules: rules}, nil } +// AddRulesFile loads additional secret rules from a Gitleaks-compatible TOML +// file and appends them to the detector's existing rules. +func (d *Detector) AddRulesFile(path string) error { + data, err := os.ReadFile(path) + if err != nil { + return fmt.Errorf("reading rules file %q: %w", path, err) + } + extra, err := LoadSecretRulesFromBytes(data) + if err != nil { + return fmt.Errorf("parsing rules file %q: %w", path, err) + } + d.secretRules = append(d.secretRules, extra...) + return nil +} + +// DisableRules removes rules with the given IDs from the detector. +// Unknown IDs are silently ignored. +func (d *Detector) DisableRules(ids []string) { + if len(ids) == 0 { + return + } + disabled := make(map[string]bool, len(ids)) + for _, id := range ids { + disabled[id] = true + } + kept := d.secretRules[:0] + for _, r := range d.secretRules { + if !disabled[r.ID] { + kept = append(kept, r) + } + } + d.secretRules = kept + + // Also track disabled PII rule IDs (email, ipv4, etc.). + for id := range disabled { + if d.disabledPII == nil { + d.disabledPII = make(map[string]bool) + } + d.disabledPII[id] = true + } +} + // SetCustomPatterns loads user-defined patterns into the detector. func (d *Detector) SetCustomPatterns(patterns []CustomPattern) error { compiled, err := compileCustomPatterns(patterns) @@ -38,10 +84,10 @@ func (d *Detector) Detect(input string) []Finding { for i, line := range lines { lineNum := i + 1 all = append(all, matchSecretRules(d.secretRules, line, lineNum)...) - all = append(all, matchPII(line, lineNum)...) + all = append(all, matchPII(line, lineNum, d.disabledPII)...) all = append(all, matchCustom(d.customPatterns, line, lineNum)...) } - return all + return d.filterAllowed(all) } // DetectMultiline runs only multiline-capable rules against the full unsplit input. 
@@ -78,7 +124,7 @@ func (d *Detector) DetectMultiline(input string) []Finding { } } } - return all + return d.filterAllowed(all) } // extractLine returns the single line within s that contains byte position pos. diff --git a/internal/detect/detector_test.go b/detect/detector_test.go similarity index 72% rename from internal/detect/detector_test.go rename to detect/detector_test.go index d3f73eb..54c9203 100644 --- a/internal/detect/detector_test.go +++ b/detect/detector_test.go @@ -25,6 +25,61 @@ func TestDetector_Secrets(t *testing.T) { } } +func TestDetector_AWSSecretAccessKey(t *testing.T) { + d, err := New() + if err != nil { + t.Fatalf("failed to create detector: %v", err) + } + + tests := []struct { + name string + input string + want bool + }{ + { + name: "env var assignment", + input: `AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYzzzzzzzzAB`, + want: true, + }, + { + name: "quoted assignment", + input: `aws_secret_access_key = "Ab1CD2efGH3ijKL4mnOP5qrST6uvWX7yzAB8CDE9"`, + want: true, + }, + { + name: "yaml style", + input: `secret_access_key: Ab1CD2efGH3ijKL4mnOP5qrST6uvWX7yzAB8CDE9`, + want: true, + }, + { + name: "example key should be allowlisted", + input: `AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY`, + want: false, + }, + { + name: "too short value ignored", + input: `AWS_SECRET_ACCESS_KEY=tooshort`, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + findings := d.Detect(tt.input) + found := false + for _, f := range findings { + if f.RuleID == "aws-secret-access-key" { + found = true + break + } + } + if found != tt.want { + t.Errorf("got found=%v, want %v, findings: %+v", found, tt.want, findings) + } + }) + } +} + func TestDetector_PII_Email(t *testing.T) { d, err := New() if err != nil { diff --git a/internal/detect/finding.go b/detect/finding.go similarity index 100% rename from internal/detect/finding.go rename to detect/finding.go diff --git 
a/internal/detect/patterns/gitleaks.toml b/detect/patterns/gitleaks.toml similarity index 99% rename from internal/detect/patterns/gitleaks.toml rename to detect/patterns/gitleaks.toml index 256f647..a887bf0 100644 --- a/internal/detect/patterns/gitleaks.toml +++ b/detect/patterns/gitleaks.toml @@ -219,6 +219,22 @@ regexes = [ '''.+EXAMPLE$''', ] +[[rules]] +id = "aws-secret-access-key" +description = "Detected an AWS Secret Access Key, risking unauthorized access to AWS services and data breaches." +regex = '''(?i)(?:aws_secret_access_key|aws_secret_key|secret_access_key)[\s'"]{0,3}(?:=|:|=>)[\s'"]{0,5}([A-Za-z0-9/+=]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +secretGroup = 1 +keywords = [ + "aws_secret_access_key", + "aws_secret_key", + "secret_access_key", +] +[[rules.allowlists]] +regexes = [ + '''wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY''', +] + [[rules]] id = "aws-amazon-bedrock-api-key-long-lived" description = "Identified a pattern that may indicate long-lived Amazon Bedrock API keys, risking unauthorized Amazon Bedrock usage" diff --git a/internal/detect/pii.go b/detect/pii.go similarity index 91% rename from internal/detect/pii.go rename to detect/pii.go index 46f013d..6626984 100644 --- a/internal/detect/pii.go +++ b/detect/pii.go @@ -58,9 +58,13 @@ var piiPatterns = []piiPattern{ } // matchPII runs all PII patterns against a single line and returns findings. -func matchPII(line string, lineNum int) []Finding { +// disabled is an optional set of rule IDs to skip (may be nil). 
+func matchPII(line string, lineNum int, disabled map[string]bool) []Finding { var findings []Finding for _, p := range piiPatterns { + if disabled[p.ID] { + continue + } matches := p.Regex.FindAllStringIndex(line, -1) for _, m := range matches { value := line[m[0]:m[1]] diff --git a/internal/detect/secrets.go b/detect/secrets.go similarity index 100% rename from internal/detect/secrets.go rename to detect/secrets.go diff --git a/internal/format/detect.go b/format/detect.go similarity index 100% rename from internal/format/detect.go rename to format/detect.go diff --git a/internal/format/env.go b/format/env.go similarity index 56% rename from internal/format/env.go rename to format/env.go index 6ec5c7f..c3354a3 100644 --- a/internal/format/env.go +++ b/format/env.go @@ -3,12 +3,12 @@ package format import ( "strings" - "github.com/krypsis-io/wick/internal/detect" - "github.com/krypsis-io/wick/internal/redact" + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/redact" ) // ProcessEnv parses KEY=VALUE lines, redacting only the VALUE portion. -func ProcessEnv(input string, detector *detect.Detector, style redact.Style) (string, []detect.Finding) { +func ProcessEnv(input string, detector *detect.Detector, replacer redact.Replacer) (string, []detect.Finding) { lines := strings.Split(input, "\n") var allFindings []detect.Finding result := make([]string, len(lines)) @@ -32,16 +32,30 @@ func ProcessEnv(input string, detector *detect.Detector, style redact.Style) (st // Strip surrounding quotes for detection, but preserve them. stripped, prefix, suffix := stripQuotes(value) - found := detector.Detect(stripped) + // Detect against the full line so keyword-based rules (e.g., + // aws_secret_access_key) can see the key name for pre-filtering. + // Then filter findings to only those within the value portion. 
+ valueStart := len(key) + len(prefix) + valueEnd := valueStart + len(stripped) + allFound := detector.Detect(line) + var found []detect.Finding + for _, f := range allFound { + if f.Start >= valueStart && f.End <= valueEnd { + // Shift offsets to be relative to stripped value. + f.Start -= valueStart + f.End -= valueStart + found = append(found, f) + } + } + if len(found) > 0 { - // Redact using original offsets (relative to stripped). - redacted := redact.Redact(stripped, found, style) + // Redact using offsets relative to stripped value. + redacted := redact.Redact(stripped, found, replacer) result[i] = key + prefix + redacted + suffix // Adjust finding offsets for output (relative to full line). - offset := len(key) + len(prefix) for j := range found { - found[j].Start += offset - found[j].End += offset + found[j].Start += valueStart + found[j].End += valueStart found[j].Line = i + 1 } allFindings = append(allFindings, found...) diff --git a/internal/format/format_test.go b/format/format_test.go similarity index 88% rename from internal/format/format_test.go rename to format/format_test.go index 5be5fca..e842034 100644 --- a/internal/format/format_test.go +++ b/format/format_test.go @@ -4,8 +4,8 @@ import ( "strings" "testing" - "github.com/krypsis-io/wick/internal/detect" - "github.com/krypsis-io/wick/internal/redact" + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/redact" ) func newDetector(t *testing.T) *detect.Detector { @@ -42,7 +42,7 @@ func TestDetectFormat(t *testing.T) { func TestProcessJSON(t *testing.T) { d := newDetector(t) input := `{"api_key": "AKIAZ5GMHYJKLMNOPQRS", "name": "test"}` - output, findings := ProcessJSON(input, d, redact.StyleRedacted) + output, findings := ProcessJSON(input, d, redact.Redacted) if len(findings) == 0 { t.Fatal("expected findings") @@ -61,7 +61,7 @@ func TestProcessJSON(t *testing.T) { func TestProcessEnv(t *testing.T) { d := newDetector(t) input := "# 
Config\nAPI_KEY=AKIAZ5GMHYJKLMNOPQRS\nDB_NAME=mydb" - output, findings := ProcessEnv(input, d, redact.StyleRedacted) + output, findings := ProcessEnv(input, d, redact.Redacted) if len(findings) == 0 { t.Fatal("expected findings") @@ -80,7 +80,7 @@ func TestProcessEnv(t *testing.T) { func TestProcessPlain(t *testing.T) { d := newDetector(t) input := "Contact admin@acme.com from 10.0.1.42" - output, findings := ProcessPlain(input, d, redact.StyleRedacted) + output, findings := ProcessPlain(input, d, redact.Redacted) if len(findings) < 2 { t.Fatalf("expected at least 2 findings, got %d", len(findings)) diff --git a/internal/format/json.go b/format/json.go similarity index 68% rename from internal/format/json.go rename to format/json.go index 50eaebd..75b0f78 100644 --- a/internal/format/json.go +++ b/format/json.go @@ -4,13 +4,13 @@ import ( "encoding/json" "strings" - "github.com/krypsis-io/wick/internal/detect" - "github.com/krypsis-io/wick/internal/redact" + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/redact" ) // redactString detects and redacts secrets/PII in a string value line by line, // so that multi-line strings have correct byte offsets for redaction. -func redactString(val string, d *detect.Detector, style redact.Style, findings *[]detect.Finding) (string, bool) { +func redactString(val string, d *detect.Detector, replacer redact.Replacer, findings *[]detect.Finding) (string, bool) { lines := strings.Split(val, "\n") changed := false for i, line := range lines { @@ -20,7 +20,7 @@ func redactString(val string, d *detect.Detector, style redact.Style, findings * found[j].Line = i + 1 } *findings = append(*findings, found...) - lines[i] = redact.Redact(line, found, style) + lines[i] = redact.Redact(line, found, replacer) changed = true } } @@ -31,17 +31,17 @@ func redactString(val string, d *detect.Detector, style redact.Style, findings * } // ProcessJSON parses JSON, redacts string values, and preserves structure. 
-func ProcessJSON(input string, detector *detect.Detector, style redact.Style) (string, []detect.Finding) { +func ProcessJSON(input string, detector *detect.Detector, replacer redact.Replacer) (string, []detect.Finding) { var data any dec := json.NewDecoder(strings.NewReader(input)) dec.UseNumber() if err := dec.Decode(&data); err != nil { // Fall back to plaintext if JSON parsing fails. - return ProcessPlain(input, detector, style) + return ProcessPlain(input, detector, replacer) } var allFindings []detect.Finding - redacted := walkJSON(data, detector, style, &allFindings) + redacted := walkJSON(data, detector, replacer, &allFindings) indent := indentOf(input) var ( @@ -54,27 +54,27 @@ func ProcessJSON(input string, detector *detect.Detector, style redact.Style) (s out, err = json.MarshalIndent(redacted, "", indent) } if err != nil { - return ProcessPlain(input, detector, style) + return ProcessPlain(input, detector, replacer) } return string(out), allFindings } -func walkJSON(v any, d *detect.Detector, style redact.Style, findings *[]detect.Finding) any { +func walkJSON(v any, d *detect.Detector, replacer redact.Replacer, findings *[]detect.Finding) any { switch val := v.(type) { case map[string]any: result := make(map[string]any, len(val)) for k, child := range val { - result[k] = walkJSON(child, d, style, findings) + result[k] = walkJSON(child, d, replacer, findings) } return result case []any: result := make([]any, len(val)) for i, child := range val { - result[i] = walkJSON(child, d, style, findings) + result[i] = walkJSON(child, d, replacer, findings) } return result case string: - if result, changed := redactString(val, d, style, findings); changed { + if result, changed := redactString(val, d, replacer, findings); changed { return result } return val diff --git a/internal/format/plain.go b/format/plain.go similarity index 86% rename from internal/format/plain.go rename to format/plain.go index a8d3691..8c9cb70 100644 --- a/internal/format/plain.go +++ 
b/format/plain.go @@ -3,12 +3,12 @@ package format import ( "strings" - "github.com/krypsis-io/wick/internal/detect" - "github.com/krypsis-io/wick/internal/redact" + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/redact" ) // ProcessPlain detects and redacts secrets/PII line by line. -func ProcessPlain(input string, detector *detect.Detector, style redact.Style) (string, []detect.Finding) { +func ProcessPlain(input string, detector *detect.Detector, replacer redact.Replacer) (string, []detect.Finding) { var allFindings []detect.Finding // First pass: multiline rules against the full input (e.g. PEM private key blocks). @@ -16,7 +16,7 @@ func ProcessPlain(input string, detector *detect.Detector, style redact.Style) ( multiFindings := detector.DetectMultiline(input) working := input if len(multiFindings) > 0 { - working = redact.Redact(input, multiFindings, style) + working = redact.Redact(input, multiFindings, replacer) // Normalize multiline findings from absolute input offsets to per-line relative // offsets so all entries in allFindings share the same coordinate semantics as // the per-line findings appended below. End is clamped to the end of the @@ -47,7 +47,7 @@ func ProcessPlain(input string, detector *detect.Detector, style redact.Style) ( findings[j].Line = i + 1 } allFindings = append(allFindings, findings...) - result[i] = redact.Redact(line, findings, style) + result[i] = redact.Redact(line, findings, replacer) } return strings.Join(result, "\n"), allFindings diff --git a/format/process.go b/format/process.go new file mode 100644 index 0000000..de185ca --- /dev/null +++ b/format/process.go @@ -0,0 +1,21 @@ +package format + +import ( + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/redact" +) + +// Process auto-detects the input format and applies the appropriate redaction strategy. 
+func Process(input string, detector *detect.Detector, replacer redact.Replacer) (string, []detect.Finding) { + detected := Detect(input) + switch detected { + case FormatJSON: + return ProcessJSON(input, detector, replacer) + case FormatYAML: + return ProcessYAML(input, detector, replacer) + case FormatEnv: + return ProcessEnv(input, detector, replacer) + default: + return ProcessPlain(input, detector, replacer) + } +} diff --git a/internal/format/yaml.go b/format/yaml.go similarity index 55% rename from internal/format/yaml.go rename to format/yaml.go index 589df53..7b38d99 100644 --- a/internal/format/yaml.go +++ b/format/yaml.go @@ -3,52 +3,52 @@ package format import ( "bytes" - "github.com/krypsis-io/wick/internal/detect" - "github.com/krypsis-io/wick/internal/redact" + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/redact" "gopkg.in/yaml.v3" ) // ProcessYAML parses YAML, redacts string values, and preserves structure/comments. -func ProcessYAML(input string, detector *detect.Detector, style redact.Style) (string, []detect.Finding) { +func ProcessYAML(input string, detector *detect.Detector, replacer redact.Replacer) (string, []detect.Finding) { var doc yaml.Node if err := yaml.Unmarshal([]byte(input), &doc); err != nil { - return ProcessPlain(input, detector, style) + return ProcessPlain(input, detector, replacer) } var allFindings []detect.Finding - walkYAML(&doc, detector, style, &allFindings) + walkYAML(&doc, detector, replacer, &allFindings) var buf bytes.Buffer enc := yaml.NewEncoder(&buf) enc.SetIndent(2) if err := enc.Encode(&doc); err != nil { - return ProcessPlain(input, detector, style) + return ProcessPlain(input, detector, replacer) } if err := enc.Close(); err != nil { - return ProcessPlain(input, detector, style) + return ProcessPlain(input, detector, replacer) } return buf.String(), allFindings } -func walkYAML(node *yaml.Node, d *detect.Detector, style redact.Style, findings *[]detect.Finding) { +func walkYAML(node 
*yaml.Node, d *detect.Detector, replacer redact.Replacer, findings *[]detect.Finding) { switch node.Kind { case yaml.DocumentNode: for _, child := range node.Content { - walkYAML(child, d, style, findings) + walkYAML(child, d, replacer, findings) } case yaml.MappingNode: // Content alternates: key, value, key, value... for i := 0; i+1 < len(node.Content); i += 2 { // Don't redact keys, only values. - walkYAML(node.Content[i+1], d, style, findings) + walkYAML(node.Content[i+1], d, replacer, findings) } case yaml.SequenceNode: for _, child := range node.Content { - walkYAML(child, d, style, findings) + walkYAML(child, d, replacer, findings) } case yaml.ScalarNode: if node.Tag == "!!str" || node.Tag == "" { - if result, changed := redactString(node.Value, d, style, findings); changed { + if result, changed := redactString(node.Value, d, replacer, findings); changed { node.Value = result } } diff --git a/internal/cmd/root.go b/internal/cmd/root.go index 71cee1f..7d18b20 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -9,21 +9,27 @@ import ( "path/filepath" "strings" + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/format" "github.com/krypsis-io/wick/internal/config" - "github.com/krypsis-io/wick/internal/detect" - "github.com/krypsis-io/wick/internal/format" "github.com/krypsis-io/wick/internal/output" - "github.com/krypsis-io/wick/internal/redact" + "github.com/krypsis-io/wick/redact" + wick "github.com/krypsis-io/wick" "github.com/spf13/cobra" ) var ( - flagFiles []string - flagDir string - flagOut string - flagStyle string - flagFormat string - flagSummary bool + flagFiles []string + flagDir string + flagOut string + flagStyle string + flagFormat string + flagSummary bool + flagReport bool + flagTokenize bool + flagRehydrate bool + flagKey string + flagTokenFile string ) var rootCmd = &cobra.Command{ @@ -40,9 +46,14 @@ func init() { rootCmd.Flags().StringSliceVar(&flagFiles, "file", nil, "input file(s) to redact") 
rootCmd.Flags().StringVar(&flagDir, "dir", "", "directory of files to redact") rootCmd.Flags().StringVar(&flagOut, "out", "", "output directory for --dir mode") - rootCmd.Flags().StringVar(&flagStyle, "style", "", "redaction style: redacted, stars, or custom=\"...\"") + rootCmd.Flags().StringVar(&flagStyle, "style", "", "redaction style: redacted, stars, hash, or custom=\"...\"") rootCmd.Flags().StringVar(&flagFormat, "format", "", "output format: text, json") rootCmd.Flags().BoolVar(&flagSummary, "summary", false, "print redaction summary to stderr") + rootCmd.Flags().BoolVar(&flagReport, "report", false, "print detailed per-finding report to stderr") + rootCmd.Flags().BoolVar(&flagTokenize, "tokenize", false, "redact with reversible tokens and write an encrypted token map") + rootCmd.Flags().BoolVar(&flagRehydrate, "rehydrate", false, "restore original values from a token map") + rootCmd.Flags().StringVar(&flagKey, "key", "", "base64-encoded AES-256 key for --tokenize / --rehydrate") + rootCmd.Flags().StringVar(&flagTokenFile, "token-file", ".wick-tokens.enc", "path to the encrypted token map file") } var errFindingsPresent = errors.New("findings present") @@ -70,15 +81,27 @@ func run(_ *cobra.Command, _ []string) error { return err } + // Rehydrate is a separate mode: read stdin, restore originals, done. + if flagRehydrate { + return runRehydrate() + } + cfg, err := config.Load() if err != nil { return fmt.Errorf("config: %w", err) } - style, err := resolveStyle(cfg) + // Tokenize mode: use reversible token replacer instead of normal style. + if flagTokenize { + return runTokenize(cfg) + } + + baseReplacer, err := resolveReplacer(cfg) if err != nil { return err } + // Apply per-pattern replacement overrides from custom patterns. 
+ replacer := applyPerPatternReplacements(baseReplacer, cfg.CustomPatterns) detector, err := newDetector(cfg) if err != nil { @@ -96,7 +119,7 @@ func run(_ *cobra.Command, _ []string) error { out: flagOut, } - foundCount, err := executeRunMode(opts, detector, style, outputFormat) + foundCount, err := executeRunMode(opts, detector, replacer, outputFormat) if err != nil { return err } @@ -107,7 +130,116 @@ func run(_ *cobra.Command, _ []string) error { return nil } +func runTokenize(cfg *config.Config) error { + if len(flagFiles) > 0 || flagDir != "" { + return fmt.Errorf("--tokenize only supports stdin input") + } + + input, err := readStdin() + if err != nil { + return err + } + + // Resolve or generate the key. + keyStr := flagKey + if keyStr == "" { + var genErr error + keyStr, genErr = wick.GenerateKey() + if genErr != nil { + return genErr + } + fmt.Fprintf(os.Stderr, "wick: key: %s\n", keyStr) + } + + key, err := wick.DecodeKey(keyStr) + if err != nil { + return err + } + + var opts []wick.Option + if len(cfg.CustomPatterns) > 0 { + opts = append(opts, wick.WithCustomPatterns(cfg.CustomPatterns)) + } + if len(cfg.Allowlist) > 0 { + opts = append(opts, wick.WithAllowlist(cfg.Allowlist)) + } + if len(cfg.Blocklist) > 0 { + var blockPatterns []detect.CustomPattern + for _, b := range cfg.Blocklist { + blockPatterns = append(blockPatterns, detect.CustomPattern{ + Name: b.Category, + Regex: b.Pattern, + }) + } + opts = append(opts, wick.WithBlocklist(blockPatterns)) + } + + redacted, tm, err := wick.Dehydrate(input, key, opts...) 
+ if err != nil { + return err + } + + if err := wick.SaveTokenMap(tm, key, flagTokenFile); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "wick: token map written to %s\n", flagTokenFile) + + fmt.Print(redacted) + return nil +} + +func runRehydrate() error { + if flagKey == "" { + return fmt.Errorf("--rehydrate requires --key") + } + + key, err := wick.DecodeKey(flagKey) + if err != nil { + return err + } + + input, err := readStdin() + if err != nil { + return err + } + + tm, err := wick.LoadTokenMap(key, flagTokenFile) + if err != nil { + return err + } + + restored, err := wick.Rehydrate(input, tm) + if err != nil { + return err + } + + fmt.Print(restored) + return nil +} + +func readStdin() (string, error) { + stat, err := os.Stdin.Stat() + if err != nil { + return "", fmt.Errorf("stdin: %w", err) + } + if (stat.Mode() & os.ModeCharDevice) != 0 { + return "", fmt.Errorf("no input: pipe data to wick or use --file/--dir") + } + reader := io.LimitReader(os.Stdin, maxStdinBytes+1) + data, err := io.ReadAll(reader) + if err != nil { + return "", fmt.Errorf("reading stdin: %w", err) + } + if len(data) > maxStdinBytes { + return "", fmt.Errorf("stdin exceeds maximum size of %d bytes", maxStdinBytes) + } + return string(data), nil +} + func validateRunFlags() error { + if flagTokenize && flagRehydrate { + return fmt.Errorf("--tokenize and --rehydrate are mutually exclusive") + } if flagDir != "" && len(flagFiles) > 0 { return fmt.Errorf("--dir and --file are mutually exclusive") } @@ -122,59 +254,72 @@ func newDetector(cfg *config.Config) (*detect.Detector, error) { if err != nil { return nil, fmt.Errorf("detector: %w", err) } - if len(cfg.CustomPatterns) == 0 { - return detector, nil + + if cfg.RulesFile != "" { + if err := detector.AddRulesFile(cfg.RulesFile); err != nil { + return nil, fmt.Errorf("rules_file: %w", err) + } } - if err := detector.SetCustomPatterns(cfg.CustomPatterns); err != nil { - return nil, fmt.Errorf("config: %w", err) + + if 
len(cfg.DisableRules) > 0 { + detector.DisableRules(cfg.DisableRules) } + + // Merge custom patterns and blocklist entries. + var allPatterns []detect.CustomPattern + allPatterns = append(allPatterns, cfg.CustomPatterns...) + for _, b := range cfg.Blocklist { + allPatterns = append(allPatterns, detect.CustomPattern{ + Name: b.Category, + Regex: b.Pattern, + }) + } + if len(allPatterns) > 0 { + if err := detector.SetCustomPatterns(allPatterns); err != nil { + return nil, fmt.Errorf("config: %w", err) + } + } + + if len(cfg.Allowlist) > 0 { + if err := detector.SetAllowlist(cfg.Allowlist); err != nil { + return nil, fmt.Errorf("config allowlist: %w", err) + } + } + return detector, nil } -func executeRunMode(opts runOptions, detector *detect.Detector, style redact.Style, outputFormat string) (int, error) { +func executeRunMode(opts runOptions, detector *detect.Detector, replacer redact.Replacer, outputFormat string) (int, error) { switch { case opts.dir != "": - return executeDirMode(detector, style, opts.dir, opts.out, outputFormat) + return executeDirMode(detector, replacer, opts.dir, opts.out, outputFormat) case len(opts.files) > 0: - return processFiles(opts.files, detector, style, outputFormat) + return processFiles(opts.files, detector, replacer, outputFormat) default: - return processStdin(detector, style, outputFormat) + return processStdin(detector, replacer, outputFormat) } } -func executeDirMode(detector *detect.Detector, style redact.Style, dir, out, outputFormat string) (int, error) { +func executeDirMode(detector *detect.Detector, replacer redact.Replacer, dir, out, outputFormat string) (int, error) { if out == "" { return 0, fmt.Errorf("--out is required with --dir") } if outputFormat == "json" { return 0, fmt.Errorf("--format json is not supported with --dir") } - return processDir(dir, out, detector, style, outputFormat) + return processDir(dir, out, detector, replacer, outputFormat) } -func processStdin(detector *detect.Detector, style redact.Style, 
outputFormat string) (int, error) { - stat, err := os.Stdin.Stat() +func processStdin(detector *detect.Detector, replacer redact.Replacer, outputFormat string) (int, error) { + data, err := readStdin() if err != nil { - return 0, fmt.Errorf("stdin: %w", err) - } - if (stat.Mode() & os.ModeCharDevice) != 0 { - return 0, fmt.Errorf("no input: pipe data to wick or use --file/--dir") + return 0, err } - - reader := io.LimitReader(os.Stdin, maxStdinBytes+1) - data, err := io.ReadAll(reader) - if err != nil { - return 0, fmt.Errorf("reading stdin: %w", err) - } - if len(data) > maxStdinBytes { - return 0, fmt.Errorf("stdin exceeds maximum size of %d bytes", maxStdinBytes) - } - - return processInput(string(data), detector, style, outputFormat) + return processInput(data, detector, replacer, outputFormat) } -func processInput(input string, d *detect.Detector, style redact.Style, outputFmt string) (int, error) { - redacted, findings := format.Process(input, d, style) +func processInput(input string, d *detect.Detector, replacer redact.Replacer, outputFmt string) (int, error) { + redacted, findings := format.Process(input, d, replacer) if outputFmt == "json" { jsonOut, err := output.JSON(redacted, findings) @@ -185,17 +330,20 @@ func processInput(input string, d *detect.Detector, style redact.Style, outputFm } else { // format.Process already redacted the text; for TTY colorization // we re-process from the original input. 
- fmt.Print(output.Terminal(input, redacted, findings, style)) + fmt.Print(output.Terminal(input, redacted, findings, replacer)) } if flagSummary { output.Summary(os.Stderr, findings) } + if flagReport { + output.Report(os.Stderr, findings) + } return len(findings), nil } -func processFiles(files []string, d *detect.Detector, style redact.Style, outputFmt string) (int, error) { +func processFiles(files []string, d *detect.Detector, replacer redact.Replacer, outputFmt string) (int, error) { total := 0 for _, f := range files { info, err := os.Stat(f) @@ -209,7 +357,7 @@ func processFiles(files []string, d *detect.Detector, style redact.Style, output if err != nil { return total, fmt.Errorf("reading %s: %w", f, err) } - n, err := processInput(string(data), d, style, outputFmt) + n, err := processInput(string(data), d, replacer, outputFmt) total += n if err != nil { return total, err @@ -218,7 +366,7 @@ func processFiles(files []string, d *detect.Detector, style redact.Style, output return total, nil } -func processDir(dir, outDir string, d *detect.Detector, style redact.Style, _ string) (int, error) { +func processDir(dir, outDir string, d *detect.Detector, replacer redact.Replacer, _ string) (int, error) { dirAbs, err := filepath.Abs(dir) if err != nil { return 0, fmt.Errorf("resolving dir path: %w", err) @@ -255,7 +403,7 @@ func processDir(dir, outDir string, d *detect.Detector, style redact.Style, _ st return err } - redacted, findings := format.Process(string(data), d, style) + redacted, findings := format.Process(string(data), d, replacer) total += len(findings) if err := os.WriteFile(outPath, []byte(redacted), info.Mode()); err != nil { @@ -271,22 +419,33 @@ func processDir(dir, outDir string, d *detect.Detector, style redact.Style, _ st return total, err } -func resolveStyle(cfg *config.Config) (redact.Style, error) { +func resolveReplacer(cfg *config.Config) (redact.Replacer, error) { s := cfg.Style if flagStyle != "" { s = flagStyle } switch { case s == "" 
|| s == "redacted": - return redact.StyleRedacted, nil + return redact.Redacted, nil case s == "stars": - return redact.StyleStars, nil + return redact.Stars, nil + case s == "hash": + return redact.Hash, nil case strings.HasPrefix(s, "custom="): - redact.SetCustomReplacement(strings.TrimPrefix(s, "custom=")) - return redact.CustomStyle(), nil + return redact.Custom(strings.TrimPrefix(s, "custom=")), nil default: - return 0, fmt.Errorf("unknown style %q: use redacted, stars, or custom=\"...\"", s) + return nil, fmt.Errorf("unknown style %q: use redacted, stars, hash, or custom=\"...\"", s) + } +} + +func applyPerPatternReplacements(base redact.Replacer, patterns []detect.CustomPattern) redact.Replacer { + overrides := make(map[string]string) + for _, p := range patterns { + if p.Replacement != "" { + overrides[p.Name] = p.Replacement + } } + return redact.PerRule(base, overrides) } func resolveFormat(cfg *config.Config) (string, error) { diff --git a/internal/config/config.go b/internal/config/config.go index 38ad5e7..083cfc3 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -5,15 +5,27 @@ import ( "os" "path/filepath" - "github.com/krypsis-io/wick/internal/detect" + "github.com/krypsis-io/wick/detect" "gopkg.in/yaml.v3" ) // Config represents the merged configuration from all sources. type Config struct { - Style string `yaml:"style"` - CustomPatterns []detect.CustomPattern `yaml:"patterns"` - Format string `yaml:"format"` + Style string `yaml:"style"` + CustomPatterns []detect.CustomPattern `yaml:"patterns"` + Format string `yaml:"format"` + Allowlist []detect.AllowlistEntry `yaml:"allowlist"` + Blocklist []BlocklistEntry `yaml:"blocklist"` + RulesFile string `yaml:"rules_file"` + DisableRules []string `yaml:"disable_rules"` +} + +// BlocklistEntry defines a pattern that is always redacted, even if not matched +// by built-in rules. It is compiled as a custom detection pattern. 
+type BlocklistEntry struct { + Pattern string `yaml:"pattern"` + Category string `yaml:"category,omitempty"` + Reason string `yaml:"reason,omitempty"` } // Load reads configuration from global (~/.config/wick/config.yaml) and @@ -47,6 +59,12 @@ func Load() (*Config, error) { cfg.Format = proj.Format } cfg.CustomPatterns = append(cfg.CustomPatterns, proj.CustomPatterns...) + cfg.Allowlist = append(cfg.Allowlist, proj.Allowlist...) + cfg.Blocklist = append(cfg.Blocklist, proj.Blocklist...) + if proj.RulesFile != "" { + cfg.RulesFile = proj.RulesFile + } + cfg.DisableRules = append(cfg.DisableRules, proj.DisableRules...) } return cfg, nil diff --git a/internal/format/process.go b/internal/format/process.go deleted file mode 100644 index 60bfdca..0000000 --- a/internal/format/process.go +++ /dev/null @@ -1,21 +0,0 @@ -package format - -import ( - "github.com/krypsis-io/wick/internal/detect" - "github.com/krypsis-io/wick/internal/redact" -) - -// Process auto-detects the input format and applies the appropriate redaction strategy. 
-func Process(input string, detector *detect.Detector, style redact.Style) (string, []detect.Finding) { - detected := Detect(input) - switch detected { - case FormatJSON: - return ProcessJSON(input, detector, style) - case FormatYAML: - return ProcessYAML(input, detector, style) - case FormatEnv: - return ProcessEnv(input, detector, style) - default: - return ProcessPlain(input, detector, style) - } -} diff --git a/internal/output/json.go b/internal/output/json.go index bb08664..707fc05 100644 --- a/internal/output/json.go +++ b/internal/output/json.go @@ -4,7 +4,7 @@ package output import ( "encoding/json" - "github.com/krypsis-io/wick/internal/detect" + "github.com/krypsis-io/wick/detect" ) type jsonOutput struct { diff --git a/internal/output/report.go b/internal/output/report.go new file mode 100644 index 0000000..6e7cfc1 --- /dev/null +++ b/internal/output/report.go @@ -0,0 +1,37 @@ +package output + +import ( + "fmt" + "io" + + "github.com/krypsis-io/wick/detect" +) + +const maxValueLen = 40 + +// Report writes a detailed per-finding report to w (typically stderr). +// Each finding gets its own line with line number, column, category, rule ID, +// and a truncated preview of the matched value. +// Output goes to stderr so it does not interfere with the redacted stdout. +func Report(w io.Writer, findings []detect.Finding) { + if len(findings) == 0 { + return + } + _, _ = fmt.Fprintf(w, "wick: %d finding(s)\n", len(findings)) + for _, f := range findings { + _, _ = fmt.Fprintf(w, " line %d col %d %-8s %-30s %s\n", + f.Line, + f.Start+1, // 1-based column + f.Category, + f.RuleID, + truncate(f.Value, maxValueLen), + ) + } +} + +func truncate(s string, max int) string { + if len(s) <= max { + return s + } + return s[:max-3] + "..." 
+} diff --git a/internal/output/report_test.go b/internal/output/report_test.go new file mode 100644 index 0000000..12f2472 --- /dev/null +++ b/internal/output/report_test.go @@ -0,0 +1,78 @@ +package output + +import ( + "strings" + "testing" + + "github.com/krypsis-io/wick/detect" +) + +func TestReport_Empty(t *testing.T) { + var buf strings.Builder + Report(&buf, nil) + if buf.String() != "" { + t.Errorf("expected no output for empty findings, got %q", buf.String()) + } +} + +func TestReport_SingleFinding(t *testing.T) { + findings := []detect.Finding{ + {Category: "pii", RuleID: "email", Value: "admin@acme.com", Line: 3, Start: 9}, + } + var buf strings.Builder + Report(&buf, findings) + out := buf.String() + + if !strings.Contains(out, "1 finding") { + t.Errorf("expected finding count in output: %q", out) + } + if !strings.Contains(out, "line 3") { + t.Errorf("expected line number in output: %q", out) + } + if !strings.Contains(out, "col 10") { // Start+1 + t.Errorf("expected 1-based column in output: %q", out) + } + if !strings.Contains(out, "pii") { + t.Errorf("expected category in output: %q", out) + } + if !strings.Contains(out, "email") { + t.Errorf("expected rule ID in output: %q", out) + } + if !strings.Contains(out, "admin@acme.com") { + t.Errorf("expected value in output: %q", out) + } +} + +func TestReport_MultipleFindings(t *testing.T) { + findings := []detect.Finding{ + {Category: "pii", RuleID: "email", Value: "admin@acme.com", Line: 1, Start: 0}, + {Category: "secret", RuleID: "aws-access-token", Value: "AKIAZ5GMHYJKLMNOPQRS", Line: 2, Start: 4}, + } + var buf strings.Builder + Report(&buf, findings) + out := buf.String() + + if !strings.Contains(out, "2 finding") { + t.Errorf("expected 2 findings in header: %q", out) + } + if !strings.Contains(out, "email") || !strings.Contains(out, "aws-access-token") { + t.Errorf("expected both rule IDs: %q", out) + } +} + +func TestReport_ValueTruncation(t *testing.T) { + long := strings.Repeat("a", 50) + 
findings := []detect.Finding{ + {Category: "custom", RuleID: "test", Value: long, Line: 1, Start: 0}, + } + var buf strings.Builder + Report(&buf, findings) + out := buf.String() + + if strings.Contains(out, long) { + t.Errorf("long value should be truncated: %q", out) + } + if !strings.Contains(out, "...") { + t.Errorf("truncated value should end with ...: %q", out) + } +} diff --git a/internal/output/summary.go b/internal/output/summary.go index 5fae1da..7fb0eef 100644 --- a/internal/output/summary.go +++ b/internal/output/summary.go @@ -6,7 +6,7 @@ import ( "sort" "strings" - "github.com/krypsis-io/wick/internal/detect" + "github.com/krypsis-io/wick/detect" ) // Summary writes a human-readable summary of findings to the given writer (typically stderr). diff --git a/internal/output/terminal.go b/internal/output/terminal.go index 8a456d1..7e219e3 100644 --- a/internal/output/terminal.go +++ b/internal/output/terminal.go @@ -5,8 +5,8 @@ import ( "strings" "charm.land/lipgloss/v2" - "github.com/krypsis-io/wick/internal/detect" - "github.com/krypsis-io/wick/internal/redact" + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/redact" ) var redactedStyle = lipgloss.NewStyle(). @@ -17,11 +17,11 @@ var redactedStyle = lipgloss.NewStyle(). // Terminal returns the output string. When stdout is a TTY, redacted values // in the original text are highlighted with color. When piped, returns the // pre-redacted plain text. -func Terminal(original, redacted string, findings []detect.Finding, style redact.Style) string { +func Terminal(original, redacted string, findings []detect.Finding, replacer redact.Replacer) string { if !isTTY() || len(findings) == 0 { return redacted } - return colorize(original, findings, style) + return colorize(original, findings, replacer) } func isTTY() bool { @@ -34,58 +34,39 @@ func isTTY() bool { // colorize rebuilds the output from the original text, replacing finding // ranges with color-highlighted replacement strings. 
-func colorize(input string, findings []detect.Finding, style redact.Style) string { +func colorize(input string, findings []detect.Finding, replacer redact.Replacer) string { lines := strings.Split(input, "\n") - replacement := style.Replacement() - colored := redactedStyle.Render(replacement) for i, line := range lines { lineFindings := findingsForLine(findings, i+1) if len(lineFindings) == 0 { continue } - lines[i] = replaceLine(line, lineFindings, colored) + lines[i] = replaceLine(line, lineFindings, replacer) } return strings.Join(lines, "\n") } -func replaceLine(line string, findings []detect.Finding, colored string) string { - spans := make([]span, len(findings)) - for i, f := range findings { - spans[i] = span{f.Start, f.End} +func replaceLine(line string, findings []detect.Finding, replacer redact.Replacer) string { + type coloredSpan struct { + start int + end int + colored string } - merged := mergeSpans(spans) - var result strings.Builder - prev := 0 - for _, s := range merged { - if s.start >= len(line) { - continue - } - end := s.end - if end > len(line) { - end = len(line) - } - result.WriteString(line[prev:s.start]) - result.WriteString(colored) - prev = end + spans := make([]coloredSpan, len(findings)) + for i, f := range findings { + replacement := replacer.Replace(line[f.Start:f.End], f) + spans[i] = coloredSpan{f.Start, f.End, redactedStyle.Render(replacement)} } - result.WriteString(line[prev:]) - return result.String() -} - -type span struct{ start, end int } -func mergeSpans(spans []span) []span { - if len(spans) == 0 { - return nil - } + // Sort and merge overlapping spans. 
for i := 1; i < len(spans); i++ { for j := i; j > 0 && spans[j].start < spans[j-1].start; j-- { spans[j], spans[j-1] = spans[j-1], spans[j] } } - merged := []span{spans[0]} + merged := []coloredSpan{spans[0]} for _, s := range spans[1:] { last := &merged[len(merged)-1] if s.start <= last.end { @@ -96,7 +77,23 @@ func mergeSpans(spans []span) []span { merged = append(merged, s) } } - return merged + + var result strings.Builder + prev := 0 + for _, s := range merged { + if s.start >= len(line) { + continue + } + end := s.end + if end > len(line) { + end = len(line) + } + result.WriteString(line[prev:s.start]) + result.WriteString(s.colored) + prev = end + } + result.WriteString(line[prev:]) + return result.String() } func findingsForLine(findings []detect.Finding, lineNum int) []detect.Finding { diff --git a/internal/redact/styles.go b/internal/redact/styles.go deleted file mode 100644 index b72c8aa..0000000 --- a/internal/redact/styles.go +++ /dev/null @@ -1,34 +0,0 @@ -package redact - -// Style determines how redacted values are replaced. -type Style int - -// Built-in redaction styles. -const ( - StyleRedacted Style = iota // [REDACTED] - StyleStars // *** -) - -// CustomStyle returns a Style that uses a custom replacement string. -// Stored as a negative value to distinguish from named styles. -// Use Replacement() to get the actual string. -func CustomStyle() Style { return Style(-1) } - -var customReplacement string - -// SetCustomReplacement sets the string used by CustomStyle. -func SetCustomReplacement(s string) { - customReplacement = s -} - -// Replacement returns the replacement string for a style. 
-func (s Style) Replacement() string { - switch s { - case StyleStars: - return "***" - case Style(-1): - return customReplacement - default: - return "[REDACTED]" - } -} diff --git a/options.go b/options.go new file mode 100644 index 0000000..82e78c1 --- /dev/null +++ b/options.go @@ -0,0 +1,72 @@ +package wick + +import ( + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/redact" +) + +// config holds the resolved options for a Redact call. +type config struct { + replacer redact.Replacer + customPatterns []detect.CustomPattern + allowlist []detect.AllowlistEntry + blocklist []detect.CustomPattern + rulesFile string + disableRules []string +} + +// Option configures a Redact call. +type Option interface { + apply(*config) +} + +type optionFunc func(*config) + +func (f optionFunc) apply(c *config) { f(c) } + +// WithReplacer sets the replacement strategy for redacted values. +// Use redact.Redacted, redact.Stars, redact.Custom("..."), or a custom Replacer. +func WithReplacer(r redact.Replacer) Option { + return optionFunc(func(c *config) { + c.replacer = r + }) +} + +// WithCustomPatterns adds user-defined detection patterns. +func WithCustomPatterns(patterns []detect.CustomPattern) Option { + return optionFunc(func(c *config) { + c.customPatterns = append(c.customPatterns, patterns...) + }) +} + +// WithAllowlist adds known-safe patterns that will never be redacted. +// Each entry can be an exact string or a regex (set Regex: true). +func WithAllowlist(entries []detect.AllowlistEntry) Option { + return optionFunc(func(c *config) { + c.allowlist = append(c.allowlist, entries...) + }) +} + +// WithBlocklist adds patterns that are always redacted, even if not matched +// by built-in rules. Each entry is treated as a custom detection pattern. +func WithBlocklist(entries []detect.CustomPattern) Option { + return optionFunc(func(c *config) { + c.blocklist = append(c.blocklist, entries...) 
+ }) +} + +// WithRulesFile loads additional secret detection rules from a +// Gitleaks-compatible TOML file, appending them to the built-in rules. +func WithRulesFile(path string) Option { + return optionFunc(func(c *config) { + c.rulesFile = path + }) +} + +// WithDisabledRules removes the named rules from the detector. +// Use this to suppress built-in rules that produce false positives. +func WithDisabledRules(ids []string) Option { + return optionFunc(func(c *config) { + c.disableRules = append(c.disableRules, ids...) + }) +} diff --git a/redact/hash.go b/redact/hash.go new file mode 100644 index 0000000..0dd2c52 --- /dev/null +++ b/redact/hash.go @@ -0,0 +1,29 @@ +package redact + +import ( + "crypto/sha256" + "fmt" + "strings" + + "github.com/krypsis-io/wick/detect" +) + +// Hash is a Replacer that produces deterministic, one-way pseudonymized output. +// Each value is replaced with a bracketed tag containing the detection category +// and a truncated SHA-256 hash of the original value: +// +// admin@acme.com → [EMAIL:a1b2c3d4] +// 10.0.1.42 → [IPV4:e5f6a7b8] +// +// The same input value always produces the same replacement, enabling log +// correlation without exposing the underlying data. 
+var Hash Replacer = hashReplacer{} + +type hashReplacer struct{} + +func (hashReplacer) Replace(value string, finding detect.Finding) string { + sum := sha256.Sum256([]byte(value)) + h := fmt.Sprintf("%x", sum[:4]) // 8 hex chars from first 4 bytes + tag := strings.ToUpper(finding.RuleID) + return fmt.Sprintf("[%s:%s]", tag, h) +} diff --git a/redact/hash_test.go b/redact/hash_test.go new file mode 100644 index 0000000..10e18d8 --- /dev/null +++ b/redact/hash_test.go @@ -0,0 +1,80 @@ +package redact + +import ( + "strings" + "testing" + + "github.com/krypsis-io/wick/detect" +) + +func TestHashReplacer_Format(t *testing.T) { + f := detect.Finding{Category: "pii", RuleID: "email", Value: "admin@acme.com"} + result := Hash.Replace("admin@acme.com", f) + + if !strings.HasPrefix(result, "[EMAIL:") { + t.Errorf("expected [EMAIL:...] format, got %q", result) + } + if !strings.HasSuffix(result, "]") { + t.Errorf("expected closing ], got %q", result) + } + // Tag should be uppercase RuleID, hash should be 8 hex chars. 
+ // Format: [EMAIL:a1b2c3d4] + inner := result[1 : len(result)-1] // strip [ and ] + parts := strings.SplitN(inner, ":", 2) + if len(parts) != 2 { + t.Fatalf("expected TAG:HASH format, got %q", inner) + } + if parts[0] != "EMAIL" { + t.Errorf("expected tag EMAIL, got %q", parts[0]) + } + if len(parts[1]) != 8 { + t.Errorf("expected 8-char hash, got %q (len %d)", parts[1], len(parts[1])) + } +} + +func TestHashReplacer_Deterministic(t *testing.T) { + f := detect.Finding{Category: "pii", RuleID: "email"} + a := Hash.Replace("admin@acme.com", f) + b := Hash.Replace("admin@acme.com", f) + if a != b { + t.Errorf("hash not deterministic: %q != %q", a, b) + } +} + +func TestHashReplacer_DifferentValues(t *testing.T) { + f := detect.Finding{Category: "pii", RuleID: "email"} + a := Hash.Replace("admin@acme.com", f) + b := Hash.Replace("other@acme.com", f) + if a == b { + t.Errorf("different values produced same hash: %q", a) + } +} + +func TestHashReplacer_RuleIDInTag(t *testing.T) { + tests := []struct { + ruleID string + wantTag string + }{ + {"email", "EMAIL"}, + {"ipv4", "IPV4"}, + {"us-ssn", "US-SSN"}, + {"aws-access-token", "AWS-ACCESS-TOKEN"}, + } + for _, tt := range tests { + f := detect.Finding{RuleID: tt.ruleID} + result := Hash.Replace("somevalue", f) + if !strings.HasPrefix(result, "["+tt.wantTag+":") { + t.Errorf("ruleID %q: expected tag %q, got %q", tt.ruleID, tt.wantTag, result) + } + } +} + +func TestHashReplacer_OneWay(t *testing.T) { + // Just verifies the output does not contain the original value. 
+ f := detect.Finding{RuleID: "email"} + value := "supersecret@example.com" + result := Hash.Replace(value, f) + if strings.Contains(result, value) { + t.Errorf("hash output should not contain original value: %q", result) + } +} diff --git a/redact/perrule.go b/redact/perrule.go new file mode 100644 index 0000000..efd8b42 --- /dev/null +++ b/redact/perrule.go @@ -0,0 +1,27 @@ +package redact + +import "github.com/krypsis-io/wick/detect" + +// PerRule wraps a fallback Replacer and applies per-rule replacement overrides. +// If a finding's RuleID has an entry in the overrides map, that string is used +// instead of the fallback replacer's output. +// +// This is used to honour per-pattern replacement fields defined in .wick.yaml. +func PerRule(fallback Replacer, overrides map[string]string) Replacer { + if len(overrides) == 0 { + return fallback + } + return perRuleReplacer{fallback: fallback, overrides: overrides} +} + +type perRuleReplacer struct { + fallback Replacer + overrides map[string]string +} + +func (r perRuleReplacer) Replace(value string, finding detect.Finding) string { + if s, ok := r.overrides[finding.RuleID]; ok { + return s + } + return r.fallback.Replace(value, finding) +} diff --git a/redact/perrule_test.go b/redact/perrule_test.go new file mode 100644 index 0000000..98bc179 --- /dev/null +++ b/redact/perrule_test.go @@ -0,0 +1,40 @@ +package redact + +import ( + "testing" + + "github.com/krypsis-io/wick/detect" +) + +func TestPerRule_Override(t *testing.T) { + overrides := map[string]string{ + "internal-code": "[INTERNAL]", + } + r := PerRule(Redacted, overrides) + f := detect.Finding{RuleID: "internal-code"} + got := r.Replace("ACME-1234", f) + if got != "[INTERNAL]" { + t.Errorf("expected [INTERNAL], got %q", got) + } +} + +func TestPerRule_FallsBackToBase(t *testing.T) { + overrides := map[string]string{ + "internal-code": "[INTERNAL]", + } + r := PerRule(Stars, overrides) + f := detect.Finding{RuleID: "email"} + got := 
r.Replace("admin@acme.com", f) + if got != "***" { + t.Errorf("expected fallback ***, got %q", got) + } +} + +func TestPerRule_EmptyOverrides(t *testing.T) { + r := PerRule(Redacted, nil) + f := detect.Finding{RuleID: "email"} + got := r.Replace("admin@acme.com", f) + if got != "[REDACTED]" { + t.Errorf("expected [REDACTED], got %q", got) + } +} diff --git a/internal/redact/redactor.go b/redact/redactor.go similarity index 51% rename from internal/redact/redactor.go rename to redact/redactor.go index c8c014c..3b49b0d 100644 --- a/internal/redact/redactor.go +++ b/redact/redactor.go @@ -4,49 +4,37 @@ package redact import ( "sort" - "github.com/krypsis-io/wick/internal/detect" + "github.com/krypsis-io/wick/detect" ) -// span represents a byte range to redact within a single line. -type span struct { - start int - end int -} - -// Redact replaces all finding matches in the input line with the style's replacement string. +// Redact replaces all finding matches in the input line using the given Replacer. // Findings must all belong to the same line. Overlapping ranges are merged. -func Redact(line string, findings []detect.Finding, style Style) string { +func Redact(line string, findings []detect.Finding, replacer Replacer) string { if len(findings) == 0 { return line } - spans := make([]span, len(findings)) - for i, f := range findings { - spans[i] = span{start: f.Start, end: f.End} + // Build replacement strings per finding before merging spans. + type replacementSpan struct { + start int + end int + replacement string } - merged := mergeSpans(spans) - replacement := style.Replacement() - var result []byte - prev := 0 - for _, s := range merged { - result = append(result, line[prev:s.start]...) - result = append(result, replacement...) 
- prev = s.end + spans := make([]replacementSpan, len(findings)) + for i, f := range findings { + spans[i] = replacementSpan{ + start: f.Start, + end: f.End, + replacement: replacer.Replace(line[f.Start:f.End], f), + } } - result = append(result, line[prev:]...) - return string(result) -} -// mergeSpans sorts and merges overlapping byte ranges. -func mergeSpans(spans []span) []span { - if len(spans) == 0 { - return nil - } + // Sort and merge overlapping spans. For merged spans, use the first replacement. sort.Slice(spans, func(i, j int) bool { return spans[i].start < spans[j].start }) - merged := []span{spans[0]} + merged := []replacementSpan{spans[0]} for _, s := range spans[1:] { last := &merged[len(merged)-1] if s.start <= last.end { @@ -57,5 +45,15 @@ func mergeSpans(spans []span) []span { merged = append(merged, s) } } - return merged + + var result []byte + prev := 0 + for _, s := range merged { + result = append(result, line[prev:s.start]...) + result = append(result, s.replacement...) + prev = s.end + } + result = append(result, line[prev:]...) 
+ return string(result) } + diff --git a/internal/redact/redactor_test.go b/redact/redactor_test.go similarity index 79% rename from internal/redact/redactor_test.go rename to redact/redactor_test.go index e9feedf..c1f6a50 100644 --- a/internal/redact/redactor_test.go +++ b/redact/redactor_test.go @@ -3,7 +3,7 @@ package redact import ( "testing" - "github.com/krypsis-io/wick/internal/detect" + "github.com/krypsis-io/wick/detect" ) func TestRedact_SingleFinding(t *testing.T) { @@ -11,7 +11,7 @@ func TestRedact_SingleFinding(t *testing.T) { findings := []detect.Finding{ {Start: 4, End: 24, Category: "secret", RuleID: "aws"}, } - got := Redact(line, findings, StyleRedacted) + got := Redact(line, findings, Redacted) want := "key=[REDACTED] done" if got != want { t.Errorf("got %q, want %q", got, want) @@ -23,7 +23,7 @@ func TestRedact_Stars(t *testing.T) { findings := []detect.Finding{ {Start: 7, End: 21, Category: "pii", RuleID: "email"}, } - got := Redact(line, findings, StyleStars) + got := Redact(line, findings, Stars) want := "email: ***" if got != want { t.Errorf("got %q, want %q", got, want) @@ -36,7 +36,7 @@ func TestRedact_Overlapping(t *testing.T) { {Start: 2, End: 6}, {Start: 4, End: 8}, } - got := Redact(line, findings, StyleRedacted) + got := Redact(line, findings, Redacted) want := "AB[REDACTED]IJ" if got != want { t.Errorf("got %q, want %q", got, want) @@ -45,19 +45,19 @@ func TestRedact_Overlapping(t *testing.T) { func TestRedact_NoFindings(t *testing.T) { line := "nothing here" - got := Redact(line, nil, StyleRedacted) + got := Redact(line, nil, Redacted) if got != line { t.Errorf("got %q, want %q", got, line) } } func TestRedact_CustomStyle(t *testing.T) { - SetCustomReplacement("XXXXX") + replacer := Custom("XXXXX") line := "secret=mysecret" findings := []detect.Finding{ {Start: 7, End: 15}, } - got := Redact(line, findings, CustomStyle()) + got := Redact(line, findings, replacer) want := "secret=XXXXX" if got != want { t.Errorf("got %q, want %q", 
got, want) diff --git a/redact/styles.go b/redact/styles.go new file mode 100644 index 0000000..52cccf3 --- /dev/null +++ b/redact/styles.go @@ -0,0 +1,43 @@ +package redact + +import "github.com/krypsis-io/wick/detect" + +// Replacer determines how detected values are replaced in redacted output. +// Static styles ignore the arguments and return a fixed string. Context-aware +// replacers (hash, tokenize) use the value and finding to produce per-match output. +type Replacer interface { + Replace(value string, finding detect.Finding) string +} + +type staticReplacer struct { + replacement string +} + +func (r staticReplacer) Replace(string, detect.Finding) string { + return r.replacement +} + +// Replacement returns the static replacement string, or empty for dynamic replacers. +func (r staticReplacer) Replacement() string { + return r.replacement +} + +// Predefined replacers. +var ( + Redacted Replacer = staticReplacer{"[REDACTED]"} + Stars Replacer = staticReplacer{"***"} +) + +// Custom returns a Replacer that always uses the given string. +func Custom(replacement string) Replacer { + return staticReplacer{replacement} +} + +// StaticReplacement returns the fixed replacement string if r is a static +// replacer, or empty string and false otherwise. +func StaticReplacement(r Replacer) (string, bool) { + if sr, ok := r.(staticReplacer); ok { + return sr.replacement, true + } + return "", false +} diff --git a/redact/tokenize.go b/redact/tokenize.go new file mode 100644 index 0000000..938c6a4 --- /dev/null +++ b/redact/tokenize.go @@ -0,0 +1,74 @@ +package redact + +import ( + "fmt" + "strings" + "sync" + + "github.com/krypsis-io/wick/detect" +) + +// TokenEntry records a single redaction mapping. +type TokenEntry struct { + Original string + Replacement string + Category string + RuleID string + Count int +} + +// TokenizeReplacer is a Replacer that replaces each unique value with a +// deterministic token of the form [CATEGORY-N]. 
The same original value +// always maps to the same token within a session. Call Entries() after +// processing to retrieve the full mapping. +type TokenizeReplacer struct { + mu sync.Mutex + byValue map[string]*TokenEntry // original value → entry + counter map[string]int // category → next counter +} + +// NewTokenizeReplacer creates a TokenizeReplacer ready for use. +func NewTokenizeReplacer() *TokenizeReplacer { + return &TokenizeReplacer{ + byValue: make(map[string]*TokenEntry), + counter: make(map[string]int), + } +} + +// Replace satisfies the Replacer interface. It assigns a stable token to each +// unique value, recording the mapping for later retrieval via Entries. +func (t *TokenizeReplacer) Replace(value string, finding detect.Finding) string { + t.mu.Lock() + defer t.mu.Unlock() + + if entry, ok := t.byValue[value]; ok { + entry.Count++ + return entry.Replacement + } + + tag := strings.ToUpper(finding.RuleID) + t.counter[tag]++ + token := fmt.Sprintf("[%s-%d]", tag, t.counter[tag]) + + entry := &TokenEntry{ + Original: value, + Replacement: token, + Category: finding.Category, + RuleID: finding.RuleID, + Count: 1, + } + t.byValue[value] = entry + return token +} + +// Entries returns all recorded value→token mappings, keyed by replacement token. 
+func (t *TokenizeReplacer) Entries() map[string]*TokenEntry { + t.mu.Lock() + defer t.mu.Unlock() + + out := make(map[string]*TokenEntry, len(t.byValue)) + for _, e := range t.byValue { + out[e.Replacement] = e + } + return out +} diff --git a/redact/tokenize_test.go b/redact/tokenize_test.go new file mode 100644 index 0000000..8b7b38d --- /dev/null +++ b/redact/tokenize_test.go @@ -0,0 +1,95 @@ +package redact + +import ( + "strings" + "testing" + + "github.com/krypsis-io/wick/detect" +) + +func TestTokenizeReplacer_Format(t *testing.T) { + tr := NewTokenizeReplacer() + f := detect.Finding{Category: "pii", RuleID: "email"} + result := tr.Replace("admin@acme.com", f) + + if !strings.HasPrefix(result, "[EMAIL-") { + t.Errorf("expected [EMAIL-N] format, got %q", result) + } + if !strings.HasSuffix(result, "]") { + t.Errorf("expected closing ], got %q", result) + } +} + +func TestTokenizeReplacer_SameValueSameToken(t *testing.T) { + tr := NewTokenizeReplacer() + f := detect.Finding{Category: "pii", RuleID: "email"} + a := tr.Replace("admin@acme.com", f) + b := tr.Replace("admin@acme.com", f) + if a != b { + t.Errorf("same value should produce same token: %q != %q", a, b) + } +} + +func TestTokenizeReplacer_DifferentValuesDifferentTokens(t *testing.T) { + tr := NewTokenizeReplacer() + f := detect.Finding{Category: "pii", RuleID: "email"} + a := tr.Replace("admin@acme.com", f) + b := tr.Replace("other@acme.com", f) + if a == b { + t.Errorf("different values should get different tokens, both got %q", a) + } +} + +func TestTokenizeReplacer_CountIncrement(t *testing.T) { + tr := NewTokenizeReplacer() + f := detect.Finding{Category: "pii", RuleID: "email"} + tr.Replace("admin@acme.com", f) + tr.Replace("admin@acme.com", f) + tr.Replace("admin@acme.com", f) + + entries := tr.Entries() + for _, e := range entries { + if e.Original == "admin@acme.com" && e.Count != 3 { + t.Errorf("expected count 3, got %d", e.Count) + } + } +} + +func TestTokenizeReplacer_Entries(t 
*testing.T) { + tr := NewTokenizeReplacer() + tr.Replace("admin@acme.com", detect.Finding{Category: "pii", RuleID: "email"}) + tr.Replace("10.0.1.42", detect.Finding{Category: "pii", RuleID: "ipv4"}) + + entries := tr.Entries() + if len(entries) != 2 { + t.Fatalf("expected 2 entries, got %d", len(entries)) + } + + // Entries are keyed by replacement token. + found := 0 + for _, e := range entries { + if e.Original == "admin@acme.com" || e.Original == "10.0.1.42" { + found++ + } + } + if found != 2 { + t.Errorf("expected both originals in entries, got %d", found) + } +} + +func TestTokenizeReplacer_TokenCountersPerRuleID(t *testing.T) { + tr := NewTokenizeReplacer() + e1 := tr.Replace("admin@acme.com", detect.Finding{RuleID: "email"}) + e2 := tr.Replace("other@acme.com", detect.Finding{RuleID: "email"}) + ip := tr.Replace("10.0.1.42", detect.Finding{RuleID: "ipv4"}) + + if e1 != "[EMAIL-1]" { + t.Errorf("expected [EMAIL-1], got %q", e1) + } + if e2 != "[EMAIL-2]" { + t.Errorf("expected [EMAIL-2], got %q", e2) + } + if ip != "[IPV4-1]" { + t.Errorf("expected [IPV4-1], got %q", ip) + } +} diff --git a/report.go b/report.go new file mode 100644 index 0000000..7708127 --- /dev/null +++ b/report.go @@ -0,0 +1,34 @@ +package wick + +// Report summarizes the results of a redaction operation. +type Report struct { + // Findings is the list of individual detections. + Findings []Finding + + // Total is the number of findings. + Total int + + // ByRule maps rule IDs to their occurrence count. + ByRule map[string]int +} + +// Finding represents a single detected secret, PII, or custom pattern match. +type Finding struct { + // Category is the detection category: "secret", "pii", or "custom". + Category string + + // RuleID identifies the specific detection rule (e.g., "aws-access-token", "email"). + RuleID string + + // Value is the matched sensitive text. + Value string + + // Start is the byte offset within the line where the match begins. 
// tokenKeyLen is the only accepted key size: 32 bytes selects AES-256.
const tokenKeyLen = 32

// TokenEntry records a single redacted value and its replacement.
type TokenEntry struct {
	Original    string `json:"original"`
	Replacement string `json:"replacement"`
	Category    string `json:"category"`
	RuleID      string `json:"rule_id"`
	Count       int    `json:"count"`
}

// TokenMap holds the mapping from replacement tokens back to original values.
// It is produced by Dehydrate and consumed by Rehydrate.
type TokenMap struct {
	entries map[string]*TokenEntry // keyed by replacement token
}

// GenerateKey returns a fresh random 256-bit key, base64-encoded (standard
// alphabet) so it can be stored as text and later passed to DecodeKey.
func GenerateKey() (string, error) {
	key := make([]byte, tokenKeyLen)
	if _, err := io.ReadFull(rand.Reader, key); err != nil {
		return "", fmt.Errorf("generating key: %w", err)
	}
	return base64.StdEncoding.EncodeToString(key), nil
}

// DecodeKey decodes a base64-encoded key into raw bytes. It fails if the input
// is not valid standard base64 or does not decode to exactly 32 bytes.
func DecodeKey(encoded string) ([]byte, error) {
	key, err := base64.StdEncoding.DecodeString(encoded)
	if err != nil {
		return nil, fmt.Errorf("decoding key: %w", err)
	}
	if len(key) != tokenKeyLen {
		return nil, fmt.Errorf("key must be 256 bits (32 bytes), got %d", len(key))
	}
	return key, nil
}

// tokenMapAEAD builds the AES-256-GCM cipher shared by SaveTokenMap and
// LoadTokenMap. The key length is checked explicitly because aes.NewCipher
// also accepts 16- and 24-byte keys, which would silently downgrade the
// documented AES-256 protection.
func tokenMapAEAD(key []byte) (cipher.AEAD, error) {
	if len(key) != tokenKeyLen {
		return nil, fmt.Errorf("key must be 256 bits (32 bytes), got %d", len(key))
	}
	block, err := aes.NewCipher(key)
	if err != nil {
		return nil, fmt.Errorf("creating cipher: %w", err)
	}
	gcm, err := cipher.NewGCM(block)
	if err != nil {
		return nil, fmt.Errorf("creating GCM: %w", err)
	}
	return gcm, nil
}

// SaveTokenMap encrypts the token map with AES-256-GCM and writes it to path.
//
// The file layout is a random nonce followed by the GCM ciphertext of the
// JSON-encoded entries. The file is created with mode 0600; the mode of an
// already existing file is left as it is by os.WriteFile. key must be exactly
// 32 bytes.
func SaveTokenMap(tm TokenMap, key []byte, path string) error {
	data, err := json.Marshal(tm.entries)
	if err != nil {
		return fmt.Errorf("marshaling token map: %w", err)
	}

	gcm, err := tokenMapAEAD(key)
	if err != nil {
		return err
	}

	nonce := make([]byte, gcm.NonceSize())
	if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
		return fmt.Errorf("generating nonce: %w", err)
	}

	// Passing nonce as dst makes Seal append the ciphertext to it, producing
	// the nonce||ciphertext layout LoadTokenMap expects.
	sealed := gcm.Seal(nonce, nonce, data, nil)
	if err := os.WriteFile(path, sealed, 0o600); err != nil {
		return fmt.Errorf("writing token file: %w", err)
	}
	return nil
}

// LoadTokenMap reads and decrypts a token map file produced by SaveTokenMap.
//
// It returns an error if the file cannot be read, the key is not 32 bytes,
// authentication/decryption fails (wrong key or modified file), the payload is
// not valid JSON, or any entry is null. On error the returned TokenMap is the
// zero value.
func LoadTokenMap(key []byte, path string) (TokenMap, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return TokenMap{}, fmt.Errorf("reading token file: %w", err)
	}

	gcm, err := tokenMapAEAD(key)
	if err != nil {
		return TokenMap{}, err
	}

	nonceSize := gcm.NonceSize()
	if len(raw) < nonceSize {
		return TokenMap{}, fmt.Errorf("token file too short")
	}

	nonce, sealed := raw[:nonceSize], raw[nonceSize:]
	plaintext, err := gcm.Open(nil, nonce, sealed, nil)
	if err != nil {
		return TokenMap{}, fmt.Errorf("decrypting token map (wrong key?): %w", err)
	}

	var entries map[string]*TokenEntry
	if err := json.Unmarshal(plaintext, &entries); err != nil {
		return TokenMap{}, fmt.Errorf("parsing token map: %w", err)
	}
	// A JSON null value decodes to a nil pointer; reject it here so consumers
	// can dereference every entry without checking.
	for token, entry := range entries {
		if entry == nil {
			return TokenMap{}, fmt.Errorf("parsing token map: entry %q is null", token)
		}
	}
	return TokenMap{entries: entries}, nil
}
streams. +// +// Wick detects secrets (API keys, tokens, credentials), PII (emails, IPs, SSNs), +// and custom patterns in any text input. It supports multiple redaction styles, +// format-aware processing (JSON, YAML, .env), and deterministic hash-based +// pseudonymization. +// +// Basic usage: +// +// output, report, err := wick.Redact(input) +// +// With options: +// +// output, report, err := wick.Redact(input, +// wick.WithReplacer(redact.Stars), +// wick.WithCustomPatterns(patterns), +// ) +package wick + +import ( + "strings" + + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/format" + "github.com/krypsis-io/wick/redact" +) + +// Redact detects and redacts secrets and PII in the input string. +// It auto-detects the input format (JSON, YAML, .env, plain text) and +// applies format-aware redaction. The function is safe for concurrent use. +func Redact(input string, opts ...Option) (string, Report, error) { + cfg := defaultConfig() + for _, o := range opts { + o.apply(cfg) + } + + detector, err := buildDetector(cfg) + if err != nil { + return "", Report{}, err + } + + replacer := buildReplacer(cfg.replacer, cfg.customPatterns) + redacted, findings := format.Process(input, detector, replacer) + return redacted, buildReport(findings), nil +} + +func buildDetector(cfg *config) (*detect.Detector, error) { + d, err := detect.New() + if err != nil { + return nil, err + } + + if cfg.rulesFile != "" { + if err := d.AddRulesFile(cfg.rulesFile); err != nil { + return nil, err + } + } + + if len(cfg.disableRules) > 0 { + d.DisableRules(cfg.disableRules) + } + + // Merge custom patterns and blocklist (blocklist is always-detect patterns). + all := append(cfg.customPatterns, cfg.blocklist...) 
+ if len(all) > 0 { + if err := d.SetCustomPatterns(all); err != nil { + return nil, err + } + } + + if len(cfg.allowlist) > 0 { + if err := d.SetAllowlist(cfg.allowlist); err != nil { + return nil, err + } + } + + return d, nil +} + +// buildReplacer wraps the base replacer with per-pattern overrides if any +// custom patterns have a Replacement field set. +func buildReplacer(base redact.Replacer, patterns []detect.CustomPattern) redact.Replacer { + overrides := make(map[string]string) + for _, p := range patterns { + if p.Replacement != "" { + overrides[p.Name] = p.Replacement + } + } + return redact.PerRule(base, overrides) +} + +func buildReport(findings []detect.Finding) Report { + r := Report{ + Findings: make([]Finding, len(findings)), + ByRule: make(map[string]int), + } + for i, f := range findings { + r.Findings[i] = Finding{ + Category: f.Category, + RuleID: f.RuleID, + Value: f.Value, + Start: f.Start, + End: f.End, + Line: f.Line, + } + r.ByRule[f.RuleID]++ + } + r.Total = len(findings) + return r +} + +// Dehydrate redacts input using reversible token replacement and returns the +// redacted text along with a TokenMap that can be used to restore the original. +// The token map is encrypted with the provided AES-256 key (see GenerateKey). +// Each unique value gets a stable token of the form [RULEID-N], so the same +// value always maps to the same token within a single Dehydrate call. 
+func Dehydrate(input string, key []byte, opts ...Option) (string, TokenMap, error) { + cfg := defaultConfig() + for _, o := range opts { + o.apply(cfg) + } + + detector, err := buildDetector(cfg) + if err != nil { + return "", TokenMap{}, err + } + + tr := redact.NewTokenizeReplacer() + replacer := buildReplacer(tr, cfg.customPatterns) + redacted, _ := format.Process(input, detector, replacer) + + entries := tr.Entries() + tm := TokenMap{entries: make(map[string]*TokenEntry, len(entries))} + for token, e := range entries { + tm.entries[token] = &TokenEntry{ + Original: e.Original, + Replacement: e.Replacement, + Category: e.Category, + RuleID: e.RuleID, + Count: e.Count, + } + } + + return redacted, tm, nil +} + +// Rehydrate restores original values in a previously dehydrated string using +// the provided TokenMap. It performs a simple string replacement of each token +// with its original value. +func Rehydrate(input string, tm TokenMap) (string, error) { + result := input + for token, entry := range tm.entries { + result = strings.ReplaceAll(result, token, entry.Original) + } + return result, nil +} + +func defaultConfig() *config { + return &config{ + replacer: redact.Redacted, + } +} diff --git a/wick_test.go b/wick_test.go new file mode 100644 index 0000000..fd3a163 --- /dev/null +++ b/wick_test.go @@ -0,0 +1,97 @@ +package wick + +import ( + "strings" + "testing" + + "github.com/krypsis-io/wick/detect" + "github.com/krypsis-io/wick/redact" +) + +func TestRedact_Basic(t *testing.T) { + input := "Contact admin@acme.com from 10.0.1.42" + output, report, err := Redact(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if strings.Contains(output, "admin@acme.com") { + t.Errorf("email should be redacted: %s", output) + } + if strings.Contains(output, "10.0.1.42") { + t.Errorf("IP should be redacted: %s", output) + } + if report.Total < 2 { + t.Errorf("expected at least 2 findings, got %d", report.Total) + } +} + +func TestRedact_WithReplacer(t 
*testing.T) { + input := "key=AKIAZ5GMHYJKLMNOPQRS" + output, _, err := Redact(input, WithReplacer(redact.Stars)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(output, "***") { + t.Errorf("expected stars replacement: %s", output) + } +} + +func TestRedact_WithCustomPatterns(t *testing.T) { + input := "Project ACME-1234 is active" + output, report, err := Redact(input, WithCustomPatterns([]detect.CustomPattern{ + {Name: "internal-code", Regex: `ACME-\d{4}`}, + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if strings.Contains(output, "ACME-1234") { + t.Errorf("custom pattern should be redacted: %s", output) + } + if report.ByRule["internal-code"] != 1 { + t.Errorf("expected 1 internal-code finding, got %d", report.ByRule["internal-code"]) + } +} + +func TestRedact_JSON(t *testing.T) { + input := `{"api_key": "AKIAZ5GMHYJKLMNOPQRS", "name": "test"}` + output, report, err := Redact(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if report.Total == 0 { + t.Fatal("expected findings") + } + if strings.Contains(output, "AKIAZ5GMHYJKLMNOPQRS") { + t.Errorf("secret should be redacted: %s", output) + } +} + +func TestRedact_NoFindings(t *testing.T) { + input := "nothing sensitive here" + output, report, err := Redact(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if output != input { + t.Errorf("output should be unchanged: %s", output) + } + if report.Total != 0 { + t.Errorf("expected 0 findings, got %d", report.Total) + } +} + +func TestRedact_ConcurrentSafety(t *testing.T) { + input := "Contact admin@acme.com" + errs := make(chan error, 10) + for range 10 { + go func() { + _, _, err := Redact(input) + errs <- err + }() + } + for range 10 { + if err := <-errs; err != nil { + t.Errorf("concurrent Redact failed: %v", err) + } + } +}