From 4a10dde41515cff9d46e94828746f5675f4e3702 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 20:03:40 +0000 Subject: [PATCH] fix(robot-repo-automaton): make ContentMatch detection compile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `detect_content_match` read file contents with `std::fs::read` (→ `Vec<u8>`) and passed `&content` to a string `regex::Regex::is_match`, which wants `&str`. The crate therefore never compiled and the whole `DetectionMethod::ContentMatch` detection path was dead code. Switch to `std::fs::read_to_string`. This fixes the type error and also implements the "skip non-UTF8" intent already stated in the comment just above (`read_to_string` returns `Err` on non-UTF8 bytes, which the `if let Ok` then skips), matching the sibling content scan in `hypatia.rs`. Add a regression test for the previously-uncompilable path: a positive regex match, a non-match, and a non-UTF8 file that must be skipped without panicking. `cargo build` is clean and all 101 tests pass. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01RozeeLxpJsd3WWFngaZWz3 --- robot-repo-automaton/src/detector.rs | 59 +++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/robot-repo-automaton/src/detector.rs b/robot-repo-automaton/src/detector.rs index 23b35fe9..99acea24 100644 --- a/robot-repo-automaton/src/detector.rs +++ b/robot-repo-automaton/src/detector.rs @@ -297,7 +297,9 @@ impl Detector { } } - if let Ok(content) = std::fs::read(file_path) { + // read_to_string returns Err on non-UTF8 bytes, so the + // `if let Ok` here also implements the "skip non-UTF8" intent. 
+ if let Ok(content) = std::fs::read_to_string(file_path) { if re.is_match(&content) { affected.push(file_path.clone()); } @@ -377,6 +379,8 @@ impl Detector { #[cfg(test)] mod tests { use super::*; + use crate::catalog::{Detection, Fix, FixAction}; + use std::collections::HashMap; use tempfile::TempDir; #[test] @@ -404,4 +408,57 @@ mod tests { assert!(detector.file_exists(".github/workflows/ci.yml")); assert!(!detector.file_exists(".github/workflows/nonexistent.yml")); } + + fn content_match_error(condition: &str, files: Vec<String>) -> ErrorType { + ErrorType { + id: "TEST-CONTENT".to_string(), + name: "content-match regression".to_string(), + severity: Severity::Medium, + category: "test".to_string(), + description: "regression coverage for detect_content_match".to_string(), + detection: Detection { + method: DetectionMethod::ContentMatch, + files, + condition: Some(condition.to_string()), + extension_map: HashMap::new(), + }, + affected_repos: vec![], + fix: Fix { + action: FixAction::Modify, + target: String::new(), + reason: None, + modification: None, + fallback: None, + }, + commit_message: "test".to_string(), + } + } + + #[test] + fn test_content_match_detects_and_skips_non_utf8() { + let temp = TempDir::new().unwrap(); + std::fs::write(temp.path().join("hit.txt"), "contains believe_me here").unwrap(); + std::fs::write(temp.path().join("miss.txt"), "nothing to see").unwrap(); + // Invalid UTF-8: detect_content_match must skip this (not panic) — the + // bug fix relies on std::fs::read_to_string returning Err here. + std::fs::write(temp.path().join("blob.bin"), [0xff, 0xfe, 0x62, 0x6d]).unwrap(); + + let detector = Detector::new(temp.path().to_path_buf()).unwrap(); + + // Positive: regex hits hit.txt; the non-UTF8 blob is skipped cleanly. 
+ assert!( + detector + .detect(&content_match_error("believe_me", vec!["*".to_string()])) + .is_some(), + "should detect the file whose contents match the regex" + ); + + // Negative: a token present in no valid-UTF8 file yields no issue. + assert!( + detector + .detect(&content_match_error("no_such_token", vec!["*".to_string()])) + .is_none(), + "should report nothing when no file matches" + ); + } }