From 9f29fb1697956e61a9813e05cc82555eee062fda Mon Sep 17 00:00:00 2001
From: CoderDeltaLAN <CoderDeltaLAN@users.noreply.github.com>
Date: Wed, 17 Jun 2026 05:39:53 +0100
Subject: [PATCH] fix: report unreadable instruction files

---
 CHANGELOG.md                      |  1 +
 README.md                         |  1 +
 docs/RULES.md                     | 25 +++++++++++++----
 src/agent_rules_kit/governance.py | 46 ++++++++++++++++++++++++++-----
 tests/test_cli.py                 | 21 ++++++++++++++
 tests/test_governance.py          | 34 +++++++++++++++++++++++
 6 files changed, 115 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4b8d75..74f2b4b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ This project has a published GitHub Release line, but no stable support or API g
 
 ### Fixed
 
+- Report supported instruction files that are not valid UTF-8 as `AIRK-SYS001` findings instead of silently skipping them during governance analysis.
 - Updated generated `AGENTS.md` baseline content so `init --write` no longer creates instructions that fail the current governance scope or authority check.
 - Fixed secret redaction pattern order so Anthropic-style `sk-ant-` keys match the specific Anthropic pattern before the generic `sk-` pattern.
 - Tightened governance regex coverage for review/CI bypass, unsafe command guidance, and runtime network or LLM dependency findings.
diff --git a/README.md b/README.md
index 7a7a891..a3bfdac 100644
--- a/README.md
+++ b/README.md
@@ -150,6 +150,7 @@ Current `main` evaluates the following governance finding rules, in stable evalu
 
 | Rule | Severity | Purpose |
 | --- | --- | --- |
+| `AIRK-SYS001` | `warning` | Flags supported instruction files that cannot be analyzed because they are not valid UTF-8. |
 | `AIRK-GOV006` | `warning` | Flags unsupported security, production-readiness, or maturity claims. |
 | `AIRK-GOV003` | `warning` | Flags guidance that appears to bypass review, CI, PRs, or safe integration. |
 | `AIRK-GOV004` | `warning` | Flags unsafe command execution guidance without an explicit confirmation boundary. |
diff --git a/docs/RULES.md b/docs/RULES.md
index 3e1a14b..b59ed74 100644
--- a/docs/RULES.md
+++ b/docs/RULES.md
@@ -24,17 +24,30 @@ Governance findings do not execute repository commands, call external APIs, call
 
 Current `main` evaluates governance findings in this order:
 
-1. `AIRK-GOV006` — unsupported security or maturity claim.
-2. `AIRK-GOV003` — review or CI bypass guidance.
-3. `AIRK-GOV004` — unsafe command execution guidance.
-4. `AIRK-GOV005` — runtime network or LLM dependency guidance.
-5. `AIRK-GOV002` — missing secret-handling boundary.
-6. `AIRK-GOV001` — missing instruction scope or authority.
+1. `AIRK-SYS001` — unreadable supported instruction file (reported once per file, by the first rule that fails to read it).
+2. `AIRK-GOV006` — unsupported security or maturity claim.
+3. `AIRK-GOV003` — review or CI bypass guidance.
+4. `AIRK-GOV004` — unsafe command execution guidance.
+5. `AIRK-GOV005` — runtime network or LLM dependency guidance.
+6. `AIRK-GOV002` — missing secret-handling boundary.
+7. `AIRK-GOV001` — missing instruction scope or authority.
 
 Future rule-order changes must remain deterministic, documented, fixture-backed, and conservative.
 
 ## Rule reference
 
+### AIRK-SYS001 — Unreadable instruction file
+
+Flags supported instruction files that cannot be analyzed because they are not valid UTF-8.
+
+Purpose:
+
+- prevent supported instruction files from being discovered but silently skipped;
+- make encoding problems visible in console, JSON, and Markdown output;
+- avoid printing raw undecodable bytes as evidence.
+
+This finding has severity `warning`, reports the repository-relative instruction file path, and does not include line, column, or evidence fields.
+
 ### AIRK-GOV006 — Unsupported security or maturity claim
 
 Severity: `warning`.
diff --git a/src/agent_rules_kit/governance.py b/src/agent_rules_kit/governance.py
index 561c091..508b414 100644
--- a/src/agent_rules_kit/governance.py
+++ b/src/agent_rules_kit/governance.py
@@ -27,6 +27,11 @@
     "conflicts with local-first boundaries."
 )
 
+UNREADABLE_INSTRUCTION_FILE_RULE_ID = "AIRK-SYS001"
+UNREADABLE_INSTRUCTION_FILE_MESSAGE = (
+    "Instruction file could not be analyzed because it is not valid UTF-8."
+)
+
 AUTHORITY_SCOPE_RULE_ID = "AIRK-GOV001"
 AUTHORITY_SCOPE_MESSAGE = "Instruction file may lack clear scope or authority."
 
@@ -321,16 +326,40 @@ def find_governance_findings(
     instruction_files: tuple[InstructionFile, ...],
 ) -> tuple[Finding, ...]:
     """Return all governance findings in stable rule order."""
-    return (
-        *find_unsupported_claim_findings(repository_root, instruction_files),
-        *find_review_ci_bypass_findings(repository_root, instruction_files),
-        *find_unsafe_command_execution_findings(repository_root, instruction_files),
-        *find_runtime_network_llm_dependency_findings(repository_root, instruction_files),
-        *find_missing_secret_boundary_findings(repository_root, instruction_files),
-        *find_missing_authority_scope_findings(repository_root, instruction_files),
+    return _deduplicate_findings(
+        (
+            *find_unsupported_claim_findings(repository_root, instruction_files),
+            *find_review_ci_bypass_findings(repository_root, instruction_files),
+            *find_unsafe_command_execution_findings(repository_root, instruction_files),
+            *find_runtime_network_llm_dependency_findings(repository_root, instruction_files),
+            *find_missing_secret_boundary_findings(repository_root, instruction_files),
+            *find_missing_authority_scope_findings(repository_root, instruction_files),
+        )
+    )
+
+
+def _unreadable_instruction_file_finding(path: str) -> Finding:
+    return Finding(
+        rule_id=UNREADABLE_INSTRUCTION_FILE_RULE_ID,
+        severity=Severity.WARNING,
+        message=UNREADABLE_INSTRUCTION_FILE_MESSAGE,
+        path=path,
     )
 
 
+def _deduplicate_findings(findings: tuple[Finding, ...]) -> tuple[Finding, ...]:
+    unique: list[Finding] = []
+    seen: set[Finding] = set()
+
+    for finding in findings:
+        if finding in seen:
+            continue
+        seen.add(finding)
+        unique.append(finding)
+
+    return tuple(unique)
+
+
 def find_unsafe_command_execution_findings(
     repository_root: Path,
     instruction_files: tuple[InstructionFile, ...],
@@ -380,6 +409,7 @@ def find_missing_authority_scope_findings(
         try:
             text = candidate.read_text(encoding="utf-8")
         except UnicodeDecodeError:
+            findings.append(_unreadable_instruction_file_finding(instruction_file.path))
             continue
 
         if not _contains_authority_scope_boundary(text):
@@ -408,6 +438,7 @@ def find_missing_secret_boundary_findings(
         try:
             text = candidate.read_text(encoding="utf-8")
         except UnicodeDecodeError:
+            findings.append(_unreadable_instruction_file_finding(instruction_file.path))
             continue
 
         if not _contains_secret_boundary(text):
@@ -476,6 +507,7 @@ def _find_line_findings(
         try:
             text = candidate.read_text(encoding="utf-8")
         except UnicodeDecodeError:
+            findings.append(_unreadable_instruction_file_finding(instruction_file.path))
             continue
 
         lines = text.splitlines()
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 5c43e08..cf92cfc 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -497,6 +497,27 @@ def test_check_markdown_reports_unsupported_security_claim_findings(self) -> Non
         self.assertIn("| AIRK-GOV006 | warning | AGENTS.md:5 |", text)
         self.assertIn("| AIRK-GOV006 | warning | AGENTS.md:6 |", text)
 
+    def test_check_json_reports_invalid_utf8_instruction_file(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary_directory:
+            repository = Path(temporary_directory)
+            (repository / "AGENTS.md").write_bytes(
+                b"# AGENTS.md\n- Commit directly to main.\xff\n"
+            )
+
+            output = io.StringIO()
+            with redirect_stdout(output):
+                exit_code = main(["check", str(repository), "--format", "json"])
+
+        payload = json.loads(output.getvalue())
+
+        self.assertEqual(exit_code, 0)
+        self.assertEqual(payload["summary"]["finding_count"], 1)
+        self.assertEqual(payload["findings"][0]["rule_id"], "AIRK-SYS001")
+        self.assertEqual(payload["findings"][0]["severity"], "warning")
+        self.assertEqual(payload["findings"][0]["path"], "AGENTS.md")
+        self.assertNotIn("line", payload["findings"][0])
+        self.assertNotIn("evidence", payload["findings"][0])
+
     def test_check_json_reports_empty_findings_for_clean_fixture(self) -> None:
         output = io.StringIO()
 
diff --git a/tests/test_governance.py b/tests/test_governance.py
index 0b4ed0c..de25e01 100644
--- a/tests/test_governance.py
+++ b/tests/test_governance.py
@@ -24,6 +24,40 @@
 
 
 class GovernanceFindingTests(unittest.TestCase):
+    def test_combined_governance_reports_invalid_utf8_instruction_file_once(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary_directory:
+            repository = Path(temporary_directory)
+            (repository / "AGENTS.md").write_bytes(
+                b"# AGENTS.md\n- Commit directly to main.\xff\n"
+            )
+
+            instruction_files = discover_instruction_files(repository)
+            findings = find_governance_findings(repository, instruction_files)
+
+        self.assertEqual(len(findings), 1)
+        self.assertEqual(findings[0].rule_id, "AIRK-SYS001")
+        self.assertEqual(findings[0].severity.value, "warning")
+        self.assertEqual(
+            findings[0].message,
+            "Instruction file could not be analyzed because it is not valid UTF-8.",
+        )
+        self.assertEqual(findings[0].path, "AGENTS.md")
+        self.assertIsNone(findings[0].line)
+        self.assertIsNone(findings[0].evidence)
+
+    def test_rule_specific_governance_reports_invalid_utf8_instruction_file(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary_directory:
+            repository = Path(temporary_directory)
+            (repository / "AGENTS.md").write_bytes(
+                b"# AGENTS.md\n- Commit directly to main.\xff\n"
+            )
+
+            instruction_files = discover_instruction_files(repository)
+            findings = find_review_ci_bypass_findings(repository, instruction_files)
+
+        self.assertEqual([finding.rule_id for finding in findings], ["AIRK-SYS001"])
+        self.assertEqual([finding.path for finding in findings], ["AGENTS.md"])
+
     def test_reports_unsupported_security_and_maturity_claims(self) -> None:
         with tempfile.TemporaryDirectory() as temporary_directory:
             repository = Path(temporary_directory)