From 9f29fb1697956e61a9813e05cc82555eee062fda Mon Sep 17 00:00:00 2001 From: CoderDeltaLAN Date: Wed, 17 Jun 2026 05:39:53 +0100 Subject: [PATCH] fix: report unreadable instruction files --- CHANGELOG.md | 1 + README.md | 1 + docs/RULES.md | 25 +++++++++++++---- src/agent_rules_kit/governance.py | 46 ++++++++++++++++++++++++++----- tests/test_cli.py | 21 ++++++++++++++ tests/test_governance.py | 34 +++++++++++++++++++++++ 6 files changed, 115 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4b8d75..74f2b4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ This project has a published GitHub Release line, but no stable support or API g ### Fixed +- Report non-UTF-8 supported instruction files as `AIRK-SYS001` findings instead of silently skipping governance analysis. - Updated generated `AGENTS.md` baseline content so `init --write` no longer creates instructions that fail the current governance scope or authority check. - Fixed secret redaction pattern order so Anthropic-style `sk-ant-` keys match the specific Anthropic pattern before the generic `sk-` pattern. - Tightened governance regex coverage for review/CI bypass, unsafe command guidance, and runtime network or LLM dependency findings. diff --git a/README.md b/README.md index 7a7a891..a3bfdac 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ Current `main` evaluates the following governance finding rules, in stable evalu | Rule | Severity | Purpose | | --- | --- | --- | +| `AIRK-SYS001` | `warning` | Flags supported instruction files that cannot be analyzed as UTF-8. | | `AIRK-GOV006` | `warning` | Flags unsupported security, production-readiness, or maturity claims. | | `AIRK-GOV003` | `warning` | Flags guidance that appears to bypass review, CI, PRs, or safe integration. | | `AIRK-GOV004` | `warning` | Flags unsafe command execution guidance without an explicit confirmation boundary. | diff --git a/docs/RULES.md b/docs/RULES.md index 3e1a14b..b59ed74 100644 --- a/docs/RULES.md +++ b/docs/RULES.md @@ -24,17 +24,30 @@ Governance findings do not execute repository commands, call external APIs, call Current `main` evaluates governance findings in this order: -1. `AIRK-GOV006` — unsupported security or maturity claim. -2. `AIRK-GOV003` — review or CI bypass guidance. -3. `AIRK-GOV004` — unsafe command execution guidance. -4. `AIRK-GOV005` — runtime network or LLM dependency guidance. -5. `AIRK-GOV002` — missing secret-handling boundary. -6. `AIRK-GOV001` — missing instruction scope or authority. +1. `AIRK-SYS001` — unreadable supported instruction file. +2. `AIRK-GOV006` — unsupported security or maturity claim. +3. `AIRK-GOV003` — review or CI bypass guidance. +4. `AIRK-GOV004` — unsafe command execution guidance. +5. `AIRK-GOV005` — runtime network or LLM dependency guidance. +6. `AIRK-GOV002` — missing secret-handling boundary. +7. `AIRK-GOV001` — missing instruction scope or authority. Future rule-order changes must remain deterministic, documented, fixture-backed, and conservative. ## Rule reference +### AIRK-SYS001 — Unreadable instruction file + +Flags supported instruction files that cannot be analyzed as UTF-8. + +Purpose: + +- prevent supported instruction files from being discovered but silently skipped; +- make encoding problems visible in console, JSON, and Markdown output; +- avoid printing raw undecodable bytes as evidence. + +This finding reports the repository-relative instruction file path and does not include line, column, or evidence fields. + ### AIRK-GOV006 — Unsupported security or maturity claim Severity: `warning`. diff --git a/src/agent_rules_kit/governance.py b/src/agent_rules_kit/governance.py index 561c091..508b414 100644 --- a/src/agent_rules_kit/governance.py +++ b/src/agent_rules_kit/governance.py @@ -27,6 +27,11 @@ "conflicts with local-first boundaries." ) +UNREADABLE_INSTRUCTION_FILE_RULE_ID = "AIRK-SYS001" +UNREADABLE_INSTRUCTION_FILE_MESSAGE = ( + "Instruction file could not be analyzed because it is not valid UTF-8." +) + AUTHORITY_SCOPE_RULE_ID = "AIRK-GOV001" AUTHORITY_SCOPE_MESSAGE = "Instruction file may lack clear scope or authority." @@ -321,16 +326,40 @@ def find_governance_findings( instruction_files: tuple[InstructionFile, ...], ) -> tuple[Finding, ...]: """Return all governance findings in stable rule order.""" - return ( - *find_unsupported_claim_findings(repository_root, instruction_files), - *find_review_ci_bypass_findings(repository_root, instruction_files), - *find_unsafe_command_execution_findings(repository_root, instruction_files), - *find_runtime_network_llm_dependency_findings(repository_root, instruction_files), - *find_missing_secret_boundary_findings(repository_root, instruction_files), - *find_missing_authority_scope_findings(repository_root, instruction_files), + return _deduplicate_findings( + ( + *find_unsupported_claim_findings(repository_root, instruction_files), + *find_review_ci_bypass_findings(repository_root, instruction_files), + *find_unsafe_command_execution_findings(repository_root, instruction_files), + *find_runtime_network_llm_dependency_findings(repository_root, instruction_files), + *find_missing_secret_boundary_findings(repository_root, instruction_files), + *find_missing_authority_scope_findings(repository_root, instruction_files), + ) + ) + + +def _unreadable_instruction_file_finding(path: str) -> Finding: + return Finding( + rule_id=UNREADABLE_INSTRUCTION_FILE_RULE_ID, + severity=Severity.WARNING, + message=UNREADABLE_INSTRUCTION_FILE_MESSAGE, + path=path, ) +def _deduplicate_findings(findings: tuple[Finding, ...]) -> tuple[Finding, ...]: + unique: list[Finding] = [] + seen: set[Finding] = set() + + for finding in findings: + if finding in seen: + continue + seen.add(finding) + unique.append(finding) + + return tuple(unique) + + def find_unsafe_command_execution_findings( repository_root: Path, instruction_files: tuple[InstructionFile, ...], @@ -380,6 +409,7 @@ def find_missing_authority_scope_findings( try: text = candidate.read_text(encoding="utf-8") except UnicodeDecodeError: + findings.append(_unreadable_instruction_file_finding(instruction_file.path)) continue if not _contains_authority_scope_boundary(text): @@ -408,6 +438,7 @@ def find_missing_secret_boundary_findings( try: text = candidate.read_text(encoding="utf-8") except UnicodeDecodeError: + findings.append(_unreadable_instruction_file_finding(instruction_file.path)) continue if not _contains_secret_boundary(text): @@ -476,6 +507,7 @@ def _find_line_findings( try: text = candidate.read_text(encoding="utf-8") except UnicodeDecodeError: + findings.append(_unreadable_instruction_file_finding(instruction_file.path)) continue lines = text.splitlines() diff --git a/tests/test_cli.py b/tests/test_cli.py index 5c43e08..cf92cfc 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -497,6 +497,27 @@ def test_check_markdown_reports_unsupported_security_claim_findings(self) -> Non self.assertIn("| AIRK-GOV006 | warning | AGENTS.md:5 |", text) self.assertIn("| AIRK-GOV006 | warning | AGENTS.md:6 |", text) + def test_check_json_reports_invalid_utf8_instruction_file(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) + (repository / "AGENTS.md").write_bytes( + b"# AGENTS.md\n- Commit directly to main.\xff\n" + ) + + output = io.StringIO() + with redirect_stdout(output): + exit_code = main(["check", str(repository), "--format", "json"]) + + payload = json.loads(output.getvalue()) + + self.assertEqual(exit_code, 0) + self.assertEqual(payload["summary"]["finding_count"], 1) + self.assertEqual(payload["findings"][0]["rule_id"], "AIRK-SYS001") + self.assertEqual(payload["findings"][0]["severity"], "warning") + self.assertEqual(payload["findings"][0]["path"], "AGENTS.md") + self.assertNotIn("line", payload["findings"][0]) + self.assertNotIn("evidence", payload["findings"][0]) + def test_check_json_reports_empty_findings_for_clean_fixture(self) -> None: output = io.StringIO() diff --git a/tests/test_governance.py b/tests/test_governance.py index 0b4ed0c..de25e01 100644 --- a/tests/test_governance.py +++ b/tests/test_governance.py @@ -24,6 +24,40 @@ class GovernanceFindingTests(unittest.TestCase): + def test_combined_governance_reports_invalid_utf8_instruction_file_once(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) + (repository / "AGENTS.md").write_bytes( + b"# AGENTS.md\n- Commit directly to main.\xff\n" + ) + + instruction_files = discover_instruction_files(repository) + findings = find_governance_findings(repository, instruction_files) + + self.assertEqual(len(findings), 1) + self.assertEqual(findings[0].rule_id, "AIRK-SYS001") + self.assertEqual(findings[0].severity.value, "warning") + self.assertEqual( + findings[0].message, + "Instruction file could not be analyzed because it is not valid UTF-8.", + ) + self.assertEqual(findings[0].path, "AGENTS.md") + self.assertIsNone(findings[0].line) + self.assertIsNone(findings[0].evidence) + + def test_rule_specific_governance_reports_invalid_utf8_instruction_file(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) + (repository / "AGENTS.md").write_bytes( + b"# AGENTS.md\n- Commit directly to main.\xff\n" + ) + + instruction_files = discover_instruction_files(repository) + findings = find_review_ci_bypass_findings(repository, instruction_files) + + self.assertEqual([finding.rule_id for finding in findings], ["AIRK-SYS001"]) + self.assertEqual([finding.path for finding in findings], ["AGENTS.md"]) + def test_reports_unsupported_security_and_maturity_claims(self) -> None: with tempfile.TemporaryDirectory() as temporary_directory: repository = Path(temporary_directory)