From d3597e10afe2a5fdfa8469be42a0c7af54ede876 Mon Sep 17 00:00:00 2001 From: CoderDeltaLAN Date: Wed, 17 Jun 2026 06:30:03 +0100 Subject: [PATCH] fix: reject symlinked instruction files --- CHANGELOG.md | 1 + README.md | 1 + docs/RULES.md | 26 +++- src/agent_rules_kit/discovery.py | 30 ++++- src/agent_rules_kit/governance.py | 37 ++++++ src/agent_rules_kit/init_plan.py | 3 + src/agent_rules_kit/init_write.py | 26 +++- tests/test_path_boundaries.py | 189 ++++++++++++++++++++++++++++++ 8 files changed, 297 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74f2b4b..83490a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ This project has a published GitHub Release line, but no stable support or API g ### Fixed +- Reject symlinked supported instruction files and harden `init --write` temporary and backup paths against symlink escapes. - Report non-UTF-8 supported instruction files as `AIRK-SYS001` findings instead of silently skipping governance analysis. - Updated generated `AGENTS.md` baseline content so `init --write` no longer creates instructions that fail the current governance scope or authority check. - Fixed secret redaction pattern order so Anthropic-style `sk-ant-` keys match the specific Anthropic pattern before the generic `sk-` pattern. diff --git a/README.md b/README.md index a3bfdac..e244c8b 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,7 @@ Current `main` evaluates the following governance finding rules, in stable evalu | Rule | Severity | Purpose | | --- | --- | --- | | `AIRK-SYS001` | `warning` | Flags supported instruction files that cannot be analyzed as UTF-8. | +| `AIRK-SYS002` | `warning` | Flags supported instruction file paths that are symlinks and are not analyzed. | | `AIRK-GOV006` | `warning` | Flags unsupported security, production-readiness, or maturity claims. | | `AIRK-GOV003` | `warning` | Flags guidance that appears to bypass review, CI, PRs, or safe integration. | | `AIRK-GOV004` | `warning` | Flags unsafe command execution guidance without an explicit confirmation boundary. | diff --git a/docs/RULES.md b/docs/RULES.md index b59ed74..1f03ed2 100644 --- a/docs/RULES.md +++ b/docs/RULES.md @@ -25,12 +25,13 @@ Governance findings do not execute repository commands, call external APIs, call Current `main` evaluates governance findings in this order: 1. `AIRK-SYS001` — unreadable supported instruction file. -2. `AIRK-GOV006` — unsupported security or maturity claim. -3. `AIRK-GOV003` — review or CI bypass guidance. -4. `AIRK-GOV004` — unsafe command execution guidance. -5. `AIRK-GOV005` — runtime network or LLM dependency guidance. -6. `AIRK-GOV002` — missing secret-handling boundary. -7. `AIRK-GOV001` — missing instruction scope or authority. +2. `AIRK-SYS002` — symlinked supported instruction file path. +3. `AIRK-GOV006` — unsupported security or maturity claim. +4. `AIRK-GOV003` — review or CI bypass guidance. +5. `AIRK-GOV004` — unsafe command execution guidance. +6. `AIRK-GOV005` — runtime network or LLM dependency guidance. +7. `AIRK-GOV002` — missing secret-handling boundary. +8. `AIRK-GOV001` — missing instruction scope or authority. Future rule-order changes must remain deterministic, documented, fixture-backed, and conservative. @@ -48,6 +49,19 @@ Purpose: This finding reports the repository-relative instruction file path and does not include line, column, or evidence fields. +### AIRK-SYS002 — Symlinked instruction file + +Flags supported instruction file paths that are symlinks and are not analyzed. + +Purpose: + +- keep local-first analysis inside explicit repository file boundaries; +- avoid following instruction-file symlinks to external files; +- make skipped symlink file paths visible instead of silently ignoring them; +- avoid traversing symlinked wildcard instruction directories. + +This finding reports the repository-relative instruction file path and does not include line, column, or evidence fields. + ### AIRK-GOV006 — Unsupported security or maturity claim Severity: `warning`. diff --git a/src/agent_rules_kit/discovery.py b/src/agent_rules_kit/discovery.py index ed4af05..9a2fefa 100644 --- a/src/agent_rules_kit/discovery.py +++ b/src/agent_rules_kit/discovery.py @@ -44,13 +44,13 @@ def discover_instruction_files(root: Path | str) -> tuple[InstructionFile, ...]: for relative_path, kind in _exact_instruction_paths(): candidate = root_path / relative_path - if candidate.is_file(): + if _is_supported_instruction_path(candidate): discovered.append(InstructionFile(path=relative_path, kind=kind)) cursor_rules_dir = root_path / ".cursor" / "rules" - if cursor_rules_dir.is_dir(): + if _is_safe_instruction_directory(root_path, cursor_rules_dir): for candidate in sorted(cursor_rules_dir.glob("*.mdc")): - if candidate.is_file(): + if _is_supported_instruction_path(candidate): discovered.append( InstructionFile( path=candidate.relative_to(root_path).as_posix(), @@ -59,9 +59,9 @@ def discover_instruction_files(root: Path | str) -> tuple[InstructionFile, ...]: ) github_instructions_dir = root_path / ".github" / "instructions" - if github_instructions_dir.is_dir(): + if _is_safe_instruction_directory(root_path, github_instructions_dir): for candidate in sorted(github_instructions_dir.glob("*.md")): - if candidate.is_file(): + if _is_supported_instruction_path(candidate): discovered.append( InstructionFile( path=candidate.relative_to(root_path).as_posix(), @@ -72,6 +72,26 @@ def discover_instruction_files(root: Path | str) -> tuple[InstructionFile, ...]: return tuple(discovered) +def _is_supported_instruction_path(candidate: Path) -> bool: + return candidate.is_file() or candidate.is_symlink() + + +def _is_safe_instruction_directory(root_path: Path, candidate: Path) -> bool: + return candidate.is_dir() and not _has_symlink_component(root_path, candidate) + + +def _has_symlink_component(root_path: Path, candidate: Path) -> bool: + relative_path = candidate.relative_to(root_path) + current = root_path + + for part in relative_path.parts: + current = current / part + if current.is_symlink(): + return True + + return False + + def _exact_instruction_paths() -> tuple[tuple[str, InstructionFileKind], ...]: return ( ("AGENTS.md", InstructionFileKind.AGENTS), diff --git a/src/agent_rules_kit/governance.py b/src/agent_rules_kit/governance.py index 508b414..8a621f0 100644 --- a/src/agent_rules_kit/governance.py +++ b/src/agent_rules_kit/governance.py @@ -32,6 +32,11 @@ "Instruction file could not be analyzed because it is not valid UTF-8." ) +SYMLINKED_INSTRUCTION_FILE_RULE_ID = "AIRK-SYS002" +SYMLINKED_INSTRUCTION_FILE_MESSAGE = ( + "Instruction file path is a symlink and was not analyzed." +) + AUTHORITY_SCOPE_RULE_ID = "AIRK-GOV001" AUTHORITY_SCOPE_MESSAGE = "Instruction file may lack clear scope or authority." @@ -347,6 +352,26 @@ def _unreadable_instruction_file_finding(path: str) -> Finding: ) +def _symlinked_instruction_file_finding(path: str) -> Finding: + return Finding( + rule_id=SYMLINKED_INSTRUCTION_FILE_RULE_ID, + severity=Severity.WARNING, + message=SYMLINKED_INSTRUCTION_FILE_MESSAGE, + path=path, + ) + + +def _has_symlink_component(repository_root: Path, relative_path: str) -> bool: + current = repository_root + + for part in Path(relative_path).parts: + current = current / part + if current.is_symlink(): + return True + + return False + + def _deduplicate_findings(findings: tuple[Finding, ...]) -> tuple[Finding, ...]: unique: list[Finding] = [] seen: set[Finding] = set() @@ -406,6 +431,10 @@ def find_missing_authority_scope_findings( for instruction_file in instruction_files: candidate = repository_root / instruction_file.path + if _has_symlink_component(repository_root, instruction_file.path): + findings.append(_symlinked_instruction_file_finding(instruction_file.path)) + continue + try: text = candidate.read_text(encoding="utf-8") except UnicodeDecodeError: @@ -435,6 +464,10 @@ def find_missing_secret_boundary_findings( for instruction_file in instruction_files: candidate = repository_root / instruction_file.path + if _has_symlink_component(repository_root, instruction_file.path): + findings.append(_symlinked_instruction_file_finding(instruction_file.path)) + continue + try: text = candidate.read_text(encoding="utf-8") except UnicodeDecodeError: @@ -504,6 +537,10 @@ def _find_line_findings( for instruction_file in instruction_files: candidate = repository_root / instruction_file.path + if _has_symlink_component(repository_root, instruction_file.path): + findings.append(_symlinked_instruction_file_finding(instruction_file.path)) + continue + try: text = candidate.read_text(encoding="utf-8") except UnicodeDecodeError: diff --git a/src/agent_rules_kit/init_plan.py b/src/agent_rules_kit/init_plan.py index 2854e9c..1f3fa05 100644 --- a/src/agent_rules_kit/init_plan.py +++ b/src/agent_rules_kit/init_plan.py @@ -43,6 +43,9 @@ def build_init_plan(root: Path | str) -> InitPlan: target_path = "AGENTS.md" candidate = root_path / target_path + if candidate.is_symlink(): + raise ValueError("refusing to plan init for symlinked path: AGENTS.md") + if candidate.exists(): action = InitPlanAction.BACKUP_AND_REPLACE reason = "existing file would be backed up before replacement" diff --git a/src/agent_rules_kit/init_write.py b/src/agent_rules_kit/init_write.py index 504bf09..89c5ff7 100644 --- a/src/agent_rules_kit/init_write.py +++ b/src/agent_rules_kit/init_write.py @@ -66,9 +66,12 @@ def write_init_files(root: Path | str) -> InitWriteResult: target = root_path / planned_file.path backup_path: Path | None = None + if target.is_symlink(): + raise ValueError("refusing to write init file through symlinked path: AGENTS.md") + if planned_file.action == InitPlanAction.BACKUP_AND_REPLACE: backup_path = _next_backup_path(target) - shutil.copy2(target, backup_path) + _copy_file_to_new_regular_path(target, backup_path) _write_text_atomic(target, BASELINE_AGENTS_CONTENT) @@ -94,31 +97,44 @@ def _write_text_atomic(target: Path, content: str) -> None: temporary_path = _next_available_path( target.with_name(f".{target.name}.agent-rules-kit.tmp") ) + temporary_created = False try: - temporary_path.write_text(content, encoding="utf-8") + with temporary_path.open("x", encoding="utf-8") as temporary_file: + temporary_created = True + temporary_file.write(content) temporary_path.replace(target) finally: - if temporary_path.exists(): + if temporary_created and _path_exists_or_is_symlink(temporary_path): temporary_path.unlink() +def _copy_file_to_new_regular_path(source: Path, destination: Path) -> None: + with source.open("rb") as source_file, destination.open("xb") as destination_file: + shutil.copyfileobj(source_file, destination_file) + shutil.copystat(source, destination, follow_symlinks=False) + + def _next_backup_path(target: Path) -> Path: return _next_available_path(target.with_name(f"{target.name}.agent-rules-kit.bak")) def _next_available_path(candidate: Path) -> Path: - if not candidate.exists(): + if not _path_exists_or_is_symlink(candidate): return candidate for index in range(1, 1000): indexed_candidate = candidate.with_name(f"{candidate.name}.{index}") - if not indexed_candidate.exists(): + if not _path_exists_or_is_symlink(indexed_candidate): return indexed_candidate raise RuntimeError(f"could not find available backup path for: {candidate}") +def _path_exists_or_is_symlink(candidate: Path) -> bool: + return candidate.exists() or candidate.is_symlink() + + __all__ = [ "BASELINE_AGENTS_CONTENT", "InitWriteResult", diff --git a/tests/test_path_boundaries.py b/tests/test_path_boundaries.py index f7d402f..3a604b4 100644 --- a/tests/test_path_boundaries.py +++ b/tests/test_path_boundaries.py @@ -9,6 +9,7 @@ InstructionFileKind, discover_instruction_files, ) +from agent_rules_kit.governance import find_governance_findings from agent_rules_kit.init_plan import InitPlanAction, build_init_plan from agent_rules_kit.init_write import BASELINE_AGENTS_CONTENT, write_init_files @@ -117,6 +118,194 @@ def test_write_init_files_leaves_no_atomic_temporary_file(self) -> None: self.assertFalse((repository / ".AGENTS.md.agent-rules-kit.tmp").exists()) + def test_governance_reports_symlinked_instruction_file_without_following_target(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) / "repo" + repository.mkdir() + outside_file = Path(temporary_directory) / "outside-instructions.md" + outside_file.write_text( + "Scope: outside file\n" + "Secret handling: do not commit tokens.\n" + "- Commit directly to main.\n", + encoding="utf-8", + ) + (repository / "AGENTS.md").symlink_to(outside_file) + + instruction_files = discover_instruction_files(repository) + findings = find_governance_findings(repository, instruction_files) + + self.assertEqual( + instruction_files, + ( + InstructionFile( + path="AGENTS.md", + kind=InstructionFileKind.AGENTS, + ), + ), + ) + self.assertEqual([finding.rule_id for finding in findings], ["AIRK-SYS002"]) + self.assertEqual( + findings[0].message, + "Instruction file path is a symlink and was not analyzed.", + ) + self.assertEqual(findings[0].path, "AGENTS.md") + self.assertIsNone(findings[0].line) + self.assertIsNone(findings[0].evidence) + + def test_governance_reports_parent_symlinked_exact_instruction_path(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) / "repo" + repository.mkdir() + outside_claude_directory = Path(temporary_directory) / "outside-claude" + outside_claude_directory.mkdir() + outside_claude_file = outside_claude_directory / "CLAUDE.md" + outside_claude_file.write_text( + "Scope: outside file\n" + "Secret handling: do not commit tokens.\n" + "- Commit directly to main.\n", + encoding="utf-8", + ) + (repository / ".claude").symlink_to( + outside_claude_directory, + target_is_directory=True, + ) + + instruction_files = discover_instruction_files(repository) + findings = find_governance_findings(repository, instruction_files) + + self.assertEqual( + instruction_files, + ( + InstructionFile( + path=".claude/CLAUDE.md", + kind=InstructionFileKind.CLAUDE, + ), + ), + ) + self.assertEqual([finding.rule_id for finding in findings], ["AIRK-SYS002"]) + self.assertEqual(findings[0].path, ".claude/CLAUDE.md") + + def test_discovery_does_not_traverse_symlinked_cursor_rules_directory(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) / "repo" + repository.mkdir() + cursor_directory = repository / ".cursor" + cursor_directory.mkdir() + outside_rules_directory = Path(temporary_directory) / "outside-rules" + outside_rules_directory.mkdir() + (outside_rules_directory / "agent.mdc").write_text( + "outside cursor rule\n", + encoding="utf-8", + ) + (cursor_directory / "rules").symlink_to( + outside_rules_directory, + target_is_directory=True, + ) + + self.assertEqual(discover_instruction_files(repository), ()) + + def test_discovery_does_not_traverse_symlinked_github_instructions_directory(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) / "repo" + repository.mkdir() + github_directory = repository / ".github" + github_directory.mkdir() + outside_instructions_directory = Path(temporary_directory) / "outside-instructions" + outside_instructions_directory.mkdir() + (outside_instructions_directory / "agents.instructions.md").write_text( + "outside GitHub instruction\n", + encoding="utf-8", + ) + (github_directory / "instructions").symlink_to( + outside_instructions_directory, + target_is_directory=True, + ) + + self.assertEqual(discover_instruction_files(repository), ()) + + def test_init_plan_rejects_symlinked_root_agents_file(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) / "repo" + repository.mkdir() + outside_file = Path(temporary_directory) / "outside-agents.md" + outside_file.write_text("outside instructions\n", encoding="utf-8") + (repository / "AGENTS.md").symlink_to(outside_file) + + with self.assertRaisesRegex(ValueError, "symlinked path"): + build_init_plan(repository) + + self.assertEqual( + outside_file.read_text(encoding="utf-8"), + "outside instructions\n", + ) + + def test_write_init_files_rejects_symlinked_root_agents_file(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) / "repo" + repository.mkdir() + outside_file = Path(temporary_directory) / "outside-agents.md" + outside_file.write_text("outside instructions\n", encoding="utf-8") + agents_file = repository / "AGENTS.md" + agents_file.symlink_to(outside_file) + + with self.assertRaisesRegex(ValueError, "symlinked path"): + write_init_files(repository) + + self.assertTrue(agents_file.is_symlink()) + self.assertEqual( + outside_file.read_text(encoding="utf-8"), + "outside instructions\n", + ) + + def test_write_init_files_does_not_follow_symlinked_temporary_path(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) / "repo" + repository.mkdir() + outside_temporary_target = Path(temporary_directory) / "outside-temp.txt" + temporary_link = repository / ".AGENTS.md.agent-rules-kit.tmp" + temporary_link.symlink_to(outside_temporary_target) + + result = write_init_files(repository) + + agents_file = repository / "AGENTS.md" + + self.assertEqual(result.files[0].action, InitPlanAction.CREATE) + self.assertFalse(outside_temporary_target.exists()) + self.assertTrue(temporary_link.is_symlink()) + self.assertFalse(agents_file.is_symlink()) + self.assertEqual( + agents_file.read_text(encoding="utf-8"), + BASELINE_AGENTS_CONTENT, + ) + + def test_write_init_files_does_not_follow_symlinked_backup_path(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + repository = Path(temporary_directory) / "repo" + repository.mkdir() + agents_file = repository / "AGENTS.md" + agents_file.write_text("root instructions\n", encoding="utf-8") + outside_backup_target = Path(temporary_directory) / "outside-backup.txt" + backup_link = repository / "AGENTS.md.agent-rules-kit.bak" + backup_link.symlink_to(outside_backup_target) + + result = write_init_files(repository) + + second_backup = repository / "AGENTS.md.agent-rules-kit.bak.1" + + self.assertEqual(result.files[0].action, InitPlanAction.BACKUP_AND_REPLACE) + self.assertEqual(result.files[0].backup_path, "AGENTS.md.agent-rules-kit.bak.1") + self.assertFalse(outside_backup_target.exists()) + self.assertTrue(backup_link.is_symlink()) + self.assertEqual( + second_backup.read_text(encoding="utf-8"), + "root instructions\n", + ) + self.assertFalse(agents_file.is_symlink()) + self.assertEqual( + agents_file.read_text(encoding="utf-8"), + BASELINE_AGENTS_CONTENT, + ) + if __name__ == "__main__": unittest.main()