From bac373eb91c15173f510bdeba9b73197943e4fbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?= <159939812+ProfRandom92@users.noreply.github.com> Date: Tue, 19 May 2026 04:56:29 -0700 Subject: [PATCH 1/2] Add deterministic fixture manifest --- docs/FIXTURE_TEMPLATE_v1.md | 8 ++ docs/benchmarks/layered_admissibility.md | 2 + fixtures/manifest.json | 79 ++++++++++++++ tests/test_fixture_manifest.py | 127 +++++++++++++++++++++++ 4 files changed, 216 insertions(+) create mode 100644 fixtures/manifest.json create mode 100644 tests/test_fixture_manifest.py diff --git a/docs/FIXTURE_TEMPLATE_v1.md b/docs/FIXTURE_TEMPLATE_v1.md index c4b34f4..42a53cc 100644 --- a/docs/FIXTURE_TEMPLATE_v1.md +++ b/docs/FIXTURE_TEMPLATE_v1.md @@ -78,6 +78,14 @@ Recommended fields: For positive fixtures, `expected_failures` and `allowed_failures` should be empty arrays. + +## Fixture manifest + +- All fixture bundles must be listed in `fixtures/manifest.json`. +- The manifest is deterministic and hand-reviewable. +- `fixture_id`, `fixture_version`, `contracts`, `expected_failure_labels`, and `path` must match committed fixture metadata. +- Benchmark artifacts should reference only registered fixtures from the manifest. + ## Positive and negative fixtures - Positive fixtures should pass all must-hold contracts. diff --git a/docs/benchmarks/layered_admissibility.md b/docs/benchmarks/layered_admissibility.md index e6d098c..b8d2048 100644 --- a/docs/benchmarks/layered_admissibility.md +++ b/docs/benchmarks/layered_admissibility.md @@ -4,6 +4,8 @@ Deterministically compare admissibility outcomes across fixture bundles using ContractValidator and AdmissibilityScorer. +All benchmarked fixtures are indexed in `fixtures/manifest.json` and benchmark artifact references should resolve only to registered manifest entries. + ## Fixture results | fixture_id | expected_admissible | observed_admissible | structural_score | relational_score | operational_score | governance_score | overall_admissibility_score | failure_labels | diff --git a/fixtures/manifest.json b/fixtures/manifest.json new file mode 100644 index 0000000..cbc9315 --- /dev/null +++ b/fixtures/manifest.json @@ -0,0 +1,79 @@ +{ + "manifest_version": "1.0", + "fixtures": [ + { + "fixture_id": "coding_workflow_pr_review_v1", + "fixture_version": "1.0.0", + "category": "coding_workflow", + "family": "coding_workflow_pr_review", + "degradation_level": "baseline", + "path": "fixtures/coding_workflow_pr_review_v1", + "expected_admissible": true, + "contracts": [ + "no_orphan_tool_calls", + "pre_merge_review", + "recovery_path_available", + "security_causal_block" + ], + "expected_failure_labels": [] + }, + { + "fixture_id": "coding_workflow_pr_review_mild_v1", + "fixture_version": "1.0.0", + "category": "coding_workflow", + "family": "coding_workflow_pr_review", + "degradation_level": "mild", + "path": "fixtures/coding_workflow_pr_review_mild_v1", + "expected_admissible": false, + "contracts": [ + "no_orphan_tool_calls", + "pre_merge_review", + "recovery_path_available", + "security_causal_block" + ], + "expected_failure_labels": [ + "RECOVERY_PATH_INVALID" + ] + }, + { + "fixture_id": "coding_workflow_pr_review_moderate_v1", + "fixture_version": "1.0.0", + "category": "coding_workflow", + "family": "coding_workflow_pr_review", + "degradation_level": "moderate", + "path": "fixtures/coding_workflow_pr_review_moderate_v1", + "expected_admissible": false, + "contracts": [ + "no_orphan_tool_calls", + "pre_merge_review", + "recovery_path_available", + "security_causal_block" + ], + "expected_failure_labels": [ + "CAUSAL_DEPENDENCY_LOSS", + "RECOVERY_PATH_INVALID" + ] + }, + { + "fixture_id": "coding_workflow_pr_review_degraded_v1", + "fixture_version": "1.0.0", + "category": "coding_workflow", + "family": "coding_workflow_pr_review", + "degradation_level": "severe", + "path": "fixtures/coding_workflow_pr_review_degraded_v1", + "expected_admissible": false, + "contracts": [ + "no_orphan_tool_calls", + "pre_merge_review", + "recovery_path_available", + "security_causal_block" + ], + "expected_failure_labels": [ + "CAUSAL_DEPENDENCY_LOSS", + "INVARIANT_VIOLATION", + "POLICY_ORDER_BROKEN", + "RECOVERY_PATH_INVALID" + ] + } + ] +} diff --git a/tests/test_fixture_manifest.py b/tests/test_fixture_manifest.py new file mode 100644 index 0000000..eae4e81 --- /dev/null +++ b/tests/test_fixture_manifest.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +import json +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +MANIFEST_PATH = ROOT / "fixtures" / "manifest.json" +ALLOWED_DEGRADATION_LEVELS = ["baseline", "mild", "moderate", "severe"] +EXPECTED_FIXTURE_ORDER = [ + "coding_workflow_pr_review_v1", + "coding_workflow_pr_review_mild_v1", + "coding_workflow_pr_review_moderate_v1", + "coding_workflow_pr_review_degraded_v1", +] + + +def _load_json(path: Path) -> dict: + with path.open("r", encoding="utf-8") as f: + return json.load(f) + + +def _load_manifest() -> dict: + return _load_json(MANIFEST_PATH) + + +def test_manifest_is_json_serializable_and_sorted() -> None: + manifest = _load_manifest() + json.dumps(manifest, sort_keys=True) + + fixture_ids = [entry["fixture_id"] for entry in manifest["fixtures"]] + assert fixture_ids == EXPECTED_FIXTURE_ORDER + + +def test_manifest_paths_exist() -> None: + manifest = _load_manifest() + required_paths = [ + Path("original/trace.json"), + Path("original/state.json"), + Path("original/dependency_graph.json"), + Path("original/contracts"), + Path("reconstructed/trace.json"), + Path("reconstructed/state.json"), + Path("reconstructed/dependency_graph.json"), + Path("expected/admissibility.json"), + Path("expected/failures.json"), + Path("README.md"), + ] + + for entry in manifest["fixtures"]: + fixture_dir = ROOT / entry["path"] + assert fixture_dir.exists(), f"Missing fixture directory: {fixture_dir}" + for rel_path in required_paths: + assert (fixture_dir / rel_path).exists(), f"Missing path: {fixture_dir / rel_path}" + + +def test_manifest_matches_fixture_admissibility_metadata() -> None: + manifest = _load_manifest() + + for entry in manifest["fixtures"]: + admissibility = _load_json(ROOT / entry["path"] / "expected" / "admissibility.json") + assert entry["fixture_id"] == admissibility["fixture_id"] + assert entry["fixture_version"] == admissibility["fixture_version"] + assert entry["expected_admissible"] == admissibility["expected_admissible"] + assert entry["expected_failure_labels"] == sorted(admissibility.get("expected_failure_labels", [])) + + +def test_manifest_contracts_match_contract_files() -> None: + manifest = _load_manifest() + + for entry in manifest["fixtures"]: + contracts_dir = ROOT / entry["path"] / "original" / "contracts" + contract_ids = [] + for contract_file in sorted(contracts_dir.glob("*.json")): + contract_ids.append(_load_json(contract_file)["contract_id"]) + assert sorted(contract_ids) == entry["contracts"] + + +def test_manifest_expected_failure_labels_match_failures_file() -> None: + manifest = _load_manifest() + + for entry in manifest["fixtures"]: + failures = _load_json(ROOT / entry["path"] / "expected" / "failures.json") + assert entry["expected_failure_labels"] == sorted(failures.get("expected_failures", [])) + + +def test_benchmark_artifact_references_only_manifest_fixtures() -> None: + manifest = _load_manifest() + benchmark = _load_json(ROOT / "artifacts" / "layered_admissibility_results.json") + + manifest_index = { + entry["fixture_id"]: { + "fixture_version": entry["fixture_version"], + "path": entry["path"], + } + for entry in manifest["fixtures"] + } + + for point in benchmark["points"]: + fixture_id = point["fixture_id"] + assert fixture_id in manifest_index + assert point["fixture_version"] == manifest_index[fixture_id]["fixture_version"] + assert point["fixture_path"] == manifest_index[fixture_id]["path"] + + +def test_degradation_levels_are_known_and_unique_per_family() -> None: + manifest = _load_manifest() + family_to_levels: dict[str, set[str]] = {} + + for entry in manifest["fixtures"]: + level = entry["degradation_level"] + family = entry["family"] + assert level in ALLOWED_DEGRADATION_LEVELS + family_to_levels.setdefault(family, set()) + assert level not in family_to_levels[family] + family_to_levels[family].add(level) + + +def test_no_unregistered_fixture_directories() -> None: + manifest = _load_manifest() + registered_paths = {entry["path"] for entry in manifest["fixtures"]} + + discovered_fixture_paths = { + str(path.parent.parent.relative_to(ROOT)).replace("\\", "/") + for path in (ROOT / "fixtures").glob("*/expected/admissibility.json") + } + + assert discovered_fixture_paths.issubset(registered_paths) From 2d5e2450017ddb592fba7205d008b568e91b990b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?= <159939812+ProfRandom92@users.noreply.github.com> Date: Tue, 19 May 2026 06:06:50 -0700 Subject: [PATCH 2/2] Align layered admissibility artifact with generator output --- artifacts/layered_admissibility_results.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifacts/layered_admissibility_results.json b/artifacts/layered_admissibility_results.json index 88d82de..00cc5f9 100644 --- a/artifacts/layered_admissibility_results.json +++ b/artifacts/layered_admissibility_results.json @@ -61,7 +61,7 @@ "governance_score": 1.0, "observed_admissible": false, "operational_score": 1.0, - "overall_admissibility_score": 0.8333333333333334, + "overall_admissibility_score": 0.8333333333333333, "passed_contracts": [ "no_orphan_tool_calls", "pre_merge_review"