diff --git a/artifacts/multi_family_admissibility_results.json b/artifacts/multi_family_admissibility_results.json new file mode 100644 index 0000000..b0a83eb --- /dev/null +++ b/artifacts/multi_family_admissibility_results.json @@ -0,0 +1,211 @@ +{ + "artifact_id": "multi_family_admissibility_results_v1", + "families": [ + { + "curve": { + "curve_id": "coding_workflow_pr_review_curve_v1", + "generated_by": "DegradationCurveGenerator", + "points": [ + { + "expected_admissible": true, + "failed_contracts": [], + "failure_labels": [], + "fixture_id": "coding_workflow_pr_review_v1", + "fixture_path": "fixtures/coding_workflow_pr_review_v1", + "fixture_version": "1.0.0", + "governance_score": 1.0, + "observed_admissible": true, + "operational_score": 1.0, + "overall_admissibility_score": 1.0, + "passed_contracts": [ + "no_orphan_tool_calls", + "pre_merge_review", + "recovery_path_available", + "security_causal_block" + ], + "relational_score": 1.0, + "structural_score": 1.0 + }, + { + "expected_admissible": false, + "failed_contracts": [ + "recovery_path_available" + ], + "failure_labels": [ + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "coding_workflow_pr_review_mild_v1", + "fixture_path": "fixtures/coding_workflow_pr_review_mild_v1", + "fixture_version": "1.0.0", + "governance_score": 1.0, + "observed_admissible": false, + "operational_score": 1.0, + "overall_admissibility_score": 0.9166666666666666, + "passed_contracts": [ + "no_orphan_tool_calls", + "pre_merge_review", + "security_causal_block" + ], + "relational_score": 0.6666666666666666, + "structural_score": 1.0 + }, + { + "expected_admissible": false, + "failed_contracts": [ + "recovery_path_available", + "security_causal_block" + ], + "failure_labels": [ + "CAUSAL_DEPENDENCY_LOSS", + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "coding_workflow_pr_review_moderate_v1", + "fixture_path": "fixtures/coding_workflow_pr_review_moderate_v1", + "fixture_version": "1.0.0", + "governance_score": 1.0, + 
"observed_admissible": false, + "operational_score": 1.0, + "overall_admissibility_score": 0.8333333333333334, + "passed_contracts": [ + "no_orphan_tool_calls", + "pre_merge_review" + ], + "relational_score": 0.3333333333333333, + "structural_score": 1.0 + }, + { + "expected_admissible": false, + "failed_contracts": [ + "no_orphan_tool_calls", + "pre_merge_review", + "recovery_path_available", + "security_causal_block" + ], + "failure_labels": [ + "CAUSAL_DEPENDENCY_LOSS", + "INVARIANT_VIOLATION", + "POLICY_ORDER_BROKEN", + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "coding_workflow_pr_review_degraded_v1", + "fixture_path": "fixtures/coding_workflow_pr_review_degraded_v1", + "fixture_version": "1.0.0", + "governance_score": 1.0, + "observed_admissible": false, + "operational_score": 0.0, + "overall_admissibility_score": 0.5, + "passed_contracts": [], + "relational_score": 0.0, + "structural_score": 1.0 + } + ], + "version": "1.0" + }, + "family": "coding_workflow_pr_review" + }, + { + "curve": { + "curve_id": "incident_response_page_triage_curve_v1", + "generated_by": "DegradationCurveGenerator", + "points": [ + { + "expected_admissible": true, + "failed_contracts": [], + "failure_labels": [], + "fixture_id": "incident_response_page_triage_v1", + "fixture_path": "fixtures/incident_response_page_triage_v1", + "fixture_version": "1.0.0", + "governance_score": 1.0, + "observed_admissible": true, + "operational_score": 1.0, + "overall_admissibility_score": 1.0, + "passed_contracts": [ + "alert_ack_before_mitigation", + "no_orphan_mitigation_steps", + "rollback_reachable", + "root_cause_links_incident" + ], + "relational_score": 1.0, + "structural_score": 1.0 + }, + { + "expected_admissible": false, + "failed_contracts": [ + "rollback_reachable" + ], + "failure_labels": [ + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "incident_response_page_triage_mild_v1", + "fixture_path": "fixtures/incident_response_page_triage_mild_v1", + "fixture_version": "1.0.0", + 
"governance_score": 1.0, + "observed_admissible": false, + "operational_score": 1.0, + "overall_admissibility_score": 0.9166666666666666, + "passed_contracts": [ + "alert_ack_before_mitigation", + "no_orphan_mitigation_steps", + "root_cause_links_incident" + ], + "relational_score": 0.6666666666666666, + "structural_score": 1.0 + }, + { + "expected_admissible": false, + "failed_contracts": [ + "no_orphan_mitigation_steps", + "rollback_reachable" + ], + "failure_labels": [ + "INVARIANT_VIOLATION", + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "incident_response_page_triage_moderate_v1", + "fixture_path": "fixtures/incident_response_page_triage_moderate_v1", + "fixture_version": "1.0.0", + "governance_score": 1.0, + "observed_admissible": false, + "operational_score": 1.0, + "overall_admissibility_score": 0.8333333333333334, + "passed_contracts": [ + "alert_ack_before_mitigation", + "root_cause_links_incident" + ], + "relational_score": 0.3333333333333333, + "structural_score": 1.0 + }, + { + "expected_admissible": false, + "failed_contracts": [ + "alert_ack_before_mitigation", + "no_orphan_mitigation_steps", + "rollback_reachable", + "root_cause_links_incident" + ], + "failure_labels": [ + "CAUSAL_DEPENDENCY_LOSS", + "INVARIANT_VIOLATION", + "POLICY_ORDER_BROKEN", + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "incident_response_page_triage_degraded_v1", + "fixture_path": "fixtures/incident_response_page_triage_degraded_v1", + "fixture_version": "1.0.0", + "governance_score": 1.0, + "observed_admissible": false, + "operational_score": 0.0, + "overall_admissibility_score": 0.5, + "passed_contracts": [], + "relational_score": 0.0, + "structural_score": 1.0 + } + ], + "version": "1.0" + }, + "family": "incident_response_page_triage" + } + ], + "generated_by": "DegradationCurveGenerator", + "version": "1.0" +} diff --git a/fixtures/incident_response_page_triage_mild_v1/reconstructed/dependency_graph.json 
b/fixtures/incident_response_page_triage_mild_v1/reconstructed/dependency_graph.json index e99bc8d..5a9ff24 100644 --- a/fixtures/incident_response_page_triage_mild_v1/reconstructed/dependency_graph.json +++ b/fixtures/incident_response_page_triage_mild_v1/reconstructed/dependency_graph.json @@ -81,6 +81,12 @@ "target": "escalate_to_human", "relation": "RECOVERY", "metadata": {} + }, + { + "source": "incident_classified", + "target": "rollback_available", + "relation": "RECOVERY", + "metadata": {} } ] } diff --git a/package.json b/package.json index 47d3a6a..9132b95 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "test:replay": "pytest tests/test_paper_replay_bench.py tests/test_agent_trace_replay.py tests/test_replay_continuity.py -q", "layout": "python scripts/check_repo_layout.py", "check": "npm run layout && npm run typecheck && npm run validate && npm run build && npm run test", - "generate:layered-admissibility": "python scripts/generate_layered_admissibility_artifact.py" + "generate:layered-admissibility": "python scripts/generate_layered_admissibility_artifact.py", + "generate:multi-family-admissibility": "python scripts/generate_multi_family_admissibility_artifact.py" } } diff --git a/scripts/generate_multi_family_admissibility_artifact.py b/scripts/generate_multi_family_admissibility_artifact.py new file mode 100644 index 0000000..39d6681 --- /dev/null +++ b/scripts/generate_multi_family_admissibility_artifact.py @@ -0,0 +1,72 @@ +"""Deterministic entrypoint for multi-family admissibility artifact regeneration.""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path +from typing import Any + +REPO_ROOT = Path(__file__).resolve().parents[1] +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +from src.validation.degradation_curve_generator import ( + LAYERED_CURVE_LEVELS, + MANIFEST_PATH, + DegradationCurveGenerator, +) + +ARTIFACT_ID = "multi_family_admissibility_results_v1" 
OUTPUT_PATH = Path("artifacts/multi_family_admissibility_results.json")


def _families_with_standard_levels(
    manifest_path: Path = MANIFEST_PATH,
    levels: tuple[str, ...] = LAYERED_CURVE_LEVELS,
) -> tuple[str, ...]:
    """Return the sorted names of manifest families covering every level in *levels*.

    The manifest at *manifest_path* is read as UTF-8 JSON. Each entry of its
    ``"fixtures"`` list contributes its ``"degradation_level"`` to the set of
    levels seen for its ``"family"``; entries where either value is missing
    or falsy are skipped.

    Raises:
        ValueError: if the manifest's ``"fixtures"`` value is not a list.
    """
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    entries = manifest.get("fixtures")
    if not isinstance(entries, list):
        raise ValueError(f"invalid fixture manifest format: {manifest_path}")

    levels_by_family: dict[str, set[str]] = {}
    for entry in entries:
        family = entry.get("family")
        level = entry.get("degradation_level")
        if family and level:
            levels_by_family.setdefault(str(family), set()).add(str(level))

    # Keep only families whose observed levels include all requested ones.
    complete = [
        name
        for name, seen_levels in levels_by_family.items()
        if seen_levels.issuperset(levels)
    ]
    complete.sort()
    return tuple(complete)


def generate_multi_family_admissibility_artifact(output_path: Path = OUTPUT_PATH) -> Path:
    """Write the multi-family admissibility artifact and return *output_path*.

    One curve is generated per family returned by
    ``_families_with_standard_levels()``, with curve id ``"<family>_curve_v1"``.
    The payload is serialized with sorted keys, a two-space indent and a
    trailing newline; missing parent directories of *output_path* are created.
    """
    generator = DegradationCurveGenerator()

    def _family_entry(name: str) -> dict[str, Any]:
        # Build the per-family record: fixtures -> curve -> plain dict.
        family_fixtures = generator.fixtures_for_manifest_family(name)
        family_curve = generator.generate(family_fixtures, curve_id=f"{name}_curve_v1")
        return {"family": name, "curve": generator.to_dict(family_curve)}

    families_payload: list[dict[str, Any]] = [
        _family_entry(name) for name in _families_with_standard_levels()
    ]

    payload = {
        "artifact_id": ARTIFACT_ID,
        "generated_by": DegradationCurveGenerator.__name__,
        "version": "1.0",
        "families": families_payload,
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    output_path.write_text(serialized + "\n", encoding="utf-8")
    return output_path


def main() -> int:
    """Regenerate the artifact at the default path, print that path, return 0."""
    print(generate_multi_family_admissibility_artifact().as_posix())
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

# Patch metadata that followed this script in the original diff (carried over):
# diff --git a/tests/test_artifact_reproducibility.py b/tests/test_artifact_reproducibility.py
# index 3be9bfa..f0600e2 100644
# --- a/tests/test_artifact_reproducibility.py
# +++ b/tests/test_artifact_reproducibility.py
# Patch hunks for tests/test_artifact_reproducibility.py (carried over from the diff):
# @@ -8,6 +8,9 @@
#  from scripts.generate_layered_admissibility_artifact import (
#      generate_layered_admissibility_artifact,
#  )
# +from scripts.generate_multi_family_admissibility_artifact import (
# +    generate_multi_family_admissibility_artifact,
# +)
#
#
#  @dataclass(frozen=True)
# @@ -25,6 +28,11 @@ class DeterministicArtifactSpec:
#          committed_path=Path("artifacts/layered_admissibility_results.json"),
#          regenerate=generate_layered_admissibility_artifact,
#      ),
# +    DeterministicArtifactSpec(
# +        name="multi_family_admissibility_results",
# +        committed_path=Path("artifacts/multi_family_admissibility_results.json"),
# +        regenerate=generate_multi_family_admissibility_artifact,
# +    ),
#  )
#
# diff --git a/tests/test_multi_family_admissibility_artifact.py b/tests/test_multi_family_admissibility_artifact.py
# new file mode 100644
# index 0000000..8723b79
# --- /dev/null
# +++ b/tests/test_multi_family_admissibility_artifact.py
# @@ -0,0 +1,76 @@
from __future__ import annotations

import json
from pathlib import Path

from scripts.generate_multi_family_admissibility_artifact import (
    ARTIFACT_ID,
    generate_multi_family_admissibility_artifact,
)

ARTIFACT_PATH = Path("artifacts/multi_family_admissibility_results.json")
SINGLE_FAMILY_ARTIFACT_PATH = Path("artifacts/layered_admissibility_results.json")
EXPECTED_FAMILIES = ["coding_workflow_pr_review", "incident_response_page_triage"]
EXPECTED_LEVEL_ORDER = ["baseline", "mild", "moderate", "severe"]
MANIFEST_PATH = Path("fixtures/manifest.json")


def _load_json(path: Path) -> dict[str, object]:
    """Parse the UTF-8 JSON document stored at *path*."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)


def _manifest_level_index() -> dict[str, str]:
    """Map each manifest fixture id to its degradation level."""
    level_by_fixture: dict[str, str] = {}
    for entry in _load_json(MANIFEST_PATH)["fixtures"]:
        level_by_fixture[entry["fixture_id"]] = entry["degradation_level"]
    return level_by_fixture


def test_script_output_matches_committed_artifact(tmp_path: Path) -> None:
    """A fresh generation must equal the committed artifact."""
    regenerated_path = tmp_path / "multi_family_admissibility_results.json"
    generate_multi_family_admissibility_artifact(regenerated_path)

    regenerated = _load_json(regenerated_path)
    committed = _load_json(ARTIFACT_PATH)
    assert regenerated == committed


def test_artifact_has_stable_schema_no_time_or_environment_fields() -> None:
    """The artifact carries exactly the four expected top-level keys."""
    artifact = _load_json(ARTIFACT_PATH)
    assert artifact["artifact_id"] == ARTIFACT_ID
    assert set(artifact) == {"artifact_id", "generated_by", "version", "families"}


def test_families_are_sorted_and_expected_families_present() -> None:
    """Family names appear in sorted order and match the expected list."""
    artifact = _load_json(ARTIFACT_PATH)
    names = []
    for family_entry in artifact["families"]:
        names.append(family_entry["family"])

    assert names == sorted(names)
    assert names == EXPECTED_FAMILIES


def test_each_family_curve_has_four_points_and_standard_level_order() -> None:
    """Every curve has four points whose manifest levels follow the standard order."""
    artifact = _load_json(ARTIFACT_PATH)
    level_of = _manifest_level_index()

    for family_entry in artifact["families"]:
        curve_points = family_entry["curve"]["points"]
        assert len(curve_points) == 4
        observed_levels = [level_of[point["fixture_id"]] for point in curve_points]
        assert observed_levels == EXPECTED_LEVEL_ORDER


def test_coding_workflow_curve_remains_compatible_with_single_family_artifact() -> None:
    """The coding-workflow curve equals the committed single-family artifact."""
    multi_family = _load_json(ARTIFACT_PATH)
    single_family = _load_json(SINGLE_FAMILY_ARTIFACT_PATH)

    coding_family = next(
        filter(
            lambda entry: entry["family"] == "coding_workflow_pr_review",
            multi_family["families"],
        )
    )
    assert coding_family["curve"] == single_family


def test_repeated_generation_is_stable(tmp_path: Path) -> None:
    """Two consecutive generations produce equal JSON documents."""
    first = tmp_path / "first.json"
    second = tmp_path / "second.json"

    for target in (first, second):
        generate_multi_family_admissibility_artifact(target)

    assert _load_json(first) == _load_json(second)