diff --git a/src/astra/validation/semantic.py b/src/astra/validation/semantic.py index 919b643..d4d042b 100644 --- a/src/astra/validation/semantic.py +++ b/src/astra/validation/semantic.py @@ -161,6 +161,7 @@ def validate_analysis(data: dict[str, Any], base_path: Path | None = None) -> li inputs = data.get("inputs") or [] outputs = data.get("outputs") or [] prior_insights = data.get("prior_insights") or {} + findings = data.get("findings") or {} # Check for duplicate input IDs input_ids: set[str] = set() @@ -196,7 +197,11 @@ def validate_analysis(data: dict[str, Any], base_path: Path | None = None) -> li root_decisions = _collect_node_decisions(data) # Validate all decisions - errors.extend(_validate_decisions(root_decisions, prior_insights, "", ancestor_chain=[])) + errors.extend( + _validate_decisions( + root_decisions, prior_insights, "", ancestor_chain=[], findings=findings + ) + ) # Validate evidence artifact references in prior_insights and findings errors.extend( @@ -345,10 +350,12 @@ def _validate_analysis_node( if segments[0] in target_decisions: constraint_scope[decision_id] = target_decisions[segments[0]] # `Option.insights` resolves only against this node's own - # `prior_insights` map. Cross-scope refs must be written explicitly - # as `../id`, `../../id`, ... (matching `Input.from` / `Decision.from` - # convention) — `_validate_decisions` parses those via the ancestor chain. + # `prior_insights` and `findings` maps. Cross-scope refs must be written + # explicitly as `../id`, `../../id`, ... (matching `Input.from` / + # `Decision.from` convention) — `_validate_decisions` parses those via + # the ancestor chain. node_prior_insights = node.get("prior_insights") or {} + node_findings = node.get("findings") or {} errors.extend( _validate_decisions( node_decisions, @@ -356,6 +363,7 @@ def _validate_analysis_node( node_path, constraint_scope, ancestor_chain=ancestor_chain, + findings=node_findings, ) ) @@ -465,23 +473,30 @@ def _validate_decisions( path_prefix: str, constraint_scope: dict[str, Any] | None = None, ancestor_chain: list[dict[str, Any]] | None = None, + findings: dict[str, Any] | None = None, ) -> list[SemanticError]: """Validate a set of decisions at a given node. Args: prior_insights: Node-local ``prior_insights`` map. Bare-id - ``Option.insights`` refs resolve here. + ``Option.insights`` refs resolve against the union of this map + and ``findings``. constraint_scope: Decisions available for constraint resolution. Defaults to decisions themselves, but may include parent decisions for sub-analyses. ancestor_chain: Root-first chain of ancestor scopes for resolving - ``../id``-form ``Option.insights`` refs against ancestor - ``prior_insights``. Empty/None at the root. + ``../id``-form ``Option.insights`` refs against the union of + an ancestor's ``prior_insights`` and ``findings``. Empty/None + at the root. + findings: Node-local ``findings`` map. Bare-id ``Option.insights`` + refs may also resolve here (alongside ``prior_insights``). """ errors: list[SemanticError] = [] if constraint_scope is None: constraint_scope = decisions if ancestor_chain is None: ancestor_chain = [] + if findings is None: + findings = {} decisions_prefix = f"{path_prefix}.decisions" if path_prefix else "decisions" for decision_id, decision in decisions.items(): @@ -559,6 +574,7 @@ def _validate_decisions( prior_insights, ancestor_chain, f"{option_path}.insights[{i}]", + findings=findings, ) ) @@ -915,13 +931,20 @@ def _validate_option_insight_ref( prior_insights: dict[str, Any], ancestor_chain: list[dict[str, Any]], ref_path: str, + findings: dict[str, Any] | None = None, ) -> list[SemanticError]: """Validate a single ``Option.insights`` reference. - Bare id resolves against ``prior_insights`` (the node-local map); - ``../id``, ``../../id``, ... resolves against the corresponding - ancestor's ``prior_insights``. Mirrors the ``../`` grammar used by - ``Input.from`` and ``Decision.from``. + Bare id resolves against the union of the node-local ``prior_insights`` + and ``findings`` maps; ``../id``, ``../../id``, ... resolves against the + corresponding ancestor's ``prior_insights`` and ``findings`` (also + unioned). Mirrors the ``../`` grammar used by ``Input.from`` and + ``Decision.from``. + + Options may cite either prior_insights (literature/external claims that + motivate the choice) or findings (claims produced by this analysis that + in turn justify the choice), reflecting that both are evidence in the + Insight sense. """ def _error(message: str) -> list[SemanticError]: @@ -938,9 +961,12 @@ def _error(message: str) -> list[SemanticError]: ) insight_id = segments[0] + if findings is None: + findings = {} + if up == 0: - target_insights = prior_insights - scope_desc = "this node's prior_insights" + target_insights = {**prior_insights, **findings} + scope_desc = "this node's prior_insights or findings" else: target_scope = _resolve_ancestor_scope(ancestor_chain, up) if target_scope is None: @@ -948,8 +974,11 @@ def _error(message: str) -> list[SemanticError]: f"Option insight '{ref}' escapes {up} level(s) but only " f"{len(ancestor_chain)} ancestor scope(s) available" ) - target_insights = target_scope.get("prior_insights") or {} - scope_desc = f"{up}-level ancestor's prior_insights" + target_insights = { + **(target_scope.get("prior_insights") or {}), + **(target_scope.get("findings") or {}), + } + scope_desc = f"{up}-level ancestor's prior_insights or findings" if insight_id not in target_insights: return _error(f"Option insight '{ref}' not found in {scope_desc}") diff --git a/tests/fixtures/valid/option_insights_reference_finding.yaml b/tests/fixtures/valid/option_insights_reference_finding.yaml new file mode 100644 index 0000000..857576c --- /dev/null +++ b/tests/fixtures/valid/option_insights_reference_finding.yaml @@ -0,0 +1,86 @@ +# Option.insights may reference either prior_insights or findings. +# Prior insights motivate a choice via external/literature evidence; +# findings (claims produced by this analysis) can in turn justify a +# downstream choice. The semantic validator resolves Option.insights ids +# against the union of prior_insights and findings — at the node-local +# scope for bare ids and at an ancestor scope for `../id`-form refs. +version: "0.0.10" +name: "Option insights reference findings" + +inputs: + - id: catalog + type: data + source: "data/catalog.parquet" + +outputs: + - id: pilot_fit + type: data + inputs: [catalog] + decisions: [fit_form] + recipe: + command: python src/pilot.py {inputs.catalog} + - id: production_fit + from: production.fit_out + +prior_insights: + literature_motivation: + id: literature_motivation + claim: "Linear fits suffice in the regime we target." + created_at: "2026-05-15T00:00:00Z" + evidence: + - id: ev1 + doi: "10.1234/example.lit" + +findings: + pilot_residuals_flat: + id: pilot_residuals_flat + claim: "Pilot residuals showed no curvature signal." + created_at: "2026-05-15T00:00:00Z" + evidence: + - id: ev_pilot + artifact: pilot_fit + derived: true + +decisions: + fit_form: + label: "Fit form" + default: linear + options: + linear: + label: "Linear" + # Bare id resolving into prior_insights (literature motivation). + insights: [literature_motivation] + quadratic: + label: "Quadratic" + excluded: true + excluded_reason: "Ruled out by pilot residuals." + # Bare id resolving into findings (claim produced by this analysis). + insights: [pilot_residuals_flat] + +analyses: + production: + inputs: + - id: catalog + from: ../catalog + outputs: + - id: fit_out + type: data + inputs: [catalog] + decisions: [production_fit_form] + recipe: + command: python src/production.py {inputs.catalog} + decisions: + production_fit_form: + label: "Production fit form" + default: linear + options: + linear: + label: "Linear" + # `../id` resolving into a root-level finding. + insights: [../pilot_residuals_flat] + quadratic: + label: "Quadratic" + excluded: true + excluded_reason: "Same as the pilot decision." + # `../id` resolving into a root-level prior_insight. + insights: [../literature_motivation] diff --git a/tests/test_validation.py b/tests/test_validation.py index d0a8650..eff1764 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -102,6 +102,20 @@ def test_insight_ref_escapes_too_far(self, invalid_dir: Path): assert len(bad) == 1, f"expected 1 INVALID_INSIGHT_REF, got: {bad}" assert "escapes" in bad[0].message + def test_option_insights_resolve_against_findings(self, valid_dir: Path): + """`Option.insights` may reference findings (not only prior_insights), + at both the node-local scope (bare id) and at ancestor scopes + (``../id``). Findings are claims produced by the analysis and are + valid evidence for a downstream option, mirroring how prior_insights + cite external claims. + """ + errors = validate_analysis_file(valid_dir / "option_insights_reference_finding.yaml") + insight_errors = [e for e in errors if e.code == "INVALID_INSIGHT_REF"] + assert insight_errors == [], ( + "Bare-id and `../`-form refs into `findings` should both " + f"resolve; got: {insight_errors}" + ) + def test_invalid_finding_output(self, invalid_dir: Path): errors = validate_analysis_file(invalid_dir / "invalid_finding_output.yaml") assert any(e.code == "INVALID_ARTIFACT_REF" for e in errors)