Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 44 additions & 15 deletions src/astra/validation/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def validate_analysis(data: dict[str, Any], base_path: Path | None = None) -> li
inputs = data.get("inputs") or []
outputs = data.get("outputs") or []
prior_insights = data.get("prior_insights") or {}
findings = data.get("findings") or {}

# Check for duplicate input IDs
input_ids: set[str] = set()
Expand Down Expand Up @@ -196,7 +197,11 @@ def validate_analysis(data: dict[str, Any], base_path: Path | None = None) -> li
root_decisions = _collect_node_decisions(data)

# Validate all decisions
errors.extend(_validate_decisions(root_decisions, prior_insights, "", ancestor_chain=[]))
errors.extend(
_validate_decisions(
root_decisions, prior_insights, "", ancestor_chain=[], findings=findings
)
)

# Validate evidence artifact references in prior_insights and findings
errors.extend(
Expand Down Expand Up @@ -345,17 +350,20 @@ def _validate_analysis_node(
if segments[0] in target_decisions:
constraint_scope[decision_id] = target_decisions[segments[0]]
# `Option.insights` resolves only against this node's own
# `prior_insights` map. Cross-scope refs must be written explicitly
# as `../id`, `../../id`, ... (matching `Input.from` / `Decision.from`
# convention) — `_validate_decisions` parses those via the ancestor chain.
# `prior_insights` and `findings` maps. Cross-scope refs must be written
# explicitly as `../id`, `../../id`, ... (matching `Input.from` /
# `Decision.from` convention) — `_validate_decisions` parses those via
# the ancestor chain.
node_prior_insights = node.get("prior_insights") or {}
node_findings = node.get("findings") or {}
errors.extend(
_validate_decisions(
node_decisions,
node_prior_insights,
node_path,
constraint_scope,
ancestor_chain=ancestor_chain,
findings=node_findings,
)
)

Expand Down Expand Up @@ -465,23 +473,30 @@ def _validate_decisions(
path_prefix: str,
constraint_scope: dict[str, Any] | None = None,
ancestor_chain: list[dict[str, Any]] | None = None,
findings: dict[str, Any] | None = None,
) -> list[SemanticError]:
"""Validate a set of decisions at a given node.

Args:
prior_insights: Node-local ``prior_insights`` map. Bare-id
``Option.insights`` refs resolve here.
``Option.insights`` refs resolve against the union of this map
and ``findings``.
constraint_scope: Decisions available for constraint resolution. Defaults to
decisions themselves, but may include parent decisions for sub-analyses.
ancestor_chain: Root-first chain of ancestor scopes for resolving
``../id``-form ``Option.insights`` refs against ancestor
``prior_insights``. Empty/None at the root.
``../id``-form ``Option.insights`` refs against the union of
an ancestor's ``prior_insights`` and ``findings``. Empty/None
at the root.
findings: Node-local ``findings`` map. Bare-id ``Option.insights``
refs may also resolve here (alongside ``prior_insights``).
"""
errors: list[SemanticError] = []
if constraint_scope is None:
constraint_scope = decisions
if ancestor_chain is None:
ancestor_chain = []
if findings is None:
findings = {}

decisions_prefix = f"{path_prefix}.decisions" if path_prefix else "decisions"
for decision_id, decision in decisions.items():
Expand Down Expand Up @@ -559,6 +574,7 @@ def _validate_decisions(
prior_insights,
ancestor_chain,
f"{option_path}.insights[{i}]",
findings=findings,
)
)

Expand Down Expand Up @@ -915,13 +931,20 @@ def _validate_option_insight_ref(
prior_insights: dict[str, Any],
ancestor_chain: list[dict[str, Any]],
ref_path: str,
findings: dict[str, Any] | None = None,
) -> list[SemanticError]:
"""Validate a single ``Option.insights`` reference.

Bare id resolves against ``prior_insights`` (the node-local map);
``../id``, ``../../id``, ... resolves against the corresponding
ancestor's ``prior_insights``. Mirrors the ``../`` grammar used by
``Input.from`` and ``Decision.from``.
Bare id resolves against the union of the node-local ``prior_insights``
and ``findings`` maps; ``../id``, ``../../id``, ... resolves against the
corresponding ancestor's ``prior_insights`` and ``findings`` (also
unioned). Mirrors the ``../`` grammar used by ``Input.from`` and
``Decision.from``.

Options may cite either prior_insights (literature/external claims that
motivate the choice) or findings (claims produced by this analysis that
in turn justify the choice), reflecting that both are evidence in the
Insight sense.
"""

def _error(message: str) -> list[SemanticError]:
Expand All @@ -938,18 +961,24 @@ def _error(message: str) -> list[SemanticError]:
)
insight_id = segments[0]

if findings is None:
findings = {}

if up == 0:
target_insights = prior_insights
scope_desc = "this node's prior_insights"
target_insights = {**prior_insights, **findings}
scope_desc = "this node's prior_insights or findings"
else:
target_scope = _resolve_ancestor_scope(ancestor_chain, up)
if target_scope is None:
return _error(
f"Option insight '{ref}' escapes {up} level(s) but only "
f"{len(ancestor_chain)} ancestor scope(s) available"
)
target_insights = target_scope.get("prior_insights") or {}
scope_desc = f"{up}-level ancestor's prior_insights"
target_insights = {
**(target_scope.get("prior_insights") or {}),
**(target_scope.get("findings") or {}),
}
scope_desc = f"{up}-level ancestor's prior_insights or findings"

if insight_id not in target_insights:
return _error(f"Option insight '{ref}' not found in {scope_desc}")
Expand Down
86 changes: 86 additions & 0 deletions tests/fixtures/valid/option_insights_reference_finding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Option.insights may reference either prior_insights or findings.
# Prior insights motivate a choice via external/literature evidence;
# findings (claims produced by this analysis) can in turn justify a
# downstream choice. The semantic validator resolves Option.insights ids
# against the union of prior_insights and findings — at the node-local
# scope for bare ids and at an ancestor scope for `../id`-form refs.
version: "0.0.10"
name: "Option insights reference findings"

inputs:
- id: catalog
type: data
source: "data/catalog.parquet"

outputs:
- id: pilot_fit
type: data
inputs: [catalog]
decisions: [fit_form]
recipe:
command: python src/pilot.py {inputs.catalog}
- id: production_fit
from: production.fit_out

prior_insights:
literature_motivation:
id: literature_motivation
claim: "Linear fits suffice in the regime we target."
created_at: "2026-05-15T00:00:00Z"
evidence:
- id: ev1
doi: "10.1234/example.lit"

findings:
pilot_residuals_flat:
id: pilot_residuals_flat
claim: "Pilot residuals showed no curvature signal."
created_at: "2026-05-15T00:00:00Z"
evidence:
- id: ev_pilot
artifact: pilot_fit
derived: true

decisions:
fit_form:
label: "Fit form"
default: linear
options:
linear:
label: "Linear"
# Bare id resolving into prior_insights (literature motivation).
insights: [literature_motivation]
quadratic:
label: "Quadratic"
excluded: true
excluded_reason: "Ruled out by pilot residuals."
# Bare id resolving into findings (claim produced by this analysis).
insights: [pilot_residuals_flat]

analyses:
production:
inputs:
- id: catalog
from: ../catalog
outputs:
- id: fit_out
type: data
inputs: [catalog]
decisions: [production_fit_form]
recipe:
command: python src/production.py {inputs.catalog}
decisions:
production_fit_form:
label: "Production fit form"
default: linear
options:
linear:
label: "Linear"
# `../id` resolving into a root-level finding.
insights: [../pilot_residuals_flat]
quadratic:
label: "Quadratic"
excluded: true
excluded_reason: "Same as the pilot decision."
# `../id` resolving into a root-level prior_insight.
insights: [../literature_motivation]
14 changes: 14 additions & 0 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,20 @@ def test_insight_ref_escapes_too_far(self, invalid_dir: Path):
assert len(bad) == 1, f"expected 1 INVALID_INSIGHT_REF, got: {bad}"
assert "escapes" in bad[0].message

def test_option_insights_resolve_against_findings(self, valid_dir: Path):
    """Verify that ``Option.insights`` ids may point at findings.

    The ``option_insights_reference_finding.yaml`` fixture cites both a
    prior insight and a finding, once by bare id (node-local scope) and
    once in ``../id`` form from a sub-analysis (ancestor scope). None of
    those references should be reported as ``INVALID_INSIGHT_REF``.
    """
    fixture_path = valid_dir / "option_insights_reference_finding.yaml"
    # Only insight-reference errors matter here; other error codes are
    # outside the scope of this test and are deliberately ignored.
    insight_errors = [
        err
        for err in validate_analysis_file(fixture_path)
        if err.code == "INVALID_INSIGHT_REF"
    ]
    assert not insight_errors, (
        "Bare-id and `../`-form refs into `findings` should both "
        f"resolve; got: {insight_errors}"
    )

def test_invalid_finding_output(self, invalid_dir: Path):
errors = validate_analysis_file(invalid_dir / "invalid_finding_output.yaml")
assert any(e.code == "INVALID_ARTIFACT_REF" for e in errors)
Expand Down
Loading