Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ Workspace directories:
- `code/`: runnable pipeline code, scripts, configs, and method implementations.
- `data/`: machine-readable datasets, manifests, processed splits, caches, and loaders.
- `results/`: machine-readable metrics, predictions, ablations, tables, and evaluation outputs.
AutoR also standardizes `results/experiment_manifest.json` as a machine-readable summary of result, code, and note artifacts for downstream analysis.
- `writing/`: manuscript sources, LaTeX, section drafts, tables, and bibliography.
- `figures/`: plots, diagrams, charts, and paper figures.
- `artifacts/`: compiled PDFs and packaged deliverables.
Expand Down Expand Up @@ -374,6 +375,7 @@ Artifact requirements by stage:

- Stage 03+: machine-readable data under `workspace/data/`
- Stage 05+: machine-readable results under `workspace/results/`
- Stage 05+: `workspace/results/experiment_manifest.json` must exist and remain structurally valid
- Stage 06+: figure files under `workspace/figures/`
- Stage 07+: venue-aware conference or journal-style LaTeX sources plus a compiled PDF under `workspace/writing/` or `workspace/artifacts/`
- Stage 08+: review and readiness artifacts under `workspace/reviews/`
Expand Down
2 changes: 2 additions & 0 deletions src/artifact_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ def _scan_artifacts(paths: RunPaths) -> list[ArtifactRecord]:
continue
if path.name.endswith(".schema.json"):
continue
if category == "results" and path.name == "experiment_manifest.json":
continue
stat = path.stat()
records.append(
ArtifactRecord(
Expand Down
187 changes: 187 additions & 0 deletions src/experiment_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
from __future__ import annotations

import json
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path

from .artifact_index import indexed_artifacts_for_category, write_artifact_index
from .utils import RunPaths


@dataclass(frozen=True)
class ExperimentManifest:
    """Immutable snapshot of the standardized experiment bundle.

    Captures machine-readable result artifacts plus the supporting code
    and note files, together with a generation timestamp and per-category
    counts used by downstream validation and prompting.
    """

    generated_at: str
    ready_for_analysis: bool
    result_artifacts: list[dict[str, object]]
    code_artifacts: list[str]
    note_artifacts: list[str]
    summary: dict[str, int]

    def to_dict(self) -> dict[str, object]:
        """Serialize the manifest into a JSON-compatible mapping."""
        payload: dict[str, object] = {}
        payload["generated_at"] = self.generated_at
        payload["ready_for_analysis"] = self.ready_for_analysis
        payload["result_artifacts"] = self.result_artifacts
        payload["code_artifacts"] = self.code_artifacts
        payload["note_artifacts"] = self.note_artifacts
        payload["summary"] = self.summary
        return payload

    @classmethod
    def from_dict(cls, payload: dict[str, object]) -> "ExperimentManifest":
        """Rebuild a manifest from a previously serialized mapping.

        Tolerates partially malformed input: non-dict result records and
        blank/whitespace-only path strings are silently dropped.
        """
        result_rows: list[dict[str, object]] = []
        for entry in payload.get("result_artifacts", []):
            if isinstance(entry, dict):
                result_rows.append(dict(entry))

        def _string_list(key: str) -> list[str]:
            # Coerce every entry to str and keep only non-blank values.
            kept: list[str] = []
            for entry in payload.get(key, []):
                text = str(entry)
                if text.strip():
                    kept.append(text)
            return kept

        counts = {
            str(name): int(count)
            for name, count in dict(payload.get("summary", {})).items()
        }
        return cls(
            generated_at=str(payload.get("generated_at", "")).strip(),
            ready_for_analysis=bool(payload.get("ready_for_analysis", False)),
            result_artifacts=result_rows,
            code_artifacts=_string_list("code_artifacts"),
            note_artifacts=_string_list("note_artifacts"),
            summary=counts,
        )


def write_experiment_manifest(paths: RunPaths) -> ExperimentManifest:
    """Regenerate and persist `results/experiment_manifest.json`.

    Refreshes the artifact index first so the manifest reflects the
    current workspace, then summarizes result, code, and note artifacts.

    Args:
        paths: Resolved run layout (workspace directories and manifest path).

    Returns:
        The freshly written :class:`ExperimentManifest`.
    """
    artifact_index = write_artifact_index(paths)
    result_artifacts = [
        artifact
        for artifact in indexed_artifacts_for_category(artifact_index, "results")
        # The manifest file itself lives under results/ and must never be
        # listed as one of its own result artifacts.
        if artifact.get("rel_path") != "results/experiment_manifest.json"
    ]
    code_artifacts = _list_relative_files(paths.code_dir, paths.workspace_root)
    note_artifacts = _list_relative_files(paths.notes_dir, paths.workspace_root)
    manifest = ExperimentManifest(
        # Timezone-aware local timestamp: a naive datetime.now() would be
        # ambiguous when manifests from different machines are compared.
        generated_at=datetime.now().astimezone().isoformat(timespec="seconds"),
        ready_for_analysis=bool(result_artifacts),
        result_artifacts=result_artifacts,
        code_artifacts=code_artifacts,
        note_artifacts=note_artifacts,
        summary={
            "result_artifact_count": len(result_artifacts),
            "code_artifact_count": len(code_artifacts),
            "note_artifact_count": len(note_artifacts),
        },
    )
    paths.experiment_manifest.write_text(
        json.dumps(manifest.to_dict(), indent=2, ensure_ascii=True) + "\n",
        encoding="utf-8",
    )
    return manifest


def load_experiment_manifest(path: Path) -> ExperimentManifest | None:
    """Read a manifest from disk, or return None when the file is absent.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if not path.exists():
        return None
    raw = path.read_text(encoding="utf-8")
    return ExperimentManifest.from_dict(json.loads(raw))


def validate_experiment_manifest(path: Path) -> list[str]:
    """Return human-readable structural problems with the manifest at *path*.

    An empty list means the manifest is structurally valid. A missing or
    unparsable file is reported as a problem rather than raised, so stage
    validation can surface it alongside other issues instead of crashing.
    """
    try:
        manifest = load_experiment_manifest(path)
    except (json.JSONDecodeError, AttributeError, TypeError, ValueError):
        # A corrupt file (invalid JSON, or a top-level value that is not a
        # JSON object) must fail validation, not abort the workflow manager.
        return ["experiment_manifest.json exists but cannot be parsed as a manifest object."]
    if manifest is None:
        return ["Missing experiment_manifest.json."]

    problems: list[str] = []
    if not manifest.generated_at:
        problems.append("experiment_manifest.json is missing generated_at.")
    # Every per-category count must be present for downstream tooling.
    for counter in ("result_artifact_count", "code_artifact_count", "note_artifact_count"):
        if counter not in manifest.summary:
            problems.append(f"experiment_manifest.json is missing summary.{counter}.")
    if not isinstance(manifest.ready_for_analysis, bool):
        # from_dict coerces this field to bool, so this check only fires for
        # manifests constructed by other code paths.
        problems.append("experiment_manifest.json must contain a boolean ready_for_analysis field.")

    for artifact in manifest.result_artifacts:
        rel_path = str(artifact.get("rel_path", "")).strip()
        if not rel_path:
            problems.append("experiment_manifest.json contains a result artifact without rel_path.")
            continue
        schema = artifact.get("schema")
        if not isinstance(schema, dict):
            problems.append(
                f"experiment_manifest.json result artifact `{rel_path}` is missing schema metadata."
            )

    return problems


def format_experiment_manifest_for_prompt(manifest: ExperimentManifest, max_results: int = 5) -> str:
    """Render a compact markdown summary of *manifest* for a stage prompt.

    Args:
        manifest: The manifest to render.
        max_results: Cap on entries listed per artifact section.

    Returns:
        Markdown text with a timestamp line, readiness flag, counts, and up
        to *max_results* entries for results, code, and notes.
    """
    summary = manifest.summary
    # Fall back to the actual list lengths so a manifest whose summary block
    # is incomplete (from_dict allows an empty summary) still renders
    # instead of raising KeyError.
    result_count = summary.get("result_artifact_count", len(manifest.result_artifacts))
    code_count = summary.get("code_artifact_count", len(manifest.code_artifacts))
    note_count = summary.get("note_artifact_count", len(manifest.note_artifacts))
    lines = [
        f"Experiment manifest generated at: {manifest.generated_at}",
        f"Ready for analysis: {'yes' if manifest.ready_for_analysis else 'no'}",
        (
            "Summary: "
            f"{result_count} result artifacts, "
            f"{code_count} code artifacts, "
            f"{note_count} note artifacts"
        ),
    ]

    if manifest.result_artifacts:
        lines.append("\n### Result Artifacts")
        for artifact in manifest.result_artifacts[:max_results]:
            rel_path = str(artifact.get("rel_path", "")).strip()
            schema_summary = _format_schema(artifact.get("schema", {}))
            line = f"- `{rel_path}`"
            if schema_summary:
                line += f" | {schema_summary}"
            lines.append(line)

    if manifest.code_artifacts:
        lines.append("\n### Supporting Code")
        for rel_path in manifest.code_artifacts[:max_results]:
            lines.append(f"- `{rel_path}`")

    if manifest.note_artifacts:
        lines.append("\n### Experiment Notes")
        for rel_path in manifest.note_artifacts[:max_results]:
            lines.append(f"- `{rel_path}`")

    return "\n".join(lines)


def _list_relative_files(directory: Path, workspace_root: Path) -> list[str]:
    """Recursively collect files under *directory*, sorted, as path strings
    relative to *workspace_root*; empty when the directory is absent."""
    if not directory.exists():
        return []
    relative_paths = [
        str(candidate.relative_to(workspace_root))
        for candidate in directory.rglob("*")
        if candidate.is_file()
    ]
    relative_paths.sort()
    return relative_paths


def _format_schema(schema: object) -> str:
    """Condense schema metadata into a short comma-separated description.

    Returns "" for anything that is not a non-empty dict. Column and key
    lists are truncated to their first six entries.
    """
    if not isinstance(schema, dict) or not schema:
        return ""

    parts: list[str] = []
    label = str(schema.get("kind") or schema.get("source") or "").strip()
    if label:
        parts.append(label)
    columns = schema.get("columns")
    if isinstance(columns, list) and columns:
        parts.append("columns=" + ", ".join(str(col) for col in columns[:6]))
    keys = schema.get("keys")
    if isinstance(keys, list) and keys:
        parts.append("keys=" + ", ".join(str(key) for key in keys[:6]))
    if "row_count" in schema:
        parts.append(f"rows={schema['row_count']}")
    if "item_count" in schema:
        parts.append(f"items={schema['item_count']}")

    return ", ".join(parts)
18 changes: 17 additions & 1 deletion src/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import TextIO

from .artifact_index import format_artifact_index_for_prompt, write_artifact_index
from .experiment_manifest import format_experiment_manifest_for_prompt, write_experiment_manifest
from .manifest import (
ensure_run_manifest,
format_manifest_status,
Expand Down Expand Up @@ -146,6 +147,7 @@ def _create_run(self, user_goal: str, venue: str | None = None) -> RunPaths:
config = initialize_run_config(paths, model=self.operator.model, venue=venue)
initialize_run_manifest(paths)
write_artifact_index(paths)
write_experiment_manifest(paths)
append_log_entry(paths.logs, "run_start", f"Run root: {paths.run_root}")
append_log_entry(
paths.logs,
Expand Down Expand Up @@ -421,10 +423,15 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool:
)
write_stage_handoff(paths, stage, stage_markdown)
write_artifact_index(paths)
write_experiment_manifest(paths)
append_log_entry(
paths.logs,
f"{stage.slug} approved",
f"Stage approved and appended to memory.\nUpdated artifact index: {paths.artifact_index}",
(
"Stage approved and appended to memory.\n"
f"Updated artifact index: {paths.artifact_index}\n"
f"Updated experiment manifest: {paths.experiment_manifest}"
),
)
self._print(f"Approved {stage.stage_title}.")
return True
Expand Down Expand Up @@ -462,6 +469,15 @@ def _build_stage_prompt(
+ format_artifact_index_for_prompt(artifact_index)
+ "\n"
)
if stage.number >= 5:
experiment_manifest = write_experiment_manifest(paths)
stage_template = (
stage_template.rstrip()
+ "\n\n## Experiment Bundle Manifest\n\n"
+ f"Standard experiment manifest: `{paths.experiment_manifest.resolve()}`\n\n"
+ format_experiment_manifest_for_prompt(experiment_manifest)
+ "\n"
)
if stage.slug == "07_writing":
manifest = build_writing_manifest(paths)
stage_template = (
Expand Down
2 changes: 2 additions & 0 deletions src/prompts/05_experimentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Run or define credible experiments that test the approved hypotheses using the i
- Put experiment scripts and run configs under `{{WORKSPACE_CODE_DIR}}` when needed.
- Put raw or processed outputs under `{{WORKSPACE_RESULTS_DIR}}`.
- Store machine-readable result artifacts such as `.json`, `.jsonl`, `.csv`, `.tsv`, `.parquet`, `.npy`, or `.npz` under `{{WORKSPACE_RESULTS_DIR}}`; markdown alone is not sufficient.
- Keep `{{WORKSPACE_RESULTS_DIR}}/experiment_manifest.json` aligned with the current experiment bundle so downstream analysis can consume a stable machine-readable summary.
- Put experiment logs, notes, and exception handling details under `{{WORKSPACE_NOTES_DIR}}`.
- The stage summary draft for the current attempt must be written to `{{STAGE_OUTPUT_PATH}}`.
- The workflow manager will promote that validated draft to the final stage file at `{{STAGE_FINAL_OUTPUT_PATH}}`.
Expand Down Expand Up @@ -49,5 +50,6 @@ Additional expectations for this stage:
- Do not fabricate results.
- If results are simulated, partial, or blocked, say so explicitly.
- Do not treat a prose results summary as sufficient experimentation output when raw/processed result files can be written.
- Do not leave `experiment_manifest.json` missing or stale relative to the current result artifacts.
- Do not control workflow progression.
- Do not write outside the current run directory.
1 change: 1 addition & 0 deletions src/prompts/06_analysis.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Interpret the available evidence rigorously and determine what claims the curren
- Put analysis notes, evaluation breakdowns, and interpretive documents under `{{WORKSPACE_RESULTS_DIR}}` or `{{WORKSPACE_NOTES_DIR}}`.
- Put figures, plots, or tables created for interpretation under `{{WORKSPACE_FIGURES_DIR}}` or `{{WORKSPACE_RESULTS_DIR}}`.
- Create real figure files (`.png`, `.pdf`, `.svg`, `.jpg`) under `{{WORKSPACE_FIGURES_DIR}}`; textual descriptions of figures are not sufficient.
- Read `{{WORKSPACE_RESULTS_DIR}}/experiment_manifest.json` before drawing conclusions so analysis tracks the actual standardized experiment bundle.
- The stage summary draft for the current attempt must be written to `{{STAGE_OUTPUT_PATH}}`.
- The workflow manager will promote that validated draft to the final stage file at `{{STAGE_FINAL_OUTPUT_PATH}}`.

Expand Down
11 changes: 11 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class RunPaths:
code_dir: Path
data_dir: Path
results_dir: Path
experiment_manifest: Path
writing_dir: Path
figures_dir: Path
artifacts_dir: Path
Expand Down Expand Up @@ -164,6 +165,7 @@ def build_run_paths(run_root: Path) -> RunPaths:
code_dir=workspace_root / "code",
data_dir=workspace_root / "data",
results_dir=workspace_root / "results",
experiment_manifest=workspace_root / "results" / "experiment_manifest.json",
writing_dir=workspace_root / "writing",
figures_dir=workspace_root / "figures",
artifacts_dir=workspace_root / "artifacts",
Expand Down Expand Up @@ -593,6 +595,15 @@ def validate_stage_artifacts(stage: StageSpec, paths: RunPaths) -> list[str]:
problems.append(
f"{stage.stage_title} requires machine-readable result artifacts under workspace/results."
)
if not paths.experiment_manifest.exists():
problems.append(
f"{stage.stage_title} requires experiment_manifest.json under workspace/results."
)
else:
from .experiment_manifest import validate_experiment_manifest

for problem in validate_experiment_manifest(paths.experiment_manifest):
problems.append(f"{stage.stage_title}: {problem}")

if stage.number >= 6:
if _count_files_with_suffixes(paths.figures_dir, FIGURE_SUFFIXES) == 0:
Expand Down
64 changes: 64 additions & 0 deletions tests/test_experiment_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from __future__ import annotations

import tempfile
import unittest
from pathlib import Path

from src.experiment_manifest import (
format_experiment_manifest_for_prompt,
load_experiment_manifest,
write_experiment_manifest,
)
from src.utils import STAGES, build_run_paths, ensure_run_layout, validate_stage_artifacts, write_text


# Resolve the Stage 05 (experimentation) spec once; shared by all tests below.
STAGE_05 = next(stage for stage in STAGES if stage.slug == "05_experimentation")


class ExperimentManifestTests(unittest.TestCase):
    """End-to-end checks for writing, loading, and validating the manifest."""

    def _build_paths(self) -> object:
        """Create a throwaway run layout that is removed after the test."""
        tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_dir.cleanup)
        paths = build_run_paths(Path(tmp_dir.name) / "run")
        ensure_run_layout(paths)
        return paths

    def test_write_experiment_manifest_collects_results_code_and_notes(self) -> None:
        paths = self._build_paths()
        workspace_files = {
            paths.data_dir / "design.json": '{"task":"demo"}',
            paths.code_dir / "train.py": "print('train')\n",
            paths.notes_dir / "experiment_note.md": "# Note\n",
            paths.results_dir / "scores.csv": "step,score\n1,0.7\n2,0.8\n",
        }
        for file_path, content in workspace_files.items():
            write_text(file_path, content)

        written = write_experiment_manifest(paths)
        self.assertTrue(written.ready_for_analysis)
        for counter in ("result_artifact_count", "code_artifact_count", "note_artifact_count"):
            self.assertEqual(written.summary[counter], 1)

        reloaded = load_experiment_manifest(paths.experiment_manifest)
        self.assertIsNotNone(reloaded)
        assert reloaded is not None
        first_result = reloaded.result_artifacts[0]
        self.assertEqual(first_result["rel_path"], "results/scores.csv")
        self.assertEqual(first_result["schema"]["row_count"], 2)

        rendered = format_experiment_manifest_for_prompt(reloaded)
        for expected in ("results/scores.csv", "code/train.py", "notes/experiment_note.md"):
            self.assertIn(expected, rendered)

    def test_stage05_validation_requires_experiment_manifest(self) -> None:
        paths = self._build_paths()
        write_text(paths.data_dir / "design.json", '{"task":"demo"}')
        write_text(paths.results_dir / "scores.csv", "step,score\n1,0.7\n")

        missing_problems = validate_stage_artifacts(STAGE_05, paths)
        self.assertTrue(any("experiment_manifest.json" in item for item in missing_problems))

        write_experiment_manifest(paths)
        self.assertEqual(validate_stage_artifacts(STAGE_05, paths), [])


if __name__ == "__main__":
unittest.main()
2 changes: 2 additions & 0 deletions tests/test_writing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import unittest
from pathlib import Path

from src.experiment_manifest import write_experiment_manifest
from src.utils import (
DEFAULT_VENUE,
STAGES,
Expand Down Expand Up @@ -41,6 +42,7 @@ def _build_paths(self) -> tuple[Path, object]:
return run_root, paths

def _populate_valid_stage07_outputs(self, paths: object) -> None:
write_experiment_manifest(paths)
sections_dir = paths.writing_dir / "sections"
sections_dir.mkdir(parents=True, exist_ok=True)

Expand Down