AutoX-AI-Labs · Zefan-Cai · Mar 31, 2026
diff --git a/src/manager.py b/src/manager.py
@@ -28,6 +28,7 @@
     StageSpec,
     append_approved_stage_summary,
     append_log_entry,
+    build_handoff_context,
     build_continuation_prompt,
     build_prompt,
     build_run_paths,
@@ -45,6 +46,7 @@
     truncate_text,
     validate_stage_artifacts,
     validate_stage_markdown,
+    write_stage_handoff,
     write_text,
 )
 
@@ -415,6 +417,7 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool:
                     attempt_no,
                     self._stage_file_paths(stage_markdown),
                 )
+                write_stage_handoff(paths, stage, stage_markdown)
                 append_log_entry(
                     paths.logs,
                     f"{stage.slug} approved",
@@ -441,6 +444,7 @@ def _build_stage_prompt(
     ) -> str:
         template = load_prompt_template(self.prompt_dir, stage)
         stage_template = format_stage_template(template, stage, paths)
+        handoff_context = build_handoff_context(paths, upto_stage=stage)
         stage_template = (
             stage_template.rstrip()
             + "\n\n## Run Configuration\n\n"
@@ -456,11 +460,11 @@ def _build_stage_prompt(
                 + "\n"
             )
         if continue_session:
-            return build_continuation_prompt(stage, stage_template, paths, revision_feedback)
+            return build_continuation_prompt(stage, stage_template, paths, handoff_context, revision_feedback)
 
         user_request = read_text(paths.user_input)
         approved_memory = read_text(paths.memory)
-        return build_prompt(stage, stage_template, user_request, approved_memory, revision_feedback)
+        return build_prompt(stage, stage_template, user_request, approved_memory, handoff_context, revision_feedback)
 
     def _display_stage_output(self, stage: StageSpec, markdown: str) -> None:
         divider = "=" * 80

diff --git a/src/utils.py b/src/utils.py
@@ -39,6 +39,7 @@ class RunPaths:
     prompt_cache_dir: Path
     operator_state_dir: Path
     stages_dir: Path
+    handoff_dir: Path
     workspace_root: Path
     literature_dir: Path
     code_dir: Path
@@ -155,6 +156,7 @@ def build_run_paths(run_root: Path) -> RunPaths:
         prompt_cache_dir=run_root / "prompt_cache",
         operator_state_dir=run_root / "operator_state",
         stages_dir=run_root / "stages",
+        handoff_dir=run_root / "handoff",
         workspace_root=workspace_root,
         literature_dir=workspace_root / "literature",
         code_dir=workspace_root / "code",
@@ -173,6 +175,7 @@ def ensure_run_layout(paths: RunPaths) -> None:
     paths.prompt_cache_dir.mkdir(parents=True, exist_ok=True)
     paths.operator_state_dir.mkdir(parents=True, exist_ok=True)
     paths.stages_dir.mkdir(parents=True, exist_ok=True)
+    paths.handoff_dir.mkdir(parents=True, exist_ok=True)
     paths.workspace_root.mkdir(parents=True, exist_ok=True)
 
     for directory in workspace_dirs(paths):
@@ -387,6 +390,7 @@ def build_prompt(
     stage_template: str,
     user_request: str,
     approved_memory: str,
+    handoff_context: str,
     revision_feedback: str | None,
 ) -> str:
     sections = [
@@ -415,6 +419,8 @@ def build_prompt(
         user_request.strip(),
         "# Approved Memory",
         approved_memory.strip() or "_None yet._",
+        "# Stage Handoff Context",
+        handoff_context.strip() or "No stage handoff summaries available yet.",
         "# Revision Feedback",
         revision_feedback.strip() if revision_feedback else "None.",
     ]
@@ -425,6 +431,7 @@ def build_continuation_prompt(
     stage: StageSpec,
     stage_template: str,
     paths: RunPaths,
+    handoff_context: str,
     revision_feedback: str | None,
 ) -> str:
     current_draft = paths.stage_tmp_file(stage)
@@ -450,13 +457,16 @@ def build_continuation_prompt(
             f"1. Read the current draft at `{current_draft.resolve()}` if it exists.\n"
             f"2. Read the last promoted stage summary at `{current_final.resolve()}` if it exists.\n"
             f"3. Read approved memory from `{paths.memory.resolve()}` and the original user goal from `{paths.user_input.resolve()}` if needed.\n"
-            f"4. Treat workspace artifacts already under `{paths.workspace_root.resolve()}` as part of the current stage context and reuse them.\n"
-            "5. Preserve all valid work already completed in this stage unless the new feedback requires changing it.\n"
-            "6. Fill the missing pieces, fix weak points, and update the stage summary instead of throwing away correct work.\n"
-            "7. Overwrite only the draft stage output path once you are ready to produce the updated complete summary.\n"
-            "8. Do not leave placeholder text such as [In progress], [Pending], [TODO], [TBD], or similar unfinished markers.\n"
-            "9. If the existing stage work is partially correct, keep the correct parts and extend them rather than replacing them blindly."
+            f"4. Read prior handoff summaries under `{paths.handoff_dir.resolve()}` when they exist.\n"
+            f"5. Treat workspace artifacts already under `{paths.workspace_root.resolve()}` as part of the current stage context and reuse them.\n"
+            "6. Preserve all valid work already completed in this stage unless the new feedback requires changing it.\n"
+            "7. Fill the missing pieces, fix weak points, and update the stage summary instead of throwing away correct work.\n"
+            "8. Overwrite only the draft stage output path once you are ready to produce the updated complete summary.\n"
+            "9. Do not leave placeholder text such as [In progress], [Pending], [TODO], [TBD], or similar unfinished markers.\n"
+            "10. If the existing stage work is partially correct, keep the correct parts and extend them rather than replacing them blindly."
         ),
+        "# Stage Handoff Context",
+        handoff_context.strip() or "No stage handoff summaries available yet.",
         "# New Feedback",
         revision_feedback.strip()
         if revision_feedback
@@ -723,6 +733,35 @@ def extract_path_references(text: str) -> list[str]:
     return paths
 
 
+def write_stage_handoff(paths: RunPaths, stage: StageSpec, stage_markdown: str) -> Path:
+    """Write `<handoff_dir>/<stage.slug>.md` from `stage_markdown` and return that path.
+
+    Only the Objective, Key Results and Files Produced sections are carried
+    over; when `extract_markdown_section` yields nothing for one of them the
+    handoff says "Not provided." instead.
+    """
+    # Section headings copied into the handoff, in output order.
+    section_names = ("Objective", "Key Results", "Files Produced")
+    blocks = [
+        f"## {name}\n{extract_markdown_section(stage_markdown, name) or 'Not provided.'}\n"
+        for name in section_names
+    ]
+
+    # Each block ends with a newline, so joining on "\n" leaves one blank line between sections.
+    destination = paths.handoff_dir / f"{stage.slug}.md"
+    write_text(destination, f"# Handoff: {stage.stage_title}\n\n" + "\n".join(blocks))
+    return destination
+
+
+def build_handoff_context(paths: RunPaths, upto_stage: StageSpec | None = None, max_stages: int = 4) -> str:
+    """Join the last `max_stages` handoff files whose stem sorts before `upto_stage.slug` (none if the limit is <= 0)."""
+    bound = None if upto_stage is None else upto_stage.slug
+    handoffs = sorted(p for p in paths.handoff_dir.glob("*.md") if p.is_file() and (bound is None or p.stem < bound))
+    recent = handoffs[-max_stages:] if max_stages > 0 else []  # a bare `[-0:]` slice would keep every file
+    parts = [read_text(path).strip() for path in recent if path.exists()]
+    return "\n\n".join(parts).strip() or "No stage handoff summaries available yet."
+
+
 def _extract_path_references(text: str) -> list[str]:
     seen: set[str] = set()
     paths: list[str] = []

diff --git a/tests/test_stage_handoff.py b/tests/test_stage_handoff.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+import tempfile
+import unittest
+from pathlib import Path
+
+from src.utils import STAGES, build_continuation_prompt, build_handoff_context, build_run_paths, ensure_run_layout, write_stage_handoff
+
+
+class StageHandoffTests(unittest.TestCase):
+    """Tests for the per-stage handoff files and their use in prompts.
+
+    Covers `write_stage_handoff`, `build_handoff_context` and the handoff
+    section emitted by `build_continuation_prompt`.
+    """
+
+    @staticmethod
+    def _stage_markdown(title: str, objective: str, key_result: str, produced: str) -> str:
+        """Return a stage summary whose title, objective, key result and produced file vary."""
+        return (
+            f"# {title}\n\n"
+            f"## Objective\n{objective}\n\n"
+            "## Previously Approved Stage Summaries\n_None yet._\n\n"
+            "## What I Did\nDid work.\n\n"
+            f"## Key Results\n{key_result}\n\n"
+            f"## Files Produced\n- `{produced}`\n\n"
+            "## Suggestions for Refinement\n"
+            "1. Refine one.\n2. Refine two.\n3. Refine three.\n\n"
+            "## Your Options\n"
+            "1. Use suggestion 1\n2. Use suggestion 2\n3. Use suggestion 3\n"
+            "4. Refine with your own feedback\n5. Approve and continue\n6. Abort\n"
+        )
+
+    @staticmethod
+    def _make_paths(tmp_dir: str):
+        """Create the run layout under `<tmp_dir>/run` and return its paths object."""
+        run_paths = build_run_paths(Path(tmp_dir) / "run")
+        ensure_run_layout(run_paths)
+        return run_paths
+
+    def test_write_stage_handoff_and_prompt_context(self) -> None:
+        # A handoff written for STAGES[0] must reach the continuation prompt of STAGES[1].
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            paths = self._make_paths(tmp_dir)
+            summary = self._stage_markdown(
+                "Stage 01: Literature Survey",
+                "Objective.",
+                "Key result.",
+                "workspace/literature/evidence.md",
+            )
+            write_stage_handoff(paths, STAGES[0], summary)
+            next_stage = STAGES[1]
+            handoff_context = build_handoff_context(paths, upto_stage=next_stage)
+            prompt = build_continuation_prompt(
+                stage=next_stage,
+                stage_template="Stage template body",
+                paths=paths,
+                handoff_context=handoff_context,
+                revision_feedback=None,
+            )
+
+            # The heading is checked in the raw context and again in the rendered prompt.
+            expected_heading = "Handoff: Stage 01: Literature Survey"
+            self.assertIn(expected_heading, handoff_context)
+            self.assertIn("# Stage Handoff Context", prompt)
+            self.assertIn(expected_heading, prompt)
+
+    def test_build_handoff_context_collects_multiple_stage_files(self) -> None:
+        # One (stage, title, objective, key result, produced file) row per handoff to write.
+        fixtures = (
+            (STAGES[0], "Stage 01: Literature Survey", "Objective A.", "Key A.", "workspace/literature/a.md"),
+            (STAGES[1], "Stage 02: Hypothesis Generation", "Objective B.", "Key B.", "workspace/notes/b.md"),
+        )
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            paths = self._make_paths(tmp_dir)
+            for stage, title, objective, key_result, produced in fixtures:
+                summary = self._stage_markdown(title, objective, key_result, produced)
+                write_stage_handoff(paths, stage, summary)
+
+            # Both earlier handoffs are expected once the context is built for STAGES[2].
+            handoff_context = build_handoff_context(paths, upto_stage=STAGES[2])
+            self.assertIn("Handoff: Stage 01: Literature Survey", handoff_context)
+            self.assertIn("Handoff: Stage 02: Hypothesis Generation", handoff_context)
+
+
+if __name__ == "__main__":  # run the suite when this file is executed as a script
+    unittest.main()