Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
StageSpec,
append_approved_stage_summary,
append_log_entry,
build_handoff_context,
build_continuation_prompt,
build_prompt,
build_run_paths,
Expand All @@ -45,6 +46,7 @@
truncate_text,
validate_stage_artifacts,
validate_stage_markdown,
write_stage_handoff,
write_text,
)

Expand Down Expand Up @@ -415,6 +417,7 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool:
attempt_no,
self._stage_file_paths(stage_markdown),
)
write_stage_handoff(paths, stage, stage_markdown)
append_log_entry(
paths.logs,
f"{stage.slug} approved",
Expand All @@ -441,6 +444,7 @@ def _build_stage_prompt(
) -> str:
template = load_prompt_template(self.prompt_dir, stage)
stage_template = format_stage_template(template, stage, paths)
handoff_context = build_handoff_context(paths, upto_stage=stage)
stage_template = (
stage_template.rstrip()
+ "\n\n## Run Configuration\n\n"
Expand All @@ -456,11 +460,11 @@ def _build_stage_prompt(
+ "\n"
)
if continue_session:
return build_continuation_prompt(stage, stage_template, paths, revision_feedback)
return build_continuation_prompt(stage, stage_template, paths, handoff_context, revision_feedback)

user_request = read_text(paths.user_input)
approved_memory = read_text(paths.memory)
return build_prompt(stage, stage_template, user_request, approved_memory, revision_feedback)
return build_prompt(stage, stage_template, user_request, approved_memory, handoff_context, revision_feedback)

def _display_stage_output(self, stage: StageSpec, markdown: str) -> None:
divider = "=" * 80
Expand Down
39 changes: 39 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class RunPaths:
prompt_cache_dir: Path
operator_state_dir: Path
stages_dir: Path
handoff_dir: Path
workspace_root: Path
literature_dir: Path
code_dir: Path
Expand Down Expand Up @@ -155,6 +156,7 @@ def build_run_paths(run_root: Path) -> RunPaths:
prompt_cache_dir=run_root / "prompt_cache",
operator_state_dir=run_root / "operator_state",
stages_dir=run_root / "stages",
handoff_dir=run_root / "handoff",
workspace_root=workspace_root,
literature_dir=workspace_root / "literature",
code_dir=workspace_root / "code",
Expand All @@ -173,6 +175,7 @@ def ensure_run_layout(paths: RunPaths) -> None:
paths.prompt_cache_dir.mkdir(parents=True, exist_ok=True)
paths.operator_state_dir.mkdir(parents=True, exist_ok=True)
paths.stages_dir.mkdir(parents=True, exist_ok=True)
paths.handoff_dir.mkdir(parents=True, exist_ok=True)
paths.workspace_root.mkdir(parents=True, exist_ok=True)

for directory in workspace_dirs(paths):
Expand Down Expand Up @@ -387,6 +390,7 @@ def build_prompt(
stage_template: str,
user_request: str,
approved_memory: str,
handoff_context: str,
revision_feedback: str | None,
) -> str:
sections = [
Expand Down Expand Up @@ -415,6 +419,8 @@ def build_prompt(
user_request.strip(),
"# Approved Memory",
approved_memory.strip() or "_None yet._",
"# Stage Handoff Context",
handoff_context.strip() or "No stage handoff summaries available yet.",
"# Revision Feedback",
revision_feedback.strip() if revision_feedback else "None.",
]
Expand All @@ -425,6 +431,7 @@ def build_continuation_prompt(
stage: StageSpec,
stage_template: str,
paths: RunPaths,
handoff_context: str,
revision_feedback: str | None,
) -> str:
current_draft = paths.stage_tmp_file(stage)
Expand All @@ -450,13 +457,16 @@ def build_continuation_prompt(
f"1. Read the current draft at `{current_draft.resolve()}` if it exists.\n"
f"2. Read the last promoted stage summary at `{current_final.resolve()}` if it exists.\n"
f"3. Read approved memory from `{paths.memory.resolve()}` and the original user goal from `{paths.user_input.resolve()}` if needed.\n"
f"4. Read prior handoff summaries under `{paths.handoff_dir.resolve()}` when they exist.\n"
f"4. Treat workspace artifacts already under `{paths.workspace_root.resolve()}` as part of the current stage context and reuse them.\n"
"5. Preserve all valid work already completed in this stage unless the new feedback requires changing it.\n"
"6. Fill the missing pieces, fix weak points, and update the stage summary instead of throwing away correct work.\n"
"7. Overwrite only the draft stage output path once you are ready to produce the updated complete summary.\n"
"8. Do not leave placeholder text such as [In progress], [Pending], [TODO], [TBD], or similar unfinished markers.\n"
"9. If the existing stage work is partially correct, keep the correct parts and extend them rather than replacing them blindly."
),
"# Stage Handoff Context",
handoff_context.strip() or "No stage handoff summaries available yet.",
"# New Feedback",
revision_feedback.strip()
if revision_feedback
Expand Down Expand Up @@ -723,6 +733,35 @@ def extract_path_references(text: str) -> list[str]:
return paths


def write_stage_handoff(paths: RunPaths, stage: StageSpec, stage_markdown: str) -> Path:
    """Write a condensed handoff file for an approved stage and return its path.

    The handoff is stored as ``<stage.slug>.md`` inside ``paths.handoff_dir``
    and contains three sections copied from the stage summary: "Objective",
    "Key Results" and "Files Produced". Whenever ``extract_markdown_section``
    returns a falsy value for a section, the literal text "Not provided." is
    written in its place.

    Args:
        paths: Run paths; only ``handoff_dir`` is used here.
        stage: Stage whose ``slug`` names the file and whose ``stage_title``
            appears in the heading.
        stage_markdown: Full markdown of the stage summary to condense.

    Returns:
        The path of the handoff file that was written.
    """
    destination = paths.handoff_dir / f"{stage.slug}.md"

    # Each section is rendered as "## <heading>\n<body>\n"; joining the
    # rendered sections with a newline leaves one blank line between them.
    rendered_sections = []
    for heading in ("Objective", "Key Results", "Files Produced"):
        body = extract_markdown_section(stage_markdown, heading) or "Not provided."
        rendered_sections.append(f"## {heading}\n{body}\n")

    document = f"# Handoff: {stage.stage_title}\n\n" + "\n".join(rendered_sections)
    write_text(destination, document)
    return destination


def build_handoff_context(paths: RunPaths, upto_stage: StageSpec | None = None, max_stages: int = 4) -> str:
    """Join the most recent stage handoff files into one block of prompt text.

    Handoff files are the ``*.md`` files directly inside ``paths.handoff_dir``.
    They are ordered by path, optionally restricted to those whose file stem
    compares lower than ``upto_stage.slug``, and then limited to the last
    ``max_stages`` entries.

    Args:
        paths: Run paths; only ``handoff_dir`` is used here.
        upto_stage: When given, only handoffs whose stem is lexicographically
            smaller than this stage's slug are included.
        max_stages: Maximum number of handoff files to include. A value of
            zero or less includes none.

    Returns:
        The selected handoff texts separated by blank lines, or the sentence
        "No stage handoff summaries available yet." when nothing is selected.
    """
    fallback = "No stage handoff summaries available yet."

    # A non-positive limit must yield no handoffs. Without this guard the
    # slice below becomes ``handoffs[-0:]``, which is the entire list, and a
    # negative limit would silently drop the oldest entries instead.
    if max_stages <= 0:
        return fallback

    handoffs = sorted(path for path in paths.handoff_dir.glob("*.md") if path.is_file())
    if upto_stage is not None:
        # NOTE(review): plain string comparison; this assumes stage slugs sort
        # lexicographically in stage order (e.g. a zero-padded numeric
        # prefix) -- confirm against the StageSpec definitions.
        handoffs = [path for path in handoffs if path.stem < upto_stage.slug]
    handoffs = handoffs[-max_stages:]

    parts = []
    for path in handoffs:
        if not path.exists():
            continue
        text = read_text(path).strip()
        # Skip empty files so they do not add stray blank separators.
        if text:
            parts.append(text)
    return "\n\n".join(parts).strip() or fallback


def _extract_path_references(text: str) -> list[str]:
seen: set[str] = set()
paths: list[str] = []
Expand Down
87 changes: 87 additions & 0 deletions tests/test_stage_handoff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from __future__ import annotations

import tempfile
import unittest
from pathlib import Path

from src.utils import STAGES, build_continuation_prompt, build_handoff_context, build_run_paths, ensure_run_layout, write_stage_handoff


class StageHandoffTests(unittest.TestCase):
    """Checks that stage handoff files are written and surfaced in prompts."""

    @staticmethod
    def _stage_markdown(heading: str, objective: str, key_results: str, produced: str) -> str:
        """Return a stage summary fixture that varies only in the given fields."""
        return (
            f"# {heading}\n\n"
            f"## Objective\n{objective}\n\n"
            "## Previously Approved Stage Summaries\n_None yet._\n\n"
            "## What I Did\nDid work.\n\n"
            f"## Key Results\n{key_results}\n\n"
            f"## Files Produced\n- `{produced}`\n\n"
            "## Suggestions for Refinement\n"
            "1. Refine one.\n2. Refine two.\n3. Refine three.\n\n"
            "## Your Options\n"
            "1. Use suggestion 1\n2. Use suggestion 2\n3. Use suggestion 3\n4. Refine with your own feedback\n5. Approve and continue\n6. Abort\n"
        )

    def test_write_stage_handoff_and_prompt_context(self) -> None:
        """A handoff written for stage 1 shows up in the stage 2 continuation prompt."""
        with tempfile.TemporaryDirectory() as scratch:
            paths = build_run_paths(Path(scratch) / "run")
            ensure_run_layout(paths)

            summary = self._stage_markdown(
                "Stage 01: Literature Survey",
                "Objective.",
                "Key result.",
                "workspace/literature/evidence.md",
            )
            write_stage_handoff(paths, STAGES[0], summary)

            context = build_handoff_context(paths, upto_stage=STAGES[1])
            continuation = build_continuation_prompt(
                stage=STAGES[1],
                stage_template="Stage template body",
                paths=paths,
                handoff_context=context,
                revision_feedback=None,
            )

            expected_heading = "Handoff: Stage 01: Literature Survey"
            self.assertIn(expected_heading, context)
            self.assertIn("# Stage Handoff Context", continuation)
            self.assertIn(expected_heading, continuation)

    def test_build_handoff_context_collects_multiple_stage_files(self) -> None:
        """Handoffs from both earlier stages are included in the stage 3 context."""
        with tempfile.TemporaryDirectory() as scratch:
            paths = build_run_paths(Path(scratch) / "run")
            ensure_run_layout(paths)

            fixtures = (
                (
                    STAGES[0],
                    self._stage_markdown(
                        "Stage 01: Literature Survey",
                        "Objective A.",
                        "Key A.",
                        "workspace/literature/a.md",
                    ),
                ),
                (
                    STAGES[1],
                    self._stage_markdown(
                        "Stage 02: Hypothesis Generation",
                        "Objective B.",
                        "Key B.",
                        "workspace/notes/b.md",
                    ),
                ),
            )
            for stage, summary in fixtures:
                write_stage_handoff(paths, stage, summary)

            context = build_handoff_context(paths, upto_stage=STAGES[2])
            for expected_heading in (
                "Handoff: Stage 01: Literature Survey",
                "Handoff: Stage 02: Hypothesis Generation",
            ):
                self.assertIn(expected_heading, context)


# Run the test cases in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()