From 9472e666366d4ba679dfef35a85516624b9bfa0f Mon Sep 17 00:00:00 2001 From: Zefan Cai <67849306+Zefan-Cai@users.noreply.github.com> Date: Sun, 29 Mar 2026 17:37:40 -0500 Subject: [PATCH 1/2] Add CLI research workflow platform foundations --- README.md | 33 +++- main.py | 26 ++- src/inspection.py | 289 ++++++++++++++++++++++++++++ src/knowledge_base.py | 242 ++++++++++++++++++++++++ src/manager.py | 315 ++++++++++++++++++++++++++++++- src/operator.py | 82 +++++++- src/platform/__init__.py | 2 + src/platform/agents.py | 33 ++++ src/platform/fault_tolerance.py | 67 +++++++ src/platform/foundry.py | 51 +++++ src/platform/messaging.py | 31 +++ src/platform/observability.py | 54 ++++++ src/platform/orchestration.py | 56 ++++++ src/platform/protocols.py | 53 ++++++ src/platform/sandbox.py | 39 ++++ src/platform/security.py | 55 ++++++ src/platform/semantic.py | 52 +++++ src/platform/types.py | 54 ++++++ src/run_state.py | 245 ++++++++++++++++++++++++ src/utils.py | 123 +++++++++++- tests/test_clawdock_alignment.py | 103 ++++++++++ tests/test_platform_alignment.py | 86 +++++++++ 22 files changed, 2072 insertions(+), 19 deletions(-) create mode 100644 src/inspection.py create mode 100644 src/knowledge_base.py create mode 100644 src/platform/__init__.py create mode 100644 src/platform/agents.py create mode 100644 src/platform/fault_tolerance.py create mode 100644 src/platform/foundry.py create mode 100644 src/platform/messaging.py create mode 100644 src/platform/observability.py create mode 100644 src/platform/orchestration.py create mode 100644 src/platform/protocols.py create mode 100644 src/platform/sandbox.py create mode 100644 src/platform/security.py create mode 100644 src/platform/semantic.py create mode 100644 src/platform/types.py create mode 100644 src/run_state.py create mode 100644 tests/test_clawdock_alignment.py create mode 100644 tests/test_platform_alignment.py diff --git a/README.md b/README.md index 8de3262..002dc3a 100644 --- a/README.md +++ b/README.md 
@@ -56,7 +56,7 @@ File boundaries: ## Workspace Structure -Each run contains `user_input.txt`, `memory.md`, `prompt_cache/`, `operator_state/`, `stages/`, `workspace/`, `logs.txt`, and `logs_raw.jsonl`. The substantive research payload lives in `workspace/`. +Each run contains `user_input.txt`, `memory.md`, `knowledge_base/`, `run_state.json`, `prompt_cache/`, `operator_state/`, `stages/`, `workspace/`, `logs.txt`, and `logs_raw.jsonl`. The substantive research payload lives in `workspace/`. ```mermaid flowchart TD @@ -263,6 +263,31 @@ python main.py --resume-run 20260329_210252 --redo-stage 03 Valid stage identifiers include `03`, `3`, and `03_study_design`. +Inspect structured run state: + +```bash +python main.py --resume-run latest --show-status +``` + +Search the per-run knowledge base: + +```bash +python main.py --resume-run latest --kb-search "hypothesis evidence" --kb-limit 8 +``` + +Platform-alignment layer under `src/platform/` now includes: + +- orchestration patterns: sequential, parallel, hierarchical, swarm +- A2A/MCP-style protocol bridge primitives +- agent runtime manager and command-style research agents +- semantic retrieval for Knowledge Base ranking +- Foundry document generation for paper/poster/slides/social bundles +- observability collectors for spans and metrics +- RBAC scope checks for API access +- sandbox execution policy and runner abstractions +- messaging outbox integration +- retry/fallback/classification/checkpoint primitives + ## Scope Included: @@ -278,10 +303,14 @@ Included: - draft-to-final stage promotion - resume and redo-stage support - artifact-level validation +- structured `run_state.json` lifecycle tracking +- per-run Knowledge Base with prompt injection and CLI search +- stage-pattern metadata aligned with the ClawDock research design +- platform-alignment modules for orchestration, protocols, runtimes, Foundry, observability, security, sandboxing, messaging, and deployment Out of scope: -- multi-agent orchestration +- 
true multi-agent orchestration execution - database-backed state - web UI - concurrent stage execution diff --git a/main.py b/main.py index 96e5ce1..21f0c0b 100644 --- a/main.py +++ b/main.py @@ -38,6 +38,21 @@ def parse_args() -> argparse.Namespace: "--redo-stage", help="When resuming a run, restart from this stage slug or stage number (for example '06_analysis' or '6').", ) + parser.add_argument( + "--show-status", + action="store_true", + help="Print the structured run status for --resume-run and exit.", + ) + parser.add_argument( + "--kb-search", + help="Search the run knowledge base for --resume-run and exit.", + ) + parser.add_argument( + "--kb-limit", + type=int, + default=5, + help="Maximum number of knowledge-base results to return with --kb-search. Defaults to 5.", + ) return parser.parse_args() @@ -106,8 +121,17 @@ def main() -> int: ) if args.resume_run: - start_stage = resolve_stage(args.redo_stage) run_root = resolve_resume_run(runs_dir, args.resume_run) + if args.show_status or args.kb_search: + if args.redo_stage: + raise ValueError("--redo-stage cannot be combined with --show-status or --kb-search.") + if args.show_status: + print(manager.describe_run_status(run_root)) + if args.kb_search: + print(manager.search_run_knowledge_base(run_root, args.kb_search, limit=max(args.kb_limit, 1))) + return 0 + + start_stage = resolve_stage(args.redo_stage) manager.resume_run(run_root, start_stage=start_stage) return 0 diff --git a/src/inspection.py b/src/inspection.py new file mode 100644 index 0000000..3841a53 --- /dev/null +++ b/src/inspection.py @@ -0,0 +1,289 @@ +from __future__ import annotations + +import mimetypes +from datetime import datetime +from pathlib import Path + +from .knowledge_base import KBSearchResult, load_kb_entries, search_knowledge_base +from .run_state import RunState, load_run_state +from .utils import STAGES, StageSpec, approved_stage_summaries, build_run_paths, read_text + + +def list_run_roots(runs_dir: Path) -> list[Path]: + if 
not runs_dir.exists(): + return [] + return sorted(path for path in runs_dir.iterdir() if path.is_dir()) + + +def run_exists(runs_dir: Path, run_id: str) -> bool: + return (runs_dir / run_id).exists() + + +def build_run_snapshot(run_root: Path) -> dict[str, object]: + paths = build_run_paths(run_root) + run_state = load_run_state(paths.run_state) + memory_text = read_text(paths.memory) if paths.memory.exists() else "" + approved_memory = approved_stage_summaries(memory_text) + approved_titles = { + item.get("title", "") + for item in (run_state.approved_stages if run_state else []) + if isinstance(item, dict) + } + kb_entries = load_kb_entries(paths.knowledge_base_entries) + + stage_statuses: list[dict[str, object]] = [] + for stage in STAGES: + final_stage_path = paths.stage_file(stage) + tmp_stage_path = paths.stage_tmp_file(stage) + approved = stage.stage_title in approved_titles or stage.stage_title in approved_memory + stage_statuses.append( + { + "number": stage.number, + "slug": stage.slug, + "title": stage.stage_title, + "pattern": stage.orchestration_pattern, + "approved": approved, + "final_exists": final_stage_path.exists(), + "draft_exists": tmp_stage_path.exists(), + "final_stage_path": str(final_stage_path), + "draft_stage_path": str(tmp_stage_path), + } + ) + + snapshot = { + "run_id": run_root.name, + "run_root": str(run_root), + "status": run_state.status if run_state else "UNKNOWN", + "last_event": run_state.last_event if run_state else None, + "updated_at": run_state.updated_at if run_state else None, + "current_stage_slug": run_state.current_stage_slug if run_state else None, + "current_stage_title": run_state.current_stage_title if run_state else None, + "current_pattern": run_state.current_pattern if run_state else None, + "current_attempt": run_state.current_attempt if run_state else None, + "waiting_for_human_review": run_state.waiting_for_human_review if run_state else False, + "last_error": run_state.last_error if run_state else None, + 
"completed_at": run_state.completed_at if run_state else None, + "approved_stage_count": len(run_state.approved_stages) if run_state else 0, + "knowledge_base_entry_count": len(kb_entries), + "knowledge_base_entry_types": _count_entry_types(kb_entries), + "stages": stage_statuses, + } + return snapshot + + +def list_run_summaries(runs_dir: Path) -> list[dict[str, object]]: + summaries: list[dict[str, object]] = [] + for run_root in list_run_roots(runs_dir): + snapshot = build_run_snapshot(run_root) + summaries.append( + { + "run_id": snapshot["run_id"], + "status": snapshot["status"], + "updated_at": snapshot["updated_at"], + "current_stage_title": snapshot["current_stage_title"], + "waiting_for_human_review": snapshot["waiting_for_human_review"], + "approved_stage_count": snapshot["approved_stage_count"], + "knowledge_base_entry_count": snapshot["knowledge_base_entry_count"], + } + ) + return summaries + + +def load_run_state_snapshot(run_root: Path) -> RunState | None: + paths = build_run_paths(run_root) + return load_run_state(paths.run_state) + + +def list_run_kb_entries( + run_root: Path, + *, + limit: int = 20, + entry_type: str | None = None, +) -> list[dict[str, object]]: + paths = build_run_paths(run_root) + entries = load_kb_entries(paths.knowledge_base_entries) + if entry_type: + entries = [entry for entry in entries if entry.entry_type == entry_type] + entries = entries[-limit:] + return [entry.to_dict() for entry in reversed(entries)] + + +def search_run_kb(run_root: Path, query: str, limit: int = 5) -> list[dict[str, object]]: + paths = build_run_paths(run_root) + results = search_knowledge_base(paths.knowledge_base_entries, query=query, limit=limit) + return [_serialize_kb_search_result(result) for result in results] + + +def get_stage_snapshot(run_root: Path, stage_slug: str) -> dict[str, object]: + paths = build_run_paths(run_root) + stage = _resolve_stage(stage_slug) + final_path = paths.stage_file(stage) + draft_path = paths.stage_tmp_file(stage) 
+ final_text = read_text(final_path) if final_path.exists() else "" + draft_text = read_text(draft_path) if draft_path.exists() else "" + return { + "run_id": run_root.name, + "stage_slug": stage.slug, + "stage_title": stage.stage_title, + "final_stage_path": str(final_path), + "draft_stage_path": str(draft_path), + "final_exists": final_path.exists(), + "draft_exists": draft_path.exists(), + "final_markdown": final_text, + "draft_markdown": draft_text, + "selected_markdown": final_text or draft_text, + } + + +def get_run_log_tail(run_root: Path, max_chars: int = 6000) -> dict[str, object]: + paths = build_run_paths(run_root) + logs_text = read_text(paths.logs) if paths.logs.exists() else "" + raw_logs_text = read_text(paths.logs_raw) if paths.logs_raw.exists() else "" + return { + "run_id": run_root.name, + "logs_path": str(paths.logs), + "logs_raw_path": str(paths.logs_raw), + "logs_tail": logs_text[-max_chars:] if logs_text else "", + "logs_raw_tail": raw_logs_text[-max_chars:] if raw_logs_text else "", + } + + +def get_run_observability(run_root: Path, max_chars: int = 6000) -> dict[str, object]: + spans_path = run_root / "observability" / "spans.jsonl" + metrics_path = run_root / "observability" / "metrics.jsonl" + spans_text = read_text(spans_path) if spans_path.exists() else "" + metrics_text = read_text(metrics_path) if metrics_path.exists() else "" + return { + "run_id": run_root.name, + "spans_path": str(spans_path), + "metrics_path": str(metrics_path), + "spans_tail": spans_text[-max_chars:] if spans_text else "", + "metrics_tail": metrics_text[-max_chars:] if metrics_text else "", + } + + +def get_run_messages(run_root: Path, max_chars: int = 6000) -> dict[str, object]: + outbox_path = run_root / "messages" / "outbox.jsonl" + outbox_text = read_text(outbox_path) if outbox_path.exists() else "" + return { + "run_id": run_root.name, + "outbox_path": str(outbox_path), + "outbox_tail": outbox_text[-max_chars:] if outbox_text else "", + } + + +def 
list_run_artifacts(run_root: Path) -> dict[str, object]: + paths = build_run_paths(run_root) + groups = { + "literature": paths.literature_dir, + "code": paths.code_dir, + "data": paths.data_dir, + "results": paths.results_dir, + "writing": paths.writing_dir, + "figures": paths.figures_dir, + "artifacts": paths.artifacts_dir, + "notes": paths.notes_dir, + "reviews": paths.reviews_dir, + } + + grouped_artifacts: dict[str, list[dict[str, object]]] = {} + total_files = 0 + for group_name, directory in groups.items(): + artifacts: list[dict[str, object]] = [] + if directory.exists(): + for path in sorted(directory.rglob("*")): + if not path.is_file(): + continue + artifacts.append(_serialize_artifact(path, run_root)) + grouped_artifacts[group_name] = artifacts + total_files += len(artifacts) + + return { + "run_id": run_root.name, + "total_files": total_files, + "groups": grouped_artifacts, + } + + +def get_artifact_preview(run_root: Path, relative_path: str, max_chars: int = 8000) -> dict[str, object]: + run_root_resolved = run_root.resolve() + candidate = (run_root_resolved / relative_path).resolve() + try: + candidate.relative_to(run_root_resolved) + except ValueError as exc: + raise ValueError(f"Artifact path escapes the run root: {relative_path}") from exc + + if not candidate.exists() or not candidate.is_file(): + raise FileNotFoundError(f"Artifact not found: {relative_path}") + + artifact = _serialize_artifact(candidate, run_root) + preview_kind = artifact["preview_kind"] + preview_text = "" + if preview_kind == "text": + preview_text = read_text(candidate)[:max_chars] + + return { + "run_id": run_root.name, + "artifact": artifact, + "preview_text": preview_text, + "download_path": relative_path, + } + + +def _serialize_kb_search_result(result: KBSearchResult) -> dict[str, object]: + return { + "score": result.score, + "entry": result.entry.to_dict(), + } + + +def _count_entry_types(entries: list[object]) -> dict[str, int]: + counts: dict[str, int] = {} + for 
entry in entries: + entry_type = getattr(entry, "entry_type", "") + counts[entry_type] = counts.get(entry_type, 0) + 1 + return counts + + +def resolve_artifact_file(run_root: Path, relative_path: str) -> tuple[Path, str]: + run_root_resolved = run_root.resolve() + candidate = (run_root_resolved / relative_path).resolve() + try: + candidate.relative_to(run_root_resolved) + except ValueError as exc: + raise ValueError(f"Artifact path escapes the run root: {relative_path}") from exc + + if not candidate.exists() or not candidate.is_file(): + raise FileNotFoundError(f"Artifact not found: {relative_path}") + + content_type = mimetypes.guess_type(candidate.name)[0] or "application/octet-stream" + return candidate, content_type + + +def _resolve_stage(value: str) -> StageSpec: + normalized = value.strip().lower() + for stage in STAGES: + if normalized in {stage.slug.lower(), str(stage.number), f"{stage.number:02d}"}: + return stage + raise ValueError(f"Unknown stage identifier: {value}") + + +def _serialize_artifact(path: Path, run_root: Path) -> dict[str, object]: + suffix = path.suffix.lower() + preview_kind = "binary" + if suffix in {".md", ".txt", ".json", ".jsonl", ".csv", ".tsv", ".yaml", ".yml", ".py", ".tex", ".html"}: + preview_kind = "text" + elif suffix in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"}: + preview_kind = "image" + elif suffix == ".pdf": + preview_kind = "pdf" + + return { + "relative_path": str(path.resolve().relative_to(run_root.resolve())), + "name": path.name, + "group": path.parts[-2] if len(path.parts) >= 2 else "", + "size_bytes": path.stat().st_size, + "modified_at": datetime.fromtimestamp(path.stat().st_mtime).isoformat(timespec="seconds"), + "preview_kind": preview_kind, + "suffix": suffix, + } diff --git a/src/knowledge_base.py b/src/knowledge_base.py new file mode 100644 index 0000000..8b4cdff --- /dev/null +++ b/src/knowledge_base.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +import json +import re +import uuid 
+from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path + +from .platform.semantic import SemanticIndexer +from .utils import RunPaths, StageSpec, truncate_text + + +TOKEN_PATTERN = re.compile(r"[a-z0-9_]{2,}") + + +@dataclass(frozen=True) +class KBEntry: + entry_id: str + created_at: str + run_id: str + entry_type: str + title: str + summary: str + content: str + stage_slug: str | None = None + tags: list[str] = field(default_factory=list) + file_paths: list[str] = field(default_factory=list) + + def to_dict(self) -> dict[str, object]: + return { + "entry_id": self.entry_id, + "created_at": self.created_at, + "run_id": self.run_id, + "entry_type": self.entry_type, + "title": self.title, + "summary": self.summary, + "content": self.content, + "stage_slug": self.stage_slug, + "tags": list(self.tags), + "file_paths": list(self.file_paths), + } + + @classmethod + def from_dict(cls, payload: dict[str, object]) -> "KBEntry": + return cls( + entry_id=str(payload.get("entry_id") or uuid.uuid4()), + created_at=str(payload.get("created_at") or _now()), + run_id=str(payload.get("run_id") or ""), + entry_type=str(payload.get("entry_type") or "note"), + title=str(payload.get("title") or "Untitled entry"), + summary=str(payload.get("summary") or ""), + content=str(payload.get("content") or ""), + stage_slug=str(payload["stage_slug"]) if payload.get("stage_slug") is not None else None, + tags=[str(item) for item in payload.get("tags", []) if str(item).strip()], + file_paths=[str(item) for item in payload.get("file_paths", []) if str(item).strip()], + ) + + +@dataclass(frozen=True) +class KBSearchResult: + entry: KBEntry + score: float + + +def _now() -> str: + return datetime.now().isoformat(timespec="seconds") + + +def _tokenize(text: str) -> set[str]: + return set(TOKEN_PATTERN.findall(text.lower())) + + +def initialize_knowledge_base(paths: RunPaths, user_goal: str) -> None: + if paths.knowledge_base_entries.exists() and 
paths.knowledge_base_entries.read_text(encoding="utf-8").strip(): + return + + write_kb_entry( + paths, + entry_type="user_goal", + title="Original user goal", + summary=truncate_text(user_goal, max_chars=240), + content=user_goal, + tags=["goal", "run"], + ) + + +def load_kb_entries(entries_path: Path) -> list[KBEntry]: + if not entries_path.exists(): + return [] + + entries: list[KBEntry] = [] + for line in entries_path.read_text(encoding="utf-8").splitlines(): + stripped = line.strip() + if not stripped: + continue + entries.append(KBEntry.from_dict(json.loads(stripped))) + return entries + + +def append_kb_entry(entries_path: Path, entry: KBEntry) -> None: + entries_path.parent.mkdir(parents=True, exist_ok=True) + with entries_path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(entry.to_dict(), ensure_ascii=True) + "\n") + + +def write_kb_entry( + paths: RunPaths, + *, + entry_type: str, + title: str, + summary: str, + content: str, + stage: StageSpec | None = None, + tags: list[str] | None = None, + file_paths: list[str] | None = None, +) -> KBEntry: + entry = KBEntry( + entry_id=str(uuid.uuid4()), + created_at=_now(), + run_id=paths.run_root.name, + entry_type=entry_type, + title=title.strip(), + summary=summary.strip(), + content=content.strip(), + stage_slug=stage.slug if stage else None, + tags=[tag.strip() for tag in (tags or []) if tag.strip()], + file_paths=[path.strip() for path in (file_paths or []) if path.strip()], + ) + append_kb_entry(paths.knowledge_base_entries, entry) + return entry + + +def search_knowledge_base( + entries_path: Path, + query: str, + limit: int = 5, + stage: StageSpec | None = None, +) -> list[KBSearchResult]: + entries = load_kb_entries(entries_path) + if not entries: + return [] + + query_text = query.strip().lower() + query_tokens = _tokenize(query_text) + semantic_matches = SemanticIndexer().rank( + query_text, + [ + " ".join( + [ + entry.title, + entry.summary, + entry.content, + entry.entry_type, + 
entry.stage_slug or "", + " ".join(entry.tags), + " ".join(entry.file_paths), + ] + ) + for entry in entries + ], + limit=max(limit * 3, 12), + ) + semantic_scores = {match.index: match.score for match in semantic_matches} + scored: list[KBSearchResult] = [] + + for index, entry in enumerate(entries): + haystack = " ".join( + [ + entry.title, + entry.summary, + entry.content, + entry.entry_type, + entry.stage_slug or "", + " ".join(entry.tags), + " ".join(entry.file_paths), + ] + ).lower() + haystack_tokens = _tokenize(haystack) + + score = 0.0 + if query_text and query_text in haystack: + score += 4.0 + + overlap = query_tokens & haystack_tokens + if query_tokens: + score += 2.0 * len(overlap) + score += len(overlap) / len(query_tokens) + + score += semantic_scores.get(index, 0.0) * 6.0 + + if stage and entry.stage_slug == stage.slug: + score += 1.5 + elif stage and entry.stage_slug is None: + score += 0.2 + + if entry.entry_type in {"stage_approved", "stage_validated"}: + score += 0.5 + + if score <= 0: + continue + + scored.append(KBSearchResult(entry=entry, score=score)) + + scored.sort(key=lambda result: (result.score, result.entry.created_at), reverse=True) + return scored[:limit] + + +def format_kb_context(results: list[KBSearchResult]) -> str: + if not results: + return "No relevant knowledge-base entries yet." + + blocks: list[str] = [] + for index, result in enumerate(results, start=1): + entry = result.entry + stage_label = entry.stage_slug or "global" + lines = [ + f"{index}. [{entry.entry_type}] {entry.title}", + f" Stage: {stage_label}", + f" Summary: {truncate_text(entry.summary or entry.content, max_chars=280)}", + ] + if entry.file_paths: + lines.append(" Files: " + ", ".join(f"`{path}`" for path in entry.file_paths[:6])) + blocks.append("\n".join(lines)) + return "\n\n".join(blocks) + + +def format_kb_search_results(results: list[KBSearchResult]) -> str: + if not results: + return "No matching knowledge-base entries." 
+ + lines: list[str] = [] + for result in results: + entry = result.entry + lines.append( + ( + f"- [{entry.entry_type}] {entry.title} | stage={entry.stage_slug or 'global'} " + f"| score={result.score:.2f} | created_at={entry.created_at}\n" + f" {truncate_text(entry.summary or entry.content, max_chars=280)}" + ) + ) + return "\n".join(lines) diff --git a/src/manager.py b/src/manager.py index 684b525..9f439f6 100644 --- a/src/manager.py +++ b/src/manager.py @@ -1,11 +1,35 @@ from __future__ import annotations +import json import shutil import sys from pathlib import Path from typing import TextIO +from .knowledge_base import ( + format_kb_context, + format_kb_search_results, + initialize_knowledge_base, + search_knowledge_base, + write_kb_entry, +) from .operator import ClaudeOperator +from .platform.fault_tolerance import CheckpointManager +from .platform.observability import ObservabilityCollector +from .platform.orchestration import HierarchicalPattern, ParallelPattern, SequentialPattern, SwarmPattern +from .platform.types import PipelineStage, ProvenanceRecord, ResearchTask, TaskResult +from .run_state import ( + ensure_run_state, + format_run_state, + initialize_run_state, + load_run_state, + mark_run_cancelled, + mark_run_completed, + mark_run_failed, + mark_stage_approved, + mark_stage_human_review, + mark_stage_running, +) from .utils import ( STAGES, RunPaths, @@ -18,6 +42,8 @@ canonicalize_stage_markdown, create_run_root, ensure_run_layout, + extract_markdown_section, + extract_path_references, format_stage_template, initialize_memory, load_prompt_template, @@ -45,9 +71,9 @@ def __init__( self.output_stream = output_stream def run(self, user_goal: str) -> bool: - paths = self._create_run(user_goal) + paths = self.create_run_paths(user_goal) self._print(f"Run created at: {paths.run_root}") - return self._run_from_paths(paths) + return self.execute_run_paths(paths) def resume_run(self, run_root: Path, start_stage: StageSpec | None = None) -> bool: paths = 
build_run_paths(run_root) @@ -57,6 +83,9 @@ def resume_run(self, run_root: Path, start_stage: StageSpec | None = None) -> bo if not paths.memory.exists(): raise FileNotFoundError(f"Missing memory.md in run: {run_root}") + initialize_knowledge_base(paths, read_text(paths.user_input)) + ensure_run_state(paths) + append_log_entry( paths.logs, "run_resume", @@ -66,7 +95,32 @@ def resume_run(self, run_root: Path, start_stage: StageSpec | None = None) -> bo self._print(f"Resuming run at: {paths.run_root}") if start_stage: self._print(f"Restarting from: {start_stage.stage_title}") - return self._run_from_paths(paths, start_stage=start_stage) + return self.execute_run_paths(paths, start_stage=start_stage, failure_title="Run failed during resume", failure_tags=["failure", "run", "resume"]) + + def create_run_paths(self, user_goal: str) -> RunPaths: + return self._create_run(user_goal) + + def execute_run_paths( + self, + paths: RunPaths, + start_stage: StageSpec | None = None, + failure_title: str = "Run failed", + failure_tags: list[str] | None = None, + ) -> bool: + try: + return self._run_from_paths(paths, start_stage=start_stage) + except Exception as exc: + mark_run_failed(paths, error=str(exc), stage=start_stage) + write_kb_entry( + paths, + entry_type="run_failed", + title=failure_title, + summary=str(exc), + content=f"{failure_title} with error:\n{exc}", + stage=start_stage, + tags=failure_tags or ["failure", "run"], + ) + raise def _run_from_paths(self, paths: RunPaths, start_stage: StageSpec | None = None) -> bool: stages_to_run = self._select_stages_for_run(paths, start_stage) @@ -79,10 +133,29 @@ def _run_from_paths(self, paths: RunPaths, start_stage: StageSpec | None = None) "run_aborted", f"Run aborted during {stage.stage_title}.", ) + mark_run_cancelled(paths, stage=stage) + write_kb_entry( + paths, + entry_type="run_cancelled", + title="Run aborted by user", + summary=f"Run aborted during {stage.stage_title}.", + content=f"The run was aborted while working 
in {stage.stage_title}.", + stage=stage, + tags=["run", "cancelled"], + ) self._print("Run aborted.") return False append_log_entry(paths.logs, "run_complete", "All stages approved.") + mark_run_completed(paths) + write_kb_entry( + paths, + entry_type="run_completed", + title="Run completed", + summary="All stages were approved.", + content="The run completed successfully after approval of all eight stages.", + tags=["run", "completed"], + ) self._print("All stages approved. Run complete.") return True @@ -92,6 +165,8 @@ def _create_run(self, user_goal: str) -> RunPaths: ensure_run_layout(paths) write_text(paths.user_input, user_goal) initialize_memory(paths, user_goal) + initialize_knowledge_base(paths, user_goal) + initialize_run_state(paths) append_log_entry(paths.logs, "run_start", f"Run root: {paths.run_root}") return paths @@ -119,6 +194,24 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: continue_session = False while True: + orchestration_summary = self._execute_stage_orchestration(paths, stage, attempt_no) + mark_stage_running(paths, stage, attempt_no) + write_kb_entry( + paths, + entry_type="stage_attempt_started", + title=f"{stage.stage_title} attempt {attempt_no} started", + summary=( + f"Started {stage.stage_title} attempt {attempt_no} using the " + f"{stage.orchestration_pattern} stage pattern." 
+ ), + content=( + f"Attempt {attempt_no} for {stage.stage_title} started.\n\n" + f"{stage.pattern_summary}\n\n" + f"Planned subtasks: {orchestration_summary['subtask_count']}" + ), + stage=stage, + tags=["stage", "attempt", "running", stage.slug], + ) self._print(f"\nRunning {stage.stage_title} (attempt {attempt_no})...") prompt = self._build_stage_prompt(paths, stage, revision_feedback, continue_session) append_log_entry( @@ -197,6 +290,15 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: f"{stage.slug} attempt {attempt_no} validation_failed", "\n".join(validation_errors), ) + write_kb_entry( + paths, + entry_type="stage_validation_failed", + title=f"{stage.stage_title} validation failed", + summary=validation_errors[0], + content="\n".join(validation_errors), + stage=stage, + tags=["stage", "validation_failed", stage.slug], + ) repair_result = self.operator.repair_stage_summary( stage=stage, original_prompt=prompt, @@ -249,6 +351,16 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: + truncate_text(normalized_markdown, max_chars=6000) ), ) + write_kb_entry( + paths, + entry_type="stage_local_normalization", + title=f"{stage.stage_title} normalized locally", + summary="Applied local stage summary normalization after repair remained invalid.", + content=truncate_text(normalized_markdown, max_chars=6000), + stage=stage, + file_paths=[str(repair_result.stage_file_path.relative_to(paths.run_root))], + tags=["stage", "normalization", stage.slug], + ) stage_markdown = read_text(repair_result.stage_file_path) validation_errors = validate_stage_markdown(stage_markdown) + validate_stage_artifacts(stage, paths) @@ -287,9 +399,21 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: ), ) stage_markdown = read_text(final_stage_path) + mark_stage_human_review(paths, stage, attempt_no) + write_kb_entry( + paths, + entry_type="stage_validated", + title=f"{stage.stage_title} ready for human review", + 
summary=(extract_markdown_section(stage_markdown, "Key Results") or "Validated stage summary.").strip(), + content=truncate_text(stage_markdown, max_chars=8000), + stage=stage, + file_paths=self._stage_file_paths(paths, stage, stage_markdown), + tags=["stage", "validated", "human_review", stage.slug], + ) self._display_stage_output(stage, stage_markdown) choice = self._ask_choice() + custom_feedback: str | None = None append_log_entry( paths.logs, f"{stage.slug} attempt {attempt_no} user_choice", @@ -304,6 +428,15 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: "Do not discard correct completed parts. Address this refinement request:\n" f"{selected}" ) + write_kb_entry( + paths, + entry_type="stage_revision_requested", + title=f"{stage.stage_title} revision requested", + summary=selected, + content=f"Revision requested via built-in suggestion {choice}.\n\n{selected}", + stage=stage, + tags=["stage", "revision", "human_feedback", stage.slug], + ) continue_session = True attempt_no += 1 continue @@ -321,21 +454,52 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: f"{stage.slug} attempt {attempt_no} custom_feedback", custom_feedback, ) + write_kb_entry( + paths, + entry_type="stage_revision_requested", + title=f"{stage.stage_title} custom feedback", + summary=truncate_text(custom_feedback, max_chars=240), + content=custom_feedback, + stage=stage, + tags=["stage", "revision", "human_feedback", stage.slug], + ) continue_session = True attempt_no += 1 continue if choice == "5": append_approved_stage_summary(paths.memory, stage, stage_markdown) + mark_stage_approved(paths, stage) append_log_entry( paths.logs, f"{stage.slug} approved", "Stage approved and appended to memory.", ) + write_kb_entry( + paths, + entry_type="stage_approved", + title=f"{stage.stage_title} approved", + summary=(extract_markdown_section(stage_markdown, "Key Results") or "Stage approved.").strip(), + content=truncate_text(stage_markdown, max_chars=8000), + 
stage=stage, + file_paths=self._stage_file_paths(paths, stage, stage_markdown), + tags=["stage", "approved", stage.slug], + ) self._print(f"Approved {stage.stage_title}.") return True if choice == "6": + mark_run_cancelled(paths, stage=stage) + write_kb_entry( + paths, + entry_type="stage_aborted", + title=f"{stage.stage_title} aborted by user", + summary=f"User aborted during {stage.stage_title}.", + content=truncate_text(stage_markdown, max_chars=4000), + stage=stage, + file_paths=self._stage_file_paths(paths, stage, stage_markdown), + tags=["stage", "aborted", stage.slug], + ) return False def _build_stage_prompt( @@ -347,12 +511,13 @@ def _build_stage_prompt( ) -> str: template = load_prompt_template(self.prompt_dir, stage) stage_template = format_stage_template(template, stage, paths) + kb_context = self._build_kb_context(paths, stage) if continue_session: - return build_continuation_prompt(stage, stage_template, paths, revision_feedback) + return build_continuation_prompt(stage, stage_template, paths, kb_context, revision_feedback) user_request = read_text(paths.user_input) approved_memory = read_text(paths.memory) - return build_prompt(stage, stage_template, user_request, approved_memory, revision_feedback) + return build_prompt(stage, stage_template, user_request, approved_memory, kb_context, revision_feedback) def _display_stage_output(self, stage: StageSpec, markdown: str) -> None: divider = "=" * 80 @@ -388,3 +553,143 @@ def _read_multiline_feedback(self) -> str: def _print(self, text: str) -> None: print(text, file=self.output_stream) + + def _execute_stage_orchestration(self, paths: RunPaths, stage: StageSpec, attempt_no: int) -> dict[str, object]: + subtasks = self._build_stage_subtasks(paths, stage, attempt_no) + runner = self._orchestration_runner() + pattern_name = stage.orchestration_pattern.lower() + if "parallel" in pattern_name and "+" in pattern_name: + pattern = SequentialPattern() + results = pattern.execute(subtasks, runner) + elif 
"parallel" in pattern_name: + results = ParallelPattern(max_workers=min(len(subtasks), 4)).execute(subtasks, runner) + elif "hierarchical" in pattern_name: + results = HierarchicalPattern().execute(subtasks[0], planner=lambda _root: subtasks, runner=runner) + elif "swarm" in pattern_name: + results = SwarmPattern(rounds=2).execute(subtasks, runner) + else: + results = SequentialPattern().execute(subtasks, runner) + + summary = { + "stage_slug": stage.slug, + "attempt_no": attempt_no, + "pattern": stage.orchestration_pattern, + "subtask_count": len(subtasks), + "subtasks": [ + { + "task_id": task.task_id, + "title": task.title, + "goal": task.goal, + } + for task in subtasks + ], + "results": [ + { + "task_id": result.task_id, + "output": result.output, + "provenance": [record.action for record in result.provenance], + } + for result in results + ], + } + + plan_path = paths.notes_dir / f"{stage.slug}_attempt_{attempt_no:02d}_orchestration.json" + write_text(plan_path, json.dumps(summary, indent=2, ensure_ascii=True)) + CheckpointManager(paths.control_dir / f"{stage.slug}_attempt_{attempt_no:02d}_checkpoint.json").save(summary) + collector = ObservabilityCollector(paths.run_root) + collector.emit_span( + "stage.orchestration.executed", + run_id=paths.run_root.name, + stage_slug=stage.slug, + attempt_no=attempt_no, + pattern=stage.orchestration_pattern, + ) + collector.emit_metric( + "autor.orchestration.subtask_count", + float(len(subtasks)), + run_id=paths.run_root.name, + stage_slug=stage.slug, + ) + return summary + + def _build_stage_subtasks(self, paths: RunPaths, stage: StageSpec, attempt_no: int) -> list[ResearchTask]: + stage_key: PipelineStage = stage.slug[3:] + project_id = paths.run_root.name + goal = read_text(paths.user_input).strip() + + subtask_titles: dict[str, list[str]] = { + "01_literature_survey": ["Search sources", "Extract evidence", "Merge survey map"], + "02_hypothesis_generation": ["Propose hypotheses", "Critique hypotheses", "Synthesize 
direction"], + "03_study_design": ["Plan protocol", "Define variables", "Set evaluation criteria"], + "04_implementation": ["Prepare environment", "Implement pipeline", "Validate execution"], + "05_experimentation": ["Set experiment plan", "Run experiments", "Aggregate checkpoints"], + "06_analysis": ["Compute statistics", "Generate visuals", "Interpret findings"], + "07_writing": ["Outline manuscript", "Draft sections", "Check consistency"], + "08_dissemination": ["Draft poster", "Draft slides", "Draft social summary"], + } + + tasks: list[ResearchTask] = [] + for index, title in enumerate(subtask_titles.get(stage.slug, [stage.display_name]), start=1): + tasks.append( + ResearchTask( + task_id=f"{stage.slug}-attempt-{attempt_no:02d}-task-{index:02d}", + title=title, + goal=f"{goal}\n\nStage focus: {stage.stage_title}\nSubtask: {title}", + pipeline_stage=stage_key, + project_id=project_id, + kb_context=[paths.run_root.name, stage.slug], + human_gate_required=False, + reproducibility_notes=[f"Attempt {attempt_no}", stage.orchestration_pattern], + ) + ) + return tasks + + def _orchestration_runner(self) -> callable: + def _run(task: ResearchTask) -> TaskResult: + return TaskResult( + task_id=task.task_id, + output=f"Completed orchestration subtask: {task.title}", + provenance=[ + ProvenanceRecord( + agent_name="AutoROrchestrator", + action=f"planned:{task.title}", + evidence=[task.pipeline_stage, task.project_id], + ) + ], + ) + + return _run + + def _build_kb_context(self, paths: RunPaths, stage: StageSpec) -> str: + user_request = read_text(paths.user_input) + query = f"{stage.display_name} {stage.slug} {user_request}" + results = search_knowledge_base( + paths.knowledge_base_entries, + query=query, + limit=6, + stage=stage, + ) + return format_kb_context(results) + + def _stage_file_paths(self, paths: RunPaths, stage: StageSpec, stage_markdown: str) -> list[str]: + file_paths = extract_path_references(stage_markdown) + final_stage_path = 
str(paths.stage_file(stage).relative_to(paths.run_root)) + if final_stage_path not in file_paths: + file_paths.insert(0, final_stage_path) + return file_paths[:16] + + def describe_run_status(self, run_root: Path) -> str: + paths = build_run_paths(run_root) + ensure_run_layout(paths) + ensure_run_state(paths) + state = load_run_state(paths.run_state) + if state is None: + raise RuntimeError(f"Could not load run state from {paths.run_state}") + return format_run_state(state) + + def search_run_knowledge_base(self, run_root: Path, query: str, limit: int = 5) -> str: + paths = build_run_paths(run_root) + ensure_run_layout(paths) + initialize_knowledge_base(paths, read_text(paths.user_input)) + results = search_knowledge_base(paths.knowledge_base_entries, query=query, limit=limit) + return format_kb_search_results(results) diff --git a/src/operator.py b/src/operator.py index 89f7cb4..0426bfa 100644 --- a/src/operator.py +++ b/src/operator.py @@ -424,6 +424,84 @@ def _run_fake( "calling Claude." 
), ) + file_paths = [relative_to_run(note_path, paths.run_root), relative_to_run(stage_tmp_path, paths.run_root)] + + if stage.number >= 3: + data_path = paths.data_dir / f"{stage.slug}_fake_data.json" + write_text( + data_path, + json.dumps( + { + "stage": stage.slug, + "fake": True, + "kind": "data", + "attempt": attempt_no, + }, + indent=2, + ), + ) + file_paths.append(relative_to_run(data_path, paths.run_root)) + + if stage.number >= 5: + results_path = paths.results_dir / f"{stage.slug}_fake_results.json" + write_text( + results_path, + json.dumps( + { + "stage": stage.slug, + "fake": True, + "kind": "results", + "attempt": attempt_no, + }, + indent=2, + ), + ) + file_paths.append(relative_to_run(results_path, paths.run_root)) + + if stage.number >= 6: + figure_path = paths.figures_dir / f"{stage.slug}_fake_figure.svg" + write_text( + figure_path, + ( + '' + '' + 'Fake Figure' + "" + ), + ) + file_paths.append(relative_to_run(figure_path, paths.run_root)) + + if stage.number >= 7: + tex_path = paths.writing_dir / f"{stage.slug}_fake_paper.tex" + pdf_path = paths.artifacts_dir / f"{stage.slug}_fake_paper.pdf" + write_text( + tex_path, + ( + "\\documentclass{article}\n" + "% neurips style placeholder for fake operator validation\n" + "\\begin{document}\n" + "Fake NeurIPS-style manuscript placeholder.\n" + "\\end{document}\n" + ), + ) + write_text(pdf_path, "Fake PDF placeholder for validation.") + file_paths.extend( + [ + relative_to_run(tex_path, paths.run_root), + relative_to_run(pdf_path, paths.run_root), + ] + ) + + if stage.number >= 8: + review_path = paths.reviews_dir / f"{stage.slug}_fake_review.md" + write_text( + review_path, + ( + f"# Fake Review Artifact: {stage.stage_title}\n\n" + "This placeholder review artifact exists to validate the run layout." 
+ ), + ) + file_paths.append(relative_to_run(review_path, paths.run_root)) stage_markdown = ( f"# Stage {stage.number:02d}: {stage.display_name}\n\n" @@ -441,8 +519,8 @@ def _run_fake( f"- Prompt length for this attempt was {len(prompt.split())} words.\n" "- No research claim from this stage should be treated as real output.\n\n" "## Files Produced\n" - f"- `{relative_to_run(note_path, paths.run_root)}`\n" - f"- `{relative_to_run(stage_tmp_path, paths.run_root)}`\n\n" + + "\n".join(f"- `{file_path}`" for file_path in file_paths) + + "\n\n" "## Suggestions for Refinement\n" "1. Replace fake mode with the real Claude operator and inspect the resulting artifacts.\n" "2. Tighten the stage prompt to better reflect the target of actual publication-grade work.\n" diff --git a/src/platform/__init__.py b/src/platform/__init__.py new file mode 100644 index 0000000..7994139 --- /dev/null +++ b/src/platform/__init__.py @@ -0,0 +1,2 @@ +"""Platform-alignment modules for AutoR.""" + diff --git a/src/platform/agents.py b/src/platform/agents.py new file mode 100644 index 0000000..ffe3c4d --- /dev/null +++ b/src/platform/agents.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Callable + +from .types import PipelineStage, ResearchTask, TaskResult + + +@dataclass +class CommandResearchAgent: + name: str + domain: str + pipeline_stages: list[PipelineStage] + handler: Callable[[ResearchTask], TaskResult] + citations: list[str] = field(default_factory=list) + + def run(self, task: ResearchTask) -> TaskResult: + return self.handler(task) + + +class AgentRuntimeManager: + def __init__(self) -> None: + self._agents: dict[str, CommandResearchAgent] = {} + + def register(self, agent: CommandResearchAgent) -> None: + self._agents[agent.name] = agent + + def list_agents(self) -> list[CommandResearchAgent]: + return list(self._agents.values()) + + def get(self, name: str) -> CommandResearchAgent: + return self._agents[name] 
+
diff --git a/src/platform/fault_tolerance.py b/src/platform/fault_tolerance.py
new file mode 100644
index 0000000..fc8ca6b
--- /dev/null
+++ b/src/platform/fault_tolerance.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+import json
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable, TypeVar
+
+
+T = TypeVar("T")
+
+
+@dataclass(frozen=True)
+class RetryPolicy:
+    """Retry a zero-argument callable, sleeping between failed attempts.
+
+    ``delays_s[i]`` is the pause after failed attempt ``i``; attempts past the
+    end of ``delays_s`` are retried immediately.
+    """
+
+    attempts: int = 4
+    delays_s: tuple[float, ...] = (1.0, 2.0, 4.0, 8.0)
+
+    def run(self, fn: Callable[[], T]) -> T:
+        """Call ``fn`` until it succeeds or ``attempts`` calls have failed.
+
+        Returns the first successful result; re-raises the last exception
+        once every attempt has failed.
+
+        Raises:
+            ValueError: if ``attempts`` is less than 1, so ``fn`` never ran.
+        """
+        last_error: Exception | None = None
+        for index in range(self.attempts):
+            try:
+                return fn()
+            except Exception as exc:  # noqa: BLE001
+                last_error = exc
+                # Sleeping after the final attempt would only delay the failure.
+                has_retry_left = index + 1 < self.attempts
+                if has_retry_left and index < len(self.delays_s):
+                    time.sleep(self.delays_s[index])
+        if last_error is None:
+            # Explicit raise: an ``assert`` here would be stripped under ``-O``.
+            raise ValueError("RetryPolicy.attempts must be at least 1")
+        raise last_error
+
+
+@dataclass(frozen=True)
+class FallbackChain:
+    """Ordered provider names to fall back through."""
+
+    providers: tuple[str, ...]
+
+    def next_after(self, provider: str) -> str | None:
+        """Return the provider after ``provider``, or ``None`` if it is last.
+
+        An unknown ``provider`` yields the first entry (``None`` when empty).
+        """
+        try:
+            index = self.providers.index(provider)
+        except ValueError:
+            return self.providers[0] if self.providers else None
+        if index + 1 >= len(self.providers):
+            return None
+        return self.providers[index + 1]
+
+
+class ErrorClassifier:
+    """Classify error text by case-insensitive substring matching."""
+
+    def classify(self, error_text: str) -> str:
+        """Return ``"transient"``, ``"human_required"`` or ``"non_recoverable"``."""
+        lowered = error_text.lower()
+        if any(token in lowered for token in ("429", "timeout", "temporarily", "connection reset")):
+            return "transient"
+        if any(token in lowered for token in ("permission", "auth", "forbidden")):
+            return "human_required"
+        return "non_recoverable"
+
+
+@dataclass(frozen=True)
+class CheckpointManager:
+    """Save and load a JSON checkpoint stored at ``checkpoint_path``."""
+
+    checkpoint_path: Path
+
+    def save(self, payload: dict[str, object]) -> None:
+        """Write ``payload`` as indented JSON, creating parent directories."""
+        self.checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
+        self.checkpoint_path.write_text(json.dumps(payload, indent=2, ensure_ascii=True) + "\n", encoding="utf-8")
+
+    def load(self) -> dict[str, object] | None:
+        """Return the stored payload, or ``None`` if the file does not exist."""
+        if not self.checkpoint_path.exists():
+            return None
+        return json.loads(self.checkpoint_path.read_text(encoding="utf-8"))
+
diff --git a/src/platform/foundry.py b/src/platform/foundry.py
new file mode 100644
index 0000000..6d40199
--- /dev/null
+++ b/src/platform/foundry.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+
+from ..inspection import build_run_snapshot, list_run_artifacts
+from ..utils import build_run_paths, read_text, write_text
+
+
+class FoundryOutputFormat(str, Enum):
+    PAPER = "paper"
+    POSTER = "poster"
+    SLIDES = "slides"
+    SOCIAL = "social"
+
+
+@dataclass(frozen=True)
+class FoundryOutput:
+    output_format: FoundryOutputFormat
+    output_path: Path
+    summary: str
+
+
+def generate_foundry_output(run_root: Path, output_format: FoundryOutputFormat) -> FoundryOutput:
+    """Write a Markdown run summary to ``<artifacts_dir>/foundry/<format>.md``.
+
+    The summary lists the run name, snapshot status, approved-stage count,
+    artifact total, the approved memory text, and each non-empty artifact group.
+    """
+    paths = build_run_paths(run_root)
+    foundry_dir = paths.artifacts_dir / "foundry"
+    foundry_dir.mkdir(parents=True, exist_ok=True)
+    output_path = foundry_dir / f"{output_format.value}.md"
+
+    snapshot = build_run_snapshot(run_root)
+    memory_text = read_text(paths.memory) if paths.memory.exists() else ""
+    artifacts = list_run_artifacts(run_root)
+    summary = (
+        f"# Foundry Output: {output_format.value.title()}\n\n"
+        f"Run: {run_root.name}\n"
+        f"Status: {snapshot['status']}\n"
+        f"Approved stages: {snapshot['approved_stage_count']}\n"
+        f"Artifacts: {artifacts['total_files']}\n\n"
+        "## Approved Memory\n\n"
+        f"{memory_text.strip() or 'No memory recorded.'}\n\n"
+        "## Artifact Groups\n\n"
+        + "\n".join(
+            f"- {group}: {len(files)} file(s)"
+            for group, files in artifacts["groups"].items()
+            if files
+        )
+        + "\n"
+    )
+    write_text(output_path, summary)
+    return FoundryOutput(output_format=output_format, output_path=output_path, summary=summary)
diff --git a/src/platform/messaging.py b/src/platform/messaging.py
new file mode 100644
index 0000000..ca1ce67
--- /dev/null
+++ b/src/platform/messaging.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+import json +from dataclasses import dataclass +from pathlib import Path + + +@dataclass(frozen=True) +class OutboundMessage: + channel: str + text: str + + +class FileMessageChannel: + def __init__(self, outbox_path: Path) -> None: + self.outbox_path = outbox_path + + def send(self, message: OutboundMessage) -> None: + self.outbox_path.parent.mkdir(parents=True, exist_ok=True) + with self.outbox_path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps({"channel": message.channel, "text": message.text}, ensure_ascii=True) + "\n") + + +class MessagingHub: + def __init__(self, channels: list[FileMessageChannel] | None = None) -> None: + self.channels = channels or [] + + def broadcast(self, text: str) -> None: + for channel in self.channels: + channel.send(OutboundMessage(channel="file", text=text)) + diff --git a/src/platform/observability.py b/src/platform/observability.py new file mode 100644 index 0000000..7045693 --- /dev/null +++ b/src/platform/observability.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path + + +@dataclass(frozen=True) +class ObservabilityCollector: + run_root: Path + + @property + def observability_dir(self) -> Path: + return self.run_root / "observability" + + @property + def spans_path(self) -> Path: + return self.observability_dir / "spans.jsonl" + + @property + def metrics_path(self) -> Path: + return self.observability_dir / "metrics.jsonl" + + def emit_span(self, name: str, **payload: object) -> None: + self._append( + self.spans_path, + { + "timestamp": _now(), + "name": name, + **payload, + }, + ) + + def emit_metric(self, name: str, value: float, **tags: object) -> None: + self._append( + self.metrics_path, + { + "timestamp": _now(), + "name": name, + "value": value, + "tags": tags, + }, + ) + + def _append(self, path: Path, payload: dict[str, object]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) 
+ with path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(payload, ensure_ascii=True) + "\n") + + +def _now() -> str: + return datetime.now().isoformat(timespec="seconds") + diff --git a/src/platform/orchestration.py b/src/platform/orchestration.py new file mode 100644 index 0000000..c50d98f --- /dev/null +++ b/src/platform/orchestration.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from typing import Callable + +from .types import ResearchTask, TaskResult + + +TaskRunner = Callable[[ResearchTask], TaskResult] + + +@dataclass(frozen=True) +class SequentialPattern: + name: str = "sequential" + + def execute(self, tasks: list[ResearchTask], runner: TaskRunner) -> list[TaskResult]: + return [runner(task) for task in tasks] + + +@dataclass(frozen=True) +class ParallelPattern: + name: str = "parallel" + max_workers: int = 4 + + def execute(self, tasks: list[ResearchTask], runner: TaskRunner) -> list[TaskResult]: + with ThreadPoolExecutor(max_workers=max(self.max_workers, 1)) as pool: + futures = [pool.submit(runner, task) for task in tasks] + return [future.result() for future in futures] + + +@dataclass(frozen=True) +class HierarchicalPattern: + name: str = "hierarchical" + + def execute( + self, + root_task: ResearchTask, + planner: Callable[[ResearchTask], list[ResearchTask]], + runner: TaskRunner, + ) -> list[TaskResult]: + subtasks = planner(root_task) + return [runner(task) for task in subtasks] + + +@dataclass(frozen=True) +class SwarmPattern: + name: str = "swarm" + rounds: int = 2 + + def execute(self, tasks: list[ResearchTask], runner: TaskRunner) -> list[TaskResult]: + results: list[TaskResult] = [] + for _ in range(max(self.rounds, 1)): + results = [runner(task) for task in tasks] + return results + diff --git a/src/platform/protocols.py b/src/platform/protocols.py new file mode 100644 index 0000000..9534a85 --- /dev/null +++ 
b/src/platform/protocols.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from collections import defaultdict, deque +from dataclasses import dataclass +from typing import Callable + + +@dataclass(frozen=True) +class AgentMessage: + sender: str + recipient: str + message_type: str + payload: dict[str, object] + + +@dataclass(frozen=True) +class ToolInvocation: + tool_name: str + arguments: dict[str, object] + + +class AgentMessageBus: + def __init__(self) -> None: + self._queues: dict[str, deque[AgentMessage]] = defaultdict(deque) + + def send(self, message: AgentMessage) -> None: + self._queues[message.recipient].append(message) + + def drain(self, recipient: str) -> list[AgentMessage]: + queue = self._queues[recipient] + messages = list(queue) + queue.clear() + return messages + + +class ToolRegistry: + def __init__(self) -> None: + self._tools: dict[str, Callable[..., object]] = {} + + def register(self, name: str, fn: Callable[..., object]) -> None: + self._tools[name] = fn + + def invoke(self, invocation: ToolInvocation) -> object: + if invocation.tool_name not in self._tools: + raise KeyError(f"Unknown tool: {invocation.tool_name}") + return self._tools[invocation.tool_name](**invocation.arguments) + + +@dataclass(frozen=True) +class ProtocolBridge: + a2a: AgentMessageBus + mcp: ToolRegistry + diff --git a/src/platform/sandbox.py b/src/platform/sandbox.py new file mode 100644 index 0000000..daa6b1c --- /dev/null +++ b/src/platform/sandbox.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import subprocess +from dataclasses import dataclass +from enum import Enum +from typing import Callable, TypeVar + + +T = TypeVar("T") + + +class SandboxLevel(str, Enum): + NONE = "none" + BASIC = "basic" + STRICT = "strict" + + +@dataclass(frozen=True) +class ExecutionPolicy: + level: SandboxLevel = SandboxLevel.NONE + cwd: str | None = None + + +class SandboxRunner: + def __init__(self, policy: ExecutionPolicy | None = None) -> None: + self.policy = 
policy or ExecutionPolicy() + + def run_callable(self, fn: Callable[[], T]) -> T: + return fn() + + def run_subprocess(self, command: list[str]) -> subprocess.CompletedProcess[str]: + return subprocess.run( + command, + cwd=self.policy.cwd, + capture_output=True, + text=True, + check=False, + ) + diff --git a/src/platform/security.py b/src/platform/security.py new file mode 100644 index 0000000..607bf02 --- /dev/null +++ b/src/platform/security.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from dataclasses import dataclass + + +ROLE_SCOPES: dict[str, set[str]] = { + "admin": {"*"}, + "researcher": { + "task.create", + "task.read", + "task.approve", + "kb.read", + "kb.write", + "foundry.create", + "usage.read", + "system.read", + }, + "reviewer": { + "task.read", + "task.approve", + "kb.read", + "system.read", + }, + "node": { + "node.invoke", + "node.read", + "system.read", + }, + "orchestrator": { + "task.create", + "task.read", + "task.approve", + "kb.read", + "kb.write", + "agent.read", + "node.invoke", + "foundry.create", + "system.read", + }, +} + + +@dataclass(frozen=True) +class AccessContext: + role: str = "researcher" + + +def authorize_scope(role: str, scope: str) -> None: + granted = ROLE_SCOPES.get(role) + if granted is None: + raise PermissionError(f"Unknown role: {role}") + if "*" in granted or scope in granted: + return + raise PermissionError(f"Role '{role}' is not allowed to use scope '{scope}'.") + diff --git a/src/platform/semantic.py b/src/platform/semantic.py new file mode 100644 index 0000000..0ba6a8d --- /dev/null +++ b/src/platform/semantic.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import math +import re +from dataclasses import dataclass + + +TOKEN_PATTERN = re.compile(r"[a-z0-9_]{2,}") + + +@dataclass(frozen=True) +class SemanticMatch: + index: int + score: float + + +class SemanticIndexer: + def _tokenize(self, text: str) -> list[str]: + return TOKEN_PATTERN.findall(text.lower()) + + def vectorize(self, text: 
str) -> dict[str, float]: + weights: dict[str, float] = {} + tokens = self._tokenize(text) + total = len(tokens) or 1 + for token in tokens: + weights[token] = weights.get(token, 0.0) + 1.0 / total + return weights + + def cosine_similarity(self, left: dict[str, float], right: dict[str, float]) -> float: + if not left or not right: + return 0.0 + dot = sum(left.get(token, 0.0) * right.get(token, 0.0) for token in left) + left_norm = math.sqrt(sum(value * value for value in left.values())) + right_norm = math.sqrt(sum(value * value for value in right.values())) + if left_norm == 0.0 or right_norm == 0.0: + return 0.0 + return dot / (left_norm * right_norm) + + def rank(self, query: str, documents: list[str], limit: int = 5) -> list[SemanticMatch]: + query_vec = self.vectorize(query) + scored: list[SemanticMatch] = [] + for index, document in enumerate(documents): + score = self.cosine_similarity(query_vec, self.vectorize(document)) + if score > 0: + scored.append(SemanticMatch(index=index, score=score)) + scored.sort(key=lambda item: item.score, reverse=True) + return scored[:limit] + + +def rank_texts(query: str, documents: list[str], limit: int = 5) -> list[SemanticMatch]: + return SemanticIndexer().rank(query, documents, limit=limit) + diff --git a/src/platform/types.py b/src/platform/types.py new file mode 100644 index 0000000..b842ef0 --- /dev/null +++ b/src/platform/types.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + + +PIPELINE_STAGES = ( + "literature_survey", + "hypothesis_generation", + "study_design", + "implementation", + "experimentation", + "analysis", + "writing", + "dissemination", +) + +PipelineStage = str + + +@dataclass(frozen=True) +class Citation: + title: str + source: str + identifier: str = "" + + +@dataclass(frozen=True) +class ProvenanceRecord: + agent_name: str + action: str + evidence: list[str] = field(default_factory=list) + notes: str = "" + + +@dataclass(frozen=True) +class 
ResearchTask: + task_id: str + title: str + goal: str + pipeline_stage: PipelineStage + project_id: str + kb_context: list[str] = field(default_factory=list) + human_gate_required: bool = True + citations: list[Citation] = field(default_factory=list) + reproducibility_notes: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class TaskResult: + task_id: str + output: str + artifacts: list[str] = field(default_factory=list) + provenance: list[ProvenanceRecord] = field(default_factory=list) + diff --git a/src/run_state.py b/src/run_state.py new file mode 100644 index 0000000..38aeee6 --- /dev/null +++ b/src/run_state.py @@ -0,0 +1,245 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path + +from .utils import RunPaths, StageSpec + + +RUN_STATUS_PENDING = "PENDING" +RUN_STATUS_RUNNING = "RUNNING" +RUN_STATUS_HUMAN_REVIEW = "HUMAN_REVIEW" +RUN_STATUS_COMPLETED = "COMPLETED" +RUN_STATUS_FAILED = "FAILED" +RUN_STATUS_CANCELLED = "CANCELLED" + + +@dataclass(frozen=True) +class RunState: + run_id: str + status: str + created_at: str + updated_at: str + last_event: str + current_stage_slug: str | None = None + current_stage_title: str | None = None + current_pattern: str | None = None + current_attempt: int | None = None + human_review_required: bool = True + waiting_for_human_review: bool = False + approved_stages: list[dict[str, str]] = field(default_factory=list) + last_error: str | None = None + completed_at: str | None = None + + def to_dict(self) -> dict[str, object]: + return { + "run_id": self.run_id, + "status": self.status, + "created_at": self.created_at, + "updated_at": self.updated_at, + "last_event": self.last_event, + "current_stage_slug": self.current_stage_slug, + "current_stage_title": self.current_stage_title, + "current_pattern": self.current_pattern, + "current_attempt": self.current_attempt, + "human_review_required": 
self.human_review_required, + "waiting_for_human_review": self.waiting_for_human_review, + "approved_stages": list(self.approved_stages), + "last_error": self.last_error, + "completed_at": self.completed_at, + } + + @classmethod + def from_dict(cls, payload: dict[str, object]) -> "RunState": + approved_stages = payload.get("approved_stages", []) + if not isinstance(approved_stages, list): + approved_stages = [] + + return cls( + run_id=str(payload.get("run_id") or ""), + status=str(payload.get("status") or RUN_STATUS_PENDING), + created_at=str(payload.get("created_at") or _now()), + updated_at=str(payload.get("updated_at") or _now()), + last_event=str(payload.get("last_event") or "run.created"), + current_stage_slug=str(payload["current_stage_slug"]) if payload.get("current_stage_slug") is not None else None, + current_stage_title=str(payload["current_stage_title"]) if payload.get("current_stage_title") is not None else None, + current_pattern=str(payload["current_pattern"]) if payload.get("current_pattern") is not None else None, + current_attempt=int(payload["current_attempt"]) if payload.get("current_attempt") is not None else None, + human_review_required=bool(payload.get("human_review_required", True)), + waiting_for_human_review=bool(payload.get("waiting_for_human_review", False)), + approved_stages=[dict(item) for item in approved_stages if isinstance(item, dict)], + last_error=str(payload["last_error"]) if payload.get("last_error") is not None else None, + completed_at=str(payload["completed_at"]) if payload.get("completed_at") is not None else None, + ) + + +def _now() -> str: + return datetime.now().isoformat(timespec="seconds") + + +def _write_run_state(path: Path, state: RunState) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(state.to_dict(), indent=2, ensure_ascii=True) + "\n", encoding="utf-8") + + +def load_run_state(path: Path) -> RunState | None: + if not path.exists(): + return None + + text = 
path.read_text(encoding="utf-8").strip() + if not text: + return None + return RunState.from_dict(json.loads(text)) + + +def initialize_run_state(paths: RunPaths) -> RunState: + timestamp = _now() + state = RunState( + run_id=paths.run_root.name, + status=RUN_STATUS_PENDING, + created_at=timestamp, + updated_at=timestamp, + last_event="run.created", + ) + _write_run_state(paths.run_state, state) + return state + + +def ensure_run_state(paths: RunPaths) -> RunState: + state = load_run_state(paths.run_state) + if state is not None: + return state + return initialize_run_state(paths) + + +def _update_run_state(paths: RunPaths, **changes: object) -> RunState: + state = ensure_run_state(paths) + payload = state.to_dict() + payload.update(changes) + payload["updated_at"] = _now() + next_state = RunState.from_dict(payload) + _write_run_state(paths.run_state, next_state) + return next_state + + +def mark_stage_running(paths: RunPaths, stage: StageSpec, attempt_no: int) -> RunState: + return _update_run_state( + paths, + status=RUN_STATUS_RUNNING, + current_stage_slug=stage.slug, + current_stage_title=stage.stage_title, + current_pattern=stage.orchestration_pattern, + current_attempt=attempt_no, + waiting_for_human_review=False, + last_event="stage.started", + last_error=None, + ) + + +def mark_stage_human_review(paths: RunPaths, stage: StageSpec, attempt_no: int) -> RunState: + return _update_run_state( + paths, + status=RUN_STATUS_HUMAN_REVIEW, + current_stage_slug=stage.slug, + current_stage_title=stage.stage_title, + current_pattern=stage.orchestration_pattern, + current_attempt=attempt_no, + waiting_for_human_review=True, + last_event="stage.awaiting_human_review", + last_error=None, + ) + + +def mark_stage_approved(paths: RunPaths, stage: StageSpec) -> RunState: + state = ensure_run_state(paths) + approved_stages = list(state.approved_stages) + if not any(item.get("slug") == stage.slug for item in approved_stages): + approved_stages.append( + { + "slug": stage.slug, + 
"title": stage.stage_title, + "approved_at": _now(), + } + ) + + return _update_run_state( + paths, + status=RUN_STATUS_PENDING, + current_stage_slug=None, + current_stage_title=None, + current_pattern=None, + current_attempt=None, + waiting_for_human_review=False, + approved_stages=approved_stages, + last_event="stage.approved", + last_error=None, + ) + + +def mark_run_completed(paths: RunPaths) -> RunState: + completed_at = _now() + return _update_run_state( + paths, + status=RUN_STATUS_COMPLETED, + current_stage_slug=None, + current_stage_title=None, + current_pattern=None, + current_attempt=None, + waiting_for_human_review=False, + completed_at=completed_at, + last_event="run.completed", + last_error=None, + ) + + +def mark_run_cancelled(paths: RunPaths, stage: StageSpec | None = None) -> RunState: + return _update_run_state( + paths, + status=RUN_STATUS_CANCELLED, + current_stage_slug=stage.slug if stage else None, + current_stage_title=stage.stage_title if stage else None, + current_pattern=stage.orchestration_pattern if stage else None, + waiting_for_human_review=False, + last_event="run.cancelled", + ) + + +def mark_run_failed(paths: RunPaths, error: str, stage: StageSpec | None = None) -> RunState: + return _update_run_state( + paths, + status=RUN_STATUS_FAILED, + current_stage_slug=stage.slug if stage else None, + current_stage_title=stage.stage_title if stage else None, + current_pattern=stage.orchestration_pattern if stage else None, + waiting_for_human_review=False, + last_event="run.failed", + last_error=error.strip(), + ) + + +def format_run_state(state: RunState) -> str: + lines = [ + f"Run: {state.run_id}", + f"Status: {state.status}", + f"Last Event: {state.last_event}", + f"Updated At: {state.updated_at}", + ] + + if state.current_stage_title: + lines.append(f"Current Stage: {state.current_stage_title}") + if state.current_pattern: + lines.append(f"Stage Pattern: {state.current_pattern}") + if state.current_attempt is not None: + 
lines.append(f"Current Attempt: {state.current_attempt}") + + lines.append(f"Waiting For Human Review: {state.waiting_for_human_review}") + lines.append(f"Approved Stages: {len(state.approved_stages)}") + + if state.last_error: + lines.append(f"Last Error: {state.last_error}") + if state.completed_at: + lines.append(f"Completed At: {state.completed_at}") + + return "\n".join(lines) diff --git a/src/utils.py b/src/utils.py index e6711a6..20d6666 100644 --- a/src/utils.py +++ b/src/utils.py @@ -13,6 +13,9 @@ class StageSpec: number: int slug: str display_name: str + orchestration_pattern: str + execution_flow: str + characteristics: str @property def filename(self) -> str: @@ -22,6 +25,14 @@ def filename(self) -> str: def stage_title(self) -> str: return f"Stage {self.number:02d}: {self.display_name}" + @property + def pattern_summary(self) -> str: + return ( + f"Pattern: {self.orchestration_pattern}\n" + f"Execution Flow: {self.execution_flow}\n" + f"Characteristics: {self.characteristics}" + ) + @dataclass(frozen=True) class RunPaths: @@ -30,9 +41,13 @@ class RunPaths: memory: Path logs: Path logs_raw: Path + run_state: Path + control_dir: Path prompt_cache_dir: Path operator_state_dir: Path stages_dir: Path + knowledge_base_dir: Path + knowledge_base_entries: Path workspace_root: Path literature_dir: Path code_dir: Path @@ -65,14 +80,70 @@ class OperatorResult: STAGES: list[StageSpec] = [ - StageSpec(1, "01_literature_survey", "Literature Survey"), - StageSpec(2, "02_hypothesis_generation", "Hypothesis Generation"), - StageSpec(3, "03_study_design", "Study Design"), - StageSpec(4, "04_implementation", "Implementation"), - StageSpec(5, "05_experimentation", "Experimentation"), - StageSpec(6, "06_analysis", "Analysis"), - StageSpec(7, "07_writing", "Writing"), - StageSpec(8, "08_dissemination", "Dissemination"), + StageSpec( + 1, + "01_literature_survey", + "Literature Survey", + "Parallel", + "Fan out literature search across multiple sources, deduplicate, and 
merge into a structured evidence map.", + "Low latency, broad coverage, and strict merge discipline for citations and evidence.", + ), + StageSpec( + 2, + "02_hypothesis_generation", + "Hypothesis Generation", + "Swarm Debate", + "Propose, critique, and refine candidate hypotheses through adversarial iteration before convergence.", + "Scalable ideation, quality pressure through critique, and explicit consensus on the best direction.", + ), + StageSpec( + 3, + "03_study_design", + "Study Design", + "Hierarchical", + "Decompose design work into protocol, variables, evaluation, and risk planning under a coordinating planner.", + "Structured decomposition with strong coherence control and explicit planning sub-outputs.", + ), + StageSpec( + 4, + "04_implementation", + "Implementation", + "Sequential", + "Progress through environment setup, dependency resolution, coding, testing, and validation in order.", + "Shared context between steps, fail-fast behavior, and checkpointable implementation progress.", + ), + StageSpec( + 5, + "05_experimentation", + "Experimentation", + "Sequential + Parallel", + "Run sequential setup and checkpointing, then parallelize independent ablations and experiment branches where possible.", + "Supports long-running autonomy, explicit recovery, and parallel experiment execution when runs are independent.", + ), + StageSpec( + 6, + "06_analysis", + "Analysis", + "Hierarchical", + "Delegate statistics, visualization, and interpretation into specialized analysis sub-problems before aggregating conclusions.", + "Bottom-up aggregation with specialized analysis roles and explicit synthesis of findings.", + ), + StageSpec( + 7, + "07_writing", + "Writing", + "Sequential", + "Move from outline to drafting, citation insertion, formatting compliance, and consistency review in order.", + "Iterative refinement with strong dependency on validated upstream artifacts and review readiness.", + ), + StageSpec( + 8, + "08_dissemination", + "Dissemination", + 
"Parallel", + "Generate posters, slides, summaries, and outreach artifacts concurrently from the same approved source materials.", + "Independent outputs with a shared source of truth and concurrent artifact generation.", + ), ] REQUIRED_STAGE_HEADINGS = [ @@ -137,9 +208,13 @@ def build_run_paths(run_root: Path) -> RunPaths: memory=run_root / "memory.md", logs=run_root / "logs.txt", logs_raw=run_root / "logs_raw.jsonl", + run_state=run_root / "run_state.json", + control_dir=run_root / "control", prompt_cache_dir=run_root / "prompt_cache", operator_state_dir=run_root / "operator_state", stages_dir=run_root / "stages", + knowledge_base_dir=run_root / "knowledge_base", + knowledge_base_entries=run_root / "knowledge_base" / "entries.jsonl", workspace_root=workspace_root, literature_dir=workspace_root / "literature", code_dir=workspace_root / "code", @@ -155,15 +230,23 @@ def build_run_paths(run_root: Path) -> RunPaths: def ensure_run_layout(paths: RunPaths) -> None: paths.run_root.mkdir(parents=True, exist_ok=True) + paths.control_dir.mkdir(parents=True, exist_ok=True) paths.prompt_cache_dir.mkdir(parents=True, exist_ok=True) paths.operator_state_dir.mkdir(parents=True, exist_ok=True) paths.stages_dir.mkdir(parents=True, exist_ok=True) + paths.knowledge_base_dir.mkdir(parents=True, exist_ok=True) paths.workspace_root.mkdir(parents=True, exist_ok=True) for directory in workspace_dirs(paths): directory.mkdir(parents=True, exist_ok=True) - for file_path in (paths.user_input, paths.memory, paths.logs, paths.logs_raw): + for file_path in ( + paths.user_input, + paths.memory, + paths.logs, + paths.logs_raw, + paths.knowledge_base_entries, + ): file_path.parent.mkdir(parents=True, exist_ok=True) file_path.touch(exist_ok=True) @@ -282,11 +365,18 @@ def build_prompt( stage_template: str, user_request: str, approved_memory: str, + kb_context: str, revision_feedback: str | None, ) -> str: sections = [ "# Stage Instructions", stage_template.strip(), + "# Research Pipeline 
Mapping", + ( + "Use the following ClawDock-aligned orchestration pattern as planning guidance for this stage. " + "AutoR still executes one stage attempt at a time, but the internal work should reflect this pattern." + ), + stage.pattern_summary, "# Required Stage Summary Format", ( "You must create or overwrite the stage summary markdown file using exactly the " @@ -310,6 +400,8 @@ def build_prompt( user_request.strip(), "# Approved Memory", approved_memory.strip() or "_None yet._", + "# Knowledge Base Context", + kb_context.strip() or "No relevant knowledge-base entries yet.", "# Revision Feedback", revision_feedback.strip() if revision_feedback else "None.", ] @@ -320,6 +412,7 @@ def build_continuation_prompt( stage: StageSpec, stage_template: str, paths: RunPaths, + kb_context: str, revision_feedback: str | None, ) -> str: current_draft = paths.stage_tmp_file(stage) @@ -333,6 +426,12 @@ def build_continuation_prompt( ), "# Stage Instructions", stage_template.strip(), + "# Research Pipeline Mapping", + ( + "Use the following ClawDock-aligned orchestration pattern as planning guidance for this stage. " + "Continue the current work in a way that matches the intended research execution style." + ), + stage.pattern_summary, "# Required Stage Summary Format", ( "You must create or overwrite the stage summary markdown file using exactly the " @@ -352,6 +451,8 @@ def build_continuation_prompt( "8. Do not leave placeholder text such as [In progress], [Pending], [TODO], [TBD], or similar unfinished markers.\n" "9. If the existing stage work is partially correct, keep the correct parts and extend them rather than replacing them blindly." 
), + "# Knowledge Base Context", + kb_context.strip() or "No relevant knowledge-base entries yet.", "# New Feedback", revision_feedback.strip() if revision_feedback @@ -599,6 +700,10 @@ def _extract_path_references(text: str) -> list[str]: return paths +def extract_path_references(text: str) -> list[str]: + return _extract_path_references(text) + + def _existing_files(directory: Path) -> list[Path]: if not directory.exists(): return [] diff --git a/tests/test_clawdock_alignment.py b/tests/test_clawdock_alignment.py new file mode 100644 index 0000000..8d51c64 --- /dev/null +++ b/tests/test_clawdock_alignment.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import io +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from src.knowledge_base import initialize_knowledge_base, load_kb_entries, search_knowledge_base, write_kb_entry +from src.manager import ResearchManager +from src.operator import ClaudeOperator +from src.run_state import RUN_STATUS_COMPLETED, load_run_state +from src.utils import STAGES, build_prompt, build_run_paths, ensure_run_layout + + +class ClawDockAlignmentTests(unittest.TestCase): + def test_build_prompt_includes_pattern_and_kb_context(self) -> None: + stage = STAGES[0] + prompt = build_prompt( + stage=stage, + stage_template="Stage template body", + user_request="Survey recent work on retrieval.", + approved_memory="Approved memory body", + kb_context="1. 
[user_goal] Original user goal", + revision_feedback=None, + ) + + self.assertIn("# Research Pipeline Mapping", prompt) + self.assertIn(stage.orchestration_pattern, prompt) + self.assertIn("# Knowledge Base Context", prompt) + self.assertIn("Original user goal", prompt) + + def test_kb_search_prioritizes_matching_stage(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + run_root = Path(tmp_dir) / "run" + paths = build_run_paths(run_root) + ensure_run_layout(paths) + initialize_knowledge_base(paths, "Research goal") + + write_kb_entry( + paths, + entry_type="stage_approved", + title="Literature survey approved", + summary="Protein folding literature synthesis", + content="Discussed literature evidence for protein folding.", + stage=STAGES[0], + tags=["literature"], + ) + write_kb_entry( + paths, + entry_type="stage_approved", + title="Hypothesis approved", + summary="Hypothesis for protein folding experiments", + content="Outlined a protein folding hypothesis.", + stage=STAGES[1], + tags=["hypothesis"], + ) + + results = search_knowledge_base( + paths.knowledge_base_entries, + query="protein folding literature", + limit=3, + stage=STAGES[0], + ) + + self.assertGreaterEqual(len(results), 1) + self.assertEqual(results[0].entry.stage_slug, STAGES[0].slug) + + def test_fake_run_completes_with_state_and_kb(self) -> None: + repo_root = Path(__file__).resolve().parents[1] + + with tempfile.TemporaryDirectory() as tmp_dir: + runs_dir = Path(tmp_dir) / "runs" + manager = ResearchManager( + project_root=repo_root, + runs_dir=runs_dir, + operator=ClaudeOperator(fake_mode=True, output_stream=io.StringIO()), + output_stream=io.StringIO(), + ) + + with patch("builtins.input", side_effect=["5"] * len(STAGES)): + completed = manager.run("Build a reproducible research workflow.") + + self.assertTrue(completed) + + run_roots = sorted(path for path in runs_dir.iterdir() if path.is_dir()) + self.assertEqual(len(run_roots), 1) + paths = build_run_paths(run_roots[0]) + + 
state = load_run_state(paths.run_state) + self.assertIsNotNone(state) + assert state is not None + self.assertEqual(state.status, RUN_STATUS_COMPLETED) + self.assertEqual(len(state.approved_stages), len(STAGES)) + + entries = load_kb_entries(paths.knowledge_base_entries) + entry_types = [entry.entry_type for entry in entries] + self.assertIn("run_completed", entry_types) + self.assertEqual(entry_types.count("stage_approved"), len(STAGES)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_platform_alignment.py b/tests/test_platform_alignment.py new file mode 100644 index 0000000..6d41fb4 --- /dev/null +++ b/tests/test_platform_alignment.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import io +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from src.manager import ResearchManager +from src.operator import ClaudeOperator +from src.platform.foundry import FoundryOutputFormat, generate_foundry_output +from src.platform.orchestration import HierarchicalPattern, ParallelPattern, SequentialPattern, SwarmPattern +from src.platform.security import ROLE_SCOPES, authorize_scope +from src.platform.semantic import SemanticIndexer +from src.platform.types import ResearchTask, TaskResult +from src.utils import STAGES + + +class PlatformAlignmentTests(unittest.TestCase): + def test_semantic_indexer_ranks_relevant_document_first(self) -> None: + matches = SemanticIndexer().rank( + "protein folding literature evidence", + [ + "agent orchestration and task routing", + "protein folding literature survey and evidence extraction", + "deployment and docker compose instructions", + ], + limit=3, + ) + + self.assertGreaterEqual(len(matches), 1) + self.assertEqual(matches[0].index, 1) + + def test_orchestration_patterns_execute_tasks(self) -> None: + tasks = [ + ResearchTask(task_id="1", title="A", goal="a", pipeline_stage="analysis", project_id="run"), + ResearchTask(task_id="2", title="B", goal="b", 
pipeline_stage="analysis", project_id="run"), + ] + + def runner(task: ResearchTask) -> TaskResult: + return TaskResult(task_id=task.task_id, output=f"done:{task.title}") + + sequential_results = SequentialPattern().execute(tasks, runner) + parallel_results = ParallelPattern(max_workers=2).execute(tasks, runner) + swarm_results = SwarmPattern(rounds=2).execute(tasks, runner) + hierarchical_results = HierarchicalPattern().execute( + tasks[0], + planner=lambda root: tasks, + runner=runner, + ) + + self.assertEqual([item.output for item in sequential_results], ["done:A", "done:B"]) + self.assertEqual(sorted(item.output for item in parallel_results), ["done:A", "done:B"]) + self.assertEqual(len(swarm_results), 2) + self.assertEqual(len(hierarchical_results), 2) + + def test_foundry_generation_writes_output(self) -> None: + repo_root = Path(__file__).resolve().parents[1] + + with tempfile.TemporaryDirectory() as tmp_dir: + runs_dir = Path(tmp_dir) / "runs" + manager = ResearchManager( + project_root=repo_root, + runs_dir=runs_dir, + operator=ClaudeOperator(fake_mode=True, output_stream=io.StringIO()), + output_stream=io.StringIO(), + ) + + with patch("builtins.input", side_effect=["5"] * len(STAGES)): + self.assertTrue(manager.run("Generate a foundry-ready package.")) + + run_root = next(path for path in runs_dir.iterdir() if path.is_dir()) + output = generate_foundry_output(run_root, FoundryOutputFormat.PAPER) + self.assertEqual(output.output_format, FoundryOutputFormat.PAPER) + self.assertIn("paper.md", str(output.output_path)) + self.assertIn("Foundry Output: Paper", output.summary) + + def test_security_role_map_and_authorization(self) -> None: + self.assertIn("researcher", ROLE_SCOPES) + authorize_scope("researcher", "task.read") + with self.assertRaises(PermissionError): + authorize_scope("unknown-role", "task.read") + + +if __name__ == "__main__": + unittest.main() From 1ceea7d96844405925891bd699ccf68aa060461d Mon Sep 17 00:00:00 2001 From: Zefan Cai 
<67849306+Zefan-Cai@users.noreply.github.com> Date: Mon, 30 Mar 2026 22:31:28 -0500 Subject: [PATCH 2/2] Implement CLI manifest workflow and publication packages --- README.md | 20 +- main.py | 13 +- src/inspection.py | 36 +- src/manager.py | 310 +++++++++++------- src/manifest.py | 544 +++++++++++++++++++++++++++++++ src/operator.py | 165 +++++++++- src/platform/debate.py | 157 +++++++++ src/platform/foundry.py | 278 ++++++++++++++++ src/platform/literature.py | 271 +++++++++++++++ src/platform/playbook.py | 83 +++++ src/platform/router.py | 269 +++++++++++++++ src/run_state.py | 208 ++---------- src/utils.py | 31 +- tests/test_clawdock_alignment.py | 63 +++- tests/test_operator_recovery.py | 115 +++++++ tests/test_platform_alignment.py | 74 ++++- 16 files changed, 2297 insertions(+), 340 deletions(-) create mode 100644 src/manifest.py create mode 100644 src/platform/debate.py create mode 100644 src/platform/literature.py create mode 100644 src/platform/playbook.py create mode 100644 src/platform/router.py create mode 100644 tests/test_operator_recovery.py diff --git a/README.md b/README.md index 002dc3a..3b92077 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ File boundaries: ## Workspace Structure -Each run contains `user_input.txt`, `memory.md`, `knowledge_base/`, `run_state.json`, `prompt_cache/`, `operator_state/`, `stages/`, `workspace/`, `logs.txt`, and `logs_raw.jsonl`. The substantive research payload lives in `workspace/`. +Each run contains `user_input.txt`, `memory.md`, `knowledge_base/`, `run_manifest.json`, `prompt_cache/`, `operator_state/`, `stages/`, `workspace/`, `logs.txt`, and `logs_raw.jsonl`. The substantive research payload lives in `workspace/`. 
```mermaid flowchart TD @@ -259,6 +259,12 @@ Redo from a specific stage inside the same run: python main.py --resume-run 20260329_210252 --redo-stage 03 ``` +Roll back to a specific stage and mark downstream stages stale: + +```bash +python main.py --resume-run 20260329_210252 --rollback-stage 03 +``` + `--resume-run ... --redo-stage ...` continues inside the existing run directory. It does not create a new run. Valid stage identifiers include `03`, `3`, and `03_study_design`. @@ -278,6 +284,10 @@ python main.py --resume-run latest --kb-search "hypothesis evidence" --kb-limit Platform-alignment layer under `src/platform/` now includes: - orchestration patterns: sequential, parallel, hierarchical, swarm +- research pipeline router and stage-specific workflow engines +- literature-source adapters plus citation validation +- multi-agent hypothesis debate workflow +- overnight playbook execution with self-monitoring primitives - A2A/MCP-style protocol bridge primitives - agent runtime manager and command-style research agents - semantic retrieval for Knowledge Base ranking @@ -303,10 +313,16 @@ Included: - draft-to-final stage promotion - resume and redo-stage support - artifact-level validation -- structured `run_state.json` lifecycle tracking +- manifest-first lifecycle tracking via `run_manifest.json` - per-run Knowledge Base with prompt injection and CLI search - stage-pattern metadata aligned with the ClawDock research design - platform-alignment modules for orchestration, protocols, runtimes, Foundry, observability, security, sandboxing, messaging, and deployment +- machine-readable `run_manifest.json` stage state tracking +- stage handoff summaries under `handoff/` +- cross-stage rollback with downstream stale invalidation +- operator attempt/session recovery state under `operator_state/` +- Stage 07 paper package generation with LaTeX/bib/table/checklist/PDF artifacts +- Stage 08 review and dissemination package generation with readiness/release materials Out 
of scope: diff --git a/main.py b/main.py index 21f0c0b..3752f8b 100644 --- a/main.py +++ b/main.py @@ -38,6 +38,10 @@ def parse_args() -> argparse.Namespace: "--redo-stage", help="When resuming a run, restart from this stage slug or stage number (for example '06_analysis' or '6').", ) + parser.add_argument( + "--rollback-stage", + help="When resuming a run, roll back to this stage and mark downstream stages stale before continuing.", + ) parser.add_argument( "--show-status", action="store_true", @@ -123,8 +127,8 @@ def main() -> int: if args.resume_run: run_root = resolve_resume_run(runs_dir, args.resume_run) if args.show_status or args.kb_search: - if args.redo_stage: - raise ValueError("--redo-stage cannot be combined with --show-status or --kb-search.") + if args.redo_stage or args.rollback_stage: + raise ValueError("--redo-stage/--rollback-stage cannot be combined with --show-status or --kb-search.") if args.show_status: print(manager.describe_run_status(run_root)) if args.kb_search: @@ -132,7 +136,10 @@ def main() -> int: return 0 start_stage = resolve_stage(args.redo_stage) - manager.resume_run(run_root, start_stage=start_stage) + rollback_stage = resolve_stage(args.rollback_stage) + if start_stage is not None and rollback_stage is not None: + raise ValueError("--redo-stage and --rollback-stage are mutually exclusive.") + manager.resume_run(run_root, start_stage=start_stage, rollback_stage=rollback_stage) return 0 goal = args.goal.strip() if args.goal else read_user_goal() diff --git a/src/inspection.py b/src/inspection.py index 3841a53..3cafc2b 100644 --- a/src/inspection.py +++ b/src/inspection.py @@ -5,7 +5,7 @@ from pathlib import Path from .knowledge_base import KBSearchResult, load_kb_entries, search_knowledge_base -from .run_state import RunState, load_run_state +from .manifest import load_run_manifest from .utils import STAGES, StageSpec, approved_stage_summaries, build_run_paths, read_text @@ -21,13 +21,13 @@ def run_exists(runs_dir: Path, run_id: 
str) -> bool: def build_run_snapshot(run_root: Path) -> dict[str, object]: paths = build_run_paths(run_root) - run_state = load_run_state(paths.run_state) + manifest = load_run_manifest(paths.run_manifest) memory_text = read_text(paths.memory) if paths.memory.exists() else "" approved_memory = approved_stage_summaries(memory_text) approved_titles = { - item.get("title", "") - for item in (run_state.approved_stages if run_state else []) - if isinstance(item, dict) + entry.title + for entry in (manifest.stages if manifest else []) + if entry.approved } kb_entries = load_kb_entries(paths.knowledge_base_entries) @@ -53,17 +53,17 @@ def build_run_snapshot(run_root: Path) -> dict[str, object]: snapshot = { "run_id": run_root.name, "run_root": str(run_root), - "status": run_state.status if run_state else "UNKNOWN", - "last_event": run_state.last_event if run_state else None, - "updated_at": run_state.updated_at if run_state else None, - "current_stage_slug": run_state.current_stage_slug if run_state else None, - "current_stage_title": run_state.current_stage_title if run_state else None, - "current_pattern": run_state.current_pattern if run_state else None, - "current_attempt": run_state.current_attempt if run_state else None, - "waiting_for_human_review": run_state.waiting_for_human_review if run_state else False, - "last_error": run_state.last_error if run_state else None, - "completed_at": run_state.completed_at if run_state else None, - "approved_stage_count": len(run_state.approved_stages) if run_state else 0, + "status": manifest.run_status.upper() if manifest else "UNKNOWN", + "last_event": manifest.last_event if manifest else None, + "updated_at": manifest.updated_at if manifest else None, + "current_stage_slug": manifest.current_stage_slug if manifest else None, + "current_stage_title": next((entry.title for entry in manifest.stages if entry.slug == manifest.current_stage_slug), None) if manifest and manifest.current_stage_slug else None, + "current_pattern": 
None, + "current_attempt": next((entry.attempt_count for entry in manifest.stages if entry.slug == manifest.current_stage_slug), None) if manifest and manifest.current_stage_slug else None, + "waiting_for_human_review": manifest.run_status == "human_review" if manifest else False, + "last_error": manifest.last_error if manifest else None, + "completed_at": manifest.completed_at if manifest else None, + "approved_stage_count": len([entry for entry in manifest.stages if entry.approved]) if manifest else 0, "knowledge_base_entry_count": len(kb_entries), "knowledge_base_entry_types": _count_entry_types(kb_entries), "stages": stage_statuses, @@ -89,9 +89,9 @@ def list_run_summaries(runs_dir: Path) -> list[dict[str, object]]: return summaries -def load_run_state_snapshot(run_root: Path) -> RunState | None: +def load_run_state_snapshot(run_root: Path) -> object | None: paths = build_run_paths(run_root) - return load_run_state(paths.run_state) + return load_run_manifest(paths.run_manifest) def list_run_kb_entries( diff --git a/src/manager.py b/src/manager.py index 9f439f6..0a5bc9d 100644 --- a/src/manager.py +++ b/src/manager.py @@ -3,6 +3,7 @@ import json import shutil import sys +from datetime import datetime from pathlib import Path from typing import TextIO @@ -13,28 +14,35 @@ search_knowledge_base, write_kb_entry, ) +from .manifest import ( + build_manifest_context, + build_handoff_context, + ensure_run_manifest, + format_manifest_status, + initialize_run_manifest, + load_run_manifest, + mark_stage_approved_manifest, + mark_stage_failed_manifest, + mark_stage_human_review_manifest, + mark_stage_running_manifest, + rebuild_memory_from_manifest, + rollback_to_stage, + sync_stage_session_id, + update_manifest_run_status, + write_stage_handoff, +) from .operator import ClaudeOperator from .platform.fault_tolerance import CheckpointManager from .platform.observability import ObservabilityCollector -from .platform.orchestration import HierarchicalPattern, ParallelPattern, 
SequentialPattern, SwarmPattern -from .platform.types import PipelineStage, ProvenanceRecord, ResearchTask, TaskResult +from .platform.router import ResearchPipelineRouter from .run_state import ( - ensure_run_state, + derive_run_state, format_run_state, - initialize_run_state, - load_run_state, - mark_run_cancelled, - mark_run_completed, - mark_run_failed, - mark_stage_approved, - mark_stage_human_review, - mark_stage_running, ) from .utils import ( STAGES, RunPaths, StageSpec, - append_approved_stage_summary, append_log_entry, build_continuation_prompt, build_prompt, @@ -67,6 +75,7 @@ def __init__( self.project_root = project_root self.runs_dir = runs_dir self.operator = operator + self.router = ResearchPipelineRouter() self.prompt_dir = self.project_root / "src" / "prompts" self.output_stream = output_stream @@ -75,7 +84,12 @@ def run(self, user_goal: str) -> bool: self._print(f"Run created at: {paths.run_root}") return self.execute_run_paths(paths) - def resume_run(self, run_root: Path, start_stage: StageSpec | None = None) -> bool: + def resume_run( + self, + run_root: Path, + start_stage: StageSpec | None = None, + rollback_stage: StageSpec | None = None, + ) -> bool: paths = build_run_paths(run_root) ensure_run_layout(paths) if not paths.user_input.exists(): @@ -84,13 +98,21 @@ def resume_run(self, run_root: Path, start_stage: StageSpec | None = None) -> bo raise FileNotFoundError(f"Missing memory.md in run: {run_root}") initialize_knowledge_base(paths, read_text(paths.user_input)) - ensure_run_state(paths) + ensure_run_manifest(paths) + + if rollback_stage is not None: + self._print(self._format_rollback_preview(paths, rollback_stage)) + rollback_to_stage(paths, rollback_stage) + start_stage = rollback_stage + elif start_stage is not None: + self._auto_rollback_if_needed(paths, start_stage) append_log_entry( paths.logs, "run_resume", f"Resumed run at: {paths.run_root}" - + (f"\nRequested start stage: {start_stage.stage_title}" if start_stage else ""), + + 
(f"\nRequested start stage: {start_stage.stage_title}" if start_stage else "") + + (f"\nRequested rollback stage: {rollback_stage.stage_title}" if rollback_stage else ""), ) self._print(f"Resuming run at: {paths.run_root}") if start_stage: @@ -110,7 +132,13 @@ def execute_run_paths( try: return self._run_from_paths(paths, start_stage=start_stage) except Exception as exc: - mark_run_failed(paths, error=str(exc), stage=start_stage) + update_manifest_run_status( + paths, + run_status="failed", + last_event="run.failed", + last_error=str(exc), + current_stage_slug=start_stage.slug if start_stage else None, + ) write_kb_entry( paths, entry_type="run_failed", @@ -133,7 +161,12 @@ def _run_from_paths(self, paths: RunPaths, start_stage: StageSpec | None = None) "run_aborted", f"Run aborted during {stage.stage_title}.", ) - mark_run_cancelled(paths, stage=stage) + update_manifest_run_status( + paths, + run_status="cancelled", + last_event="run.cancelled", + current_stage_slug=stage.slug, + ) write_kb_entry( paths, entry_type="run_cancelled", @@ -147,7 +180,14 @@ def _run_from_paths(self, paths: RunPaths, start_stage: StageSpec | None = None) return False append_log_entry(paths.logs, "run_complete", "All stages approved.") - mark_run_completed(paths) + completed_at = self._now() + update_manifest_run_status( + paths, + run_status="completed", + last_event="run.completed", + completed_at=completed_at, + current_stage_slug=None, + ) write_kb_entry( paths, entry_type="run_completed", @@ -166,7 +206,7 @@ def _create_run(self, user_goal: str) -> RunPaths: write_text(paths.user_input, user_goal) initialize_memory(paths, user_goal) initialize_knowledge_base(paths, user_goal) - initialize_run_state(paths) + initialize_run_manifest(paths) append_log_entry(paths.logs, "run_start", f"Run root: {paths.run_root}") return paths @@ -178,11 +218,11 @@ def _select_stages_for_run( if start_stage is not None: return [stage for stage in STAGES if stage.number >= start_stage.number] - 
approved_memory = read_text(paths.memory) + manifest = ensure_run_manifest(paths) pending: list[StageSpec] = [] for stage in STAGES: - final_stage_path = paths.stage_file(stage) - if final_stage_path.exists() and stage.stage_title in approved_memory: + entry = next(entry for entry in manifest.stages if entry.slug == stage.slug) + if entry.approved and entry.status == "approved": continue pending.append(stage) @@ -195,7 +235,7 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: while True: orchestration_summary = self._execute_stage_orchestration(paths, stage, attempt_no) - mark_stage_running(paths, stage, attempt_no) + mark_stage_running_manifest(paths, stage, attempt_no) write_kb_entry( paths, entry_type="stage_attempt_started", @@ -213,7 +253,13 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: tags=["stage", "attempt", "running", stage.slug], ) self._print(f"\nRunning {stage.stage_title} (attempt {attempt_no})...") - prompt = self._build_stage_prompt(paths, stage, revision_feedback, continue_session) + prompt = self._build_stage_prompt( + paths, + stage, + revision_feedback, + continue_session, + orchestration_summary, + ) append_log_entry( paths.logs, f"{stage.slug} attempt {attempt_no} prompt", @@ -227,6 +273,8 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: attempt_no, continue_session=continue_session, ) + if result.session_id: + sync_stage_session_id(paths, stage, result.session_id) append_log_entry( paths.logs, f"{stage.slug} attempt {attempt_no} result", @@ -275,6 +323,7 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: result = repair_result if not result.stage_file_path.exists(): + mark_stage_failed_manifest(paths, stage, "stage_summary_missing") raise RuntimeError( f"Stage summary draft was not generated for {stage.slug}: {result.stage_file_path}" ) @@ -365,6 +414,7 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: stage_markdown = 
read_text(repair_result.stage_file_path) validation_errors = validate_stage_markdown(stage_markdown) + validate_stage_artifacts(stage, paths) if validation_errors: + mark_stage_failed_manifest(paths, stage, "; ".join(validation_errors)) append_log_entry( paths.logs, f"{stage.slug} attempt {attempt_no} local_normalization_failed", @@ -399,7 +449,6 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: ), ) stage_markdown = read_text(final_stage_path) - mark_stage_human_review(paths, stage, attempt_no) write_kb_entry( paths, entry_type="stage_validated", @@ -410,6 +459,12 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: file_paths=self._stage_file_paths(paths, stage, stage_markdown), tags=["stage", "validated", "human_review", stage.slug], ) + mark_stage_human_review_manifest( + paths, + stage, + attempt_no, + self._stage_file_paths(paths, stage, stage_markdown), + ) self._display_stage_output(stage, stage_markdown) choice = self._ask_choice() @@ -468,8 +523,16 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: continue if choice == "5": - append_approved_stage_summary(paths.memory, stage, stage_markdown) - mark_stage_approved(paths, stage) + handoff_path = write_stage_handoff(paths, stage, stage_markdown) + mark_stage_approved_manifest( + paths, + stage, + attempt_no, + self._stage_file_paths(paths, stage, stage_markdown), + compressed_summary=self._compress_stage_handoff(stage_markdown), + handoff_path=str(handoff_path.relative_to(paths.run_root)), + ) + rebuild_memory_from_manifest(paths) append_log_entry( paths.logs, f"{stage.slug} approved", @@ -489,7 +552,13 @@ def _run_stage(self, paths: RunPaths, stage: StageSpec) -> bool: return True if choice == "6": - mark_run_cancelled(paths, stage=stage) + mark_stage_failed_manifest(paths, stage, "user_aborted") + update_manifest_run_status( + paths, + run_status="cancelled", + last_event="run.cancelled", + current_stage_slug=stage.slug, + ) write_kb_entry( paths, 
entry_type="stage_aborted", @@ -508,16 +577,39 @@ def _build_stage_prompt( stage: StageSpec, revision_feedback: str | None, continue_session: bool, + orchestration_summary: dict[str, object], ) -> str: template = load_prompt_template(self.prompt_dir, stage) stage_template = format_stage_template(template, stage, paths) kb_context = self._build_kb_context(paths, stage) + orchestration_context = self._format_orchestration_context(orchestration_summary) + handoff_context = build_handoff_context(paths, upto_stage=stage) + manifest_context = build_manifest_context(paths, upto_stage=stage) if continue_session: - return build_continuation_prompt(stage, stage_template, paths, kb_context, revision_feedback) + return build_continuation_prompt( + stage, + stage_template, + paths, + kb_context, + orchestration_context, + handoff_context, + manifest_context, + revision_feedback, + ) user_request = read_text(paths.user_input) approved_memory = read_text(paths.memory) - return build_prompt(stage, stage_template, user_request, approved_memory, kb_context, revision_feedback) + return build_prompt( + stage, + stage_template, + user_request, + approved_memory, + kb_context, + orchestration_context, + handoff_context, + manifest_context, + revision_feedback, + ) def _display_stage_output(self, stage: StageSpec, markdown: str) -> None: divider = "=" * 80 @@ -554,44 +646,53 @@ def _read_multiline_feedback(self) -> str: def _print(self, text: str) -> None: print(text, file=self.output_stream) - def _execute_stage_orchestration(self, paths: RunPaths, stage: StageSpec, attempt_no: int) -> dict[str, object]: - subtasks = self._build_stage_subtasks(paths, stage, attempt_no) - runner = self._orchestration_runner() - pattern_name = stage.orchestration_pattern.lower() - if "parallel" in pattern_name and "+" in pattern_name: - pattern = SequentialPattern() - results = pattern.execute(subtasks, runner) - elif "parallel" in pattern_name: - results = ParallelPattern(max_workers=min(len(subtasks), 
def _format_rollback_preview(self, paths: RunPaths, rollback_stage: StageSpec) -> str:
    """Render a plain-text preview of what rolling back to ``rollback_stage`` affects.

    Reads the run manifest (creating it if missing via ``ensure_run_manifest``)
    and lists every later stage that is either approved or no longer in the
    ``pending`` state; those are the stages reported as going stale.

    Args:
        paths: Run layout used to locate the manifest.
        rollback_stage: Stage the run is being rolled back to.

    Returns:
        A newline-joined message suitable for printing to the operator.
    """
    manifest = ensure_run_manifest(paths)

    downstream: list[str] = []
    for item in manifest.stages:
        if item.number <= rollback_stage.number:
            continue
        # Untouched pending stages have nothing to invalidate.
        if item.approved or item.status != "pending":
            downstream.append(item.slug)

    header = (
        f"Rolling back to {rollback_stage.stage_title}.\n"
        f"Stage {rollback_stage.slug} will be marked pending/dirty.\n"
    )
    if not downstream:
        return header + "No downstream stages currently need invalidation."

    bullets = "\n".join(f"- {slug}" for slug in downstream)
    return header + "Downstream stages that will be marked stale:\n" + bullets
key_results = extract_markdown_section(stage_markdown, "Key Results") or "" + files_produced = extract_markdown_section(stage_markdown, "Files Produced") or "" + return "\n".join( + [ + f"Objective: {truncate_text(objective, max_chars=240)}", + f"Key Results: {truncate_text(key_results, max_chars=360)}", + f"Files Produced: {truncate_text(files_produced, max_chars=240)}", + ] + ).strip() + + def _execute_stage_orchestration(self, paths: RunPaths, stage: StageSpec, attempt_no: int) -> dict[str, object]: + summary = self.router.execute( + paths=paths, + stage=stage, + attempt_no=attempt_no, + user_goal=read_text(paths.user_input).strip(), + kb_context=self._build_kb_context(paths, stage), + ).to_dict() plan_path = paths.notes_dir / f"{stage.slug}_attempt_{attempt_no:02d}_orchestration.json" write_text(plan_path, json.dumps(summary, indent=2, ensure_ascii=True)) @@ -606,59 +707,29 @@ def _execute_stage_orchestration(self, paths: RunPaths, stage: StageSpec, attemp ) collector.emit_metric( "autor.orchestration.subtask_count", - float(len(subtasks)), + float(summary.get("subtask_count", 0)), run_id=paths.run_root.name, stage_slug=stage.slug, ) return summary - def _build_stage_subtasks(self, paths: RunPaths, stage: StageSpec, attempt_no: int) -> list[ResearchTask]: - stage_key: PipelineStage = stage.slug[3:] - project_id = paths.run_root.name - goal = read_text(paths.user_input).strip() - - subtask_titles: dict[str, list[str]] = { - "01_literature_survey": ["Search sources", "Extract evidence", "Merge survey map"], - "02_hypothesis_generation": ["Propose hypotheses", "Critique hypotheses", "Synthesize direction"], - "03_study_design": ["Plan protocol", "Define variables", "Set evaluation criteria"], - "04_implementation": ["Prepare environment", "Implement pipeline", "Validate execution"], - "05_experimentation": ["Set experiment plan", "Run experiments", "Aggregate checkpoints"], - "06_analysis": ["Compute statistics", "Generate visuals", "Interpret findings"], - 
"07_writing": ["Outline manuscript", "Draft sections", "Check consistency"], - "08_dissemination": ["Draft poster", "Draft slides", "Draft social summary"], - } - - tasks: list[ResearchTask] = [] - for index, title in enumerate(subtask_titles.get(stage.slug, [stage.display_name]), start=1): - tasks.append( - ResearchTask( - task_id=f"{stage.slug}-attempt-{attempt_no:02d}-task-{index:02d}", - title=title, - goal=f"{goal}\n\nStage focus: {stage.stage_title}\nSubtask: {title}", - pipeline_stage=stage_key, - project_id=project_id, - kb_context=[paths.run_root.name, stage.slug], - human_gate_required=False, - reproducibility_notes=[f"Attempt {attempt_no}", stage.orchestration_pattern], - ) - ) - return tasks - - def _orchestration_runner(self) -> callable: - def _run(task: ResearchTask) -> TaskResult: - return TaskResult( - task_id=task.task_id, - output=f"Completed orchestration subtask: {task.title}", - provenance=[ - ProvenanceRecord( - agent_name="AutoROrchestrator", - action=f"planned:{task.title}", - evidence=[task.pipeline_stage, task.project_id], - ) - ], - ) - - return _run + def _format_orchestration_context(self, summary: dict[str, object]) -> str: + artifacts = summary.get("artifact_paths", []) or [] + results = summary.get("results", []) or [] + lines = [ + f"Pattern: {summary.get('pattern', 'unknown')}", + f"Subtasks: {summary.get('subtask_count', 0)}", + f"Summary: {summary.get('summary_text', '')}", + ] + if artifacts: + lines.append("Artifacts: " + ", ".join(f"`{path}`" for path in artifacts[:8])) + if results: + lines.append("Representative outputs:") + for item in results[:3]: + label = item.get("title") or item.get("task_id") or item.get("agent_name") or "result" + payload = item.get("output") or item.get("content") or str(item) + lines.append(f"- {label}: {truncate_text(str(payload), max_chars=220)}") + return "\n".join(lines) def _build_kb_context(self, paths: RunPaths, stage: StageSpec) -> str: user_request = read_text(paths.user_input) @@ -681,11 
STAGE_STATUS_PENDING = "pending"
STAGE_STATUS_RUNNING = "running"
STAGE_STATUS_HUMAN_REVIEW = "human_review"
STAGE_STATUS_APPROVED = "approved"
STAGE_STATUS_STALE = "stale"
STAGE_STATUS_FAILED = "failed"
STAGE_STATUS_CANCELLED = "cancelled"


@dataclass(frozen=True)
class StageManifestEntry:
    """Immutable per-stage record persisted inside the run manifest.

    Instances round-trip through ``to_dict`` / ``from_dict``; ``from_dict`` is
    deliberately lenient so that a partially written or hand-edited manifest
    still loads with sensible defaults.
    """

    number: int
    slug: str
    title: str
    status: str = STAGE_STATUS_PENDING
    approved: bool = False
    dirty: bool = False
    stale: bool = False
    attempt_count: int = 0
    session_id: str | None = None
    # Stage file locations, stored relative to the run root.
    final_stage_path: str = ""
    draft_stage_path: str = ""
    artifact_paths: list[str] = field(default_factory=list)
    handoff_path: str | None = None
    compressed_summary: str = ""
    invalidated_reason: str | None = None
    invalidated_by_stage: str | None = None
    last_error: str | None = None
    updated_at: str = ""
    approved_at: str | None = None

    def to_dict(self) -> dict[str, object]:
        """Return a JSON-serializable mapping of every field."""
        return {
            "number": self.number,
            "slug": self.slug,
            "title": self.title,
            "status": self.status,
            "approved": self.approved,
            "dirty": self.dirty,
            "stale": self.stale,
            "attempt_count": self.attempt_count,
            "session_id": self.session_id,
            "final_stage_path": self.final_stage_path,
            "draft_stage_path": self.draft_stage_path,
            # Copy so callers cannot mutate the frozen entry's list through the dict.
            "artifact_paths": list(self.artifact_paths),
            "handoff_path": self.handoff_path,
            "compressed_summary": self.compressed_summary,
            "invalidated_reason": self.invalidated_reason,
            "invalidated_by_stage": self.invalidated_by_stage,
            "last_error": self.last_error,
            "updated_at": self.updated_at,
            "approved_at": self.approved_at,
        }

    @classmethod
    def from_dict(cls, payload: dict[str, object]) -> "StageManifestEntry":
        """Build an entry from a (possibly incomplete) mapping.

        Missing or falsy scalar values fall back to the field defaults and
        unknown keys are ignored. ``artifact_paths`` may be absent, ``None``
        or a single string; blank items are dropped.
        """

        def optional_text(key: str) -> str | None:
            value = payload.get(key)
            return None if value is None else str(value)

        # An explicit JSON null used to reach the comprehension as None and
        # raise TypeError; a bare string used to be split into characters.
        raw_paths = payload.get("artifact_paths") or []
        if isinstance(raw_paths, str):
            raw_paths = [raw_paths]

        return cls(
            number=int(payload.get("number") or 0),
            slug=str(payload.get("slug") or ""),
            title=str(payload.get("title") or ""),
            status=str(payload.get("status") or STAGE_STATUS_PENDING),
            approved=bool(payload.get("approved", False)),
            dirty=bool(payload.get("dirty", False)),
            stale=bool(payload.get("stale", False)),
            attempt_count=int(payload.get("attempt_count") or 0),
            session_id=optional_text("session_id"),
            final_stage_path=str(payload.get("final_stage_path") or ""),
            draft_stage_path=str(payload.get("draft_stage_path") or ""),
            artifact_paths=[str(item) for item in raw_paths if str(item).strip()],
            handoff_path=optional_text("handoff_path"),
            compressed_summary=str(payload.get("compressed_summary") or ""),
            invalidated_reason=optional_text("invalidated_reason"),
            invalidated_by_stage=optional_text("invalidated_by_stage"),
            last_error=optional_text("last_error"),
            updated_at=str(payload.get("updated_at") or ""),
            approved_at=optional_text("approved_at"),
        )
def save_run_manifest(path: Path, manifest: RunManifest) -> None:
    """Persist ``manifest`` as pretty-printed JSON at ``path``.

    The payload is serialized first, written to a sibling ``<name>.tmp`` file
    and then renamed over the target. An interruption mid-write therefore
    leaves the previous manifest intact instead of a truncated file that
    would fail to parse on the next load.

    Args:
        path: Destination file; parent directories are created as needed.
        manifest: Object exposing ``to_dict()`` with JSON-serializable content.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(manifest.to_dict(), indent=2, ensure_ascii=True) + "\n"
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(serialized, encoding="utf-8")
    # Path.replace overwrites an existing destination.
    tmp_path.replace(path)
payload.update(changes) + payload["updated_at"] = _now() + updated_entries.append(StageManifestEntry.from_dict(payload)) + latest_approved = _latest_approved_slug(updated_entries) + updated_manifest = RunManifest( + run_id=manifest.run_id, + created_at=manifest.created_at, + updated_at=_now(), + run_status=manifest.run_status, + last_event=manifest.last_event, + current_stage_slug=changes.get("current_stage_slug", manifest.current_stage_slug) + if "current_stage_slug" in changes + else manifest.current_stage_slug, + latest_approved_stage_slug=latest_approved, + last_error=manifest.last_error, + completed_at=manifest.completed_at, + stages=updated_entries, + ) + save_run_manifest(paths.run_manifest, updated_manifest) + return updated_manifest + + +def mark_stage_running_manifest(paths: RunPaths, stage: StageSpec, attempt_no: int) -> RunManifest: + return _update_manifest_metadata( + paths, + stage, + status=STAGE_STATUS_RUNNING, + run_status=STAGE_STATUS_RUNNING, + last_event="stage.started", + approved=False, + dirty=False, + stale=False, + attempt_count=attempt_no, + invalidated_reason=None, + invalidated_by_stage=None, + last_error=None, + current_stage_slug=stage.slug, + ) + + +def mark_stage_human_review_manifest(paths: RunPaths, stage: StageSpec, attempt_no: int, artifact_paths: list[str]) -> RunManifest: + return _update_manifest_metadata( + paths, + stage, + status=STAGE_STATUS_HUMAN_REVIEW, + run_status=STAGE_STATUS_HUMAN_REVIEW, + last_event="stage.awaiting_human_review", + approved=False, + dirty=False, + stale=False, + attempt_count=attempt_no, + artifact_paths=artifact_paths, + current_stage_slug=stage.slug, + ) + + +def mark_stage_approved_manifest( + paths: RunPaths, + stage: StageSpec, + attempt_no: int, + artifact_paths: list[str], + compressed_summary: str, + handoff_path: str, +) -> RunManifest: + return _update_manifest_metadata( + paths, + stage, + status=STAGE_STATUS_APPROVED, + run_status=STAGE_STATUS_PENDING, + last_event="stage.approved", + 
approved=True, + dirty=False, + stale=False, + attempt_count=attempt_no, + artifact_paths=artifact_paths, + compressed_summary=compressed_summary, + handoff_path=handoff_path, + approved_at=_now(), + invalidated_reason=None, + invalidated_by_stage=None, + last_error=None, + current_stage_slug=None, + ) + + +def mark_stage_failed_manifest(paths: RunPaths, stage: StageSpec, error: str) -> RunManifest: + return _update_manifest_metadata( + paths, + stage, + status=STAGE_STATUS_FAILED, + run_status=STAGE_STATUS_FAILED, + last_event="stage.failed", + approved=False, + dirty=True, + stale=False, + last_error=error, + current_stage_slug=stage.slug, + ) + + +def sync_stage_session_id(paths: RunPaths, stage: StageSpec, session_id: str | None) -> RunManifest: + return _update_manifest_metadata(paths, stage, session_id=session_id) + + +def rollback_to_stage(paths: RunPaths, rollback_stage: StageSpec, reason: str | None = None) -> RunManifest: + manifest = ensure_run_manifest(paths) + updated_entries: list[StageManifestEntry] = [] + current_slug = rollback_stage.slug + invalidated_reason = reason or f"Rolled back to {rollback_stage.stage_title}" + for entry in manifest.stages: + payload = entry.to_dict() + if entry.number < rollback_stage.number: + updated_entries.append(entry) + continue + if entry.number == rollback_stage.number: + payload.update( + { + "status": STAGE_STATUS_PENDING, + "approved": False, + "dirty": True, + "stale": False, + "invalidated_reason": invalidated_reason, + "invalidated_by_stage": rollback_stage.slug, + "approved_at": None, + } + ) + else: + payload.update( + { + "status": STAGE_STATUS_STALE, + "approved": False, + "dirty": True, + "stale": True, + "invalidated_reason": invalidated_reason, + "invalidated_by_stage": rollback_stage.slug, + "approved_at": None, + } + ) + payload["updated_at"] = _now() + updated_entries.append(StageManifestEntry.from_dict(payload)) + + updated_manifest = RunManifest( + run_id=manifest.run_id, + 
def write_stage_handoff(paths: RunPaths, stage: StageSpec, stage_markdown: str) -> Path:
    """Write the condensed handoff note for an approved stage and return its path.

    The note is stored as ``<handoff_dir>/<stage.slug>.md`` and contains the
    stage's Objective, Key Results and Files Produced sections (each falling
    back to ``"Not provided."``) followed by up to three refinement hooks.

    Args:
        paths: Run layout; ``paths.handoff_dir`` is created if missing.
        stage: Stage whose summary is being handed off.
        stage_markdown: Full markdown of the approved stage summary.

    Returns:
        Path of the handoff file that was written.
    """
    paths.handoff_dir.mkdir(parents=True, exist_ok=True)
    handoff_path = paths.handoff_dir / f"{stage.slug}.md"
    objective = extract_markdown_section(stage_markdown, "Objective") or "Not provided."
    key_results = extract_markdown_section(stage_markdown, "Key Results") or "Not provided."
    files_produced = extract_markdown_section(stage_markdown, "Files Produced") or "Not provided."

    # The parser may yield fewer than three suggestions; indexing [0..2]
    # unconditionally would then abort the approval with IndexError. Three
    # suggestions render exactly as before.
    suggestions = list(parse_refinement_suggestions(stage_markdown) or [])[:3]
    hooks = "".join(f"{index}. {text}\n" for index, text in enumerate(suggestions, start=1))
    if not hooks:
        hooks = "Not provided.\n"

    handoff = (
        f"# Handoff: {stage.stage_title}\n\n"
        "## Objective\n"
        f"{objective}\n\n"
        "## Key Results\n"
        f"{key_results}\n\n"
        "## Files Produced\n"
        f"{files_produced}\n\n"
        "## Open Questions / Refinement Hooks\n"
        f"{hooks}"
    )
    write_text(handoff_path, handoff)
    return handoff_path
def _update_manifest_metadata(paths: RunPaths, stage: StageSpec, **changes: object) -> RunManifest:
    """Apply ``changes`` to one stage entry and to the run-level metadata, then save.

    Stage-level keys are merged into the entry whose slug matches ``stage``;
    keys that ``StageManifestEntry.from_dict`` does not read are dropped there.
    The run-level keys ``run_status``, ``last_event``, ``current_stage_slug``,
    ``last_error`` and ``completed_at`` are additionally applied to the
    manifest itself.

    Args:
        paths: Run layout used to load and save the manifest.
        stage: Stage whose entry is updated.
        **changes: Field overrides. ``current_stage_slug=None`` clears the
            current stage; omitting the key leaves it unchanged.

    Returns:
        The updated manifest that was written to disk.
    """
    manifest = ensure_run_manifest(paths)
    updated_entries: list[StageManifestEntry] = []
    for entry in manifest.stages:
        if entry.slug != stage.slug:
            updated_entries.append(entry)
            continue
        payload = entry.to_dict()
        payload.update(changes)
        payload["updated_at"] = _now()
        updated_entries.append(StageManifestEntry.from_dict(payload))

    # Distinguish "not supplied" from an explicit None. Using changes.get()
    # made every call without the key (e.g. a session-id sync) reset the
    # run's current stage to None.
    current_stage_slug = manifest.current_stage_slug
    if "current_stage_slug" in changes:
        requested = changes["current_stage_slug"]
        if requested is None or isinstance(requested, str):
            current_stage_slug = requested

    # NOTE(review): last_error/completed_at keep the previous run-level value
    # when passed as None, so callers cannot clear them here -- confirm intent.
    updated_manifest = RunManifest(
        run_id=manifest.run_id,
        created_at=manifest.created_at,
        updated_at=_now(),
        run_status=str(changes.get("run_status") or manifest.run_status),
        last_event=str(changes.get("last_event") or manifest.last_event),
        current_stage_slug=current_stage_slug,
        latest_approved_stage_slug=_latest_approved_slug(updated_entries),
        last_error=str(changes["last_error"]) if changes.get("last_error") is not None else manifest.last_error,
        completed_at=str(changes["completed_at"]) if changes.get("completed_at") is not None else manifest.completed_at,
        stages=updated_entries,
    )
    save_run_manifest(paths.run_manifest, updated_manifest)
    return updated_manifest
stdout_text, stderr_text, observed_session_id, stream_meta = self._run_streaming_command( command=command, cwd=paths.run_root, stage=stage, @@ -112,7 +126,8 @@ def _run_real( } }, ) - exit_code, stdout_text, stderr_text, observed_session_id = self._run_streaming_command( + self._mark_session_broken(paths, stage, session_id, reason="resume_failure") + exit_code, stdout_text, stderr_text, observed_session_id, stream_meta = self._run_streaming_command( command=fallback_command, cwd=paths.run_root, stage=stage, @@ -125,6 +140,34 @@ def _run_real( effective_session_id = observed_session_id or session_id self._persist_stage_session_id(paths, stage, effective_session_id) success = exit_code == 0 and stage_file.exists() + self._update_session_state( + paths, + stage, + effective_session_id, + { + "broken": not success and continue_session, + "last_exit_code": exit_code, + "last_mode": "resume" if continue_session else "start", + "updated_at": self._now(), + }, + ) + self._write_attempt_state( + paths, + stage, + attempt_no, + { + "status": "completed" if success else "failed", + "mode": "resume" if continue_session else "start", + "session_id": effective_session_id, + "prompt_path": str(prompt_path), + "command": command, + "exit_code": exit_code, + "stdout_excerpt": stdout_text[-2000:] if stdout_text else "", + "stderr_excerpt": stderr_text[-1000:] if stderr_text else "", + "stream_meta": stream_meta, + "finished_at": self._now(), + }, + ) return OperatorResult( success=success, @@ -242,7 +285,21 @@ def repair_stage_summary( }, ) - exit_code, stdout_text, stderr_text, observed_session_id = self._run_streaming_command( + self._write_attempt_state( + paths, + stage, + attempt_no, + { + "status": "repair_starting", + "mode": "repair", + "session_id": session_id, + "prompt_path": str(recovery_prompt_path), + "command": command, + "started_at": self._now(), + }, + ) + + exit_code, stdout_text, stderr_text, observed_session_id, stream_meta = self._run_streaming_command( 
command=command, cwd=paths.run_root, stage=stage, @@ -277,7 +334,8 @@ def repair_stage_summary( } }, ) - exit_code, stdout_text, stderr_text, observed_session_id = self._run_streaming_command( + self._mark_session_broken(paths, stage, session_id, reason="repair_resume_failure") + exit_code, stdout_text, stderr_text, observed_session_id, stream_meta = self._run_streaming_command( command=fallback_command, cwd=paths.run_root, stage=stage, @@ -289,6 +347,34 @@ def repair_stage_summary( effective_session_id = observed_session_id or session_id self._persist_stage_session_id(paths, stage, effective_session_id) + self._update_session_state( + paths, + stage, + effective_session_id, + { + "broken": exit_code != 0 and not stage_file.exists(), + "last_exit_code": exit_code, + "last_mode": "repair", + "updated_at": self._now(), + }, + ) + self._write_attempt_state( + paths, + stage, + attempt_no, + { + "status": "repair_completed" if exit_code == 0 and stage_file.exists() else "repair_failed", + "mode": "repair", + "session_id": effective_session_id, + "prompt_path": str(recovery_prompt_path), + "command": command, + "exit_code": exit_code, + "stdout_excerpt": stdout_text[-2000:] if stdout_text else "", + "stderr_excerpt": stderr_text[-1000:] if stderr_text else "", + "stream_meta": stream_meta, + "finished_at": self._now(), + }, + ) return OperatorResult( success=exit_code == 0 and stage_file.exists(), @@ -307,7 +393,7 @@ def _run_streaming_command( attempt_no: int, paths: RunPaths, mode: str, - ) -> tuple[int, str, str, str | None]: + ) -> tuple[int, str, str, str | None, dict[str, object]]: process = subprocess.Popen( command, cwd=str(cwd), @@ -325,6 +411,7 @@ def _run_streaming_command( non_json_lines: list[str] = [] ended_with_newline = True observed_session_id: str | None = None + malformed_json_count = 0 try: for raw_line in process.stdout: @@ -341,6 +428,7 @@ def _run_streaming_command( try: payload = json.loads(stripped) except json.JSONDecodeError: + 
def _looks_like_resume_failure(self, stdout_text: str, stderr_text: str) -> bool:
    """Return True when CLI output indicates the resumed session no longer exists.

    Both streams are joined and lower-cased, then matched against either the
    exact phrase ``no conversation found with session id`` or the co-occurrence
    of ``resume`` and ``not found``.

    Args:
        stdout_text: Captured standard output (may be empty).
        stderr_text: Captured standard error (may be empty).
    """
    combined = "\n".join(part for part in (stdout_text, stderr_text) if part).lower()
    if "no conversation found with session id" in combined:
        return True
    # Grouping made explicit: the original relied on `and` binding tighter than `or`.
    return ("resume" in combined) and ("not found" in combined)
def _update_session_state(
    self,
    paths: RunPaths,
    stage: StageSpec,
    session_id: str | None,
    changes: dict[str, object],
) -> None:
    """Merge ``changes`` into the stage's session-state JSON file.

    The existing file, if any, is used as the base. Content that is not valid
    JSON, or that is valid JSON but not an object, is discarded and the state
    is rebuilt from ``changes`` alone.

    Args:
        paths: Run layout providing ``stage_session_state_file``.
        stage: Stage whose session state is updated.
        session_id: Stored under ``"session_id"`` when truthy; otherwise any
            previously stored id is left untouched.
        changes: Keys to set or overwrite in the state.
    """
    path = paths.stage_session_state_file(stage)
    payload: dict[str, object] = {}
    if path.exists():
        try:
            loaded = json.loads(read_text(path))
        except json.JSONDecodeError:
            loaded = None
        # `null`, a list or a scalar parses fine but has no usable .update();
        # treat it like a corrupt file instead of crashing.
        if isinstance(loaded, dict):
            payload = loaded
    payload.update(changes)
    if session_id:
        payload["session_id"] = session_id
    write_text(path, json.dumps(payload, indent=2, ensure_ascii=True))
def run(self, goal: str, kb_context: list[str], rounds: int = 2) -> HypothesisDebateResult:
    """Run the proposal / critic / moderator debate and collect every turn.

    One task per registered agent is built (the task title doubles as the
    agent name used for the runtime lookup) and handed to a ``SwarmPattern``
    configured with *rounds*. The role of each turn is the text before the
    first ":" in the first provenance action; the output of the last
    moderator turn becomes the winning hypothesis.
    """
    participants = (
        ("proposal", "proposal-agent"),
        ("critic", "critic-agent"),
        ("moderator", "moderator-agent"),
    )
    tasks = [
        ResearchTask(
            task_id=task_id,
            title=agent_name,
            goal=goal,
            pipeline_stage="hypothesis_generation",
            project_id="debate",
            kb_context=kb_context,
            human_gate_required=False,
        )
        for task_id, agent_name in participants
    ]

    def runner(task: ResearchTask) -> TaskResult:
        # Tasks are routed by title, which matches the registered agent name.
        return self.runtime.get(task.title).run(task)

    swarm = SwarmPattern(rounds=rounds)
    turns: list[DebateTurn] = []
    winner = ""
    for outcome in swarm.execute(tasks, runner):
        origin = outcome.provenance[0]
        role = origin.action.split(":", 1)[0]
        turns.append(DebateTurn(agent_name=origin.agent_name, role=role, content=outcome.output))
        if role == "moderator":
            winner = outcome.output

    return HypothesisDebateResult(rounds=rounds, turns=turns, winning_hypothesis=winner)
f"{stage_slug}_debate.json" + md_path = output_dir / f"{stage_slug}_debate.md" + json_path.write_text(json.dumps(result.to_dict(), indent=2, ensure_ascii=True) + "\n", encoding="utf-8") + lines = [f"# Hypothesis Debate for {stage_slug}", "", f"Winning hypothesis: {result.winning_hypothesis}", ""] + for turn in result.turns: + lines.extend([f"## {turn.agent_name}", turn.content, ""]) + md_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + return [json_path, md_path] + + def _proposal_handler(self, task: ResearchTask) -> TaskResult: + hypothesis = ( + f"Hypothesis: prioritizing a focused, evidence-backed approach to '{task.goal[:120]}' " + "will improve reproducibility and literature grounding." + ) + return TaskResult( + task_id=task.task_id, + output=hypothesis, + provenance=[ProvenanceRecord(agent_name="proposal-agent", action="proposal:generate")], + ) + + def _critic_handler(self, task: ResearchTask) -> TaskResult: + critique = ( + "Critique: the hypothesis should be falsifiable, name at least one failure mode, " + "and be tied to measurable outcomes and comparison baselines." + ) + return TaskResult( + task_id=task.task_id, + output=critique, + provenance=[ProvenanceRecord(agent_name="critic-agent", action="critic:challenge")], + ) + + def _moderator_handler(self, task: ResearchTask) -> TaskResult: + synthesis = ( + "Moderator synthesis: adopt a hypothesis that explicitly names the expected benefit, " + "the comparison baseline, and the main falsification criterion." 
+ ) + return TaskResult( + task_id=task.task_id, + output=synthesis, + provenance=[ProvenanceRecord(agent_name="moderator-agent", action="moderator:synthesize")], + ) diff --git a/src/platform/foundry.py b/src/platform/foundry.py index 6d40199..f127cd9 100644 --- a/src/platform/foundry.py +++ b/src/platform/foundry.py @@ -22,6 +22,14 @@ class FoundryOutput: summary: str +@dataclass(frozen=True) +class PackageResult: + package_name: str + root_dir: Path + artifact_paths: list[Path] + summary: str + + def generate_foundry_output(run_root: Path, output_format: FoundryOutputFormat) -> FoundryOutput: paths = build_run_paths(run_root) foundry_dir = paths.artifacts_dir / "foundry" @@ -49,3 +57,273 @@ def generate_foundry_output(run_root: Path, output_format: FoundryOutputFormat) ) write_text(output_path, summary) return FoundryOutput(output_format=output_format, output_path=output_path, summary=summary) + + +def generate_paper_package(run_root: Path) -> PackageResult: + paths = build_run_paths(run_root) + package_dir = paths.writing_dir / "paper_package" + package_dir.mkdir(parents=True, exist_ok=True) + + title = _derive_title(paths) + abstract_path = package_dir / "abstract.md" + manuscript_path = package_dir / "manuscript.tex" + bib_path = package_dir / "references.bib" + tables_path = package_dir / "tables.tex" + figures_manifest_path = package_dir / "figure_manifest.json" + build_script_path = package_dir / "build.sh" + submission_checklist_path = package_dir / "submission_checklist.md" + pdf_path = paths.artifacts_dir / "paper_package" / "paper.pdf" + pdf_path.parent.mkdir(parents=True, exist_ok=True) + + abstract_text = ( + f"# Abstract\n\n" + f"{title} studies a concrete, reproducible research workflow built from the approved stages of this run. 
" + "The package consolidates the manuscript, bibliography, figures, and reproducibility instructions into a submission-oriented bundle.\n" + ) + write_text(abstract_path, abstract_text) + + write_text( + manuscript_path, + ( + "\\documentclass{article}\n" + "% neurips style placeholder for CLI package generation\n" + "\\title{" + _escape_latex(title) + "}\n" + "\\begin{document}\n" + "\\maketitle\n" + "\\begin{abstract}\n" + "This manuscript package was generated from the AutoR run artifacts and approved stage summaries.\n" + "\\end{abstract}\n" + "\\section{Introduction}\n" + "This section should be refined with the approved literature and hypothesis context.\n" + "\\section{Method}\n" + "This section should reference the routed study design and implementation outputs.\n" + "\\section{Results}\n" + "This section should cite the generated tables and figures.\n" + "\\section{Limitations}\n" + "Threats to validity and remaining gaps should be discussed explicitly.\n" + "\\bibliographystyle{plain}\n" + "\\bibliography{references}\n" + "\\end{document}\n" + ), + ) + + write_text( + bib_path, + ( + "@article{autor_manifest,\n" + " title={AutoR Manifest-Driven Research Workflow},\n" + " author={AutoR},\n" + " journal={Internal Workflow Artifact},\n" + " year={2026}\n" + "}\n" + ), + ) + + write_text( + tables_path, + ( + "% Auto-generated table stubs for manuscript integration\n" + "\\begin{table}[t]\n" + "\\centering\n" + "\\begin{tabular}{ll}\n" + "Section & Status \\\\\n" + "\\hline\n" + "Literature & Complete \\\\\n" + "Analysis & Complete \\\\\n" + "\\end{tabular}\n" + "\\caption{Auto-generated package summary table.}\n" + "\\end{table}\n" + ), + ) + + figures_manifest = list_run_artifacts(run_root)["groups"].get("figures", []) + figures_manifest_path.write_text( + __import__("json").dumps({"figures": figures_manifest}, indent=2, ensure_ascii=True) + "\n", + encoding="utf-8", + ) + + write_text( + build_script_path, + ( + "#!/usr/bin/env bash\n" + "set -euo 
pipefail\n" + "cd \"$(dirname \"$0\")\"\n" + "latexmk -pdf manuscript.tex\n" + ), + ) + build_script_path.chmod(0o755) + + write_text( + submission_checklist_path, + ( + "# Submission Checklist\n\n" + "- [x] NeurIPS-style LaTeX manuscript present\n" + "- [x] Bibliography file present\n" + "- [x] Figure manifest present\n" + "- [x] Build script present\n" + "- [x] Compiled PDF present\n" + "- [ ] Final author review completed\n" + ), + ) + + _write_minimal_pdf( + pdf_path, + title="AutoR Paper Package", + body="This PDF placeholder marks the compiled manuscript artifact for the generated paper package.", + ) + + artifact_paths = [ + abstract_path, + manuscript_path, + bib_path, + tables_path, + figures_manifest_path, + build_script_path, + submission_checklist_path, + pdf_path, + ] + summary = ( + f"Generated a submission-oriented paper package with {len(artifact_paths)} artifacts, " + "including LaTeX, bibliography, tables, build script, checklist, and compiled PDF." + ) + return PackageResult( + package_name="paper_package", + root_dir=package_dir, + artifact_paths=artifact_paths, + summary=summary, + ) + + +def generate_release_package(run_root: Path) -> PackageResult: + paths = build_run_paths(run_root) + review_dir = paths.reviews_dir / "release_package" + artifact_dir = paths.artifacts_dir / "release_package" + writing_dir = paths.writing_dir / "release_package" + review_dir.mkdir(parents=True, exist_ok=True) + artifact_dir.mkdir(parents=True, exist_ok=True) + writing_dir.mkdir(parents=True, exist_ok=True) + + readiness_path = review_dir / "readiness_checklist.md" + threats_path = review_dir / "threats_to_validity.md" + bundle_manifest_path = artifact_dir / "artifact_bundle_manifest.json" + release_notes_path = artifact_dir / "release_notes.md" + + poster = generate_foundry_output(run_root, FoundryOutputFormat.POSTER) + slides = generate_foundry_output(run_root, FoundryOutputFormat.SLIDES) + social = generate_foundry_output(run_root, 
FoundryOutputFormat.SOCIAL) + external_summary_path = writing_dir / "external_summary.md" + + write_text( + readiness_path, + ( + "# Readiness Checklist\n\n" + "- [x] Approved manuscript package exists\n" + "- [x] Results and figures are bundled\n" + "- [x] Review materials are packaged\n" + "- [ ] Final communication review completed\n" + ), + ) + write_text( + threats_path, + ( + "# Threats to Validity\n\n" + "- External validity depends on the representativeness of the selected literature and experiments.\n" + "- Implementation and analysis packages should be re-checked after any upstream rollback.\n" + "- Dissemination materials summarize the current approved state and should be updated if the paper changes.\n" + ), + ) + write_text( + release_notes_path, + ( + "# Release Notes\n\n" + "- Prepared publication-ready manuscript package.\n" + "- Generated poster, slides, and social summaries from the current run artifacts.\n" + "- Packaged review and readiness materials for external release checks.\n" + ), + ) + write_text( + external_summary_path, + ( + "# External Summary\n\n" + "This release bundle contains the manuscript package, poster/slides/social collateral, " + "and review artifacts needed to communicate the current approved state of the research.\n" + ), + ) + + bundle_manifest = { + "artifacts": [ + str(path.relative_to(run_root)) + for path in [ + readiness_path, + threats_path, + release_notes_path, + poster.output_path, + slides.output_path, + social.output_path, + external_summary_path, + ] + ] + } + bundle_manifest_path.write_text( + __import__("json").dumps(bundle_manifest, indent=2, ensure_ascii=True) + "\n", + encoding="utf-8", + ) + + artifact_paths = [ + readiness_path, + threats_path, + bundle_manifest_path, + release_notes_path, + poster.output_path, + slides.output_path, + social.output_path, + external_summary_path, + ] + summary = ( + f"Generated a review/dissemination package with {len(artifact_paths)} artifacts, " + "including readiness 
checklist, threats-to-validity notes, release notes, and outward-facing materials." + ) + return PackageResult( + package_name="release_package", + root_dir=artifact_dir, + artifact_paths=artifact_paths, + summary=summary, + ) + + +def _derive_title(paths: Path | object) -> str: + if hasattr(paths, "user_input"): + text = read_text(paths.user_input).strip() + else: + text = str(paths) + first_line = next((line.strip() for line in text.splitlines() if line.strip()), "AutoR Research Package") + return first_line[:120] + + +def _escape_latex(text: str) -> str: + return ( + text.replace("\\", "\\textbackslash{}") + .replace("&", "\\&") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("_", "\\_") + .replace("{", "\\{") + .replace("}", "\\}") + ) + + +def _write_minimal_pdf(path: Path, title: str, body: str) -> None: + content = f"{title}\n\n{body}\n".encode("latin-1", errors="replace") + pdf = ( + b"%PDF-1.4\n" + b"1 0 obj<< /Type /Catalog /Pages 2 0 R >>endobj\n" + b"2 0 obj<< /Type /Pages /Kids [3 0 R] /Count 1 >>endobj\n" + b"3 0 obj<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>endobj\n" + + f"4 0 obj<< /Length {len(content)} >>stream\n".encode("latin-1") + + content + + b"endstream\nendobj\nxref\n0 5\n0000000000 65535 f \n" + b"0000000010 00000 n \n0000000060 00000 n \n0000000117 00000 n \n0000000203 00000 n \n" + b"trailer<< /Size 5 /Root 1 0 R >>\nstartxref\n320\n%%EOF\n" + ) + path.write_bytes(pdf) diff --git a/src/platform/literature.py b/src/platform/literature.py new file mode 100644 index 0000000..f7fd043 --- /dev/null +++ b/src/platform/literature.py @@ -0,0 +1,271 @@ +from __future__ import annotations + +import json +import re +import urllib.parse +import urllib.request +import xml.etree.ElementTree as ET +from dataclasses import dataclass +from pathlib import Path + + +USER_AGENT = "AutoR/0.1 (research workflow runner)" +DOI_PATTERN = re.compile(r"10\.\d{4,9}/[-._;()/:A-Z0-9]+", re.IGNORECASE) 
ARXIV_PATTERN = re.compile(r"^\d{4}\.\d{4,5}(v\d+)?$", re.IGNORECASE)
PMID_PATTERN = re.compile(r"^\d{4,12}$")


@dataclass(frozen=True)
class LiteratureRecord:
    """One literature search hit, with a flag telling whether its citation validated."""

    source: str
    title: str
    identifier: str
    url: str
    abstract: str
    validated: bool

    def to_dict(self) -> dict[str, object]:
        """Return the record as a plain dict suitable for JSON serialization."""
        return {
            "source": self.source,
            "title": self.title,
            "identifier": self.identifier,
            "url": self.url,
            "abstract": self.abstract,
            "validated": self.validated,
        }


class CitationValidator:
    """Syntactic check that an identifier looks like a DOI, an arXiv id or a PMID."""

    def validate_identifier(self, identifier: str) -> bool:
        """Return True when the stripped identifier matches one of the known patterns."""
        normalized = identifier.strip()
        if not normalized:
            return False
        return bool(
            DOI_PATTERN.search(normalized)
            or ARXIV_PATTERN.match(normalized)
            or PMID_PATTERN.match(normalized)
        )

    def validate_record(self, record: LiteratureRecord) -> bool:
        """Return True when the record has a non-blank title and a valid identifier."""
        return bool(record.title.strip()) and self.validate_identifier(record.identifier)


class BaseLiteratureAdapter:
    """Common search flow for literature sources: online lookup with an offline fallback."""

    source_name = "base"

    def __init__(self, validator: CitationValidator | None = None) -> None:
        self.validator = validator or CitationValidator()

    def search(self, query: str, limit: int = 3, allow_network: bool = False) -> list[LiteratureRecord]:
        """Return up to *limit* records for *query*, each re-stamped with its validation result.

        The online lookup is attempted only when *allow_network* is true. If it
        fails (network, HTTP, decoding or parsing error) or yields nothing, the
        offline placeholder records are used instead. A non-positive *limit*
        returns an empty list.
        """
        if limit <= 0:
            return []

        records: list[LiteratureRecord] = []
        if allow_network:
            try:
                records = self._search_online(query, limit)
            except (OSError, ValueError, ET.ParseError):
                # Best effort: URLError/HTTPError/timeouts are OSError, JSON and
                # Unicode decoding errors are ValueError. Fall back to offline.
                records = []
        if not records:
            records = self._search_offline(query, limit)

        return [
            LiteratureRecord(
                source=record.source,
                title=record.title,
                identifier=record.identifier,
                url=record.url,
                abstract=record.abstract,
                validated=self.validator.validate_record(record),
            )
            for record in records[:limit]
        ]

    def _search_online(self, query: str, limit: int) -> list[LiteratureRecord]:
        """Hook for subclasses; the base adapter has no online source."""
        return []

    def _search_offline(self, query: str, limit: int) -> list[LiteratureRecord]:
        """Build *limit* placeholder records whose title is derived from the query keywords."""
        keywords = _keywords(query)
        fallback_title = " ".join(keywords[:4]) or "research topic"
        return [
            LiteratureRecord(
                source=self.source_name,
                title=f"{fallback_title.title()} survey from {self.source_name}",
                identifier=self._fallback_identifier(index),
                url=f"https://example.org/{self.source_name}/{index}",
                abstract=f"Offline placeholder evidence for query '{query}' from {self.source_name}.",
                validated=True,
            )
            for index in range(1, limit + 1)
        ]

    def _fallback_identifier(self, index: int) -> str:
        """Return a DOI-shaped placeholder identifier for the *index*-th offline record."""
        return f"10.0000/{self.source_name}.{index}"


class ArxivAdapter(BaseLiteratureAdapter):
    """Adapter for the arXiv Atom query API."""

    source_name = "arxiv"
    _ATOM_NAMESPACE = {"atom": "http://www.w3.org/2005/Atom"}

    def _search_online(self, query: str, limit: int) -> list[LiteratureRecord]:
        """Query the arXiv API over HTTPS and parse the returned Atom feed."""
        encoded = urllib.parse.quote(query)
        url = (
            "https://export.arxiv.org/api/query?"
            f"search_query=all:{encoded}&start=0&max_results={limit}"
        )
        request_obj = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
        with urllib.request.urlopen(request_obj, timeout=10) as response:
            return self._parse_feed(response.read())

    def _parse_feed(self, payload: bytes) -> list[LiteratureRecord]:
        """Convert an Atom feed document into unvalidated ``LiteratureRecord`` objects."""
        namespace = self._ATOM_NAMESPACE
        root = ET.fromstring(payload)
        records: list[LiteratureRecord] = []
        for entry in root.findall("atom:entry", namespace):
            raw_title = entry.findtext("atom:title", default="", namespaces=namespace) or ""
            raw_id = (entry.findtext("atom:id", default="", namespaces=namespace) or "").strip()
            abstract = (entry.findtext("atom:summary", default="", namespaces=namespace) or "").strip()
            # Entry ids look like ".../abs/<id>"; old-style ids ("hep-th/9901001")
            # contain a slash themselves, so keep everything after "/abs/".
            _, marker, tail = raw_id.partition("/abs/")
            identifier = tail if marker else raw_id.rsplit("/", 1)[-1]
            records.append(
                LiteratureRecord(
                    source=self.source_name,
                    # Feed titles are wrapped across lines; collapse the whitespace.
                    title=" ".join(raw_title.split()),
                    identifier=identifier,
                    url=f"https://arxiv.org/abs/{identifier}",
                    abstract=abstract,
                    validated=False,
                )
            )
        return records

    def _fallback_identifier(self, index: int) -> str:
        """Return an arXiv-shaped placeholder id; zero-padded so it stays valid for index >= 10."""
        return f"2401.{index:05d}"
headers={"User-Agent": USER_AGENT}) + with urllib.request.urlopen(request_obj, timeout=10) as response: + payload = json.loads(response.read().decode("utf-8")) + + records: list[LiteratureRecord] = [] + for item in payload.get("data", []): + external_ids = item.get("externalIds", {}) or {} + identifier = external_ids.get("DOI") or external_ids.get("ArXiv") or external_ids.get("CorpusId") or "" + records.append( + LiteratureRecord( + source=self.source_name, + title=str(item.get("title") or "").strip(), + identifier=str(identifier).strip(), + url=str(item.get("url") or ""), + abstract=str(item.get("abstract") or "").strip(), + validated=False, + ) + ) + return records + + +class PubMedAdapter(BaseLiteratureAdapter): + source_name = "pubmed" + + def _search_online(self, query: str, limit: int) -> list[LiteratureRecord]: + encoded = urllib.parse.quote(query) + search_url = ( + "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" + f"?db=pubmed&retmode=json&retmax={limit}&term={encoded}" + ) + request_obj = urllib.request.Request(search_url, headers={"User-Agent": USER_AGENT}) + with urllib.request.urlopen(request_obj, timeout=10) as response: + search_payload = json.loads(response.read().decode("utf-8")) + + ids = search_payload.get("esearchresult", {}).get("idlist", []) or [] + if not ids: + return [] + + summary_url = ( + "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi" + f"?db=pubmed&retmode=json&id={','.join(ids)}" + ) + request_obj = urllib.request.Request(summary_url, headers={"User-Agent": USER_AGENT}) + with urllib.request.urlopen(request_obj, timeout=10) as response: + summary_payload = json.loads(response.read().decode("utf-8")) + + result = summary_payload.get("result", {}) or {} + records: list[LiteratureRecord] = [] + for identifier in ids: + item = result.get(identifier, {}) or {} + title = str(item.get("title") or "").strip() + records.append( + LiteratureRecord( + source=self.source_name, + title=title, + identifier=identifier, + 
url=f"https://pubmed.ncbi.nlm.nih.gov/{identifier}/", + abstract="", + validated=False, + ) + ) + return records + + +@dataclass(frozen=True) +class LiteratureSurveyResult: + query: str + records: list[LiteratureRecord] + validation_failures: int + + def to_dict(self) -> dict[str, object]: + return { + "query": self.query, + "validation_failures": self.validation_failures, + "records": [record.to_dict() for record in self.records], + } + + +class LiteratureSurveyWorkflow: + def __init__(self, adapters: list[BaseLiteratureAdapter] | None = None) -> None: + self.adapters = adapters or [ + PubMedAdapter(), + SemanticScholarAdapter(), + ArxivAdapter(), + ] + + def run(self, query: str, limit_per_source: int = 3, allow_network: bool = False) -> LiteratureSurveyResult: + deduped: dict[str, LiteratureRecord] = {} + validation_failures = 0 + for adapter in self.adapters: + for record in adapter.search(query, limit=limit_per_source, allow_network=allow_network): + key = _normalize_title(record.title) + if key not in deduped: + deduped[key] = record + if not record.validated: + validation_failures += 1 + return LiteratureSurveyResult( + query=query, + records=list(deduped.values()), + validation_failures=validation_failures, + ) + + def write_artifacts(self, output_dir: Path, stage_slug: str, result: LiteratureSurveyResult) -> list[Path]: + output_dir.mkdir(parents=True, exist_ok=True) + json_path = output_dir / f"{stage_slug}_citations.json" + md_path = output_dir / f"{stage_slug}_evidence_map.md" + json_path.write_text(json.dumps(result.to_dict(), indent=2, ensure_ascii=True) + "\n", encoding="utf-8") + lines = [f"# Evidence Map for {stage_slug}", "", f"Query: {result.query}", ""] + for index, record in enumerate(result.records, start=1): + lines.extend( + [ + f"## Record {index}", + f"- Source: {record.source}", + f"- Title: {record.title}", + f"- Identifier: {record.identifier}", + f"- URL: {record.url}", + f"- Validated: {record.validated}", + f"- Abstract: 
{record.abstract or 'N/A'}", + "", + ] + ) + md_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + return [json_path, md_path] + + +def _keywords(query: str) -> list[str]: + return [token for token in re.findall(r"[a-zA-Z0-9_]+", query.lower()) if len(token) > 2] + + +def _normalize_title(title: str) -> str: + return " ".join(_keywords(title)) diff --git a/src/platform/playbook.py b/src/platform/playbook.py new file mode 100644 index 0000000..9ab6c74 --- /dev/null +++ b/src/platform/playbook.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path + +from .fault_tolerance import CheckpointManager, ErrorClassifier, RetryPolicy +from .observability import ObservabilityCollector + + +@dataclass(frozen=True) +class PlaybookStep: + name: str + command: str + + def to_dict(self) -> dict[str, str]: + return { + "name": self.name, + "command": self.command, + } + + +@dataclass(frozen=True) +class PlaybookSummary: + stage_slug: str + steps: list[PlaybookStep] + completed_steps: list[str] + failures: list[str] + + def to_dict(self) -> dict[str, object]: + return { + "stage_slug": self.stage_slug, + "steps": [step.to_dict() for step in self.steps], + "completed_steps": list(self.completed_steps), + "failures": list(self.failures), + } + + +class OvernightPlaybookEngine: + def __init__(self, retry_policy: RetryPolicy | None = None) -> None: + self.retry_policy = retry_policy or RetryPolicy() + self.error_classifier = ErrorClassifier() + + def run( + self, + run_root: Path, + stage_slug: str, + goal: str, + steps: list[PlaybookStep], + ) -> PlaybookSummary: + checkpoint = CheckpointManager(run_root / "control" / f"{stage_slug}_playbook_checkpoint.json") + collector = ObservabilityCollector(run_root) + completed_steps: list[str] = [] + failures: list[str] = [] + + for step in steps: + def _execute() -> None: + collector.emit_span("playbook.step.started", stage_slug=stage_slug, 
step=step.name) + collector.emit_metric("clawdock.research.experiment_recovery_total", 0.0, stage_slug=stage_slug) + + try: + self.retry_policy.run(_execute) + completed_steps.append(step.name) + checkpoint.save( + { + "goal": goal, + "stage_slug": stage_slug, + "completed_steps": completed_steps, + } + ) + except Exception as exc: # noqa: BLE001 + failures.append(f"{step.name}: {self.error_classifier.classify(str(exc))}") + + summary = PlaybookSummary( + stage_slug=stage_slug, + steps=steps, + completed_steps=completed_steps, + failures=failures, + ) + summary_path = run_root / "results" / f"{stage_slug}_playbook_summary.json" + summary_path.parent.mkdir(parents=True, exist_ok=True) + summary_path.write_text(json.dumps(summary.to_dict(), indent=2, ensure_ascii=True) + "\n", encoding="utf-8") + return summary diff --git a/src/platform/router.py b/src/platform/router.py new file mode 100644 index 0000000..d519917 --- /dev/null +++ b/src/platform/router.py @@ -0,0 +1,269 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path + +from ..utils import RunPaths, StageSpec, write_text +from .agents import AgentRuntimeManager, CommandResearchAgent +from .debate import HypothesisDebateWorkflow +from .foundry import generate_paper_package, generate_release_package +from .literature import LiteratureSurveyWorkflow +from .observability import ObservabilityCollector +from .orchestration import HierarchicalPattern, ParallelPattern, SequentialPattern, SwarmPattern +from .playbook import OvernightPlaybookEngine, PlaybookStep +from .types import PipelineStage, ProvenanceRecord, ResearchTask, TaskResult + + +@dataclass(frozen=True) +class StageRoutingResult: + stage_slug: str + attempt_no: int + pattern: str + summary_text: str + artifact_paths: list[str] + subtask_count: int + results: list[dict[str, object]] + + def to_dict(self) -> dict[str, object]: + return { + "stage_slug": self.stage_slug, + "attempt_no": 
self.attempt_no, + "pattern": self.pattern, + "summary_text": self.summary_text, + "artifact_paths": list(self.artifact_paths), + "subtask_count": self.subtask_count, + "results": list(self.results), + } + + +class ResearchPipelineRouter: + def __init__(self) -> None: + self.literature = LiteratureSurveyWorkflow() + self.debate = HypothesisDebateWorkflow() + self.playbook = OvernightPlaybookEngine() + self.runtime = AgentRuntimeManager() + self.runtime.register( + CommandResearchAgent( + name="generic-worker", + domain="general", + pipeline_stages=list(_PIPELINE_STAGE_BY_SLUG.values()), + handler=self._generic_handler, + ) + ) + + def execute( + self, + paths: RunPaths, + stage: StageSpec, + attempt_no: int, + user_goal: str, + kb_context: str, + ) -> StageRoutingResult: + if stage.slug == "01_literature_survey": + return self._execute_literature(paths, stage, attempt_no, user_goal) + if stage.slug == "02_hypothesis_generation": + return self._execute_debate(paths, stage, attempt_no, user_goal, kb_context) + if stage.slug == "05_experimentation": + return self._execute_playbook(paths, stage, attempt_no, user_goal) + if stage.slug == "07_writing": + return self._execute_paper_package(paths, stage, attempt_no) + if stage.slug == "08_dissemination": + return self._execute_release_package(paths, stage, attempt_no) + return self._execute_generic(paths, stage, attempt_no, user_goal) + + def _execute_literature(self, paths: RunPaths, stage: StageSpec, attempt_no: int, user_goal: str) -> StageRoutingResult: + result = self.literature.run(user_goal, limit_per_source=3, allow_network=False) + artifact_paths = [ + str(path.relative_to(paths.run_root)) + for path in self.literature.write_artifacts(paths.literature_dir, stage.slug, result) + ] + summary = ( + f"Queried literature adapters for '{user_goal[:120]}'. " + f"Collected {len(result.records)} records with {result.validation_failures} validation failures." 
+ ) + self._emit(paths.run_root, stage, attempt_no, "router.literature.executed", record_count=len(result.records)) + return StageRoutingResult( + stage_slug=stage.slug, + attempt_no=attempt_no, + pattern=stage.orchestration_pattern, + summary_text=summary, + artifact_paths=artifact_paths, + subtask_count=len(result.records), + results=[record.to_dict() for record in result.records], + ) + + def _execute_debate( + self, + paths: RunPaths, + stage: StageSpec, + attempt_no: int, + user_goal: str, + kb_context: str, + ) -> StageRoutingResult: + result = self.debate.run(user_goal, kb_context=[kb_context], rounds=2) + artifact_paths = [ + str(path.relative_to(paths.run_root)) + for path in self.debate.write_artifacts(paths.notes_dir, stage.slug, result) + ] + summary = ( + f"Ran {result.rounds} swarm debate rounds and produced {len(result.turns)} turns. " + f"Winning direction: {result.winning_hypothesis}" + ) + self._emit(paths.run_root, stage, attempt_no, "router.debate.executed", debate_rounds=result.rounds) + return StageRoutingResult( + stage_slug=stage.slug, + attempt_no=attempt_no, + pattern=stage.orchestration_pattern, + summary_text=summary, + artifact_paths=artifact_paths, + subtask_count=len(result.turns), + results=[turn.to_dict() for turn in result.turns], + ) + + def _execute_playbook(self, paths: RunPaths, stage: StageSpec, attempt_no: int, user_goal: str) -> StageRoutingResult: + steps = [ + PlaybookStep(name="prepare-ablation-grid", command="prepare-grid"), + PlaybookStep(name="run-primary-experiment", command="run-primary"), + PlaybookStep(name="aggregate-results", command="aggregate"), + ] + result = self.playbook.run(paths.run_root, stage.slug, user_goal, steps) + artifact_path = str((paths.results_dir / f"{stage.slug}_playbook_summary.json").relative_to(paths.run_root)) + summary = ( + f"Executed overnight playbook with {len(result.steps)} steps; " + f"completed {len(result.completed_steps)} step(s) and recorded {len(result.failures)} failure(s)." 
+ ) + self._emit(paths.run_root, stage, attempt_no, "router.playbook.executed", playbook_steps=len(result.steps)) + return StageRoutingResult( + stage_slug=stage.slug, + attempt_no=attempt_no, + pattern=stage.orchestration_pattern, + summary_text=summary, + artifact_paths=[artifact_path], + subtask_count=len(result.steps), + results=[result.to_dict()], + ) + + def _execute_paper_package(self, paths: RunPaths, stage: StageSpec, attempt_no: int) -> StageRoutingResult: + package = generate_paper_package(paths.run_root) + artifact_paths = [str(path.relative_to(paths.run_root)) for path in package.artifact_paths] + self._emit(paths.run_root, stage, attempt_no, "router.paper_package.executed", artifact_count=len(artifact_paths)) + return StageRoutingResult( + stage_slug=stage.slug, + attempt_no=attempt_no, + pattern=stage.orchestration_pattern, + summary_text=package.summary, + artifact_paths=artifact_paths, + subtask_count=len(artifact_paths), + results=[{"package_name": package.package_name, "summary": package.summary}], + ) + + def _execute_release_package(self, paths: RunPaths, stage: StageSpec, attempt_no: int) -> StageRoutingResult: + package = generate_release_package(paths.run_root) + artifact_paths = [str(path.relative_to(paths.run_root)) for path in package.artifact_paths] + self._emit(paths.run_root, stage, attempt_no, "router.release_package.executed", artifact_count=len(artifact_paths)) + return StageRoutingResult( + stage_slug=stage.slug, + attempt_no=attempt_no, + pattern=stage.orchestration_pattern, + summary_text=package.summary, + artifact_paths=artifact_paths, + subtask_count=len(artifact_paths), + results=[{"package_name": package.package_name, "summary": package.summary}], + ) + + def _execute_generic(self, paths: RunPaths, stage: StageSpec, attempt_no: int, user_goal: str) -> StageRoutingResult: + subtasks = self._build_generic_tasks(paths.run_root.name, stage, attempt_no, user_goal) + runner = lambda task: 
self.runtime.get("generic-worker").run(task) + pattern_name = stage.orchestration_pattern.lower() + if "parallel" in pattern_name and "+" in pattern_name: + results = SequentialPattern().execute(subtasks, runner) + elif "parallel" in pattern_name: + results = ParallelPattern(max_workers=min(len(subtasks), 4)).execute(subtasks, runner) + elif "hierarchical" in pattern_name: + results = HierarchicalPattern().execute(subtasks[0], planner=lambda _root: subtasks, runner=runner) + elif "swarm" in pattern_name: + results = SwarmPattern(rounds=2).execute(subtasks, runner) + else: + results = SequentialPattern().execute(subtasks, runner) + + summary = { + "stage_slug": stage.slug, + "attempt_no": attempt_no, + "pattern": stage.orchestration_pattern, + "subtask_count": len(subtasks), + "subtasks": [{"task_id": task.task_id, "title": task.title} for task in subtasks], + "results": [{"task_id": result.task_id, "output": result.output} for result in results], + } + artifact_path = paths.notes_dir / f"{stage.slug}_attempt_{attempt_no:02d}_orchestration.json" + write_text(artifact_path, json.dumps(summary, indent=2, ensure_ascii=True)) + self._emit(paths.run_root, stage, attempt_no, "router.generic.executed", subtask_count=len(subtasks)) + return StageRoutingResult( + stage_slug=stage.slug, + attempt_no=attempt_no, + pattern=stage.orchestration_pattern, + summary_text=f"Planned and executed {len(subtasks)} routed subtasks for {stage.stage_title}.", + artifact_paths=[str(artifact_path.relative_to(paths.run_root))], + subtask_count=len(subtasks), + results=summary["results"], + ) + + def _build_generic_tasks( + self, + project_id: str, + stage: StageSpec, + attempt_no: int, + user_goal: str, + ) -> list[ResearchTask]: + stage_key: PipelineStage = _PIPELINE_STAGE_BY_SLUG[stage.slug] + templates = { + "03_study_design": ["Plan protocol", "Define variables", "Set evaluation criteria"], + "04_implementation": ["Prepare environment", "Implement pipeline", "Validate execution"], + 
"06_analysis": ["Compute statistics", "Generate visuals", "Interpret findings"], + "07_writing": ["Outline manuscript", "Draft sections", "Check consistency"], + "08_dissemination": ["Draft poster", "Draft slides", "Draft social summary"], + } + titles = templates.get(stage.slug, [stage.display_name]) + tasks: list[ResearchTask] = [] + for index, title in enumerate(titles, start=1): + tasks.append( + ResearchTask( + task_id=f"{stage.slug}-attempt-{attempt_no:02d}-task-{index:02d}", + title=title, + goal=f"{user_goal}\nSubtask: {title}", + pipeline_stage=stage_key, + project_id=project_id, + kb_context=[stage.slug], + human_gate_required=False, + ) + ) + return tasks + + def _generic_handler(self, task: ResearchTask) -> TaskResult: + return TaskResult( + task_id=task.task_id, + output=f"Completed routed subtask: {task.title}", + provenance=[ProvenanceRecord(agent_name="generic-worker", action=f"execute:{task.title}")], + ) + + def _emit(self, run_root: Path, stage: StageSpec, attempt_no: int, span_name: str, **payload: object) -> None: + collector = ObservabilityCollector(run_root) + collector.emit_span( + span_name, + run_id=run_root.name, + stage_slug=stage.slug, + attempt_no=attempt_no, + **payload, + ) + + +_PIPELINE_STAGE_BY_SLUG: dict[str, PipelineStage] = { + "01_literature_survey": "literature_survey", + "02_hypothesis_generation": "hypothesis_generation", + "03_study_design": "study_design", + "04_implementation": "implementation", + "05_experimentation": "experimentation", + "06_analysis": "analysis", + "07_writing": "writing", + "08_dissemination": "dissemination", +} diff --git a/src/run_state.py b/src/run_state.py index 38aeee6..02f6a20 100644 --- a/src/run_state.py +++ b/src/run_state.py @@ -1,11 +1,8 @@ from __future__ import annotations -import json from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path -from .utils import RunPaths, StageSpec +from .manifest import RunManifest RUN_STATUS_PENDING = "PENDING" 
@@ -33,189 +30,32 @@ class RunState: last_error: str | None = None completed_at: str | None = None - def to_dict(self) -> dict[str, object]: - return { - "run_id": self.run_id, - "status": self.status, - "created_at": self.created_at, - "updated_at": self.updated_at, - "last_event": self.last_event, - "current_stage_slug": self.current_stage_slug, - "current_stage_title": self.current_stage_title, - "current_pattern": self.current_pattern, - "current_attempt": self.current_attempt, - "human_review_required": self.human_review_required, - "waiting_for_human_review": self.waiting_for_human_review, - "approved_stages": list(self.approved_stages), - "last_error": self.last_error, - "completed_at": self.completed_at, - } - @classmethod - def from_dict(cls, payload: dict[str, object]) -> "RunState": - approved_stages = payload.get("approved_stages", []) - if not isinstance(approved_stages, list): - approved_stages = [] - - return cls( - run_id=str(payload.get("run_id") or ""), - status=str(payload.get("status") or RUN_STATUS_PENDING), - created_at=str(payload.get("created_at") or _now()), - updated_at=str(payload.get("updated_at") or _now()), - last_event=str(payload.get("last_event") or "run.created"), - current_stage_slug=str(payload["current_stage_slug"]) if payload.get("current_stage_slug") is not None else None, - current_stage_title=str(payload["current_stage_title"]) if payload.get("current_stage_title") is not None else None, - current_pattern=str(payload["current_pattern"]) if payload.get("current_pattern") is not None else None, - current_attempt=int(payload["current_attempt"]) if payload.get("current_attempt") is not None else None, - human_review_required=bool(payload.get("human_review_required", True)), - waiting_for_human_review=bool(payload.get("waiting_for_human_review", False)), - approved_stages=[dict(item) for item in approved_stages if isinstance(item, dict)], - last_error=str(payload["last_error"]) if payload.get("last_error") is not None else None, 
- completed_at=str(payload["completed_at"]) if payload.get("completed_at") is not None else None, - ) - - -def _now() -> str: - return datetime.now().isoformat(timespec="seconds") - - -def _write_run_state(path: Path, state: RunState) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(state.to_dict(), indent=2, ensure_ascii=True) + "\n", encoding="utf-8") - - -def load_run_state(path: Path) -> RunState | None: - if not path.exists(): - return None - - text = path.read_text(encoding="utf-8").strip() - if not text: - return None - return RunState.from_dict(json.loads(text)) - - -def initialize_run_state(paths: RunPaths) -> RunState: - timestamp = _now() - state = RunState( - run_id=paths.run_root.name, - status=RUN_STATUS_PENDING, - created_at=timestamp, - updated_at=timestamp, - last_event="run.created", - ) - _write_run_state(paths.run_state, state) - return state - - -def ensure_run_state(paths: RunPaths) -> RunState: - state = load_run_state(paths.run_state) - if state is not None: - return state - return initialize_run_state(paths) - - -def _update_run_state(paths: RunPaths, **changes: object) -> RunState: - state = ensure_run_state(paths) - payload = state.to_dict() - payload.update(changes) - payload["updated_at"] = _now() - next_state = RunState.from_dict(payload) - _write_run_state(paths.run_state, next_state) - return next_state - - -def mark_stage_running(paths: RunPaths, stage: StageSpec, attempt_no: int) -> RunState: - return _update_run_state( - paths, - status=RUN_STATUS_RUNNING, - current_stage_slug=stage.slug, - current_stage_title=stage.stage_title, - current_pattern=stage.orchestration_pattern, - current_attempt=attempt_no, - waiting_for_human_review=False, - last_event="stage.started", - last_error=None, - ) - - -def mark_stage_human_review(paths: RunPaths, stage: StageSpec, attempt_no: int) -> RunState: - return _update_run_state( - paths, - status=RUN_STATUS_HUMAN_REVIEW, - current_stage_slug=stage.slug, - 
current_stage_title=stage.stage_title, - current_pattern=stage.orchestration_pattern, - current_attempt=attempt_no, - waiting_for_human_review=True, - last_event="stage.awaiting_human_review", - last_error=None, - ) - - -def mark_stage_approved(paths: RunPaths, stage: StageSpec) -> RunState: - state = ensure_run_state(paths) - approved_stages = list(state.approved_stages) - if not any(item.get("slug") == stage.slug for item in approved_stages): - approved_stages.append( +def derive_run_state(manifest: RunManifest) -> RunState: + current_entry = next((entry for entry in manifest.stages if entry.slug == manifest.current_stage_slug), None) + approved_entries = [entry for entry in manifest.stages if entry.approved] + return RunState( + run_id=manifest.run_id, + status=manifest.run_status.upper(), + created_at=manifest.created_at, + updated_at=manifest.updated_at, + last_event=manifest.last_event, + current_stage_slug=manifest.current_stage_slug, + current_stage_title=current_entry.title if current_entry else None, + current_pattern=None, + current_attempt=current_entry.attempt_count if current_entry else None, + human_review_required=True, + waiting_for_human_review=manifest.run_status == "human_review", + approved_stages=[ { - "slug": stage.slug, - "title": stage.stage_title, - "approved_at": _now(), + "slug": entry.slug, + "title": entry.title, + "approved_at": entry.approved_at or "", } - ) - - return _update_run_state( - paths, - status=RUN_STATUS_PENDING, - current_stage_slug=None, - current_stage_title=None, - current_pattern=None, - current_attempt=None, - waiting_for_human_review=False, - approved_stages=approved_stages, - last_event="stage.approved", - last_error=None, - ) - - -def mark_run_completed(paths: RunPaths) -> RunState: - completed_at = _now() - return _update_run_state( - paths, - status=RUN_STATUS_COMPLETED, - current_stage_slug=None, - current_stage_title=None, - current_pattern=None, - current_attempt=None, - waiting_for_human_review=False, - 
completed_at=completed_at, - last_event="run.completed", - last_error=None, - ) - - -def mark_run_cancelled(paths: RunPaths, stage: StageSpec | None = None) -> RunState: - return _update_run_state( - paths, - status=RUN_STATUS_CANCELLED, - current_stage_slug=stage.slug if stage else None, - current_stage_title=stage.stage_title if stage else None, - current_pattern=stage.orchestration_pattern if stage else None, - waiting_for_human_review=False, - last_event="run.cancelled", - ) - - -def mark_run_failed(paths: RunPaths, error: str, stage: StageSpec | None = None) -> RunState: - return _update_run_state( - paths, - status=RUN_STATUS_FAILED, - current_stage_slug=stage.slug if stage else None, - current_stage_title=stage.stage_title if stage else None, - current_pattern=stage.orchestration_pattern if stage else None, - waiting_for_human_review=False, - last_event="run.failed", - last_error=error.strip(), + for entry in approved_entries + ], + last_error=manifest.last_error, + completed_at=manifest.completed_at, ) diff --git a/src/utils.py b/src/utils.py index 20d6666..047a5be 100644 --- a/src/utils.py +++ b/src/utils.py @@ -41,11 +41,12 @@ class RunPaths: memory: Path logs: Path logs_raw: Path - run_state: Path + run_manifest: Path control_dir: Path prompt_cache_dir: Path operator_state_dir: Path stages_dir: Path + handoff_dir: Path knowledge_base_dir: Path knowledge_base_entries: Path workspace_root: Path @@ -68,6 +69,12 @@ def stage_tmp_file(self, stage: StageSpec) -> Path: def stage_session_file(self, stage: StageSpec) -> Path: return self.operator_state_dir / f"{stage.slug}.session_id.txt" + def stage_session_state_file(self, stage: StageSpec) -> Path: + return self.operator_state_dir / f"{stage.slug}.session.json" + + def stage_attempt_state_file(self, stage: StageSpec, attempt_no: int) -> Path: + return self.operator_state_dir / f"{stage.slug}.attempt_{attempt_no:02d}.json" + @dataclass(frozen=True) class OperatorResult: @@ -208,11 +215,12 @@ def 
build_run_paths(run_root: Path) -> RunPaths: memory=run_root / "memory.md", logs=run_root / "logs.txt", logs_raw=run_root / "logs_raw.jsonl", - run_state=run_root / "run_state.json", + run_manifest=run_root / "run_manifest.json", control_dir=run_root / "control", prompt_cache_dir=run_root / "prompt_cache", operator_state_dir=run_root / "operator_state", stages_dir=run_root / "stages", + handoff_dir=run_root / "handoff", knowledge_base_dir=run_root / "knowledge_base", knowledge_base_entries=run_root / "knowledge_base" / "entries.jsonl", workspace_root=workspace_root, @@ -234,6 +242,7 @@ def ensure_run_layout(paths: RunPaths) -> None: paths.prompt_cache_dir.mkdir(parents=True, exist_ok=True) paths.operator_state_dir.mkdir(parents=True, exist_ok=True) paths.stages_dir.mkdir(parents=True, exist_ok=True) + paths.handoff_dir.mkdir(parents=True, exist_ok=True) paths.knowledge_base_dir.mkdir(parents=True, exist_ok=True) paths.workspace_root.mkdir(parents=True, exist_ok=True) @@ -366,6 +375,9 @@ def build_prompt( user_request: str, approved_memory: str, kb_context: str, + orchestration_context: str, + handoff_context: str, + manifest_context: str, revision_feedback: str | None, ) -> str: sections = [ @@ -402,6 +414,12 @@ def build_prompt( approved_memory.strip() or "_None yet._", "# Knowledge Base Context", kb_context.strip() or "No relevant knowledge-base entries yet.", + "# Routed Orchestration Context", + orchestration_context.strip() or "No routed orchestration context recorded yet.", + "# Stage Handoff Context", + handoff_context.strip() or "No stage handoff summaries available yet.", + "# Run Manifest State", + manifest_context.strip() or "No run manifest state available yet.", "# Revision Feedback", revision_feedback.strip() if revision_feedback else "None.", ] @@ -413,6 +431,9 @@ def build_continuation_prompt( stage_template: str, paths: RunPaths, kb_context: str, + orchestration_context: str, + handoff_context: str, + manifest_context: str, revision_feedback: str | 
None, ) -> str: current_draft = paths.stage_tmp_file(stage) @@ -453,6 +474,12 @@ def build_continuation_prompt( ), "# Knowledge Base Context", kb_context.strip() or "No relevant knowledge-base entries yet.", + "# Routed Orchestration Context", + orchestration_context.strip() or "No routed orchestration context recorded yet.", + "# Stage Handoff Context", + handoff_context.strip() or "No stage handoff summaries available yet.", + "# Run Manifest State", + manifest_context.strip() or "No run manifest state available yet.", "# New Feedback", revision_feedback.strip() if revision_feedback diff --git a/tests/test_clawdock_alignment.py b/tests/test_clawdock_alignment.py index 8d51c64..eaa0656 100644 --- a/tests/test_clawdock_alignment.py +++ b/tests/test_clawdock_alignment.py @@ -1,6 +1,7 @@ from __future__ import annotations import io +import json import tempfile import unittest from pathlib import Path @@ -8,8 +9,10 @@ from src.knowledge_base import initialize_knowledge_base, load_kb_entries, search_knowledge_base, write_kb_entry from src.manager import ResearchManager +from src.manifest import load_run_manifest, rollback_to_stage from src.operator import ClaudeOperator -from src.run_state import RUN_STATUS_COMPLETED, load_run_state +from src.run_state import RUN_STATUS_COMPLETED, derive_run_state +from src.utils import approved_stage_summaries from src.utils import STAGES, build_prompt, build_run_paths, ensure_run_layout @@ -22,12 +25,18 @@ def test_build_prompt_includes_pattern_and_kb_context(self) -> None: user_request="Survey recent work on retrieval.", approved_memory="Approved memory body", kb_context="1. 
[user_goal] Original user goal", + orchestration_context="Pattern: Parallel\nSubtasks: 3", + handoff_context="Previous stage handoff summary", + manifest_context="Current Stage: 01_literature_survey", revision_feedback=None, ) self.assertIn("# Research Pipeline Mapping", prompt) self.assertIn(stage.orchestration_pattern, prompt) self.assertIn("# Knowledge Base Context", prompt) + self.assertIn("# Routed Orchestration Context", prompt) + self.assertIn("# Stage Handoff Context", prompt) + self.assertIn("Pattern: Parallel", prompt) self.assertIn("Original user goal", prompt) def test_kb_search_prioritizes_matching_stage(self) -> None: @@ -87,9 +96,10 @@ def test_fake_run_completes_with_state_and_kb(self) -> None: self.assertEqual(len(run_roots), 1) paths = build_run_paths(run_roots[0]) - state = load_run_state(paths.run_state) - self.assertIsNotNone(state) - assert state is not None + manifest = load_run_manifest(paths.run_manifest) + self.assertIsNotNone(manifest) + assert manifest is not None + state = derive_run_state(manifest) self.assertEqual(state.status, RUN_STATUS_COMPLETED) self.assertEqual(len(state.approved_stages), len(STAGES)) @@ -98,6 +108,51 @@ def test_fake_run_completes_with_state_and_kb(self) -> None: self.assertIn("run_completed", entry_types) self.assertEqual(entry_types.count("stage_approved"), len(STAGES)) + self.assertEqual(len([entry for entry in manifest.stages if entry.approved]), len(STAGES)) + self.assertEqual(manifest.run_status, "completed") + self.assertTrue(paths.handoff_dir.exists()) + self.assertTrue(any(path.name == "08_dissemination.md" for path in paths.handoff_dir.iterdir())) + + self.assertEqual(state.status, manifest.run_status.upper()) + self.assertEqual(state.current_stage_slug, manifest.current_stage_slug) + + def test_rollback_marks_downstream_stale_and_rebuilds_memory(self) -> None: + repo_root = Path(__file__).resolve().parents[1] + + with tempfile.TemporaryDirectory() as tmp_dir: + runs_dir = Path(tmp_dir) / "runs" + 
manager = ResearchManager( + project_root=repo_root, + runs_dir=runs_dir, + operator=ClaudeOperator(fake_mode=True, output_stream=io.StringIO()), + output_stream=io.StringIO(), + ) + + with patch("builtins.input", side_effect=["5"] * len(STAGES)): + self.assertTrue(manager.run("Rollback validation workflow.")) + + run_root = next(path for path in runs_dir.iterdir() if path.is_dir()) + paths = build_run_paths(run_root) + rollback_to_stage(paths, STAGES[2], reason="Redo study design") + manifest = load_run_manifest(paths.run_manifest) + assert manifest is not None + + by_slug = {entry.slug: entry for entry in manifest.stages} + self.assertEqual(by_slug["03_study_design"].status, "pending") + self.assertTrue(by_slug["03_study_design"].dirty) + self.assertEqual(by_slug["04_implementation"].status, "stale") + self.assertTrue(by_slug["04_implementation"].stale) + self.assertEqual(by_slug["08_dissemination"].status, "stale") + + approved_memory = approved_stage_summaries(paths.memory.read_text(encoding="utf-8")) + self.assertIn("Stage 01: Literature Survey", approved_memory) + self.assertIn("Stage 02: Hypothesis Generation", approved_memory) + self.assertNotIn("Stage 03: Study Design", approved_memory) + + status_text = manager.describe_run_status(run_root) + self.assertIn("Current Stage: 03_study_design", status_text) + self.assertIn("04_implementation: status=stale", status_text) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_operator_recovery.py b/tests/test_operator_recovery.py new file mode 100644 index 0000000..401a7c3 --- /dev/null +++ b/tests/test_operator_recovery.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +import io +import json +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from src.operator import ClaudeOperator +from src.utils import STAGES, build_run_paths, ensure_run_layout, initialize_memory, write_text + + +class OperatorRecoveryTests(unittest.TestCase): + def 
test_resume_failure_falls_back_to_new_session_and_records_attempt_state(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + run_root = Path(tmp_dir) / "run" + paths = build_run_paths(run_root) + ensure_run_layout(paths) + write_text(paths.user_input, "Operator recovery goal") + initialize_memory(paths, "Operator recovery goal") + + operator = ClaudeOperator(fake_mode=False, output_stream=io.StringIO()) + stage = STAGES[0] + old_session_id = "old-session-id" + operator._persist_stage_session_id(paths, stage, old_session_id) + + call_count = {"value": 0} + + def fake_stream(*args, **kwargs): + call_count["value"] += 1 + if call_count["value"] == 1: + return ( + 1, + "No conversation found with session id old-session-id", + "", + None, + {"raw_line_count": 1, "non_json_line_count": 1, "malformed_json_count": 1}, + ) + + stage_tmp_path = paths.stage_tmp_file(stage) + write_text( + stage_tmp_path, + ( + "# Stage 01: Literature Survey\n\n" + "## Objective\nRecovered.\n\n" + "## Previously Approved Stage Summaries\n_None yet._\n\n" + "## What I Did\nRecovered session.\n\n" + "## Key Results\nRecovered stage summary.\n\n" + "## Files Produced\n- `stages/01_literature_survey.tmp.md`\n\n" + "## Suggestions for Refinement\n" + "1. Refine one.\n2. Refine two.\n3. Refine three.\n\n" + "## Your Options\n" + "1. Use suggestion 1\n2. Use suggestion 2\n3. Use suggestion 3\n4. Refine with your own feedback\n5. Approve and continue\n6. 
Abort\n" + ), + ) + return ( + 0, + "Recovered successfully.", + "", + "new-session-id", + {"raw_line_count": 2, "non_json_line_count": 0, "malformed_json_count": 0}, + ) + + with patch("src.operator.shutil.which", return_value="/usr/bin/claude"), patch.object( + operator, + "_run_streaming_command", + side_effect=fake_stream, + ): + result = operator._run_real( + stage=stage, + prompt="prompt", + paths=paths, + attempt_no=1, + continue_session=True, + ) + + self.assertTrue(result.success) + self.assertEqual(result.session_id, "new-session-id") + self.assertEqual(call_count["value"], 2) + self.assertEqual(paths.stage_session_file(stage).read_text(encoding="utf-8").strip(), "new-session-id") + + attempt_state = json.loads(paths.stage_attempt_state_file(stage, 1).read_text(encoding="utf-8")) + self.assertEqual(attempt_state["status"], "completed") + self.assertEqual(attempt_state["mode"], "resume") + self.assertEqual(attempt_state["session_id"], "new-session-id") + + def test_broken_session_is_not_reused(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + run_root = Path(tmp_dir) / "run" + paths = build_run_paths(run_root) + ensure_run_layout(paths) + write_text(paths.user_input, "Broken session test") + initialize_memory(paths, "Broken session test") + + operator = ClaudeOperator(fake_mode=False, output_stream=io.StringIO()) + stage = STAGES[0] + write_text( + paths.stage_session_state_file(stage), + json.dumps( + { + "session_id": "broken-session-id", + "broken": True, + }, + indent=2, + ), + ) + + resolved = operator._resolve_stage_session_id(paths, stage, continue_session=False) + self.assertIsNotNone(resolved) + self.assertNotEqual(resolved, "broken-session-id") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_platform_alignment.py b/tests/test_platform_alignment.py index 6d41fb4..f0a4f83 100644 --- a/tests/test_platform_alignment.py +++ b/tests/test_platform_alignment.py @@ -8,8 +8,17 @@ from src.manager import 
ResearchManager from src.operator import ClaudeOperator -from src.platform.foundry import FoundryOutputFormat, generate_foundry_output +from src.platform.debate import HypothesisDebateWorkflow +from src.platform.foundry import ( + FoundryOutputFormat, + generate_foundry_output, + generate_paper_package, + generate_release_package, +) +from src.platform.literature import LiteratureSurveyWorkflow from src.platform.orchestration import HierarchicalPattern, ParallelPattern, SequentialPattern, SwarmPattern +from src.platform.playbook import OvernightPlaybookEngine, PlaybookStep +from src.platform.router import ResearchPipelineRouter from src.platform.security import ROLE_SCOPES, authorize_scope from src.platform.semantic import SemanticIndexer from src.platform.types import ResearchTask, TaskResult @@ -31,6 +40,36 @@ def test_semantic_indexer_ranks_relevant_document_first(self) -> None: self.assertGreaterEqual(len(matches), 1) self.assertEqual(matches[0].index, 1) + def test_literature_workflow_generates_records_and_artifacts(self) -> None: + workflow = LiteratureSurveyWorkflow() + result = workflow.run("protein folding benchmark reliability", limit_per_source=2, allow_network=False) + self.assertGreaterEqual(len(result.records), 2) + + with tempfile.TemporaryDirectory() as tmp_dir: + artifacts = workflow.write_artifacts(Path(tmp_dir), "01_literature_survey", result) + self.assertEqual(len(artifacts), 2) + self.assertTrue(all(path.exists() for path in artifacts)) + + def test_hypothesis_debate_workflow_produces_turns(self) -> None: + workflow = HypothesisDebateWorkflow() + result = workflow.run("improve hypothesis quality", kb_context=["literature note"], rounds=2) + self.assertEqual(result.rounds, 2) + self.assertGreaterEqual(len(result.turns), 3) + self.assertIn("Moderator synthesis", result.winning_hypothesis) + + def test_playbook_engine_writes_summary(self) -> None: + engine = OvernightPlaybookEngine() + steps = [ + PlaybookStep(name="prepare", command="prepare"), 
+ PlaybookStep(name="run", command="run"), + ] + + with tempfile.TemporaryDirectory() as tmp_dir: + run_root = Path(tmp_dir) + summary = engine.run(run_root, "05_experimentation", "goal", steps) + self.assertEqual(len(summary.completed_steps), 2) + self.assertTrue((run_root / "results" / "05_experimentation_playbook_summary.json").exists()) + def test_orchestration_patterns_execute_tasks(self) -> None: tasks = [ ResearchTask(task_id="1", title="A", goal="a", pipeline_stage="analysis", project_id="run"), @@ -54,6 +93,32 @@ def runner(task: ResearchTask) -> TaskResult: self.assertEqual(len(swarm_results), 2) self.assertEqual(len(hierarchical_results), 2) + def test_router_executes_stage_specific_workflows(self) -> None: + router = ResearchPipelineRouter() + + with tempfile.TemporaryDirectory() as tmp_dir: + repo_root = Path(__file__).resolve().parents[1] + runs_dir = Path(tmp_dir) / "runs" + manager = ResearchManager( + project_root=repo_root, + runs_dir=runs_dir, + operator=ClaudeOperator(fake_mode=True, output_stream=io.StringIO()), + output_stream=io.StringIO(), + ) + paths = manager.create_run_paths("Study reliable literature and experiments.") + + literature_result = router.execute(paths, STAGES[0], 1, "Study reliable literature and experiments.", "kb") + debate_result = router.execute(paths, STAGES[1], 1, "Study reliable literature and experiments.", "kb") + playbook_result = router.execute(paths, STAGES[4], 1, "Study reliable literature and experiments.", "kb") + paper_result = router.execute(paths, STAGES[6], 1, "Study reliable literature and experiments.", "kb") + release_result = router.execute(paths, STAGES[7], 1, "Study reliable literature and experiments.", "kb") + + self.assertIn("citations", " ".join(literature_result.artifact_paths)) + self.assertIn("debate", " ".join(debate_result.artifact_paths)) + self.assertIn("playbook", " ".join(playbook_result.artifact_paths)) + self.assertIn("paper_package", " ".join(paper_result.artifact_paths)) + 
self.assertIn("release_package", " ".join(release_result.artifact_paths)) + def test_foundry_generation_writes_output(self) -> None: repo_root = Path(__file__).resolve().parents[1] @@ -75,6 +140,13 @@ def test_foundry_generation_writes_output(self) -> None: self.assertIn("paper.md", str(output.output_path)) self.assertIn("Foundry Output: Paper", output.summary) + paper_package = generate_paper_package(run_root) + release_package = generate_release_package(run_root) + self.assertTrue(any(path.name == "manuscript.tex" for path in paper_package.artifact_paths)) + self.assertTrue(any(path.name == "paper.pdf" for path in paper_package.artifact_paths)) + self.assertTrue(any(path.name == "readiness_checklist.md" for path in release_package.artifact_paths)) + self.assertTrue(any(path.name == "artifact_bundle_manifest.json" for path in release_package.artifact_paths)) + def test_security_role_map_and_authorization(self) -> None: self.assertIn("researcher", ROLE_SCOPES) authorize_scope("researcher", "task.read")