diff --git a/SKILL.md b/SKILL.md index bfccbfc..a78a661 100644 --- a/SKILL.md +++ b/SKILL.md @@ -17,7 +17,7 @@ You are the workflow coordinator. You have three jobs: 2) Be patient and do nothing between the time you dispatch a subagent and when it completes or hits its timeout 3) Communicate progress to the user exactly as defined below. THE WORST THINGS YOU CAN DO ARE: -- Kill an agent before it's either completed or hit its 60-180 minute timeout +- Do not kill an agent before it's either completed or hit its 60–180 minute timeout - Read files that you are not instructed to - Check CPU cycles, look at disk activity, or otherwise try and divine subagent status - Busy-poll a subagent or invent your own status checks @@ -25,18 +25,14 @@ These will cause your context to bloat so you can't do your job, or kill agents ## Phase wrapper helper -Several steps below reference prompt template files in `/subagents/`. Do not reconstruct those prompts yourself. Prepare phase prompts with `python3 /orchestrator/run_phase.py`. +Several steps below reference prompt template files in `/subagents/`. Do not reconstruct those prompts yourself. Prepare and dispatch phase prompts with `python3 /orchestrator/run_phase.py`. -Choose native mode (e.g. Claude Code `Agent`, Codex `spawn_agent`, Kimi `Agent`, OpenCode `task`) when your environment provides a native subagent tool. Choose the fallback-runner mode only if you have NO such tool available. +This skill always uses the fallback runner — there is no native subagent mode. When a step below tells you to prepare or dispatch a phase: -When a step below tells you to prepare or dispatch a phase: - -- In native mode, use `python3 /orchestrator/run_phase.py prepare ...`, then send the exact contents of the returned `prompt_path` verbatim to the target subagent. -- In fallback-runner mode, use `python3 /orchestrator/run_phase.py run ...`. It prepares transcript and prompt artifacts, then dispatches through the bundled runner. -- In fallback-runner mode, pass `--backend host` on wrapper calls so fresh subagents stay on the same backend as the parent agent. -- When the host agent is Kimi and you are using fallback-runner mode, pass `--backend kimi` instead because `host` and `auto` cannot reliably detect a Kimi host. +- Use `python3 /orchestrator/run_phase.py run ...`. It prepares transcript and prompt artifacts, then dispatches through the bundled runner. +- Pass `--backend host` on wrapper calls so fresh subagents stay on the same backend as the parent agent. - Treat the wrapper's JSON stdout and `result.json` as authoritative for prompt and artifact paths. -- In fallback-runner mode, treat the nested `dispatch` payload plus its `result.json` as authoritative for subagent status and reply artifacts. Use the text at `dispatch.reply_path` as the exact subagent reply. +- Treat the nested `dispatch` payload plus its `result.json` as authoritative for subagent status and reply artifacts. Use the text at `dispatch.reply_path` as the exact subagent reply. - If fallback dispatch returns `dispatch.status: "user_decision_required"`, present `dispatch.reply_path` verbatim to the user. - If fallback dispatch returns `dispatch.status: "escalate_to_user"`, stop and surface the nested `dispatch.message` plus artifact paths. - Pass short scalar placeholder values such as `{WORKTREE_PATH}`, `{IMPLEMENTATION_PLAN_PATH}`, and `{TEST_PLAN_PATH}` with `--set NAME=VALUE`. @@ -45,9 +41,9 @@ When a step below tells you to prepare or dispatch a phase: - Bind transcript placeholders such as `{USER_REQUEST_TRANSCRIPT}`, `{INITIAL_REQUEST_AND_SUBSEQUENT_CONVERSATION}`, and `{FULL_CONVERSATION_VERBATIM}` with `--transcript-placeholder NAME`. - Use `--require-nonempty-tag TAG` when a prompt requires a tagged block to contain real content after trimming whitespace. - Use `--ignore-tag-for-placeholders TAG` when placeholder-like text may legitimately appear inside that tag. -- If your environment has no native subagent support and the wrapper's fallback run does not function, escalate to the user. +- If the wrapper's fallback run does not function, escalate to the user. -The prompt builder still supports conditional blocks inside templates. A block guarded by `{{#if NAME}} ... {{/if}}` is included only when `NAME` is bound to a non-empty value. +The prompt builder supports conditional blocks inside templates. A block guarded by `{{#if NAME}} ... {{/if}}` is included only when `NAME` is bound to a non-empty value. ## Workspace path convention @@ -60,14 +56,10 @@ In `--no-worktree` mode, do not create a nested git worktree and do not create o ## Transcript placeholder helper When a phase wrapper call needs `{USER_REQUEST_TRANSCRIPT}`, `{INITIAL_REQUEST_AND_SUBSEQUENT_CONVERSATION}`, or `{FULL_CONVERSATION_VERBATIM}`: -1. For Codex CLI, let the wrapper use direct session lookup by default. -2. For Kimi CLI, always pass `--transcript-cli kimi-cli` on transcript-bearing wrapper calls and let direct session lookup run first. -3. If the wrapper reports that a canary is required, run `python3 /orchestrator/user-request-transcript/mark_with_canary.py` as a separate top-level command, capture stdout exactly as `{CANARY}`, then rerun the wrapper with `--canary "{CANARY}"`. For Kimi-hosted runs, keep `--transcript-cli kimi-cli` on the rerun as well. -4. For Claude Code, always run `python3 /orchestrator/user-request-transcript/mark_with_canary.py` as a separate top-level command first, capture stdout exactly as `{CANARY}`, then invoke the wrapper with `--transcript-cli claude-code --canary "{CANARY}"`. -5. For OpenCode, always run `python3 /orchestrator/user-request-transcript/mark_with_canary.py` as a separate top-level command first, capture stdout exactly as `{CANARY}`, then invoke the wrapper with `--transcript-cli opencode --canary "{CANARY}"`. + +1. Always run `python3 /orchestrator/user-request-transcript/mark_with_canary.py` as a separate top-level command first, capture stdout exactly as `{CANARY}`, then invoke the wrapper with `--transcript-cli pi-cli --canary "{CANARY}"`. The canary must be emitted by a separate top-level command so it reaches the live session transcript before lookup. Do not rely on shell-specific capture or assignment forms that may keep the canary out of visible command output; shells and host wrappers vary, and if the canary is not visibly emitted into the session transcript, lookup will fail. Build transcript placeholder values immediately before each phase wrapper call that uses them. -Kimi and OpenCode support is explicit here because `host` and `auto` cannot reliably detect a Kimi host, and OpenCode requires canary-based lookup. When a step below references `{POST_IMPLEMENTATION_REVIEW_OBSERVATIONS_JSON}`, use the extracted review observations JSON exactly as the placeholder value. @@ -80,17 +72,14 @@ When a step below references `{IMPLEMENTATION_BACKEND}`, use the resolved `dispa ## Subagent Defaults - **Use the same backend/model unless local configuration says otherwise, and do not switch subagents to a different "best" model on your own.** - - In native mode, keep subagents on the same model you are currently using unless the user or local configuration overrides that. - - In fallback-runner mode, use `--backend host` by default so fresh subagents stay on the parent backend. When the host agent is Kimi, use `--backend kimi` explicitly. When the host agent is OpenCode, `--backend host` works correctly because `OPENCODE=1` is detectable. - - Prefer local overrides when present: `TRYCYCLE_CODEX_PROFILE`, `TRYCYCLE_CODEX_MODEL`, `TRYCYCLE_CLAUDE_MODEL`, `TRYCYCLE_KIMI_MODEL`, and `TRYCYCLE_OPENCODE_MODEL`. - - `--profile` is a Codex-only exact override for a local Codex profile name. + - Always use `--backend host` so fresh subagents stay on the parent backend. + - Prefer the local override when present: `TRYCYCLE_PI_MODEL`. - `--model` is an exact backend-specific override, not a discovery mechanism. Only pass it when you have identified a valid backend model name and can spell it exactly. Never guess or invent model names. - If no local override is configured and you can reliably identify your current model's exact backend name, pass that same model with `--model`. Otherwise omit `--model` and let the backend's local default apply. - Do not pass `--effort` unless the user explicitly asked for it or you are preserving a known parent setting. If the current effort is not safely knowable, omit it rather than guessing. - Planning subagents are ephemeral across plan-edit rounds so they can remain independent: spawn a fresh planning agent for the initial plan and for every plan-edit round until the plan is judged already excellent without changes. -- In native mode, implementation subagents are persistent: create one implementation agent, then resume it for every implementation-fix round. -- In fallback-runner mode, implementation subagents are persistent through the runner: create one implementation session, record its `session_id`, then resume it through the runner for every implementation-fix round. -- In fallback-runner mode, record the resolved `dispatch.backend` for persistent sessions and reuse that same backend on every `resume`. +- Implementation subagents are persistent through the runner: create one implementation session, record its `session_id`, then resume it through the runner for every implementation-fix round. +- Record the resolved `dispatch.backend` for persistent sessions and reuse that same backend on every `resume`. - Review subagents are ephemeral: create a fresh reviewer for each post-implementation review round. - For planning rounds, pass `{USER_REQUEST_TRANSCRIPT}` as the task input. Do not use the full prior conversation. - Render the prompt template with the prompt builder and pass the rendered prompt verbatim. @@ -129,7 +118,7 @@ Immediately before dispatch, prepare the `test-strategy` phase via the phase wra Monitor by checking every 5 minutes until 60 minutes have passed. Then, and only then, kill it and retry. -When the subagent returns a proposed strategy, present it to the user verbatim and ask for explicit approval or edits. Then close that completed test-strategy subagent and clear any saved handle or `session_id` for it. Do not proceed unless the user explicitly accepts it or provides changes. Silence, implied approval, or the subagent's own recommendation does not count as agreement. The strategy and any later test plan must not rely on manual QA or human validation; prefer reproducible artifacts such as browser snapshots when visual evidence is needed. Put the strongest weight on high-value automated checks that verify real user-visible behavior through the actual UI, CLI, HTTP surface, or other outputs the user consumes, rather than tests that only show the implementation is internally self-consistent. Prefer reusing or extending those checks when they already exist, and add new tests wherever the existing suite leaves meaningful gaps in coverage, fidelity, or diagnosis. If the problem statement or prior investigation already identifies automated checks that are red and must go green, the strategy and any later test plan must include them explicitly. If the user requests changes or redirects the approach, rerun the same `test-strategy` phase wrapper command immediately before redispatching. Monitor by checking every 5 minutes until 60 minutes have passed. Then, and only then, kill it and retry. Present the revised strategy verbatim. Repeat until the user explicitly approves a strategy. +When the subagent returns a proposed strategy, present it to the user verbatim and ask for explicit approval or edits. Then close that completed test-strategy subagent and clear its saved `session_id`. Do not proceed unless the user explicitly accepts it or provides changes. Silence, implied approval, or the subagent's own recommendation does not count as agreement. The strategy and any later test plan must not rely on manual QA or human validation; prefer reproducible artifacts such as browser snapshots when visual evidence is needed. Put the strongest weight on high-value automated checks that verify real user-visible behavior through the actual UI, CLI, HTTP surface, or other outputs the user consumes, rather than tests that only show the implementation is internally self-consistent. Prefer reusing or extending those checks when they already exist, and add new tests wherever the existing suite leaves meaningful gaps in coverage, fidelity, or diagnosis. If the problem statement or prior investigation already identifies automated checks that are red and must go green, the strategy and any later test plan must include them explicitly. If the user requests changes or redirects the approach, rerun the same `test-strategy` phase wrapper command immediately before redispatching. Monitor by checking every 5 minutes until 60 minutes have passed. Then, and only then, kill it and retry. Present the revised strategy verbatim. Repeat until the user explicitly approves a strategy. The agreed testing strategy is used in step 7. @@ -189,7 +178,7 @@ Wait for the planning subagent to return either: If the planning subagent returns `USER DECISION REQUIRED:`, present that question to the user, send the user's answer back to that active planning subagent, and wait again for either a planning report or another `USER DECISION REQUIRED:` report. Monitor by checking every 5 minutes until 60 minutes have passed. Then, and only then, kill it and retry. -If a planning report was returned, update `{IMPLEMENTATION_PLAN_PATH}` from `## Plan path`, then run the workspace hygiene gate checks, verify the latest commit hash plus changed-file list match the planning subagent's report, confirm the plan file exists at `{IMPLEMENTATION_PLAN_PATH}`, then close that planning subagent and clear any saved handle or `session_id` for it. +If a planning report was returned, update `{IMPLEMENTATION_PLAN_PATH}` from `## Plan path`, then run the workspace hygiene gate checks, verify the latest commit hash plus changed-file list match the planning subagent's report, confirm the plan file exists at `{IMPLEMENTATION_PLAN_PATH}`, then close that planning subagent and clear its saved `session_id`. ## 7) Plan-editor loop (up to 5 rounds) @@ -206,7 +195,7 @@ After each edit round: 2. If the planning subagent returns `USER DECISION REQUIRED:`, present that question to the user, send the user's answer back to that active planning subagent, and wait again for either an updated planning report or another `USER DECISION REQUIRED:` report. Monitor by checking every 5 minutes until 60 minutes have passed. Then, and only then, kill it and retry. 3. Update `{IMPLEMENTATION_PLAN_PATH}` from `## Plan path` in the latest planning report. 4. Run the workspace hygiene gate checks and verify the latest commit hash plus changed-file list match the planning subagent's report. -5. Close that planning subagent for the completed round and clear any saved handle or `session_id` for it. +5. Close that planning subagent for the completed round and clear its saved `session_id`. 6. If `## Plan verdict` is `READY`, continue to step 8 with the current `{IMPLEMENTATION_PLAN_PATH}`. **If the verdict is NOT `READY`, do NOT proceed to step 8 - continue to step 7 for another planning round.** 7. If `## Plan verdict` is `REVISED`, repeat with a fresh planning subagent. 8. Repeat up to 5 rounds. @@ -228,10 +217,10 @@ When the subagent returns: 1. Update `{TEST_PLAN_PATH}` from `## Test plan path` in the latest test-plan report. 2. If the test-plan report includes `## Strategy changes requiring user approval`, present that section to the user verbatim. -3. If the user requests changes or redirects the approach, close that completed test-plan subagent and clear any saved handle or `session_id` for it, then rerun the same `test-plan` phase wrapper command immediately before redispatching. Monitor by checking every 5 minutes until 60 minutes have passed. Then, and only then, kill it and retry. Update `{TEST_PLAN_PATH}` from the latest test-plan report. Repeat until the user explicitly approves or the report no longer includes that section. +3. If the user requests changes or redirects the approach, close that completed test-plan subagent and clear its saved `session_id`, then rerun the same `test-plan` phase wrapper command immediately before redispatching. Monitor by checking every 5 minutes until 60 minutes have passed. Then, and only then, kill it and retry. Update `{TEST_PLAN_PATH}` from the latest test-plan report. Repeat until the user explicitly approves or the report no longer includes that section. 4. Do not proceed until the current test-plan report either has no `## Strategy changes requiring user approval` section or the user has explicitly approved it. 5. Run the workspace hygiene gate checks, verify the latest commit hash plus changed-file list match the test-plan subagent's report, and verify the test plan file exists at `{TEST_PLAN_PATH}`. -6. Close the completed test-plan subagent for the approved report and clear any saved handle or `session_id` for it. +6. Close the completed test-plan subagent for the approved report and clear its saved `session_id`. ## 9) Execute with trycycle-executing (subagent-owned) @@ -252,10 +241,10 @@ The implementation subagent stays in execute mode until the plan is complete, th Immediately before dispatch, prepare the `executing` phase via the phase wrapper using template `/subagents/prompt-executing.md`, `--set IMPLEMENTATION_PLAN_PATH={IMPLEMENTATION_PLAN_PATH}`, `--set TEST_PLAN_PATH={TEST_PLAN_PATH}`, and `--set WORKTREE_PATH={WORKTREE_PATH}`, then dispatch the implementation subagent with the returned `prompt_path`. -In fallback-runner mode, record the returned `dispatch.backend` as `{IMPLEMENTATION_BACKEND}` alongside the saved `session_id`. +Record the returned `dispatch.backend` as `{IMPLEMENTATION_BACKEND}` alongside the saved `session_id`. Monitor by checking every 5 minutes until 180 minutes have passed. Then, and only then, kill it and retry. -If you kill and retry this implementation round, create a fresh implementation subagent or runner session and replace the saved implementation handle. In fallback-runner mode, also replace the saved `session_id` and `{IMPLEMENTATION_BACKEND}` with the fresh dispatch values. +If you kill and retry this implementation round, create a fresh runner session and replace the saved `session_id` and `{IMPLEMENTATION_BACKEND}` with the fresh dispatch values. Do not proceed to post-implementation review until the implementation subagent has returned an implementation report. @@ -269,7 +258,7 @@ Immediately before dispatch, prepare the `post-implementation-review` phase via Monitor by checking every 5 minutes until 60 minutes have passed. Then, and only then, kill it and retry. -Use the review subagent's output as the fix-loop input. As soon as you have captured the reviewer's stdout or decided the review loop is done, close that completed review subagent and clear any saved handle or `session_id` for it. +Use the review subagent's output as the fix-loop input. As soon as you have captured the reviewer's stdout or decided the review loop is done, close that completed review subagent and clear its saved `session_id`. After every review round, save the reviewer's raw stdout to a temp file immediately and extract a structured review-observations artifact from it: @@ -289,9 +278,9 @@ If extraction fails, stop and surface the review reply plus the extractor failur When another fix round is needed: 1. Prepare the `executing` phase again via the phase wrapper using template `/subagents/prompt-executing.md`, `--set IMPLEMENTATION_PLAN_PATH={IMPLEMENTATION_PLAN_PATH}`, `--set TEST_PLAN_PATH={TEST_PLAN_PATH}`, `--set WORKTREE_PATH={WORKTREE_PATH}`, `--set-file POST_IMPLEMENTATION_REVIEW_OBSERVATIONS_JSON=`, and `--ignore-tag-for-placeholders post_implementation_review_observations_json`. -2. In native mode, resume the same implementation subagent and send the exact returned `prompt_path` contents verbatim. In fallback-runner mode, resume the implementation session through `python3 /orchestrator/subagent_runner.py resume` using the saved `session_id`, `--backend {IMPLEMENTATION_BACKEND}`, and the wrapper-prepared `prompt_path`. +2. Resume the implementation session through `python3 /orchestrator/subagent_runner.py resume` using the saved `session_id`, `--backend {IMPLEMENTATION_BACKEND}`, and the wrapper-prepared `prompt_path`. 3. Monitor by checking every 5 minutes until 180 minutes have passed. Then, and only then, kill it and retry. -4. If you kill and retry this implementation round, create a fresh implementation subagent or runner session and replace the saved implementation handle. In fallback-runner mode, also replace the saved `session_id` and `{IMPLEMENTATION_BACKEND}` with the fresh dispatch values. +4. If you kill and retry this implementation round, create a fresh runner session and replace the saved `session_id` and `{IMPLEMENTATION_BACKEND}` with the fresh dispatch values. After each implementation-subagent fix round, run the workspace hygiene gate checks and verify the latest commit hash plus changed-file list match the implementation subagent's report before starting the next fresh review round. @@ -313,8 +302,8 @@ Clean up temporary artifacts created during the loop (for example plan scratch f - `git -C {WORKTREE_PATH} rev-parse --short HEAD` - `git -C {WORKTREE_PATH} diff --name-only main...HEAD` -If the implementation subagent is still open, close it and clear its saved handle or `session_id` before handing off to finishing. +If the implementation subagent is still open, clear its saved `session_id` before handing off to finishing. -Finally, in one paragraph, briefly describe what was built/accomplished/changed/fixed. Then Report the process to the user using concrete facts and returned artifacts: how many plan-editor rounds, how many code-review rounds, the current `HEAD`, the changed-file list, the implementation subagent's latest summary and verification results, and any reviewer-reported residual issues. +Finally, in one paragraph, briefly describe what was built/accomplished/changed/fixed. Then report the process to the user using concrete facts and returned artifacts: how many plan-editor rounds, how many code-review rounds, the current `HEAD`, the changed-file list, the implementation subagent's latest summary and verification results, and any reviewer-reported residual issues. Then read and follow `/subskills/trycycle-finishing/SKILL.md` to present the user with options for integrating the implementation workspace (merge, PR, etc.). diff --git a/orchestrator/run_phase.py b/orchestrator/run_phase.py index 5aaae58..e10117f 100644 --- a/orchestrator/run_phase.py +++ b/orchestrator/run_phase.py @@ -48,6 +48,8 @@ def _parse_binding(raw: str) -> tuple[str, str]: def _detect_transcript_cli(selected: str) -> str: if selected != "auto": return selected + if os.environ.get("PI_CODING_AGENT") == "true": + return "pi-cli" if os.environ.get("CODEX_THREAD_ID") or os.environ.get("CODEX_HOME"): return "codex-cli" if os.environ.get("CLAUDECODE"): @@ -286,7 +288,7 @@ def _add_prepare_arguments(parser: argparse.ArgumentParser) -> None: ) parser.add_argument( "--transcript-cli", - choices=["auto", "codex-cli", "claude-code", "kimi-cli", "opencode"], + choices=["auto", "codex-cli", "claude-code", "kimi-cli", "opencode", "pi-cli"], default="auto", help="Transcript provider to use for transcript placeholders.", ) @@ -335,7 +337,7 @@ def build_parser() -> argparse.ArgumentParser: _add_prepare_arguments(run_parser) run_parser.add_argument( "--backend", - choices=["auto", "host", "codex", "claude", "kimi", "opencode"], + choices=["auto", "host", "codex", "claude", "kimi", "opencode", "pi"], default="auto", help="Subagent backend selection policy.", ) diff --git a/orchestrator/subagent_runner.py b/orchestrator/subagent_runner.py index 539a75b..610e0a6 100644 --- a/orchestrator/subagent_runner.py +++ b/orchestrator/subagent_runner.py @@ -29,6 +29,7 @@ "claude": "TRYCYCLE_CLAUDE_MODEL", "kimi": "TRYCYCLE_KIMI_MODEL", "opencode": "TRYCYCLE_OPENCODE_MODEL", + "pi": "TRYCYCLE_PI_MODEL", } @@ -259,6 +260,8 @@ def _probe_opencode(binary: str) -> dict[str, Any]: def _detect_host_backend() -> str | None: + if os.environ.get("PI_CODING_AGENT") == "true": + return "pi" if os.environ.get("CODEX_THREAD_ID") or os.environ.get("CODEX_HOME"): return "codex" if os.environ.get("CLAUDECODE"): @@ -270,13 +273,15 @@ def _detect_host_backend() -> str | None: def _detect_backend_preferences() -> list[str]: host_backend = _detect_host_backend() + if host_backend == "pi": + return ["pi", "codex", "claude", "kimi", "opencode"] if host_backend == "codex": - return ["codex", "claude", "kimi", "opencode"] + return ["codex", "claude", "kimi", "opencode", "pi"] if host_backend == "claude": - return ["claude", "codex", "kimi", "opencode"] + return ["claude", "codex", "kimi", "opencode", "pi"] if host_backend == "opencode": - return ["opencode", "codex", "claude", "kimi"] - return ["codex", "claude", "kimi", "opencode"] + return ["opencode", "codex", "claude", "kimi", "pi"] + return ["codex", "claude", "kimi", "opencode", "pi"] def _probe_backends() -> dict[str, Any]: @@ -285,6 +290,7 @@ def _probe_backends() -> dict[str, Any]: "claude": _probe_claude("claude"), "kimi": _probe_kimi("kimi"), "opencode": _probe_opencode("opencode"), + "pi": _probe_pi("pi"), } preferred_order = _detect_backend_preferences() @@ -970,6 +976,100 @@ def _extract_opencode_reply_from_db(session_id: str, db_path: Path | None = None return "" +def _probe_pi(binary: str) -> dict[str, Any]: + path = _resolve_binary(binary) + if path is None: + return { + "available": False, + "binary": binary, + "reason": "binary not found on PATH", + } + + ok, output = _run_probe([path, "--help"]) + if not ok: + return { + "available": False, + "binary": path, + "reason": output, + } + + required_tokens = ["--print", "--session", "--session-dir", "--no-skills", "--model"] + missing = [token for token in required_tokens if token not in output] + if missing: + return { + "available": False, + "binary": path, + "reason": f"missing required help tokens: {', '.join(missing)}", + } + + return { + "available": True, + "binary": path, + "supports_resume": True, + } + + +def _pi_command( + *, + binary: str, + artifacts_dir: Path, + effort: str | None, + model: str | None, +) -> list[str]: + command = [ + binary, + "-p", + "--session-dir", + str(artifacts_dir), + "--no-skills", + "--no-extensions", + "--no-prompt-templates", + "--no-context-files", + ] + if model: + command.extend(["--model", model]) + if effort: + command.extend(["--thinking", effort]) + return command + + +def _pi_resume_command( + *, + binary: str, + session_id: str, + effort: str | None, + model: str | None, +) -> list[str]: + command = [ + binary, + "-p", + "--session", + session_id, + "--no-skills", + "--no-extensions", + "--no-prompt-templates", + "--no-context-files", + ] + if model: + command.extend(["--model", model]) + if effort: + command.extend(["--thinking", effort]) + return command + + +def _extract_pi_session_id(artifacts_dir: Path) -> str | None: + """Extract session ID from the first JSONL file found in artifacts_dir.""" + for jsonl_path in sorted(artifacts_dir.glob("*.jsonl")): + try: + first_line = jsonl_path.read_text(encoding="utf-8").splitlines()[0] + record = json.loads(first_line) + if record.get("type") == "session": + return record.get("id") + except (OSError, json.JSONDecodeError, IndexError): + continue + return None + + def _opencode_command( *, binary: str, @@ -1074,6 +1174,15 @@ def _run_backend( model=model, ) cwd = workdir + elif backend == "pi": + command = _pi_command( + binary=binary, + artifacts_dir=reply_path.parent, + effort=effort, + model=model, + ) + cwd = workdir + session_id = None else: raise ValueError(f"unsupported backend: {backend}") @@ -1160,6 +1269,9 @@ def _run_backend( if not reply_text.strip() and session_id and result.returncode == 0: reply_text = _extract_opencode_reply_from_db(session_id) reply_path.write_text(reply_text, encoding="utf-8") + elif backend == "pi": + reply_text = result.stdout or "" + reply_path.write_text(reply_text, encoding="utf-8") elif backend in {"claude", "kimi"}: reply_text = result.stdout or "" reply_path.write_text(reply_text, encoding="utf-8") @@ -1185,6 +1297,9 @@ def _run_backend( started_at=session_lookup_started_at, ) + if backend == "pi" and result.returncode == 0 and session_id is None: + session_id = _extract_pi_session_id(reply_path.parent) + return { "command": command, "exit_code": result.returncode, @@ -1249,6 +1364,14 @@ def _resume_backend( model=model, ) cwd = workdir + elif backend == "pi": + command = _pi_resume_command( + binary=binary, + session_id=session_id, + effort=effort, + model=model, + ) + cwd = workdir else: raise ValueError(f"unsupported backend: {backend}") @@ -1336,6 +1459,9 @@ def _resume_backend( if not reply_text.strip() and session_id and result.returncode == 0: reply_text = _extract_opencode_reply_from_db(session_id) reply_path.write_text(reply_text, encoding="utf-8") + elif backend == "pi": + reply_text = result.stdout or "" + reply_path.write_text(reply_text, encoding="utf-8") elif backend in {"claude", "kimi"}: reply_text = result.stdout or "" reply_path.write_text(reply_text, encoding="utf-8") @@ -1736,19 +1862,19 @@ def _command_resume(args: argparse.Namespace) -> int: def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( - description="Safe fallback runner for trycycle subagent dispatch via Codex, Claude, Kimi, or OpenCode.", + description="Safe fallback runner for trycycle subagent dispatch via Codex, Claude, Kimi, OpenCode, or Pi.", ) subparsers = parser.add_subparsers(dest="command", required=True) probe_parser = subparsers.add_parser( "probe", - help="Detect supported Codex, Claude, Kimi, and OpenCode backends.", + help="Detect supported Codex, Claude, Kimi, OpenCode, and Pi backends.", ) probe_parser.set_defaults(func=_command_probe) run_parser = subparsers.add_parser( "run", - help="Run a subagent prompt through Codex, Claude, Kimi, or OpenCode without shell quoting.", + help="Run a subagent prompt through Codex, Claude, Kimi, OpenCode, or Pi without shell quoting.", ) run_parser.add_argument( "--phase", @@ -1771,7 +1897,7 @@ def build_parser() -> argparse.ArgumentParser: ) run_parser.add_argument( "--backend", - choices=["auto", "host", "codex", "claude", "kimi", "opencode"], + choices=["auto", "host", "codex", "claude", "kimi", "opencode", "pi"], default="auto", help="Backend selection policy. Use 'host' to stay on the parent backend.", ) @@ -1830,7 +1956,7 @@ def build_parser() -> argparse.ArgumentParser: ) resume_parser.add_argument( "--backend", - choices=["auto", "host", "codex", "claude", "kimi", "opencode"], + choices=["auto", "host", "codex", "claude", "kimi", "opencode", "pi"], default="auto", help="Backend selection policy. Use 'host' to stay on the parent backend.", ) diff --git a/orchestrator/user-request-transcript/build.py b/orchestrator/user-request-transcript/build.py index d87dd07..6977eaf 100644 --- a/orchestrator/user-request-transcript/build.py +++ b/orchestrator/user-request-transcript/build.py @@ -8,6 +8,7 @@ import codex_cli import kimi_cli import opencode_cli +import pi_cli from common import TranscriptError, choose_most_recent_match, render_transcript @@ -16,6 +17,7 @@ "codex-cli": codex_cli, "kimi-cli": kimi_cli, "opencode": opencode_cli, + "pi-cli": pi_cli, } diff --git a/orchestrator/user-request-transcript/pi_cli.py b/orchestrator/user-request-transcript/pi_cli.py new file mode 100644 index 0000000..879c183 --- /dev/null +++ b/orchestrator/user-request-transcript/pi_cli.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from pathlib import Path + +from common import TranscriptTurn, iter_jsonl_records, wait_for_matches + + +DEFAULT_PI_SESSIONS_ROOT = Path.home() / ".pi" / "agent" / "sessions" + + +def _encode_cwd(cwd: str) -> str: + """Encode a cwd path into a Pi session directory name. + + Pi encodes: strip leading '/', replace '/' with '-', replace ':' with '-', + wrap in '--'. + """ + encoded = cwd.lstrip("/") + encoded = encoded.replace("/", "-") + encoded = encoded.replace(":", "-") + return f"--{encoded}--" + + +def _resolve_sessions_root(search_root: Path | None = None) -> Path: + if search_root is not None: + return search_root + return DEFAULT_PI_SESSIONS_ROOT + + +def find_matching_transcripts( + *, + canary: str, + timeout_ms: int, + poll_ms: int, + search_root: Path | None = None, +) -> list[Path]: + root = _resolve_sessions_root(search_root) + if not root.exists(): + from common import TranscriptError + + raise TranscriptError(f"Pi sessions root does not exist: {root}") + + return wait_for_matches( + root=root, + canary=canary, + timeout_ms=timeout_ms, + poll_ms=poll_ms, + ) + + +def extract_transcript(path: Path) -> list[TranscriptTurn]: + selected_turns: list[TranscriptTurn] = [] + pending_assistant: TranscriptTurn | None = None + saw_user = False + + for line_number, record in iter_jsonl_records(path): + record_type = record.get("type") + + if record_type != "message": + continue + + message = record.get("message", {}) + role = message.get("role") + content_blocks = message.get("content", []) + + if not isinstance(content_blocks, list): + continue + + if role == "user": + user_text = "".join( + block.get("text", "") + for block in content_blocks + if isinstance(block, dict) and block.get("type") == "text" + ) + if user_text: + if saw_user and pending_assistant is not None: + selected_turns.append(pending_assistant) + pending_assistant = None + selected_turns.append( + TranscriptTurn(order=line_number, role="user", text=user_text) + ) + saw_user = True + continue + + if role == "assistant": + # Only include visible text, exclude thinking and toolCall + visible_reply = "".join( + block.get("text", "") + for block in content_blocks + if isinstance(block, dict) and block.get("type") == "text" + ) + if visible_reply: + pending_assistant = TranscriptTurn( + order=line_number, + role="assistant", + text=visible_reply, + ) + + if pending_assistant is not None: + selected_turns.append(pending_assistant) + + return selected_turns diff --git a/tests/test_run_phase.py b/tests/test_run_phase.py index 8d50fc1..918f2b4 100644 --- a/tests/test_run_phase.py +++ b/tests/test_run_phase.py @@ -511,6 +511,7 @@ def test_run_dispatches_with_host_backend_dry_run(self) -> None: env={ "PATH": f"{bin_dir}{os.pathsep}{os.environ.get('PATH', '')}", "CODEX_THREAD_ID": "thread-123", + "PI_CODING_AGENT": "", }, ) @@ -794,6 +795,7 @@ def test_prepare_auto_detects_opencode_transcript_cli_when_opencode_env_set(self "CLAUDECODE": "", "CODEX_THREAD_ID": "", "CODEX_HOME": "", + "PI_CODING_AGENT": "", }, ) diff --git a/tests/test_skill_md_pi_only.py b/tests/test_skill_md_pi_only.py new file mode 100644 index 0000000..3dc803b --- /dev/null +++ b/tests/test_skill_md_pi_only.py @@ -0,0 +1,138 @@ +"""Test that SKILL.md is Pi-only and fallback-runner-only. + +Validates the Task 7 acceptance criteria: +- No references to Claude Code, Codex, Kimi, or OpenCode backend dispatch +- Transcript canary instructions reference pi-cli +- Only fallback-runner mode (no native mode branches) +""" +import unittest +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +SKILL_MD = REPO_ROOT / "SKILL.md" + +# Terms that indicate non-Pi backend dispatch +FORBIDDEN_BACKEND_TERMS = [ + "Claude Code", + "claude-code", + "claude_code", + "Codex", + "codex", + "Kimi", + "kimi", + "OpenCode", + "opencode", +] + + +class SkillMdPiOnlyTests(unittest.TestCase): + content: str + lines: list[str] + + @classmethod + def setUpClass(cls) -> None: + cls.content = SKILL_MD.read_text() + cls.lines = cls.content.splitlines() + + def test_skill_md_exists(self) -> None: + self.assertTrue(SKILL_MD.exists(), "SKILL.md must exist") + + def test_no_forbidden_backend_references(self) -> None: + """SKILL.md must not reference Claude Code, Codex, Kimi, or OpenCode dispatch.""" + for term in FORBIDDEN_BACKEND_TERMS: + with self.subTest(term=term): + self.assertNotIn( + term, + self.content, + f"SKILL.md must not contain '{term}' — " + f"only Pi-specific instructions are allowed", + ) + + def test_no_native_mode_choice(self) -> None: + """SKILL.md must not tell the agent to choose between native and fallback-runner.""" + forbidden_phrases = [ + "Choose native mode", + "Choose the fallback-runner", + "In native mode", + "In fallback-runner mode", + "native mode", + ] + for phrase in forbidden_phrases: + with self.subTest(phrase=phrase): + self.assertNotIn( + phrase, + self.content, + f"SKILL.md must not contain '{phrase}' — " + f"only fallback-runner mode should be described", + ) + + def test_transcript_canary_references_pi_cli(self) -> None: + """Transcript/canary instructions must reference pi-cli.""" + self.assertIn( + "pi-cli", + self.content, + "SKILL.md transcript canary instructions must reference 'pi-cli'", + ) + + def test_fallback_runner_instructions_present(self) -> None: + """SKILL.md must describe using run_phase.py run (fallback-runner).""" + self.assertIn( + "run_phase.py run", + self.content, + "SKILL.md must describe using 'run_phase.py run' for fallback-runner mode", + ) + + def test_backend_host_instruction(self) -> None: + """SKILL.md must instruct passing --backend host for Pi subagents.""" + self.assertIn( + "--backend host", + self.content, + "SKILL.md must instruct '--backend host' for subagent dispatch", + ) + + def test_critical_rules_preserved(self) -> None: + """Critical rules about not killing agents and not busy-polling must be present.""" + critical = [ + "do not kill", + "timeout", + "busy-poll", + "60 minutes", + "180 minutes", + ] + content_lower = self.content.lower() + for phrase in critical: + with self.subTest(phrase=phrase): + self.assertIn( + phrase, + content_lower, + f"SKILL.md must preserve critical rule containing '{phrase}'", + ) + + def test_no_trycycle_codex_or_claude_model_env_vars(self) -> None: + """SKILL.md must not reference non-Pi model env vars.""" + forbidden_env = [ + "TRYCYCLE_CODEX_PROFILE", + "TRYCYCLE_CODEX_MODEL", + "TRYCYCLE_CLAUDE_MODEL", + "TRYCYCLE_KIMI_MODEL", + "TRYCYCLE_OPENCODE_MODEL", + ] + for var in forbidden_env: + with self.subTest(var=var): + self.assertNotIn( + var, + self.content, + f"SKILL.md must not reference non-Pi env var '{var}'", + ) + + def test_trycycle_pi_model_env_var_present(self) -> None: + """SKILL.md should reference TRYCYCLE_PI_MODEL.""" + self.assertIn( + "TRYCYCLE_PI_MODEL", + self.content, + "SKILL.md must reference TRYCYCLE_PI_MODEL for model override", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_subagent_runner.py b/tests/test_subagent_runner.py index 4ab0814..279402e 100644 --- a/tests/test_subagent_runner.py +++ b/tests/test_subagent_runner.py @@ -225,13 +225,14 @@ def test_probe_selects_kimi_when_it_is_the_only_available_backend(self) -> None: "CODEX_THREAD_ID": "", "CODEX_HOME": "", "OPENCODE": "", + "PI_CODING_AGENT": "", }, ) self.assertEqual(result.returncode, 0, result.stderr) payload = json.loads(result.stdout) self.assertEqual(payload["selected_backend"], "kimi") - self.assertEqual(payload["backend_order"], ["codex", "claude", "kimi", "opencode"]) + self.assertEqual(payload["backend_order"], ["codex", "claude", "kimi", "opencode", "pi"]) self.assertTrue(payload["backends"]["kimi"]["available"]) self.assertTrue(payload["backends"]["kimi"]["supports_resume"]) @@ -250,6 +251,7 @@ def test_probe_reports_codex_host_backend_when_codex_is_host(self) -> None: "PATH": str(bin_dir), "HOME": str(home_dir), "CODEX_THREAD_ID": "thread-123", + "PI_CODING_AGENT": "", }, ) @@ -289,6 +291,7 @@ def test_run_with_host_backend_uses_codex_when_codex_is_host(self) -> None: "PATH": str(bin_dir), "HOME": str(home_dir), "CODEX_THREAD_ID": "thread-123", + "PI_CODING_AGENT": "", }, ) @@ -331,6 +334,7 @@ def test_run_with_host_backend_escalates_when_host_is_unknown(self) -> None: "CODEX_THREAD_ID": "", "CODEX_HOME": "", "CLAUDECODE": "", + "PI_CODING_AGENT": "", }, ) @@ -1462,6 +1466,7 @@ def test_probe_detects_opencode_host_backend(self): "CLAUDECODE": "", "CODEX_THREAD_ID": "", "CODEX_HOME": "", + "PI_CODING_AGENT": "", }, ) @@ -1629,6 +1634,7 @@ def test_run_with_host_backend_uses_opencode_when_opencode_is_host(self): "CLAUDECODE": "", "CODEX_THREAD_ID": "", "CODEX_HOME": "", + "PI_CODING_AGENT": "", }, ) @@ -1823,6 +1829,379 @@ def test_reply_from_db_returns_last_assistant_text_from_multi_turn(self): self.assertEqual(reply, "second answer") +def _write_fake_pi_binary(bin_dir: Path) -> Path: + pi_path = bin_dir / "pi" + pi_path.write_text( + textwrap.dedent( + f"""\ + #!{sys.executable} + import json + import os + import sys + import uuid + from pathlib import Path + + def read_flag_value(flag): + if flag not in sys.argv: + return None + index = sys.argv.index(flag) + if index + 1 >= len(sys.argv): + return None + return sys.argv[index + 1] + + def append_log(): + log_path = os.environ.get("FAKE_PI_LOG") + if not log_path: + return + with open(log_path, "a", encoding="utf-8") as handle: + handle.write(json.dumps({{"argv": sys.argv[1:]}}) + "\\n") + + append_log() + + if "--help" in sys.argv or "-h" in sys.argv: + sys.stdout.write( + "pi - AI coding assistant\\n" + "--print, -p Non-interactive mode\\n" + "--session Use specific session\\n" + "--session-dir Directory for session storage\\n" + "--no-skills, -ns Disable skills\\n" + "--model Model pattern or ID\\n" + "--no-extensions, -ne Disable extensions\\n" + "--no-prompt-templates, -np Disable prompt templates\\n" + "--no-context-files, -nc Disable context files\\n" + ) + raise SystemExit(0) + + # Simulate -p (print/non-interactive) mode + session_dir = read_flag_value("--session-dir") + session_flag = read_flag_value("--session") + prompt_text = sys.stdin.read() + mode = os.environ.get("FAKE_PI_MODE", "success") + reply_text = os.environ.get("FAKE_PI_REPLY", "fake pi reply") + session_id = os.environ.get("FAKE_PI_SESSION_ID", str(uuid.uuid4())) + + if mode == "failure": + sys.stderr.write("Error: something went wrong\\n") + raise SystemExit(1) + + # If --session-dir is given, write a session JSONL file + if session_dir and not session_flag: + session_dir_path = Path(session_dir) + session_dir_path.mkdir(parents=True, exist_ok=True) + session_file = session_dir_path / f"session.jsonl" + header = {{"type": "session", "version": 3, "id": session_id, "timestamp": "2026-05-04T00:00:00.000Z", "cwd": "/tmp"}} + session_file.write_text(json.dumps(header) + "\\n", encoding="utf-8") + + sys.stdout.write(reply_text) + raise SystemExit(0) + """ + ), + encoding="utf-8", + ) + pi_path.chmod(0o755) + return pi_path + + +class PiBackendTests(unittest.TestCase): + def run_runner( + self, + *args: str, + env: dict[str, str] | None = None, + ) -> subprocess.CompletedProcess[str]: + merged_env = os.environ.copy() + if env: + merged_env.update(env) + return subprocess.run( + [sys.executable, str(SUBAGENT_RUNNER), *args], + text=True, + capture_output=True, + check=False, + env=merged_env, + ) + + def test_probe_detects_pi_backend(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + bin_dir = tmp_path / "bin" + home_dir = tmp_path / "home" + bin_dir.mkdir() + home_dir.mkdir() + _write_fake_pi_binary(bin_dir) + + result = self.run_runner( + "probe", + env={ + "PATH": str(bin_dir), + "HOME": str(home_dir), + "CLAUDECODE": "", + "CODEX_THREAD_ID": "", + "CODEX_HOME": "", + "OPENCODE": "", + "PI_CODING_AGENT": "", + }, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertTrue(payload["backends"]["pi"]["available"]) + self.assertTrue(payload["backends"]["pi"]["supports_resume"]) + + def test_probe_detects_pi_host_backend(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + bin_dir = tmp_path / "bin" + home_dir = tmp_path / "home" + bin_dir.mkdir() + home_dir.mkdir() + _write_fake_pi_binary(bin_dir) + + result = self.run_runner( + "probe", + env={ + "PATH": str(bin_dir), + "HOME": str(home_dir), + "PI_CODING_AGENT": "true", + "CLAUDECODE": "", + "CODEX_THREAD_ID": "", + "CODEX_HOME": "", + "OPENCODE": "", + }, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["host_backend"], "pi") + self.assertEqual(payload["selected_backend"], "pi") + + def test_run_with_pi_backend_dry_run_returns_ok(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + bin_dir = tmp_path / "bin" + home_dir = tmp_path / "home" + workdir = tmp_path / "work" + artifacts_dir = tmp_path / "artifacts" + prompt_path = tmp_path / "prompt.txt" + bin_dir.mkdir() + home_dir.mkdir() + workdir.mkdir() + prompt_path.write_text("pi dry run test\n", encoding="utf-8") + fake_pi = _write_fake_pi_binary(bin_dir) + + result = self.run_runner( + "run", + "--phase", "smoke", + "--prompt-file", str(prompt_path), + "--workdir", str(workdir), + "--artifacts-dir", str(artifacts_dir), + "--backend", "pi", + "--dry-run", + env={ + "PATH": str(bin_dir), + "HOME": str(home_dir), + }, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "ok") + self.assertEqual(payload["backend"], "pi") + command = payload["process"]["command"] + self.assertEqual(command[0], str(fake_pi)) + self.assertIn("-p", command) + self.assertIn("--session-dir", command) + self.assertIn("--no-skills", command) + self.assertIn("--no-extensions", command) + self.assertIn("--no-prompt-templates", command) + self.assertIn("--no-context-files", command) + + def test_run_with_pi_backend_returns_ok(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + bin_dir = tmp_path / "bin" + home_dir = tmp_path / "home" + workdir = tmp_path / "work" + artifacts_dir = tmp_path / "artifacts" + prompt_path = tmp_path / "prompt.txt" + bin_dir.mkdir() + home_dir.mkdir() + workdir.mkdir() + prompt_path.write_text("pi live run test\n", encoding="utf-8") + _write_fake_pi_binary(bin_dir) + + session_id = "019dc509-cf06-7708-87a6-5f302e2416ce" + + result = self.run_runner( + "run", + "--phase", "smoke", + "--prompt-file", str(prompt_path), + "--workdir", str(workdir), + "--artifacts-dir", str(artifacts_dir), + "--backend", "pi", + env={ + "PATH": str(bin_dir), + "HOME": str(home_dir), + "FAKE_PI_MODE": "success", + "FAKE_PI_REPLY": "pi test reply", + "FAKE_PI_SESSION_ID": session_id, + }, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "ok") + self.assertEqual(payload["backend"], "pi") + self.assertEqual(payload["session_id"], session_id) + reply_path = Path(payload["reply_path"]) + self.assertEqual(reply_path.read_text(encoding="utf-8"), "pi test reply") + + def test_resume_with_pi_backend_dry_run(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + bin_dir = tmp_path / "bin" + home_dir = tmp_path / "home" + workdir = tmp_path / "work" + artifacts_dir = tmp_path / "artifacts" + prompt_path = tmp_path / "prompt.txt" + bin_dir.mkdir() + home_dir.mkdir() + workdir.mkdir() + prompt_path.write_text("pi resume dry run\n", encoding="utf-8") + fake_pi = _write_fake_pi_binary(bin_dir) + + result = self.run_runner( + "resume", + "--phase", "execute", + "--session-id", "session-file-123", + "--prompt-file", str(prompt_path), + "--workdir", str(workdir), + "--artifacts-dir", str(artifacts_dir), + "--backend", "pi", + "--dry-run", + env={ + "PATH": str(bin_dir), + "HOME": str(home_dir), + }, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "ok") + self.assertEqual(payload["backend"], "pi") + command = payload["process"]["command"] + self.assertEqual(command[0], str(fake_pi)) + self.assertIn("-p", command) + self.assertIn("--session", command) + self.assertIn("session-file-123", command) + self.assertIn("--no-skills", command) + + def test_run_with_pi_backend_model_override_from_env(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + bin_dir = tmp_path / "bin" + home_dir = tmp_path / "home" + workdir = tmp_path / "work" + artifacts_dir = tmp_path / "artifacts" + prompt_path = tmp_path / "prompt.txt" + bin_dir.mkdir() + home_dir.mkdir() + workdir.mkdir() + prompt_path.write_text("model override test\n", encoding="utf-8") + _write_fake_pi_binary(bin_dir) + + result = self.run_runner( + "run", + "--phase", "smoke", + "--prompt-file", str(prompt_path), + "--workdir", str(workdir), + "--artifacts-dir", str(artifacts_dir), + "--backend", "pi", + "--dry-run", + env={ + "PATH": str(bin_dir), + "HOME": str(home_dir), + "TRYCYCLE_PI_MODEL": "glm-5.1:high", + }, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "ok") + command = payload["process"]["command"] + self.assertIn("--model", command) + self.assertIn("glm-5.1:high", command) + + def test_run_with_host_backend_uses_pi_when_pi_is_host(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + bin_dir = tmp_path / "bin" + home_dir = tmp_path / "home" + workdir = tmp_path / "work" + artifacts_dir = tmp_path / "artifacts" + prompt_path = tmp_path / "prompt.txt" + bin_dir.mkdir() + home_dir.mkdir() + workdir.mkdir() + prompt_path.write_text("host backend dry run\n", encoding="utf-8") + fake_pi = _write_fake_pi_binary(bin_dir) + + result = self.run_runner( + "run", + "--phase", "smoke", + "--prompt-file", str(prompt_path), + "--workdir", str(workdir), + "--artifacts-dir", str(artifacts_dir), + "--backend", "host", + "--dry-run", + env={ + "PATH": str(bin_dir), + "HOME": str(home_dir), + "PI_CODING_AGENT": "true", + "CLAUDECODE": "", + "CODEX_THREAD_ID": "", + "CODEX_HOME": "", + "OPENCODE": "", + }, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "ok") + self.assertEqual(payload["backend"], "pi") + self.assertEqual(payload["process"]["command"][0], str(fake_pi)) + + def test_run_with_pi_backend_escalates_on_failure(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + bin_dir = tmp_path / "bin" + home_dir = tmp_path / "home" + workdir = tmp_path / "work" + artifacts_dir = tmp_path / "artifacts" + prompt_path = tmp_path / "prompt.txt" + bin_dir.mkdir() + home_dir.mkdir() + workdir.mkdir() + prompt_path.write_text("failing prompt\n", encoding="utf-8") + _write_fake_pi_binary(bin_dir) + + result = self.run_runner( + "run", + "--phase", "smoke", + "--prompt-file", str(prompt_path), + "--workdir", str(workdir), + "--artifacts-dir", str(artifacts_dir), + "--backend", "pi", + env={ + "PATH": str(bin_dir), + "HOME": str(home_dir), + "FAKE_PI_MODE": "failure", + }, + ) + + self.assertEqual(result.returncode, 1, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "escalate_to_user") + + def _create_opencode_session_db(db_path: Path, session_id: str, messages: list[dict]) -> None: """Create a minimal OpenCode SQLite DB for testing reply extraction from DB.""" conn = sqlite3.connect(str(db_path)) diff --git a/tests/test_user_request_transcript_build.py b/tests/test_user_request_transcript_build.py index 9a3d209..4714919 100644 --- a/tests/test_user_request_transcript_build.py +++ b/tests/test_user_request_transcript_build.py @@ -1128,5 +1128,293 @@ def test_opencode_canary_timeout_when_canary_not_in_session(self): self.assertIn("canary", result.stderr.lower()) +def _write_pi_session( + sessions_root: Path, + *, + cwd: str = "/tmp/project", + session_id: str = "019dc509-cf06-7708-87a6-5f302e2416ce", + records: list[dict] | None = None, +) -> Path: + """Write a Pi-format JSONL session file in the correct session directory. + + Pi stores sessions under sessions_root/--{encoded_cwd}--/*.jsonl + """ + encoded = cwd.lstrip("/").replace("/", "-").replace(":", "-") + session_dir = sessions_root / f"--{encoded}--" + session_dir.mkdir(parents=True, exist_ok=True) + session_path = session_dir / f"{session_id}.jsonl" + + all_records: list[dict] = [ + { + "type": "session", + "version": 3, + "id": session_id, + "timestamp": "2026-05-04T00:00:00.000Z", + "cwd": cwd, + }, + ] + if records: + all_records.extend(records) + + _write_jsonl(session_path, all_records) + return session_path + + +def _pi_message( + role: str, + content: list[dict], +) -> dict: + """Build a Pi JSONL message record.""" + return { + "type": "message", + "message": { + "role": role, + "content": content, + }, + } + + +def _text_block(text: str) -> dict: + return {"type": "text", "text": text} + + +def _thinking_block(text: str) -> dict: + return {"type": "thinking", "text": text} + + +def _tool_call_block(text: str) -> dict: + return {"type": "toolCall", "text": text} + + +class PiCliTranscriptTests(UserRequestTranscriptBuildTests): + def test_pi_canary_finds_correct_session(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + sessions_root = tmp_path / "sessions" + canary = "trycycle-canary-pi-12345678901234567890" + + _write_pi_session( + sessions_root, + cwd="/tmp/project", + session_id="ses-001", + records=[ + _pi_message("user", [_text_block(f"Build something {canary}")]), + _pi_message("assistant", [_text_block("I'll help you build that.")]), + ], + ) + + result = self.run_builder( + "--cli", "pi-cli", + "--canary", canary, + "--search-root", str(sessions_root), + "--timeout-ms", "1000", + env={"HOME": str(tmp_path)}, + ) + self.assertEqual(result.returncode, 0, result.stderr) + turns = json.loads(result.stdout) + self.assertEqual(len(turns), 2) + self.assertEqual(turns[0]["role"], "user") + self.assertIn(canary, turns[0]["text"]) + self.assertEqual(turns[1]["role"], "assistant") + self.assertEqual(turns[1]["text"], "I'll help you build that.") + + def test_pi_multi_turn_transcript(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + sessions_root = tmp_path / "sessions" + canary = "trycycle-canary-pi-multi-turn" + + _write_pi_session( + sessions_root, + cwd="/tmp/project", + session_id="ses-multi", + records=[ + _pi_message("user", [_text_block(f"user1 {canary}")]), + _pi_message("assistant", [_text_block("assistant1")]), + _pi_message("user", [_text_block("user2")]), + _pi_message("assistant", [_text_block("assistant2")]), + ], + ) + + result = self.run_builder( + "--cli", "pi-cli", + "--canary", canary, + "--search-root", str(sessions_root), + "--timeout-ms", "1000", + env={"HOME": str(tmp_path)}, + ) + self.assertEqual(result.returncode, 0, result.stderr) + turns = json.loads(result.stdout) + self.assertEqual(len(turns), 4) + self.assertEqual(turns[0]["role"], "user") + self.assertEqual(turns[1]["role"], "assistant") + self.assertEqual(turns[2]["role"], "user") + self.assertEqual(turns[3]["role"], "assistant") + + def test_pi_transcript_filters_thinking_and_tool_call(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + sessions_root = tmp_path / "sessions" + canary = "trycycle-canary-pi-filter" + + _write_pi_session( + sessions_root, + cwd="/tmp/project", + session_id="ses-filter", + records=[ + _pi_message("user", [_text_block(f"visible user {canary}")]), + _pi_message( + "assistant", + [ + _thinking_block("internal reasoning"), + _tool_call_block("bash command output"), + _text_block("visible reply"), + ], + ), + ], + ) + + result = self.run_builder( + "--cli", "pi-cli", + "--canary", canary, + "--search-root", str(sessions_root), + "--timeout-ms", "1000", + env={"HOME": str(tmp_path)}, + ) + self.assertEqual(result.returncode, 0, result.stderr) + turns = json.loads(result.stdout) + self.assertEqual(len(turns), 2) + self.assertEqual(turns[0]["role"], "user") + self.assertIn(canary, turns[0]["text"]) + self.assertEqual(turns[1]["role"], "assistant") + self.assertEqual(turns[1]["text"], "visible reply") + # Verify thinking/toolCall content is excluded + self.assertNotIn("internal reasoning", turns[1]["text"]) + self.assertNotIn("bash command output", turns[1]["text"]) + + def test_pi_transcript_skips_non_message_records(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + sessions_root = tmp_path / "sessions" + canary = "trycycle-canary-pi-skip-meta" + + _write_pi_session( + sessions_root, + cwd="/tmp/project", + session_id="ses-meta", + records=[ + {"type": "model_change", "id": "mc1", "provider": "test", "modelId": "gpt-4"}, + _pi_message("user", [_text_block(f"visible user {canary}")]), + {"type": "model_change", "id": "mc2", "provider": "test", "modelId": "gpt-4"}, + _pi_message("assistant", [_text_block("visible reply")]), + ], + ) + + result = self.run_builder( + "--cli", "pi-cli", + "--canary", canary, + "--search-root", str(sessions_root), + "--timeout-ms", "1000", + env={"HOME": str(tmp_path)}, + ) + self.assertEqual(result.returncode, 0, result.stderr) + turns = json.loads(result.stdout) + self.assertEqual(len(turns), 2) + self.assertEqual(turns[0]["role"], "user") + self.assertEqual(turns[1]["role"], "assistant") + + def test_pi_transcript_fails_gracefully_when_sessions_root_missing(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + sessions_root = tmp_path / "nonexistent" + + result = self.run_builder( + "--cli", "pi-cli", + "--canary", "some-canary", + "--search-root", str(sessions_root), + "--timeout-ms", "500", + env={"HOME": str(tmp_path)}, + ) + self.assertEqual(result.returncode, 1) + self.assertIn("does not exist", result.stderr) + + def test_pi_canary_timeout_when_canary_not_in_any_session(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + sessions_root = tmp_path / "sessions" + canary = "trycycle-canary-pi-notfound" + + _write_pi_session( + sessions_root, + cwd="/tmp/project", + session_id="ses-empty", + records=[ + _pi_message("user", [_text_block("no canary here")]), + ], + ) + + result = self.run_builder( + "--cli", "pi-cli", + "--canary", canary, + "--search-root", str(sessions_root), + "--timeout-ms", "500", + env={"HOME": str(tmp_path)}, + ) + self.assertEqual(result.returncode, 1) + self.assertIn("canary", result.stderr.lower()) + + def test_pi_encode_cwd(self) -> None: + """Unit test for _encode_cwd path encoding.""" + sys.path.insert(0, str(TRANSCRIPT_MODULE_ROOT)) + try: + import pi_cli # type: ignore + finally: + sys.path.pop(0) + + # Standard path + self.assertEqual(pi_cli._encode_cwd("/tmp/project"), "--tmp-project--") + # Root path + self.assertEqual(pi_cli._encode_cwd("/"), "----") + # Path with colon + self.assertEqual(pi_cli._encode_cwd("/C:/Users/test"), "--C--Users-test--") + # Nested path + self.assertEqual( + pi_cli._encode_cwd("/home/user/repos/my-project"), + "--home-user-repos-my-project--", + ) + + def test_pi_extract_transcript_keeps_last_assistant_per_user_interval(self) -> None: + """Multiple assistant messages between user messages: keep the last one.""" + sys.path.insert(0, str(TRANSCRIPT_MODULE_ROOT)) + try: + import pi_cli # type: ignore + finally: + sys.path.pop(0) + + with tempfile.TemporaryDirectory() as tmpdir: + session_path = Path(tmpdir) / "session.jsonl" + _write_jsonl( + session_path, + [ + {"type": "session", "version": 3, "id": "test", "timestamp": "", "cwd": "/tmp"}, + _pi_message("user", [_text_block("first user")]), + _pi_message("assistant", [_text_block("intermediate")]), + _pi_message("assistant", [_text_block("final assistant")]), + _pi_message("user", [_text_block("second user")]), + _pi_message("assistant", [_text_block("second reply")]), + ], + ) + + turns = pi_cli.extract_transcript(session_path) + # First user + intermediate assistant gets flushed when second user arrives, + # but then replaced by final assistant. So: + # user1, assistant(final assistant), user2, assistant(second reply) + self.assertEqual(len(turns), 4) + self.assertEqual(turns[0].text, "first user") + self.assertEqual(turns[1].text, "final assistant") + self.assertEqual(turns[2].text, "second user") + self.assertEqual(turns[3].text, "second reply") + + if __name__ == "__main__": unittest.main()