From ac8690d862498aa7c4872a58a5393d5491f39a20 Mon Sep 17 00:00:00 2001
From: Friende <35026241+pengyou200902@users.noreply.github.com>
Date: Mon, 4 May 2026 18:56:56 -0400
Subject: [PATCH] fix: harden rescue task handoff failures

---
 plugins/codex/agents/codex-rescue.md          |   7 +-
 plugins/codex/commands/rescue.md              |   2 +
 plugins/codex/scripts/codex-companion.mjs     |  24 +++-
 plugins/codex/scripts/lib/codex.mjs           |   3 +-
 .../codex/skills/codex-cli-runtime/SKILL.md   |   7 +-
 tests/commands.test.mjs                       |  19 ++-
 tests/runtime.test.mjs                        | 110 ++++++++++++++++++
 7 files changed, 158 insertions(+), 14 deletions(-)
diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md
index 7009ec86..900bb8f7 100644
--- a/plugins/codex/agents/codex-rescue.md
+++ b/plugins/codex/agents/codex-rescue.md
@@ -30,6 +30,7 @@ Forwarding rules:
 - Leave model unset by default. Only add `--model` when the user explicitly asks for a specific model.
 - If the user asks for `spark`, map that to `--model gpt-5.3-codex-spark`.
 - If the user asks for a concrete model name such as `gpt-5.4-mini`, pass it through with `--model`.
+- Copy the user's requested model string into `--model` character-for-character, except for the documented `spark` alias. Do not substitute, shorten, normalize, or guess model names. Never emit a model name the user did not literally type or whose alias they did not literally type.
 - Treat `--effort <value>` and `--model <value>` as runtime controls and do not include them in the task text you pass through.
 - Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.
 - Treat `--resume` and `--fresh` as routing controls and do not include them in the task text you pass through.
@@ -38,8 +39,10 @@ Forwarding rules:
 - If the user is clearly asking to continue prior Codex work in this repository, such as "continue", "keep going", "resume", "apply the top fix", or "dig deeper", add `--resume-last` unless `--fresh` is present.
 - Otherwise forward the task as a fresh `task` run.
 - Preserve the user's task text as-is apart from stripping routing flags.
-- Return the stdout of the `codex-companion` command exactly as-is.
-- If the Bash call fails or Codex cannot be invoked, return nothing.
+- Pass the forwarded task text on stdin with `task --stdin`, not as a shell-quoted CLI argument.
+- Do not inline multi-line prompts as a quoted argument or as `"$(cat ...)"`; that re-expands prompt content through shell quoting.
+- Return the stdout of the `codex-companion` command exactly as-is when it succeeds.
+- If the Bash call fails or Codex cannot be invoked, return the failure output exactly once and stop. Do not retry, do not make another `task` invocation, and do not return an empty response.
 
 Response style:
 
diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md
index 56de9555..f9e18e0f 100644
--- a/plugins/codex/commands/rescue.md
+++ b/plugins/codex/commands/rescue.md
@@ -39,6 +39,8 @@ node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task-resume-candidate -
 Operating rules:
 
 - The subagent is a thin forwarder only. It should use one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...` and return that command's stdout as-is.
+- The subagent must pass forwarded task text on stdin with `task --stdin`, not as a shell-quoted prompt argument.
+- If the subagent's Bash call fails, it must return the failure output once and stop rather than retrying or returning an empty response.
 - Return the Codex companion stdout verbatim to the user.
 - Do not paraphrase, summarize, rewrite, or add commentary before or after it.
 - Do not ask the subagent to inspect files, monitor progress, poll `/codex:status`, fetch `/codex:result`, call `/codex:cancel`, summarize output, or do follow-up work of its own.
diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
index 35222fd5..37132cb9 100644
--- a/plugins/codex/scripts/codex-companion.mjs
+++ b/plugins/codex/scripts/codex-companion.mjs
@@ -77,7 +77,7 @@ function printUsage() {
       "  node scripts/codex-companion.mjs setup [--enable-review-gate|--disable-review-gate] [--json]",
       "  node scripts/codex-companion.mjs review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>]",
       "  node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>] [focus text]",
-      "  node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [prompt]",
+      "  node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [--stdin|--prompt-file <path>|prompt]",
       "  node scripts/codex-companion.mjs status [job-id] [--all] [--json]",
       "  node scripts/codex-companion.mjs result [job-id] [--json]",
       "  node scripts/codex-companion.mjs cancel [job-id] [--json]"
@@ -610,7 +610,20 @@ function buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId
   };
 }
 
-function readTaskPrompt(cwd, options, positionals) {
+function readTaskPrompt(cwd, options, positionals, context = {}) {
+  if (options.stdin && options["prompt-file"]) {
+    throw new Error("Choose either --stdin or --prompt-file.");
+  }
+  if (options.stdin && positionals.length > 0) {
+    throw new Error("Do not pass positional prompt text when using --stdin.");
+  }
+  if (options.stdin) {
+    const prompt = readStdinIfPiped();
+    if (!prompt && !context.resumeLast) {
+      throw new Error("Expected prompt text on stdin.");
+    }
+    return prompt;
+  }
   if (options["prompt-file"]) {
     return fs.readFileSync(path.resolve(cwd, options["prompt-file"]), "utf8");
   }
@@ -732,7 +745,7 @@ async function handleReview(argv) {
 async function handleTask(argv) {
   const { options, positionals } = parseCommandInput(argv, {
     valueOptions: ["model", "effort", "cwd", "prompt-file"],
-    booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background"],
+    booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background", "stdin"],
     aliasMap: {
       m: "model"
     }
@@ -742,13 +755,14 @@ async function handleTask(argv) {
   const workspaceRoot = resolveCommandWorkspace(options);
   const model = normalizeRequestedModel(options.model);
   const effort = normalizeReasoningEffort(options.effort);
-  const prompt = readTaskPrompt(cwd, options, positionals);
-
   const resumeLast = Boolean(options["resume-last"] || options.resume);
   const fresh = Boolean(options.fresh);
   if (resumeLast && fresh) {
     throw new Error("Choose either --resume/--resume-last or --fresh.");
   }
+  const prompt = readTaskPrompt(cwd, options, positionals, {
+    resumeLast
+  });
   const write = Boolean(options.write);
   const taskMetadata = buildTaskRunMetadata({
     prompt,
diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs
index f2fe88bd..2db9d1fb 100644
--- a/plugins/codex/scripts/lib/codex.mjs
+++ b/plugins/codex/scripts/lib/codex.mjs
@@ -60,8 +60,7 @@ function buildThreadParams(cwd, options = {}) {
     approvalPolicy: options.approvalPolicy ?? "never",
     sandbox: options.sandbox ?? "read-only",
     serviceName: SERVICE_NAME,
-    ephemeral: options.ephemeral ?? true,
-    experimentalRawEvents: false
+    ephemeral: options.ephemeral ?? true
   };
 }
 
diff --git a/plugins/codex/skills/codex-cli-runtime/SKILL.md b/plugins/codex/skills/codex-cli-runtime/SKILL.md
index 0e91bfb5..c7348fce 100644
--- a/plugins/codex/skills/codex-cli-runtime/SKILL.md
+++ b/plugins/codex/skills/codex-cli-runtime/SKILL.md
@@ -9,7 +9,7 @@ user-invocable: false
 Use this skill only inside the `codex:codex-rescue` subagent.
 
 Primary helper:
-- `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task "<raw arguments>"`
+- `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task --stdin ...`
 
 Execution rules:
 - The rescue subagent is a forwarder, not an orchestrator. Its only job is to invoke `task` once and return that stdout unchanged.
@@ -21,6 +21,7 @@ Execution rules:
 - Leave `--effort` unset unless the user explicitly requests a specific effort.
 - Leave model unset by default. Add `--model` only when the user explicitly asks for one.
 - Map `spark` to `--model gpt-5.3-codex-spark`.
+- Copy the user's requested model string into `--model` character-for-character, except for the documented `spark` alias. Do not substitute, shorten, normalize, or guess model names.
 - Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.
 
 Command selection:
@@ -34,10 +35,12 @@ Command selection:
 - `--fresh`: always use a fresh `task` run, even if the request sounds like a follow-up.
 - `--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`.
 - `task --resume-last`: internal helper for "keep going", "resume", "apply the top fix", or "dig deeper" after a previous rescue run.
+- Pass the forwarded task text on stdin with `task --stdin`.
+- Do not pass raw task text as a shell-quoted CLI argument, and do not use `"$(cat ...)"` to inline file contents.
 
 Safety rules:
 - Default to write-capable Codex work in `codex:codex-rescue` unless the user explicitly asks for read-only behavior.
 - Preserve the user's task text as-is apart from stripping routing flags.
 - Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own.
 - Return the stdout of the `task` command exactly as-is.
-- If the Bash call fails or Codex cannot be invoked, return nothing.
+- If the Bash call fails or Codex cannot be invoked, return the failure output exactly once and stop. Do not retry, do not make another `task` invocation, and do not return an empty response.
diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs
index 3724ffa4..d2bb8bf3 100644
--- a/tests/commands.test.mjs
+++ b/tests/commands.test.mjs
@@ -135,11 +135,19 @@ test("rescue command absorbs continue semantics", () => {
   assert.match(agent, /Leave model unset by default/i);
   assert.match(agent, /If the user asks for `spark`, map that to `--model gpt-5\.3-codex-spark`/i);
   assert.match(agent, /If the user asks for a concrete model name such as `gpt-5\.4-mini`, pass it through with `--model`/i);
+  assert.match(agent, /Copy the user's requested model string into `--model` character-for-character, except for the documented `spark` alias/i);
+  assert.match(agent, /Never emit a model name the user did not literally type or whose alias they did not literally type/i);
+  assert.match(agent, /Pass the forwarded task text on stdin with `task --stdin`, not as a shell-quoted CLI argument/i);
+  assert.doesNotMatch(agent, /mktemp|chmod 600|trap/);
+  assert.match(agent, /Do not inline multi-line prompts as a quoted argument or as `"\$\(cat \.\.\.\)"`/i);
   assert.match(agent, /Return the stdout of the `codex-companion` command exactly as-is/i);
-  assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
+  assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return the failure output exactly once and stop/i);
+  assert.match(agent, /Do not retry, do not make another `task` invocation, and do not return an empty response/i);
+  assert.doesNotMatch(agent, /return nothing/i);
   assert.match(agent, /gpt-5-4-prompting/);
   assert.match(agent, /only to tighten the user's request into a better Codex prompt/i);
   assert.match(agent, /Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work/i);
+  assert.match(runtimeSkill, /codex-companion\.mjs" task --stdin \.\.\./);
   assert.match(runtimeSkill, /only job is to invoke `task` once and return that stdout unchanged/i);
   assert.match(runtimeSkill, /Do not call `setup`, `review`, `adversarial-review`, `status`, `result`, or `cancel`/i);
   assert.match(runtimeSkill, /use the `gpt-5-4-prompting` skill to rewrite the user's request into a tighter Codex prompt/i);
@@ -147,11 +155,16 @@ test("rescue command absorbs continue semantics", () => {
   assert.match(runtimeSkill, /Leave `--effort` unset unless the user explicitly requests a specific effort/i);
   assert.match(runtimeSkill, /Leave model unset by default/i);
   assert.match(runtimeSkill, /Map `spark` to `--model gpt-5\.3-codex-spark`/i);
+  assert.match(runtimeSkill, /Copy the user's requested model string into `--model` character-for-character, except for the documented `spark` alias/i);
   assert.match(runtimeSkill, /If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only/i);
   assert.match(runtimeSkill, /Strip it before calling `task`/i);
   assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i);
+  assert.match(runtimeSkill, /Pass the forwarded task text on stdin with `task --stdin`/i);
+  assert.match(runtimeSkill, /Do not pass raw task text as a shell-quoted CLI argument/i);
   assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i);
-  assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
+  assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return the failure output exactly once and stop/i);
+  assert.match(runtimeSkill, /Do not retry, do not make another `task` invocation, and do not return an empty response/i);
+  assert.doesNotMatch(runtimeSkill, /return nothing/i);
   assert.match(readme, /`codex:codex-rescue` subagent/i);
   assert.match(readme, /if you do not pass `--model` or `--effort`, Codex chooses its own defaults/i);
   assert.match(readme, /--model gpt-5\.4-mini --effort medium/i);
@@ -186,7 +199,7 @@ test("internal docs use task terminology for rescue runs", () => {
   const promptingSkill = read("skills/gpt-5-4-prompting/SKILL.md");
   const promptRecipes = read("skills/gpt-5-4-prompting/references/codex-prompt-recipes.md");
 
-  assert.match(runtimeSkill, /codex-companion\.mjs" task "<raw arguments>"/);
+  assert.match(runtimeSkill, /codex-companion\.mjs" task --stdin \.\.\./);
   assert.match(runtimeSkill, /Use `task` for every rescue request/i);
   assert.match(runtimeSkill, /task --resume-last/i);
   assert.match(promptingSkill, /Use `task` when the task is diagnosis/i);
diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs
index 90408372..c9678270 100644
--- a/tests/runtime.test.mjs
+++ b/tests/runtime.test.mjs
@@ -627,6 +627,116 @@ test("task --fresh is treated as routing control and does not leak into the prom
   assert.equal(fakeState.lastTurnStart.prompt, "diagnose the flaky test");
 });
 
+test("task --prompt-file preserves multiline shell-sensitive prompt text", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  const statePath = path.join(binDir, "fake-codex-state.json");
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+
+  const prompt = "diagnose quoting\nliteral $(echo should-not-run)\nquote ' and double \" and backslash \\";
+  const promptFile = path.join(repo, "prompt.txt");
+  fs.writeFileSync(promptFile, prompt, "utf8");
+
+  const result = run("node", [SCRIPT, "task", "--prompt-file", promptFile], {
+    cwd: repo,
+    env: buildEnv(binDir)
+  });
+
+  assert.equal(result.status, 0, result.stderr);
+  const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8"));
+  assert.equal(fakeState.lastTurnStart.prompt, prompt);
+});
+
+test("task --stdin preserves multiline shell-sensitive prompt text", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  const statePath = path.join(binDir, "fake-codex-state.json");
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+
+  const prompt = "diagnose quoting\nliteral $(echo should-not-run)\nquote ' and double \" and backslash \\";
+  const result = run("node", [SCRIPT, "task", "--stdin"], {
+    cwd: repo,
+    env: buildEnv(binDir),
+    input: prompt
+  });
+
+  assert.equal(result.status, 0, result.stderr);
+  const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8"));
+  assert.equal(fakeState.lastTurnStart.prompt, prompt);
+});
+
+test("task --stdin rejects mixed prompt sources", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+
+  const withPositional = run("node", [SCRIPT, "task", "--stdin", "diagnose"], {
+    cwd: repo,
+    env: buildEnv(binDir),
+    input: "from stdin"
+  });
+  assert.notEqual(withPositional.status, 0);
+  assert.match(withPositional.stderr, /Do not pass positional prompt text when using --stdin/);
+
+  const promptFile = path.join(repo, "prompt.txt");
+  fs.writeFileSync(promptFile, "from file", "utf8");
+  const withPromptFile = run("node", [SCRIPT, "task", "--stdin", "--prompt-file", promptFile], {
+    cwd: repo,
+    env: buildEnv(binDir),
+    input: "from stdin"
+  });
+  assert.notEqual(withPromptFile.status, 0);
+  assert.match(withPromptFile.stderr, /Choose either --stdin or --prompt-file/);
+});
+
+test("task --stdin allows empty prompt only when resuming", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  const statePath = path.join(binDir, "fake-codex-state.json");
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+
+  const firstRun = run("node", [SCRIPT, "task", "initial task"], {
+    cwd: repo,
+    env: buildEnv(binDir)
+  });
+  assert.equal(firstRun.status, 0, firstRun.stderr);
+
+  const resume = run("node", [SCRIPT, "task", "--stdin", "--resume-last"], {
+    cwd: repo,
+    env: buildEnv(binDir),
+    input: ""
+  });
+
+  assert.equal(resume.status, 0, resume.stderr);
+  const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8"));
+  assert.equal(fakeState.lastTurnStart.threadId, "thr_1");
+  assert.equal(fakeState.lastTurnStart.prompt, "Continue from the current thread state. Pick the next highest-value step and follow through until the task is resolved.");
+
+  const fresh = run("node", [SCRIPT, "task", "--stdin"], {
+    cwd: repo,
+    env: buildEnv(binDir),
+    input: ""
+  });
+  assert.notEqual(fresh.status, 0);
+  assert.match(fresh.stderr, /Expected prompt text on stdin/);
+});
+
 test("task forwards model selection and reasoning effort to app-server turn/start", () => {
   const repo = makeTempDir();
   const binDir = makeTempDir();