From 8d2a6ade0d6fb488dd87c2d0b95d2eb1580edcd8 Mon Sep 17 00:00:00 2001
From: cardene777 <cardene777@gmail.com>
Date: Mon, 11 May 2026 21:01:10 +0900
Subject: [PATCH] fix: cap adversarial review prompt size to stay under Codex
 API 1MB input limit

The `adversarial-review --wait` command fails on heavy PRs with
`Input exceeds the maximum length of 1048576 characters.` even after PR
#179 capped the embedded diff to 256KB. The Codex API thread input is
hard-limited to 1,048,576 characters, and the template plus 256KB diff
plus collection guidance plus focus text can still cross that line in
practice, especially when the diff contains multi-byte UTF-8.

Add `MAX_ADVERSARIAL_PROMPT_BYTES = 850 * 1024` and
`MAX_ADVERSARIAL_PROMPT_CHARS = 900 * 1024` budgets and a small fallback
chain inside `buildAdversarialReviewPrompt`:

1. render the full prompt and return it if it already fits both budgets;
2. otherwise binary-search the largest prefix of `REVIEW_INPUT` that fits,
   append a truncation notice, and re-render;
3. if no useful prefix survives, drop the inline diff entirely and switch
   the collection guidance to the existing lightweight self-collect path.

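Both budgets are checked together. Because UTF-8 never uses fewer bytes
than characters and the byte budget is the smaller of the two, the byte
budget is the binding constraint in practice (including for emoji-heavy
diffs); the char budget is kept as an explicit backstop against the API's
character-based limit. `USER_FOCUS` is never dropped, since it is small
and high-signal.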

The helper is now exported alongside the budget constants so it can be
unit-tested directly. The bottom-of-file `main()` call is guarded behind
`isDirectInvocation()` so importing the module from tests no longer
runs the CLI entry point.

New tests in `tests/adversarial-prompt-cap.test.mjs` cover: small input
passthrough, ~900KB ASCII content truncation, truncation marker presence,
unfittable (5MB) input falling back to self-collect mode, multi-byte
content respecting both budgets, `USER_FOCUS` preservation, and a sanity
check that the byte/char budgets stay under the 1,048,576-char API cap.

Refs: cardene777/claude-config#1467
---
 plugins/codex/scripts/codex-companion.mjs | 135 ++++++++++++++++++++--
 tests/adversarial-prompt-cap.test.mjs     | 107 +++++++++++++++++
 2 files changed, 231 insertions(+), 11 deletions(-)
 create mode 100644 tests/adversarial-prompt-cap.test.mjs

diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
index 35222fd5..3f4b22b3 100644
--- a/plugins/codex/scripts/codex-companion.mjs
+++ b/plugins/codex/scripts/codex-companion.mjs
@@ -235,15 +235,115 @@ async function handleSetup(argv) {
   outputResult(options.json ? finalReport : renderSetupReport(finalReport), options.json);
 }
 
-function buildAdversarialReviewPrompt(context, focusText) {
+// Codex API thread input cap is 1048576 chars. Leave safety margin for system
+// prompt / thread metadata that the API stitches onto every turn.
+export const MAX_ADVERSARIAL_PROMPT_CHARS = 900 * 1024;
+export const MAX_ADVERSARIAL_PROMPT_BYTES = 850 * 1024;
+const LIGHTWEIGHT_COLLECTION_GUIDANCE =
+  "The repository context below is a lightweight summary because the full diff exceeded the prompt size budget. Inspect the target diff yourself with read-only git commands before finalizing findings.";
+const TRUNCATION_NOTICE_TEMPLATE = "\n\n[truncated: REVIEW_INPUT was trimmed by {{DROPPED}} bytes to keep the prompt within the Codex API input budget]\n";
+
+function measurePromptSize(text) {
+  return {
+    bytes: Buffer.byteLength(text, "utf8"),
+    chars: [...text].length
+  };
+}
+
+function fitsBudget(text) {
+  const { bytes, chars } = measurePromptSize(text);
+  return bytes <= MAX_ADVERSARIAL_PROMPT_BYTES && chars <= MAX_ADVERSARIAL_PROMPT_CHARS;
+}
+
+function renderAdversarialPromptTemplate(values) {
   const template = loadPromptTemplate(ROOT_DIR, "adversarial-review");
-  return interpolateTemplate(template, {
+  return interpolateTemplate(template, values);
+}
+
+function trimContentToBudget(content, fixedOverheadText) {
+  // Binary search the largest content prefix that keeps the rendered prompt
+  // within both the byte and char budgets. Account for the constant overhead
+  // of every other interpolated placeholder so the answer is precise.
+  const overhead = measurePromptSize(fixedOverheadText);
+  const byteHeadroom = MAX_ADVERSARIAL_PROMPT_BYTES - overhead.bytes;
+  const charHeadroom = MAX_ADVERSARIAL_PROMPT_CHARS - overhead.chars;
+  if (byteHeadroom <= 0 || charHeadroom <= 0) {
+    return "";
+  }
+  // Reserve room for the notice, sized by total UTF-8 bytes: an upper bound on the dropped-byte count it will show.
+  const notice = TRUNCATION_NOTICE_TEMPLATE.replace("{{DROPPED}}", String(Buffer.byteLength(content, "utf8")));
+  const noticeSize = measurePromptSize(notice);
+  const byteLimit = Math.max(0, byteHeadroom - noticeSize.bytes);
+  const charLimit = Math.max(0, charHeadroom - noticeSize.chars);
+  if (byteLimit === 0 || charLimit === 0) {
+    return "";
+  }
+  let lo = 0;
+  let hi = content.length;
+  while (lo < hi) {
+    const mid = Math.floor((lo + hi + 1) / 2);
+    const { bytes, chars } = measurePromptSize(content.slice(0, mid));
+    if (bytes <= byteLimit && chars <= charLimit) {
+      lo = mid;
+    } else {
+      hi = mid - 1;
+    }
+  }
+  // Step back off a lone high surrogate so the cut never splits a code point (it would encode as U+FFFD).
+  const last = content.charCodeAt(lo - 1);
+  return content.slice(0, lo < content.length && last >= 0xd800 && last <= 0xdbff ? lo - 1 : lo);
+}
+
+export function buildAdversarialReviewPrompt(context, focusText) {
+  const targetLabel = context.target.label;
+  const userFocus = focusText || "No extra focus provided.";
+  const collectionGuidance = context.collectionGuidance;
+  const content = typeof context.content === "string" ? context.content : "";
+
+  const fullPrompt = renderAdversarialPromptTemplate({
+    REVIEW_KIND: "Adversarial Review",
+    TARGET_LABEL: targetLabel,
+    USER_FOCUS: userFocus,
+    REVIEW_COLLECTION_GUIDANCE: collectionGuidance,
+    REVIEW_INPUT: content
+  });
+  if (fitsBudget(fullPrompt)) {
+    return fullPrompt;
+  }
+
+  // Fallback 1: trim REVIEW_INPUT until the rendered prompt fits the budget.
+  const fixedOverheadPrompt = renderAdversarialPromptTemplate({
     REVIEW_KIND: "Adversarial Review",
-    TARGET_LABEL: context.target.label,
-    USER_FOCUS: focusText || "No extra focus provided.",
-    REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance,
-    REVIEW_INPUT: context.content
+    TARGET_LABEL: targetLabel,
+    USER_FOCUS: userFocus,
+    REVIEW_COLLECTION_GUIDANCE: collectionGuidance,
+    REVIEW_INPUT: ""
   });
+  const trimmed = trimContentToBudget(content, fixedOverheadPrompt);
+  if (trimmed.length > 0) {
+    const droppedBytes = Buffer.byteLength(content, "utf8") - Buffer.byteLength(trimmed, "utf8");
+    const notice = TRUNCATION_NOTICE_TEMPLATE.replace("{{DROPPED}}", String(droppedBytes));
+    const trimmedPrompt = renderAdversarialPromptTemplate({
+      REVIEW_KIND: "Adversarial Review",
+      TARGET_LABEL: targetLabel,
+      USER_FOCUS: userFocus,
+      REVIEW_COLLECTION_GUIDANCE: collectionGuidance,
+      REVIEW_INPUT: trimmed + notice
+    });
+    if (fitsBudget(trimmedPrompt)) {
+      return trimmedPrompt;
+    }
+  }
+
+  // Fallback 2: drop REVIEW_INPUT entirely and switch to self-collect guidance.
+  const lightweightPrompt = renderAdversarialPromptTemplate({
+    REVIEW_KIND: "Adversarial Review",
+    TARGET_LABEL: targetLabel,
+    USER_FOCUS: userFocus,
+    REVIEW_COLLECTION_GUIDANCE: LIGHTWEIGHT_COLLECTION_GUIDANCE,
+    REVIEW_INPUT: "[truncated: the diff was too large to inline; collect it with read-only git commands such as `git diff` and `git log`.]"
+  });
+  return lightweightPrompt;
 }
 
 function ensureCodexAvailable(cwd) {
@@ -1020,8 +1120,21 @@ async function main() {
   }
 }
 
-main().catch((error) => {
-  const message = error instanceof Error ? error.message : String(error);
-  process.stderr.write(`${message}\n`);
-  process.exitCode = 1;
-});
+function isDirectInvocation() {
+  if (!process.argv[1]) {
+    return false;
+  }
+  try {
+    return fileURLToPath(import.meta.url) === fs.realpathSync(process.argv[1]);
+  } catch {
+    return false;
+  }
+}
+
+if (isDirectInvocation()) {
+  main().catch((error) => {
+    const message = error instanceof Error ? error.message : String(error);
+    process.stderr.write(`${message}\n`);
+    process.exitCode = 1;
+  });
+}
diff --git a/tests/adversarial-prompt-cap.test.mjs b/tests/adversarial-prompt-cap.test.mjs
new file mode 100644
index 00000000..7bf04905
--- /dev/null
+++ b/tests/adversarial-prompt-cap.test.mjs
@@ -0,0 +1,107 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import {
+  buildAdversarialReviewPrompt,
+  MAX_ADVERSARIAL_PROMPT_BYTES,
+  MAX_ADVERSARIAL_PROMPT_CHARS
+} from "../plugins/codex/scripts/codex-companion.mjs";
+
+function makeContext(contentBytes, { mode = "branch", inputMode = "inline-diff" } = {}) {
+  return {
+    target: { label: "test-target", mode },
+    collectionGuidance: "Use the repository context below as primary evidence.",
+    content: "A".repeat(contentBytes),
+    inputMode,
+    diffBytes: contentBytes
+  };
+}
+
+test("buildAdversarialReviewPrompt returns prompt under MAX_ADVERSARIAL_PROMPT_BYTES for small input", () => {
+  const ctx = makeContext(1024);
+  const prompt = buildAdversarialReviewPrompt(ctx, "review focus");
+  assert.ok(prompt.includes("test-target"), "target label should be interpolated");
+  assert.ok(prompt.includes("review focus"), "focus text should be interpolated");
+  assert.ok(prompt.length < MAX_ADVERSARIAL_PROMPT_CHARS);
+});
+
+test("buildAdversarialReviewPrompt truncates REVIEW_INPUT when total size exceeds MAX_ADVERSARIAL_PROMPT_BYTES", () => {
+  // 900KB content + template should exceed the 850KB byte cap, triggering truncation
+  const ctx = makeContext(900 * 1024);
+  const prompt = buildAdversarialReviewPrompt(ctx, "focus");
+  const promptBytes = Buffer.byteLength(prompt, "utf8");
+  const promptChars = [...prompt].length;
+  assert.ok(
+    promptBytes <= MAX_ADVERSARIAL_PROMPT_BYTES,
+    `prompt byte size ${promptBytes} should be <= ${MAX_ADVERSARIAL_PROMPT_BYTES}`
+  );
+  assert.ok(
+    promptChars <= MAX_ADVERSARIAL_PROMPT_CHARS,
+    `prompt char count ${promptChars} should be <= ${MAX_ADVERSARIAL_PROMPT_CHARS}`
+  );
+});
+
+test("buildAdversarialReviewPrompt records truncation notice when content was trimmed", () => {
+  const ctx = makeContext(900 * 1024);
+  const prompt = buildAdversarialReviewPrompt(ctx, "focus");
+  assert.ok(
+    prompt.includes("[truncated") || prompt.includes("self-collect"),
+    "truncated prompt should include a truncation marker or self-collect guidance"
+  );
+});
+
+test("buildAdversarialReviewPrompt falls back to self-collect when content cannot be fitted", () => {
+  // 5MB content cannot fit even after aggressive truncation - should drop to self-collect mode
+  const ctx = makeContext(5 * 1024 * 1024);
+  const prompt = buildAdversarialReviewPrompt(ctx, "focus");
+  const promptBytes = Buffer.byteLength(prompt, "utf8");
+  const promptChars = [...prompt].length;
+  assert.ok(
+    promptBytes <= MAX_ADVERSARIAL_PROMPT_BYTES,
+    `prompt byte size ${promptBytes} should fit under cap`
+  );
+  assert.ok(
+    promptChars <= MAX_ADVERSARIAL_PROMPT_CHARS,
+    `prompt char count ${promptChars} should fit under cap`
+  );
+  // self-collect mode is signaled by lightweight collection guidance
+  assert.ok(
+    prompt.includes("lightweight summary") || prompt.includes("self-collect") || prompt.includes("[truncated"),
+    "should signal lightweight / truncated mode in the prompt"
+  );
+});
+
+test("buildAdversarialReviewPrompt handles multi-byte UTF-8 input within the char cap", () => {
+  // Each emoji is 4 bytes in UTF-8 and 2 UTF-16 code units (a surrogate pair), but counts as 1 char in [...str].length
+  // 250000 emojis = 1,000,000 UTF-8 bytes (above the byte cap) but only 250000 [...str] chars
+  const emojiCount = 250000;
+  const ctx = makeContext(0);
+  ctx.content = "\u{1F4A9}".repeat(emojiCount);
+  const prompt = buildAdversarialReviewPrompt(ctx, "focus");
+  const promptBytes = Buffer.byteLength(prompt, "utf8");
+  const promptChars = [...prompt].length;
+  assert.ok(
+    promptBytes <= MAX_ADVERSARIAL_PROMPT_BYTES,
+    `multi-byte prompt should respect byte cap (got ${promptBytes})`
+  );
+  assert.ok(
+    promptChars <= MAX_ADVERSARIAL_PROMPT_CHARS,
+    `multi-byte prompt should respect char cap (got ${promptChars})`
+  );
+});
+
+test("buildAdversarialReviewPrompt preserves USER_FOCUS even when truncating", () => {
+  const ctx = makeContext(900 * 1024);
+  const focusText = "Focus on auth boundary";
+  const prompt = buildAdversarialReviewPrompt(ctx, focusText);
+  assert.ok(
+    prompt.includes(focusText),
+    "USER_FOCUS should never be dropped because it is small and high-signal"
+  );
+});
+
+test("MAX_ADVERSARIAL_PROMPT_BYTES leaves safety margin below 1MB API input cap", () => {
+  // Codex API thread input cap is 1048576 chars. We need a safety margin for system prompt overhead.
+  assert.ok(MAX_ADVERSARIAL_PROMPT_BYTES <= 900 * 1024, "byte cap should leave >= ~100KB safety margin");
+  assert.ok(MAX_ADVERSARIAL_PROMPT_CHARS <= 1048576, "char cap must be <= Codex API thread input cap");
+});