From 8d2a6ade0d6fb488dd87c2d0b95d2eb1580edcd8 Mon Sep 17 00:00:00 2001 From: cardene777 Date: Mon, 11 May 2026 21:01:10 +0900 Subject: [PATCH] fix: cap adversarial review prompt size to stay under Codex API 1MB input limit The `adversarial-review --wait` command fails on heavy PRs with `Input exceeds the maximum length of 1048576 characters.` even after PR #179 capped the embedded diff to 256KB. The Codex API thread input is hard-limited to 1,048,576 characters, and the template plus 256KB diff plus collection guidance plus focus text can still cross that line in practice, especially when the diff contains multi-byte UTF-8. Add `MAX_ADVERSARIAL_PROMPT_BYTES = 850 * 1024` and `MAX_ADVERSARIAL_PROMPT_CHARS = 900 * 1024` budgets and a small fallback chain inside `buildAdversarialReviewPrompt`: 1. render the full prompt and return it if it already fits both budgets; 2. otherwise binary-search the largest prefix of `REVIEW_INPUT` that fits, append a truncation notice, and re-render; 3. if no useful prefix survives, drop the inline diff entirely and switch the collection guidance to the existing lightweight self-collect path. Both budgets are checked together. The char count is taken in Unicode code points, each of which needs at least one UTF-8 byte, so the lower byte budget is always reached first; the char budget only acts as a backstop against the API's character-based limit. `USER_FOCUS` is never dropped, since it is small and high-signal. The helper is now exported alongside the budget constants so it can be unit-tested directly. The bottom-of-file `main()` call is guarded behind `isDirectInvocation()` so importing the module from tests no longer runs the CLI entry point. New tests in `tests/adversarial-prompt-cap.test.mjs` cover: small input passthrough, ~900KB ASCII content truncation, truncation marker presence, oversized (5MB) input staying within both budgets via truncation or the self-collect fallback, multi-byte content respecting both budgets, `USER_FOCUS` preservation, and a sanity check that the byte/char budgets stay under the 1,048,576-char API cap. 
Refs: cardene777/claude-config#1467 --- plugins/codex/scripts/codex-companion.mjs | 135 ++++++++++++++++++++-- tests/adversarial-prompt-cap.test.mjs | 107 +++++++++++++++++ 2 files changed, 231 insertions(+), 11 deletions(-) create mode 100644 tests/adversarial-prompt-cap.test.mjs diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..3f4b22b3 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -235,15 +235,115 @@ async function handleSetup(argv) { outputResult(options.json ? finalReport : renderSetupReport(finalReport), options.json); } -function buildAdversarialReviewPrompt(context, focusText) { +// Codex API thread input cap is 1048576 chars. Leave safety margin for system +// prompt / thread metadata that the API stitches onto every turn. Chars are counted as code points (at least one UTF-8 byte each), so the lower byte budget is reached first. +export const MAX_ADVERSARIAL_PROMPT_CHARS = 900 * 1024; +export const MAX_ADVERSARIAL_PROMPT_BYTES = 850 * 1024; +const LIGHTWEIGHT_COLLECTION_GUIDANCE = + "The repository context below is a lightweight summary because the full diff exceeded the prompt size budget. 
Inspect the target diff yourself with read-only git commands before finalizing findings."; +const TRUNCATION_NOTICE_TEMPLATE = "\n\n[truncated: REVIEW_INPUT was trimmed by {{DROPPED}} bytes to keep the prompt within the Codex API input budget]\n"; + +function measurePromptSize(text) { + return { + bytes: Buffer.byteLength(text, "utf8"), + chars: [...text].length + }; +} + +function fitsBudget(text) { + const { bytes, chars } = measurePromptSize(text); + return bytes <= MAX_ADVERSARIAL_PROMPT_BYTES && chars <= MAX_ADVERSARIAL_PROMPT_CHARS; +} + +function renderAdversarialPromptTemplate(values) { const template = loadPromptTemplate(ROOT_DIR, "adversarial-review"); - return interpolateTemplate(template, { + return interpolateTemplate(template, values); +} + +function trimContentToBudget(content, fixedOverheadText) { + // Binary search the largest content prefix that keeps the rendered prompt + // within both the byte and char budgets. Account for the constant overhead + // of every other interpolated placeholder so the answer is precise. + const overhead = measurePromptSize(fixedOverheadText); + const byteHeadroom = MAX_ADVERSARIAL_PROMPT_BYTES - overhead.bytes; + const charHeadroom = MAX_ADVERSARIAL_PROMPT_CHARS - overhead.chars; + if (byteHeadroom <= 0 || charHeadroom <= 0) { + return ""; + } + // Reserve room for the truncation notice that will be appended. 
+ const notice = TRUNCATION_NOTICE_TEMPLATE.replace("{{DROPPED}}", String(content.length)); + const noticeSize = measurePromptSize(notice); + const byteLimit = Math.max(0, byteHeadroom - noticeSize.bytes); + const charLimit = Math.max(0, charHeadroom - noticeSize.chars); + if (byteLimit === 0 || charLimit === 0) { + return ""; + } + + let lo = 0; + let hi = content.length; + while (lo < hi) { + const mid = Math.floor((lo + hi + 1) / 2); + const candidate = content.slice(0, mid); + const { bytes, chars } = measurePromptSize(candidate); + if (bytes <= byteLimit && chars <= charLimit) { + lo = mid; + } else { + hi = mid - 1; + } + } + return content.slice(0, lo); +} + +export function buildAdversarialReviewPrompt(context, focusText) { + const targetLabel = context.target.label; + const userFocus = focusText || "No extra focus provided."; + const collectionGuidance = context.collectionGuidance; + const content = typeof context.content === "string" ? context.content : ""; + + const fullPrompt = renderAdversarialPromptTemplate({ + REVIEW_KIND: "Adversarial Review", + TARGET_LABEL: targetLabel, + USER_FOCUS: userFocus, + REVIEW_COLLECTION_GUIDANCE: collectionGuidance, + REVIEW_INPUT: content + }); + if (fitsBudget(fullPrompt)) { + return fullPrompt; + } + + // Fallback 1: trim REVIEW_INPUT until the rendered prompt fits the budget. NOTE(review): trimContentToBudget cuts with slice() on UTF-16 code units, so the prefix may end on half a surrogate pair; it also sizes its notice with content.length while the notice below uses droppedBytes - confirm the digit counts cannot differ enough to fail fitsBudget. 
+ const fixedOverheadPrompt = renderAdversarialPromptTemplate({ REVIEW_KIND: "Adversarial Review", - TARGET_LABEL: context.target.label, - USER_FOCUS: focusText || "No extra focus provided.", - REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance, - REVIEW_INPUT: context.content + TARGET_LABEL: targetLabel, + USER_FOCUS: userFocus, + REVIEW_COLLECTION_GUIDANCE: collectionGuidance, + REVIEW_INPUT: "" }); + const trimmed = trimContentToBudget(content, fixedOverheadPrompt); + if (trimmed.length > 0) { + const droppedBytes = Buffer.byteLength(content, "utf8") - Buffer.byteLength(trimmed, "utf8"); + const notice = TRUNCATION_NOTICE_TEMPLATE.replace("{{DROPPED}}", String(droppedBytes)); + const trimmedPrompt = renderAdversarialPromptTemplate({ + REVIEW_KIND: "Adversarial Review", + TARGET_LABEL: targetLabel, + USER_FOCUS: userFocus, + REVIEW_COLLECTION_GUIDANCE: collectionGuidance, + REVIEW_INPUT: trimmed + notice + }); + if (fitsBudget(trimmedPrompt)) { + return trimmedPrompt; + } + } + + // Fallback 2: drop REVIEW_INPUT entirely and switch to self-collect guidance. + const lightweightPrompt = renderAdversarialPromptTemplate({ + REVIEW_KIND: "Adversarial Review", + TARGET_LABEL: targetLabel, + USER_FOCUS: userFocus, + REVIEW_COLLECTION_GUIDANCE: LIGHTWEIGHT_COLLECTION_GUIDANCE, + REVIEW_INPUT: "[truncated: the diff was too large to inline; collect it with read-only git commands such as `git diff` and `git log`.]" + }); + return lightweightPrompt; } function ensureCodexAvailable(cwd) { @@ -1020,8 +1120,21 @@ async function main() { } } -main().catch((error) => { - const message = error instanceof Error ? 
error.message : String(error); - process.stderr.write(`${message}\n`); - process.exitCode = 1; -}); +function isDirectInvocation() { + if (!process.argv[1]) { + return false; + } + try { + return fileURLToPath(import.meta.url) === fs.realpathSync(process.argv[1]); + } catch { + return false; + } +} + +if (isDirectInvocation()) { + main().catch((error) => { + const message = error instanceof Error ? error.message : String(error); + process.stderr.write(`${message}\n`); + process.exitCode = 1; + }); +} diff --git a/tests/adversarial-prompt-cap.test.mjs b/tests/adversarial-prompt-cap.test.mjs new file mode 100644 index 00000000..7bf04905 --- /dev/null +++ b/tests/adversarial-prompt-cap.test.mjs @@ -0,0 +1,107 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { + buildAdversarialReviewPrompt, + MAX_ADVERSARIAL_PROMPT_BYTES, + MAX_ADVERSARIAL_PROMPT_CHARS +} from "../plugins/codex/scripts/codex-companion.mjs"; + +function makeContext(contentBytes, { mode = "branch", inputMode = "inline-diff" } = {}) { + return { + target: { label: "test-target", mode }, + collectionGuidance: "Use the repository context below as primary evidence.", + content: "A".repeat(contentBytes), + inputMode, + diffBytes: contentBytes + }; +} + +test("buildAdversarialReviewPrompt returns prompt under MAX_ADVERSARIAL_PROMPT_BYTES for small input", () => { + const ctx = makeContext(1024); + const prompt = buildAdversarialReviewPrompt(ctx, "review focus"); + assert.ok(prompt.includes("test-target"), "target label should be interpolated"); + assert.ok(prompt.includes("review focus"), "focus text should be interpolated"); + assert.ok(prompt.length < MAX_ADVERSARIAL_PROMPT_CHARS); +}); + +test("buildAdversarialReviewPrompt truncates REVIEW_INPUT when total size exceeds MAX_ADVERSARIAL_PROMPT_BYTES", () => { + // 900KB content + template should exceed 850KB byte cap, triggering truncation + const ctx = makeContext(900 * 1024); + const prompt = 
buildAdversarialReviewPrompt(ctx, "focus"); + const promptBytes = Buffer.byteLength(prompt, "utf8"); + const promptChars = [...prompt].length; + assert.ok( + promptBytes <= MAX_ADVERSARIAL_PROMPT_BYTES, + `prompt byte size ${promptBytes} should be <= ${MAX_ADVERSARIAL_PROMPT_BYTES}` + ); + assert.ok( + promptChars <= MAX_ADVERSARIAL_PROMPT_CHARS, + `prompt char count ${promptChars} should be <= ${MAX_ADVERSARIAL_PROMPT_CHARS}` + ); +}); + +test("buildAdversarialReviewPrompt records truncation notice when content was trimmed", () => { + const ctx = makeContext(900 * 1024); + const prompt = buildAdversarialReviewPrompt(ctx, "focus"); + assert.ok( + prompt.includes("[truncated") || prompt.includes("self-collect"), + "truncated prompt should include a truncation marker or self-collect guidance" + ); +}); + +test("buildAdversarialReviewPrompt falls back to self-collect when content cannot be fitted", () => { + // NOTE(review): 5MB of ASCII still leaves a non-empty prefix that fits, so this presumably takes the truncation path rather than self-collect - confirm against the real template size + const ctx = makeContext(5 * 1024 * 1024); + const prompt = buildAdversarialReviewPrompt(ctx, "focus"); + const promptBytes = Buffer.byteLength(prompt, "utf8"); + const promptChars = [...prompt].length; + assert.ok( + promptBytes <= MAX_ADVERSARIAL_PROMPT_BYTES, + `prompt byte size ${promptBytes} should fit under cap` + ); + assert.ok( + promptChars <= MAX_ADVERSARIAL_PROMPT_CHARS, + `prompt char count ${promptChars} should fit under cap` + ); + // self-collect mode is signaled by lightweight collection guidance + assert.ok( + prompt.includes("lightweight summary") || prompt.includes("self-collect") || prompt.includes("[truncated"), + "should signal lightweight / truncated mode in the prompt" + ); +}); + +test("buildAdversarialReviewPrompt handles multi-byte UTF-8 input within the char cap", () => { + // Each emoji is 4 bytes in UTF-8 and 2 UTF-16 code units in str.length (surrogate pair), but counts as 1 in [...str].length + // 250000 emojis = ~1MB UTF-8 bytes, 500000 in str.length, 250000 in [...str].length + const 
emojiCount = 250000; + const ctx = makeContext(0); + ctx.content = "\u{1F4A9}".repeat(emojiCount); + const prompt = buildAdversarialReviewPrompt(ctx, "focus"); + const promptBytes = Buffer.byteLength(prompt, "utf8"); + const promptChars = [...prompt].length; + assert.ok( + promptBytes <= MAX_ADVERSARIAL_PROMPT_BYTES, + `multi-byte prompt should respect byte cap (got ${promptBytes})` + ); + assert.ok( + promptChars <= MAX_ADVERSARIAL_PROMPT_CHARS, + `multi-byte prompt should respect char cap (got ${promptChars})` + ); +}); + +test("buildAdversarialReviewPrompt preserves USER_FOCUS even when truncating", () => { + const ctx = makeContext(900 * 1024); + const focusText = "Focus on auth boundary"; + const prompt = buildAdversarialReviewPrompt(ctx, focusText); + assert.ok( + prompt.includes(focusText), + "USER_FOCUS should never be dropped because it is small and high-signal" + ); +}); + +test("MAX_ADVERSARIAL_PROMPT_BYTES leaves safety margin below 1MB API input cap", () => { + // Codex API thread input cap is 1048576 chars. We need a safety margin for system prompt overhead. + assert.ok(MAX_ADVERSARIAL_PROMPT_BYTES <= 900 * 1024, "byte cap should leave >= ~100KB safety margin"); + assert.ok(MAX_ADVERSARIAL_PROMPT_CHARS <= 1048576, "char cap must be <= Codex API thread input cap"); +});