From dc4752bbe340a2de368b55eaf64320de167ef6b3 Mon Sep 17 00:00:00 2001
From: cardene777 <cardene777@gmail.com>
Date: Mon, 11 May 2026 20:58:56 +0900
Subject: [PATCH] fix: cap adversarial-review prompt at 800KB with UTF-8-safe
 fallback chain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Heavy-tier adversarial reviews crash with `Input exceeds the maximum length
of 1048576 characters.` when `buildAdversarialReviewPrompt` interpolates
a near-256KB diff alongside the standard template (closes #11 follow-up).

This change adds a `MAX_PROMPT_BYTES = 800 * 1024` byte cap to
`buildAdversarialReviewPrompt` (now exported) and a three-step fallback
chain:

1. Initial render — return verbatim if within cap.
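2. Lightweight fallback — switch guidance to "Inspect the target diff
   yourself" and replace `REVIEW_INPUT` with the summary plus the first
   50 changedFiles (or the file count when there is no file list).
   Skipped when the context has none of these.
3. Hard truncation — keep the lightweight guidance and cut the step-2
   content (the original content if step 2 was skipped) to the remaining
   byte budget on a UTF-8 character boundary, then append an explicit
   `[content truncated to fit prompt size limit]` marker.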

The truncation routine walks back from UTF-8 continuation bytes (0x80-0xBF)
so multi-byte sequences are never split mid-character. `Buffer.byteLength`
is used throughout to measure UTF-8 bytes, not UTF-16 code units.

`main()` is now guarded by `isDirectExecution()`, so importing the module
from tests no longer runs the CLI as a side effect. The guard compares
`fs.realpathSync.native()` of both `process.argv[1]` and
`fileURLToPath(import.meta.url)` so symlinked install paths (plugin cache,
macOS `/var` vs `/private/var`) still match. If realpath throws, it falls
back to comparing `path.resolve(process.argv[1])` with the module path.

Adds `tests/codex-companion.test.mjs` covering five cases: small input,
exact-cap boundary, 1MB input (lightweight guidance), 5MB input
(truncation marker), and 750KB multibyte input (UTF-8 byte accounting).

Refs: https://github.com/cardene777/claude-config/issues/1467
---
 plugins/codex/scripts/codex-companion.mjs | 121 +++++++++++++++++++--
 tests/codex-companion.test.mjs            | 126 ++++++++++++++++++++++
 2 files changed, 239 insertions(+), 8 deletions(-)
 create mode 100644 tests/codex-companion.test.mjs

diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
index 35222fd5..1451e928 100644
--- a/plugins/codex/scripts/codex-companion.mjs
+++ b/plugins/codex/scripts/codex-companion.mjs
@@ -69,6 +69,10 @@ const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000;
 const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]);
 const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]);
 const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn.";
+const MAX_PROMPT_BYTES = 800 * 1024;
+const TRUNCATION_MARKER = "\n\n[content truncated to fit prompt size limit]\n";
+const LIGHTWEIGHT_GUIDANCE =
+  "The repository context below is a lightweight summary. Inspect the target diff yourself with read-only git commands before finalizing findings.";
 
 function printUsage() {
   console.log(
@@ -235,17 +239,116 @@ async function handleSetup(argv) {
   outputResult(options.json ? finalReport : renderSetupReport(finalReport), options.json);
 }
 
-function buildAdversarialReviewPrompt(context, focusText) {
+export function buildAdversarialReviewPrompt(context, focusText) {
   const template = loadPromptTemplate(ROOT_DIR, "adversarial-review");
+  const initial = renderAdversarialReviewPrompt(
+    template,
+    context,
+    focusText,
+    context.collectionGuidance,
+    context.content
+  );
+  if (Buffer.byteLength(initial, "utf8") <= MAX_PROMPT_BYTES) {
+    return initial;
+  }
+
+  const lightweightContent = buildLightweightAdversarialReviewContent(context);
+  const lightweight = renderAdversarialReviewPrompt(
+    template,
+    context,
+    focusText,
+    LIGHTWEIGHT_GUIDANCE,
+    lightweightContent
+  );
+  if (Buffer.byteLength(lightweight, "utf8") <= MAX_PROMPT_BYTES && lightweightContent !== context.content) {
+    return lightweight;
+  }
+
+  const overhead = Buffer.byteLength(
+    renderAdversarialReviewPrompt(template, context, focusText, LIGHTWEIGHT_GUIDANCE, ""),
+    "utf8"
+  );
+  const budget = MAX_PROMPT_BYTES - overhead - Buffer.byteLength(TRUNCATION_MARKER, "utf8");
+
+  if (budget < 0) {
+    return hardTruncateWithMarker(
+      renderAdversarialReviewPrompt(template, context, "", LIGHTWEIGHT_GUIDANCE, ""),
+      MAX_PROMPT_BYTES
+    );
+  }
+
+  return renderAdversarialReviewPrompt(
+    template,
+    context,
+    focusText,
+    LIGHTWEIGHT_GUIDANCE,
+    `${truncateToByteBudget(lightweightContent, budget)}${TRUNCATION_MARKER}`
+  );
+}
+
+function renderAdversarialReviewPrompt(template, context, focusText, guidance, content) {
   return interpolateTemplate(template, {
     REVIEW_KIND: "Adversarial Review",
     TARGET_LABEL: context.target.label,
     USER_FOCUS: focusText || "No extra focus provided.",
-    REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance,
-    REVIEW_INPUT: context.content
+    REVIEW_COLLECTION_GUIDANCE: guidance,
+    REVIEW_INPUT: content
   });
 }
 
+function buildLightweightAdversarialReviewContent(context) {
+  const parts = [];
+
+  if (context.summary) {
+    parts.push(`Summary: ${context.summary}`);
+  }
+  if (Array.isArray(context.changedFiles) && context.changedFiles.length > 0) {
+    parts.push(`Changed files (${context.changedFiles.length}):\n${context.changedFiles.slice(0, 50).join("\n")}`);
+  } else if (typeof context.fileCount === "number") {
+    parts.push(`Changed file count: ${context.fileCount}`);
+  }
+
+  return parts.length > 0 ? parts.join("\n\n") : context.content;
+}
+
+function truncateToByteBudget(value, maxBytes) {
+  if (maxBytes <= 0) {
+    return "";
+  }
+  if (Buffer.byteLength(value, "utf8") <= maxBytes) {
+    return value;
+  }
+
+  const buffer = Buffer.from(value, "utf8");
+  let end = Math.min(maxBytes, buffer.length);
+  // Avoid ending mid-sequence so the truncated prompt stays valid UTF-8.
+  while (end > 0 && (buffer[end] & 0xc0) === 0x80) {
+    end -= 1;
+  }
+  return buffer.subarray(0, end).toString("utf8");
+}
+
+function hardTruncateWithMarker(value, maxBytes) {
+  const head = truncateToByteBudget(
+    value,
+    Math.max(0, maxBytes - Buffer.byteLength(TRUNCATION_MARKER, "utf8"))
+  );
+  return `${head}${TRUNCATION_MARKER}`;
+}
+
+function isDirectExecution() {
+  const entry = process.argv[1];
+  if (!entry) return false;
+  const moduleFile = fileURLToPath(import.meta.url);
+  // Compare canonical (realpath) forms so symlinked install paths
+  // (plugin cache, macOS /var vs /private/var) still match the script.
+  try {
+    return fs.realpathSync.native(entry) === fs.realpathSync.native(moduleFile);
+  } catch {
+    return path.resolve(entry) === moduleFile;
+  }
+}
+
 function ensureCodexAvailable(cwd) {
   const availability = getCodexAvailability(cwd);
   if (!availability.available) {
@@ -1020,8 +1123,10 @@ async function main() {
   }
 }
 
-main().catch((error) => {
-  const message = error instanceof Error ? error.message : String(error);
-  process.stderr.write(`${message}\n`);
-  process.exitCode = 1;
-});
+if (isDirectExecution()) {
+  main().catch((error) => {
+    const message = error instanceof Error ? error.message : String(error);
+    process.stderr.write(`${message}\n`);
+    process.exitCode = 1;
+  });
+}
diff --git a/tests/codex-companion.test.mjs b/tests/codex-companion.test.mjs
new file mode 100644
index 00000000..0f56e076
--- /dev/null
+++ b/tests/codex-companion.test.mjs
@@ -0,0 +1,126 @@
+import test from 'node:test';
+import assert from 'node:assert/strict';
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+import { buildAdversarialReviewPrompt } from '../plugins/codex/scripts/codex-companion.mjs';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+const ROOT_DIR = path.join(__dirname, '..', 'plugins', 'codex');
+const TEMPLATE_PATH = path.join(ROOT_DIR, 'prompts', 'adversarial-review.md');
+const MAX_PROMPT_BYTES = 800 * 1024;
+const TEMPLATE = fs.readFileSync(TEMPLATE_PATH, 'utf8');
+
+function buildPromptBaseline(template, label, focusText, guidance) {
+  return template
+    .replace('{{REVIEW_KIND}}', 'Adversarial Review')
+    .replace('{{TARGET_LABEL}}', label)
+    .replace('{{USER_FOCUS}}', focusText || 'No extra focus provided.')
+    .replace('{{REVIEW_COLLECTION_GUIDANCE}}', guidance)
+    .replace('{{REVIEW_INPUT}}', '');
+}
+
+function buildContentToHitExactCap(template, label, focusText, guidance, capBytes) {
+  const basePrompt = buildPromptBaseline(template, label, focusText, guidance);
+  const overhead = Buffer.byteLength(basePrompt, 'utf8');
+  return 'x'.repeat(capBytes - overhead);
+}
+
+test('buildAdversarialReviewPrompt: small content passes through verbatim', () => {
+  // Given
+  const context = {
+    target: { label: 'branch feature/x vs main' },
+    collectionGuidance: 'Use the repository context below as primary evidence.',
+    content: 'hello world'
+  };
+  const focusText = 'test focus';
+
+  // When
+  const result = buildAdversarialReviewPrompt(context, focusText);
+
+  // Then
+  assert.ok(Buffer.byteLength(result, 'utf8') < MAX_PROMPT_BYTES);
+  assert.equal(result.includes('hello world'), true);
+  assert.equal(result.includes('branch feature/x vs main'), true);
+  assert.equal(result.includes('test focus'), true);
+});
+
+test('buildAdversarialReviewPrompt: exact cap boundary keeps full content', () => {
+  // Given
+  const label = 'branch feature/at-limit vs main';
+  const focusText = 'focus';
+  const guidance = 'Use the repository context below as primary evidence.';
+  const content = buildContentToHitExactCap(TEMPLATE, label, focusText, guidance, MAX_PROMPT_BYTES);
+  const context = {
+    target: { label },
+    collectionGuidance: guidance,
+    content
+  };
+
+  // When
+  const result = buildAdversarialReviewPrompt(context, focusText);
+
+  // Then
+  assert.equal(Buffer.byteLength(result, 'utf8'), MAX_PROMPT_BYTES);
+  assert.equal(result.includes(content.slice(0, 100)), true);
+  assert.equal(result.includes(content.slice(-100)), true);
+});
+
+test('buildAdversarialReviewPrompt: 1MB input falls back to lightweight guidance', () => {
+  // Given
+  const context = {
+    target: { label: 'branch big vs main' },
+    collectionGuidance: 'Use the repository context below as primary evidence.',
+    content: 'x'.repeat(1024 * 1024)
+  };
+  const focusText = '';
+
+  // When
+  const result = buildAdversarialReviewPrompt(context, focusText);
+
+  // Then
+  assert.ok(Buffer.byteLength(result, 'utf8') <= MAX_PROMPT_BYTES);
+  assert.equal(
+    result.includes('lightweight summary') || result.includes('Inspect the target diff yourself'),
+    true
+  );
+  assert.equal(result.includes('x'.repeat(1024 * 1024)), false);
+});
+
+test('buildAdversarialReviewPrompt: 5MB input uses truncation fallback', () => {
+  // Given
+  const context = {
+    target: { label: 'branch huge vs main' },
+    collectionGuidance: 'Use the repository context below as primary evidence.',
+    content: 'x'.repeat(5 * 1024 * 1024)
+  };
+  const focusText = '';
+
+  // When
+  const result = buildAdversarialReviewPrompt(context, focusText);
+
+  // Then
+  assert.ok(Buffer.byteLength(result, 'utf8') <= MAX_PROMPT_BYTES);
+  assert.equal(result.toLowerCase().includes('truncated'), true);
+});
+
+test('buildAdversarialReviewPrompt: utf8 byte accounting stays under cap', () => {
+  // Given
+  const content = 'あ'.repeat(250 * 1024);
+  const context = {
+    target: { label: 'branch jp vs main' },
+    collectionGuidance: 'Use the repository context below as primary evidence.',
+    content
+  };
+  const focusText = '';
+
+  // When
+  const result = buildAdversarialReviewPrompt(context, focusText);
+
+  // Then
+  assert.equal(Buffer.byteLength(content, 'utf8'), 750 * 1024);
+  assert.ok(Buffer.byteLength(result, 'utf8') <= MAX_PROMPT_BYTES);
+  assert.equal(result.includes('あ'), true);
+});