diff --git a/plugins/codex/scripts/lib/app-server.mjs b/plugins/codex/scripts/lib/app-server.mjs index 127c8376..4df53aee 100644 --- a/plugins/codex/scripts/lib/app-server.mjs +++ b/plugins/codex/scripts/lib/app-server.mjs @@ -14,6 +14,7 @@ import { spawn } from "node:child_process"; import readline from "node:readline"; import { parseBrokerEndpoint } from "./broker-endpoint.mjs"; import { ensureBrokerSession, loadBrokerSession } from "./broker-lifecycle.mjs"; +import { cleanProtocolLine } from "./jsonl.mjs"; import { terminateProcessTree } from "./process.mjs"; const PLUGIN_MANIFEST_URL = new URL("../../.claude-plugin/plugin.json", import.meta.url); @@ -114,16 +115,20 @@ class AppServerClientBase { } } - handleLine(line) { - if (!line.trim()) { + handleLine(rawLine) { + const candidate = cleanProtocolLine(rawLine); + if (candidate === null) { + // Empty line, terminal noise (issue #23), or localized OS messages + // (e.g. CP-950 mojibaked taskkill SUCCESS on Windows zh-TW). Drop + // it instead of tearing the connection down — see lib/jsonl.mjs. return; } let message; try { - message = JSON.parse(line); + message = JSON.parse(candidate); } catch (error) { - this.handleExit(createProtocolError(`Failed to parse codex app-server JSONL: ${error.message}`, { line })); + this.handleExit(createProtocolError(`Failed to parse codex app-server JSONL: ${error.message}`, { line: rawLine })); return; } diff --git a/plugins/codex/scripts/lib/jsonl.mjs b/plugins/codex/scripts/lib/jsonl.mjs new file mode 100644 index 00000000..1e6dccf3 --- /dev/null +++ b/plugins/codex/scripts/lib/jsonl.mjs @@ -0,0 +1,69 @@ +/** + * Helpers for reading the Codex app-server JSONL protocol. + * + * The codex CLI's app-server occasionally emits non-JSON bytes onto stdout + * that have nothing to do with the protocol. Two known sources: + * + * 1. Terminal/shell init noise — e.g. zsh writes the bracketed-paste + * marker `\x1b[?2004h` when a subprocess inherits the parent's TTY + * (issue #23). + * + * 2. Localized OS messages on Windows non-English locales. On zh-TW + * (CP-950 / Big5), when codex.exe runs `taskkill /T /F` to clean up + * a failed MCP child and inadvertently routes taskkill's stdout to + * its own stdout, we see the bytes `A6 A8 A5 5C 3A 20 50 49 44 ...` + * ("成功: PID 為 xxxx ...") which decode under UTF-8 as + * `���\\: PID ...`. + * + * Both cases historically tore the broker connection down with + * `Failed to parse codex app-server JSONL: Unexpected token …`. The + * client never recovers because `handleExit` rejects every in-flight + * request before any subsequent valid record arrives. + * + * `cleanProtocolLine` is the conservative guard: + * + * - Strip CSI / OSC ANSI escape sequences from the raw line. + * - Trim whitespace. + * - Require the first remaining character to be `{` or `[`. JSONL + * records cannot start with anything else, so any other prefix is + * definitively garbage and is dropped. + * + * Lines that pass these checks are still parsed with `JSON.parse`; if + * that fails the caller surfaces a real protocol error as before. + */ + +// CSI: ESC [ +// params = 0x30..0x3F (digits, ?, etc.) +// intermediates= 0x20..0x2F (space, !, ", $, %, &, ', (, ), *, +, ,, -, ., /) +// final = 0x40..0x7E (any byte in @, A..Z, [, \, ], ^, _, `, a..z, {, |, }, ~) +// This grammar matches the original ECMA-48 spec and covers cases like +// bracketed-paste *content* markers `\x1b[200~` / `\x1b[201~` (final +// byte `~`) which a letter-only `[a-zA-Z]` final misses. +// OSC: ESC ] (BEL | ESC \) e.g. window-title sets +// eslint-disable-next-line no-control-regex +const ANSI_ESCAPE_RE = /\x1b\[[\x30-\x3F]*[\x20-\x2F]*[\x40-\x7E]|\x1b\][^\x07]*(?:\x07|\x1b\\)/g; + +/** + * Returns a JSON-shaped candidate string for the given raw line, or + * `null` if the line should be skipped because it cannot be valid JSONL. + * + * Accepts arbitrary input — non-string values short-circuit to `null` so + * callers don't have to type-check before invoking. + * + * @param {unknown} rawLine + * @returns {string | null} + */ +export function cleanProtocolLine(rawLine) { + if (typeof rawLine !== "string") { + return null; + } + const cleaned = rawLine.replace(ANSI_ESCAPE_RE, "").trim(); + if (!cleaned) { + return null; + } + const firstChar = cleaned.charCodeAt(0); + if (firstChar !== 0x7B /* { */ && firstChar !== 0x5B /* [ */) { + return null; + } + return cleaned; +} diff --git a/tests/jsonl.test.mjs b/tests/jsonl.test.mjs new file mode 100644 index 00000000..e7333d6e --- /dev/null +++ b/tests/jsonl.test.mjs @@ -0,0 +1,101 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { cleanProtocolLine } from "../plugins/codex/scripts/lib/jsonl.mjs"; + +test("cleanProtocolLine returns null for empty / whitespace lines", () => { + assert.equal(cleanProtocolLine(""), null); + assert.equal(cleanProtocolLine(" "), null); + assert.equal(cleanProtocolLine("\t\r\n"), null); +}); + +test("cleanProtocolLine returns null for non-string input", () => { + assert.equal(cleanProtocolLine(undefined), null); + assert.equal(cleanProtocolLine(null), null); + assert.equal(cleanProtocolLine(42), null); +}); + +test("cleanProtocolLine passes through plain JSON object lines unchanged", () => { + assert.equal(cleanProtocolLine('{"id":1,"result":{}}'), '{"id":1,"result":{}}'); + assert.equal(cleanProtocolLine(' {"a":1} '), '{"a":1}'); +}); + +test("cleanProtocolLine passes through plain JSON array lines unchanged", () => { + assert.equal(cleanProtocolLine('[1,2,3]'), '[1,2,3]'); +}); + +test("cleanProtocolLine strips bracketed-paste-mode ANSI prefix (issue #23)", () => { + assert.equal( + cleanProtocolLine('\x1b[?2004h{"id":1}'), + '{"id":1}' + ); + assert.equal( + cleanProtocolLine('{"id":1}\x1b[?2004l'), + '{"id":1}' + ); +}); + +test("cleanProtocolLine strips CSI sequences whose final byte is not a letter", () => { + // Bracketed-paste content markers use `~` as the final byte. A + // letter-only CSI final pattern (`[a-zA-Z]`) misses them and the + // surrounding JSON would be incorrectly dropped by the first-char + // guard. ECMA-48's CSI grammar allows any final byte in 0x40..0x7E. + assert.equal( + cleanProtocolLine('\x1b[200~{"id":1}\x1b[201~'), + '{"id":1}' + ); + // Other non-letter finals at the boundaries of the 0x40..0x7E range. + assert.equal(cleanProtocolLine('\x1b[@{"id":1}'), '{"id":1}'); // 0x40 + assert.equal(cleanProtocolLine('\x1b[`{"id":1}'), '{"id":1}'); // 0x60 +}); + +test("cleanProtocolLine strips OSC window-title sequences", () => { + assert.equal( + cleanProtocolLine('\x1b]0;some title\x07{"id":1}'), + '{"id":1}' + ); +}); + +test("cleanProtocolLine strips SGR color codes", () => { + assert.equal( + cleanProtocolLine('\x1b[1;31m{"id":1}\x1b[0m'), + '{"id":1}' + ); +}); + +test("cleanProtocolLine drops CP-950 mojibake of Windows taskkill SUCCESS", () => { + // Raw bytes from `taskkill /T /F` on a zh-TW (Big5 / CP-950) Windows + // system: `成功: PID 為 1234 (PID 為 5678 的子處理程序) 的處理程序已終止。` + // When read off codex.exe's stdout pipe under Node's UTF-8 decoder, the + // invalid CP-950 high bytes become U+FFFD replacement characters. + const mojibake = "���\\: PID �� 1234 (PID �� 5678 ���l�B�z�{��)"; + assert.equal(cleanProtocolLine(mojibake), null); +}); + +test("cleanProtocolLine drops the literal U+FFFD prefix that triggered the original bug", () => { + // This is the smallest reproducer of the production failure: the parser + // saw `Unexpected token '�', "���\: PID "...` and tore the connection + // down. After this guard the line is silently skipped. + assert.equal(cleanProtocolLine("���\\: PID 1234"), null); +}); + +test("cleanProtocolLine drops other non-JSON garbage (shell prompts, plain text)", () => { + assert.equal(cleanProtocolLine("PS C:\\> "), null); + assert.equal(cleanProtocolLine("user@host:~$ "), null); + assert.equal(cleanProtocolLine("warning: something happened"), null); + // Note: anything whose first non-whitespace character is `[` (e.g. an + // ANSI-stripped bracket-log timestamp like `[2026-05-09T...]`) is NOT + // dropped by this guard — it looks like a JSON array. Such lines still + // reach JSON.parse and surface as a real protocol error, which is the + // desired behaviour: the guard only drops lines that cannot possibly + // be JSONL. +}); + +test("cleanProtocolLine forwards a malformed but JSON-shaped line for the parser to reject", () => { + // Lines that DO start with `{` or `[` are returned even if they are not + // strictly valid JSON. The caller will then surface a real protocol + // error via JSON.parse. This is the desired behaviour: the guard only + // drops lines that cannot possibly be JSONL. + assert.equal(cleanProtocolLine('{"id":1,'), '{"id":1,'); + assert.equal(cleanProtocolLine('[1,2,'), '[1,2,'); +});