Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions plugins/codex/scripts/lib/app-server.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { spawn } from "node:child_process";
import readline from "node:readline";
import { parseBrokerEndpoint } from "./broker-endpoint.mjs";
import { ensureBrokerSession, loadBrokerSession } from "./broker-lifecycle.mjs";
import { cleanProtocolLine } from "./jsonl.mjs";
import { terminateProcessTree } from "./process.mjs";

const PLUGIN_MANIFEST_URL = new URL("../../.claude-plugin/plugin.json", import.meta.url);
Expand Down Expand Up @@ -114,16 +115,20 @@ class AppServerClientBase {
}
}

handleLine(line) {
if (!line.trim()) {
handleLine(rawLine) {
const candidate = cleanProtocolLine(rawLine);
if (candidate === null) {
// Empty line, terminal noise (issue #23), or localized OS messages
// (e.g. CP-950 mojibaked taskkill SUCCESS on Windows zh-TW). Drop
// it instead of tearing the connection down — see lib/jsonl.mjs.
return;
}

let message;
try {
message = JSON.parse(line);
message = JSON.parse(candidate);
} catch (error) {
this.handleExit(createProtocolError(`Failed to parse codex app-server JSONL: ${error.message}`, { line }));
this.handleExit(createProtocolError(`Failed to parse codex app-server JSONL: ${error.message}`, { line: rawLine }));
return;
}

Expand Down
69 changes: 69 additions & 0 deletions plugins/codex/scripts/lib/jsonl.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/**
* Helpers for reading the Codex app-server JSONL protocol.
*
* The codex CLI's app-server occasionally emits non-JSON bytes onto stdout
* that have nothing to do with the protocol. Two known sources:
*
* 1. Terminal/shell init noise — e.g. zsh writes the bracketed-paste
* marker `\x1b[?2004h` when a subprocess inherits the parent's TTY
* (issue #23).
*
* 2. Localized OS messages on Windows non-English locales. On zh-TW
* (CP-950 / Big5), when codex.exe runs `taskkill /T /F` to clean up
* a failed MCP child and inadvertently routes taskkill's stdout to
* its own stdout, we see the bytes `A6 A8 A5 5C 3A 20 50 49 44 ...`
* ("成功: PID 為 xxxx ...") which decode under UTF-8 as
* `���\\: PID ...`.
*
* Both cases historically tore the broker connection down with
* `Failed to parse codex app-server JSONL: Unexpected token …`. The
* client never recovers because `handleExit` rejects every in-flight
* request before any subsequent valid record arrives.
*
* `cleanProtocolLine` is the conservative guard:
*
* - Strip CSI / OSC ANSI escape sequences from the raw line.
* - Trim whitespace.
* - Require the first remaining character to be `{` or `[`. JSONL
* records cannot start with anything else, so any other prefix is
* definitively garbage and is dropped.
*
* Lines that pass these checks are still parsed with `JSON.parse`; if
* that fails the caller surfaces a real protocol error as before.
*/

// CSI: ESC [ <params> <intermediates> <final>
// params = 0x30..0x3F (digits, ?, etc.)
// intermediates= 0x20..0x2F (space, !, ", $, %, &, ', (, ), *, +, ,, -, ., /)
// final = 0x40..0x7E (any byte in @, A..Z, [, \, ], ^, _, `, a..z, {, |, }, ~)
// This grammar matches the original ECMA-48 spec and covers cases like
// bracketed-paste *content* markers `\x1b[200~` / `\x1b[201~` (final
// byte `~`) which a letter-only `[a-zA-Z]` final misses.
// OSC: ESC ] <text> (BEL | ESC \) e.g. window-title sets
// eslint-disable-next-line no-control-regex
const ANSI_ESCAPE_RE = /\x1b\[[\x30-\x3F]*[\x20-\x2F]*[\x40-\x7E]|\x1b\][^\x07]*(?:\x07|\x1b\\)/g;

/**
* Returns a JSON-shaped candidate string for the given raw line, or
* `null` if the line should be skipped because it cannot be valid JSONL.
*
* Accepts arbitrary input — non-string values short-circuit to `null` so
* callers don't have to type-check before invoking.
*
* @param {unknown} rawLine
* @returns {string | null}
*/
export function cleanProtocolLine(rawLine) {
if (typeof rawLine !== "string") {
return null;
Comment on lines +46 to +58
}
const cleaned = rawLine.replace(ANSI_ESCAPE_RE, "").trim();
if (!cleaned) {
return null;
}
const firstChar = cleaned.charCodeAt(0);
if (firstChar !== 0x7B /* { */ && firstChar !== 0x5B /* [ */) {
return null;
}
return cleaned;
}
101 changes: 101 additions & 0 deletions tests/jsonl.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import test from "node:test";
import assert from "node:assert/strict";

import { cleanProtocolLine } from "../plugins/codex/scripts/lib/jsonl.mjs";

test("cleanProtocolLine returns null for empty / whitespace lines", () => {
assert.equal(cleanProtocolLine(""), null);
assert.equal(cleanProtocolLine(" "), null);
assert.equal(cleanProtocolLine("\t\r\n"), null);
});

test("cleanProtocolLine returns null for non-string input", () => {
assert.equal(cleanProtocolLine(undefined), null);
assert.equal(cleanProtocolLine(null), null);
assert.equal(cleanProtocolLine(42), null);
});

test("cleanProtocolLine passes through plain JSON object lines unchanged", () => {
assert.equal(cleanProtocolLine('{"id":1,"result":{}}'), '{"id":1,"result":{}}');
assert.equal(cleanProtocolLine(' {"a":1} '), '{"a":1}');
});

test("cleanProtocolLine passes through plain JSON array lines unchanged", () => {
assert.equal(cleanProtocolLine('[1,2,3]'), '[1,2,3]');
});

test("cleanProtocolLine strips bracketed-paste-mode ANSI prefix (issue #23)", () => {
assert.equal(
cleanProtocolLine('\x1b[?2004h{"id":1}'),
'{"id":1}'
);
assert.equal(
cleanProtocolLine('{"id":1}\x1b[?2004l'),
'{"id":1}'
);
});

test("cleanProtocolLine strips CSI sequences whose final byte is not a letter", () => {
// Bracketed-paste content markers use `~` as the final byte. A
// letter-only CSI final pattern (`[a-zA-Z]`) misses them and the
// surrounding JSON would be incorrectly dropped by the first-char
// guard. ECMA-48's CSI grammar allows any final byte in 0x40..0x7E.
assert.equal(
cleanProtocolLine('\x1b[200~{"id":1}\x1b[201~'),
'{"id":1}'
);
// Other non-letter finals at the boundaries of the 0x40..0x7E range.
assert.equal(cleanProtocolLine('\x1b[@{"id":1}'), '{"id":1}'); // 0x40
assert.equal(cleanProtocolLine('\x1b[`{"id":1}'), '{"id":1}'); // 0x60
});

test("cleanProtocolLine strips OSC window-title sequences", () => {
assert.equal(
cleanProtocolLine('\x1b]0;some title\x07{"id":1}'),
'{"id":1}'
);
});

test("cleanProtocolLine strips SGR color codes", () => {
assert.equal(
cleanProtocolLine('\x1b[1;31m{"id":1}\x1b[0m'),
'{"id":1}'
);
});

test("cleanProtocolLine drops CP-950 mojibake of Windows taskkill SUCCESS", () => {
// Raw bytes from `taskkill /T /F` on a zh-TW (Big5 / CP-950) Windows
// system: `成功: PID 為 1234 (PID 為 5678 的子處理程序) 的處理程序已終止。`
// When read off codex.exe's stdout pipe under Node's UTF-8 decoder, the
// invalid CP-950 high bytes become U+FFFD replacement characters.
const mojibake = "���\\: PID �� 1234 (PID �� 5678 ���l�B�z�{��)";
assert.equal(cleanProtocolLine(mojibake), null);
});

test("cleanProtocolLine drops the literal U+FFFD prefix that triggered the original bug", () => {
// This is the smallest reproducer of the production failure: the parser
// saw `Unexpected token '�', "���\: PID "...` and tore the connection
// down. After this guard the line is silently skipped.
assert.equal(cleanProtocolLine("���\\: PID 1234"), null);
});

test("cleanProtocolLine drops other non-JSON garbage (shell prompts, plain text)", () => {
assert.equal(cleanProtocolLine("PS C:\\> "), null);
assert.equal(cleanProtocolLine("user@host:~$ "), null);
assert.equal(cleanProtocolLine("warning: something happened"), null);
// Note: anything whose first non-whitespace character is `[` (e.g. an
// ANSI-stripped bracket-log timestamp like `[2026-05-09T...]`) is NOT
// dropped by this guard — it looks like a JSON array. Such lines still
// reach JSON.parse and surface as a real protocol error, which is the
// desired behaviour: the guard only drops lines that cannot possibly
// be JSONL.
});

test("cleanProtocolLine forwards a malformed but JSON-shaped line for the parser to reject", () => {
// Lines that DO start with `{` or `[` are returned even if they are not
// strictly valid JSON. The caller will then surface a real protocol
// error via JSON.parse. This is the desired behaviour: the guard only
// drops lines that cannot possibly be JSONL.
assert.equal(cleanProtocolLine('{"id":1,'), '{"id":1,');
assert.equal(cleanProtocolLine('[1,2,'), '[1,2,');
});