From 487b9ec4f5393850154d3c4939204bd2dbb6f182 Mon Sep 17 00:00:00 2001 From: Willy Date: Thu, 14 May 2026 22:17:21 -0500 Subject: [PATCH] fix: replace lone surrogates in IPC payloads --- src/browser_harness/_ipc.py | 25 ++++++++++++++++++++++--- src/browser_harness/daemon.py | 7 ++++--- src/browser_harness/run.py | 8 ++++---- tests/unit/test_ipc.py | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 10 deletions(-) diff --git a/src/browser_harness/_ipc.py b/src/browser_harness/_ipc.py index 2d265766..21426327 100644 --- a/src/browser_harness/_ipc.py +++ b/src/browser_harness/_ipc.py @@ -76,6 +76,25 @@ def spawn_kwargs(): # subprocess.Popen flags so the daemon detaches from this t return {"start_new_session": True} +def _replace_surrogates(value): + """Return a JSON-compatible value with lone surrogates replaced in strings.""" + if isinstance(value, str): + return value.encode("utf-8", errors="replace").decode("utf-8") + if isinstance(value, dict): + return {_replace_surrogates(k): _replace_surrogates(v) for k, v in value.items()} + if isinstance(value, (list, tuple)): + return [_replace_surrogates(v) for v in value] + return value + + +def json_dumps(value, **kwargs): + return json.dumps(_replace_surrogates(value), **kwargs) + + +def json_loads(value, **kwargs): + return _replace_surrogates(json.loads(value, **kwargs)) + + def connect(name, timeout=1.0): """Blocking client. Returns (sock, token); token is None on POSIX, hex string on Windows. Callers sending JSON requests MUST include the token as req["token"] on Windows.""" @@ -93,13 +112,13 @@ def request(c, token, req): """One-shot send + recv + parse on an open socket. Injects token on Windows. Returns the parsed JSON response. Caller closes the socket.""" if token: req = {**req, "token": token} - c.sendall((json.dumps(req) + "\n").encode()) + c.sendall((json_dumps(req) + "\n").encode()) data = b"" while not data.endswith(b"\n"): chunk = c.recv(1 << 16) if not chunk: break data += chunk - return json.loads(data or b"{}") + return json_loads(data or b"{}") def ping(name, timeout=1.0): @@ -177,7 +196,7 @@ async def serve(name, handler): pf = port_path(name) # Atomic write so a concurrent reader never sees a half-written file. tmp = pf.with_name(pf.name + ".tmp") - tmp.write_text(json.dumps({"port": port, "token": _server_token})) + tmp.write_text(json_dumps({"port": port, "token": _server_token})) os.replace(tmp, pf) try: async with server: await asyncio.Event().wait() diff --git a/src/browser_harness/daemon.py b/src/browser_harness/daemon.py index 0f0f2555..5aa26f94 100644 --- a/src/browser_harness/daemon.py +++ b/src/browser_harness/daemon.py @@ -361,13 +361,14 @@ async def handler(reader, writer): try: line = await reader.readline() if not line: return - resp = await d.handle(json.loads(line)) - writer.write((json.dumps(resp, default=str) + "\n").encode()) + req = ipc.json_loads(line) + resp = await d.handle(req) + writer.write((ipc.json_dumps(resp, default=str) + "\n").encode()) await writer.drain() except Exception as e: log(f"conn: {e}") try: - writer.write((json.dumps({"error": str(e)}) + "\n").encode()) + writer.write((ipc.json_dumps({"error": str(e)}) + "\n").encode()) await writer.drain() except Exception: pass diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index 8ab1f0f1..d0f024c9 100644 --- a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -1,10 +1,10 @@ import os, sys, urllib.request -# Windows default stdout encoding is cp1252, which can't encode the 🐴 marker +# Windows default terminal encoding is often cp1252, which can't encode the 🐴 marker # helpers prepend to tab titles (or anything else outside Latin-1). Force UTF-8 -# so `print(page_info())` doesn't UnicodeEncodeError on Windows. Issue #124(4). -if hasattr(sys.stdout, "reconfigure"): - try: sys.stdout.reconfigure(encoding="utf-8", errors="replace") +# so `print(page_info())` doesn't UnicodeEncodeError on Windows. Issue #124(4), #359. +for _stream in (sys.stdout, sys.stderr): + try: _stream.reconfigure(encoding="utf-8", errors="replace") except Exception: pass from .admin import ( diff --git a/tests/unit/test_ipc.py b/tests/unit/test_ipc.py index 96e2dbc6..f9458348 100644 --- a/tests/unit/test_ipc.py +++ b/tests/unit/test_ipc.py @@ -7,6 +7,18 @@ class _FakeConn: def close(self): pass +class _RecordingConn: + def __init__(self, response=b"{}\n"): + self.sent = b"" + self.response = response + + def sendall(self, data): + self.sent += data + + def recv(self, _size): + return self.response + + def _patch_identify_response(monkeypatch, response): """Stub connect() and request() so identify() sees `response` as the JSON parsed from the daemon's reply, exactly as it would arrive over the wire.""" @@ -20,6 +32,27 @@ def test_identify_returns_pid_for_well_formed_ping_reply(monkeypatch): assert ipc.identify("default", timeout=0.0) == 4242 +def test_request_replaces_lone_surrogates_before_json_ipc(): + conn = _RecordingConn() + + assert ipc.request(conn, None, {"method": "Runtime.evaluate", "params": {"text": "\udc80"}}) == {} + + assert b"\\udc80" not in conn.sent + assert b'"text": "?"' in conn.sent + + +def test_json_dumps_replaces_nested_lone_surrogates(): + payload = {"outer": ["ok", {"bad": "\udc80", "\udc80key": "value"}]} + + assert ipc.json_dumps(payload) == '{"outer": ["ok", {"bad": "?", "?key": "value"}]}' + + +def test_request_replaces_lone_surrogates_in_json_ipc_response(): + conn = _RecordingConn(b'{"result": "\\udc80"}\n') + + assert ipc.request(conn, None, {"meta": "ping"}) == {"result": "?"} + + def test_identify_rejects_boolean_pid(monkeypatch): """isinstance(True, int) is True in Python; a hostile or buggy daemon that replies {"pid": True} would otherwise yield PID 1 (init on POSIX),