From f440e6e27d652d78c02ff60d0fc75d0ea68594d2 Mon Sep 17 00:00:00 2001 From: Matt Windbrook Date: Fri, 8 May 2026 15:11:16 -0700 Subject: [PATCH 1/3] Add browser-WS bridge primitives (browser-session, cdp-router, page-session, browser-bridge) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four new modules under skills/browsing/lib/ implement the flatten-mode CDP substrate that page-action commands will ride in the next commit: - browser-session.js: one CDP WebSocket per Chrome process, lazy-connected on first send()/onEvent() call. Discovers /devtools/browser/ via /json/version. Owns root-session pendingRequests; page-session-tagged responses fall through to event listeners. Exposes a `sendRaw(json)` escape hatch for page sessions to send sessionId-enveloped messages without colliding with the root id-counter. - cdp-router.js: dispatches inbound browser-WS messages by sessionId. Per-session command responses go to that session's pendingRequests; per-session events go to that session's listeners. Sessionless events go to root listeners. Sessionless command responses fall through to browser-session.js — we do NOT add a parallel rootPending map here (single source of truth for root-session correlation). - page-session.js: per-page CDP session over the browser-WS, attached via Target.attachToTarget({flatten:true}). Independent id-counter per session. send() builds a sessionId-enveloped JSON payload and pushes it through browser.sendRaw; the cdp-router correlates the response. enableDomain is idempotent so multiple callers (navigation auto-capture, console-logging) can coexist on the same session. No retry, reconnect, or fallback inside send — the deliberate one-shot contract that retires the pre-flatten per-page WS pool's silent single-use fallback. - browser-bridge.js: subscribes Target.setDiscoverTargets, tracks the live target set, and exposes targets.list/onCreated/onDestroyed/waitForNew plus createBrowserContext/disposeBrowserContext for atomic per-test isolation. Tests at test/lib/{browser-session,cdp-router,page-session,browser-bridge}.test.mjs cover the dispatch logic via mock browser-sessions. The bridge primitives are not yet wired into createSession() — that comes in the orchestrator commit, after the action libs are migrated to consume page sessions. Co-Authored-By: Mahit (Bob 1b32d32d/Opus 4.7) --- skills/browsing/lib/browser-bridge.js | 163 ++++++++++++++++++++++++ skills/browsing/lib/browser-session.js | 164 +++++++++++++++++++++++++ skills/browsing/lib/cdp-router.js | 86 +++++++++++++ skills/browsing/lib/page-session.js | 94 ++++++++++++++ test/lib/browser-bridge.test.mjs | 151 +++++++++++++++++++++++ test/lib/browser-session.test.mjs | 67 ++++++++++ test/lib/cdp-router.test.mjs | 147 ++++++++++++++++++++++ test/lib/page-session.test.mjs | 154 +++++++++++++++++++++++ 8 files changed, 1026 insertions(+) create mode 100644 skills/browsing/lib/browser-bridge.js create mode 100644 skills/browsing/lib/browser-session.js create mode 100644 skills/browsing/lib/cdp-router.js create mode 100644 skills/browsing/lib/page-session.js create mode 100644 test/lib/browser-bridge.test.mjs create mode 100644 test/lib/browser-session.test.mjs create mode 100644 test/lib/cdp-router.test.mjs create mode 100644 test/lib/page-session.test.mjs diff --git a/skills/browsing/lib/browser-bridge.js b/skills/browsing/lib/browser-bridge.js new file mode 100644 index 0000000..3dce9b7 --- /dev/null +++ b/skills/browsing/lib/browser-bridge.js @@ -0,0 +1,163 @@ +// Target.* events + BrowserContext create/dispose, plus the +// cdp-router and page-session attach point — all the browser-WS bridge's +// consumer-facing surface in one module. + +const { createCdpRouter } = require('./cdp-router'); +const { attachPageSession } = require('./page-session'); + +/** + * attachBrowserBridge({browser, host, port, rewriteWsUrl}) — attaches + * Target.setDiscoverTargets to the browser session, tracks the live target + * set, and exposes: + * - targets.list() — synchronous snapshot + * - targets.onCreated(handler) — register listener; returns unsub fn + * - targets.onDestroyed(handler) + * - targets.waitForNew(predicate, {timeoutMs}) + * - createBrowserContext({proxyServer?}) + * - attachPageSession(targetId) — page session over the browser-WS + * + * host/port/rewriteWsUrl are needed by createBrowserContext.createPage to + * construct per-page WS URLs for callers that still want one (the bridge + * itself never uses them — page sessions ride the browser-WS). + */ +async function attachBrowserBridge({ browser, host, port, rewriteWsUrl }) { + const ctxHost = host; + const ctxPort = port; + const ctxRewriteWsUrl = rewriteWsUrl; + + // The cdp-router sits between browser-session and bridge consumers. + // Page-session-tagged messages dispatch to the right session; root-session + // events (Target.*, etc.) fire root listeners. Command responses without + // sessionId stay correlated by browser-session.js's pendingRequests + // (single source of truth for root-session correlation). + const router = createCdpRouter({ browser }); + + const targetMap = new Map(); // targetId -> targetInfo + const onCreatedFns = new Set(); + const onDestroyedFns = new Set(); + + router.getRootListeners().add((msg) => { + if (msg.method === 'Target.targetCreated') { + const t = msg.params.targetInfo; + targetMap.set(t.targetId, t); + for (const fn of onCreatedFns) { + try { fn(t); } catch (e) { console.error('targets onCreated handler threw:', e); } + } + } else if (msg.method === 'Target.targetInfoChanged') { + const t = msg.params.targetInfo; + targetMap.set(t.targetId, t); + } else if (msg.method === 'Target.targetDestroyed') { + const t = targetMap.get(msg.params.targetId); + targetMap.delete(msg.params.targetId); + if (t) { + for (const fn of onDestroyedFns) { + try { fn(t); } catch (e) { console.error('targets onDestroyed handler threw:', e); } + } + } + } + }); + + // Subscribe — replays existing targets as targetCreated events. + await browser.send('Target.setDiscoverTargets', { discover: true }); + + function list() { return Array.from(targetMap.values()); } + function onCreated(fn) { onCreatedFns.add(fn); return () => onCreatedFns.delete(fn); } + function onDestroyed(fn) { onDestroyedFns.add(fn); return () => onDestroyedFns.delete(fn); } + + function waitForNew(predicate, { timeoutMs = 15000 } = {}) { + return new Promise((resolve, reject) => { + let unsub = null; + const timeout = setTimeout(() => { + if (unsub) unsub(); + reject(new Error(`waitForNew: timed out after ${timeoutMs}ms`)); + }, timeoutMs); + unsub = onCreated((t) => { + let match; + try { match = predicate(t); } + catch (e) { + clearTimeout(timeout); + if (unsub) unsub(); + reject(e); + return; + } + if (match) { + clearTimeout(timeout); + if (unsub) unsub(); + resolve(t); + } + }); + }); + } + + /** + * createBrowserContext({proxyServer?}) — creates a Chrome BrowserContext. + * Returns {browserContextId, createPage, dispose}. + * + * createPage(url) calls Target.createTarget({url, browserContextId}) and + * constructs a tab-shape-compatible page handle whose webSocketDebuggerUrl + * is run through rewriteWsUrl. + * + * dispose() is atomic — Chrome tears down cookies/storage/IDB/SW for the + * context in one call. + */ + async function createBrowserContext(opts = {}) { + const params = {}; + if (opts.proxyServer) params.proxyServer = opts.proxyServer; + const { browserContextId } = await browser.send('Target.createBrowserContext', params); + + let disposed = false; + + async function createPage(url = 'about:blank') { + if (disposed) throw new Error('BrowserContext disposed'); + const { targetId } = await browser.send('Target.createTarget', { + url, + browserContextId, + }); + // Construct the per-page WS URL — same shape Chrome's /json/list returns. + const rawWsUrl = `ws://${ctxHost}:${ctxPort}/devtools/page/${targetId}`; + const webSocketDebuggerUrl = ctxRewriteWsUrl(rawWsUrl, ctxHost, ctxPort); + return { + id: targetId, + targetId, + webSocketDebuggerUrl, + type: 'page', + url, + browserContextId, + }; + } + + async function dispose() { + if (disposed) return; + disposed = true; + try { + await browser.send('Target.disposeBrowserContext', { browserContextId }); + } catch (e) { + // best-effort: log but don't throw — dispose is meant to be safe. + console.warn('BrowserContext.dispose() failed:', e && e.message); + } + } + + return { browserContextId, createPage, dispose }; + } + + /** + * Attach a CDP page session to an existing target. Returns a pageSession + * with `{sessionId, targetId, send, onEvent, waitForEvent, enableDomain, detach}`. + * + * The page session rides the browser-WS via `Target.attachToTarget({flatten:true})`. + * No new WebSocket per page; no per-page WS death race. + */ + async function attachPage(targetId) { + return attachPageSession({ browser, router }, targetId); + } + + return { + targets: { list, onCreated, onDestroyed, waitForNew }, + createBrowserContext, + attachPageSession: attachPage, + // Exposed for tests + advanced callers; not part of the public session API. + router, + }; +} + +module.exports = { attachBrowserBridge }; diff --git a/skills/browsing/lib/browser-session.js b/skills/browsing/lib/browser-session.js new file mode 100644 index 0000000..b39cd84 --- /dev/null +++ b/skills/browsing/lib/browser-session.js @@ -0,0 +1,164 @@ +// One CDP WebSocket per Chrome process, talking to /devtools/browser/. +// +// Independent of the per-page actions in lib/cdp-connection.js (which is on +// its way out — see chrome-ws-lib.js's bridge wiring): the browser-WS is the +// transport for Target.* events, BrowserContext create/dispose, and every +// page session attached via Target.attachToTarget({flatten:true}). +// +// Lazy: connect happens on first send() / onEvent() call. Both local and +// remote-Chrome callers exercise the same code path — for the remote case +// startChrome() is skipped entirely, and lazy-connect on first bridge use is +// what serves both. + +const { WebSocketClient } = require('./websocket-client'); + +/** + * createBrowserSession({host, port, rewriteWsUrl, chromeHttp}) -> bridge handle. + * + * Discovers the WS URL via chromeHttp('/json/version').webSocketDebuggerUrl + * and pipes it through rewriteWsUrl (same pattern getTabs/newTab use for + * per-page URLs). + * + * Returned API: + * send(method, params?, {timeoutMs?}) -> Promise // root-session command + * onEvent(handler) -> unsub fn + * close() -> Promise + * isConnected() -> boolean + * sendRaw(json) -> void // pre-built envelope + * + * `sendRaw` is the page-session escape hatch: page sessions need to send + * messages with a `sessionId` envelope, but `send()` builds its own + * envelope (and would clash on the id-counter). The page session pre-builds + * the JSON, manages its own id-counter via the cdp-router, and pushes it + * onto the WS through `sendRaw`. Caller is responsible for matching + * responses (the cdp-router does this). + */ +function createBrowserSession({ host, port, rewriteWsUrl, chromeHttp }) { + let ws = null; + const pendingRequests = new Map(); // id -> {resolve, reject, timeout} + let messageIdCounter = 1; + const eventListeners = new Set(); // (msg) => void + let connectPromise = null; // memoized in-flight connect + let closed = false; + + async function ensureConnected() { + if (ws && ws.isConnected()) return; + if (connectPromise) { await connectPromise; return; } + connectPromise = (async () => { + const versionInfo = await chromeHttp('/json/version'); + if (!versionInfo || !versionInfo.webSocketDebuggerUrl) { + throw new Error('chromeHttp(/json/version) returned no webSocketDebuggerUrl'); + } + const url = rewriteWsUrl(versionInfo.webSocketDebuggerUrl, host, port); + const next = new WebSocketClient(url); + next.on('message', (raw) => { + let data; + try { data = JSON.parse(raw); } catch (e) { + console.error('browser-session: bad JSON from CDP:', e); + return; + } + // Id correlation here is for ROOT-session command responses + // only. Page-session responses arrive with {id, result, sessionId} + // — they go to event listeners (the cdp-router dispatches by + // sessionId). Without this guard, a root id=1 would incorrectly + // resolve when a page-session id=1 response arrives, since each + // session has its own id-counter. + if (data.id !== undefined && data.sessionId === undefined) { + const pending = pendingRequests.get(data.id); + if (pending) { + clearTimeout(pending.timeout); + pendingRequests.delete(data.id); + if (data.error) { + pending.reject(new Error(data.error.message || JSON.stringify(data.error))); + } else { + pending.resolve(data.result); + } + return; // handled — don't deliver to event listeners + } + } + // Everything else (events with method, or page-session command + // responses with sessionId) goes to event listeners. The router + // dispatches by sessionId from there. + for (const fn of eventListeners) { + try { fn(data); } catch (e) { console.error('browser-session listener threw:', e); } + } + }); + next.on('close', () => { + for (const [, p] of pendingRequests) { + clearTimeout(p.timeout); + p.reject(new Error('Browser session WS closed')); + } + pendingRequests.clear(); + }); + await next.connect(); + // Only assign `ws` after a successful connect so concurrent callers + // that fall through the `ws && ws.isConnected()` early-return don't + // observe a partially-initialized socket. Don't null connectPromise — + // leaving the resolved promise in place makes subsequent awaits a no-op. + ws = next; + })(); + await connectPromise; + } + + async function send(method, params = {}, { timeoutMs = 10000 } = {}) { + if (closed) throw new Error('Browser session closed'); + await ensureConnected(); + // Re-check after the await — close() may have run during ensureConnected(). + if (closed) throw new Error('Browser session closed'); + const id = messageIdCounter++; + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + pendingRequests.delete(id); + reject(new Error(`Browser session timeout: ${method}`)); + }, timeoutMs); + pendingRequests.set(id, { resolve, reject, timeout }); + try { + ws.send(JSON.stringify({ id, method, params })); + } catch (e) { + clearTimeout(timeout); + pendingRequests.delete(id); + reject(e); + } + }); + } + + function onEvent(handler) { + eventListeners.add(handler); + return () => eventListeners.delete(handler); + } + + async function close() { + closed = true; + if (ws) { ws.close(); ws = null; } + for (const [, p] of pendingRequests) { + clearTimeout(p.timeout); + p.reject(new Error('Browser session closed')); + } + pendingRequests.clear(); + eventListeners.clear(); + } + + function isConnected() { return ws !== null && ws.isConnected(); } + + /** + * Send a pre-formed JSON payload. Used by page-session.js to send messages + * with a sessionId envelope without browser-session needing to know about + * sessionIds. Caller is responsible for response correlation (the + * cdp-router does this for page sessions). + * + * Caller must ensure the browser-WS is open. Page sessions reach this + * path only after `attachPageSession` has already issued + * `Target.attachToTarget` via the regular `send`, which lazy-opens the WS. + */ + function sendRaw(json) { + if (closed) throw new Error('Browser session closed'); + if (!ws || !ws.isConnected()) { + throw new Error('Browser WS not connected (call send() first to lazy-open)'); + } + ws.send(json); + } + + return { send, onEvent, close, isConnected, sendRaw }; +} + +module.exports = { createBrowserSession }; diff --git a/skills/browsing/lib/cdp-router.js b/skills/browsing/lib/cdp-router.js new file mode 100644 index 0000000..418dd49 --- /dev/null +++ b/skills/browsing/lib/cdp-router.js @@ -0,0 +1,86 @@ +// sessionId-aware dispatcher for browser-WS messages. +// +// Routes incoming browser-WS messages by sessionId: +// - msg.sessionId set → page session's pendingRequests / event listeners +// - msg.method, no sessionId → root listeners (target events, etc.) +// - msg.id, no sessionId → falls through. browser-session.js's existing +// pendingRequests Map correlates root command +// responses; the router does NOT also try to. +// (Avoids duplicate correlation paths.) +// +// Per-session message id counters are independent. {id:1, sessionId:"A"} +// and {id:1, sessionId:"B"} correlate independently on one WS — collapsing +// id space across sessions would silently break correlation. + +function createCdpRouter({ browser }) { + // sessionId -> { pendingRequests: Map, + // eventListeners: Set<(msg) => void> } + const sessions = new Map(); + + // Root browser-session event listeners (events only — command responses are + // correlated in browser-session.js's pendingRequests, not here). + const rootListeners = new Set(); + + browser.onEvent((msg) => { + const sid = msg.sessionId; + if (sid) { + const sess = sessions.get(sid); + if (!sess) return; // detached or never registered + if (msg.id !== undefined) { + const pending = sess.pendingRequests.get(msg.id); + if (pending) { + clearTimeout(pending.timeout); + sess.pendingRequests.delete(msg.id); + if (msg.error) { + pending.reject(new Error(msg.error.message || JSON.stringify(msg.error))); + } else { + pending.resolve(msg.result); + } + } + } else if (msg.method) { + for (const fn of sess.eventListeners) { + try { fn(msg); } catch (e) { console.error('cdp-router page listener threw:', e); } + } + } + } else if (msg.method) { + // Root session events (e.g. Target.targetCreated). Command responses + // (msg.id without sessionId) intentionally fall through — + // browser-session.js handles them. + for (const fn of rootListeners) { + try { fn(msg); } catch (e) { console.error('cdp-router root listener threw:', e); } + } + } + }); + + function registerSession(sessionId) { + const sess = { + pendingRequests: new Map(), + eventListeners: new Set(), + }; + sessions.set(sessionId, sess); + return sess; + } + + function unregisterSession(sessionId) { + const sess = sessions.get(sessionId); + if (!sess) return; + // Reject any in-flight requests so awaiting callers don't hang. + for (const [, p] of sess.pendingRequests) { + clearTimeout(p.timeout); + p.reject(new Error('Page session detached')); + } + sess.pendingRequests.clear(); + sess.eventListeners.clear(); + sessions.delete(sessionId); + } + + function getRootListeners() { return rootListeners; } + + return { + registerSession, + unregisterSession, + getRootListeners, + }; +} + +module.exports = { createCdpRouter }; diff --git a/skills/browsing/lib/page-session.js b/skills/browsing/lib/page-session.js new file mode 100644 index 0000000..2441e9d --- /dev/null +++ b/skills/browsing/lib/page-session.js @@ -0,0 +1,94 @@ +// Per-page CDP session over the browser-WS, attached via +// Target.attachToTarget({flatten:true}). +// +// Each pageSession wraps: +// - a sessionId (from Target.attachToTarget) +// - a targetId (the underlying CDP target) +// - a per-session message id counter (independent of other sessions) +// - pendingRequests + eventListeners (held in the cdp-router) +// +// pageSession.send is the only way page-action commands reach Chrome +// through this transport. There is no fallback. If the browser-WS dies, +// the call rejects and the caller decides what to do — the deliberate +// contract that retires the per-page WS pool's silent single-use fallback. + +async function attachPageSession({ browser, router }, targetId) { + const { sessionId } = await browser.send('Target.attachToTarget', { + targetId, + flatten: true, + }); + + const sess = router.registerSession(sessionId); + let messageIdCounter = 1; + let detached = false; + const enabledDomains = new Set(); // domains we've already sent X.enable for + + async function send(method, params = {}, { timeoutMs = 30000 } = {}) { + if (detached) throw new Error(`Page session detached (sessionId=${sessionId})`); + const id = messageIdCounter++; + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + sess.pendingRequests.delete(id); + reject(new Error(`Page session timeout: ${method}`)); + }, timeoutMs); + sess.pendingRequests.set(id, { resolve, reject, timeout }); + // Send via browser-session, with the sessionId envelope. + // browser.send doesn't natively envelope by sessionId, so we use the + // sendRaw escape hatch with a pre-built JSON payload. + try { + browser.sendRaw(JSON.stringify({ id, method, params, sessionId })); + } catch (e) { + clearTimeout(timeout); + sess.pendingRequests.delete(id); + reject(e); + } + }); + } + + function onEvent(handler) { + sess.eventListeners.add(handler); + return () => sess.eventListeners.delete(handler); + } + + function waitForEvent(method, { timeoutMs = 15000 } = {}) { + return new Promise((resolve, reject) => { + let unsub = null; + const timeout = setTimeout(() => { + if (unsub) unsub(); + reject(new Error(`waitForEvent ${method}: timed out after ${timeoutMs}ms`)); + }, timeoutMs); + unsub = onEvent((msg) => { + if (msg.method === method) { + clearTimeout(timeout); + unsub(); + resolve(msg); + } + }); + }); + } + + /** + * Enable a CDP domain for this page session, idempotently. Multiple + * callers (navigation auto-capture + console-logging stream, e.g.) can + * call enableDomain('Runtime') without coordination — it's a no-op if + * already enabled. + */ + async function enableDomain(name) { + if (enabledDomains.has(name)) return; + await send(`${name}.enable`, {}); + enabledDomains.add(name); + } + + async function detach() { + if (detached) return; + detached = true; + try { + await browser.send('Target.detachFromTarget', { sessionId }); + } catch { /* best-effort — Chrome may already have torn down the target */ } + router.unregisterSession(sessionId); + } + + return { sessionId, targetId, send, onEvent, waitForEvent, enableDomain, detach }; +} + +module.exports = { attachPageSession }; diff --git a/test/lib/browser-bridge.test.mjs b/test/lib/browser-bridge.test.mjs new file mode 100644 index 0000000..8e7a62b --- /dev/null +++ b/test/lib/browser-bridge.test.mjs @@ -0,0 +1,151 @@ +import { describe, it } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const { attachBrowserBridge } = require('../../skills/browsing/lib/browser-bridge.js'); + +// Mock browser-session that lets the test drive inbound CDP messages. +function makeMockBrowser({ contextId = 'CTX1', attachSessionId = 'SID1' } = {}) { + let handler = null; + const sentCommands = []; + return { + onEvent(fn) { handler = fn; return () => { handler = null; }; }, + deliver(msg) { if (handler) handler(msg); }, + sentCommands, + async send(method, params) { + sentCommands.push({ method, params }); + if (method === 'Target.setDiscoverTargets') return {}; + if (method === 'Target.createBrowserContext') return { browserContextId: contextId }; + if (method === 'Target.disposeBrowserContext') return {}; + if (method === 'Target.createTarget') return { targetId: 'new-target' }; + if (method === 'Target.attachToTarget') return { sessionId: attachSessionId }; + return {}; + }, + sendRaw() {}, + }; +} + +const idRewriteWsUrl = (u) => u; + +describe('browser-bridge', () => { + it('subscribes to Target.setDiscoverTargets on attach', async () => { + const browser = makeMockBrowser(); + await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + const sub = browser.sentCommands.find((c) => c.method === 'Target.setDiscoverTargets'); + assert.ok(sub); + assert.equal(sub.params.discover, true); + }); + + it('targets.list reflects targetCreated / targetDestroyed events', async () => { + const browser = makeMockBrowser(); + const bridge = await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + + browser.deliver({ method: 'Target.targetCreated', params: { targetInfo: { targetId: 'A', type: 'page', url: 'about:blank' } } }); + browser.deliver({ method: 'Target.targetCreated', params: { targetInfo: { targetId: 'B', type: 'page', url: 'about:blank' } } }); + + let list = bridge.targets.list(); + assert.equal(list.length, 2); + + browser.deliver({ method: 'Target.targetDestroyed', params: { targetId: 'A' } }); + + list = bridge.targets.list(); + assert.equal(list.length, 1); + assert.equal(list[0].targetId, 'B'); + }); + + it('targets.onCreated fires for new targets and unsub stops it', async () => { + const browser = makeMockBrowser(); + const bridge = await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + + const seen = []; + const unsub = bridge.targets.onCreated((t) => seen.push(t.targetId)); + + browser.deliver({ method: 'Target.targetCreated', params: { targetInfo: { targetId: 'A', type: 'page' } } }); + assert.deepEqual(seen, ['A']); + + unsub(); + browser.deliver({ method: 'Target.targetCreated', params: { targetInfo: { targetId: 'B', type: 'page' } } }); + assert.deepEqual(seen, ['A']); + }); + + it('targets.onDestroyed fires only after a matching create', async () => { + const browser = makeMockBrowser(); + const bridge = await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + + const seen = []; + bridge.targets.onDestroyed((t) => seen.push(t.targetId)); + + browser.deliver({ method: 'Target.targetDestroyed', params: { targetId: 'never-seen' } }); + assert.deepEqual(seen, []); // unknown — bridge has no info to deliver + + browser.deliver({ method: 'Target.targetCreated', params: { targetInfo: { targetId: 'A', type: 'page' } } }); + browser.deliver({ method: 'Target.targetDestroyed', params: { targetId: 'A' } }); + assert.deepEqual(seen, ['A']); + }); + + it('targets.waitForNew resolves on first matching predicate', async () => { + const browser = makeMockBrowser(); + const bridge = await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + + const p = bridge.targets.waitForNew((t) => t.openerId === 'parent', { timeoutMs: 5000 }); + + browser.deliver({ method: 'Target.targetCreated', params: { targetInfo: { targetId: 'irrelevant', type: 'page' } } }); + browser.deliver({ method: 'Target.targetCreated', params: { targetInfo: { targetId: 'popup', type: 'page', openerId: 'parent' } } }); + + const t = await p; + assert.equal(t.targetId, 'popup'); + }); + + it('targets.waitForNew rejects on timeout', async () => { + const browser = makeMockBrowser(); + const bridge = await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + await assert.rejects(() => bridge.targets.waitForNew(() => false, { timeoutMs: 25 }), /timed out/i); + }); + + it('createBrowserContext returns {browserContextId, createPage, dispose}', async () => { + const browser = makeMockBrowser({ contextId: 'CTX-7' }); + const bridge = await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + + const ctx = await bridge.createBrowserContext(); + assert.equal(ctx.browserContextId, 'CTX-7'); + assert.equal(typeof ctx.createPage, 'function'); + assert.equal(typeof ctx.dispose, 'function'); + }); + + it('createBrowserContext.createPage builds a tab handle with correct WS URL', async () => { + const browser = makeMockBrowser({ contextId: 'CTX-7' }); + const bridge = await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + + const ctx = await bridge.createBrowserContext(); + const tab = await ctx.createPage('https://example.com'); + + assert.equal(tab.targetId, 'new-target'); + assert.equal(tab.id, 'new-target'); + assert.equal(tab.type, 'page'); + assert.equal(tab.browserContextId, 'CTX-7'); + assert.equal(tab.webSocketDebuggerUrl, 'ws://localhost:9222/devtools/page/new-target'); + }); + + it('createBrowserContext.dispose calls Target.disposeBrowserContext exactly once', async () => { + const browser = makeMockBrowser({ contextId: 'CTX-7' }); + const bridge = await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + + const ctx = await bridge.createBrowserContext(); + await ctx.dispose(); + await ctx.dispose(); // idempotent + + const disposes = browser.sentCommands.filter((c) => c.method === 'Target.disposeBrowserContext'); + assert.equal(disposes.length, 1); + assert.equal(disposes[0].params.browserContextId, 'CTX-7'); + }); + + it('createPage after dispose throws', async () => { + const browser = makeMockBrowser(); + const bridge = await attachBrowserBridge({ browser, host: 'localhost', port: 9222, rewriteWsUrl: idRewriteWsUrl }); + + const ctx = await bridge.createBrowserContext(); + await ctx.dispose(); + await assert.rejects(() => ctx.createPage(), /disposed/i); + }); +}); diff --git a/test/lib/browser-session.test.mjs b/test/lib/browser-session.test.mjs new file mode 100644 index 0000000..b4339f1 --- /dev/null +++ b/test/lib/browser-session.test.mjs @@ -0,0 +1,67 @@ +import { describe, it } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const { createBrowserSession } = require('../../skills/browsing/lib/browser-session.js'); + +// browser-session.js depends on a real WebSocketClient for the connect path, +// so the unit-testable surface here is small (lazy connect, send-after-close, +// onEvent registration, sendRaw refusal when not connected). The dispatch +// path is exercised by the cdp-router tests via a mock browser, and by the +// integration tests at /test/smoke.test.mjs. + +describe('browser-session', () => { + function setup({ chromeHttpResult } = {}) { + const chromeHttp = async () => chromeHttpResult; + return createBrowserSession({ + host: 'localhost', + port: 9222, + rewriteWsUrl: (u) => u, + chromeHttp, + }); + } + + it('isConnected returns false before connect', () => { + const bs = setup(); + assert.equal(bs.isConnected(), false); + }); + + it('send rejects after close', async () => { + const bs = setup(); + await bs.close(); + await assert.rejects(() => bs.send('X.y'), /closed/i); + }); + + it('sendRaw throws after close', async () => { + const bs = setup(); + await bs.close(); + assert.throws(() => bs.sendRaw('{}'), /closed/i); + }); + + it('sendRaw throws when not connected', () => { + const bs = setup(); + assert.throws(() => bs.sendRaw('{}'), /not connected/i); + }); + + it('onEvent returns an unsubscribe function', () => { + const bs = setup(); + const unsub = bs.onEvent(() => {}); + assert.equal(typeof unsub, 'function'); + unsub(); // no throw + }); + + it('exports the expected method set', () => { + const bs = setup(); + assert.equal(typeof bs.send, 'function'); + assert.equal(typeof bs.onEvent, 'function'); + assert.equal(typeof bs.close, 'function'); + assert.equal(typeof bs.isConnected, 'function'); + assert.equal(typeof bs.sendRaw, 'function'); + }); + + it('send rejects with helpful error when chromeHttp returns no webSocketDebuggerUrl', async () => { + const bs = setup({ chromeHttpResult: {} }); + await assert.rejects(() => bs.send('X.y'), /webSocketDebuggerUrl/i); + }); +}); diff --git a/test/lib/cdp-router.test.mjs b/test/lib/cdp-router.test.mjs new file mode 100644 index 0000000..881a13a --- /dev/null +++ b/test/lib/cdp-router.test.mjs @@ -0,0 +1,147 @@ +import { describe, it } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const { createCdpRouter } = require('../../skills/browsing/lib/cdp-router.js'); + +// Mock browser-session: captures the onEvent handler so the test can drive +// it directly. Each test manually calls browserSession.deliver(msg) to +// simulate an inbound CDP message. +function makeMockBrowser() { + let handler = null; + return { + onEvent(fn) { handler = fn; return () => { handler = null; }; }, + deliver(msg) { if (handler) handler(msg); }, + }; +} + +describe('cdp-router', () => { + it('routes session command response to the matching pendingRequest', () => { + const browser = makeMockBrowser(); + const router = createCdpRouter({ browser }); + + const sess = router.registerSession('SID1'); + let resolved; + sess.pendingRequests.set(7, { + resolve: (v) => { resolved = v; }, + reject: () => { throw new Error('should not reject'); }, + timeout: setTimeout(() => {}, 1000), + }); + + browser.deliver({ id: 7, sessionId: 'SID1', result: { ok: true } }); + + assert.deepEqual(resolved, { ok: true }); + assert.equal(sess.pendingRequests.has(7), false); + }); + + it('routes session command error to reject', () => { + const browser = makeMockBrowser(); + const router = createCdpRouter({ browser }); + + const sess = router.registerSession('SID1'); + let rejected; + sess.pendingRequests.set(1, { + resolve: () => { throw new Error('should not resolve'); }, + reject: (e) => { rejected = e; }, + timeout: setTimeout(() => {}, 1000), + }); + + browser.deliver({ id: 1, sessionId: 'SID1', error: { message: 'boom' } }); + + assert.equal(rejected.message, 'boom'); + }); + + it('routes session events to that session\'s eventListeners', () => { + const browser = makeMockBrowser(); + const router = createCdpRouter({ browser }); + + const sess = router.registerSession('SID1'); + const seen = []; + sess.eventListeners.add((m) => seen.push(m)); + + browser.deliver({ method: 'Page.loadEventFired', params: {}, sessionId: 'SID1' }); + + assert.equal(seen.length, 1); + assert.equal(seen[0].method, 'Page.loadEventFired'); + }); + + it('does not fire other sessions\' listeners', () => { + const browser = makeMockBrowser(); + const router = createCdpRouter({ browser }); + + const a = router.registerSession('A'); + const b = router.registerSession('B'); + const seenA = []; + const seenB = []; + a.eventListeners.add((m) => seenA.push(m)); + b.eventListeners.add((m) => seenB.push(m)); + + browser.deliver({ method: 'X.y', sessionId: 'A' }); + + assert.equal(seenA.length, 1); + assert.equal(seenB.length, 0); + }); + + it('routes sessionless events to root listeners', () => { + const browser = makeMockBrowser(); + const router = createCdpRouter({ browser }); + + const seen = []; + router.getRootListeners().add((m) => seen.push(m)); + + browser.deliver({ method: 'Target.targetCreated', params: { targetInfo: { targetId: 't1' } } }); + + assert.equal(seen.length, 1); + assert.equal(seen[0].params.targetInfo.targetId, 't1'); + }); + + it('does NOT route sessionless command responses to root listeners', () => { + // Regression: root command responses are correlated by browser-session.js's + // pendingRequests, NOT the router. The router must NOT also try, or we'd + // have two correlation paths fighting. + const browser = makeMockBrowser(); + const router = createCdpRouter({ browser }); + + const seen = []; + router.getRootListeners().add((m) => seen.push(m)); + + browser.deliver({ id: 42, result: { ok: true } }); + + assert.equal(seen.length, 0); + }); + + it('drops messages for an unregistered sessionId silently', () => { + const browser = makeMockBrowser(); + const router = createCdpRouter({ browser }); + + // No session registered for 'GHOST'. Should not throw. + browser.deliver({ id: 1, sessionId: 'GHOST', result: {} }); + browser.deliver({ method: 'X.y', sessionId: 'GHOST' }); + }); + + it('unregisterSession rejects in-flight pendingRequests', () => { + const browser = makeMockBrowser(); + const router = createCdpRouter({ browser }); + + const sess = router.registerSession('SID'); + let rejected; + sess.pendingRequests.set(1, { + resolve: () => {}, + reject: (e) => { rejected = e; }, + timeout: setTimeout(() => {}, 1000), + }); + + router.unregisterSession('SID'); + + assert.match(rejected.message, /detached/i); + assert.equal(sess.pendingRequests.size, 0); + }); + + it('unregisterSession on missing session is a no-op', () => { + const browser = makeMockBrowser(); + const router = createCdpRouter({ browser }); + // No throw. + router.unregisterSession('NOPE'); + }); +}); diff --git a/test/lib/page-session.test.mjs b/test/lib/page-session.test.mjs new file mode 100644 index 0000000..585f21e --- /dev/null +++ b/test/lib/page-session.test.mjs @@ -0,0 +1,154 @@ +import { describe, it } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const { attachPageSession } = require('../../skills/browsing/lib/page-session.js'); +const { createCdpRouter } = require('../../skills/browsing/lib/cdp-router.js'); + +// Mock browser that stores Target.attachToTarget responses, captures sendRaw +// payloads, and lets the test deliver inbound messages via deliver(). +function makeMockBrowser({ attachSessionId = 'SID1' } = {}) { + let handler = null; + const sentRaw = []; + const sentCommands = []; + return { + onEvent(fn) { handler = fn; return () => { handler = null; }; }, + deliver(msg) { if (handler) handler(msg); }, + sentRaw, + sentCommands, + async send(method, params) { + sentCommands.push({ method, params }); + if (method === 'Target.attachToTarget') return { sessionId: attachSessionId }; + if (method === 'Target.detachFromTarget') return {}; + return {}; + }, + sendRaw(json) { sentRaw.push(JSON.parse(json)); }, + }; +} + +describe('page-session', () => { + it('attaches via Target.attachToTarget({flatten:true}) and returns a session handle', async () => { + const browser = makeMockBrowser({ attachSessionId: 'SID-X' }); + const router = createCdpRouter({ browser }); + const ps = await attachPageSession({ browser, router }, 'target-1'); + + assert.equal(ps.sessionId, 'SID-X'); + assert.equal(ps.targetId, 'target-1'); + assert.equal(browser.sentCommands[0].method, 'Target.attachToTarget'); + assert.equal(browser.sentCommands[0].params.targetId, 'target-1'); + assert.equal(browser.sentCommands[0].params.flatten, true); + }); + + it('send() builds a sessionId-enveloped JSON payload via sendRaw and resolves on response', async () => { + const browser = makeMockBrowser({ attachSessionId: 'SID-A' }); + const router = createCdpRouter({ browser }); + const ps = await attachPageSession({ browser, router }, 'target-1'); + + const sendP = ps.send('Page.navigate', { url: 'about:blank' }); + + // The send should have pushed a JSON envelope through sendRaw. + const env = browser.sentRaw[0]; + assert.equal(env.method, 'Page.navigate'); + assert.deepEqual(env.params, { url: 'about:blank' }); + assert.equal(env.sessionId, 'SID-A'); + assert.equal(typeof env.id, 'number'); + + // Simulate the response arriving via the router. + browser.deliver({ id: env.id, sessionId: 'SID-A', result: { frameId: 'F1' } }); + + const result = await sendP; + assert.deepEqual(result, { frameId: 'F1' }); + }); + + it('send() uses an independent id-counter per session', async () => { + const browser = makeMockBrowser({ attachSessionId: 'A' }); + const router = createCdpRouter({ browser }); + const ps = await attachPageSession({ browser, router }, 'target-1'); + + const p1 = ps.send('X.y'); + const p2 = ps.send('X.z'); + + const ids = browser.sentRaw.map((e) => e.id); + assert.equal(ids[0], 1); + assert.equal(ids[1], 2); + + // Resolve to keep the test clean. + browser.deliver({ id: 1, sessionId: 'A', result: { ok: 1 } }); + browser.deliver({ id: 2, sessionId: 'A', result: { ok: 2 } }); + await p1; await p2; + }); + + it('onEvent receives session-tagged events', async () => { + const browser = makeMockBrowser({ attachSessionId: 'A' }); + const router = createCdpRouter({ browser }); + const ps = await attachPageSession({ browser, router }, 't1'); + + const seen = []; + ps.onEvent((m) => seen.push(m)); + + browser.deliver({ method: 'Page.loadEventFired', params: {}, sessionId: 'A' }); + browser.deliver({ method: 'Other', sessionId: 'B' }); // different session, must not fire + + assert.equal(seen.length, 1); + assert.equal(seen[0].method, 'Page.loadEventFired'); + }); + + it('waitForEvent resolves on first matching event', async () => { + const browser = makeMockBrowser({ attachSessionId: 'A' }); + const router = createCdpRouter({ browser }); + const ps = await attachPageSession({ browser, router }, 't1'); + + const p = ps.waitForEvent('Page.loadEventFired'); + browser.deliver({ method: 'Runtime.consoleAPICalled', sessionId: 'A' }); // ignore + browser.deliver({ method: 'Page.loadEventFired', sessionId: 'A', params: { timestamp: 1 } }); + + const got = await p; + assert.equal(got.method, 'Page.loadEventFired'); + }); + + it('enableDomain is idempotent', async () => { + const browser = makeMockBrowser({ attachSessionId: 'A' }); + const router = createCdpRouter({ browser }); + const ps = await attachPageSession({ browser, router }, 't1'); + + // Drive responses for the enable() call. + const drive = (env) => browser.deliver({ id: env.id, sessionId: 'A', result: {} }); + + const p1 = ps.enableDomain('Runtime'); + drive(browser.sentRaw[browser.sentRaw.length - 1]); + await p1; + + // Second call: must not push another command through sendRaw. + const before = browser.sentRaw.length; + await ps.enableDomain('Runtime'); + assert.equal(browser.sentRaw.length, before); + }); + + it('detach issues Target.detachFromTarget and unregisters the session', async () => { + const browser = makeMockBrowser({ attachSessionId: 'A' }); + const router = createCdpRouter({ browser }); + const ps = await attachPageSession({ browser, router }, 't1'); + + await ps.detach(); + + const detachCalls = browser.sentCommands.filter((c) => c.method === 'Target.detachFromTarget'); + assert.equal(detachCalls.length, 1); + assert.equal(detachCalls[0].params.sessionId, 'A'); + + // After detach, send rejects. + await assert.rejects(() => ps.send('X.y'), /detached/i); + }); + + it('detach is idempotent', async () => { + const browser = makeMockBrowser({ attachSessionId: 'A' }); + const router = createCdpRouter({ browser }); + const ps = await attachPageSession({ browser, router }, 't1'); + + await ps.detach(); + await ps.detach(); // no throw + + const detachCalls = browser.sentCommands.filter((c) => c.method === 'Target.detachFromTarget'); + assert.equal(detachCalls.length, 1); + }); +}); From 60b44e2faa71c5c6b01defc4af552f4a08478f01 Mon Sep 17 00:00:00 2001 From: Matt Windbrook Date: Fri, 8 May 2026 15:24:29 -0700 Subject: [PATCH 2/3] Migrate page actions onto flatten-mode page sessions; retire per-page WS pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Page-action commands now ride a per-target CDP page session over the single browser-WS, with sessionId routing handled by the cdp-router. The per-page WebSocket pool in lib/cdp-connection.js — and its silent "Pooled connection failed, using single-use" fallback — is gone. Action-lib signature change. Every action lib now takes a `getPageSession` resolver instead of `(resolveWsUrl, sendCdpCommand)`. The orchestrator's resolver accepts the legacy shapes (numeric tab index, ws:// URL, numeric string) AND a pre-attached pageSession, routing through page-session.send() for actual CDP traffic. 12 libs migrated: mouse, keyboard-input, evaluation, screenshot, navigation, extraction, file-upload, select-option, viewport, cookies, capture, console-logging. Structural changes elsewhere: - skills/browsing/lib/tabs.js: tab handles returned by getTabs() and newTab() carry a lazy `getPageSession()` thunk that memoizes per targetId. closeTab() detaches the cached session before issuing the HTTP close. Orchestrator wires the attacher via setPageSessionAttacher to avoid a construction-order cycle. - skills/browsing/lib/navigation.js: Page.loadEventFired now arrives through pageSession.waitForEvent — no second WebSocket per navigation. Auto-capture's Runtime.consoleAPICalled stream rides the same page session. The 30s hard cap and "listener-ready-before-navigate" ordering are preserved (Page.enable + waitForEvent are registered before Page.navigate). - skills/browsing/lib/console-logging.js: enableConsoleLogging registers a page-session event listener instead of opening its own WebSocket. state.consoleMessages is keyed by ps.sessionId. Returns {close()} so a caller can stop capturing without detaching the whole page session. - skills/browsing/lib/session-state.js: drop the now-unused connectionPool Map. consoleMessages comment updated to reflect the sessionId keying. - skills/browsing/chrome-ws-lib.js: orchestrator wires _ensureBridge / _closeBridge, defines the getPageSession resolver, exposes the bridge surface (targets, createBrowserContext, attachPageSession), and drops closePooledConnection / closeAllConnections. Bridge open is lazy on first targets/context/page-session access — the remote-Chrome path (where startChrome is skipped) and the local path use one code path. - skills/browsing/lib/chrome-process.js: killChrome now accepts a closeBridge callback and runs it before tearing down Chrome, so the browser-WS and any attached page sessions clean up before the process goes. Test changes: - test/lib/_helpers.mjs: add makePageSessionSpy + makeGetPageSession. The pageSession spy records send() calls in `.calls` and supports onEvent/waitForEvent/enableDomain/detach plus a `.deliver(msg)` hatch for tests that simulate inbound events. - 12 action-lib tests migrated to use makePageSessionSpy. The spy records the same shape as the legacy makeCdpSpy, so test logic survives largely intact — only setup wiring and the call records changed. - console-logging tests gain coverage of the new event-stream path (deliver() to verify the listener captures into state.consoleMessages by sessionId). - test/session-isolation.test.mjs: replace the closeAllConnections identity check with bridge-surface identity checks (attachPageSession / createBrowserContext / targets). - test/lib/cdp-connection.test.mjs: deleted (the module is gone). All 171 tests pass — including the 8 real-Chrome smoke tests against a live Chrome. Co-Authored-By: Mahit (Bob 1b32d32d/Opus 4.7) --- skills/browsing/chrome-ws-lib.js | 208 ++++++++++++++++++------ skills/browsing/lib/browser-session.js | 9 +- skills/browsing/lib/capture.js | 113 +++++++------ skills/browsing/lib/cdp-connection.js | 188 --------------------- skills/browsing/lib/chrome-process.js | 18 ++- skills/browsing/lib/console-logging.js | 148 +++++++---------- skills/browsing/lib/cookies.js | 14 +- skills/browsing/lib/evaluation.js | 31 ++-- skills/browsing/lib/extraction.js | 30 ++-- skills/browsing/lib/file-upload.js | 28 ++-- skills/browsing/lib/keyboard-input.js | 82 +++++----- skills/browsing/lib/mouse.js | 136 ++++++++-------- skills/browsing/lib/navigation.js | 215 ++++++++----------------- skills/browsing/lib/screenshot.js | 24 +-- skills/browsing/lib/select-option.js | 20 +-- skills/browsing/lib/session-state.js | 7 +- skills/browsing/lib/tabs.js | 78 +++++++-- skills/browsing/lib/viewport.js | 45 +++--- test/lib/_helpers.mjs | 82 +++++++++- test/lib/capture.test.mjs | 15 +- test/lib/cdp-connection.test.mjs | 45 ------ test/lib/cdp-router.test.mjs | 2 +- test/lib/console-logging.test.mjs | 58 +++++-- test/lib/cookies.test.mjs | 14 +- test/lib/evaluation.test.mjs | 67 ++++---- test/lib/extraction.test.mjs | 30 ++-- test/lib/file-upload.test.mjs | 26 +-- test/lib/keyboard-input.test.mjs | 56 +++---- test/lib/mouse.test.mjs | 47 +++--- test/lib/navigation.test.mjs | 77 +++------ test/lib/screenshot.test.mjs | 18 +-- test/lib/select-option.test.mjs | 30 ++-- test/lib/viewport.test.mjs | 33 ++-- test/session-isolation.test.mjs | 33 ++-- 34 files changed, 952 insertions(+), 1075 deletions(-) delete mode 100644 skills/browsing/lib/cdp-connection.js delete mode 100644 test/lib/cdp-connection.test.mjs diff --git a/skills/browsing/chrome-ws-lib.js b/skills/browsing/chrome-ws-lib.js index 7d7e1aa..d9b56d8 100644 --- a/skills/browsing/chrome-ws-lib.js +++ b/skills/browsing/chrome-ws-lib.js @@ -1,9 +1,26 @@ /** - * Chrome WebSocket Library - Core CDP automation functions - * Used by both CLI and MCP server + * Chrome WebSocket Library — Core CDP automation functions + * + * The orchestrator: a thin wiring layer over `lib/*.js` modules. + * + * Page-action commands ride a single browser-level CDP WebSocket + * (lib/browser-session.js) via `Target.attachToTarget({flatten:true})` + * sessions. Per-page WebSockets are no longer used as the transport for + * actions — the page session (lib/page-session.js) does the work, with + * sessionId routing handled by lib/cdp-router.js. That subsystem + * (browser-session + cdp-router + page-session + browser-bridge) replaces + * the per-page connection pool that previously lived in lib/cdp-connection.js. + * + * The bridge is lazy: the browser-WS is opened on first targets/context/ + * page-session access, not at createSession() time. That way the + * remote-Chrome path (where the caller passes `{host, port}` of an + * already-running Chrome and skips startChrome) works through the same + * code path as the local-launched case. * * Fixes implemented: - * - JRV-130: Connection pooling for persistent focus + * - JRV-130: focus survives across Runtime.evaluate calls (now via the + * persistent page session — same property as the old pool, simpler + * substrate) * - JRV-127: keyboard_press action for special keys * - JRV-123: React-compatible input via Input.insertText * - JRV-124: React-compatible click via Input.dispatchMouseEvent @@ -30,9 +47,10 @@ const { attachExtraction } = require('./lib/extraction'); const { attachScreenshot } = require('./lib/screenshot'); const { attachTabs } = require('./lib/tabs'); const { attachFileUpload } = require('./lib/file-upload'); -const { attachCdpConnection } = require('./lib/cdp-connection'); const { attachConsoleLogging } = require('./lib/console-logging'); const { attachSelectOption } = require('./lib/select-option'); +const { createBrowserSession } = require('./lib/browser-session'); +const { attachBrowserBridge } = require('./lib/browser-bridge'); const { getXdgCacheHome, getChromeProfileDir, @@ -48,60 +66,155 @@ const { * Build a fresh Chrome session — a state-bag scoped to a single Chrome target. * * Pre-factory, every consumer that required this file shared module-level - * state: the connection pool, console-message buffers, the chosen profile - * name, the launched Chrome process handle, the active CDP port, and the - * host-override config. Two consumers in the same process therefore drove a - * single Chrome — fine for the CLI and the MCP server (each owns its - * process), but a hazard for any caller that wants to drive multiple Chromes - * concurrently from one Node process (different ports, different profiles). + * state: console-message buffers, the chosen profile name, the launched + * Chrome process handle, the active CDP port, and the host-override config. + * Two consumers in the same process therefore drove a single Chrome. * * `createSession({ host, port })` returns a fresh instance with private state - * and methods bound to that state. Two instances do not share a connection - * pool, console-message map, profile, Chrome process, or host-override — - * mutating one (e.g. setProfileName, startChrome) has no effect on the other. + * and methods bound to that state. Two instances do not share a console-message + * map, profile, Chrome process, host-override, or browser-WS bridge. * Pass `host`/`port` to seed the host-override; omit them to seed from the - * `CHROME_WS_HOST` / `CHROME_WS_PORT` env vars exactly as before. - * - * The returned object preserves the legacy module-level export shape — the - * one-line consumer migration is `require(...)` becomes - * `require(...).createSession()`. + * `CHROME_WS_HOST` / `CHROME_WS_PORT` env vars. */ function createSession({ host, port } = {}) { const state = createState({ host, port }); - // ============================================================================= - const { - sendCdpCommand, - closePooledConnection, - closeAllConnections, - } = attachCdpConnection({ state }); - - const { chromeHttp, resolveWsUrl, getTabs, newTab, closeTab } = attachTabs({ state }); + // ===== Tabs / chromeHttp / resolveWsUrl ===== + const tabsApi = attachTabs({ state }); + const { chromeHttp, getTabs, newTab, closeTab } = tabsApi; + + // ===== Browser-WS bridge (lazy) ===== + // The browser-WS is opened the first time a consumer reaches for the + // bridge surface (targets, createBrowserContext, attachPageSession, or + // any action lib via getPageSession). It is NOT opened in startChrome — + // the remote-Chrome path bypasses startChrome entirely, and lazy-open + // serves both modes with one code path. + let _browser = null; + let _bridge = null; + + async function _ensureBridge() { + if (_bridge) return _bridge; + if (!_browser) { + _browser = createBrowserSession({ + host: state.hostOverride.getHost(), + port: state.activePort, + rewriteWsUrl: state.rewriteWsUrl, + chromeHttp, + }); + } + _bridge = await attachBrowserBridge({ + browser: _browser, + host: state.hostOverride.getHost(), + port: state.activePort, + rewriteWsUrl: state.rewriteWsUrl, + }); + return _bridge; + } + + async function _closeBridge() { + if (_browser) { + try { await _browser.close(); } catch { /* best-effort */ } + _browser = null; + _bridge = null; + } + } + + // Public bridge wrappers — each lazy-opens the browser-WS on first use. + const targets = { + async list() { return (await _ensureBridge()).targets.list(); }, + async onCreated(fn) { return (await _ensureBridge()).targets.onCreated(fn); }, + async onDestroyed(fn) { return (await _ensureBridge()).targets.onDestroyed(fn); }, + async waitForNew(predicate, opts) { return (await _ensureBridge()).targets.waitForNew(predicate, opts); }, + }; + async function createBrowserContext(opts) { + return (await _ensureBridge()).createBrowserContext(opts); + } + + /** + * Attach a page session to an existing target. Returns + * `{sessionId, targetId, send, onEvent, waitForEvent, enableDomain, detach}`. + * Page sessions ride the browser-WS via `Target.attachToTarget({flatten:true})` + * — no per-page WebSocket, no per-page WS-drop race. + */ + async function attachPageSession(targetId) { + return (await _ensureBridge()).attachPageSession(targetId); + } + + // Wire the lazy attacher into tabs.js so tab handles returned by + // getTabs() / newTab() carry a `getPageSession()` thunk. The thunk goes + // through _ensureBridge → bridge.attachPageSession at call time, so + // there's no construction-order dependency between tabs and the bridge. + tabsApi.setPageSessionAttacher((targetId) => attachPageSession(targetId)); + + /** + * Action-lib argument resolver. + * + * Accepts the legacy shapes that tools/tests use today (numeric tab index, + * `ws://...` URL, numeric string) AND the new shape (an existing + * pageSession object) and returns the corresponding pageSession. + */ + async function getPageSession(arg) { + // Already a pageSession? Pass through. + if (arg && typeof arg.send === 'function' && arg.sessionId) { + return arg; + } + + // Numeric or numeric-string index — index into the current tabs list. + if (typeof arg === 'number' || (typeof arg === 'string' && /^\d+$/.test(arg))) { + const idx = typeof arg === 'number' ? arg : parseInt(arg, 10); + const allTabs = await getTabs(); + const pageTabs = allTabs.filter((t) => t.type === 'page'); + + // Auto-create a tab if none exist (matches the legacy auto-start + // behaviour of resolveWsUrl — tools shouldn't have to special-case + // fresh Chrome). + if (pageTabs.length === 0) { + const newTabInfo = await newTab(); + if (!newTabInfo || !newTabInfo.getPageSession) { + throw new Error('getPageSession: newTab failed to return a tab handle'); + } + return newTabInfo.getPageSession(); + } + + if (!pageTabs[idx]) throw new Error(`getPageSession: no tab at index ${idx} (have ${pageTabs.length})`); + return pageTabs[idx].getPageSession(); + } + + // ws:// URL — find the matching tab. + if (typeof arg === 'string' && arg.startsWith('ws://')) { + const allTabs = await getTabs(); + const rewritten = state.rewriteWsUrl(arg, state.hostOverride.getHost(), state.activePort); + const tab = allTabs.find((t) => t.webSocketDebuggerUrl === rewritten || t.webSocketDebuggerUrl === arg); + if (!tab) throw new Error(`getPageSession: no tab found for ${arg}`); + return tab.getPageSession(); + } + + throw new Error(`getPageSession: unsupported arg type: ${typeof arg}`); + } + + // ===== Action libs ===== const { click, hover, drag, mouseMove, scroll, doubleClick, rightClick } = - attachMouse({ resolveWsUrl, sendCdpCommand }); + attachMouse({ getPageSession }); const { keyboardPress, fill, humanType } = - attachKeyboardInput({ state, resolveWsUrl, sendCdpCommand, click }); - - const { fileUpload } = attachFileUpload({ resolveWsUrl, sendCdpCommand }); - - const { selectOption } = attachSelectOption({ resolveWsUrl, sendCdpCommand }); + attachKeyboardInput({ state, getPageSession, click }); - const { evaluate } = attachEvaluation({ resolveWsUrl, sendCdpCommand }); + const { fileUpload } = attachFileUpload({ getPageSession }); - // ============================================================================= + const { selectOption } = attachSelectOption({ getPageSession }); - const { extractText, getHtml, getAttribute } = attachExtraction({ resolveWsUrl, sendCdpCommand }); + const { evaluate } = attachEvaluation({ getPageSession }); + const { extractText, getHtml, getAttribute } = attachExtraction({ getPageSession }); - const { screenshot } = attachScreenshot({ resolveWsUrl, sendCdpCommand }); + const { screenshot } = attachScreenshot({ getPageSession }); const { startChrome, killChrome, showBrowser, hideBrowser, getBrowserMode, getChromePid, getActivePort, getProfileName, setProfileName } = - attachChromeProcess({ state, chromeHttp, getTabs, newTab }); + attachChromeProcess({ state, chromeHttp, getTabs, newTab, closeBridge: _closeBridge }); const { enableConsoleLogging, getConsoleMessages, clearConsoleMessages } = - attachConsoleLogging({ state, resolveWsUrl }); + attachConsoleLogging({ state, getPageSession }); const { initializeSession, @@ -118,24 +231,23 @@ function createSession({ host, port } = {}) { evaluateWithCapture, } = attachCapture({ state, - resolveWsUrl, - sendCdpCommand, + getPageSession, getHtml, screenshot, actions: { click, fill, selectOption, evaluate }, }); const { navigate, waitForElement, waitForText } = - attachNavigation({ state, resolveWsUrl, sendCdpCommand, capturePageArtifacts, evaluate }); + attachNavigation({ state, getPageSession, capturePageArtifacts, evaluate }); - const { setViewport, clearViewport, getViewport } = attachViewport({ resolveWsUrl, sendCdpCommand }); - const { clearCookies } = attachCookies({ resolveWsUrl, sendCdpCommand }); + const { setViewport, clearViewport, getViewport } = attachViewport({ getPageSession }); + const { clearCookies } = attachCookies({ getPageSession }); return { // Internal helpers (exported for testing) getElementSelector, - // Core browser actions (click/fill now use CDP events by default for React compatibility) + // Core browser actions (click/fill use CDP events by default for React compatibility) getTabs, newTab, closeTab, @@ -208,9 +320,11 @@ function createSession({ host, port } = {}) { generateHtmlDiff, captureActionWithDiff, - // Connection management (JRV-130) - closePooledConnection, - closeAllConnections, + // Browser-WS bridge — Target.* events, BrowserContext create/dispose, + // and per-page CDP sessions over the shared browser-WS. + targets, + createBrowserContext, + attachPageSession, // Dynamic port allocation and per-profile meta.json getActivePort, diff --git a/skills/browsing/lib/browser-session.js b/skills/browsing/lib/browser-session.js index b39cd84..baf00dc 100644 --- a/skills/browsing/lib/browser-session.js +++ b/skills/browsing/lib/browser-session.js @@ -1,9 +1,10 @@ // One CDP WebSocket per Chrome process, talking to /devtools/browser/. // -// Independent of the per-page actions in lib/cdp-connection.js (which is on -// its way out — see chrome-ws-lib.js's bridge wiring): the browser-WS is the -// transport for Target.* events, BrowserContext create/dispose, and every -// page session attached via Target.attachToTarget({flatten:true}). +// This is the transport for Target.* events, BrowserContext create/dispose, +// and every page session attached via Target.attachToTarget({flatten:true}). +// Page-action commands ride the per-page sessions (lib/page-session.js), +// which envelope into this socket via sendRaw and correlate responses +// through the cdp-router. // // Lazy: connect happens on first send() / onEvent() call. Both local and // remote-Chrome callers exercise the same code path — for the remote case diff --git a/skills/browsing/lib/capture.js b/skills/browsing/lib/capture.js index 760e7c5..2b6fda9 100644 --- a/skills/browsing/lib/capture.js +++ b/skills/browsing/lib/capture.js @@ -41,11 +41,10 @@ function ensureProcessHandlersRegistered() { * - WithCapture wrappers: thin adapters that pair an action with a * post-action capturePageArtifacts. * - * `attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, - * screenshot, actions: { click, fill, selectOption, evaluate } })` - * returns the bound API. + * Helpers accept `tabIndexOrPageSession` and route through + * `pageSession.send`. */ -function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screenshot, actions }) { +function attachCapture({ state, getPageSession, getHtml, screenshot, actions }) { function initializeSession() { if (!state.sessionDir) { // ~/.cache/superpowers/browser/YYYY-MM-DD/session-{timestamp} @@ -87,18 +86,18 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho // Token-efficient page summary: heading list, interactive-element counts, // main/nav landmark detection. Used in the auto-capture artifact bundle so // the model can decide whether to read the .md or .html file. - async function generateDomSummary(tabIndexOrWsUrl) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { + async function generateDomSummary(tabIndexOrPageSession) { + const ps = await getPageSession(tabIndexOrPageSession); + const result = await ps.send('Runtime.evaluate', { expression: domSummaryScript, - returnByValue: true + returnByValue: true, }); throwIfExceptionDetails(result); return result.result.value; } - async function getPageSize(tabIndexOrWsUrl) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); + async function getPageSize(tabIndexOrPageSession) { + const ps = await getPageSession(tabIndexOrPageSession); const js = `({ width: window.innerWidth, @@ -107,9 +106,9 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho documentHeight: document.documentElement.scrollHeight })`; - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { + const result = await ps.send('Runtime.evaluate', { expression: js, - returnByValue: true + returnByValue: true, }); throwIfExceptionDetails(result); return result.result.value; @@ -118,11 +117,11 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho // Render the page to markdown for token-efficient consumption. Includes // images >= 100x100 in a header summary; inlines image references >= 50x50 // with size info; skips smaller icons. - async function generateMarkdown(tabIndexOrWsUrl) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { + async function generateMarkdown(tabIndexOrPageSession) { + const ps = await getPageSession(tabIndexOrPageSession); + const result = await ps.send('Runtime.evaluate', { expression: markdownScript, - returnByValue: true + returnByValue: true, }); throwIfExceptionDetails(result); return result.result.value; @@ -131,15 +130,15 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho // Single post-action snapshot: html + markdown + screenshot + console-log // placeholder, all parallelised. Filenames share a numbered prefix so the // session dir reads like a flat timeline. - async function capturePageArtifacts(tabIndexOrWsUrl, actionType = 'navigate') { + async function capturePageArtifacts(tabIndexOrPageSession, actionType = 'navigate') { const prefix = createCapturePrefix(actionType); const dir = initializeSession(); const [html, markdown, pageSize, domSummary] = await Promise.all([ - getHtml(tabIndexOrWsUrl), - generateMarkdown(tabIndexOrWsUrl), - getPageSize(tabIndexOrWsUrl), - generateDomSummary(tabIndexOrWsUrl) + getHtml(tabIndexOrPageSession), + generateMarkdown(tabIndexOrPageSession), + getPageSize(tabIndexOrPageSession), + generateDomSummary(tabIndexOrPageSession), ]); const htmlPath = path.join(dir, `${prefix}.html`); @@ -151,7 +150,7 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho fs.writeFileSync(markdownPath, markdown || ''); fs.writeFileSync(consoleLogPath, '# Console Log\n# TODO: Console logging not yet implemented\n'); - await screenshot(tabIndexOrWsUrl, screenshotPath); + await screenshot(tabIndexOrPageSession, screenshotPath); return { capturePrefix: prefix, @@ -160,10 +159,10 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho html: htmlPath, markdown: markdownPath, screenshot: screenshotPath, - consoleLog: consoleLogPath + consoleLog: consoleLogPath, }, pageSize, - domSummary + domSummary, }; } @@ -171,13 +170,13 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho // get the action result alongside the diff and screenshots. Saves and // restores focus around the BEFORE screenshot — taking a screenshot can // shift focus, which then breaks any focus-dependent action that follows. - async function captureActionWithDiff(tabIndexOrWsUrl, actionType, actionFn, settleTime = 3000) { + async function captureActionWithDiff(tabIndexOrPageSession, actionType, actionFn, settleTime = 3000) { const prefix = createCapturePrefix(actionType); const dir = initializeSession(); - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); + const ps = await getPageSession(tabIndexOrPageSession); async function saveFocus() { - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { + const result = await ps.send('Runtime.evaluate', { expression: ` (() => { const el = document.activeElement; @@ -199,7 +198,7 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho return { type: 'path', value: focusPath }; })() `, - returnByValue: true + returnByValue: true, }); throwIfExceptionDetails(result); return result.result?.value; @@ -225,31 +224,31 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho })()`; } if (selector) { - const restoreResult = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { - expression: `(() => { const el = ${selector}; if (el) el.focus(); })()` + const restoreResult = await ps.send('Runtime.evaluate', { + expression: `(() => { const el = ${selector}; if (el) el.focus(); })()`, }); throwIfExceptionDetails(restoreResult); } } // BEFORE: html + screenshot, with focus saved/restored around the screenshot. - const beforeHtml = await getHtml(tabIndexOrWsUrl); + const beforeHtml = await getHtml(ps); const focusInfo = await saveFocus(); const beforeScreenshotPath = path.join(dir, `${prefix}-before.png`); - await screenshot(tabIndexOrWsUrl, beforeScreenshotPath); + await screenshot(ps, beforeScreenshotPath); await restoreFocus(focusInfo); const actionResult = await actionFn(); // Settle: lets React re-renders, animations, and post-action XHRs complete // before the AFTER snapshot. - await new Promise(resolve => setTimeout(resolve, settleTime)); + await new Promise((resolve) => setTimeout(resolve, settleTime)); const [afterHtml, markdown, pageSize, domSummary] = await Promise.all([ - getHtml(tabIndexOrWsUrl), - generateMarkdown(tabIndexOrWsUrl), - getPageSize(tabIndexOrWsUrl), - generateDomSummary(tabIndexOrWsUrl) + getHtml(ps), + generateMarkdown(ps), + getPageSize(ps), + generateDomSummary(ps), ]); const diff = generateHtmlDiff(beforeHtml, afterHtml); @@ -264,7 +263,7 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho fs.writeFileSync(afterHtmlPath, afterHtml || ''); fs.writeFileSync(diffPath, diff); fs.writeFileSync(markdownPath, markdown || ''); - await screenshot(tabIndexOrWsUrl, afterScreenshotPath); + await screenshot(ps, afterScreenshotPath); return { actionResult, @@ -277,21 +276,21 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho diff: diffPath, markdown: markdownPath, beforeScreenshot: beforeScreenshotPath, - afterScreenshot: afterScreenshotPath + afterScreenshot: afterScreenshotPath, }, pageSize, domSummary, - diffSummary: diff.split('\n').slice(0, 5).join('\n') + (diff.split('\n').length > 5 ? '\n...' : '') - } + diffSummary: diff.split('\n').slice(0, 5).join('\n') + (diff.split('\n').length > 5 ? '\n...' : ''), + }, }; } // *WithCapture wrappers — perform an action, then capturePageArtifacts. // The MCP server consumes these directly; the bare action variants stay // exported for callers (and tests) that don't want auto-capture. - async function clickWithCapture(tabIndexOrWsUrl, selector) { - await actions.click(tabIndexOrWsUrl, selector); - const artifacts = await capturePageArtifacts(tabIndexOrWsUrl, 'click'); + async function clickWithCapture(tabIndexOrPageSession, selector) { + await actions.click(tabIndexOrPageSession, selector); + const artifacts = await capturePageArtifacts(tabIndexOrPageSession, 'click'); return { action: 'click', selector, @@ -300,13 +299,13 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho sessionDir: artifacts.sessionDir, files: artifacts.files, domSummary: artifacts.domSummary, - consoleLog: [] // Placeholder + consoleLog: [], // Placeholder }; } - async function fillWithCapture(tabIndexOrWsUrl, selector, value) { - await actions.fill(tabIndexOrWsUrl, selector, value); - const artifacts = await capturePageArtifacts(tabIndexOrWsUrl, 'type'); + async function fillWithCapture(tabIndexOrPageSession, selector, value) { + await actions.fill(tabIndexOrPageSession, selector, value); + const artifacts = await capturePageArtifacts(tabIndexOrPageSession, 'type'); return { action: 'type', selector, @@ -316,13 +315,13 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho sessionDir: artifacts.sessionDir, files: artifacts.files, domSummary: artifacts.domSummary, - consoleLog: [] // Placeholder + consoleLog: [], // Placeholder }; } - async function selectOptionWithCapture(tabIndexOrWsUrl, selector, value) { - await actions.selectOption(tabIndexOrWsUrl, selector, value); - const artifacts = await capturePageArtifacts(tabIndexOrWsUrl, 'select'); + async function selectOptionWithCapture(tabIndexOrPageSession, selector, value) { + await actions.selectOption(tabIndexOrPageSession, selector, value); + const artifacts = await capturePageArtifacts(tabIndexOrPageSession, 'select'); return { action: 'select', selector, @@ -332,13 +331,13 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho sessionDir: artifacts.sessionDir, files: artifacts.files, domSummary: artifacts.domSummary, - consoleLog: [] // Placeholder + consoleLog: [], // Placeholder }; } - async function evaluateWithCapture(tabIndexOrWsUrl, expression) { - const result = await actions.evaluate(tabIndexOrWsUrl, expression); - const artifacts = await capturePageArtifacts(tabIndexOrWsUrl, 'eval'); + async function evaluateWithCapture(tabIndexOrPageSession, expression) { + const result = await actions.evaluate(tabIndexOrPageSession, expression); + const artifacts = await capturePageArtifacts(tabIndexOrPageSession, 'eval'); return { action: 'eval', expression, @@ -348,7 +347,7 @@ function attachCapture({ state, resolveWsUrl, sendCdpCommand, getHtml, screensho sessionDir: artifacts.sessionDir, files: artifacts.files, domSummary: artifacts.domSummary, - consoleLog: [] // Placeholder + consoleLog: [], // Placeholder }; } diff --git a/skills/browsing/lib/cdp-connection.js b/skills/browsing/lib/cdp-connection.js deleted file mode 100644 index 73606dd..0000000 --- a/skills/browsing/lib/cdp-connection.js +++ /dev/null @@ -1,188 +0,0 @@ -const { WebSocketClient } = require('./websocket-client'); - -// Default per-CDP-call timeout. Caller can override via the `timeout` -// parameter on sendCdpCommand. -const DEFAULT_CDP_TIMEOUT_MS = 30000; - -/** - * CDP transport — pooled WebSocket connections to Chrome's debugger. - * - * Why pooling matters (JRV-130): the original single-use connection per - * `Runtime.evaluate` call lost focus between calls because each new - * connection re-attached to the page as a fresh debugger client. The - * pool keeps one persistent ws per tab, so focus/state survives across - * commands. - * - * `sendCdpCommand` is the public entry point. It tries the pool first - * and falls back to a single-use connection if the pooled call throws — - * the fallback is a safety net for the rare case where the pooled - * connection is wedged (broken socket, frame parse error) but a fresh - * connection would still work. - * - * Per-connection request ids start at 1 in each pooled `conn`. The - * single-use path always uses id=1 because each fresh ws has nothing to - * collide with. - * - * The pool eventHandler hook (`conn.eventHandler`) is consumed by - * `enableConsoleLogging` (and any future caller that wants to listen on - * the persistent socket without spinning up a second one). - * - * `attachCdpConnection({ state })` returns the bound API. - */ -function attachCdpConnection({ state }) { - async function getPooledConnection(wsUrl) { - let conn = state.connectionPool.get(wsUrl); - - if (conn && conn.ws.isConnected()) { - return conn; - } - - const ws = new WebSocketClient(wsUrl); - conn = { - ws, - pendingRequests: new Map(), // id -> { resolve, reject, timeout } - messageIdCounter: 1 - }; - - ws.on('message', (msg) => { - try { - const data = JSON.parse(msg); - if (data.id !== undefined) { - const pending = conn.pendingRequests.get(data.id); - if (pending) { - clearTimeout(pending.timeout); - conn.pendingRequests.delete(data.id); - if (data.error) { - pending.reject(new Error(data.error.message || JSON.stringify(data.error))); - } else { - pending.resolve(data.result); - } - } - } - // Forward events (e.g. Runtime.consoleAPICalled) to the per-connection - // eventHandler if one was attached — used by enableConsoleLogging. - if (data.method && conn.eventHandler) { - conn.eventHandler(data); - } - } catch (e) { - console.error('Error processing CDP message:', e); - } - }); - - ws.on('close', () => { - state.connectionPool.delete(wsUrl); - // Reject any in-flight requests so callers don't hang forever. - for (const [_id, pending] of conn.pendingRequests) { - clearTimeout(pending.timeout); - pending.reject(new Error('Connection closed')); - } - conn.pendingRequests.clear(); - }); - - ws.on('error', (err) => { - console.error('WebSocket error:', err); - }); - - await ws.connect(); - state.connectionPool.set(wsUrl, conn); - - return conn; - } - - async function sendCdpCommandPooled(wsUrl, method, params = {}, timeout = DEFAULT_CDP_TIMEOUT_MS) { - const conn = await getPooledConnection(wsUrl); - const id = conn.messageIdCounter++; - - return new Promise((resolve, reject) => { - const timeoutHandle = setTimeout(() => { - conn.pendingRequests.delete(id); - reject(new Error(`CDP command timeout: ${method}`)); - }, timeout); - - conn.pendingRequests.set(id, { resolve, reject, timeout: timeoutHandle }); - conn.ws.send(JSON.stringify({ id, method, params })); - }); - } - - // Single-use ws — fallback when the pool is wedged. Each call opens a - // fresh connection, sends one request, waits for the matching id, - // closes. Less efficient (re-handshakes per call) but recovers from - // broken pooled connections without wedging the rest of the session. - async function sendCdpCommandSingle(wsUrl, method, params = {}, timeout = DEFAULT_CDP_TIMEOUT_MS) { - const ws = new WebSocketClient(wsUrl); - - return new Promise((resolve, reject) => { - // Single-use ws sends exactly one request — id=1 is fine because the - // connection is fresh and there's nothing to collide with. - const id = 1; - let resolved = false; - - ws.on('message', (msg) => { - const data = JSON.parse(msg); - if (data.id === id) { - resolved = true; - ws.close(); - if (data.error) { - reject(new Error(data.error.message || JSON.stringify(data.error))); - } else { - resolve(data.result); - } - } - }); - - ws.on('error', (err) => { - if (!resolved) { - reject(err); - } - }); - - ws.connect() - .then(() => { - ws.send(JSON.stringify({ id, method, params })); - }) - .catch(reject); - - setTimeout(() => { - if (!resolved) { - ws.close(); - reject(new Error('CDP command timeout')); - } - }, timeout); - }); - } - - async function sendCdpCommand(wsUrl, method, params = {}, timeout = DEFAULT_CDP_TIMEOUT_MS) { - try { - return await sendCdpCommandPooled(wsUrl, method, params, timeout); - } catch (e) { - console.error('Pooled connection failed, using single-use:', e.message); - return await sendCdpCommandSingle(wsUrl, method, params, timeout); - } - } - - function closePooledConnection(wsUrl) { - const conn = state.connectionPool.get(wsUrl); - if (conn) { - conn.ws.close(); - state.connectionPool.delete(wsUrl); - } - } - - function closeAllConnections() { - for (const [_wsUrl, conn] of state.connectionPool) { - conn.ws.close(); - } - state.connectionPool.clear(); - } - - return { - getPooledConnection, - sendCdpCommand, - sendCdpCommandPooled, - sendCdpCommandSingle, - closePooledConnection, - closeAllConnections, - }; -} - -module.exports = { attachCdpConnection }; diff --git a/skills/browsing/lib/chrome-process.js b/skills/browsing/lib/chrome-process.js index 4a4aea3..555ec03 100644 --- a/skills/browsing/lib/chrome-process.js +++ b/skills/browsing/lib/chrome-process.js @@ -19,10 +19,15 @@ const os = require('os'); * needs — chromeHttp for graceful shutdown, getTabs/newTab for the * show/hide tab-restoration flow. * - * `attachChromeProcess({ state, chromeHttp, getTabs, newTab })` returns - * the bound methods. + * `closeBridge` (optional) is invoked at the start of `killChrome` so the + * browser-level CDP WebSocket and any attached page sessions are torn down + * before Chrome itself is killed. If absent (older callers, tests), the + * bridge close is skipped. + * + * `attachChromeProcess({ state, chromeHttp, getTabs, newTab, closeBridge })` + * returns the bound methods. */ -function attachChromeProcess({ state, chromeHttp, getTabs, newTab }) { +function attachChromeProcess({ state, chromeHttp, getTabs, newTab, closeBridge }) { // Read-once derived constants from the per-session host-override. const CHROME_DEBUG_HOST = state.hostOverride.getHost(); const CHROME_DEBUG_PORT = state.hostOverride.getPort(); @@ -143,6 +148,13 @@ function attachChromeProcess({ state, chromeHttp, getTabs, newTab }) { } async function killChrome() { + // Close the browser-level WS bridge before tearing down Chrome. Best-effort + // — if the bridge never opened (no consumer touched targets / pageSession), + // this is a no-op. + if (closeBridge) { + try { await closeBridge(); } catch { /* best-effort */ } + } + let pidToKill = null; if (state.chromeProcess && state.chromeProcess.pid) { diff --git a/skills/browsing/lib/console-logging.js b/skills/browsing/lib/console-logging.js index 79fdb10..3d0e532 100644 --- a/skills/browsing/lib/console-logging.js +++ b/skills/browsing/lib/console-logging.js @@ -1,115 +1,75 @@ -const { WebSocketClient } = require('./websocket-client'); - -// Fixed CDP request id used to mark the Runtime.enable response so the -// message handler can distinguish setup-acknowledged from runtime-event -// without tracking ids generally. -const RUNTIME_ENABLE_REQUEST_ID = 999999; - -// How long to wait for Runtime.enable to acknowledge before failing the -// console-logging setup. -const ENABLE_TIMEOUT_MS = 5000; - /** * Page console-message capture. * - * `enableConsoleLogging` opens a persistent WebSocket alongside the - * pooled CDP connection (kept separate so the request/response flow - * isn't polluted with `Runtime.consoleAPICalled` events) and streams - * console output into `state.consoleMessages` keyed by tab ws URL. - * `getConsoleMessages` reads them out — optionally filtered by - * timestamp — and `clearConsoleMessages` resets the buffer for a tab. + * `enableConsoleLogging` enables Runtime on the page session and registers a + * page-session event listener for `Runtime.consoleAPICalled`, streaming + * console output into `state.consoleMessages` keyed by `ps.sessionId`. + * `getConsoleMessages` reads them out — optionally filtered by timestamp — + * and `clearConsoleMessages` resets the buffer for a tab. * - * The fixed id `999999` is used for the `Runtime.enable` request/response - * pair so the message handler can tell setup-acknowledged from - * runtime-event without tracking ids generally. + * `enableDomain('Runtime')` is idempotent so navigation's auto-capture + * and `enableConsoleLogging` can coexist on the same page session without + * stomping on each other. * - * `attachConsoleLogging({ state, resolveWsUrl })` returns the bound API. + * Keying: `state.consoleMessages` is keyed by `ps.sessionId`. The public + * adapter API (`getConsoleMessages(arg, sinceTimestamp)`, + * `clearConsoleMessages(arg)`) accepts tabIndex / wsUrl / pageSession — + * all resolve through `getPageSession` to `ps.sessionId`. */ -function attachConsoleLogging({ state, resolveWsUrl }) { - async function enableConsoleLogging(tabIndexOrWsUrl) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); +function attachConsoleLogging({ state, getPageSession }) { + async function enableConsoleLogging(tabIndexOrPageSession) { + const ps = await getPageSession(tabIndexOrPageSession); - if (!state.consoleMessages.has(wsUrl)) { - state.consoleMessages.set(wsUrl, []); + if (!state.consoleMessages.has(ps.sessionId)) { + state.consoleMessages.set(ps.sessionId, []); } - // Persistent ws — kept open after Runtime.enable so we keep receiving - // Runtime.consoleAPICalled events. Separate from the pooled CDP - // connection so RPC traffic doesn't fight event traffic. - const ws = new WebSocketClient(wsUrl); - - return new Promise((resolve, reject) => { - let enabledRuntime = false; - - ws.on('message', (msg) => { - const data = JSON.parse(msg); - - // Fixed id marks the Runtime.enable response; everything - // after that is event traffic. - if (data.id === RUNTIME_ENABLE_REQUEST_ID && !enabledRuntime) { - enabledRuntime = true; - resolve(); - return; - } - - if (data.method === 'Runtime.consoleAPICalled') { - const entry = data.params; - const timestamp = new Date().toISOString(); - const level = entry.type || 'log'; - const args = entry.args || []; - - const text = args.map(arg => { - if (arg.type === 'string') return arg.value; - if (arg.type === 'number') return String(arg.value); - if (arg.type === 'boolean') return String(arg.value); - if (arg.type === 'object') return arg.description || '[Object]'; - return String(arg.value || arg.description || arg.type); - }).join(' '); - - const messages = state.consoleMessages.get(wsUrl) || []; - messages.push({ timestamp, level, text }); - state.consoleMessages.set(wsUrl, messages); - } - }); - - ws.on('error', (err) => { - if (!enabledRuntime) { - reject(err); - } - }); - - ws.connect() - .then(() => { - ws.send(JSON.stringify({ - id: RUNTIME_ENABLE_REQUEST_ID, - method: 'Runtime.enable' - })); - }) - .catch(reject); - - setTimeout(() => { - if (!enabledRuntime) { - ws.close(); - reject(new Error('Console logging enable timeout')); - } - }, ENABLE_TIMEOUT_MS); + await ps.enableDomain('Runtime'); // idempotent + + const unsub = ps.onEvent((data) => { + if (data.method !== 'Runtime.consoleAPICalled') return; + const entry = data.params || {}; + const timestamp = new Date().toISOString(); + const level = entry.type || 'log'; + const args = entry.args || []; + + const text = args.map((arg) => { + if (arg.type === 'string') return arg.value; + if (arg.type === 'number') return String(arg.value); + if (arg.type === 'boolean') return String(arg.value); + if (arg.type === 'object') return arg.description || '[Object]'; + return String(arg.value || arg.description || arg.type); + }).join(' '); + + const messages = state.consoleMessages.get(ps.sessionId) || []; + messages.push({ timestamp, level, text }); + state.consoleMessages.set(ps.sessionId, messages); }); + + // The page session detach handles event-listener cleanup via router + // unregisterSession; close() here just unsubscribes this specific + // listener so a caller that wants to stop capturing without detaching + // the whole page session can do so. + return { + close: () => { + try { unsub(); } catch { /* best-effort */ } + }, + }; } - async function getConsoleMessages(tabIndexOrWsUrl, sinceTime = null) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); - const messages = state.consoleMessages.get(wsUrl) || []; + async function getConsoleMessages(tabIndexOrPageSession, sinceTime = null) { + const ps = await getPageSession(tabIndexOrPageSession); + const messages = state.consoleMessages.get(ps.sessionId) || []; if (!sinceTime) { return messages; } - - return messages.filter(msg => new Date(msg.timestamp) > sinceTime); + return messages.filter((msg) => new Date(msg.timestamp) > sinceTime); } - async function clearConsoleMessages(tabIndexOrWsUrl) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); - state.consoleMessages.set(wsUrl, []); + async function clearConsoleMessages(tabIndexOrPageSession) { + const ps = await getPageSession(tabIndexOrPageSession); + state.consoleMessages.set(ps.sessionId, []); } return { enableConsoleLogging, getConsoleMessages, clearConsoleMessages }; diff --git a/skills/browsing/lib/cookies.js b/skills/browsing/lib/cookies.js index 1b53562..0a9a10a 100644 --- a/skills/browsing/lib/cookies.js +++ b/skills/browsing/lib/cookies.js @@ -1,15 +1,13 @@ /** * Cookie management — currently just a single "clear everything" action. * - * Builds against any session-scoped pair of `resolveWsUrl` (tab index → ws URL) - * and `sendCdpCommand` (CDP request) — see `attachCookies({ resolveWsUrl, - * sendCdpCommand })`. The returned methods carry the session binding through - * closure capture of those helpers. + * Helpers accept `tabIndexOrPageSession` and route through + * `pageSession.send`. */ -function attachCookies({ resolveWsUrl, sendCdpCommand }) { - async function clearCookies(tabIndexOrWsUrl) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); - await sendCdpCommand(wsUrl, 'Network.clearBrowserCookies', {}); +function attachCookies({ getPageSession }) { + async function clearCookies(tabIndexOrPageSession) { + const ps = await getPageSession(tabIndexOrPageSession); + await ps.send('Network.clearBrowserCookies', {}); } return { clearCookies }; diff --git a/skills/browsing/lib/evaluation.js b/skills/browsing/lib/evaluation.js index 07ef879..c5d74dd 100644 --- a/skills/browsing/lib/evaluation.js +++ b/skills/browsing/lib/evaluation.js @@ -15,25 +15,26 @@ * full RemoteObject including `objectId`). For callers that need the * raw CDP shape. * - * `attachEvaluation({ resolveWsUrl, sendCdpCommand })` binds them to a - * session via the helpers' closure capture. + * Helpers accept `tabIndexOrPageSession` (the orchestrator's + * `getPageSession` resolver handles all shapes) and route through + * `pageSession.send`. */ const { throwIfExceptionDetails } = require('./cdp-utils'); -function attachEvaluation({ resolveWsUrl, sendCdpCommand }) { - async function evaluate(tabIndexOrWsUrl, expression) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { +function attachEvaluation({ getPageSession }) { + async function evaluate(tabIndexOrPageSession, expression) { + const ps = await getPageSession(tabIndexOrPageSession); + const result = await ps.send('Runtime.evaluate', { expression, returnByValue: true, - awaitPromise: true + awaitPromise: true, }); throwIfExceptionDetails(result); return result.result.value; } - async function evaluateJson(tabIndexOrWsUrl, expression) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); + async function evaluateJson(tabIndexOrPageSession, expression) { + const ps = await getPageSession(tabIndexOrPageSession); const wrappedExpression = ` (() => { @@ -60,20 +61,20 @@ function attachEvaluation({ resolveWsUrl, sendCdpCommand }) { })() `; - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { + const result = await ps.send('Runtime.evaluate', { expression: wrappedExpression, returnByValue: true, - awaitPromise: true + awaitPromise: true, }); throwIfExceptionDetails(result); return result.result.value; } - async function evaluateRaw(tabIndexOrWsUrl, expression) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { + async function evaluateRaw(tabIndexOrPageSession, expression) { + const ps = await getPageSession(tabIndexOrPageSession); + const result = await ps.send('Runtime.evaluate', { expression, - returnByValue: false + returnByValue: false, }); throwIfExceptionDetails(result); return result.result; diff --git a/skills/browsing/lib/extraction.js b/skills/browsing/lib/extraction.js index ef7d8b3..c47183b 100644 --- a/skills/browsing/lib/extraction.js +++ b/skills/browsing/lib/extraction.js @@ -10,40 +10,40 @@ const { throwIfExceptionDetails } = require('./cdp-utils'); * found but empty." The page-content / DOM-summary / markdown extractors * (the heavyweight ones used by auto-capture) live in `lib/capture.js`. * - * `attachExtraction({ resolveWsUrl, sendCdpCommand })` returns the bound - * methods — no session state needed. + * Helpers accept `tabIndexOrPageSession` and route through + * `pageSession.send`. */ -function attachExtraction({ resolveWsUrl, sendCdpCommand }) { - async function extractText(tabIndexOrWsUrl, selector) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); +function attachExtraction({ getPageSession }) { + async function extractText(tabIndexOrPageSession, selector) { + const ps = await getPageSession(tabIndexOrPageSession); const js = `${getElementSelector(selector)}?.textContent`; - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { + const result = await ps.send('Runtime.evaluate', { expression: js, - returnByValue: true + returnByValue: true, }); throwIfExceptionDetails(result); return result.result.value; } - async function getHtml(tabIndexOrWsUrl, selector = null) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); + async function getHtml(tabIndexOrPageSession, selector = null) { + const ps = await getPageSession(tabIndexOrPageSession); const js = selector ? `${getElementSelector(selector)}?.innerHTML` : 'document.documentElement.outerHTML'; - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { + const result = await ps.send('Runtime.evaluate', { expression: js, - returnByValue: true + returnByValue: true, }); throwIfExceptionDetails(result); return result.result.value; } - async function getAttribute(tabIndexOrWsUrl, selector, attrName) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); + async function getAttribute(tabIndexOrPageSession, selector, attrName) { + const ps = await getPageSession(tabIndexOrPageSession); const js = `${getElementSelector(selector)}?.getAttribute(${JSON.stringify(attrName)})`; - const result = await sendCdpCommand(wsUrl, 'Runtime.evaluate', { + const result = await ps.send('Runtime.evaluate', { expression: js, - returnByValue: true + returnByValue: true, }); throwIfExceptionDetails(result); return result.result.value; diff --git a/skills/browsing/lib/file-upload.js b/skills/browsing/lib/file-upload.js index 507eddd..5cdd2d4 100644 --- a/skills/browsing/lib/file-upload.js +++ b/skills/browsing/lib/file-upload.js @@ -7,34 +7,34 @@ * or `DOM.performSearch` + `DOM.getSearchResults` for XPath, then attaches * the absolute file paths to the input. * - * `attachFileUpload({ resolveWsUrl, sendCdpCommand })` returns the bound - * action. + * Helpers accept `tabIndexOrPageSession` and route through + * `pageSession.send`. */ -function attachFileUpload({ resolveWsUrl, sendCdpCommand }) { - async function fileUpload(tabIndexOrWsUrl, selector, filePaths) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); +function attachFileUpload({ getPageSession }) { + async function fileUpload(tabIndexOrPageSession, selector, filePaths) { + const ps = await getPageSession(tabIndexOrPageSession); - const docResult = await sendCdpCommand(wsUrl, 'DOM.getDocument', {}); + const docResult = await ps.send('DOM.getDocument', {}); const rootNodeId = docResult.root.nodeId; let nodeId; if (selector.startsWith('/') || selector.startsWith('//')) { - const searchResult = await sendCdpCommand(wsUrl, 'DOM.performSearch', { - query: selector + const searchResult = await ps.send('DOM.performSearch', { + query: selector, }); if (searchResult.resultCount === 0) { throw new Error(`File input not found: ${selector}`); } - const nodesResult = await sendCdpCommand(wsUrl, 'DOM.getSearchResults', { + const nodesResult = await ps.send('DOM.getSearchResults', { searchId: searchResult.searchId, fromIndex: 0, - toIndex: 1 + toIndex: 1, }); nodeId = nodesResult.nodeIds[0]; } else { - const queryResult = await sendCdpCommand(wsUrl, 'DOM.querySelector', { + const queryResult = await ps.send('DOM.querySelector', { nodeId: rootNodeId, - selector: selector + selector, }); nodeId = queryResult.nodeId; } @@ -43,9 +43,9 @@ function attachFileUpload({ resolveWsUrl, sendCdpCommand }) { throw new Error(`File input not found: ${selector}`); } - await sendCdpCommand(wsUrl, 'DOM.setFileInputFiles', { + await ps.send('DOM.setFileInputFiles', { files: filePaths, - nodeId: nodeId + nodeId, }); return { uploaded: true, files: filePaths.length }; diff --git a/skills/browsing/lib/keyboard-input.js b/skills/browsing/lib/keyboard-input.js index 6cbb52f..b0002a9 100644 --- a/skills/browsing/lib/keyboard-input.js +++ b/skills/browsing/lib/keyboard-input.js @@ -14,11 +14,12 @@ const { throwIfExceptionDetails } = require('./cdp-utils'); * so headless skips key events and relies on `Input.insertText` plus * per-character timing for whatever realism it can offer. * - * `attachKeyboardInput({ state, resolveWsUrl, sendCdpCommand, click })` - * returns the bound API. `click` is the mouse-side click — humanType - * uses it to focus a target before typing. + * Helpers accept `tabIndexOrPageSession` (the orchestrator's + * `getPageSession` resolver handles all shapes) and route through + * `pageSession.send`. `click` is the mouse-side click — humanType uses + * it to focus a target before typing. */ -function attachKeyboardInput({ state, resolveWsUrl, sendCdpCommand, click }) { +function attachKeyboardInput({ state, getPageSession, click }) { /** * Press a named key (Tab, Enter, F1-F12, arrows, etc.) with optional * modifiers. Sends both keyDown and keyUp; if the key has a `text` @@ -26,8 +27,8 @@ function attachKeyboardInput({ state, resolveWsUrl, sendCdpCommand, click }) { * browser fires the matching `input`/`keypress` events that form * submission depends on. */ - async function keyboardPress(tabIndexOrWsUrl, keyName, modifiers = {}) { - const wsUrl = await resolveWsUrl(tabIndexOrWsUrl); + async function keyboardPress(tabIndexOrPageSession, keyName, modifiers = {}) { + const ps = await getPageSession(tabIndexOrPageSession); const keyDef = KEY_DEFINITIONS[keyName]; if (!keyDef) { @@ -40,23 +41,23 @@ function attachKeyboardInput({ state, resolveWsUrl, sendCdpCommand, click }) { if (modifiers.meta) modifierFlags |= 4; if (modifiers.shift) modifierFlags |= 8; - await sendCdpCommand(wsUrl, 'Input.dispatchKeyEvent', { + await ps.send('Input.dispatchKeyEvent', { type: 'keyDown', key: keyDef.key, code: keyDef.code, windowsVirtualKeyCode: keyDef.keyCode, nativeVirtualKeyCode: keyDef.keyCode, modifiers: modifierFlags, - ...(keyDef.text && { text: keyDef.text }) + ...(keyDef.text && { text: keyDef.text }), }); - await sendCdpCommand(wsUrl, 'Input.dispatchKeyEvent', { + await ps.send('Input.dispatchKeyEvent', { type: 'keyUp', key: keyDef.key, code: keyDef.code, windowsVirtualKeyCode: keyDef.keyCode, nativeVirtualKeyCode: keyDef.keyCode, - modifiers: modifierFlags + modifiers: modifierFlags, }); return { pressed: keyName, modifiers }; @@ -66,16 +67,15 @@ function attachKeyboardInput({ state, resolveWsUrl, sendCdpCommand, click }) { * Smart text input. If `selector` is supplied, focuses the element * (via JS focus to avoid mouse-click side effects). Then types the * value, treating \t as Tab, \n as Enter (unless current focus is a - *