Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 106 additions & 35 deletions extension/src/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ declare const chrome: any;
// ── Bridge link constants ──

const RECONNECT_MIN_MS = 1_000;
const RECONNECT_MAX_MS = 15_000;
// Cap retries low so a live worker re-links to a fresh `connect` bridge quickly.
const RECONNECT_MAX_MS = 5_000;
/**
* Secondary wake: if the worker is ever killed while the bridge is down, this
* alarm revives it to retry connecting. The PRIMARY keepalive is the bridge's
Expand Down Expand Up @@ -442,21 +443,26 @@ async function reResolveRef(drv: TabDriver, ref: string): Promise<number> {
type BrowserAction = { type: string; [k: string]: any };
type BrowserResult = Record<string, any>;

async function executeAction(drv: TabDriver, action: BrowserAction): Promise<BrowserResult> {
async function executeAction(action: BrowserAction): Promise<BrowserResult> {
// Navigation goes through chrome.tabs (no debugger attach needed), so it
// works even when the active tab is a chrome:// / new-tab page that CDP can't
// attach to. We attach only afterwards, once it's a real page, for the snapshot.
if (action.type === "navigate") {
let tabId: number;
try {
tabId = await navigateAgentTab(action.url);
} catch (err) {
throw new Error(`navigate failed: ${err instanceof Error ? err.message : String(err)}`);
}
await new Promise((r) => setTimeout(r, 500)); // settle (≈ networkidle)
const drv = await ensureAttached(tabId);
const info = await drv.info();
const snapshot = await takeSnapshot(drv, { interactiveOnly: true });
return { type: "done", ...info, message: `Navigated to ${action.url}`, snapshot };
}

const drv = await ensureAttached(requireAgentTab());
switch (action.type) {
case "navigate": {
const loaded = drv.once("Page.loadEventFired", 30_000);
try {
await drv.send("Page.navigate", { url: action.url });
} catch (err) {
throw new Error(`navigate failed: ${err instanceof Error ? err.message : String(err)}`);
}
await loaded;
await new Promise((r) => setTimeout(r, 500)); // settle (≈ networkidle)
const info = await drv.info();
const snapshot = await takeSnapshot(drv, { interactiveOnly: true });
return { type: "done", ...info, message: `Navigated to ${action.url}`, snapshot };
}
case "click": {
const urlBefore = (await drv.info()).url;
let nodeId = resolveRef(drv, action.ref);
Expand Down Expand Up @@ -631,28 +637,88 @@ async function navigateHistory(drv: TabDriver, delta: number): Promise<void> {

let driver: TabDriver | null = null;

async function resolveTargetTabId(): Promise<number> {
let tabs = await chrome.tabs.query({ active: true, lastFocusedWindow: true });
let tab = tabs[0];
if (!tab) {
tabs = await chrome.tabs.query({ active: true, currentWindow: true });
tab = tabs[0];
}
if (!tab) {
tabs = await chrome.tabs.query({ active: true });
tab = tabs[0];
/**
* The agent drives its OWN tab — created on the first navigate — rather than
* hijacking whatever the user is looking at. Cookies/logins are profile-wide,
* so this tab is fully signed in, and the user can keep using their other tabs
* (and switch away) without disrupting the agent. Reused across actions until
* it's closed; a closed tab is reopened on the next navigate.
*/
let agentTabId: number | null = null;

/**
 * Report whether the tab with the given id can still be looked up.
 *
 * Any failure of `chrome.tabs.get` is interpreted as "the tab is gone".
 */
async function tabExists(id: number): Promise<boolean> {
  let found = true;
  try {
    await chrome.tabs.get(id);
  } catch {
    found = false;
  }
  return found;
}
if (!tab || typeof tab.id !== "number") throw new Error("No active tab to control");
const url: string = tab.url ?? "";
if (/^(chrome|edge|brave|devtools|chrome-extension|about):/i.test(url) || url.startsWith("https://chromewebstore.google.com")) {
throw new Error(`Can't control this page (${url || "internal page"}). Switch to a normal website tab and try again.`);
}

/** Navigate the agent's tab to a URL, creating the tab if it doesn't exist yet. */
async function navigateAgentTab(url: string): Promise<number> {
const current = agentTabId;
if (current != null && (await tabExists(current))) {
const complete = waitForTabComplete(current, 30_000);
await chrome.tabs.update(current, { url, active: true });
await complete;
return current;
}
return tab.id;
const tab = await chrome.tabs.create({ url, active: true });
if (typeof tab?.id !== "number") throw new Error("could not open a new tab");
const newId: number = tab.id;
agentTabId = newId;
if (tab.status !== "complete") await waitForTabComplete(newId, 30_000);
return newId;
}

/**
 * Return the id of the agent's tab.
 *
 * @throws Error when no agent tab id is currently recorded.
 */
function requireAgentTab(): number {
  const id = agentTabId;
  if (id != null) return id;
  throw new Error("No page open yet — navigate to a URL first.");
}

/**
 * Whether chrome.debugger can attach to a page at `url`.
 *
 * Browser-internal schemes (chrome://, about:, view-source:, …) and both
 * Chrome Web Store origins are rejected; everything else is accepted.
 */
function isAttachable(url: string): boolean {
  const internalScheme = /^(chrome|edge|brave|devtools|chrome-extension|about|view-source):/i;
  const blockedPrefixes = ["https://chromewebstore.google.com", "https://chrome.google.com/webstore"];
  return !internalScheme.test(url) && !blockedPrefixes.some((prefix) => url.startsWith(prefix));
}

/** Ensure we're attached to the current active tab; (re)attach + enable domains. */
async function ensureAttached(): Promise<TabDriver> {
const tabId = await resolveTargetTabId();
/**
 * Resolve once a tab finishes loading (chrome.tabs status), or after `timeout` ms.
 *
 * The returned promise never rejects: it settles on whichever comes first —
 * an `onUpdated` event for `tabId` whose change info has `status === "complete"`,
 * or the fallback timer. Both the listener and the timer are released as soon
 * as the promise settles.
 *
 * @param tabId   Id of the tab to watch.
 * @param timeout Maximum time to wait, in milliseconds.
 */
function waitForTabComplete(tabId: number, timeout: number): Promise<void> {
  return new Promise<void>((resolve) => {
    let done = false;
    let timer: ReturnType<typeof setTimeout> | undefined;
    const fin = () => {
      if (done) return;
      done = true;
      // Cancel the fallback timer so a completed wait doesn't leave a pending
      // timer (and this closure) alive for the rest of `timeout`.
      if (timer !== undefined) clearTimeout(timer);
      try {
        chrome.tabs.onUpdated.removeListener(listener);
      } catch {
        /* ignore */
      }
      resolve();
    };
    const listener = (id: number, info: any) => {
      if (id === tabId && info?.status === "complete") fin();
    };
    chrome.tabs.onUpdated.addListener(listener);
    timer = setTimeout(fin, timeout);
  });
}

/** Ensure we're attached to the given tab; (re)attach + enable CDP domains. */
async function ensureAttached(tabId: number): Promise<TabDriver> {
let url = "";
try {
const t = await chrome.tabs.get(tabId);
url = t?.url ?? "";
} catch {
/* ignore */
}
if (!isAttachable(url)) {
throw new Error(`Can't control this page (${url || "internal page"}). Open a normal website tab and try again.`);
}
if (driver && driver.tabId === tabId) return driver;
if (driver) {
try {
Expand Down Expand Up @@ -688,6 +754,12 @@ chrome.debugger.onDetach.addListener((source: any) => {
if (driver && source?.tabId === driver.tabId) driver = null;
});

// When a tab closes, drop any state that pointed at it: forgetting the agent
// tab id lets the next navigate open a fresh tab, and the driver for that tab
// is discarded as well.
chrome.tabs.onRemoved.addListener((closedTabId: number) => {
  if (agentTabId === closedTabId) agentTabId = null;
  if (driver?.tabId === closedTabId) driver = null;
});

// ── Bridge connection ──

let socket: any = null;
Expand All @@ -704,8 +776,7 @@ function sendToBridge(msg: Record<string, unknown>): void {

async function handleCommand(id: string, action: BrowserAction): Promise<void> {
try {
const drv = await ensureAttached();
const result = await executeAction(drv, action);
const result = await executeAction(action);
sendToBridge({ type: "response", id, result });
} catch (err) {
sendToBridge({ type: "response", id, error: err instanceof Error ? err.message : String(err) });
Expand Down
12 changes: 8 additions & 4 deletions zero/src/cli/commands/browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import { browser } from "../../sdk/browser.ts";
import * as fs from "node:fs/promises";
import { hasFlag, getOption, printJson } from "../format.ts";
import { companionConnect } from "./companion.ts";
import { companionConnect, companionSetup } from "./companion.ts";

const HELP = `zero browser - drive the per-project browser session

Expand All @@ -28,7 +28,8 @@ Usage:
zero browser snapshot [--mode interactive|full] [--selector <css>]
zero browser extract <query> [--max <n>]
zero browser status
zero browser connect [--cdp <url>] [--chromium] (run on YOUR laptop)
zero browser setup (run on YOUR laptop — one-time: add the Chrome extension)
zero browser connect (run on YOUR laptop — drive the agent with your Chrome)

Context-efficient tip: prefer \`snapshot\` (text a11y tree) or \`extract\` (query-driven paragraphs) over \`screenshot\` / full DOM dumps.

Expand All @@ -39,8 +40,11 @@ export async function browserCommand(args: string[]): Promise<number> {
const [action, ...rest] = args;
if (!action || action === "--help" || action === "-h") { process.stdout.write(HELP); return 0; }

// `connect` runs the LOCAL companion (laptop side) rather than issuing a
// remote browser action. It never touches the SDK call path.
// `setup` and `connect` run the LOCAL companion (laptop side) rather than
// issuing a remote browser action. They never touch the SDK call path.
if (action === "setup") {
return companionSetup(rest);
}
if (action === "connect") {
return companionConnect(rest);
}
Expand Down
67 changes: 45 additions & 22 deletions zero/src/cli/commands/companion.ts
Original file line number Diff line number Diff line change
@@ -1,34 +1,60 @@
import { hasFlag } from "../format.ts";
import { loadConfig } from "../../sdk/config.ts";
import { CompanionRunner } from "../../companion/runner.ts";
import { BridgeEngine } from "../../companion/bridge-engine.ts";

const HELP = `zero browser connect - let the agent use YOUR Chrome (with your logins)
/** Help text written to stdout when `zero browser setup` is invoked with `--help` / `-h`. */
const SETUP_HELP = `zero browser setup - one-time setup so the agent can use your Chrome

Usage:
zero browser setup

Adds the "Zero Companion" extension to your Google Chrome. Run this once: it
opens chrome://extensions and the extension folder for you — turn on Developer
mode and drag the folder in. Once it's added it stays added.

After setup, run \`zero browser connect\` to start a session.
`;

const CONNECT_HELP = `zero browser connect - let the agent use YOUR Chrome (with your logins)

Usage:
zero browser connect
zero companion (alias for "zero browser connect")

Installs the Zero Companion extension into your Google Chrome and lets the agent
drive your active tab for the bound project — your real session, your logins, no
separate browser. You can keep browsing while it works. Press Ctrl-C to stop and
hand control back to the agent's own browser.
Lets the agent drive a tab in your own Google Chrome for the bound project —
your real session, your logins, no separate browser. The agent works in its own
tab, so you can keep browsing. While it's driving you'll see "Zero Companion
started debugging this browser" — that's expected. Press Ctrl-C to stop.

The first time (or after you fully quit Chrome), Chrome reopens once with the
helper loaded — your tabs are restored. Chrome shows "Zero Companion started
debugging this browser" while the agent is driving; that's expected.
Run \`zero browser setup\` once first. Requires \`zero login\`.
`;

Requires \`zero login\` first.
/** True when any argument is exactly `--help` or `-h`. */
function isHelp(args: string[]): boolean {
  return args.some((arg) => arg === "--help" || arg === "-h");
}

Options:
--no-launch Don't reopen Chrome automatically. Load the extension yourself
via chrome://extensions (Developer mode → Load unpacked), then
this just waits for it to connect.
`;
/**
 * `zero browser setup` — one-time install of the companion extension.
 *
 * Prints the setup help and exits when `--help` / `-h` is present. Otherwise it
 * runs `BridgeEngine.setup()`, forwarding warning and status lines to stdout,
 * and always stops the engine afterwards.
 *
 * @param args Arguments following `setup` on the command line.
 * @returns Exit code: 0 on success (or help), 1 when setup throws.
 */
export async function companionSetup(args: string[]): Promise<number> {
  if (isHelp(args)) {
    process.stdout.write(SETUP_HELP);
    return 0;
  }

  const emitLine = (line: string) => process.stdout.write(`${line}\n`);
  const engine = new BridgeEngine({ onWarn: emitLine, onStatus: emitLine });

  let exitCode = 0;
  try {
    await engine.setup();
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    process.stderr.write(`${reason}\n`);
    exitCode = 1;
  } finally {
    // Runs on both paths so the engine is released before we report the code.
    await engine.stop();
  }
  return exitCode;
}

/** Run the companion runner until interrupted. Shared by `browser connect` and `companion`. */
/** `zero browser connect` — link the agent to your already-installed extension. */
export async function companionConnect(args: string[]): Promise<number> {
if (hasFlag(args, "--help") || hasFlag(args, "-h")) {
process.stdout.write(HELP);
if (isHelp(args)) {
process.stdout.write(CONNECT_HELP);
return 0;
}
const cfg = loadConfig();
Expand All @@ -37,11 +63,8 @@ export async function companionConnect(args: string[]): Promise<number> {
return 1;
}

const noLaunch = hasFlag(args, "--no-launch");

const write = (line: string) => process.stdout.write(`${line}\n`);
const runner = new CompanionRunner({
noLaunch,
onWarn: write,
onStatus: write,
// Displaced by another computer on this account: the runner has already
Expand All @@ -63,7 +86,7 @@ export async function companionConnect(args: string[]): Promise<number> {
try {
await runner.start();
} catch (err) {
process.stderr.write(`companion failed: ${err instanceof Error ? err.message : String(err)}\n`);
process.stderr.write(`${err instanceof Error ? err.message : String(err)}\n`);
return 1;
}

Expand Down
2 changes: 1 addition & 1 deletion zero/src/cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Groups:
image generate
tasks add (with --schedule, --event, or --script), ls, update, rm
creds ls, get, set, rm
browser open, snapshot, click, fill, screenshot, evaluate, wait, extract, status, connect
browser open, snapshot, click, fill, screenshot, evaluate, wait, extract, status, setup, connect
companion run the local companion (drive the agent's browser with your Chrome)
apps create, delete, list
llm generate
Expand Down
Loading
Loading