Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/snapshot-refs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@houwert/conductor": minor
---

Add ephemeral `@eN` element refs. `capture-ui` now assigns each accessible element a short ref (`@e1`, `@e2`, …) and persists its screen coordinates per session, so `tap-on @e3` can act on the captured point directly without re-querying or fuzzy text/id matching. Stale snapshots (different device or older than 60s) emit an advisory warning rather than hard-failing.
8 changes: 8 additions & 0 deletions packages/cli/src/commands/capture-ui.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
buildWebA11y,
A11ySnapshotEntry,
} from '../drivers/a11y.js';
import { buildStoredSnapshot, saveSnapshot } from '../snapshot-store.js';

export interface CaptureBundle {
version: 1;
Expand Down Expand Up @@ -114,6 +115,13 @@ export async function captureUI(
capabilities: { perViewPixels: false, depthData: false },
};

// Persist `@eN` refs so `tap-on @e3` can act on this capture without a
// re-query. Keyed by session — see snapshot-store.ts.
await saveSnapshot(
sessionName,
buildStoredSnapshot(a11ySnapshot, { deviceId: sessionName, platform })
);

const json = JSON.stringify(bundle);

if (outputPath) {
Expand Down
20 changes: 17 additions & 3 deletions packages/cli/src/commands/tap.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export const HELP = ` tap-on <element> Tap element by text or id
export const HELP = ` tap-on <element> Tap element by text, id, or @eN snapshot ref
--id <id> Match by accessibility id instead of text
--text <text> Match by text only (not id)
--index <n> Pick the nth match (0-based)
Expand All @@ -21,6 +21,7 @@ import { AndroidDriver } from '../drivers/android.js';
import { WebDriver } from '../drivers/web.js';
import { waitForIOSElement, waitForAndroidElement, waitForWebElement } from '../drivers/wait.js';
import { makeIOSDirectResolver } from '../drivers/direct-ios-selector.js';
import { isRefQuery, loadSnapshot, resolveRef } from '../snapshot-store.js';
import { sleep } from '../utils.js';

export async function tap(
Expand Down Expand Up @@ -62,6 +63,9 @@ export async function tap(
...(flags.rightOf && { rightOf: { query: flags.rightOf } }),
};

// A bare `@eN` query taps the cached coordinates from the last `capture-ui`
// snapshot, skipping fuzzy text/id resolution. Explicit --text/--id win.
const useRef = isRefQuery(query) && !flags.text && !flags.id;
const label = flags.text ? `text="${flags.text}"` : flags.id ? `id="${flags.id}"` : `"${query}"`;

const result = await runDirect(async (driver) => {
Expand All @@ -72,8 +76,18 @@ export async function tap(
);
}

let el;
if (driver instanceof IOSDriver) {
let el: { centerX: number; centerY: number };
if (useRef) {
const { entry, staleReason } = resolveRef(await loadSnapshot(sessionName), query, {
deviceId: sessionName,
});
if (staleReason) {
process.stderr.write(
`warning: ${query} — ${staleReason}; re-run capture-ui if the tap misses\n`
);
}
el = { centerX: entry.centerX, centerY: entry.centerY };
} else if (driver instanceof IOSDriver) {
el = await waitForIOSElement(
(o) => driver.viewHierarchy(false, [], { cache: o?.cached }).then((h) => h.axElement),
sel,
Expand Down
6 changes: 6 additions & 0 deletions packages/cli/src/drivers/a11y.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ export interface A11yState {

export interface A11ySnapshotEntry {
nodeId: string;
/** Ephemeral, snapshot-scoped element ref (`@e1`, `@e2`, …). Sequential over
* the snapshot array, 1-indexed. Only valid for the snapshot it was built in. */
ref: string;
order: number;
frame: A11yFrame;
label: string;
Expand Down Expand Up @@ -136,6 +139,7 @@ export function buildIOSA11y(root: AXElement): A11yBuildResult<IOSA11yNode> {
accessibilityOrder = order++;
snapshot.push({
nodeId: path,
ref: `@e${accessibilityOrder + 1}`,
order: accessibilityOrder,
frame: {
x: node.frame.X,
Expand Down Expand Up @@ -403,6 +407,7 @@ export function buildAndroidA11y(xml: string): A11yBuildResult<AndroidA11yNode[]
accessibilityOrder = order++;
snapshot.push({
nodeId: path,
ref: `@e${accessibilityOrder + 1}`,
order: accessibilityOrder,
frame: {
x: n.bounds.x1,
Expand Down Expand Up @@ -521,6 +526,7 @@ export function buildWebA11y(hierarchy: WebViewHierarchy): A11yBuildResult<WebA1
accessibilityOrder = order++;
snapshot.push({
nodeId: path,
ref: `@e${accessibilityOrder + 1}`,
order: accessibilityOrder,
frame: { x: n.bounds.x, y: n.bounds.y, w: n.bounds.width, h: n.bounds.height },
label: n.name,
Expand Down
16 changes: 6 additions & 10 deletions packages/cli/src/drivers/flow-recorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@
import fs from 'fs';
import path from 'path';
import os from 'os';
import { updateSession, getSession, type Session } from '../session.js';

interface SessionWithRecording extends Session {
recordingPath?: string;
}
import { updateSession, getSession } from '../session.js';

const FLOWS_DIR = path.join(os.homedir(), '.conductor', 'recordings');

Expand All @@ -34,22 +30,22 @@ export async function startRecording(
(appId ? `appId: ${appId}\n` : `# appId: <set me>\n`) +
`---\n# Recording started ${new Date().toISOString()}\n`;
fs.writeFileSync(target, header, 'utf-8');
await updateSession({ recordingPath: target } as Partial<Session>, sessionName);
await updateSession({ recordingPath: target }, sessionName);
return target;
}

export async function finishRecording(sessionName: string): Promise<string | null> {
const session = (await getSession(sessionName)) as SessionWithRecording;
const session = await getSession(sessionName);
if (!session.recordingPath) return null;
const out = session.recordingPath;
fs.appendFileSync(out, `# Recording finished ${new Date().toISOString()}\n`);
delete session.recordingPath;
await updateSession(session as Partial<Session>, sessionName);
// Setting to undefined clears the key on save — JSON.stringify drops it.
await updateSession({ recordingPath: undefined }, sessionName);
return out;
}

export async function getActiveRecording(sessionName: string): Promise<string | null> {
const session = (await getSession(sessionName)) as SessionWithRecording;
const session = await getSession(sessionName);
return session.recordingPath ?? null;
}

Expand Down
42 changes: 31 additions & 11 deletions packages/cli/src/drivers/metro-cdp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
* `metro-discovery.ts` — do not duplicate discovery logic here.
*/
import WebSocket from 'ws';
import { fetchTargets } from './log-sources/metro.js';
import { fetchTargets, type MetroTarget } from './log-sources/metro.js';
import { selectTargetForDevice, getDeviceDisplayName } from './log-sources/metro-discovery.js';

export interface CdpCallOptions {
Expand Down Expand Up @@ -40,13 +40,19 @@ interface CdpRequest {
}

/**
* Resolve a Metro target's `webSocketDebuggerUrl` honoring deviceId / targetIndex.
* Throws with a clear message if Metro is unreachable or no target matches.
* Pick a debugger `webSocketDebuggerUrl` from an already-fetched target list.
* Pure — the async `fetchTargets` / `getDeviceDisplayName` calls happen in
* `resolveDebuggerUrl`. `displayName` is the device's resolved display name,
* used for device-scoped selection when present. Throws with a clear message
* when no target matches.
*/
export async function resolveDebuggerUrl(opts: CdpCallOptions): Promise<string> {
export function selectDebuggerUrl(
targets: MetroTarget[],
opts: Pick<CdpCallOptions, 'port' | 'host' | 'targetIndex'>,
displayName?: string
): string {
const port = opts.port ?? 8081;
const host = opts.host ?? 'localhost';
const targets = await fetchTargets(port, host);
const withWs = targets.filter((t) => t.webSocketDebuggerUrl);

if (withWs.length === 0) {
Expand All @@ -62,19 +68,33 @@ export async function resolveDebuggerUrl(opts: CdpCallOptions): Promise<string>
return withWs[opts.targetIndex].webSocketDebuggerUrl!;
}

if (opts.deviceId && opts.platform) {
const displayName = await getDeviceDisplayName(opts.platform, opts.deviceId);
if (displayName) {
const target = selectTargetForDevice(withWs, displayName);
if (target) return target.webSocketDebuggerUrl!;
}
if (displayName) {
const target = selectTargetForDevice(withWs, displayName);
if (target) return target.webSocketDebuggerUrl!;
}

// Prefer the Hermes/React target by title, otherwise first.
const target = withWs.find((t) => t.title && /hermes|react/i.test(t.title)) ?? withWs[0];
return target.webSocketDebuggerUrl!;
}

/**
 * Look up a Metro target's `webSocketDebuggerUrl`, honoring deviceId / targetIndex.
 * Does the async work here — `fetchTargets`, plus `getDeviceDisplayName` when both
 * `deviceId` and `platform` are set — and hands the actual choice to
 * `selectDebuggerUrl`.
 * Throws with a clear message if Metro is unreachable or no target matches.
 */
export async function resolveDebuggerUrl(opts: CdpCallOptions): Promise<string> {
  const targets = await fetchTargets(opts.port ?? 8081, opts.host ?? 'localhost');

  // Device-scoped selection needs both a device id and a platform; without
  // them no display name is looked up.
  if (!opts.deviceId || !opts.platform) {
    return selectDebuggerUrl(targets, opts);
  }

  const resolvedName = await getDeviceDisplayName(opts.platform, opts.deviceId);
  return selectDebuggerUrl(targets, opts, resolvedName ?? undefined);
}

/**
* Open a short-lived CDP socket, send a single method, return the result.
* Closes the socket whether the call succeeds or throws.
Expand Down
2 changes: 2 additions & 0 deletions packages/cli/src/session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import path from 'path';
/** Session fields read and written through `getSession` / `updateSession`. All optional. */
export interface Session {
appId?: string;
deviceId?: string;
/** Path of the active `flow record` recording, when one is in progress. */
recordingPath?: string;
}

const CONDUCTOR_DIR = path.join(os.homedir(), '.conductor');
Expand Down
130 changes: 130 additions & 0 deletions packages/cli/src/snapshot-store.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/**
* Snapshot-scoped ephemeral element refs.
*
* `capture-ui` assigns each accessible element a short ref (`@e1`, `@e2`, …) and
* persists its resolved screen coordinates here, keyed by session. `tap-on @e3`
* then taps the cached point directly — no fuzzy text/id matching.
*
* Refs are deliberately ephemeral: a stale snapshot warns (it does not hard-fail),
* and the agent is expected to re-run `capture-ui` and act on fresh refs.
*/
import fs from 'fs/promises';
import os from 'os';
import path from 'path';
import type { A11ySnapshotEntry, A11yFrame } from './drivers/a11y.js';

/** Directory holding one `<session>.json` snapshot file per session (`~/.conductor/snapshots`). */
const SNAPSHOTS_DIR = path.join(os.homedir(), '.conductor', 'snapshots');

/** A snapshot older than this is considered stale. In milliseconds (60 seconds). */
export const SNAPSHOT_STALE_MS = 60_000;

/** One element of a stored snapshot, addressable by its `@eN` ref. */
export interface SnapshotRefEntry {
/** The element's ref string; also its key in `StoredSnapshot.refs`. */
ref: string;
/** Horizontal centre of `frame` (`frame.x + frame.w / 2`). */
centerX: number;
/** Vertical centre of `frame` (`frame.y + frame.h / 2`). */
centerY: number;
/** The element's frame, copied from the a11y snapshot entry. */
frame: A11yFrame;
/** Accessibility label — used to render a friendly message and for replay portability. */
label: string;
/** Tree-path id of the source node within the capture's hierarchy. */
nodeId: string;
}

/** Shape of the JSON written per session by `saveSnapshot` and built by `buildStoredSnapshot`. */
export interface StoredSnapshot {
/** Format version marker; currently always `1`. */
version: 1;
/** ISO timestamp of the capture. */
capturedAt: string;
/** Device identifier supplied by the caller at build time; compared against the caller's device in `resolveRef`. */
deviceId: string;
/** Platform string supplied by the caller at build time. */
platform: string;
/** Entries keyed by their ref string (`@e1`, `@e2`, …). */
refs: Record<string, SnapshotRefEntry>;
}

/**
 * Path of the snapshot JSON file for a session: `<SNAPSHOTS_DIR>/<sessionName>.json`.
 * Uses the `default` session when no name is given.
 */
export function snapshotFilePath(sessionName = 'default'): string {
  const fileName = sessionName + '.json';
  return path.join(SNAPSHOTS_DIR, fileName);
}

/**
 * Whether `s`, ignoring surrounding whitespace, has the shape of an ephemeral
 * element ref such as `@e3`.
 */
export function isRefQuery(s: string): boolean {
  const candidate = s.trim();
  // `@e` (either case) followed by one or more digits and nothing else.
  return candidate.match(/^@e\d+$/i) !== null;
}

/**
 * Turn a freshly built a11y snapshot into the persisted `StoredSnapshot` shape.
 * Every entry is filed under its `ref`, with its tap point set to the centre of
 * its frame; `capturedAt` is stamped with the current time.
 */
export function buildStoredSnapshot(
  entries: A11ySnapshotEntry[],
  device: { deviceId: string; platform: string }
): StoredSnapshot {
  const byRef: Record<string, SnapshotRefEntry> = {};

  for (const { ref, frame, label, nodeId } of entries) {
    // The tap point is the midpoint of the element's frame.
    const centerX = frame.x + frame.w / 2;
    const centerY = frame.y + frame.h / 2;
    byRef[ref] = { ref, centerX, centerY, frame, label, nodeId };
  }

  const capturedAt = new Date().toISOString();
  const { deviceId, platform } = device;
  return { version: 1, capturedAt, deviceId, platform, refs: byRef };
}

/**
 * Write `snapshot` to the session's snapshot file as 2-space-indented JSON,
 * creating the snapshots directory first when it does not exist yet.
 */
export async function saveSnapshot(sessionName: string, snapshot: StoredSnapshot): Promise<void> {
  await fs.mkdir(SNAPSHOTS_DIR, { recursive: true });

  const destination = snapshotFilePath(sessionName);
  const serialized = JSON.stringify(snapshot, null, 2);
  await fs.writeFile(destination, serialized);
}

/**
 * Read the session's last stored snapshot.
 *
 * Best-effort by design: returns `null` when the file is missing, unreadable,
 * not valid JSON, or does not have the `StoredSnapshot` shape. Callers can then
 * treat all of those as "no snapshot" instead of crashing later on a
 * half-formed object.
 *
 * @param sessionName Session whose snapshot file should be read.
 * @returns The parsed snapshot, or `null` when no usable snapshot exists.
 */
export async function loadSnapshot(sessionName: string): Promise<StoredSnapshot | null> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(await fs.readFile(snapshotFilePath(sessionName), 'utf-8'));
  } catch {
    // Missing file, read failure, or malformed JSON — all mean "nothing to use".
    return null;
  }
  return isStoredSnapshot(parsed) ? parsed : null;
}

/**
 * Structural check for the fields the ref path relies on: the version marker,
 * a string `capturedAt`, and a `refs` object whose entries carry finite
 * `centerX` / `centerY` numbers.
 */
function isStoredSnapshot(value: unknown): value is StoredSnapshot {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as Record<string, unknown>;
  if (candidate.version !== 1 || typeof candidate.capturedAt !== 'string') return false;

  const refs = candidate.refs;
  if (typeof refs !== 'object' || refs === null || Array.isArray(refs)) return false;

  return Object.values(refs as Record<string, unknown>).every((entry) => {
    if (typeof entry !== 'object' || entry === null) return false;
    const { centerX, centerY } = entry as Record<string, unknown>;
    return (
      typeof centerX === 'number' &&
      Number.isFinite(centerX) &&
      typeof centerY === 'number' &&
      Number.isFinite(centerY)
    );
  });
}

/** Result of `resolveRef`: the matched entry plus an optional staleness advisory. */
export interface RefResolution {
/** The stored entry the ref resolved to. */
entry: SnapshotRefEntry;
/** Non-null when the snapshot may no longer match what's on screen. Advisory. */
staleReason: string | null;
}

/**
 * Resolve an `@eN` ref against the session's last `capture-ui` snapshot.
 *
 * Matching ignores surrounding whitespace and letter case. `staleReason` is
 * advisory — callers warn but still act, since refs are explicitly ephemeral.
 * Every applicable reason is reported (joined with `; `), so a device mismatch
 * is not hidden just because the snapshot is also old. A `capturedAt` that
 * cannot be parsed counts as stale rather than silently passing as fresh.
 *
 * @param snapshot The stored snapshot, or `null` when none could be loaded.
 * @param ref The ref to look up, e.g. `@e3`.
 * @param ctx Optional caller context; `deviceId` is compared with the snapshot's.
 * @returns The matched entry and a staleness advisory (`null` when none applies).
 * @throws Error when there is no snapshot or the ref is not in it.
 */
export function resolveRef(
  snapshot: StoredSnapshot | null,
  ref: string,
  ctx?: { deviceId?: string }
): RefResolution {
  if (!snapshot) {
    throw new Error(
      `no snapshot for this session — run \`conductor capture-ui\` before using ${ref}`
    );
  }

  const norm = ref.trim().toLowerCase();
  const refKeys = Object.keys(snapshot.refs);
  const key = refKeys.find((k) => k.toLowerCase() === norm);
  if (!key) {
    // List only the first few refs so the message stays readable.
    const shown = refKeys.slice(0, 8).join(', ');
    throw new Error(
      `${ref} is not in the last snapshot ` +
        `(${refKeys.length} ref${refKeys.length === 1 ? '' : 's'}: ${shown}${refKeys.length > 8 ? ', …' : ''})`
    );
  }

  const reasons: string[] = [];

  // An unparsable timestamp yields NaN; every comparison with NaN is false, so
  // it has to be handled explicitly or the snapshot would look fresh.
  const ageMs = Date.now() - new Date(snapshot.capturedAt).getTime();
  if (Number.isNaN(ageMs)) {
    reasons.push('snapshot has no readable capture time');
  } else if (ageMs > SNAPSHOT_STALE_MS) {
    reasons.push(`snapshot is ${Math.round(ageMs / 1000)}s old`);
  }

  if (ctx?.deviceId && snapshot.deviceId && ctx.deviceId !== snapshot.deviceId) {
    reasons.push(`snapshot was captured on a different device (${snapshot.deviceId})`);
  }

  return {
    entry: snapshot.refs[key],
    staleReason: reasons.length > 0 ? reasons.join('; ') : null,
  };
}
5 changes: 4 additions & 1 deletion packages/cli/tests/all-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ import { daemonIdle } from './daemon-idle.test.js';
import { devicePoolSuite } from './device-pool.test.js';
import { androidSdk } from './android-sdk.test.js';
import { startDeviceAndroid } from './start-device-android.test.js';
import { metroCdp } from './metro-cdp.test.js';
import { snapshotRef } from './snapshot-ref.test.js';
import { flowRecorder } from './flow-recorder.test.js';
import { getDriver } from '../src/runner.js';
import { IOSDriver } from '../src/drivers/ios.js';
import { parseFlowFile, executeFlow } from '../src/drivers/flow-runner.js';
Expand Down Expand Up @@ -71,7 +74,7 @@ async function detectDevice(deviceUdid: string | undefined): Promise<string | un
async function main(): Promise<void> {
const { deviceUdid, suiteFilter } = parseArgs();
const device = await detectDevice(deviceUdid);
let suites = [parser, iosExec, androidExec, fileBased, scriptSuite, elementResolver, a11ySuite, envFlag, daemonIdle, devicePoolSuite, androidSdk, startDeviceAndroid];
let suites = [parser, iosExec, androidExec, fileBased, scriptSuite, elementResolver, a11ySuite, envFlag, daemonIdle, devicePoolSuite, androidSdk, startDeviceAndroid, metroCdp, snapshotRef, flowRecorder];

if (device) {
console.log(`\nDevice: ${device}`);
Expand Down
Loading
Loading