From fa994d3bf23f628831ab1e8f06d9e712854c7fff Mon Sep 17 00:00:00 2001 From: snowdamiz Date: Thu, 28 May 2026 21:23:19 -0700 Subject: [PATCH 1/3] docs --- COMPUTER-USE-MANUAL-DRAG-PLAN.md | 98 ++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 COMPUTER-USE-MANUAL-DRAG-PLAN.md diff --git a/COMPUTER-USE-MANUAL-DRAG-PLAN.md b/COMPUTER-USE-MANUAL-DRAG-PLAN.md new file mode 100644 index 00000000..f09871e5 --- /dev/null +++ b/COMPUTER-USE-MANUAL-DRAG-PLAN.md @@ -0,0 +1,98 @@ +# Computer Use Manual Drag Support Plan + +## Reader and Goal + +Reader: an internal engineer implementing Computer Use manual-control input. + +Post-read action: add and verify click-and-drag support for manual desktop control, so a user can drag windows, select file ranges, and perform ordinary drag gestures from the streamed desktop viewport. + +## Audit Conclusion + +Manual Computer Use does not currently support true click-and-drag from the human-controlled viewport. + +The lower layers already have partial drag capability: + +- The desktop control action model includes `mouse_drag`. +- The remote manual-control bridge accepts manual-control input actions and maps `x`, `y`, `toX`, `toY`, `sourceWidth`, and `sourceHeight` into the desktop control request. +- The desktop runtime validates drag source and target points, normalizes active stream coordinates to display coordinates, and maps the action to sidecar drag control when available. +- The native and sidecar input paths can emit a left-button drag sequence. + +The manual-control UI path is the gap: + +- Pointer down sends `mouse_click` or `mouse_right_click` immediately. +- Pointer move while a button is held sends throttled `mouse_move`. +- Pointer up on non-mobile does not send any button-release or drag action. +- Mobile touch gestures are reserved for tap, pan, and pinch behavior; they do not map touch movement into desktop drag. 
+- The relay client input type does not currently advertise `toX` or `toY`, even though the payload path can forward them. + +Because the remote desktop never receives a held mouse button from the manual viewport, the current behavior cannot drag windows or rubber-band select files. It can only click, move the pointer, scroll, and send keyboard/text input. + +## Implementation Strategy + +Use the existing `mouse_drag` control action first. Do not introduce stateful `mouse_down` / `mouse_up` protocol actions unless one-shot drag proves unreliable during manual QA. + +Implement desktop pointer drag as a gesture recognizer in the manual viewport: + +1. On primary-button pointer down in manual mode, capture the pointer and store a pending gesture with pointer id, button, click detail, screen start position, and mapped desktop start point. +2. Do not send `mouse_click` immediately. Wait until pointer up so the gesture can be classified as click or drag. +3. On pointer move for the captured pointer, update the latest mapped point. Mark the gesture as dragging once movement exceeds the existing tap/click slop threshold. +4. On pointer up: + - If movement stayed within slop, send the existing click or double-click payload. + - If movement exceeded slop and the button is left, send one `mouse_drag` payload with `x`, `y`, `toX`, `toY`, `sourceWidth`, and `sourceHeight`. + - If movement exceeded slop for right or middle button, keep the initial implementation conservative and do not synthesize unsupported button-drag behavior. +5. On pointer cancel, lost capture, manual-control release, stream change, or unmount, clear the pending gesture without sending a click. +6. Keep click ripples for click gestures only. Do not add temporary debug UI. + +This preserves the backend approval, lease, stream-token, and coordinate-normalization paths already used by manual control. 
+ +## Type and Contract Updates + +Update the relay client manual-input type so drag is first-class: + +- Add `toX` and `toY`. +- Prefer a small string union for known manual actions if it fits local style; otherwise keep `action: string` and extend the shape only. +- Add a relay client test proving `mouse_drag` forwards start and target coordinates plus stream security fields. + +Add bridge/runtime coverage: + +- Add a bridge unit test for manual `mouse_drag` payload mapping, including `toX` and `toY`. +- Add or extend runtime/sidecar mapping tests so drag target coordinates are preserved into the sidecar request. +- If manual QA shows instant two-point drags are flaky, extend the runtime later with interpolated drag duration or a stateful press/drag/release protocol guarded by the same manual-control lease. + +## Frontend Tests + +Add focused tests around the manual viewport: + +- A simple pointer down/up still sends one `mouse_click`. +- A small move within slop still sends a click. +- A left-button move beyond slop sends one `mouse_drag` on pointer up and does not send the old immediate `mouse_click`. +- The drag payload uses mapped desktop stream coordinates for both source and target, including object-contain letterboxing. +- Pointer cancel sends no click or drag. +- Existing mobile pinch, pan, tap, keyboard capture, scroll, and right-click behavior remain covered. + +## Verification + +Run scoped checks only: + +- `pnpm --dir ./cloud test -- src/routes/-desktop-click-ripple.test.tsx src/lib/relay/relay-client.test.ts` +- `cargo test -p xero-desktop manual_control_drag --lib` +- `cargo test -p xero-desktop-sidecar mouse_drag --tests` + +Run Cargo commands one at a time. + +Manual QA must be performed in the Tauri app, not by opening the app in a browser: + +- Start a Computer Use desktop stream. +- Enter manual control. +- Drag a window by its title bar and confirm it moves. +- Drag across files/icons and confirm multi-select works. 
+- Confirm normal click, double-click, right-click, scroll, and keyboard passthrough still behave normally. + +## Acceptance Criteria + +- Manual left-button drag works from the streamed desktop viewport. +- Clicks are not accidentally converted into drags. +- Drag gestures do not emit a premature click at the start point. +- Manual-control lease and approval gates remain unchanged. +- No temporary or test-only UI is added. +- Scoped frontend and Rust tests pass. From 95b6a1bb2fa759474b9c487f9bdcb4f02d8483c8 Mon Sep 17 00:00:00 2001 From: snowdamiz Date: Thu, 28 May 2026 21:47:29 -0700 Subject: [PATCH 2/3] drag use --- .../tests/ipc_contract.rs | 28 ++- .../src-tauri/src/commands/remote_bridge.rs | 28 +++ .../desktop_control.rs | 42 ++++ cloud/src/lib/relay/relay-client.test.ts | 47 +++++ cloud/src/lib/relay/relay-client.ts | 16 +- .../src/routes/-desktop-click-ripple.test.tsx | 198 ++++++++++++++++++ .../sessions.$computerId.$sessionId.tsx | 196 +++++++++++++++-- 7 files changed, 532 insertions(+), 23 deletions(-) diff --git a/client/src-tauri/crates/xero-desktop-sidecar/tests/ipc_contract.rs b/client/src-tauri/crates/xero-desktop-sidecar/tests/ipc_contract.rs index 7fa940c4..efc2116f 100644 --- a/client/src-tauri/crates/xero-desktop-sidecar/tests/ipc_contract.rs +++ b/client/src-tauri/crates/xero-desktop-sidecar/tests/ipc_contract.rs @@ -8,11 +8,11 @@ use serde_json::json; use time::{format_description::well_known::Rfc3339, Duration, OffsetDateTime}; use xero_desktop_control_ipc::{ hash_session_token, DesktopSidecarActor, DesktopSidecarAuth, DesktopSidecarAuthScheme, - DesktopSidecarCapabilities, DesktopSidecarHandshake, DesktopSidecarOperation, - DesktopSidecarPermissionsPayload, DesktopSidecarRequest, DesktopSidecarResponse, - DesktopSidecarStreamCapabilitiesPayload, DesktopSidecarStreamPayload, - DesktopSidecarStreamStatus, DesktopSidecarStreamTransport, DESKTOP_SIDECAR_PROTOCOL, - DESKTOP_SIDECAR_SCHEMA_VERSION, + DesktopSidecarCapabilities, 
DesktopSidecarControlRequest, DesktopSidecarHandshake, + DesktopSidecarMouseButton, DesktopSidecarOperation, DesktopSidecarPermissionsPayload, + DesktopSidecarRequest, DesktopSidecarResponse, DesktopSidecarStreamCapabilitiesPayload, + DesktopSidecarStreamPayload, DesktopSidecarStreamStatus, DesktopSidecarStreamTransport, + DESKTOP_SIDECAR_PROTOCOL, DESKTOP_SIDECAR_SCHEMA_VERSION, }; struct SidecarHarness { @@ -313,3 +313,21 @@ fn sidecar_ipc_rejects_shell_like_payload_keys() { "sidecar_forbidden_payload" ); } + +#[test] +fn mouse_drag_control_contract_decodes_target_coordinates() { + let request = serde_json::from_value::<DesktopSidecarControlRequest>(json!({ + "x": 10, + "y": 20, + "toX": 300, + "toY": 240, + "button": "left" + })) + .expect("mouse drag control request"); + + assert_eq!(request.x, Some(10)); + assert_eq!(request.y, Some(20)); + assert_eq!(request.to_x, Some(300)); + assert_eq!(request.to_y, Some(240)); + assert_eq!(request.button, Some(DesktopSidecarMouseButton::Left)); +} diff --git a/client/src-tauri/src/commands/remote_bridge.rs b/client/src-tauri/src/commands/remote_bridge.rs index 342e03cd..f6f43446 100644 --- a/client/src-tauri/src/commands/remote_bridge.rs +++ b/client/src-tauri/src/commands/remote_bridge.rs @@ -3942,6 +3942,34 @@ mod tests { ); } + #[test] + fn manual_control_drag_payload_maps_to_desktop_control_request() { + let request = manual_control_input_request(&json!({ + "action": "mouse_drag", + "x": 42, + "y": 64, + "toX": 320, + "toY": 240, + "sourceWidth": 1280, + "sourceHeight": 720, + "button": "left", + })) + .expect("manual drag input request"); + + assert_eq!(request.action, AutonomousDesktopControlAction::MouseDrag); + assert_eq!(request.x, Some(42)); + assert_eq!(request.y, Some(64)); + assert_eq!(request.to_x, Some(320)); + assert_eq!(request.to_y, Some(240)); + assert_eq!(request.source_width, Some(1280)); + assert_eq!(request.source_height, Some(720)); + assert_eq!(request.button, Some(AutonomousDesktopMouseButton::Left)); + assert_eq!( 
request.reason.as_deref(), + Some("cloud_manual_control_input") + ); + } + #[test] fn manual_control_keyboard_payloads_map_to_desktop_control_requests() { let text_request = manual_control_input_request(&json!({ diff --git a/client/src-tauri/src/runtime/autonomous_tool_runtime/desktop_control.rs b/client/src-tauri/src/runtime/autonomous_tool_runtime/desktop_control.rs index d5ad6fc5..59675d6c 100644 --- a/client/src-tauri/src/runtime/autonomous_tool_runtime/desktop_control.rs +++ b/client/src-tauri/src/runtime/autonomous_tool_runtime/desktop_control.rs @@ -7096,6 +7096,48 @@ mod tests { ); } + #[test] + fn manual_control_drag_sidecar_request_preserves_target_coordinates() { + let request = AutonomousDesktopControlRequest { + action: AutonomousDesktopControlAction::MouseDrag, + display_id: None, + window_id: None, + app_name: None, + bundle_id: None, + element_id: None, + x: Some(10), + y: Some(20), + source_width: Some(1280), + source_height: Some(720), + to_x: Some(300), + to_y: Some(240), + delta_x: None, + delta_y: None, + button: Some(AutonomousDesktopMouseButton::Left), + clicks: None, + key: None, + keys: Vec::new(), + text: None, + value: None, + menu_path: Vec::new(), + reason: Some("cloud_manual_control_input".into()), + sensitivity: None, + }; + + validate_desktop_control_request(&request).expect("valid drag request"); + let sidecar = sidecar_control_request(&request); + + assert_eq!( + desktop_control_sidecar_operation(&request.action), + Some(DesktopSidecarOperation::MouseDrag) + ); + assert_eq!(sidecar.x, Some(10)); + assert_eq!(sidecar.y, Some(20)); + assert_eq!(sidecar.to_x, Some(300)); + assert_eq!(sidecar.to_y, Some(240)); + assert_eq!(sidecar.button, Some(DesktopSidecarMouseButton::Left)); + } + #[test] fn maps_scaled_stream_points_to_display_coordinates() { let display = AutonomousDesktopDisplay { diff --git a/cloud/src/lib/relay/relay-client.test.ts b/cloud/src/lib/relay/relay-client.test.ts index c974e987..8b461c9a 100644 --- 
a/cloud/src/lib/relay/relay-client.test.ts +++ b/cloud/src/lib/relay/relay-client.test.ts @@ -539,6 +539,53 @@ describe("pushInboundCommand", () => { ); }); + it("sends manual drag input with target coordinates and stream security fields", () => { + const push = vi.fn(); + + sendComputerUseManualInput({ push } as never, { + computerId: "desktop-1", + sessionId: "session-1", + deviceId: "web-1", + manualControlId: "manual-web-1", + runId: "run-1", + streamToken: "stream-token-1", + input: { + action: "mouse_drag", + x: 100, + y: 120, + toX: 540, + toY: 360, + sourceWidth: 1280, + sourceHeight: 720, + button: "left", + }, + }); + + expect(push).toHaveBeenCalledWith( + "frame", + expect.objectContaining({ + computer_id: "desktop-1", + session_id: "session-1", + device_id: "web-1", + kind: "computer_use_manual_control_input", + payload: { + manualControlId: "manual-web-1", + reason: "cloud_manual_control_input", + action: "mouse_drag", + x: 100, + y: 120, + toX: 540, + toY: 360, + sourceWidth: 1280, + sourceHeight: 720, + button: "left", + runId: "run-1", + streamToken: "stream-token-1", + }, + }), + ); + }); + it("sends manual keyboard payloads through the brokered input frame", () => { const push = vi.fn(); const baseOptions = { diff --git a/cloud/src/lib/relay/relay-client.ts b/cloud/src/lib/relay/relay-client.ts index b7d76372..53dad36d 100644 --- a/cloud/src/lib/relay/relay-client.ts +++ b/cloud/src/lib/relay/relay-client.ts @@ -43,6 +43,18 @@ interface StreamTokenOptions { streamToken?: string | null; } +type ComputerUseManualInputAction = + | "mouse_move" + | "mouse_click" + | "mouse_double_click" + | "mouse_right_click" + | "mouse_drag" + | "scroll" + | "key_press" + | "hotkey" + | "type_text" + | "paste_text"; + let socket: Socket | null = null; function socketIsReusable(socketInstance: Socket): boolean { @@ -485,9 +497,11 @@ export function sendComputerUseManualInput( deviceId: string; manualControlId?: string | null; input: { - action: string; + action: 
ComputerUseManualInputAction; x?: number; y?: number; + toX?: number; + toY?: number; sourceWidth?: number; sourceHeight?: number; deltaX?: number; diff --git a/cloud/src/routes/-desktop-click-ripple.test.tsx b/cloud/src/routes/-desktop-click-ripple.test.tsx index 4ef16a79..3af5f229 100644 --- a/cloud/src/routes/-desktop-click-ripple.test.tsx +++ b/cloud/src/routes/-desktop-click-ripple.test.tsx @@ -49,6 +49,7 @@ describe("ComputerUseDesktopViewport click feedback", () => { const { desktop, image, push } = await renderManualDesktopViewport(); image.getBoundingClientRect = () => domRect(0, 0, 640, 360); desktop.getBoundingClientRect = () => domRect(0, 0, 640, 360); + push.mockClear(); fireEvent.pointerDown(desktop, { button: 0, @@ -57,6 +58,14 @@ describe("ComputerUseDesktopViewport click feedback", () => { detail: 1, pointerId: 7, }); + expect(push).not.toHaveBeenCalled(); + fireEvent.pointerUp(desktop, { + button: 0, + clientX: 160, + clientY: 90, + detail: 1, + pointerId: 7, + }); const ripple = desktop.querySelector( ".desktop-click-ripple", @@ -83,6 +92,7 @@ describe("ComputerUseDesktopViewport click feedback", () => { const { desktop, image, push } = await renderManualDesktopViewport(); image.getBoundingClientRect = () => domRect(0, 0, 640, 640); desktop.getBoundingClientRect = () => domRect(0, 0, 640, 640); + push.mockClear(); fireEvent.pointerDown(desktop, { button: 0, @@ -91,6 +101,13 @@ describe("ComputerUseDesktopViewport click feedback", () => { detail: 1, pointerId: 8, }); + fireEvent.pointerUp(desktop, { + button: 0, + clientX: 160, + clientY: 410, + detail: 1, + pointerId: 8, + }); expect(push).toHaveBeenCalledWith( "frame", @@ -107,6 +124,180 @@ describe("ComputerUseDesktopViewport click feedback", () => { ); }); + it("keeps a small pointer move within click slop as one click", async () => { + const { desktop, image, push } = await renderManualDesktopViewport(); + image.getBoundingClientRect = () => domRect(0, 0, 640, 360); + 
desktop.getBoundingClientRect = () => domRect(0, 0, 640, 360); + push.mockClear(); + + fireEvent.pointerDown(desktop, { + button: 0, + clientX: 160, + clientY: 90, + detail: 1, + pointerId: 9, + }); + fireEvent.pointerMove(desktop, { + buttons: 1, + clientX: 164, + clientY: 94, + pointerId: 9, + }); + fireEvent.pointerUp(desktop, { + button: 0, + clientX: 164, + clientY: 94, + detail: 1, + pointerId: 9, + }); + + expect(push).toHaveBeenCalledTimes(1); + expect(push).toHaveBeenCalledWith( + "frame", + expect.objectContaining({ + kind: "computer_use_manual_control_input", + payload: expect.objectContaining({ + action: "mouse_click", + x: 320, + y: 180, + sourceWidth: 1280, + sourceHeight: 720, + }), + }), + ); + }); + + it("sends one left-button drag after movement exceeds click slop", async () => { + const { desktop, image, push } = await renderManualDesktopViewport(); + image.getBoundingClientRect = () => domRect(0, 0, 640, 360); + desktop.getBoundingClientRect = () => domRect(0, 0, 640, 360); + push.mockClear(); + + fireEvent.pointerDown(desktop, { + button: 0, + clientX: 160, + clientY: 90, + detail: 1, + pointerId: 10, + }); + fireEvent.pointerMove(desktop, { + buttons: 1, + clientX: 320, + clientY: 180, + pointerId: 10, + }); + fireEvent.pointerUp(desktop, { + button: 0, + clientX: 320, + clientY: 180, + detail: 1, + pointerId: 10, + }); + + expect(push).toHaveBeenCalledTimes(1); + expect(push).toHaveBeenCalledWith( + "frame", + expect.objectContaining({ + kind: "computer_use_manual_control_input", + payload: expect.objectContaining({ + action: "mouse_drag", + x: 320, + y: 180, + toX: 640, + toY: 360, + sourceWidth: 1280, + sourceHeight: 720, + button: "left", + }), + }), + ); + expect( + push.mock.calls.some( + ([, frame]) => + (frame as { payload?: { action?: string } }).payload?.action === + "mouse_click", + ), + ).toBe(false); + expect(desktop.querySelector(".desktop-click-ripple")).toBeNull(); + }); + + it("maps drag source and target against the painted 
stream area", async () => { + const { desktop, image, push } = await renderManualDesktopViewport(); + image.getBoundingClientRect = () => domRect(0, 0, 640, 640); + desktop.getBoundingClientRect = () => domRect(0, 0, 640, 640); + push.mockClear(); + + fireEvent.pointerDown(desktop, { + button: 0, + clientX: 160, + clientY: 410, + detail: 1, + pointerId: 11, + }); + fireEvent.pointerMove(desktop, { + buttons: 1, + clientX: 480, + clientY: 230, + pointerId: 11, + }); + fireEvent.pointerUp(desktop, { + button: 0, + clientX: 480, + clientY: 230, + detail: 1, + pointerId: 11, + }); + + expect(push).toHaveBeenCalledWith( + "frame", + expect.objectContaining({ + kind: "computer_use_manual_control_input", + payload: expect.objectContaining({ + action: "mouse_drag", + x: 320, + y: 540, + toX: 960, + toY: 180, + sourceWidth: 1280, + sourceHeight: 720, + }), + }), + ); + }); + + it("does not send click or drag when a pointer gesture is cancelled", async () => { + const { desktop, image, push } = await renderManualDesktopViewport(); + image.getBoundingClientRect = () => domRect(0, 0, 640, 360); + desktop.getBoundingClientRect = () => domRect(0, 0, 640, 360); + push.mockClear(); + + fireEvent.pointerDown(desktop, { + button: 0, + clientX: 160, + clientY: 90, + detail: 1, + pointerId: 12, + }); + fireEvent.pointerMove(desktop, { + buttons: 1, + clientX: 320, + clientY: 180, + pointerId: 12, + }); + fireEvent.pointerCancel(desktop, { + pointerId: 12, + }); + fireEvent.pointerUp(desktop, { + button: 0, + clientX: 320, + clientY: 180, + pointerId: 12, + }); + + expect(push).not.toHaveBeenCalled(); + expect(desktop.querySelector(".desktop-click-ripple")).toBeNull(); + }); + it("zooms the mobile desktop stream with pinch gestures and maps the next tap through the zoomed media", async () => { const { desktop, image, push } = await renderManualDesktopViewport({ presentation: { @@ -389,6 +580,13 @@ async function armManualKeyboardCapture() { detail: 1, pointerId: 10, }); + 
fireEvent.pointerUp(context.desktop, { + button: 0, + clientX: 160, + clientY: 90, + detail: 1, + pointerId: 10, + }); await waitFor(() => { expect(document.activeElement).toBe(context.keyboard); diff --git a/cloud/src/routes/sessions.$computerId.$sessionId.tsx b/cloud/src/routes/sessions.$computerId.$sessionId.tsx index d7633297..6729a5c8 100644 --- a/cloud/src/routes/sessions.$computerId.$sessionId.tsx +++ b/cloud/src/routes/sessions.$computerId.$sessionId.tsx @@ -915,7 +915,7 @@ const MANUAL_KEYBOARD_TEXT_MAX_CHARS = 8_000; const MANUAL_KEYBOARD_COMPOSITION_DUPLICATE_MS = 80; const DESKTOP_MOBILE_ZOOM_MIN = 1; const DESKTOP_MOBILE_ZOOM_MAX = 4; -const DESKTOP_MOBILE_TAP_SLOP_PX = 8; +const DESKTOP_POINTER_TAP_SLOP_PX = 8; type ManualKeyboardCaptureState = "inactive" | "armed" | "composing"; @@ -950,6 +950,22 @@ interface DesktopClickRipple { y: number; } +interface DesktopManualPointerClick { + button: number; + clientX: number; + clientY: number; + clicks: number; +} + +interface DesktopManualPointerGesture extends DesktopManualPointerClick { + dragging: boolean; + latestPoint: DesktopInputPoint; + pointerId: number; + startClientX: number; + startClientY: number; + startPoint: DesktopInputPoint; +} + function desktopMediaContentRect( rect: DOMRect, sourceWidth: number, @@ -1676,6 +1692,10 @@ export function ComputerUseDesktopViewport({ ); const manualControlIdRef = useRef(null); const keyboardCaptureManualControlIdRef = useRef(null); + const manualPointerGestureRef = useRef<DesktopManualPointerGesture | null>( + null, + ); + const manualPointerGestureResetKeyRef = useRef(""); const lastPointerMoveAtRef = useRef(0); const desktopSurfaceRef = useRef(null); const controlBarRef = useRef(null); @@ -1719,6 +1739,7 @@ export function ComputerUseDesktopViewport({ presentation.rotateDesktop && hasVisibleDesktopMedia; const presentationModeKey = `${presentation.isMobile}:${shouldRotateDesktopContent}`; const mobileViewportResetKey = `${computerId}:${sessionId}:${presentationModeKey}`; + const 
manualPointerGestureResetKey = `${mobileViewportResetKey}:${streamId ?? ""}`; const setClampedMobileViewportTransform = useCallback( (transform: DesktopMobileViewportTransform) => { const rect = desktopSurfaceRef.current?.getBoundingClientRect() ?? null; @@ -1738,6 +1759,18 @@ export function ComputerUseDesktopViewport({ DEFAULT_DESKTOP_MOBILE_VIEWPORT_TRANSFORM; setMobileViewportTransform(DEFAULT_DESKTOP_MOBILE_VIEWPORT_TRANSFORM); }, []); + const clearManualPointerGesture = useCallback(() => { + const gesture = manualPointerGestureRef.current; + if (!gesture) return; + manualPointerGestureRef.current = null; + const surface = desktopSurfaceRef.current; + if (!surface) return; + try { + surface.releasePointerCapture(gesture.pointerId); + } catch { + // The browser may have already dropped capture after cancel/release. + } + }, []); useEffect(() => { setControlBarPosition((position) => @@ -1777,6 +1810,7 @@ export function ComputerUseDesktopViewport({ mobileTouchPointersRef.current.clear(); mobileTapPointerIdRef.current = null; mobilePinchGestureRef.current = null; + manualPointerGestureRef.current = null; if (textBatchTimeoutRef.current !== null) { window.clearTimeout(textBatchTimeoutRef.current); textBatchTimeoutRef.current = null; @@ -1796,6 +1830,20 @@ export function ComputerUseDesktopViewport({ manualControlIdRef.current = manualControlId; }, [manualControlId]); + useEffect(() => { + if (state !== "manual") clearManualPointerGesture(); + }, [clearManualPointerGesture, state]); + + useEffect(() => { + if ( + manualPointerGestureResetKeyRef.current === manualPointerGestureResetKey + ) { + return; + } + manualPointerGestureResetKeyRef.current = manualPointerGestureResetKey; + clearManualPointerGesture(); + }, [clearManualPointerGesture, manualPointerGestureResetKey]); + useEffect(() => { streamQualityRef.current = streamQuality; }, [streamQuality]); @@ -2348,6 +2396,7 @@ export function ComputerUseDesktopViewport({ const startStream = () => { if (!channel || 
!deviceId) return; + clearManualPointerGesture(); disarmKeyboardCaptureRef.current?.(); streamStopRequestedRef.current = false; liveVideoSeenRef.current = false; @@ -2372,6 +2421,7 @@ export function ComputerUseDesktopViewport({ }; const stopStream = () => { if (!channel || !deviceId) return; + clearManualPointerGesture(); disarmKeyboardCaptureRef.current?.({ flushText: true }); const activeStreamId = streamIdRef.current ?? streamId; const activeManualControlId = manualControlIdRef.current ?? manualControlId; @@ -2403,6 +2453,7 @@ export function ComputerUseDesktopViewport({ }; const requestManual = () => { if (!channel || !deviceId) return; + clearManualPointerGesture(); disarmKeyboardCaptureRef.current?.(); const nextManualControlId = manualControlIdRef.current ?? createManualControlId(deviceId, sessionId); @@ -2421,6 +2472,7 @@ export function ComputerUseDesktopViewport({ }; const releaseManual = () => { if (!channel || !deviceId) return; + clearManualPointerGesture(); disarmKeyboardCaptureRef.current?.({ flushText: true }); const activeManualControlId = manualControlIdRef.current ?? manualControlId; releaseComputerUseManualControl(channel, { @@ -2834,7 +2886,8 @@ export function ComputerUseDesktopViewport({ ); const showDesktopClickRipple = useCallback( ( - event: PointerEvent, + clientX: number, + clientY: number, button: DesktopClickRipple["button"], ) => { const surface = desktopSurfaceRef.current; @@ -2854,29 +2907,30 @@ export function ComputerUseDesktopViewport({ { button, id, - x: event.clientX - surfaceRect.left, - y: event.clientY - surfaceRect.top, + x: clientX - surfaceRect.left, + y: clientY - surfaceRect.top, }, ]); }, [], ); const sendManualPointerClick = useCallback( - (event: PointerEvent, point: DesktopInputPoint) => { + (click: DesktopManualPointerClick, point: DesktopInputPoint) => { armKeyboardCapture(); showDesktopClickRipple( - event, - event.button === 2 ? "secondary" : "primary", + click.clientX, + click.clientY, + click.button === 2 ? 
"secondary" : "primary", ); sendManualInput({ - action: event.button === 2 ? "mouse_right_click" : "mouse_click", + action: click.button === 2 ? "mouse_right_click" : "mouse_click", x: point.x, y: point.y, sourceWidth: point.sourceWidth, sourceHeight: point.sourceHeight, button: - event.button === 1 ? "middle" : event.button === 2 ? "right" : "left", - clicks: event.detail > 1 ? 2 : 1, + click.button === 1 ? "middle" : click.button === 2 ? "right" : "left", + clicks: click.clicks, }); }, [armKeyboardCapture, sendManualInput, showDesktopClickRipple], @@ -2971,7 +3025,7 @@ export function ComputerUseDesktopViewport({ Math.hypot( pointer.clientX - pointer.startClientX, pointer.clientY - pointer.startClientY, - ) > DESKTOP_MOBILE_TAP_SLOP_PX + ) > DESKTOP_POINTER_TAP_SLOP_PX ) { pointer.moved = true; if (mobileTapPointerIdRef.current === pointer.pointerId) { @@ -3052,7 +3106,15 @@ export function ComputerUseDesktopViewport({ disarmKeyboardCapture(); return; } - sendManualPointerClick(event, point); + sendManualPointerClick( + { + button: event.button, + clientX: event.clientX, + clientY: event.clientY, + clicks: event.detail > 1 ? 2 : 1, + }, + point, + ); }, [ beginMobilePinchGesture, @@ -3070,28 +3132,47 @@ export function ComputerUseDesktopViewport({ return; } if (state !== "manual") { + clearManualPointerGesture(); disarmKeyboardCapture(); return; } if (event.button > 2) { + clearManualPointerGesture(); disarmKeyboardCapture(); return; } const point = pointFromPointerEvent(event); if (!point) { + clearManualPointerGesture(); disarmKeyboardCapture(); return; } event.preventDefault(); - event.currentTarget.setPointerCapture(event.pointerId); - sendManualPointerClick(event, point); + clearManualPointerGesture(); + try { + event.currentTarget.setPointerCapture(event.pointerId); + } catch { + // Embedded webviews can occasionally deny capture during teardown. 
+ } + manualPointerGestureRef.current = { + button: event.button, + clientX: event.clientX, + clientY: event.clientY, + clicks: event.detail > 1 ? 2 : 1, + dragging: false, + latestPoint: point, + pointerId: event.pointerId, + startClientX: event.clientX, + startClientY: event.clientY, + startPoint: point, + }; }, [ + clearManualPointerGesture, disarmKeyboardCapture, handleMobileTouchPointerDown, pointFromPointerEvent, presentation.isMobile, - sendManualPointerClick, state, ], ); @@ -3101,6 +3182,21 @@ export function ComputerUseDesktopViewport({ handleMobileTouchPointerMove(event); return; } + const gesture = manualPointerGestureRef.current; + if (gesture?.pointerId === event.pointerId) { + event.preventDefault(); + if ( + Math.hypot( + event.clientX - gesture.startClientX, + event.clientY - gesture.startClientY, + ) > DESKTOP_POINTER_TAP_SLOP_PX + ) { + gesture.dragging = true; + } + const point = pointFromPointerEvent(event); + if (point) gesture.latestPoint = point; + return; + } if (state !== "manual" || event.buttons === 0) return; const now = Date.now(); if (now - lastPointerMoveAtRef.current < 80) return; @@ -3127,17 +3223,82 @@ export function ComputerUseDesktopViewport({ (event: PointerEvent) => { if (presentation.isMobile && event.pointerType === "touch") { handleMobileTouchPointerEnd(event); + return; } + const gesture = manualPointerGestureRef.current; + if (!gesture || gesture.pointerId !== event.pointerId) return; + event.preventDefault(); + manualPointerGestureRef.current = null; + try { + event.currentTarget.releasePointerCapture(event.pointerId); + } catch { + // Capture can already be gone after browser-level cancellation. + } + const releasePoint = pointFromPointerEvent(event); + const targetPoint = releasePoint ?? 
gesture.latestPoint; + const movedBeyondSlop = + gesture.dragging || + Math.hypot( + event.clientX - gesture.startClientX, + event.clientY - gesture.startClientY, + ) > DESKTOP_POINTER_TAP_SLOP_PX; + if (!movedBeyondSlop) { + sendManualPointerClick( + { + button: gesture.button, + clientX: gesture.startClientX, + clientY: gesture.startClientY, + clicks: gesture.clicks, + }, + gesture.startPoint, + ); + return; + } + if (gesture.button !== 0) return; + sendManualInput({ + action: "mouse_drag", + x: gesture.startPoint.x, + y: gesture.startPoint.y, + toX: targetPoint.x, + toY: targetPoint.y, + sourceWidth: gesture.startPoint.sourceWidth, + sourceHeight: gesture.startPoint.sourceHeight, + button: "left", + }); }, - [handleMobileTouchPointerEnd, presentation.isMobile], + [ + handleMobileTouchPointerEnd, + pointFromPointerEvent, + presentation.isMobile, + sendManualInput, + sendManualPointerClick, + ], ); const handlePointerCancel = useCallback( (event: PointerEvent) => { if (presentation.isMobile && event.pointerType === "touch") { handleMobileTouchPointerEnd(event, true); + return; + } + if (manualPointerGestureRef.current?.pointerId !== event.pointerId) { + return; } + event.preventDefault(); + clearManualPointerGesture(); }, - [handleMobileTouchPointerEnd, presentation.isMobile], + [ + clearManualPointerGesture, + handleMobileTouchPointerEnd, + presentation.isMobile, + ], + ); + const handleLostPointerCapture = useCallback( + (event: PointerEvent) => { + if (manualPointerGestureRef.current?.pointerId === event.pointerId) { + manualPointerGestureRef.current = null; + } + }, + [], ); const handleWheel = useCallback( (event: WheelEvent) => { @@ -3315,6 +3476,7 @@ export function ComputerUseDesktopViewport({ onPointerMove={handlePointerMove} onPointerUp={handlePointerUp} onPointerCancel={handlePointerCancel} + onLostPointerCapture={handleLostPointerCapture} onWheel={handleWheel} className={cn( "relative flex h-full min-h-0 overflow-hidden bg-zinc-950 outline-none", From 
c112ee4906acd9ebf07d7e745e1b4a5ffcbb449f Mon Sep 17 00:00:00 2001 From: snowdamiz Date: Thu, 28 May 2026 22:24:50 -0700 Subject: [PATCH 3/3] save --- .github/workflows/macos-signed-build.yml | 1 + .github/workflows/release.yml | 1 + .../scripts/sign-macos-target-binaries.sh | 21 ++++++- .../scripts/verify-macos-desktop-sidecar.sh | 29 +++++++++ .../desktop_control.rs | 62 ++++++++++++++----- client/src-tauri/tauri.macos.conf.json | 3 +- .../src-tauri/tests/tauri_bundle_resources.rs | 52 ++++++++++++++++ 7 files changed, 148 insertions(+), 21 deletions(-) create mode 100755 client/src-tauri/scripts/verify-macos-desktop-sidecar.sh create mode 100644 client/src-tauri/tests/tauri_bundle_resources.rs diff --git a/.github/workflows/macos-signed-build.yml b/.github/workflows/macos-signed-build.yml index 7532b70a..f7f9b38c 100644 --- a/.github/workflows/macos-signed-build.yml +++ b/.github/workflows/macos-signed-build.yml @@ -121,6 +121,7 @@ jobs: codesign --verify --deep --strict --verbose=2 "$APP_BUNDLE" spctl --assess --type execute --verbose "$APP_BUNDLE" bash client/src-tauri/scripts/verify-macos-portable-linkage.sh "$APP_BUNDLE" + bash client/src-tauri/scripts/verify-macos-desktop-sidecar.sh "$APP_BUNDLE" - name: Upload signed artifacts uses: actions/upload-artifact@v4 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 83831725..dbe9892b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -553,6 +553,7 @@ jobs: codesign --verify --deep --strict --verbose=2 "$APP_BUNDLE" spctl --assess --type execute --verbose "$APP_BUNDLE" bash client/src-tauri/scripts/verify-macos-portable-linkage.sh "$APP_BUNDLE" + bash client/src-tauri/scripts/verify-macos-desktop-sidecar.sh "$APP_BUNDLE" - name: Check Azure Windows signing configuration id: windows_signing_config diff --git a/client/src-tauri/scripts/sign-macos-target-binaries.sh b/client/src-tauri/scripts/sign-macos-target-binaries.sh index d5ae36e8..443489b2 100644 --- 
a/client/src-tauri/scripts/sign-macos-target-binaries.sh +++ b/client/src-tauri/scripts/sign-macos-target-binaries.sh @@ -117,9 +117,20 @@ helper_names=( xero-harness-evals tool-harness xero + xero-desktop-sidecar +) +resource_helper_paths=( + "$tauri_dir/resources/xero-desktop-sidecar" ) codesign_timeout_seconds="${XERO_CODESIGN_TIMEOUT_SECONDS:-300}" +sign_helper_binary() { + local helper_path="$1" + echo "Signing helper binary $helper_path" + run_with_timeout "$codesign_timeout_seconds" codesign --force --options runtime --timestamp --sign "$identity" "$helper_path" + signed_any=1 +} + signed_any=0 while IFS= read -r release_dir; do for helper_name in "${helper_names[@]}"; do @@ -128,12 +139,16 @@ while IFS= read -r release_dir; do continue fi - echo "Signing target helper binary $helper_path" - run_with_timeout "$codesign_timeout_seconds" codesign --force --options runtime --timestamp --sign "$identity" "$helper_path" - signed_any=1 + sign_helper_binary "$helper_path" done done < <(find "$tauri_dir/target" -type d -path "*/release" 2>/dev/null | sort) +for helper_path in "${resource_helper_paths[@]}"; do + if [ -f "$helper_path" ]; then + sign_helper_binary "$helper_path" + fi +done + if [ "$signed_any" -eq 0 ]; then echo "No target helper binaries found to sign." fi diff --git a/client/src-tauri/scripts/verify-macos-desktop-sidecar.sh b/client/src-tauri/scripts/verify-macos-desktop-sidecar.sh new file mode 100755 index 00000000..9b593e5f --- /dev/null +++ b/client/src-tauri/scripts/verify-macos-desktop-sidecar.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [ "$#" -ne 1 ]; then + echo "usage: $0 /path/to/App.app" >&2 + exit 64 +fi + +app_bundle="$1" +sidecar="$app_bundle/Contents/Resources/resources/xero-desktop-sidecar" + +if [ ! -f "$sidecar" ]; then + echo "::error::Missing bundled desktop sidecar at $sidecar." + exit 66 +fi + +if [ ! -x "$sidecar" ]; then + echo "::error file=$sidecar::Bundled desktop sidecar is not executable." 
+ exit 66 +fi + +if ! file "$sidecar" | grep -q "Mach-O"; then + echo "::error file=$sidecar::Bundled desktop sidecar is not a Mach-O binary." + exit 66 +fi + +codesign --verify --strict --verbose=2 "$sidecar" + +echo "macOS desktop sidecar is bundled and signed." diff --git a/client/src-tauri/src/runtime/autonomous_tool_runtime/desktop_control.rs b/client/src-tauri/src/runtime/autonomous_tool_runtime/desktop_control.rs index 59675d6c..53660f32 100644 --- a/client/src-tauri/src/runtime/autonomous_tool_runtime/desktop_control.rs +++ b/client/src-tauri/src/runtime/autonomous_tool_runtime/desktop_control.rs @@ -3959,23 +3959,11 @@ fn resolve_desktop_sidecar_binary() -> Result { #[cfg(not(test))] { let binary_name = desktop_sidecar_binary_name(); - let mut candidates = Vec::new(); - if let Ok(exe) = std::env::current_exe() { - if let Some(dir) = exe.parent() { - candidates.push(dir.join(&binary_name)); - candidates.push(dir.join("../Resources").join(&binary_name)); - } - } - if let Some(manifest_dir) = option_env!("CARGO_MANIFEST_DIR") { - let manifest_dir = PathBuf::from(manifest_dir); - candidates.push(manifest_dir.join("resources").join(&binary_name)); - if let Some(target_dir) = manifest_dir.parent() { - candidates.push(target_dir.join("target/debug").join(&binary_name)); - candidates.push(target_dir.join("target/release").join(&binary_name)); - } - } - - candidates + desktop_sidecar_binary_candidates( + &binary_name, + std::env::current_exe().ok(), + option_env!("CARGO_MANIFEST_DIR").map(PathBuf::from), + ) .into_iter() .find_map(|candidate| validate_sidecar_binary_path(candidate).ok()) .ok_or_else(|| { @@ -3987,6 +3975,30 @@ fn resolve_desktop_sidecar_binary() -> Result { } } +fn desktop_sidecar_binary_candidates( + binary_name: &str, + current_exe: Option, + manifest_dir: Option, +) -> Vec { + let mut candidates = Vec::new(); + if let Some(exe) = current_exe { + if let Some(dir) = exe.parent() { + candidates.push(dir.join(binary_name)); + let 
bundled_resources_dir = dir.join("../Resources"); + candidates.push(bundled_resources_dir.join(binary_name)); + candidates.push(bundled_resources_dir.join("resources").join(binary_name)); + } + } + if let Some(manifest_dir) = manifest_dir { + candidates.push(manifest_dir.join("resources").join(binary_name)); + if let Some(target_dir) = manifest_dir.parent() { + candidates.push(target_dir.join("target/debug").join(binary_name)); + candidates.push(target_dir.join("target/release").join(binary_name)); + } + } + candidates +} + #[cfg(not(test))] fn desktop_sidecar_binary_name() -> String { if cfg!(windows) { @@ -6436,6 +6448,22 @@ mod tests { .is_some_and(|message| message.contains("closed before sending a response"))); } + #[test] + fn sidecar_candidates_include_tauri_preserved_resource_path() { + let exe = PathBuf::from("Xero.app") + .join("Contents") + .join("MacOS") + .join("xero-desktop"); + let resources_dir = PathBuf::from("Xero.app") + .join("Contents") + .join("MacOS") + .join("../Resources"); + let candidates = desktop_sidecar_binary_candidates("xero-desktop-sidecar", Some(exe), None); + + assert!(candidates.contains(&resources_dir.join("xero-desktop-sidecar"))); + assert!(candidates.contains(&resources_dir.join("resources").join("xero-desktop-sidecar"))); + } + #[test] fn cloud_manual_input_requires_active_controller_lease() { let repo = tempdir().expect("tempdir"); diff --git a/client/src-tauri/tauri.macos.conf.json b/client/src-tauri/tauri.macos.conf.json index d5c3ad7e..f4a7d7a2 100644 --- a/client/src-tauri/tauri.macos.conf.json +++ b/client/src-tauri/tauri.macos.conf.json @@ -6,7 +6,8 @@ "bundle": { "resources": [ "resources/scrcpy-server-v2.7.jar", - "resources/solana-toolchain/**/*" + "resources/solana-toolchain/**/*", + "resources/xero-desktop-sidecar*" ] } } diff --git a/client/src-tauri/tests/tauri_bundle_resources.rs b/client/src-tauri/tests/tauri_bundle_resources.rs new file mode 100644 index 00000000..4638b7d3 --- /dev/null +++ 
b/client/src-tauri/tests/tauri_bundle_resources.rs @@ -0,0 +1,52 @@ +use serde_json::Value; + +#[test] +fn macos_release_config_bundles_and_verifies_desktop_sidecar() { + let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")); + let base_config = read_config(&manifest_dir.join("tauri.conf.json")); + let macos_config = read_config(&manifest_dir.join("tauri.macos.conf.json")); + let signing_script = + std::fs::read_to_string(manifest_dir.join("scripts/sign-macos-target-binaries.sh")) + .expect("read macOS signing script"); + let verification_script = + std::fs::read_to_string(manifest_dir.join("scripts/verify-macos-desktop-sidecar.sh")) + .expect("read macOS sidecar verification script"); + + assert!( + bundle_resources(&base_config).contains(&"resources/xero-desktop-sidecar*".to_owned()), + "base Tauri resources must include the desktop stream sidecar" + ); + assert!( + bundle_resources(&macos_config).contains(&"resources/xero-desktop-sidecar*".to_owned()), + "macOS Tauri override must not drop the desktop stream sidecar" + ); + assert!( + signing_script.contains("resources/xero-desktop-sidecar"), + "macOS signing must sign the generated sidecar resource before bundling" + ); + assert!( + verification_script.contains("Contents/Resources/resources/xero-desktop-sidecar"), + "macOS release verification must check Tauri's preserved resource path" + ); +} + +fn read_config(path: &std::path::Path) -> Value { + serde_json::from_slice(&std::fs::read(path).expect("read Tauri config")) + .expect("parse Tauri config") +} + +fn bundle_resources(config: &Value) -> Vec { + match &config["bundle"]["resources"] { + Value::Array(resources) => resources + .iter() + .map(|resource| { + resource + .as_str() + .expect("bundle resource entries must be strings") + .to_owned() + }) + .collect(), + Value::Object(resources) => resources.keys().cloned().collect(), + other => panic!("bundle.resources must be an array or object, got {other:?}"), + } +}