diff --git a/.gitignore b/.gitignore index a1777bf0..ee1b0666 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,4 @@ broker/target/ .context/ docs/broker-protocol.md docs/ralph-broker-plan.md +review-tasks/ diff --git a/PHASE-4-VISIBLE-REFLOW.md b/PHASE-4-VISIBLE-REFLOW.md deleted file mode 100644 index e2100d47..00000000 --- a/PHASE-4-VISIBLE-REFLOW.md +++ /dev/null @@ -1,189 +0,0 @@ -# Phase 4 — Visible-Grid Reflow on Resize - -Followup to `SCROLLBACK-REFLOW-PLAN.md`. Phases 1-3 fixed scrollback reflow at snapshot time. This phase fixes the visible grid: when the terminal resizes (rotation, keyboard up/down, grid↔single switch), the broker's active grid currently just `resize_with(cols, blank)` — pad/truncate per row, no awareness of paragraphs spanning rows. - -## Trigger - -Open this when a user reports: "after resizing my terminal, the visible screen content looks wrong — text spilling, cursor in wrong place, prompt half-visible". Phases 1-3 don't address this; the visible grid remains naive. - -## Symptom example - -``` -before resize (cols=120): - $ very long command that wraps onto next row at col 121 | ← cursor here - continuation - -after resize to cols=60 (today): - $ very long command that wraps onto next row at col 121 - | ← cursor lost; line stuck mid-content; no rewrap -``` - -After fix: -``` - $ very long command that wraps - onto next row at col 121 - continuation - | ← cursor on its content -``` - -## Architecture - -Reuse the same wrap-marker primitive Phase 1 introduced. Visible grid rows already carry `Row.wrapped` (set in `put_char` auto-wrap path). On resize: - -1. Coalesce active grid rows into paragraphs using `wrapped` flags -2. Track cursor's current paragraph + cell offset within it (BEFORE rewrap) -3. Rewrap each paragraph at new `cols` using the same `flush_paragraph` logic -4. If rewrap produces > new `rows`: spill the oldest paragraphs into scrollback (primary only — alt is throwaway) -5. If rewrap produces < new `rows`: pad with blank rows at the bottom (or pull from scrollback tail — see "open question") -6. Reposition cursor: find which output row contains the saved cell offset, set (row, col) accordingly - -## Files - -| File | Change | -| --- | --- | -| `broker/src/terminal_state.rs` | rewrite `Inner::resize` to use paragraph reflow | -| `broker/src/terminal_state.rs` | factor `reflow_lines` / `flush_paragraph` to reuse | -| `broker/src/terminal_state.rs` | new tests for visible-grid resize | - -No protocol changes — this is purely server-side. No client changes either; the snapshot already carries reflowed visible rows, and `SessionResized` event semantics are unchanged. - -## Tasks - -## 1. Cursor offset tracking - -Before rewrap, compute `(paragraph_index, cell_offset_in_paragraph)` from current `(cursor.row, cursor.col)`: - -```rust -fn cursor_to_logical(grid: &Grid, cursor: &CursorPos) -> (usize, usize) { - let mut para_idx = 0; - let mut offset = 0; - let mut row = 0; - while row < cursor.row { - offset += grid.lines[row].cells.len(); - if !grid.lines[row].wrapped { - para_idx += 1; - offset = 0; - } - row += 1; - } - offset += cursor.col; - (para_idx, offset) -} -``` - -After rewrap, walk the new rows to find which one contains paragraph `para_idx` at cell `offset`: - -```rust -fn logical_to_cursor(rows: &[Row], target_para: usize, target_offset: usize) -> CursorPos { - let mut para_idx = 0; - let mut offset = 0; - for (r, row) in rows.iter().enumerate() { - if para_idx == target_para && offset + row.cells.len() > target_offset { - return CursorPos { row: r, col: target_offset - offset }; - } - offset += row.cells.len(); - if !row.wrapped { - para_idx += 1; - offset = 0; - } - } - CursorPos { row: rows.len().saturating_sub(1), col: 0 } -} -``` - -## 2. Rewrap on resize (primary buffer) - -In `Inner::resize` (`terminal_state.rs:279`), before the existing `primary.resize` call: - -```rust -let (cur_para, cur_offset) = cursor_to_logical(&self.primary, &self.cursor); -let reflowed = reflow_lines(&self.primary.lines, cols); -// Spill overflow into scrollback (oldest first). -let overflow = reflowed.len().saturating_sub(rows); -for row in reflowed.iter().take(overflow) { - self.scrollback.push_back(row.clone()); -} -while self.scrollback.len() > self.scrollback_max { - self.scrollback.pop_front(); -} -let kept: Vec = reflowed.into_iter().skip(overflow).collect(); -self.primary.lines = kept; -self.primary.lines.resize_with(rows, || blank_line(cols)); -self.primary.cols = cols; -self.primary.rows = rows; -self.cursor = logical_to_cursor(&self.primary.lines, cur_para.saturating_sub(overflow), cur_offset); -``` - -## 3. Alt-screen rewrap (truncate, no scrollback) - -For `self.alt`, same rewrap but if it overflows new `rows`, truncate from the TOP (alt screen has no scrollback). TUI apps will issue full redraw on SIGWINCH so any lost content gets repainted by the app itself. - -## 4. Tests - -```rust -#[test] -fn resize_shrink_rewraps_visible_grid() { - let mut t = TerminalState::new(10, 3); - t.feed(b"hello world!"); // wraps in 10 cols - // Now resize to 5 cols — "hello world!" should re-wrap to 3 rows. - t.resize(5, 5); - // verify visible content equivalent paragraph at width 5 -} - -#[test] -fn resize_grow_unwraps_continuation() { - let mut t = TerminalState::new(5, 3); - t.feed(b"helloworld"); - t.resize(20, 3); - // "helloworld" now fits on one row -} - -#[test] -fn resize_overflow_spills_to_scrollback() { - let mut t = TerminalState::new(20, 3); - t.feed(b"line1\nline2\nline3"); - t.resize(5, 2); // shrink — first paragraph overflow - assert!(!t.inner.scrollback.is_empty()); -} - -#[test] -fn resize_preserves_cursor_relative_position() { - let mut t = TerminalState::new(10, 3); - t.feed(b"hello "); // cursor at (0, 6) - t.resize(3, 5); - // cursor should still point at the cell after the space — now at (2, 0) -} - -#[test] -fn resize_alt_screen_truncates_top() { - // enter alt, fill with content, shrink, verify top truncated, no scrollback -} - -#[test] -fn resize_no_op_when_dims_unchanged() { - // sanity: resize to same dims must be idempotent -} -``` - -## Open questions - -1. **Pull from scrollback when growing?** tmux does this. Simpler: don't — let the next live PTY redraw fill the new space. Recommendation: defer pulling; ship without and see if anyone cares. -2. **Cursor on a wrapped boundary?** If cursor is exactly at end of a wrapped row (`pending_wrap=true`), is it "on" that row or the next? Decision: treat `pending_wrap` as "cursor is logically at offset = end of this row's cells". Existing emulator semantics already handle this; preserve them. -3. **What about the scroll region?** `scroll_top` / `scroll_bottom` currently get reset in `Inner::resize` (line 284-285). Keep that — TUI apps re-issue DECSTBM on SIGWINCH if they care. -4. **Performance?** Reflow runs on every resize, which is debounced at 80ms server-side. For typical 24-row visible grid, this is microseconds. No concern. - -## Estimated effort - -~6h focused. Cursor handling is the highest-risk piece — that's where tmux/alacritty have historically had the most reflow bugs. Plan extra test time on cursor invariants. - -## When NOT to do this - -Skip / defer if: -- Phases 1-3 fully resolve user reports (most likely outcome — the bulk of "weird history" is scrollback) -- We decide to migrate the broker emulator to a vetted crate (vte's `Term`, ghostty's terminal core) — they already implement reflow correctly and we'd inherit it for free - -## Related - -- `SCROLLBACK-REFLOW-PLAN.md` — Phases 1-3 (shipped) -- tmux `aggressive-resize` — reference behavior -- ghostty's `Terminal.reflow` — reference implementation diff --git a/REVIEW_FIX_PLAN.md b/REVIEW_FIX_PLAN.md deleted file mode 100644 index 5162c08e..00000000 --- a/REVIEW_FIX_PLAN.md +++ /dev/null @@ -1,117 +0,0 @@ -# PR #123 — Review Fix Plan - -Status legend: `[ ]` not started · `[~]` in progress · `[x]` done - -Each phase is independently shippable. #1–#3 + #10 are merge-blockers; the rest can land as follow-ups. - ---- - -## Phase 1 — Ship-blockers (must land before merging #123) - -### [x] 1. Close the socket-perms TOCTOU race -**File:** `broker/src/server.rs:~110` -**Problem:** `UnixListener::bind` creates the socket with default umask; `set_permissions(0o600)` runs after — local attacker can `connect()` in between. -**Fix:** -- `umask(0o077)` before bind, restore after, OR -- bind in a 0o700 parent dir (always — not only when path literally ends in `.wolfpack`) and `rename` into place -- harden the *parent dir* unconditionally for the fallback path, not pattern-matched on name -**Test:** integration test that `stat`s the socket immediately after `wait_ready` and asserts `0o600`. Parallel test using a custom socket path locks in parent-dir hardening. - -### [x] 2. Fix snapshot→subscribe seq gap -**Files:** `src/server/broker-backend.ts:~372`, `src/broker/client.ts` (subscribe call site) -**Problem:** `getSessionPrefill` returns `snapshot.seq = N`, then `client.subscribe` attaches at broker's `current_seq = M ≥ N` with **no `since_seq`**. Bytes `(N, M]` sit in the ring but are never replayed → relies on byte-overlap heuristics. -**Fix:** pass `sinceSeq: snapshot.seq` to `subscribe`. Broker already replays from ring. Remove or downgrade `__stripInitialPtyOverlap` once seq-based replay covers the gap. -**Test:** integration test injecting PTY output between snapshot capture and subscribe attach; assert no bytes lost and no duplicates. - -### [x] 3. Surface subscribe RPC errors in BrokerBackend -**File:** `src/server/broker-backend.ts:361-392` -**Problem:** RPC failure leaves local cb registered with refcount=1 → silent leak, no error to caller. -**Fix:** await the RPC inside `subscribeOutput`, unwind local cb registration on rejection, propagate error. -**Test:** unit in `broker-backend.test.ts` mocking RPC failure; assert no leaked subscriber state and that caller observes the error. - -### [x] 10. Investigate & fix: black screen after time on another session; devtools-resize unblocks but with limited scrollback -**Symptom:** switch to another session, come back later → terminal canvas blank → opening devtools triggers layout/resize → screen renders but scrollback is short. - -**Two compounding hypotheses:** -- **(a) renderer canvas invalidation.** xterm's canvas/webgl backing store can be invalidated by browser when the host element is hidden/unmounted. Only a forced reflow (resize, devtools toggle) triggers `term.refresh()`. The existing `visibilitychange` handler at `public/app.ts:2860` covers tab visibility but **not** in-app session switches — tab is still visible, only a different DOM node is shown. -- **(b) prefill mode truncates scrollback.** The first prefill into xterm's in-memory ring is `prefillMode: "viewport"` for grid cells (right) but possibly also for the focused single-session view (wrong). After the forced redraw, xterm draws what's in *its* ring — the broker's full scrollback was never streamed. So the post-resize render shows only the visible screen + whatever fit. - -**Investigation steps:** -- repro: open session, switch away >X minutes, switch back. Log `term._core._renderService` state and `document.visibilityState` at the black-frame moment. -- confirm `visibilitychange` does NOT fire on in-app session switch (expected — tab still visible). -- check exactly what triggers redraw when devtools opens: window resize? `ResizeObserver`? Add instrumentation. -- audit `prefillMode` per controller: single-session vs grid cell, first attach vs reconnect. -- check whether `term.refresh()` alone (without resize) clears the black frame in repro — that pins the hypothesis to (a) vs (b). - -**Likely fix:** -- on session show/focus, call `term.refresh(0, term.rows - 1)` unconditionally (cheap, idempotent) → addresses (a) -- for the active single-session view, default `prefillMode: "full"`; viewport mode stays for grid cells → addresses (b) -- on long-stale return (>`DESKTOP_STALE_THRESHOLD_MS`-equivalent applied to session-switch, not just tab-visibility), force `reconnect()` so broker re-streams from current snapshot+scrollback -- add `IntersectionObserver` on the terminal element → on becoming visible after being hidden, fire refresh + (if stale) reconnect - -**Test:** e2e — open session, toggle hide/show via `display:none`, assert canvas non-blank within 100ms. Separate test: stale (>60s) session switch triggers a snapshot RPC and full-scrollback prefill. - ---- - -## Phase 2 — Notable concerns (tracked issues, follow-up PRs) - -### [x] 4. Notify clients on broadcast `Lagged` -**File:** `broker/src/server.rs:475-512` -**Fix:** on `Lagged(n)`, emit `subscription_dropped` event (new) or reuse `snapshot_invalidated`. Client auto-resyncs via fresh snapshot+subscribe. Don't tear down the connection. -**TS:** handler in `src/broker/client.ts` re-issues snapshot + subscribe with current seq. -**Test:** integration test with one slow consumer + one fast producer; assert the slow consumer gets a resync event and recovers. - -### [x] 5. Signal replay truncation on subscribe -**Files:** `broker/src/output_bus.rs`, `broker/src/protocol.rs` -**Fix:** add `replay_truncated: bool` to `SubscribeResponse`; set when `since_seq < earliest_seq_in_ring`. TS treats it as "fetch snapshot before live." -**Test:** unit on `output_bus` with ring eviction; assert the flag. - -### [x] 6. Move resize event emission onto `Session` -**Files:** `broker/src/session.rs:308-329`, `broker/src/session_router.rs` -**Fix:** `Session::resize` calls `EventSender::session_resized` directly; router becomes a thin caller. -**Why:** invariant lives with the type that owns the state — drift risk goes away. - -### [x] 7. Tighten `unsubscribe` semantics + test session-id reuse -**File:** `broker/src/server.rs:431-467` -**Fix:** drain pending writer mpsc for that subscription before acking unsubscribe, or document the lag and add a fence frame. -**Test:** unsub session A, immediately sub session B with same recycled name (post-reap); assert no A-frames delivered as B. - ---- - -## Phase 3 — Test coverage gaps - -### [x] 8. Add the missing failure-mode tests -- malformed JSON in `control_request` payload → broker returns typed error, connection survives -- oversized frame (>16 MiB) over a live socket (currently only codec-level test) → connection dropped cleanly -- slow consumer end-to-end exceeding `DEFAULT_BROADCAST_CAPACITY` → exercises #4 path -- broker `SIGKILL` mid-session + client reconnect — manual + scripted via `kill -9` -- socket perms: `stat` after start, assert `0o600` (covered by #1's test) - ---- - -## Phase 4 — Minor cleanup (single bundled PR, post-merge) - -### [x] 9. Polish -- pooled buffer in `broker/src/codec.rs:72` — deferred (profiling needed) -- fix TS `unsubscribe` return type — done (`Promise` → `Promise`) -- `BrokerBackend.list()` cache TTL — deferred (O(1) on cache hit; miss-path acceptable for current session counts) -- extract `terminal_state.rs` vte-tracking layer — deferred (architectural, separate PR) - ---- - -## Out of scope -- redesigning broadcast vs ring capacity coupling (#5 makes truncation observable; decoupling sizes is a separate proposal) -- replacing xterm with ghostty-web everywhere - ---- - -## Sequencing -1. **before merge:** #1, #2, #3, #10 — parallelizable, different files -2. **week of merge:** #4 + #5 as a pair — slow-consumer recovery needs both -3. **opportunistic:** #6, #7, #8, #9 - -## Verification gate before declaring done -- all Phase 1 boxes checked -- `cargo test` + `bun test` + e2e suite green -- manual repro of #10 confirmed fixed on real device (desktop + mobile) -- CI captures `stat` of socket post-start showing `0o600` diff --git a/context.md b/context.md deleted file mode 100644 index bb319bd6..00000000 --- a/context.md +++ /dev/null @@ -1,135 +0,0 @@ - -# Wolfpack — Architecture Context - -Wolfpack is a PWA mobile command center for AI agent sessions. The TypeScript server delegates session ownership to a mandatory Rust broker daemon (`wolfpack-broker`) over a Unix socket; the server itself never owns PTYs. It runs as a local HTTP/WebSocket process on a developer machine, exposed over Tailscale HTTPS so it can be controlled from a phone or remote browser. A single Bun binary bundles all frontend assets, server code, and CLI tooling. Default port: 18790. - ---- - -## Module Index - -| Module | Key Files | Context | -|--------|-----------|---------| -| Auth | `src/auth.ts` | [.context/auth.md](.context/auth.md) | -| Validation | `src/validation.ts`, `src/ws-constants.ts`, `src/wolfpack-context.ts` | [.context/validation.md](.context/validation.md) | -| Server Core | `src/server/index.ts`, `src/server/routes.ts`, `src/server/http.ts` | [.context/server-core.md](.context/server-core.md) | -| WebSocket | `src/server/websocket.ts`, `src/take-control-logic.ts`, `src/reconnect-hydration.ts` | [.context/websocket.md](.context/websocket.md) | -| Session Backends | `src/server/backend.ts`, `src/server/broker-backend.ts`, `src/server/mock-backend.ts`, `src/server/strip-ansi.ts`, `src/server/shell.ts`, `src/server/dev-dir.ts` | [.context/session-backends.md](.context/session-backends.md) | -| Broker Client | `src/broker/client.ts`, `src/broker/codec.ts`, `broker/` (Rust daemon) | — | -| Ralph Orchestration | `src/ralph-macchio.ts`, `src/server/ralph.ts`, `src/ralph-skill-audit.ts`, `src/ralph-skill-cleanup.ts`, `src/worktree.ts` | [.context/ralph.md](.context/ralph.md) | -| Push Notifications | `src/server/push.ts`, `public/sw-push.js` | [.context/push.md](.context/push.md) | -| CLI | `src/cli/index.ts`, `src/cli/config.ts`, `src/cli/setup.ts`, `src/cli/service.ts`, `src/cli/doctor.ts`, `src/cli/formatting.ts` | [.context/cli.md](.context/cli.md) | -| Frontend | `public/app.ts`, `public/app-state.ts`, `public/app-ralph.ts`, `public/app-grid.ts`, `public/app-touch.ts`, `public/index.html`, `public/manifest.json`, `public/sw-push.js`, `public/wolfpack-lib.js` | [.context/frontend.md](.context/frontend.md) | -| Shared Utilities | `src/log.ts`, `src/triage.ts`, `src/qr.ts`, `src/shared/process-cleanup.ts`, `src/public-assets.ts`, `src/terminal-buffer.ts`, `src/terminal-input.ts`, `src/grid-logic.ts`, `src/test-hooks.ts`, `src/types.d.ts` | [.context/shared.md](.context/shared.md) | -| Build & Scripts | `scripts/build.ts`, `scripts/bundle-app.ts`, `scripts/gen-assets.ts`, `scripts/bundle-ghostty.ts`, `scripts/bundle-client-lib.ts` | [.context/build.md](.context/build.md) | -| Tests | `tests/unit/`, `tests/integration/`, `tests/e2e/`, `tests/snapshot/` | [.context/tests.md](.context/tests.md) | - ---- - -## Actors - -- **Mobile/remote browser** — authenticates via JWT, consumes HTTP API and WebSocket connections over Tailscale HTTPS -- **Wolfpack server** — single Bun process; serves frontend, handles HTTP API, manages WS sessions, spawns ralph workers -- **Ralph worker** — detached Bun subprocess per project; runs the AI agent iteration loop against a plan file -- **AI agent** (claude, gemini, codex, cursor) — spawned by ralph worker; executes tasks in a project directory, optionally sandboxed via `srt` -- **wolfpack-broker** — mandatory Rust daemon; owns all PTY sessions, exposes a Unix-socket RPC + event protocol consumed by the wolfpack server -- **Tailscale daemon** — provides HTTPS termination and injects `Tailscale-User-Login` header (trusted boundary) -- **Peer wolfpack instances** — other wolfpack servers on the same tailnet; queried via `/api/info` for session aggregation - ---- - -## Key End-to-End Flows - -- **Auth → Session → WS Relay**: browser JWT auth → `/api/sessions` → WS upgrade → `handlePtyWs` → `setupNewPtyEntry` → ghostty-web renders PTY output -- **Ralph Lifecycle**: `POST /api/ralph/start` → atomic lock → detached worker spawn → iteration loop (extract task → run agent → mark done) → push notification on completion -- **Push Notification**: browser subscribes → VAPID key exchange → server detects state transition → RFC 8291 encrypt → POST to FCM/APNs → service worker `showNotification` -- **Take-Control**: second viewer → `viewer_conflict` → `take_control` message → `performImmediateTakeover` → old viewer displaced (close 4002) → new viewer gets `control_granted` -- **Peer Aggregation**: `GET /api/ralph?aggregate=true` → probe peers via `/api/info` → forward auth header → `validatePeerLoops` strip → merge results - ---- - -## Global Invariants - -1. **Path containment**: all project directory access is guarded by `isUnderDevDir(realpathSync(dir))` using proper boundary check (`=== baseDir || startsWith(baseDir + "/")`) after symlink rejection via `lstatSync`. (`src/server/routes.ts:134`, `src/server/dev-dir.ts`) - -2. **JWT always timingSafeEqual**: signature comparison uses `Buffer.timingSafeEqual` with pre-checked length equality. String equality never used. (`src/auth.ts:143-146`) - -3. **Auth disabled by default**: if `WOLFPACK_JWT_SECRET` is unset or < 32 chars, all requests pass unauthenticated. Intentional for local dev. (`src/auth.ts:98`) - -4. **Single viewer per PTY session**: `activePtySessions` enforces one `viewer` per entry. Concurrent WS connections receive `viewer_conflict` and must explicitly take control. (`src/server/websocket.ts:32`) - -5. **Lock atomicity**: ralph lock uses `{ flag: "wx" }` — fails if file exists. PID written after spawn. Stale lock detected by PID liveness check + cmdline verification. (`src/server/routes.ts:657`) - -6. **Shell args never interpolated**: all git and agent invocations use array form (`execFile`, `execFileSync`, `spawn` with args array). No string shell interpolation. One exception: `BrokerBackend.createSession` uses `shellEscape` before `-lic` when wrapping the agent command. (`src/server/broker-backend.ts`) - -7. **WOLFPACK_TEST gate**: all test-only hooks throw unless `process.env.WOLFPACK_TEST` is set. (`src/test-hooks.ts`) - -8. **Push endpoint allowlist**: push subscription endpoints must have exact-match hostname in `ALLOWED_PUSH_HOSTS`. Prevents SSRF. (`src/server/push.ts:101`) - -9. **CSP nonce**: every HTML response gets a fresh cryptographically-random 16-byte nonce. All `