From 86fda68e37a2502277bc37566ba7a677ef276fed Mon Sep 17 00:00:00 2001 From: Sahil Ahuja Date: Wed, 3 Jun 2026 01:53:18 +0530 Subject: [PATCH 1/4] refactor: Move-based server-scoped boards (pin sessions) Replace the per-WebSocket relay ephemeral (rk-relay-*) + @rk_board server-option encoding with a move-based, server-scoped pin-session board model. Net ~830 lines removed. Every window lives in exactly one place: a home session (SESSIONS view, direct relay attach) or moved into its own single-window _rk-pin-* session (BOARDS view). Removing window *sharing* removes the need for the per-connection ephemeral isolation layer. Board membership is derived from _rk-pin-* sessions plus session vars @rk_board / @rk_home / @rk_board_order (fractional ComputeOrderKey). No DB (Constitution II). Boards are server-scoped, empty boards vanish, and pins persist across rk restarts (no restore-sweep). Deleted: relay ephemeral path, NewGroupedSession, @rk_owner_pid stamping, cmd/rk/serve_sweep.go, and the @rk_board encoding + cross-server union + lazy/eager cleanup. Intended behavioral changes: pinned windows leave their home session's tab list until unpinned; multi-client active-window collisions on a shared real session are accepted. The _rk-ctl anchor + exit-empty off backstop are untouched. 
--- app/backend/api/relay.go | 93 +-- app/backend/api/relay_test.go | 230 ++------ app/backend/api/router.go | 8 - app/backend/api/sessions_test.go | 37 +- app/backend/api/sse.go | 188 +----- app/backend/api/sse_subscriber_test.go | 52 -- app/backend/api/sse_test.go | 192 +----- app/backend/cmd/rk/serve.go | 24 +- app/backend/cmd/rk/serve_sweep.go | 135 ----- app/backend/cmd/rk/serve_sweep_test.go | 145 ----- .../internal/tmux/active_window_test.go | 8 +- app/backend/internal/tmux/board.go | 534 +++++++++-------- app/backend/internal/tmux/board_test.go | 555 +++++++++--------- app/backend/internal/tmux/reaper.go | 3 +- app/backend/internal/tmux/socketsweep_test.go | 38 -- app/backend/internal/tmux/tmux.go | 194 +++--- app/backend/internal/tmux/tmux_test.go | 154 ++--- app/frontend/src/api/boards.ts | 15 +- app/frontend/src/hooks/use-boards.ts | 22 +- .../tests/e2e/boards-desktop-suspend.spec.md | 2 +- app/frontend/tests/e2e/boards-mobile.spec.md | 2 +- app/frontend/tests/e2e/boards-mobile.spec.ts | 7 +- .../tests/e2e/boards-multi-server.spec.md | 15 +- .../tests/e2e/boards-multi-server.spec.ts | 7 +- .../boards-same-session-multi-pane.spec.md | 28 +- .../boards-same-session-multi-pane.spec.ts | 16 +- docs/memory/run-kit/architecture.md | 60 +- docs/memory/run-kit/index.md | 2 +- docs/memory/run-kit/tmux-sessions.md | 151 ++--- docs/memory/run-kit/ui-patterns.md | 13 +- .../.history.jsonl | 26 + .../.status.yaml | 49 ++ .../intake.md | 281 +++++++++ .../plan.md | 317 ++++++++++ 34 files changed, 1686 insertions(+), 1917 deletions(-) delete mode 100644 app/backend/cmd/rk/serve_sweep.go delete mode 100644 app/backend/cmd/rk/serve_sweep_test.go create mode 100644 fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl create mode 100644 fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml create mode 100644 fab/changes/260602-qn62-move-based-board-pin-sessions/intake.md create mode 100644 
fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md diff --git a/app/backend/api/relay.go b/app/backend/api/relay.go index 1a4591d3..d500da38 100644 --- a/app/backend/api/relay.go +++ b/app/backend/api/relay.go @@ -2,10 +2,7 @@ package api import ( "context" - "crypto/rand" - "encoding/hex" "encoding/json" - "fmt" "io" "log/slog" "net/http" @@ -21,18 +18,6 @@ import ( "rk/internal/tmux" ) -// newEphemeralRelayName returns a unique ephemeral session name of the form -// "rk-relay-<8 hex chars>". The 8-hex suffix is read from crypto/rand and is -// never derived from user input — keeping the surface inside the relay handler -// closed against injection (constitution I). -func newEphemeralRelayName() (string, error) { - var b [4]byte - if _, err := rand.Read(b[:]); err != nil { - return "", err - } - return fmt.Sprintf("%s%s", tmux.RelaySessionPrefix, hex.EncodeToString(b[:])), nil -} - // No timeout for the attach command — it's a long-lived process that stays alive // for the duration of the WebSocket connection. Cancellation happens via the // cancel() call in the cleanup function on disconnect. @@ -83,10 +68,13 @@ func (s *Server) handleRelay(w http.ResponseWriter, r *http.Request) { } defer conn.Close() - // Resolve the owning session from the window ID. The per-WebSocket ephemeral - // grouped-session mechanism keys off the *real session name*, so we derive it - // from the window ID via a targeted display-message lookup. A missing window - // (resolution fails or returns empty) preserves the existing 4004 close code. + // Resolve the owning session from the window ID. In the move-based model a + // window lives in exactly ONE session — either a normal home session or its + // board pin-session (`_rk-pin-*`). The relay attaches the PTY DIRECTLY to that + // real session (no per-WebSocket ephemeral grouped session): single-window + // pin-sessions remove window *sharing*, which was the only reason the + // ephemeral isolation layer existed. 
A missing window (resolution fails or + // returns empty) preserves the existing 4004 close code. resolveCtx, resolveCancel := context.WithTimeout(r.Context(), 5*time.Second) session, err := s.tmux.ResolveWindowSession(resolveCtx, server, windowID) resolveCancel() @@ -97,60 +85,13 @@ func (s *Server) handleRelay(w http.ResponseWriter, r *http.Request) { return } - // Allocate a per-WebSocket ephemeral grouped session. tmux session groups - // share window membership but maintain independent active-window state, so - // each relay can SelectWindow on its own ephemeral without disturbing other - // clients attached to the same real session (e.g., other board panes, or - // other browser tabs). - ephemeral, err := newEphemeralRelayName() - if err != nil { - slog.Error("ephemeral name generation failed", "err", err) - conn.WriteMessage(websocket.CloseMessage, - websocket.FormatCloseMessage(4001, "Failed to allocate relay session")) - return - } - if err := s.tmux.NewGroupedSession(r.Context(), server, session, ephemeral); err != nil { - slog.Warn("new-session (grouped) failed", "err", err, "session", session, "ephemeral", ephemeral) - conn.WriteMessage(websocket.CloseMessage, - websocket.FormatCloseMessage(4004, "Session not found")) - return - } - // Best-effort cleanup with a fresh context — r.Context() is cancelled at - // disconnect time (the trigger for this defer), so reusing it would cause - // the kill to be cancelled before tmux can run it. - defer func() { - if err := s.tmux.KillSessionCtx(context.Background(), server, ephemeral); err != nil { - slog.Debug("ephemeral cleanup failed", "err", err, "ephemeral", ephemeral) - } - }() - - // Stamp the ephemeral with this rk serve process's PID BEFORE it becomes - // attachable (before SelectWindowInSession). A sibling startup sweep reaps - // any rk-relay-* whose @rk_owner_pid is empty, so an attachable-but-unstamped - // relay is indistinguishable from an orphan and would be wrongly killed. 
- // Stamping first guarantees the only unstamped relays a sweep can see are - // genuine orphans (owner already exited), never this live instance's relay. - // - // On stamp failure the relay is unprotectable — keeping it open is a false - // promise (the next sweep would reap owner=="" and drop the terminal). So we - // abort cleanly: log, close the WebSocket with the relay-allocation close - // code, and return — the deferred KillSessionCtx above reaps the half-owned - // ephemeral. This mirrors every other setup-step failure in handleRelay. - if err := s.tmux.SetSessionOwnerPID(r.Context(), server, ephemeral, os.Getpid()); err != nil { - slog.Warn("relay owner-pid stamp failed", "err", err, "ephemeral", ephemeral) - conn.WriteMessage(websocket.CloseMessage, - websocket.FormatCloseMessage(4001, "Failed to allocate relay session")) - return - } - - // Select the window on the ephemeral, scoped to the ephemeral session. A bare - // window-id target (`select-window -t @N`) is ambiguous inside a session group - // — members share window membership but keep independent active-window state, - // so tmux could set the active window on the real session or another group - // member. Qualifying the target as ":@N" pins the active window to - // THIS WebSocket's ephemeral, preserving multi-client isolation. - if err := s.tmux.SelectWindowInSession(ephemeral, windowID, server); err != nil { - slog.Error("select-window failed", "err", err, "ephemeral", ephemeral, "windowID", windowID) + // Select the window on its real session so the attach renders the right + // window. The accepted tradeoff (#1 in the intake): the real session has a + // single active-window pointer shared across attachments, so multi-client + // navigation mutates the real session's active window. For a pin-session this + // is a no-op — its sole window is permanently active. 
+ if err := s.tmux.SelectWindow(windowID, server); err != nil { + slog.Error("select-window failed", "err", err, "session", session, "windowID", windowID) conn.WriteMessage(websocket.CloseMessage, websocket.FormatCloseMessage(4004, "Window not found")) return @@ -189,9 +130,9 @@ func (s *Server) handleRelay(w http.ResponseWriter, r *http.Request) { slog.Debug("config reload before attach (best-effort)", "server", server, "err", err) } - // Attach to the ephemeral, not the real session — this is the linchpin of - // the grouped-session fix. - attachArgs = append(attachArgs, "attach-session", "-t", ephemeral) + // Attach DIRECTLY to the resolved owning session (home or `_rk-pin-*`). No + // ephemeral, no defer-kill — the session is durable and owned by tmux. + attachArgs = append(attachArgs, "attach-session", "-t", session) cmd := exec.CommandContext(ctx, "tmux", attachArgs...) cmd.Env = forceTERM(os.Environ()) diff --git a/app/backend/api/relay_test.go b/app/backend/api/relay_test.go index 52c1f6bd..504081de 100644 --- a/app/backend/api/relay_test.go +++ b/app/backend/api/relay_test.go @@ -148,7 +148,8 @@ func readUntilContains(t *testing.T, conn *websocket.Conn, needle string, deadli if strings.Contains(err.Error(), "i/o timeout") || strings.Contains(err.Error(), "deadline exceeded") { continue } - // Connection closed — return what we have. + // Connection closed / failed — return what we have. Do NOT loop back + // into ReadMessage(), which panics on an already-failed gorilla conn. return buf.Bytes() } buf.Write(msg) @@ -159,99 +160,77 @@ func readUntilContains(t *testing.T, conn *websocket.Conn, needle string, deadli return buf.Bytes() } -func TestRelay_TwoWindowsTwoRelaysDistinctOutput(t *testing.T) { - tmuxServer, _, win0ID, win1ID := withRelayTmux(t) +// realSessionNames returns the non-pin, non-anchor session names on a tmux +// server. 
Used to assert the relay creates NO extra (ephemeral) session — the +// move-based model attaches the PTY directly to the real session. +func realSessionNames(t *testing.T, server string) []string { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + sessions, err := tmux.ListSessions(ctx, server) + if err != nil { + t.Fatalf("ListSessions: %v", err) + } + names := make([]string, 0, len(sessions)) + for _, s := range sessions { + names = append(names, s.Name) + } + return names +} + +// TestRelay_DirectAttachRendersSelectedWindow proves the relay attaches the PTY +// DIRECTLY to the real session and renders the window it selected — once per +// window. The connections are opened SEQUENTIALLY (not concurrently): in the +// move-based model both windows live in the SAME real session with a single +// shared active-window pointer (the accepted multi-client tradeoff #1), so two +// SIMULTANEOUS attaches would fight over that pointer. Sequential attaches +// exercise the direct-attach + select-window path per window without that race. +func TestRelay_DirectAttachRendersSelectedWindow(t *testing.T) { + tmuxServer, real, win0ID, win1ID := withRelayTmux(t) ts := relayServerWithProdTmux(t) defer ts.Close() connA := dialRelay(t, ts, tmuxServer, win0ID) - connB := dialRelay(t, ts, tmuxServer, win1ID) - defer connA.Close() - defer connB.Close() - - // Read enough bytes from each to capture the echo'd window markers. 
bytesA := readUntilContains(t, connA, "WINDOW_ZERO", 5*time.Second) - bytesB := readUntilContains(t, connB, "WINDOW_ONE", 5*time.Second) - + connA.Close() if !bytes.Contains(bytesA, []byte("WINDOW_ZERO")) { - t.Errorf("relay A did not receive WINDOW_ZERO marker; got: %q", string(bytesA)) + t.Errorf("relay for win0 did not receive WINDOW_ZERO marker; got: %q", string(bytesA)) } + + connB := dialRelay(t, ts, tmuxServer, win1ID) + bytesB := readUntilContains(t, connB, "WINDOW_ONE", 5*time.Second) + connB.Close() if !bytes.Contains(bytesB, []byte("WINDOW_ONE")) { - t.Errorf("relay B did not receive WINDOW_ONE marker; got: %q", string(bytesB)) + t.Errorf("relay for win1 did not receive WINDOW_ONE marker; got: %q", string(bytesB)) } - // The central bug-fix invariant: each relay only sees its own window's - // content, never the other's. - if bytes.Contains(bytesA, []byte("WINDOW_ONE")) { - t.Errorf("relay A leaked WINDOW_ONE content (would indicate the active-window bug); got: %q", string(bytesA)) - } - if bytes.Contains(bytesB, []byte("WINDOW_ZERO")) { - t.Errorf("relay B leaked WINDOW_ZERO content (would indicate the active-window bug); got: %q", string(bytesB)) + + // The relay must NOT create any extra (ephemeral) session — it attaches the + // PTY directly to the real session. Only `real` should remain user-facing. + names := realSessionNames(t, tmuxServer) + for _, n := range names { + if n != real { + t.Errorf("unexpected extra session %q after relay connect (no ephemeral expected); sessions=%v", n, names) + } } } -func TestRelay_EphemeralCleanupOnClose(t *testing.T) { - tmuxServer, _, win0ID, win1ID := withRelayTmux(t) +// TestRelay_NoEphemeralCreated asserts the relay attaches directly to the real +// session and leaves NO `rk-relay-*` ephemeral or extra session behind. 
+func TestRelay_NoEphemeralCreated(t *testing.T) { + tmuxServer, real, win0ID, _ := withRelayTmux(t) ts := relayServerWithProdTmux(t) defer ts.Close() - connA := dialRelay(t, ts, tmuxServer, win0ID) - connB := dialRelay(t, ts, tmuxServer, win1ID) - - // Helper that uses a fresh per-call timeout so the surrounding polling - // loops never run past a shared parent deadline (which previously made - // this test flaky once the cleanup wait outlived the original 3s ctx). - listRelaySessions := func() ([]string, error) { - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - return tmux.ListRawSessionNames(ctx, tmuxServer) - } - - // Wait briefly so the relay handlers finish creating their ephemerals. - deadline := time.Now().Add(3 * time.Second) - for time.Now().Before(deadline) { - names, err := listRelaySessions() - if err == nil { - n := 0 - for _, name := range names { - if strings.HasPrefix(name, tmux.RelaySessionPrefix) { - n++ - } - } - if n >= 2 { - break - } - } - time.Sleep(50 * time.Millisecond) - } + conn := dialRelay(t, ts, tmuxServer, win0ID) + defer conn.Close() - // Close both WebSockets; the relay handlers' deferred KillSessionCtx must - // reap the ephemerals. - connA.Close() - connB.Close() + // Give the attach a moment to establish. + _ = readUntilContains(t, conn, "WINDOW_ZERO", 3*time.Second) - // Poll until no rk-relay-* sessions remain (cleanup is best-effort and - // runs after the goroutine sees the WS close). 
- cleanupDeadline := time.Now().Add(5 * time.Second) - var lastNames []string - for time.Now().Before(cleanupDeadline) { - names, err := listRelaySessions() - if err != nil { - t.Fatalf("ListRawSessionNames: %v", err) - } - lastNames = names - any := false - for _, name := range names { - if strings.HasPrefix(name, tmux.RelaySessionPrefix) { - any = true - break - } - } - if !any { - return // success - } - time.Sleep(100 * time.Millisecond) + names := realSessionNames(t, tmuxServer) + if len(names) != 1 || names[0] != real { + t.Errorf("expected only the real session %q, got %v (relay must not create an ephemeral)", real, names) } - t.Fatalf("rk-relay-* sessions persisted after WebSocket close: %v", lastNames) } // TestRelay_PercentEncodedAtNot400 is a regression: clients URL-encode '@' @@ -282,10 +261,10 @@ func TestRelay_PercentEncodedAtNot400(t *testing.T) { // TestRelay_MissingWindowClose4004 exercises the error path: opening a relay // to a well-formed but non-existent window ID should close the WebSocket with -// code 4004 (session resolution fails) and not leak any ephemeral on the tmux -// server. +// code 4004 (session resolution fails) and not leak any extra session on the +// tmux server. func TestRelay_MissingWindowClose4004(t *testing.T) { - tmuxServer, _, _, _ := withRelayTmux(t) + tmuxServer, real, _, _ := withRelayTmux(t) ts := relayServerWithProdTmux(t) defer ts.Close() @@ -315,98 +294,17 @@ func TestRelay_MissingWindowClose4004(t *testing.T) { closeErr, ok := readErr.(*websocket.CloseError) if !ok { // The server may abort without a clean close frame in some paths; we - // still need to verify no ephemeral was created. + // still need to verify no extra session was created. t.Logf("read returned non-close error (acceptable): %v", readErr) } else if closeErr.Code != 4004 { t.Errorf("close code = %d, want 4004", closeErr.Code) } - // Verify no rk-relay-* leaked. 
- listCtx, cancelList := context.WithTimeout(context.Background(), 3*time.Second) - defer cancelList() - names, err := tmux.ListRawSessionNames(listCtx, tmuxServer) - if err != nil { - t.Fatalf("ListRawSessionNames: %v", err) - } - for _, name := range names { - if strings.HasPrefix(name, tmux.RelaySessionPrefix) { - t.Errorf("ephemeral leaked after missing-session relay: %s", name) - } - } -} - -// TestRelay_OwnerStampFailureAbortsClean exercises the abort-clean path: when -// the @rk_owner_pid stamp fails after the ephemeral grouped session is created, -// handleRelay MUST close the WebSocket with the relay-allocation code (4001) and -// reap the half-owned ephemeral via the deferred KillSessionCtx — so no live -// but unstamped relay survives (which the next sweep would wrongly reap as an -// owner=="" orphan). Uses mockTmuxOps to inject the stamp failure deterministically -// after a successful session resolution and NewGroupedSession. -func TestRelay_OwnerStampFailureAbortsClean(t *testing.T) { - logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) - ops := &mockTmuxOps{ - resolveWindowSessionResult: "real-session", - setSessionOwnerPIDErr: fmt.Errorf("stamp failed: tmux unreachable"), - } - router := NewTestRouter(logger, &mockSessionFetcher{}, ops, "test-host") - ts := httptest.NewServer(router) - defer ts.Close() - - httpURL, err := url.Parse(ts.URL) - if err != nil { - t.Fatalf("parse url: %v", err) - } - wsURL := url.URL{ - Scheme: "ws", - Host: httpURL.Host, - Path: "/relay/@1", - RawQuery: "server=default", - } - conn, _, err := websocket.DefaultDialer.Dial(wsURL.String(), nil) - if err != nil { - t.Fatalf("dial relay: %v", err) - } - defer conn.Close() - - // The handler must close with 4001 once the owner-pid stamp fails. 
- conn.SetReadDeadline(time.Now().Add(3 * time.Second)) - _, _, readErr := conn.ReadMessage() - if readErr == nil { - t.Fatal("expected close from server, got message") - } - if closeErr, ok := readErr.(*websocket.CloseError); ok { - if closeErr.Code != 4001 { - t.Errorf("close code = %d, want 4001", closeErr.Code) + // Verify no extra session leaked — only the real session should remain. + names := realSessionNames(t, tmuxServer) + for _, n := range names { + if n != real { + t.Errorf("unexpected extra session %q after missing-window relay; sessions=%v", n, names) } - } else { - t.Logf("read returned non-close error (acceptable): %v", readErr) - } - - // The stamp must have been attempted, and the ephemeral reaped by the - // deferred KillSessionCtx so no unstamped relay survives. - // - // The reap runs in handleRelay's deferred cleanup on the SERVER goroutine, - // which is not synchronized with the client seeing the 4001 close above. - // Reading the kill state immediately therefore races the server's defer and - // flakes under load (observed in CI). Poll the mutex-guarded accessor with a - // short deadline instead of asserting once on the bare fields. 
- if !ops.setSessionOwnerPIDCalled { - t.Error("SetSessionOwnerPID was not called — stamp path not exercised") - } - var killed bool - var killedName string - deadline := time.Now().Add(2 * time.Second) - for time.Now().Before(deadline) { - if killed, killedName = ops.KillSessionWasCalled(); killed { - break - } - time.Sleep(10 * time.Millisecond) - } - if !killed { - t.Error("ephemeral was not reaped after stamp failure (deferred KillSessionCtx not invoked)") - } - if killedName != ops.newGroupedSessionEphemeral { - t.Errorf("reaped session = %q, want the created ephemeral %q", - killedName, ops.newGroupedSessionEphemeral) } } diff --git a/app/backend/api/router.go b/app/backend/api/router.go index 9a4c39ae..e4b0b714 100644 --- a/app/backend/api/router.go +++ b/app/backend/api/router.go @@ -28,7 +28,6 @@ type TmuxOps interface { CreateSession(name, cwd, server string) error KillSession(session, server string) error KillSessionCtx(ctx context.Context, server, session string) error - NewGroupedSession(ctx context.Context, server, realSession, ephemeral string) error RenameSession(session, name, server string) error CreateWindow(session, name, cwd, server string) error KillWindow(windowID, server string) error @@ -56,7 +55,6 @@ type TmuxOps interface { CreateWindowWithOptions(session, name, cwd, server string, ops []tmux.WindowOptionOp) error GetSessionOrder(ctx context.Context, server string) ([]string, error) SetSessionOrder(ctx context.Context, server string, order []string) error - SetSessionOwnerPID(ctx context.Context, server, session string, pid int) error ListBoards(ctx context.Context) ([]tmux.BoardSummary, error) GetBoard(ctx context.Context, name string) ([]tmux.BoardEntry, error) ListBoardEntries(ctx context.Context, server string) ([]tmux.BoardEntry, error) @@ -120,9 +118,6 @@ func (p *prodTmuxOps) KillSession(session, server string) error { func (p *prodTmuxOps) KillSessionCtx(ctx context.Context, server, session string) error { return 
tmux.KillSessionCtx(ctx, server, session) } -func (p *prodTmuxOps) NewGroupedSession(ctx context.Context, server, realSession, ephemeral string) error { - return tmux.NewGroupedSession(ctx, server, realSession, ephemeral) -} func (p *prodTmuxOps) RenameSession(session, name, server string) error { return tmux.RenameSession(session, name, server) } @@ -204,9 +199,6 @@ func (p *prodTmuxOps) GetSessionOrder(ctx context.Context, server string) ([]str func (p *prodTmuxOps) SetSessionOrder(ctx context.Context, server string, order []string) error { return tmux.SetSessionOrder(ctx, server, order) } -func (p *prodTmuxOps) SetSessionOwnerPID(ctx context.Context, server, session string, pid int) error { - return tmux.SetSessionOwnerPID(ctx, server, session, pid) -} func (p *prodTmuxOps) ListBoards(ctx context.Context) ([]tmux.BoardSummary, error) { return tmux.ListBoards(ctx) } diff --git a/app/backend/api/sessions_test.go b/app/backend/api/sessions_test.go index c0500d77..dd7a989d 100644 --- a/app/backend/api/sessions_test.go +++ b/app/backend/api/sessions_test.go @@ -31,9 +31,9 @@ func (m *mockSessionFetcher) FetchSessions(ctx context.Context, server string) ( // // Most fields are written and read within a single goroutine (synchronous // handler tests), so they need no locking. The kill-session fields are the -// exception: the relay abort-clean path reaps the ephemeral from a deferred -// cleanup on the SERVER goroutine while the test goroutine observes it, so -// those two are guarded by killMu and accessed via KillSessionWasCalled. +// exception: a deferred cleanup on the SERVER goroutine may observe them while +// the test goroutine reads them, so those two are guarded by killMu and +// accessed via KillSessionWasCalled. 
type mockTmuxOps struct { createSessionCalled bool createSessionName string @@ -45,12 +45,6 @@ type mockTmuxOps struct { renameSessionSession string renameSessionName string - newGroupedSessionCalled bool - newGroupedSessionServer string - newGroupedSessionReal string - newGroupedSessionEphemeral string - newGroupedSessionErr error - createWindowCalled bool createWindowSession string createWindowName string @@ -139,11 +133,6 @@ type mockTmuxOps struct { setSessionOrderOrder []string setSessionOrderErr error - setSessionOwnerPIDCalled bool - setSessionOwnerPIDSession string - setSessionOwnerPIDPID int - setSessionOwnerPIDErr error - // Boards listBoardsCalled bool listBoardsResult []tmux.BoardSummary @@ -206,16 +195,6 @@ func (m *mockTmuxOps) KillSessionWasCalled() (bool, string) { defer m.killMu.Unlock() return m.killSessionCalled, m.killSessionName } -func (m *mockTmuxOps) NewGroupedSession(ctx context.Context, server, realSession, ephemeral string) error { - m.newGroupedSessionCalled = true - m.newGroupedSessionServer = server - m.newGroupedSessionReal = realSession - m.newGroupedSessionEphemeral = ephemeral - if m.newGroupedSessionErr != nil { - return m.newGroupedSessionErr - } - return m.err -} func (m *mockTmuxOps) RenameSession(session, name, server string) error { m.renameSessionCalled = true m.renameSessionSession = session @@ -390,16 +369,6 @@ func (m *mockTmuxOps) SetSessionOrder(ctx context.Context, server string, order } return m.err } -func (m *mockTmuxOps) SetSessionOwnerPID(ctx context.Context, server, session string, pid int) error { - m.setSessionOwnerPIDCalled = true - m.setSessionOwnerPIDSession = session - m.setSessionOwnerPIDPID = pid - if m.setSessionOwnerPIDErr != nil { - return m.setSessionOwnerPIDErr - } - return m.err -} - func (m *mockTmuxOps) ListBoards(ctx context.Context) ([]tmux.BoardSummary, error) { m.listBoardsCalled = true if m.listBoardsErr != nil { diff --git a/app/backend/api/sse.go b/app/backend/api/sse.go index 
a026dd31..3015f8a2 100644 --- a/app/backend/api/sse.go +++ b/app/backend/api/sse.go @@ -27,11 +27,13 @@ func (prodSessionOrderFetcher) GetSessionOrder(ctx context.Context, server strin return tmux.GetSessionOrder(ctx, server) } -// BoardEntriesFetcher reads the @rk_board entries for a tmux server. -// Injected so tests can stub the tmux dependency for bootstrap and cleanup. +// BoardEntriesFetcher reads board pin entries for a tmux server. In the +// move-based model membership is derived live from `_rk-pin-*` sessions, so the +// SSE hub no longer needs an eager-cleanup hook — a killed pinned window simply +// drops out of the next ListBoardEntries read. Kept as a one-method interface +// so tests can stub the tmux dependency. type BoardEntriesFetcher interface { ListBoardEntries(ctx context.Context, server string) ([]tmux.BoardEntry, error) - RemoveAllByWindowID(ctx context.Context, server, windowID string) ([]string, error) } type prodBoardEntriesFetcher struct{} @@ -40,33 +42,22 @@ func (prodBoardEntriesFetcher) ListBoardEntries(ctx context.Context, server stri return tmux.ListBoardEntries(ctx, server) } -func (prodBoardEntriesFetcher) RemoveAllByWindowID(ctx context.Context, server, windowID string) ([]string, error) { - return tmux.RemoveAllByWindowID(ctx, server, windowID) -} - // boardEventName is the SSE event type for board-membership changes. Matches // the kebab-case convention established by `event: session-order`. const boardEventName = "board-changed" -// boardChangedPayload is the body of `event: board-changed` for pin/unpin/ -// reorder/cleanup mutations. +// boardChangedPayload is the body of `event: board-changed` for explicit +// pin/unpin/reorder mutations. Board membership changes only through these +// mutations (each handler emits its own event), so there is no synthetic +// cleanup or bootstrap variant. 
type boardChangedPayload struct { Board string `json:"board"` - Change string `json:"change"` // "pin" | "unpin" | "reorder" | "cleanup" | "bootstrap" + Change string `json:"change"` // "pin" | "unpin" | "reorder" Server string `json:"server"` WindowID string `json:"windowId,omitempty"` OrderKey string `json:"orderKey,omitempty"` } -// boardBootstrapPayload is the body of the synthetic bootstrap event sent on -// first poll per server. Carries the full entries snapshot so the frontend -// can rehydrate. -type boardBootstrapPayload struct { - Server string `json:"server"` - Change string `json:"change"` // always "bootstrap" - Entries []tmux.BoardEntry `json:"entries"` -} - const ( // safetyPollInterval is the safety-net cadence for snapshot rebuilds // when no control-mode subscriber is available (PTY-unavailable @@ -142,14 +133,11 @@ type sseHub struct { previousJSON map[string]string // per-server sessions JSON dedup cache previousOrderJSON map[string]string // per-server session-order event payload cache (only present when populated by a successful read or a POST broadcast) orderBootstrapAttempts map[string]int // per-server count of failed bootstrap attempts; capped at orderBootstrapMaxAttempts - previousBoardJSON map[string]string // per-server board bootstrap snapshot payload cache - previousWindowIDs map[string]map[string]bool // per-server prior-tick live window ids for kill-detection - previousRealSessions map[string]map[string]bool // per-server prior-tick real (non-relay/anchor) session names for disappearance logging + previousRealSessions map[string]map[string]bool // per-server prior-tick real (non-anchor) session names for disappearance logging cache map[string]*cachedResult // per-server session fetch cache (500ms TTL) polling bool fetcher SessionFetcher orderFetcher SessionOrderFetcher - boardFetcher BoardEntriesFetcher metrics *metrics.Collector cachedMetricsJSON string // latest metrics JSON for new clients @@ -191,32 +179,16 @@ func (h *sseHub) 
safetyIntervalEffective(servers []string) time.Duration { return safetyPollInterval } -// detectKilledWindowIDs is a pure function: it returns the set of window ids -// present in prev but absent in current. Used by the snapshot builder to fan -// out one `board-changed { cleanup }` event per killed window. -func detectKilledWindowIDs(prev, current map[string]bool) []string { - var killed []string - for id := range prev { - if !current[id] { - killed = append(killed, id) - } - } - return killed -} - func newSSEHub(fetcher SessionFetcher, mc *metrics.Collector) *sseHub { return &sseHub{ clients: make(map[string][]*sseClient), previousJSON: make(map[string]string), previousOrderJSON: make(map[string]string), orderBootstrapAttempts: make(map[string]int), - previousBoardJSON: make(map[string]string), - previousWindowIDs: make(map[string]map[string]bool), previousRealSessions: make(map[string]map[string]bool), cache: make(map[string]*cachedResult), fetcher: fetcher, orderFetcher: prodSessionOrderFetcher{}, - boardFetcher: prodBoardEntriesFetcher{}, metrics: mc, } } @@ -243,14 +215,6 @@ func (h *sseHub) addClient(c *sseClient) { } } - // Send cached board-changed bootstrap snapshot (after session-order, before metrics). - if prev, ok := h.previousBoardJSON[c.server]; ok && prev != "" { - select { - case c.ch <- []byte(fmt.Sprintf("event: %s\ndata: %s\n\n", boardEventName, prev)): - default: - } - } - // Send cached metrics snapshot immediately (server-independent) if h.cachedMetricsJSON != "" { select { @@ -325,9 +289,10 @@ func (h *sseHub) broadcastSessionOrder(server string, order []string) { // broadcastBoardChanged pushes a board-changed event to every client // connected for the supplied server. The payload is rendered as JSON and -// emitted using the shared SSE envelope. No payload caching is performed -// for incremental events — the bootstrap cache covers the snapshot use -// case via previousBoardJSON. +// emitted using the shared SSE envelope. 
No payload caching is performed: +// board membership changes only through the explicit pin/unpin/reorder +// handlers (each emits its own event), and a killed pinned window drops out +// of the next live ListBoardEntries read — there is no snapshot to cache. func (h *sseHub) broadcastBoardChanged(server string, payload boardChangedPayload) { jsonBytes, err := json.Marshal(payload) if err != nil { @@ -350,68 +315,18 @@ func (h *sseHub) broadcastBoardChanged(server string, payload boardChangedPayloa } } -// broadcastBoardBootstrap delivers the per-server snapshot of @rk_board -// entries on first poll. Caches the payload under previousBoardJSON so -// future addClient calls receive the same snapshot. -func (h *sseHub) broadcastBoardBootstrap(server string, entries []tmux.BoardEntry) { - if entries == nil { - entries = []tmux.BoardEntry{} - } - payload := boardBootstrapPayload{ - Server: server, - Change: "bootstrap", - Entries: entries, - } - jsonBytes, err := json.Marshal(payload) - if err != nil { - slog.Warn("board-bootstrap broadcast marshal failed", "err", err, "server", server) - return - } - jsonStr := string(jsonBytes) - event := []byte(fmt.Sprintf("event: %s\ndata: %s\n\n", boardEventName, jsonStr)) - - h.mu.Lock() - defer h.mu.Unlock() - h.previousBoardJSON[server] = jsonStr - for _, c := range h.clients[server] { - select { - case c.ch <- event: - default: - if !c.dropped { - slog.Warn("SSE event dropped", "server", server, "event", boardEventName) - c.dropped = true - } - } - } -} - -// windowIDSetFromSessions extracts the union of window ids across every -// session's windows. Used for window-kill detection between poll ticks. 
-func windowIDSetFromSessions(sess []sessions.ProjectSession) map[string]bool { - out := make(map[string]bool) - for _, s := range sess { - for _, w := range s.Windows { - if w.WindowID != "" { - out[w.WindowID] = true - } - } - } - return out -} - // realSessionNameSet returns the set of *user-facing* session names in the -// snapshot — excluding the per-connection relay ephemerals (rk-relay-*) and the -// control-mode anchor (_rk-ctl), which churn constantly by design and are not -// sessions a user would notice losing. Used to detect when a real session -// disappears between poll ticks (observability for Constitution VI — tmux -// sessions must survive). +// snapshot — excluding the board pin-sessions (_rk-pin-*) and the control-mode +// anchor (_rk-ctl), which are not sessions a user would notice losing. Used to +// detect when a real session disappears between poll ticks (observability for +// Constitution VI — tmux sessions must survive). func realSessionNameSet(sess []sessions.ProjectSession) map[string]bool { out := make(map[string]bool) for _, s := range sess { if s.Name == "" { continue } - if strings.HasPrefix(s.Name, tmux.RelaySessionPrefix) || s.Name == tmux.ControlAnchorSessionName { + if strings.HasPrefix(s.Name, tmux.PinSessionPrefix) || s.Name == tmux.ControlAnchorSessionName { continue } out[s.Name] = true @@ -420,7 +335,7 @@ func realSessionNameSet(sess []sessions.ProjectSession) map[string]bool { } // detectDisappearedSessions returns names present in prev but absent in -// current. Pure; mirrors detectKilledWindowIDs. +// current. Pure helper for the real-session disappearance WARN. func detectDisappearedSessions(prev, current map[string]bool) []string { var gone []string for name := range prev { @@ -552,63 +467,22 @@ func (h *sseHub) poll() { } } - // Board bootstrap on first successful poll for this server. 
- h.mu.RLock() - _, boardSeeded := h.previousBoardJSON[server] - h.mu.RUnlock() - if !boardSeeded && h.boardFetcher != nil { - bootCtx, cancelBoot := context.WithTimeout(context.Background(), 2*time.Second) - entries, berr := h.boardFetcher.ListBoardEntries(bootCtx, server) - cancelBoot() - if berr != nil { - slog.Debug("board bootstrap (best-effort)", "server", server, "err", berr) - } else { - h.broadcastBoardBootstrap(server, entries) - } - } - - // Window-kill detection for eager board cleanup. Compute the - // current window-id set from the freshly fetched session list, - // diff against the prior snapshot via the pure - // detectKilledWindowIDs helper, and fan out one - // board-changed { cleanup } event per affected board. - currentIDs := windowIDSetFromSessions(result) - h.mu.RLock() - prevIDs, hasPrev := h.previousWindowIDs[server] - h.mu.RUnlock() - if hasPrev && h.boardFetcher != nil { - killed := detectKilledWindowIDs(prevIDs, currentIDs) - for _, prevID := range killed { - cleanCtx, cancelClean := context.WithTimeout(context.Background(), 2*time.Second) - boards, cerr := h.boardFetcher.RemoveAllByWindowID(cleanCtx, server, prevID) - cancelClean() - if cerr != nil { - slog.Debug("board cleanup (best-effort)", "server", server, "windowId", prevID, "err", cerr) - continue - } - for _, b := range boards { - h.broadcastBoardChanged(server, boardChangedPayload{ - Board: b, - Change: "cleanup", - Server: server, - WindowID: prevID, - }) - } - } - } - h.mu.Lock() - h.previousWindowIDs[server] = currentIDs - h.mu.Unlock() + // Board membership changes are surfaced only via the explicit + // pin/unpin/reorder handlers (each emits its own board-changed + // event). In the move-based model a killed pinned window simply + // drops out of the next ListBoardEntries read — the frontend's + // refetch on the session-list change picks it up — so there is no + // eager board-cleanup diff and no first-poll bootstrap broadcast. 
// Real-session disappearance logging (observability only — no // behavior change). run-kit audit-logs every session IT kills - // (relay ephemerals, explicit kill-session), but a real user - // session can vanish OUTSIDE that path — a shell exiting, an - // external `tmux kill-session`, an OOM kill, or a server collapsing + // (board pin-session teardown on unpin, explicit kill-session), but + // a real user session can vanish OUTSIDE that path — a shell exiting, + // an external `tmux kill-session`, an OOM kill, or a server collapsing // to zero under `exit-empty`. When that happens today the logs go // silent, making post-hoc diagnosis impossible (see the `utils` // incident). Emit one WARN per disappeared real session so the next - // occurrence is diagnosable. We exclude relay/anchor churn via + // occurrence is diagnosable. We exclude pin-session/anchor churn via // realSessionNameSet. This does NOT prevent the loss — it records // it. Constitution VI PREVENTION (always-on `_rk-ctl` anchor floor + // imperative `exit-empty off` on every dialed server) is implemented diff --git a/app/backend/api/sse_subscriber_test.go b/app/backend/api/sse_subscriber_test.go index 4ca891c5..fdecb803 100644 --- a/app/backend/api/sse_subscriber_test.go +++ b/app/backend/api/sse_subscriber_test.go @@ -206,58 +206,6 @@ func TestSSE_SafetyTickerFiresWithoutSubscriber(t *testing.T) { } } -// TestDetectKilledWindowIDs verifies the extracted pure function. 
-func TestDetectKilledWindowIDs(t *testing.T) { - cases := []struct { - name string - prev map[string]bool - current map[string]bool - want map[string]bool - }{ - { - name: "no_kills", - prev: map[string]bool{"@1": true, "@2": true}, - current: map[string]bool{"@1": true, "@2": true}, - want: nil, - }, - { - name: "one_kill", - prev: map[string]bool{"@1": true, "@2": true, "@3": true}, - current: map[string]bool{"@1": true, "@3": true}, - want: map[string]bool{"@2": true}, - }, - { - name: "all_killed", - prev: map[string]bool{"@1": true, "@2": true}, - current: map[string]bool{}, - want: map[string]bool{"@1": true, "@2": true}, - }, - { - name: "empty_prev_returns_nothing", - prev: map[string]bool{}, - current: map[string]bool{"@1": true}, - want: nil, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - killed := detectKilledWindowIDs(c.prev, c.current) - gotSet := map[string]bool{} - for _, id := range killed { - gotSet[id] = true - } - if len(gotSet) != len(c.want) { - t.Fatalf("len mismatch: got %v, want %v", gotSet, c.want) - } - for k := range c.want { - if !gotSet[k] { - t.Errorf("missing %s in result %v", k, gotSet) - } - } - }) - } -} - // neverSubscriber is a WindowChangeSubscriber whose Wait channel never closes // for any server — models the PTY-unavailable case where supervisorSubscriber // has no Client for the requested socket. The SSE loop MUST fall through to diff --git a/app/backend/api/sse_test.go b/app/backend/api/sse_test.go index 535d1a2d..0596933f 100644 --- a/app/backend/api/sse_test.go +++ b/app/backend/api/sse_test.go @@ -547,187 +547,6 @@ func TestSSE_SessionOrderCachedOnConnect(t *testing.T) { } } -// stubBoardFetcher implements BoardEntriesFetcher for SSE tests. 
-type stubBoardFetcher struct { - mu sync.Mutex - entries map[string][]tmux.BoardEntry - listCalls int - listErr error - removed map[string][]string // server -> windowIDs removed - removedBoards map[string]map[string][]string // server -> windowID -> boards returned -} - -func (s *stubBoardFetcher) ListBoardEntries(ctx context.Context, server string) ([]tmux.BoardEntry, error) { - s.mu.Lock() - defer s.mu.Unlock() - s.listCalls++ - if s.listErr != nil { - return nil, s.listErr - } - return s.entries[server], nil -} - -func (s *stubBoardFetcher) RemoveAllByWindowID(ctx context.Context, server, windowID string) ([]string, error) { - s.mu.Lock() - defer s.mu.Unlock() - if s.removed == nil { - s.removed = make(map[string][]string) - } - s.removed[server] = append(s.removed[server], windowID) - if s.removedBoards != nil { - if perWin, ok := s.removedBoards[server]; ok { - return perWin[windowID], nil - } - } - // Default: derive from entries snapshot. - var boards []string - for _, e := range s.entries[server] { - if e.WindowID == windowID { - boards = append(boards, e.Board) - } - } - return boards, nil -} - -func TestSSE_BoardChangedCachedOnConnect(t *testing.T) { - hub := newSSEHub(&slowSessionFetcher{}, nil) - hub.orderFetcher = &stubOrderFetcher{orders: map[string][]string{}} - hub.boardFetcher = &stubBoardFetcher{} - - // Pre-populate the bootstrap cache by broadcasting bootstrap. 
- hub.broadcastBoardBootstrap("default", []tmux.BoardEntry{ - {Server: "default", WindowID: "@1234", Board: "main", OrderKey: "m"}, - }) - - c := &sseClient{ch: make(chan []byte, 32), server: "default"} - hub.addClient(c) - defer hub.removeClient(c) - - deadline := time.After(500 * time.Millisecond) - got := false - for !got { - select { - case ev := <-c.ch: - s := string(ev) - if strings.Contains(s, "event: board-changed") && strings.Contains(s, `"change":"bootstrap"`) { - got = true - } - case <-deadline: - t.Fatal("client did not receive cached board-changed bootstrap event") - } - } -} - -func TestSSE_BoardBootstrapReadsTmuxOnFirstPoll(t *testing.T) { - stub := &stubBoardFetcher{ - entries: map[string][]tmux.BoardEntry{ - "default": { - {Server: "default", WindowID: "@1234", Board: "main", OrderKey: "m"}, - }, - }, - } - hub := newSSEHub(&slowSessionFetcher{}, nil) - hub.orderFetcher = &stubOrderFetcher{orders: map[string][]string{}} - hub.boardFetcher = stub - - c := &sseClient{ch: make(chan []byte, 32), server: "default"} - hub.addClient(c) - defer hub.removeClient(c) - - deadline := time.After(legacyPollInterval + 1*time.Second) - got := false - for !got { - select { - case ev := <-c.ch: - s := string(ev) - if strings.Contains(s, "event: board-changed") && - strings.Contains(s, `"change":"bootstrap"`) && - strings.Contains(s, `"@1234"`) { - got = true - } - case <-deadline: - t.Fatal("client did not receive bootstrapped board-changed event") - } - } - stub.mu.Lock() - calls := stub.listCalls - stub.mu.Unlock() - if calls < 1 { - t.Errorf("boardFetcher.ListBoardEntries calls = %d, want >= 1", calls) - } -} - -// killTrackingFetcher emits a different session set on each call so we can -// trigger the kill-detection path. 
-type killTrackingFetcher struct { - mu sync.Mutex - calls int - frames [][]sessions.ProjectSession -} - -func (f *killTrackingFetcher) FetchSessions(ctx context.Context, server string) ([]sessions.ProjectSession, error) { - f.mu.Lock() - defer f.mu.Unlock() - if f.calls >= len(f.frames) { - f.calls++ - return f.frames[len(f.frames)-1], nil - } - out := f.frames[f.calls] - f.calls++ - return out, nil -} - -func TestSSE_WindowKillEmitsBoardCleanup(t *testing.T) { - // Frame 0: window @1234 alive, pinned to main. - // Frame 1+: window @1234 gone — should trigger cleanup broadcast. - frames := [][]sessions.ProjectSession{ - { - {Name: "dev", Windows: []tmux.WindowInfo{ - {Index: 0, WindowID: "@1234", Name: "agent"}, - }}, - }, - { - {Name: "dev", Windows: []tmux.WindowInfo{}}, - }, - } - stub := &stubBoardFetcher{ - entries: map[string][]tmux.BoardEntry{ - "default": { - {Server: "default", WindowID: "@1234", Board: "main", OrderKey: "m"}, - }, - }, - } - hub := newSSEHub(&killTrackingFetcher{frames: frames}, nil) - hub.orderFetcher = &stubOrderFetcher{orders: map[string][]string{}} - hub.boardFetcher = stub - - c := &sseClient{ch: make(chan []byte, 64), server: "default"} - hub.addClient(c) - defer hub.removeClient(c) - - deadline := time.After(legacyPollInterval*3 + 2*time.Second) - got := false - for !got { - select { - case ev := <-c.ch: - s := string(ev) - if strings.Contains(s, "event: board-changed") && - strings.Contains(s, `"change":"cleanup"`) && - strings.Contains(s, `"@1234"`) { - got = true - } - case <-deadline: - t.Fatal("did not receive cleanup event after window killed") - } - } - - stub.mu.Lock() - defer stub.mu.Unlock() - if len(stub.removed["default"]) == 0 { - t.Errorf("RemoveAllByWindowID was not invoked for default server") - } -} - func TestSSE_HubBootstrapReadsOrderOnFirstPoll(t *testing.T) { stub := &stubOrderFetcher{orders: map[string][]string{ "default": {"alpha", "beta"}, @@ -764,13 +583,14 @@ func 
TestSSE_HubBootstrapReadsOrderOnFirstPoll(t *testing.T) { } // TestRealSessionNameSet verifies the snapshot→real-session-name extraction -// excludes relay ephemerals and the control anchor (which churn by design and -// must not trip the disappearance log) while keeping user-facing sessions. +// excludes board pin-sessions and the control anchor (which are not sessions a +// user would notice losing and must not trip the disappearance log) while +// keeping user-facing sessions. func TestRealSessionNameSet(t *testing.T) { in := []sessions.ProjectSession{ {Name: "shll", Windows: []tmux.WindowInfo{}}, {Name: "wt", Windows: []tmux.WindowInfo{}}, - {Name: tmux.RelaySessionPrefix + "abc123", Windows: []tmux.WindowInfo{}}, + {Name: tmux.PinSessionPrefix + "42", Windows: []tmux.WindowInfo{}}, {Name: tmux.ControlAnchorSessionName, Windows: []tmux.WindowInfo{}}, {Name: "", Windows: []tmux.WindowInfo{}}, // defensive: empty name ignored } @@ -784,8 +604,8 @@ func TestRealSessionNameSet(t *testing.T) { t.Errorf("realSessionNameSet missing real session %q", name) } } - if got[tmux.RelaySessionPrefix+"abc123"] { - t.Error("realSessionNameSet must exclude relay ephemerals") + if got[tmux.PinSessionPrefix+"42"] { + t.Error("realSessionNameSet must exclude board pin-sessions") } if got[tmux.ControlAnchorSessionName] { t.Error("realSessionNameSet must exclude the control anchor") diff --git a/app/backend/cmd/rk/serve.go b/app/backend/cmd/rk/serve.go index 4581ae58..ddd83c2f 100644 --- a/app/backend/cmd/rk/serve.go +++ b/app/backend/cmd/rk/serve.go @@ -83,16 +83,11 @@ To run rk as a background daemon, see 'rk daemon start' (and the rest of the return fmt.Errorf("ensuring tmux config: %w", err) } - // Reap orphaned rk-relay-* ephemerals left by a previously crashed - // rk serve instance. Synchronous to eliminate races with new relays - // creating ephemerals concurrently with the sweep. Bounded to 30s - // so a misbehaving tmux server cannot stall startup indefinitely. 
- // Failures are logged but never block startup. - sweepCtx, sweepCancel := context.WithTimeout(context.Background(), 30*time.Second) - if err := sweepOrphanedRelaySessions(sweepCtx); err != nil { - slog.Warn("relay sweep finished with errors", "err", err) - } - sweepCancel() + // No startup sweep: relay ephemerals are gone (the relay attaches the PTY + // directly to the real session), and board pin-sessions (`_rk-pin-*`) are + // PERSISTENT across rk restarts (Constitution VI — tmux survives the + // server). A persisted pin is valid state, not an orphan, so there is + // nothing to reap. logLevel := slog.LevelInfo if strings.EqualFold(os.Getenv("LOG_LEVEL"), "debug") { @@ -107,12 +102,9 @@ To run rk as a background daemon, see 'rk daemon start' (and the rest of the router, apiServer := api.NewRouterAndServer(ctx, logger) - // Start the tmuxctl supervisor AFTER tmux.EnsureConfig() and - // sweepOrphanedRelaySessions (both above) and BEFORE the HTTP - // listen. The sweep must run first so it does not observe the - // `_rk-ctl` anchor as an orphan; the supervisor must run before - // listen so the SSE hub never races an empty Client map for - // sockets that already exist on disk. + // Start the tmuxctl supervisor AFTER tmux.EnsureConfig() (above) and + // BEFORE the HTTP listen, so the SSE hub never races an empty Client map + // for sockets that already exist on disk. // // Per-socket Open failures (PTY unavailable, etc.) are logged // inside the Supervisor and never block startup. 
diff --git a/app/backend/cmd/rk/serve_sweep.go b/app/backend/cmd/rk/serve_sweep.go deleted file mode 100644 index 1d743079..00000000 --- a/app/backend/cmd/rk/serve_sweep.go +++ /dev/null @@ -1,135 +0,0 @@ -package main - -import ( - "context" - "errors" - "fmt" - "log/slog" - "strconv" - "strings" - "syscall" - - "rk/internal/tmux" -) - -// pidAlive reports whether pid names a live process, biased toward "alive" on -// any ambiguity so the sweep leaks rather than wrongly kills (leak-not-kill). -// syscall.Kill(pid, 0) is the canonical liveness probe: -// - nil → process exists and is signalable → alive (spare) -// - ESRCH → no such process → dead (reap) -// - EPERM → process exists but owned by another user → alive (spare) -// - other → ambiguous → alive (spare) -// -// A non-positive pid is treated as dead (reap): a real owner is always a -// concrete os.Getpid() (≥ 1), so 0 or negative is a malformed/invalid stamp. -// This guard is also necessary for correctness — syscall.Kill(0, 0) and -// negative pids target a process group, not a single process, and would -// otherwise return nil and be misread as a live owner that is spared forever. -// -// This deliberately differs from daemon_portowner.go:processAlive (which treats -// EPERM as dead): that predicate guards a forceful SIGTERM/SIGKILL where erring -// toward "dead" is safe, whereas here erring toward "alive" avoids reaping a -// live instance's relay. The single-uid socket model (ListServers scans only -// /tmp/tmux-/) means EPERM is not an expected owner state; sparing it is -// the benign-leak direction (see spec Requirement: pidAlive ownership semantics). -func pidAlive(pid int) bool { - if pid <= 0 { - return false - } - err := syscall.Kill(pid, 0) - if err == nil { - return true - } - return !errors.Is(err, syscall.ESRCH) -} - -// relayOwnerIsDead reports whether a relay's @rk_owner_pid value identifies an -// owner that is gone — i.e. the relay is reapable. 
An empty owner is a legacy/ -// unstamped or crashed-predecessor orphan (reap). A non-integer owner is -// malformed and treated as an orphan (reap) defensively. Otherwise the relay is -// reaped only when its owner PID is not alive. A live owner spares the relay. -func relayOwnerIsDead(owner string) bool { - if owner == "" { - return true - } - pid, err := strconv.Atoi(owner) - if err != nil { - return true - } - return !pidAlive(pid) -} - -// sweepOrphanedRelaySessions reaps rk-relay-* sessions whose owning rk serve -// instance is gone, across every known tmux server. Runs synchronously at -// startup before HTTP listeners bind to eliminate races with new relays. -// -// Each relay is stamped at creation with @rk_owner_pid (the owning rk serve -// PID). The sweep reads that option and reaps a relay only when the owner is -// absent (unstamped/legacy) or dead — a live sibling's relays (e2e backend, an -// air rebuild, a second instance) are spared so their open terminals survive. -// -// Read scope follows ListServers: in production (RK_SERVER_ALLOWLIST unset) it -// scans every socket so the UI keeps seeing foreign servers (rk-e2e-*). Under -// the e2e harness (allowlist set) ListServers is narrowed to the test servers, -// so the sweep only visits those — benign, since the destructive reap is scoped -// by PID ownership regardless and the test env only cares about test servers. -// -// Per-server failures (list, owner-read, or kill) are logged and accumulated — -// they MUST NOT abort the sweep or block server startup. The caller -// (serveCmd.RunE) MAY log the aggregate error but SHALL continue startup either -// way. -// -// Uses ListRawSessionNames (not the filtered ListSessions) because the user- -// facing filter would hide the ephemerals we are trying to reap. 
-func sweepOrphanedRelaySessions(ctx context.Context) error { - servers, err := tmux.ListServers(ctx) - if err != nil { - slog.Error("relay sweep: list servers failed", "err", err) - return fmt.Errorf("list servers: %w", err) - } - var perServerErrs []string - killed := 0 - for _, server := range servers { - names, err := tmux.ListRawSessionNames(ctx, server) - if err != nil { - slog.Warn("relay sweep: list sessions failed", "server", server, "err", err) - perServerErrs = append(perServerErrs, fmt.Sprintf("%s: %v", server, err)) - continue - } - for _, name := range names { - if !strings.HasPrefix(name, tmux.RelaySessionPrefix) { - continue - } - // Defense-in-depth: the tmuxctl anchor `_rk-ctl` is not prefixed - // with `rk-relay-`, so the check above already excludes it. The - // explicit guard below documents that the anchor is owned by - // tmuxctl and must NEVER be reaped here even if naming changes. - if name == tmux.ControlAnchorSessionName { - continue - } - // Owner-PID scoping: spare relays whose owning rk serve is alive. 
- owner, err := tmux.GetSessionOwnerPID(ctx, server, name) - if err != nil { - slog.Warn("relay sweep: owner-pid read failed", "server", server, "session", name, "err", err) - perServerErrs = append(perServerErrs, fmt.Sprintf("%s/%s: %v", server, name, err)) - continue - } - if !relayOwnerIsDead(owner) { - continue - } - if err := tmux.KillSessionCtx(ctx, server, name); err != nil { - slog.Warn("relay sweep: kill failed", "server", server, "session", name, "err", err) - perServerErrs = append(perServerErrs, fmt.Sprintf("%s/%s: %v", server, name, err)) - continue - } - killed++ - } - } - if killed > 0 { - slog.Info("relay sweep: reaped orphan ephemerals", "count", killed) - } - if len(perServerErrs) > 0 { - return fmt.Errorf("relay sweep partial failures: %s", strings.Join(perServerErrs, "; ")) - } - return nil -} diff --git a/app/backend/cmd/rk/serve_sweep_test.go b/app/backend/cmd/rk/serve_sweep_test.go deleted file mode 100644 index 18b0d289..00000000 --- a/app/backend/cmd/rk/serve_sweep_test.go +++ /dev/null @@ -1,145 +0,0 @@ -package main - -import ( - "context" - "fmt" - "os" - "os/exec" - "strconv" - "testing" - "time" - - "rk/internal/tmux" -) - -// testSocketName builds a unified test socket name: rk-test---. -// Local copy of the helper in internal/tmux/main_test.go and api/main_test.go -// (Go _test.go symbols are package-private and cannot be shared across -// packages). The single cmd/rk naming site routes through it so no inline -// "rk-test-..." format string remains. -func testSocketName(role string) string { - return fmt.Sprintf("rk-test-%s-%d-%d", role, os.Getpid(), time.Now().UnixNano()) -} - -func TestPidAlive(t *testing.T) { - // The current process is unambiguously alive. - if !pidAlive(os.Getpid()) { - t.Errorf("pidAlive(self) = false, want true") - } - - // PID 1 (init) always exists; signalling it as a non-root user returns - // EPERM, which pidAlive MUST treat as alive (spare) — the benign-leak bias. 
- if !pidAlive(1) { - t.Errorf("pidAlive(1) = false, want true (EPERM/own → spare)") - } - - // A PID that does not exist (kill(pid,0) → ESRCH) is dead. PIDs above the - // kernel default pid_max (and never recycled into existence here) are a - // reliable stand-in for "no such process". - const deadPID = 0x7FFFFFFE - if pidAlive(deadPID) { - t.Errorf("pidAlive(%d) = true, want false (ESRCH → dead)", deadPID) - } -} - -func TestRelayOwnerIsDead(t *testing.T) { - tests := []struct { - name string - owner string - want bool - }{ - {"empty owner is orphan", "", true}, - {"non-integer owner is orphan", "not-a-pid", true}, - {"live owner spared", strconv.Itoa(os.Getpid()), false}, - {"dead owner reaped", "2147483646", true}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := relayOwnerIsDead(tt.owner); got != tt.want { - t.Errorf("relayOwnerIsDead(%q) = %v, want %v", tt.owner, got, tt.want) - } - }) - } -} - -// tmuxL runs a tmux command against an isolated server, failing the test on -// error so setup mistakes surface immediately. -func tmuxL(t *testing.T, server string, args ...string) { - t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - full := append([]string{"-L", server}, args...) - if out, err := exec.CommandContext(ctx, "tmux", full...).CombinedOutput(); err != nil { - t.Fatalf("tmux %v: %v\n%s", args, err, out) - } -} - -// TestSweepOrphanedRelaySessions_scoping is an end-to-end check that the sweep -// reaps only dead-owner / unstamped relays and spares a live-owner relay and the -// control anchor. It runs against a real isolated tmux server discoverable by -// tmux.ListServers (named rk-test-- so it lands in /tmp/tmux-/). 
-func TestSweepOrphanedRelaySessions_scoping(t *testing.T) { - if _, err := exec.LookPath("tmux"); err != nil { - t.Skip("tmux not available — skipping integration test") - } - server := testSocketName("unit") - - // Bootstrap the isolated server with a non-relay session so it stays alive - // even after every relay is reaped (server with zero sessions exits). - bootCtx, cancelBoot := context.WithTimeout(context.Background(), 5*time.Second) - defer cancelBoot() - if out, err := exec.CommandContext(bootCtx, "tmux", "-L", server, - "new-session", "-d", "-s", "keepalive").CombinedOutput(); err != nil { - t.Skipf("could not start isolated tmux server %q: %v\n%s", server, err, out) - } - t.Cleanup(func() { - killCtx, cancelKill := context.WithTimeout(context.Background(), 5*time.Second) - defer cancelKill() - _ = exec.CommandContext(killCtx, "tmux", "-L", server, "kill-server").Run() - }) - - live := tmux.RelaySessionPrefix + "live0001" // stamped with this PID → spared - dead := tmux.RelaySessionPrefix + "dead0002" // stamped with a dead PID → reaped - unstamped := tmux.RelaySessionPrefix + "none003" // no @rk_owner_pid → reaped - anchor := tmux.ControlAnchorSessionName // _rk-ctl → never reaped - - for _, name := range []string{live, dead, unstamped, anchor} { - tmuxL(t, server, "new-session", "-d", "-s", name) - } - tmuxL(t, server, "set-option", "-t", live, tmux.OwnerPIDOption, strconv.Itoa(os.Getpid())) - tmuxL(t, server, "set-option", "-t", dead, tmux.OwnerPIDOption, "2147483646") // > pid_max → dead - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - // The sweep iterates every server in /tmp/tmux-/. A foreign server may - // fail to list and surface as an aggregated per-server error — that MUST NOT - // abort the sweep (A-009), so we log it but still assert on our server's - // final state below rather than failing on a non-nil aggregate. 
- if err := sweepOrphanedRelaySessions(ctx); err != nil { - t.Logf("sweep returned aggregated per-server error (non-fatal): %v", err) - } - - listCtx, cancelList := context.WithTimeout(context.Background(), 5*time.Second) - defer cancelList() - names, err := tmux.ListRawSessionNames(listCtx, server) - if err != nil { - t.Fatalf("list sessions after sweep: %v", err) - } - survived := make(map[string]bool, len(names)) - for _, n := range names { - survived[n] = true - } - - if !survived[live] { - t.Errorf("live-owner relay %q was reaped, want spared", live) - } - if !survived[anchor] { - t.Errorf("control anchor %q was reaped, want spared", anchor) - } - if survived[dead] { - t.Errorf("dead-owner relay %q survived, want reaped", dead) - } - if survived[unstamped] { - t.Errorf("unstamped relay %q survived, want reaped", unstamped) - } -} diff --git a/app/backend/internal/tmux/active_window_test.go b/app/backend/internal/tmux/active_window_test.go index fb7ac62b..00558994 100644 --- a/app/backend/internal/tmux/active_window_test.go +++ b/app/backend/internal/tmux/active_window_test.go @@ -25,11 +25,11 @@ func TestBaseGroupName(t *testing.T) { want string }{ {"ungrouped → own name", "solo", "", "solo"}, - {"base member from list (queried as base)", "runKit", "runKit,rk-relay-abc", "runKit"}, - {"base member from list (queried as ephemeral)", "rk-relay-abc", "runKit,rk-relay-abc", "runKit"}, + {"base member from list (queried as base)", "runKit", "runKit,_rk-ctl", "runKit"}, + {"base member from list (queried as anchor)", "_rk-ctl", "runKit,_rk-ctl", "runKit"}, {"anchor skipped, base chosen", "_rk-ctl", "_rk-ctl,runKit", "runKit"}, - {"ephemeral-only list → own name fallback", "rk-relay-x", "rk-relay-x", "rk-relay-x"}, - {"order independent — base is first non-special", "rk-relay-x", "rk-relay-x,runKit", "runKit"}, + {"anchor-only list → own name fallback", "_rk-ctl", "_rk-ctl", "_rk-ctl"}, + {"order independent — base is first non-special", "_rk-ctl", "_rk-ctl,runKit", 
"runKit"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/app/backend/internal/tmux/board.go b/app/backend/internal/tmux/board.go index 1d2d6337..56500091 100644 --- a/app/backend/internal/tmux/board.go +++ b/app/backend/internal/tmux/board.go @@ -9,20 +9,22 @@ import ( "strings" ) -// BoardOption is the tmux server-scoped user option that stores the -// per-server pin membership of pane boards. The stored value is a -// comma-separated list of `::` entries. -const BoardOption = "@rk_board" - -// boardEntrySep separates entries within the @rk_board value. boardFieldSep -// separates fields within an entry. Both are reserved characters and rejected -// in board-name validation. +// Board membership is derived entirely from single-window pin-sessions +// (`_rk-pin-*`) and their session-scoped user options — there is no `@rk_board` +// server-option encoding (Constitution II: state derived from tmux). A board is +// the set of pin-sessions sharing a `@rk_board` value. +// +// - @rk_board — which board this pinned window belongs to +// - @rk_home — the home session to restore the window to on unpin +// - @rk_board_order — fractional order key within the board (ComputeOrderKey) const ( - boardEntrySep = "," - boardFieldSep = ":" + BoardOption = "@rk_board" + HomeOption = "@rk_home" + BoardOrderOption = "@rk_board_order" ) -// BoardEntry represents a single (server, windowID) pin to a named board. +// BoardEntry represents a single (server, windowID) pin to a named board, +// derived from a `_rk-pin-*` session's vars. type BoardEntry struct { Server string `json:"server"` WindowID string `json:"windowId"` @@ -43,8 +45,7 @@ var ( ) // ValidBoardName reports whether name is a syntactically valid board name. -// Pattern: alphanumeric + hyphen + underscore, length 1-32. The reserved -// separator characters `,` and `:` are excluded by the pattern. +// Pattern: alphanumeric + hyphen + underscore, length 1-32. 
func ValidBoardName(name string) bool { return boardNamePattern.MatchString(name) } @@ -60,57 +61,6 @@ func ValidOrderKey(key string) bool { return orderKeyPattern.MatchString(key) } -// parseBoardValue parses the raw @rk_board option value into entries. The -// supplied server is attached to each entry. Malformed entries are skipped -// with a warning log; valid entries are returned unsorted. -func parseBoardValue(server, raw string) []BoardEntry { - raw = strings.TrimSpace(raw) - if raw == "" { - return nil - } - parts := strings.Split(raw, boardEntrySep) - out := make([]BoardEntry, 0, len(parts)) - for _, p := range parts { - p = strings.TrimSpace(p) - if p == "" { - continue - } - fields := strings.Split(p, boardFieldSep) - if len(fields) != 3 { - slog.Warn("board: malformed @rk_board entry (field count)", "server", server, "entry", p) - continue - } - windowID := strings.TrimSpace(fields[0]) - board := strings.TrimSpace(fields[1]) - orderKey := strings.TrimSpace(fields[2]) - if !ValidWindowID(windowID) || !ValidBoardName(board) || !ValidOrderKey(orderKey) { - slog.Warn("board: malformed @rk_board entry (invalid field)", "server", server, "entry", p) - continue - } - out = append(out, BoardEntry{ - Server: server, - WindowID: windowID, - Board: board, - OrderKey: orderKey, - }) - } - return out -} - -// serializeBoardValue produces the canonical @rk_board option value for a -// slice of entries. Server is implicit (per-server option), so it is not -// included in the serialized form. 
-func serializeBoardValue(entries []BoardEntry) string { - if len(entries) == 0 { - return "" - } - parts := make([]string, 0, len(entries)) - for _, e := range entries { - parts = append(parts, e.WindowID+boardFieldSep+e.Board+boardFieldSep+e.OrderKey) - } - return strings.Join(parts, boardEntrySep) -} - // isAbsentOption returns true when err is one of the operational tmux states // that map to "no entries" rather than a real error: option unset, no server // running, or socket not connectable. @@ -125,58 +75,108 @@ func isAbsentOption(err error) bool { strings.Contains(msg, "failed to connect") } -// ListBoardEntries returns the pinned-window entries stored on the named -// server. Returns ([]BoardEntry{}, nil) when the option is unset or the -// server is not reachable — these are normal operational states. +// showSessionOption reads a single session-scoped user option from a session, +// returning "" (no error) when the option is unset or the server is +// unreachable. Other failures propagate. +func showSessionOption(ctx context.Context, server, session, option string) (string, error) { + out, err := tmuxExecRawServer(ctx, server, "show-options", "-v", "-t", session, option) + if err != nil { + if isAbsentOption(err) { + return "", nil + } + return "", fmt.Errorf("read %s on %s/%s: %w", option, server, session, err) + } + return strings.TrimSpace(out), nil +} + +// setSessionOption sets a session-scoped user option on a session. +func setSessionOption(ctx context.Context, server, session, option, value string) error { + _, err := tmuxExecRawServer(ctx, server, "set-option", "-t", session, option, value) + return err +} + +// pinEntry derives the BoardEntry for a single pin-session by reading its +// session vars. Returns (entry, true, nil) when the session carries a valid +// @rk_board value; (zero, false, nil) when it is not a board pin (no/invalid +// @rk_board) — a defensive skip rather than an error. 
+func pinEntry(ctx context.Context, server, pinSession string) (BoardEntry, bool, error) { + windowID, ok := WindowIDFromPinSession(pinSession) + if !ok { + return BoardEntry{}, false, nil + } + board, err := showSessionOption(ctx, server, pinSession, BoardOption) + if err != nil { + return BoardEntry{}, false, err + } + if !ValidBoardName(board) { + // Not a board pin (or malformed) — skip without error. + return BoardEntry{}, false, nil + } + orderKey, err := showSessionOption(ctx, server, pinSession, BoardOrderOption) + if err != nil { + return BoardEntry{}, false, err + } + if !ValidOrderKey(orderKey) { + orderKey = initialAppendKey + } + return BoardEntry{ + Server: server, + WindowID: windowID, + Board: board, + OrderKey: orderKey, + }, true, nil +} + +// ListBoardEntries returns the pinned-window entries on the named server, +// derived from its `_rk-pin-*` sessions. Returns ([]BoardEntry{}, nil) when no +// pin-sessions exist or the server is not reachable — normal operational states. 
func ListBoardEntries(ctx context.Context, server string) ([]BoardEntry, error) { ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) defer cancel() - out, err := tmuxExecRawServer(ctx, server, "show-option", "-sv", BoardOption) + pins, err := ListPinSessionNames(ctx, server) if err != nil { if isAbsentOption(err) { return []BoardEntry{}, nil } - return nil, fmt.Errorf("read %s on %s: %w", BoardOption, server, err) + return nil, fmt.Errorf("list pin sessions on %s: %w", server, err) } - entries := parseBoardValue(server, out) - if entries == nil { - return []BoardEntry{}, nil + out := make([]BoardEntry, 0, len(pins)) + for _, pin := range pins { + entry, ok, derr := pinEntry(ctx, server, pin) + if derr != nil { + slog.Warn("board: pin-session var read failed", "server", server, "pin", pin, "err", derr) + continue + } + if !ok { + continue + } + out = append(out, entry) } - return entries, nil + return out, nil } -// ListAllBoardEntries aggregates entries from every reachable server. -func ListAllBoardEntries(ctx context.Context) ([]BoardEntry, error) { +// ListBoards returns the alphabetical per-board pin-count summary across all +// reachable servers, derived from pin-sessions. A board exists only while at +// least one pin-session carries its name (no empty boards, no registry). +func ListBoards(ctx context.Context) ([]BoardSummary, error) { servers, err := ListServers(ctx) if err != nil { return nil, fmt.Errorf("list servers: %w", err) } if len(servers) == 0 { - // No reachable servers — also try "default" since it may not have a socket file yet. servers = []string{"default"} } - all := make([]BoardEntry, 0) + counts := make(map[string]int) for _, s := range servers { entries, lerr := ListBoardEntries(ctx, s) if lerr != nil { slog.Warn("board: ListBoardEntries failed", "server", s, "err", lerr) continue } - all = append(all, entries...) - } - return all, nil -} - -// ListBoards returns the alphabetical summary across all servers. 
-func ListBoards(ctx context.Context) ([]BoardSummary, error) { - entries, err := ListAllBoardEntries(ctx) - if err != nil { - return nil, err - } - counts := make(map[string]int) - for _, e := range entries { - counts[e.Board]++ + for _, e := range entries { + counts[e.Board]++ + } } out := make([]BoardSummary, 0, len(counts)) for name, count := range counts { @@ -186,34 +186,10 @@ func ListBoards(ctx context.Context) ([]BoardSummary, error) { return out, nil } -// liveWindowIDs returns the set of window IDs currently present on the named -// server. Returns nil with nil error when the server is unreachable. -func liveWindowIDs(ctx context.Context, server string) (map[string]bool, error) { - ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) - defer cancel() - - out, err := tmuxExecRawServer(ctx, server, "list-windows", "-a", "-F", "#{window_id}") - if err != nil { - if isAbsentOption(err) { - return map[string]bool{}, nil - } - return nil, fmt.Errorf("list-windows on %s: %w", server, err) - } - set := make(map[string]bool) - for _, line := range strings.Split(out, "\n") { - line = strings.TrimSpace(line) - if line != "" { - set[line] = true - } - } - return set, nil -} - -// GetBoard returns entries for a single board across all servers, sorted by -// order key. Stale entries (windows that no longer exist on their source -// server) are dropped from the response and best-effort write-back to -// @rk_board on each affected server. Write-back failures do NOT fail the -// read; they are logged and the cleaned slice is returned. +// GetBoard returns entries for a single board across all reachable servers, +// sorted by order key. Membership is derived live from pin-sessions, so there +// is no stale entry to clean up — a killed pinned window's session simply +// disappears from the listing. 
func GetBoard(ctx context.Context, name string) ([]BoardEntry, error) { if !ValidBoardName(name) { return nil, fmt.Errorf("invalid board name") @@ -232,64 +208,24 @@ func GetBoard(ctx context.Context, name string) ([]BoardEntry, error) { slog.Warn("board: ListBoardEntries failed", "server", s, "err", lerr) continue } - live, werr := liveWindowIDs(ctx, s) - if werr != nil { - slog.Warn("board: liveWindowIDs failed", "server", s, "err", werr) - // Without live data we can't safely drop stale entries — return what we have. - for _, e := range entries { - if e.Board == name { - out = append(out, e) - } - } - continue - } - // Split into kept-on-server (all boards) and matching-this-board. - kept := entries[:0:len(entries)] - var dropped bool for _, e := range entries { - if !live[e.WindowID] { - dropped = true - continue - } - kept = append(kept, e) if e.Board == name { out = append(out, e) } } - if dropped { - // Best-effort write-back of the cleaned slice. - if werr := setBoardValue(ctx, s, kept); werr != nil { - slog.Warn("board: stale-cleanup write-back failed", "server", s, "err", werr) - } - } } sort.Slice(out, func(i, j int) bool { return out[i].OrderKey < out[j].OrderKey }) return out, nil } -// setBoardValue writes the entries slice as the @rk_board option on the named -// server. An empty slice unsets the option (set -u) so the absent state is -// canonical. -func setBoardValue(ctx context.Context, server string, entries []BoardEntry) error { - ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) - defer cancel() - if len(entries) == 0 { - _, err := tmuxExecRawServer(ctx, server, "set-option", "-su", BoardOption) - return err - } - value := serializeBoardValue(entries) - _, err := tmuxExecRawServer(ctx, server, "set-option", "-s", BoardOption, value) - return err -} - // initialAppendKey is the first order key assigned when a board has no // entries. 
Using a midpoint letter leaves headroom for both prepend and // append operations, which is important since the alphabet has no // representation strictly less than "a". const initialAppendKey = "m" -// nextAppendKey returns an order key strictly greater than the largest -// existing key in entries (lexicographic). Empty list → initialAppendKey. +// nextAppendKey returns an order key strictly greater than the largest existing +// key among the supplied board entries (lexicographic). Empty → initialAppendKey. func nextAppendKey(entries []BoardEntry) string { maxKey := "" for _, e := range entries { @@ -302,15 +238,22 @@ func nextAppendKey(entries []BoardEntry) string { } next, err := ComputeOrderKey(maxKey, "") if err != nil { - // Fall back to extending with 'a'. return maxKey + "a" } return next } -// Pin adds an entry for (server, windowID, board) with a fresh order key. -// Idempotent: returns nil with no mutation if the same (windowID, board) -// already exists on the server. +// Pin MOVES the window identified by windowID into its own single-window +// pin-session `_rk-pin-<N>` and records its board membership. The window leaves +// its home session (intended — this is what removes window sharing and lets a +// board pane attach directly to the pin-session). +// +// Idempotent: if `_rk-pin-<N>` already exists, Pin never re-moves the window or +// churns its order key; a different-board re-pin only re-stamps @rk_board. +// +// Security (Constitution §I): windowID and board are validated before any +// subprocess; every tmux call is ctx+timeout-scoped via the package exec +// helpers with explicit argument slices (no shell strings). 
func Pin(ctx context.Context, server, windowID, board string) error { ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) defer cancel() @@ -320,36 +263,125 @@ func Pin(ctx context.Context, server, windowID, board string) error { if !ValidBoardName(board) { return fmt.Errorf("invalid board name") } - entries, err := ListBoardEntries(ctx, server) - if err != nil { - return err + pinSession, ok := PinSessionName(windowID) + if !ok { + return fmt.Errorf("invalid window id") } - // Idempotency: same window already pinned to this board is a no-op. - for _, e := range entries { - if e.WindowID == windowID && e.Board == board { + + // Idempotency: the pin-session already exists → the window is already pinned. + // A same-board re-pin is a clean no-op. A *different*-board re-pin must NOT + // silently report success while leaving the window on its old board — re-stamp + // @rk_board so the requested board wins (the window has exactly one pin-session, + // so this is the only authoritative place membership lives). + if _, err := tmuxExecRawServer(ctx, server, "has-session", "-t", pinSession); err == nil { + current, readErr := showSessionOption(ctx, server, pinSession, BoardOption) + if readErr != nil { + return fmt.Errorf("read %s on existing pin %q: %w", BoardOption, pinSession, readErr) + } + if current == board { return nil } + if err := setSessionOption(ctx, server, pinSession, BoardOption, board); err != nil { + return fmt.Errorf("re-stamp %s on existing pin %q: %w", BoardOption, pinSession, err) + } + return nil + } + + // Resolve the home session to remember for unpin. The window must currently + // live in a home session (not already a pin-session). 
+ home, err := ResolveWindowSession(ctx, server, windowID) + if err != nil { + return fmt.Errorf("resolve home session: %w", err) + } + + // Compute the append key restricted to this board BEFORE the move (the + // window still counts under its old session, but board membership is read + // from existing pin-sessions, which excludes this window). + entries, err := ListBoardEntries(ctx, server) + if err != nil { + return err } - // Compute the next append key restricted to this board, so order keys are - // monotonic within the board (cross-board reuse is fine). boardEntries := make([]BoardEntry, 0) for _, e := range entries { if e.Board == board { boardEntries = append(boardEntries, e) } } - newKey := nextAppendKey(boardEntries) - entries = append(entries, BoardEntry{ - Server: server, - WindowID: windowID, - Board: board, - OrderKey: newKey, - }) - return setBoardValue(ctx, server, entries) + orderKey := nextAppendKey(boardEntries) + + // Create the pin-session (starts with one placeholder window) and capture the + // placeholder window's ID by reading the new session's sole window. Move the + // target window in, then kill the captured placeholder by ID so the moved + // window is the session's sole window. Capturing the placeholder ID (rather + // than assuming index 0) is robust to base-index config and to the moved + // window's landing index. + if _, err := tmuxExecServer(ctx, server, "new-session", "-d", "-s", pinSession); err != nil { + return fmt.Errorf("create pin session: %w", err) + } + placeholderLines, err := tmuxExecServer(ctx, server, "list-windows", "-t", pinSession, "-F", "#{window_id}") + if err != nil || len(placeholderLines) == 0 { + // Roll back the empty pin-session. 
Root the teardown in context.Background(): + // Pin's ctx may already be at/near its deadline, and KillSessionCtx wraps the + // passed ctx with WithTimeout — a cancelled parent would make the kill a no-op + // and orphan the session (the same reason relay.go roots teardown in Background). + _ = KillSessionCtx(context.Background(), server, pinSession) + if err != nil { + return fmt.Errorf("read pin placeholder window: %w", err) + } + return fmt.Errorf("read pin placeholder window: pin-session %q reported no windows", pinSession) + } + placeholderID := strings.TrimSpace(placeholderLines[0]) + if err := MoveWindowToSession(windowID, pinSession, server); err != nil { + // Roll back the empty pin-session so a failed move leaves no orphan. + _ = KillSessionCtx(context.Background(), server, pinSession) + return fmt.Errorf("move window into pin session: %w", err) + } + if _, err := tmuxExecServer(ctx, server, "kill-window", "-t", placeholderID); err != nil { + // Non-fatal: a stray placeholder is cosmetic, but log it loudly. + slog.Warn("board: pin placeholder kill failed", "server", server, "pin", pinSession, "placeholder", placeholderID, "err", err) + } + + // The window now physically lives in the pin-session. From here a stamp failure + // must NOT return with the window stranded: pinEntry rejects a pin-session with + // no/invalid @rk_board (→ absent from BOARDS) and parseSessions filters _rk-pin-* + // (→ absent from SESSIONS), so a half-stamped pin is an invisible lost window. + // On any stamp failure, undo the move (window back to its home) and kill the + // pin-session — both rooted in context.Background() for the same reason as above. + // Double-fault guard: only kill the pin-session if the move-back SUCCEEDED. If the + // move-back itself fails the window is still physically inside the pin-session; + // killing it would destroy a live window. 
Leaving the (still-named) pin-session + // keeps the window recoverable — the pin is unpinnable by window id, and the next + // same-window Pin sees the existing session and re-stamps via the idempotent path. + rollbackMove := func(cause error, opt string) error { + if mvErr := MoveWindowToSession(windowID, home, server); mvErr != nil { + slog.Error("board: pin stamp-failure rollback move failed — leaving pin-session intact so the window survives (recoverable via unpin/re-pin)", + "server", server, "window", windowID, "home", home, "pin", pinSession, "err", mvErr) + return fmt.Errorf("set %s: %w (rollback move-back also failed: %v)", opt, cause, mvErr) + } + _ = KillSessionCtx(context.Background(), server, pinSession) + return fmt.Errorf("set %s: %w", opt, cause) + } + + // Stamp membership vars on the pin-session. + if err := setSessionOption(ctx, server, pinSession, HomeOption, home); err != nil { + return rollbackMove(err, HomeOption) + } + if err := setSessionOption(ctx, server, pinSession, BoardOption, board); err != nil { + return rollbackMove(err, BoardOption) + } + if err := setSessionOption(ctx, server, pinSession, BoardOrderOption, orderKey); err != nil { + return rollbackMove(err, BoardOrderOption) + } + return nil } -// Unpin removes the entry matching (windowID, board) on the given server. -// Idempotent: silently succeeds if the entry is not present. +// Unpin restores the pinned window to its remembered home session and removes +// the pin-session. If the home session was killed while the window was pinned, +// it is recreated with the moved window as its only window. The window is +// appended at tmux's next free index in the home session (no original-position +// restore). +// +// Idempotent: a missing pin-session is a silent success. 
func Unpin(ctx context.Context, server, windowID, board string) error { ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) defer cancel() @@ -359,27 +391,83 @@ func Unpin(ctx context.Context, server, windowID, board string) error { if !ValidBoardName(board) { return fmt.Errorf("invalid board name") } - entries, err := ListBoardEntries(ctx, server) + pinSession, ok := PinSessionName(windowID) + if !ok { + return fmt.Errorf("invalid window id") + } + + // Idempotency: no pin-session → nothing to unpin. + if _, err := tmuxExecRawServer(ctx, server, "has-session", "-t", pinSession); err != nil { + return nil + } + + home, err := showSessionOption(ctx, server, pinSession, HomeOption) if err != nil { - return err + return fmt.Errorf("read %s: %w", HomeOption, err) } - out := entries[:0:len(entries)] - changed := false - for _, e := range entries { - if e.WindowID == windowID && e.Board == board { - changed = true - continue + + homeAlive := false + if home != "" { + if _, err := tmuxExecRawServer(ctx, server, "has-session", "-t", home); err == nil { + homeAlive = true } - out = append(out, e) } - if !changed { + + if homeAlive { + // Move the window back into the live home session (tmux appends it). + // Moving the pin-session's SOLE window out may auto-destroy the now-empty + // pin-session (tmux's default exit-empty behaviour), so a subsequent + // kill-session would report "can't find session" — which IS the desired + // end state. killPinSessionIfPresent tolerates that. + if err := MoveWindowToSession(windowID, home, server); err != nil { + return fmt.Errorf("restore window to home %q: %w", home, err) + } + return killPinSessionIfPresent(ctx, server, pinSession) + } + + // Home is gone (or was never recorded) — recreate it. Rename the pin-session + // to the home name so the moved window becomes the new home session's only + // window (no placeholder). 
When home is empty there is no name to + // rename to, so falling back to a freshly named session is impossible; otherwise + // the pin-session is renamed (rename-session) to the remembered home. + if home == "" { + // No recorded home: merely clearing membership would leave the window in a + // session still named `_rk-pin-*`, so it would surface in SESSIONS only if + // the pin-session were renamed away. Without a target name we cannot + // restore; this should not happen (Pin always stamps @rk_home), so treat + // it as an error rather than silently stranding the window. + return fmt.Errorf("unpin: pin-session %q has no @rk_home to restore to", pinSession) + } + // Recreate home by renaming the (single-window) pin-session to the home name. + // This preserves the window as the sole window of the recreated home session + // with no placeholder, and atomically removes the `_rk-pin-*` name. + if err := RenameSession(pinSession, home, server); err != nil { + return fmt.Errorf("recreate home %q from pin session: %w", home, err) + } + // Clear the membership vars left on the now-renamed session so a future read + // does not mistake the recreated home for a pin. + _, _ = tmuxExecRawServer(ctx, server, "set-option", "-u", "-t", home, BoardOption) + _, _ = tmuxExecRawServer(ctx, server, "set-option", "-u", "-t", home, HomeOption) + _, _ = tmuxExecRawServer(ctx, server, "set-option", "-u", "-t", home, BoardOrderOption) + return nil +} + +// killPinSessionIfPresent kills the pin-session, treating an +// already-gone session ("can't find session" / "session not found") as success. +// Moving a single-window pin-session's only window out can auto-destroy the +// empty session under tmux's default exit-empty behaviour, so the explicit kill +// is best-effort cleanup, not a hard requirement. 
+func killPinSessionIfPresent(ctx context.Context, server, pinSession string) error { + if _, err := tmuxExecRawServer(ctx, server, "has-session", "-t", pinSession); err != nil { + // Already gone (auto-destroyed) — the desired end state. return nil } - return setBoardValue(ctx, server, out) + return KillSessionCtx(ctx, server, pinSession) } -// Reorder updates the order key of an existing entry. Returns an error if -// the entry is not found or newOrderKey is invalid. +// Reorder updates the order key of an existing pin by rewriting only its +// pin-session's @rk_board_order var. Returns an error if the pin-session does +// not exist, is not on the named board, or newOrderKey is invalid. func Reorder(ctx context.Context, server, windowID, board, newOrderKey string) error { ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) defer cancel() @@ -392,57 +480,21 @@ func Reorder(ctx context.Context, server, windowID, board, newOrderKey string) e if !ValidOrderKey(newOrderKey) { return fmt.Errorf("invalid order key") } - entries, err := ListBoardEntries(ctx, server) - if err != nil { - return err - } - found := false - for i, e := range entries { - if e.WindowID == windowID && e.Board == board { - entries[i].OrderKey = newOrderKey - found = true - break - } + pinSession, ok := PinSessionName(windowID) + if !ok { + return fmt.Errorf("invalid window id") } - if !found { + if _, err := tmuxExecRawServer(ctx, server, "has-session", "-t", pinSession); err != nil { return fmt.Errorf("entry not found") } - return setBoardValue(ctx, server, entries) -} - -// RemoveAllByWindowID removes every entry whose window_id matches the -// supplied id from the named server's @rk_board, returning the list of -// board names that lost entries (deduplicated, sorted alphabetically). -// Idempotent: empty result + nil error if no entries matched. 
-func RemoveAllByWindowID(ctx context.Context, server, windowID string) ([]string, error) { - if !ValidWindowID(windowID) { - return nil, fmt.Errorf("invalid window id") - } - entries, err := ListBoardEntries(ctx, server) + current, err := showSessionOption(ctx, server, pinSession, BoardOption) if err != nil { - return nil, err - } - out := entries[:0:len(entries)] - boardSet := make(map[string]struct{}) - for _, e := range entries { - if e.WindowID == windowID { - boardSet[e.Board] = struct{}{} - continue - } - out = append(out, e) - } - if len(boardSet) == 0 { - return nil, nil - } - if err := setBoardValue(ctx, server, out); err != nil { - return nil, err + return err } - names := make([]string, 0, len(boardSet)) - for n := range boardSet { - names = append(names, n) + if current != board { + return fmt.Errorf("entry not found") } - sort.Strings(names) - return names, nil + return setSessionOption(ctx, server, pinSession, BoardOrderOption, newOrderKey) } // ComputeOrderKey returns a key strictly between `before` and `after` in diff --git a/app/backend/internal/tmux/board_test.go b/app/backend/internal/tmux/board_test.go index 9ba7c72f..922e2682 100644 --- a/app/backend/internal/tmux/board_test.go +++ b/app/backend/internal/tmux/board_test.go @@ -83,98 +83,6 @@ func TestValidOrderKey(t *testing.T) { } } -func TestParseBoardValue(t *testing.T) { - tests := []struct { - name string - raw string - want []BoardEntry - }{ - {"empty", "", nil}, - {"whitespace", " \n", nil}, - {"single", "@1234:main:a", []BoardEntry{ - {Server: "s", WindowID: "@1234", Board: "main", OrderKey: "a"}, - }}, - {"multiple", "@1234:main:a,@5678:main:c,@9000:deploy:b", []BoardEntry{ - {Server: "s", WindowID: "@1234", Board: "main", OrderKey: "a"}, - {Server: "s", WindowID: "@5678", Board: "main", OrderKey: "c"}, - {Server: "s", WindowID: "@9000", Board: "deploy", OrderKey: "b"}, - }}, - {"skip malformed field count", "not:a:valid:entry,@1234:main:a", []BoardEntry{ - {Server: "s", WindowID: 
"@1234", Board: "main", OrderKey: "a"}, - }}, - {"skip malformed window id", "1234:main:a,@5678:main:b", []BoardEntry{ - {Server: "s", WindowID: "@5678", Board: "main", OrderKey: "b"}, - }}, - {"skip malformed board", "@1234:foo,bar:a,@5678:main:b", []BoardEntry{ - // the first parses as 4 fields and is skipped on count - {Server: "s", WindowID: "@5678", Board: "main", OrderKey: "b"}, - }}, - {"skip malformed order key", "@1234:main:Z,@5678:main:b", []BoardEntry{ - {Server: "s", WindowID: "@5678", Board: "main", OrderKey: "b"}, - }}, - {"empty entries between commas", "@1234:main:a,,@5678:main:b", []BoardEntry{ - {Server: "s", WindowID: "@1234", Board: "main", OrderKey: "a"}, - {Server: "s", WindowID: "@5678", Board: "main", OrderKey: "b"}, - }}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := parseBoardValue("s", tt.raw) - if len(got) != len(tt.want) { - t.Fatalf("got %d entries, want %d (got=%v)", len(got), len(tt.want), got) - } - for i := range tt.want { - if got[i] != tt.want[i] { - t.Errorf("idx %d: got %+v, want %+v", i, got[i], tt.want[i]) - } - } - }) - } -} - -func TestSerializeBoardValue(t *testing.T) { - tests := []struct { - name string - in []BoardEntry - want string - }{ - {"empty", nil, ""}, - {"single", []BoardEntry{ - {WindowID: "@1234", Board: "main", OrderKey: "a"}, - }, "@1234:main:a"}, - {"multiple preserves order", []BoardEntry{ - {WindowID: "@1234", Board: "main", OrderKey: "a"}, - {WindowID: "@5678", Board: "deploy", OrderKey: "b"}, - }, "@1234:main:a,@5678:deploy:b"}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := serializeBoardValue(tt.in) - if got != tt.want { - t.Errorf("got %q, want %q", got, tt.want) - } - }) - } -} - -func TestRoundTripBoardValue(t *testing.T) { - in := []BoardEntry{ - {Server: "s", WindowID: "@1234", Board: "main", OrderKey: "a"}, - {Server: "s", WindowID: "@5678", Board: "deploy", OrderKey: "b"}, - {Server: "s", WindowID: "@9999", Board: "main", 
OrderKey: "bm"}, - } - raw := serializeBoardValue(in) - got := parseBoardValue("s", raw) - if len(got) != len(in) { - t.Fatalf("len got=%d, want=%d", len(got), len(in)) - } - for i := range in { - if got[i] != in[i] { - t.Errorf("idx %d: got %+v, want %+v", i, got[i], in[i]) - } - } -} - func TestComputeOrderKey(t *testing.T) { tests := []struct { name string @@ -256,295 +164,406 @@ func TestComputeOrderKey_InvalidInputs(t *testing.T) { } } -// withBoardTmux starts an ephemeral tmux server for board integration tests. -// Mirrors withSessionOrderTmux from tmux_test.go. +func TestPinSessionNameRoundTrip(t *testing.T) { + tests := []struct { + windowID string + wantName string + wantOK bool + }{ + {"@42", "_rk-pin-42", true}, + {"@0", "_rk-pin-0", true}, + {"@9999999", "_rk-pin-9999999", true}, + {"42", "", false}, + {"@abc", "", false}, + {"", "", false}, + } + for _, tt := range tests { + t.Run(tt.windowID, func(t *testing.T) { + name, ok := PinSessionName(tt.windowID) + if ok != tt.wantOK || name != tt.wantName { + t.Fatalf("PinSessionName(%q) = (%q, %v), want (%q, %v)", tt.windowID, name, ok, tt.wantName, tt.wantOK) + } + if !ok { + return + } + id, rok := WindowIDFromPinSession(name) + if !rok || id != tt.windowID { + t.Errorf("WindowIDFromPinSession(%q) = (%q, %v), want (%q, true)", name, id, rok, tt.windowID) + } + }) + } +} + +func TestWindowIDFromPinSession_Invalid(t *testing.T) { + for _, name := range []string{"dev", "_rk-ctl", "_rk-pin-", "_rk-pin-abc", "rk-relay-x"} { + if _, ok := WindowIDFromPinSession(name); ok { + t.Errorf("WindowIDFromPinSession(%q) = ok, want not-ok", name) + } + } +} + +// withBoardTmux starts an ephemeral tmux server with a single home session +// ("home") for board integration tests. Reuses withSessionOrderTmux's +// bootstrap, then renames the boot session to "home" so window moves have a +// stable home target. 
func withBoardTmux(t *testing.T) string { t.Helper() - server := withSessionOrderTmux(t) // re-use same helper + server := withSessionOrderTmux(t) + if err := RenameSession("boot", "home", server); err != nil { + t.Fatalf("rename boot->home: %v", err) + } return server } -func TestPin_AppendsAndIsIdempotent(t *testing.T) { - server := withBoardTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) +// createHomeWindow adds a window named `name` to the home session and returns +// its stable @N window id. +func createHomeWindow(t *testing.T, server, session, name string) string { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - - // Use a fake @ id; tmux options don't validate that the id maps to a - // live window — we test idempotency at the option level. - if err := Pin(ctx, server, "@1234", "main"); err != nil { - t.Fatalf("Pin first: %v", err) + if _, err := tmuxExecServer(ctx, server, "new-window", "-t", session, "-n", name, "-P", "-F", "#{window_id}"); err != nil { + t.Fatalf("new-window %q: %v", name, err) } - entries, err := ListBoardEntries(ctx, server) + // Resolve the id by listing windows and matching the name (the -P output is + // swallowed by tmuxExecServer line filtering in some shells; list is robust). + windows, err := ListWindows(ctx, session, server) if err != nil { - t.Fatalf("ListBoardEntries: %v", err) + t.Fatalf("list windows: %v", err) } - if len(entries) != 1 || entries[0].WindowID != "@1234" || entries[0].Board != "main" { - t.Fatalf("after first Pin got %+v", entries) + for _, w := range windows { + if w.Name == name { + return w.WindowID + } } + t.Fatalf("could not resolve window id for %q in %q", name, session) + return "" +} - // Idempotent re-pin. 
- if err := Pin(ctx, server, "@1234", "main"); err != nil { - t.Fatalf("Pin second: %v", err) - } - entries2, err := ListBoardEntries(ctx, server) +// windowsInSession returns the @N ids currently in a session (empty if the +// session does not exist). +func windowsInSession(t *testing.T, server, session string) []string { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + windows, err := ListWindows(ctx, session, server) if err != nil { - t.Fatalf("ListBoardEntries: %v", err) - } - if len(entries2) != 1 { - t.Errorf("expected 1 entry after idempotent re-pin, got %+v", entries2) + return nil } - if entries2[0].OrderKey != entries[0].OrderKey { - t.Errorf("order key changed on idempotent re-pin: %q -> %q", entries[0].OrderKey, entries2[0].OrderKey) + ids := make([]string, 0, len(windows)) + for _, w := range windows { + ids = append(ids, w.WindowID) } + return ids +} - // Pin a different window — should append. - if err := Pin(ctx, server, "@5678", "main"); err != nil { - t.Fatalf("Pin third: %v", err) - } - entries3, err := ListBoardEntries(ctx, server) - if err != nil { - t.Fatalf("ListBoardEntries: %v", err) - } - if len(entries3) != 2 { - t.Fatalf("expected 2 entries, got %+v", entries3) - } - // Second entry's order key must be greater than the first's. 
- var first, second BoardEntry - for _, e := range entries3 { - if e.WindowID == "@1234" { - first = e - } else { - second = e - } - } - if !(first.OrderKey < second.OrderKey) { - t.Errorf("expected %q < %q", first.OrderKey, second.OrderKey) - } +func hasSession(t *testing.T, server, session string) bool { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _, err := tmuxExecRawServer(ctx, server, "has-session", "-t", session) + return err == nil } -func TestUnpin_RemovesOnlyMatching(t *testing.T) { +func TestPin_MovesWindowAndStampsVars(t *testing.T) { server := withBoardTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) defer cancel() - if err := Pin(ctx, server, "@1234", "main"); err != nil { - t.Fatal(err) + wid := createHomeWindow(t, server, "home", "agent") + pin, _ := PinSessionName(wid) + + if err := Pin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Pin: %v", err) } - if err := Pin(ctx, server, "@1234", "deploy"); err != nil { - t.Fatal(err) + + // The window left its home session. + for _, id := range windowsInSession(t, server, "home") { + if id == wid { + t.Errorf("window %s still in home session after Pin", wid) + } } - if err := Pin(ctx, server, "@5678", "main"); err != nil { - t.Fatal(err) + // The pin-session holds exactly the moved window (no placeholder). + pinWindows := windowsInSession(t, server, pin) + if len(pinWindows) != 1 || pinWindows[0] != wid { + t.Fatalf("pin session windows = %v, want [%s] (single window, no placeholder)", pinWindows, wid) } - - if err := Unpin(ctx, server, "@1234", "main"); err != nil { - t.Fatal(err) + // Membership vars are stamped. 
+ board, _ := showSessionOption(ctx, server, pin, BoardOption) + home, _ := showSessionOption(ctx, server, pin, HomeOption) + order, _ := showSessionOption(ctx, server, pin, BoardOrderOption) + if board != "main" || home != "home" || !ValidOrderKey(order) { + t.Errorf("vars: board=%q home=%q order=%q, want main/home/", board, home, order) } + // Derived entry matches. entries, err := ListBoardEntries(ctx, server) if err != nil { t.Fatal(err) } - if len(entries) != 2 { - t.Fatalf("expected 2 entries, got %+v", entries) + if len(entries) != 1 || entries[0].WindowID != wid || entries[0].Board != "main" { + t.Fatalf("entries = %+v, want one main pin for %s", entries, wid) } +} - // @1234:deploy and @5678:main should remain. - have := map[string]bool{} - for _, e := range entries { - have[e.WindowID+":"+e.Board] = true +func TestPin_Idempotent(t *testing.T) { + server := withBoardTmux(t) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + wid := createHomeWindow(t, server, "home", "agent") + pin, _ := PinSessionName(wid) + + if err := Pin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Pin first: %v", err) + } + order1, _ := showSessionOption(ctx, server, pin, BoardOrderOption) + + if err := Pin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Pin second (idempotent): %v", err) + } + order2, _ := showSessionOption(ctx, server, pin, BoardOrderOption) + if order1 != order2 { + t.Errorf("order key churned on idempotent re-pin: %q -> %q", order1, order2) } - if !have["@1234:deploy"] || !have["@5678:main"] { - t.Errorf("got entries %+v, want @1234:deploy and @5678:main", entries) + pinWindows := windowsInSession(t, server, pin) + if len(pinWindows) != 1 { + t.Errorf("idempotent re-pin changed pin window count: %v", pinWindows) } } -func TestUnpin_Idempotent(t *testing.T) { +func TestPin_RePinToDifferentBoardRestamps(t *testing.T) { server := withBoardTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 
10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) defer cancel() - // Unpin from empty — no error. - if err := Unpin(ctx, server, "@1234", "main"); err != nil { - t.Fatalf("unpin from empty: %v", err) + wid := createHomeWindow(t, server, "home", "agent") + pin, _ := PinSessionName(wid) + + if err := Pin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Pin to main: %v", err) + } + // Re-pin the already-pinned window to a DIFFERENT board. This must re-stamp + // @rk_board (not silently no-op leaving it on "main"), and must not move the + // window or churn its pin-session. + if err := Pin(ctx, server, wid, "deploy"); err != nil { + t.Fatalf("Pin to deploy (re-pin): %v", err) + } + got, _ := showSessionOption(ctx, server, pin, BoardOption) + if got != "deploy" { + t.Errorf("re-pin to different board did not re-stamp @rk_board: got %q, want %q", got, "deploy") + } + pinWindows := windowsInSession(t, server, pin) + if len(pinWindows) != 1 { + t.Errorf("re-pin to different board changed pin window count: %v", pinWindows) } } -func TestReorder_UpdatesOrderKey(t *testing.T) { +func TestPin_AppendsMonotonicWithinBoard(t *testing.T) { server := withBoardTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) defer cancel() - if err := Pin(ctx, server, "@1234", "main"); err != nil { + w1 := createHomeWindow(t, server, "home", "a1") + w2 := createHomeWindow(t, server, "home", "a2") + if err := Pin(ctx, server, w1, "main"); err != nil { t.Fatal(err) } - if err := Reorder(ctx, server, "@1234", "main", "m"); err != nil { + if err := Pin(ctx, server, w2, "main"); err != nil { t.Fatal(err) } - entries, err := ListBoardEntries(ctx, server) + entries, err := GetBoard(ctx, "main") if err != nil { t.Fatal(err) } - if len(entries) != 1 || entries[0].OrderKey != "m" { - t.Errorf("got %+v, want order key m", entries) + var k1, k2 string + for _, 
e := range entries { + switch e.WindowID { + case w1: + k1 = e.OrderKey + case w2: + k2 = e.OrderKey + } + } + if k1 == "" || k2 == "" || !(k1 < k2) { + t.Errorf("expected k1 < k2, got k1=%q k2=%q", k1, k2) } } -func TestReorder_NotFound(t *testing.T) { +func TestUnpin_RestoresToLiveHome(t *testing.T) { server := withBoardTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) defer cancel() - err := Reorder(ctx, server, "@1234", "main", "a") - if err == nil { - t.Error("expected error for missing entry") + wid := createHomeWindow(t, server, "home", "agent") + pin, _ := PinSessionName(wid) + + if err := Pin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Pin: %v", err) + } + if err := Unpin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Unpin: %v", err) + } + // Pin-session is gone. + if hasSession(t, server, pin) { + t.Errorf("pin session %s survived Unpin", pin) + } + // Window is back in home. + found := false + for _, id := range windowsInSession(t, server, "home") { + if id == wid { + found = true + } + } + if !found { + t.Errorf("window %s not restored to home after Unpin", wid) } } -func TestRemoveAllByWindowID(t *testing.T) { +func TestUnpin_RecreatesDeadHome(t *testing.T) { server := withBoardTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) defer cancel() - if err := Pin(ctx, server, "@1234", "main"); err != nil { - t.Fatal(err) - } - if err := Pin(ctx, server, "@1234", "deploy"); err != nil { - t.Fatal(err) - } - if err := Pin(ctx, server, "@5678", "main"); err != nil { - t.Fatal(err) + // Create a dedicated home with two windows so it survives moving one out, + // then we kill it while the pin is active to exercise the recreate path. 
+ if err := CreateSession("temp", "", server); err != nil { + t.Fatalf("create temp home: %v", err) } + wid := createHomeWindow(t, server, "temp", "agent") + pin, _ := PinSessionName(wid) - boards, err := RemoveAllByWindowID(ctx, server, "@1234") - if err != nil { - t.Fatal(err) + if err := Pin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Pin: %v", err) + } + // Kill the home session while the window is pinned (home is now empty of the + // pinned window but may still hold its other window — kill the whole session). + if err := KillSession("temp", server); err != nil { + t.Fatalf("kill home: %v", err) } - if len(boards) != 2 || boards[0] != "deploy" || boards[1] != "main" { - t.Errorf("got boards %v, want [deploy main]", boards) + if hasSession(t, server, "temp") { + t.Fatalf("home session 'temp' still alive after kill") } - entries, err := ListBoardEntries(ctx, server) - if err != nil { - t.Fatal(err) + if err := Unpin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Unpin (recreate home): %v", err) } - if len(entries) != 1 || entries[0].WindowID != "@5678" { - t.Errorf("after removal got %+v, want only @5678", entries) + // Home recreated with the moved window as a member; pin-session gone. + if hasSession(t, server, pin) { + t.Errorf("pin session %s survived Unpin recreate", pin) } - - // Removing again is a no-op. - boards2, err := RemoveAllByWindowID(ctx, server, "@1234") - if err != nil { - t.Fatal(err) + if !hasSession(t, server, "temp") { + t.Fatalf("home session 'temp' was not recreated") } - if len(boards2) != 0 { - t.Errorf("re-remove got %v, want empty", boards2) + ids := windowsInSession(t, server, "temp") + if len(ids) != 1 || ids[0] != wid { + t.Errorf("recreated home windows = %v, want [%s] (sole window, no placeholder)", ids, wid) + } + // The recreated home must not carry board membership vars. 
+ if b, _ := showSessionOption(ctx, server, "temp", BoardOption); b != "" { + t.Errorf("recreated home retained @rk_board=%q", b) } } -func TestListBoardEntries_UnsetReturnsEmpty(t *testing.T) { +func TestUnpin_Idempotent(t *testing.T) { server := withBoardTmux(t) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - got, err := ListBoardEntries(ctx, server) - if err != nil { - t.Fatalf("err: %v", err) - } - if len(got) != 0 { - t.Errorf("got %v, want empty", got) + // Unpin a window that was never pinned — no pin-session, silent success. + if err := Unpin(ctx, server, "@9999", "main"); err != nil { + t.Fatalf("unpin of never-pinned window: %v", err) } } -func TestListBoards_AlphabeticalAggregation(t *testing.T) { +func TestReorder_RewritesOnlyOneVar(t *testing.T) { server := withBoardTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) defer cancel() - if err := Pin(ctx, server, "@1234", "main"); err != nil { - t.Fatal(err) - } - if err := Pin(ctx, server, "@5678", "main"); err != nil { + w1 := createHomeWindow(t, server, "home", "a1") + w2 := createHomeWindow(t, server, "home", "a2") + if err := Pin(ctx, server, w1, "main"); err != nil { t.Fatal(err) } - if err := Pin(ctx, server, "@9999", "deploy"); err != nil { + if err := Pin(ctx, server, w2, "main"); err != nil { t.Fatal(err) } + pin2, _ := PinSessionName(w2) + before2, _ := showSessionOption(ctx, server, pin2, BoardOrderOption) - // Force ListBoards to use this server only by skipping ListServers — instead - // call the helper directly. ListBoards iterates ListServers, which may - // return many servers in CI; we only validate via ListBoardEntries summary - // helpers here. Compose the summary manually. 
- entries, err := ListBoardEntries(ctx, server) - if err != nil { - t.Fatal(err) + if err := Reorder(ctx, server, w1, "main", "z"); err != nil { + t.Fatalf("Reorder: %v", err) } - counts := map[string]int{} - for _, e := range entries { - counts[e.Board]++ + pin1, _ := PinSessionName(w1) + after1, _ := showSessionOption(ctx, server, pin1, BoardOrderOption) + after2, _ := showSessionOption(ctx, server, pin2, BoardOrderOption) + if after1 != "z" { + t.Errorf("reordered window key = %q, want z", after1) + } + if after2 != before2 { + t.Errorf("sibling key changed: %q -> %q (no renumber expected)", before2, after2) } - if counts["main"] != 2 || counts["deploy"] != 1 { - t.Errorf("counts = %v, want main:2 deploy:1", counts) +} + +func TestReorder_NotFound(t *testing.T) { + server := withBoardTmux(t) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := Reorder(ctx, server, "@9999", "main", "a"); err == nil { + t.Error("expected error for missing pin-session") } } -func TestGetBoard_DropsStaleEntries(t *testing.T) { +func TestListBoardEntries_NoPinsReturnsEmpty(t *testing.T) { server := withBoardTmux(t) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - // boot session has window @1 (or similar) — discover its real id. - rawIDs, err := tmuxExecRawServer(ctx, server, "list-windows", "-a", "-F", "#{window_id}") + got, err := ListBoardEntries(ctx, server) if err != nil { - t.Fatalf("list-windows: %v", err) + t.Fatalf("err: %v", err) } - ids := strings.Split(strings.TrimSpace(rawIDs), "\n") - if len(ids) == 0 || ids[0] == "" { - t.Fatal("no live windows on bootstrap session") + if len(got) != 0 { + t.Errorf("got %v, want empty", got) } - liveID := ids[0] +} - // Pin one live and one stale. 
- if err := Pin(ctx, server, liveID, "main"); err != nil { +func TestEmptyBoardVanishesOnLastUnpin(t *testing.T) { + server := withBoardTmux(t) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + wid := createHomeWindow(t, server, "home", "only") + if err := Pin(ctx, server, wid, "deploy"); err != nil { t.Fatal(err) } - if err := Pin(ctx, server, "@9999999", "main"); err != nil { + // Board exists while the pin exists (filter to our server's entries). + entries, err := ListBoardEntries(ctx, server) + if err != nil { t.Fatal(err) } - - // GetBoard runs the cleanup. We can't easily inject ListServers, so call - // ListBoardEntries afterwards on this server only and verify the stale - // entry was written back. To avoid pulling other servers into the test, - // run GetBoard, then re-read entries on the test server only. - gb, err := GetBoard(ctx, "main") - if err != nil { - t.Fatalf("GetBoard: %v", err) - } - // gb may include entries from other servers if ListServers returns more. - // We just verify the stale @9999999 is not present and the live one is. - foundLive := false - for _, e := range gb { - if e.Server == server { - if e.WindowID == "@9999999" { - t.Errorf("stale @9999999 leaked into GetBoard result") - } - if e.WindowID == liveID { - foundLive = true - } + foundDeploy := false + for _, e := range entries { + if e.Board == "deploy" { + foundDeploy = true } } - if !foundLive { - t.Errorf("live entry %s not found in GetBoard result", liveID) + if !foundDeploy { + t.Fatalf("board 'deploy' not derived while pin exists") } - // Also assert the option was rewritten. - entries, err := ListBoardEntries(ctx, server) + if err := Unpin(ctx, server, wid, "deploy"); err != nil { + t.Fatal(err) + } + // After the last unpin, no pin carries @rk_board=deploy on this server. 
+ entries2, err := ListBoardEntries(ctx, server) if err != nil { t.Fatal(err) } - for _, e := range entries { - if e.WindowID == "@9999999" { - t.Errorf("write-back failed: stale entry still in @rk_board") + for _, e := range entries2 { + if e.Board == "deploy" { + t.Errorf("board 'deploy' still derived after last unpin: %+v", e) } } } diff --git a/app/backend/internal/tmux/reaper.go b/app/backend/internal/tmux/reaper.go index 4dca9bcf..d0f05339 100644 --- a/app/backend/internal/tmux/reaper.go +++ b/app/backend/internal/tmux/reaper.go @@ -123,8 +123,7 @@ type ReapResult struct { // // Per-entry failures are logged via slog and skipped — a single failure MUST // NOT abort the sweep. An aggregate error describing the failed entries is -// returned at the end (nil when every entry succeeded), mirroring -// sweepOrphanedRelaySessions. +// returned at the end (nil when every entry succeeded). func ReapTestServers(ctx context.Context, prefix string, act, force bool) (ReapResult, error) { if len(prefix) <= minSafePrefixLen && !force { return ReapResult{}, fmt.Errorf( diff --git a/app/backend/internal/tmux/socketsweep_test.go b/app/backend/internal/tmux/socketsweep_test.go index c4c3c2cf..23e31c65 100644 --- a/app/backend/internal/tmux/socketsweep_test.go +++ b/app/backend/internal/tmux/socketsweep_test.go @@ -157,41 +157,3 @@ func TestSweepDeadTestSockets_reapsOwnAndDeadSparesOtherLive(t *testing.T) { t.Errorf("dead-PID socket %q survived — the post-sweep must reap it", dead) } } - -// TestGetSessionOwnerPID_unsetReturnsEmpty verifies that an un-stamped session -// reads back as "" with no error — the "orphan" signal the sweep treats as -// reapable. Mirrors GetSessionOrder's unset-tolerance contract. 
-func TestGetSessionOwnerPID_unsetReturnsEmpty(t *testing.T) { - server := withSessionOrderTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - got, err := GetSessionOwnerPID(ctx, server, "boot") - if err != nil { - t.Fatalf("GetSessionOwnerPID unset: %v", err) - } - if got != "" { - t.Errorf("got %q, want empty", got) - } -} - -// TestSetSessionOwnerPID_roundTrip stamps @rk_owner_pid on a session and reads -// it back verbatim — the create-side/sweep-side contract that lets the sweep -// spare a live owner and reap a dead one. -func TestSetSessionOwnerPID_roundTrip(t *testing.T) { - server := withSessionOrderTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - want := os.Getpid() - if err := SetSessionOwnerPID(ctx, server, "boot", want); err != nil { - t.Fatalf("SetSessionOwnerPID: %v", err) - } - got, err := GetSessionOwnerPID(ctx, server, "boot") - if err != nil { - t.Fatalf("GetSessionOwnerPID: %v", err) - } - if got != strconv.Itoa(want) { - t.Errorf("owner pid round-trip: got %q, want %q", got, strconv.Itoa(want)) - } -} diff --git a/app/backend/internal/tmux/tmux.go b/app/backend/internal/tmux/tmux.go index dac5b35e..e41c9669 100644 --- a/app/backend/internal/tmux/tmux.go +++ b/app/backend/internal/tmux/tmux.go @@ -20,13 +20,6 @@ import ( // JSON-encoded sidebar session order. const SessionOrderOption = "@rk_session_order" -// OwnerPIDOption is the session-scoped user option stamped on each relay -// ephemeral with the PID of the owning `rk serve` process. The startup sweep -// reads it to distinguish a live sibling's relays (spare) from a crashed -// predecessor's orphans (reap). Session-scoped so it dies with the ephemeral -// and never bleeds onto the real session through the session group. -const OwnerPIDOption = "@rk_owner_pid" - // OriginalTMUX captures the TMUX env var before init() strips it. 
// Package-level var init runs before init(), so this sees the original value. // Used by cmd/rk/context.go to restore TMUX in child process environments @@ -159,18 +152,48 @@ const ( ActivityThresholdSeconds = 10 // listDelim is the tab delimiter used in tmux format strings. listDelim = "\t" - // RelaySessionPrefix is the reserved name prefix for run-kit's per-WebSocket - // ephemeral grouped sessions. Sessions matching this prefix are filtered out - // of user-facing session lists and reaped at server start. - RelaySessionPrefix = "rk-relay-" + // PinSessionPrefix is the reserved name prefix for run-kit's single-window + // board pin-sessions. Each pinned window is MOVED into its own session named + // `_rk-pin-<N>` (the window's `@N` id with the `@` stripped, since + // tmux session names disallow `@`). Sessions matching this prefix are filtered + // out of user-facing session lists — a board is the set of pin-sessions + // sharing an `@rk_board` value, not a session itself. Pin-sessions are + // persistent across rk restarts (Constitution VI); there is no startup sweep. + PinSessionPrefix = "_rk-pin-" // ControlAnchorSessionName is the literal name of the hidden anchor session // created by the tmuxctl package on tmux servers that have zero user // sessions (a `tmux -CC attach` requires an attached session). It is - // filtered from user-facing session lists in parseSessions and is NEVER - // touched by the relay sweep — it's owned by tmuxctl, not the relay. + // filtered from user-facing session lists in parseSessions — it's owned by + // tmuxctl, not user-facing. ControlAnchorSessionName = "_rk-ctl" ) +// PinSessionName derives the single-window pin-session name for a window id by +// stripping the leading `@` (tmux session names disallow `@`): `@42` → +// `_rk-pin-42`. Returns ("", false) for an invalid window id. The mapping is +// pure and reversible (see WindowIDFromPinSession), so membership needs no +// name→id lookup table.
+func PinSessionName(windowID string) (string, bool) { + if !ValidWindowID(windowID) { + return "", false + } + return PinSessionPrefix + windowID[1:], true +} + +// WindowIDFromPinSession is the inverse of PinSessionName: `_rk-pin-42` → `@42`. +// Returns ("", false) when name lacks the prefix or the recovered id is not a +// valid `@<N>` window id. +func WindowIDFromPinSession(name string) (string, bool) { + if !strings.HasPrefix(name, PinSessionPrefix) { + return "", false + } + id := "@" + strings.TrimPrefix(name, PinSessionPrefix) + if !ValidWindowID(id) { + return "", false + } + return id, true +} + // PaneInfo describes a single tmux pane within a window. type PaneInfo struct { PaneID string `json:"paneId"` @@ -273,12 +296,15 @@ func parseSessions(lines []string) []SessionInfo { if len(parts) < 2 { continue } - // Filter run-kit's per-WebSocket ephemeral grouped sessions from every - // user-facing session list. This is the single chokepoint — every + // Filter run-kit's single-window board pin-sessions from every + // user-facing session list. A pinned window is physically MOVED into + // its `_rk-pin-*` session, so it leaves its home session's tab list; + // the pin-session itself is never a user-facing SESSIONS entry (it is + // rendered only as a BOARDS pane). This is the single chokepoint — every // consumer (REST, SSE, board derivation, server-aggregate) flows // through ListSessions/parseSessions, so a single early-skip here - // guarantees no ephemeral leaks into the UI. - if strings.HasPrefix(parts[0], RelaySessionPrefix) { + // guarantees no pin-session leaks into the SESSIONS UI.
+ if strings.HasPrefix(parts[0], PinSessionPrefix) { continue } // Filter the tmuxctl control-mode anchor session — owned by the @@ -341,14 +367,11 @@ func parseSessions(lines []string) []SessionInfo { return sessions } -// ListRawSessionNames returns every session name on the given server WITHOUT -// the user-facing filters applied by ListSessions (group-copy de-duplication -// and rk-relay-* exclusion). It is intended only for housekeeping callers that -// need to see every session, such as the startup sweep that reaps orphan -// rk-relay-* ephemerals from a prior crashed instance. -// -// Returns nil if the server is not running. -func ListRawSessionNames(ctx context.Context, server string) ([]string, error) { +// ListPinSessionNames returns every `_rk-pin-*` session name on the given +// server. Board membership is derived from these single-window pin-sessions and +// their session vars (`@rk_board`/`@rk_home`/`@rk_board_order`). Returns nil +// (no error) if the server is not running. Read-only. +func ListPinSessionNames(ctx context.Context, server string) ([]string, error) { ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) defer cancel() @@ -360,12 +383,18 @@ func ListRawSessionNames(ctx context.Context, server string) ([]string, error) { } return nil, err } - return lines, nil + var pins []string + for _, name := range lines { + if strings.HasPrefix(name, PinSessionPrefix) { + pins = append(pins, name) + } + } + return pins, nil } // ListSessions returns sessions from the specified tmux server, -// filtering out session-group copies and run-kit's per-WebSocket ephemerals -// (RelaySessionPrefix). Returns nil if no server is running. +// filtering out session-group copies and run-kit's board pin-sessions +// (PinSessionPrefix). Returns nil if no server is running. 
func ListSessions(ctx context.Context, server string) ([]SessionInfo, error) { ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) defer cancel() @@ -541,10 +570,11 @@ func ListWindows(ctx context.Context, session string, server string) ([]WindowIn // baseGroupName returns the user-facing base session name for a session group, // given the session's own name and its `#{session_group_list}` value (a -// comma-separated list of MEMBER NAMES). The base is the member that is neither -// an rk-relay-* ephemeral nor the _rk-ctl anchor — i.e. the real, user-facing -// session that the dashboard keys on. When the list is empty (ungrouped -// session) or yields no qualifying member, the session's own name is returned. +// comma-separated list of MEMBER NAMES). The base is the member that is not the +// _rk-ctl anchor — i.e. the real, user-facing session that the dashboard keys +// on. (Relay ephemerals no longer exist, so only the anchor is filtered.) When +// the list is empty (ungrouped session) or yields no qualifying member, the +// session's own name is returned. // // This MUST NOT key on `#{session_group}`: tmux 3.6a reports that field as an // opaque NUMERIC group id (e.g. "0"), not the leader's name — so `name == @@ -559,7 +589,7 @@ func baseGroupName(name, groupList string) string { if m == "" { continue } - if strings.HasPrefix(m, RelaySessionPrefix) || m == ControlAnchorSessionName { + if m == ControlAnchorSessionName { continue } return m @@ -569,10 +599,10 @@ func baseGroupName(name, groupList string) string { // parseSessionGroups parses `list-sessions` output of the form // `#{session_id}#{session_name}#{session_group_list}` into a -// `$sid`→base-session-name map. The rk-relay-* ephemerals and the _rk-ctl -// anchor are NOT filtered here — they share their base session's group, so their -// `$sid` must resolve to the SAME base name for an active-window event fired -// against an ephemeral member to update the correct (user-facing) group. 
The +// `$sid`→base-session-name map. The _rk-ctl anchor is NOT filtered here — it +// shares its base session's group, so its `$sid` must resolve to the SAME base +// name for an active-window event fired against the anchor member to update the +// correct (user-facing) group. The // group key is the base session name (via baseGroupName), NOT tmux's numeric // `#{session_group}` id, so it matches the `SessionInfo.Name` the derivation // path looks up. Lines with fewer than 3 fields are skipped. Exported @@ -671,7 +701,7 @@ func parseActiveWindowsByGroup(lines []string) map[string]string { baseSeen[base] = true continue } - // Non-base (ephemeral/anchor) row — only used as a fallback if the + // Non-base (anchor) row — only used as a fallback if the // group never produces a base-member row in this listing. if !baseSeen[base] { if _, ok := out[base]; !ok { @@ -866,42 +896,12 @@ func KillSessionCtx(ctx context.Context, server, session string) error { return err } -// NewGroupedSession creates a detached ephemeral session in the same group as -// realSession on the given tmux server using `tmux new-session -d -s -// -t `. The new session shares window membership with realSession -// but maintains independent active-window state — clients attached to it can -// navigate windows independently of clients attached to other group members. -// -// Used by the WebSocket relay to give each connection its own attach target so -// concurrent board panes targeting the same real session do not steal each -// other's active window. The returned session MUST be killed by the caller -// (typically via `defer KillSessionCtx`). -// -// The parent ctx is wrapped with TmuxTimeout consistent with sibling helpers. -// -// Returns a non-nil error if realSession does not exist on the server. 
tmux's -// new-session -t silently creates an empty group when the target is missing, -// which would leak a useless ephemeral; we explicitly probe with has-session -// first so the caller's defer-kill is the only path that creates ephemerals. -func NewGroupedSession(ctx context.Context, server, realSession, ephemeral string) error { - ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) - defer cancel() - - if _, err := tmuxExecServer(ctx, server, "has-session", "-t", realSession); err != nil { - return fmt.Errorf("real session %q not found: %w", realSession, err) - } - _, err := tmuxExecServer(ctx, server, "new-session", "-d", "-s", ephemeral, "-t", realSession) - return err -} - -// ResolveWindowSession returns the name of the user-facing session that owns -// the window identified by windowID on the given server. Ephemeral relay -// sessions (RelaySessionPrefix) are filtered out — a window in a session group -// appears under every group member, and `display-message -t @N` may pick the -// ephemeral over the real session, which would make a fresh relay group itself -// against a dying ephemeral. Returns an error when the window ID does not exist -// in any non-ephemeral session — callers (e.g. the relay) treat that as -// "window not found". +// ResolveWindowSession returns the name of the session that owns the window +// identified by windowID on the given server — either a normal home session or +// a board pin-session (`_rk-pin-*`). Since a window lives in exactly ONE session +// (the move-based model removes window sharing), the first match is +// authoritative. Returns an error when the window ID does not exist in any +// session — callers (e.g. the relay) treat that as "window not found". 
func ResolveWindowSession(ctx context.Context, server, windowID string) (string, error) { lines, err := tmuxExecServer(ctx, server, "list-windows", "-a", "-F", "#{session_name}"+listDelim+"#{window_id}") if err != nil { @@ -917,9 +917,6 @@ func ResolveWindowSession(ctx context.Context, server, windowID string) (string, if id != windowID { continue } - if strings.HasPrefix(session, RelaySessionPrefix) { - continue - } if session == "" { continue } @@ -1190,44 +1187,6 @@ func UnsetSessionColor(session string, server string) error { return err } -// SetSessionOwnerPID stamps the @rk_owner_pid user option on a relay ephemeral -// session with the owning `rk serve` process PID. Session-scoped (mirrors -// SetSessionColor's `set-option -t ` pattern) so ownership lives on the -// ephemeral itself and is never inherited by the real session through the -// session group. The startup sweep reads this to spare a live sibling's relays. -func SetSessionOwnerPID(ctx context.Context, server, session string, pid int) error { - ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) - defer cancel() - - _, err := tmuxExecServer(ctx, server, "set-option", "-t", session, OwnerPIDOption, strconv.Itoa(pid)) - return err -} - -// GetSessionOwnerPID reads the @rk_owner_pid user option from a session and -// returns its raw string value, or "" when the option is unset or the server is -// unreachable. Mirrors GetSessionOrder's tolerance: tmux reports an unset -// user-option as "invalid option"/"unknown option" and an absent socket as -// "no server running"/"failed to connect" — both are normal states that the -// sweep MUST treat as "no owner" (→ orphan) rather than a hard error. Other -// subprocess failures propagate so the caller can log + accumulate per server. 
-func GetSessionOwnerPID(ctx context.Context, server, session string) (string, error) { - ctx, cancel := context.WithTimeout(ctx, TmuxTimeout) - defer cancel() - - out, err := tmuxExecRawServer(ctx, server, "show-options", "-v", "-t", session, OwnerPIDOption) - if err != nil { - errMsg := err.Error() - if strings.Contains(errMsg, "invalid option") || - strings.Contains(errMsg, "unknown option") || - strings.Contains(errMsg, "no server running") || - strings.Contains(errMsg, "failed to connect") { - return "", nil - } - return "", fmt.Errorf("read %s on %s: %w", OwnerPIDOption, session, err) - } - return strings.TrimSpace(out), nil -} - // SetWindowColor sets the @color user option on a window by its window ID. func SetWindowColor(windowID string, color int, server string) error { ctx, cancel := withTimeout() @@ -1259,8 +1218,9 @@ func SelectWindow(windowID string, server string) error { // ":". A bare window-id target (`select-window -t @N`) is // ambiguous inside a tmux session group — group members share window membership // but keep independent active-window state, so tmux may set the active window on -// the wrong member. The relay needs the active window set on its per-WebSocket -// ephemeral specifically, so it qualifies the target with the ephemeral session. +// the wrong member. The REST window-select handler (api/windows.go handleWindowSelect) +// resolves the owning session and qualifies the target with it so the active window +// is set on the intended session, not an arbitrary group member. 
func SelectWindowInSession(session, windowID, server string) error { ctx, cancel := withTimeout() defer cancel() diff --git a/app/backend/internal/tmux/tmux_test.go b/app/backend/internal/tmux/tmux_test.go index 5e493804..4d1d68f3 100644 --- a/app/backend/internal/tmux/tmux_test.go +++ b/app/backend/internal/tmux/tmux_test.go @@ -178,28 +178,36 @@ func TestParseSessions(t *testing.T) { want: []SessionInfo{{Name: "alpha", Color: intPtr(4)}, {Name: "beta"}}, }, { - name: "filters rk-relay-* ephemerals from user-facing list", + name: "filters _rk-pin-* board pin-sessions from user-facing list", lines: []string{ sessionLine("agent", "0", "agent"), - sessionLine("rk-relay-deadbeef", "0", "rk-relay-deadbeef"), + sessionLine("_rk-pin-42", "0", "_rk-pin-42"), sessionLine("dev", "0", "dev"), }, want: []SessionInfo{{Name: "agent"}, {Name: "dev"}}, }, { - name: "rk-relay-* exclusion still allows group leaders to be kept", + name: "_rk-pin-* exclusion still allows group leaders to be kept", lines: []string{ sessionLineGrouped("devshell", "1", "devshell", 2), sessionLineGrouped("devshell-82", "1", "devshell", 2), - sessionLine("rk-relay-cafebabe", "0", "rk-relay-cafebabe"), + sessionLine("_rk-pin-7", "0", "_rk-pin-7"), }, want: []SessionInfo{{Name: "devshell"}}, }, { - name: "only rk-relay-* sessions present returns nil", + name: "relay ephemerals are no longer filtered (relay layer removed)", lines: []string{ - sessionLine("rk-relay-aaaa1111", "0", "rk-relay-aaaa1111"), - sessionLine("rk-relay-bbbb2222", "0", "rk-relay-bbbb2222"), + sessionLine("rk-relay-deadbeef", "0", "rk-relay-deadbeef"), + sessionLine("dev", "0", "dev"), + }, + want: []SessionInfo{{Name: "rk-relay-deadbeef"}, {Name: "dev"}}, + }, + { + name: "only _rk-pin-* sessions present returns nil", + lines: []string{ + sessionLine("_rk-pin-1", "0", "_rk-pin-1"), + sessionLine("_rk-pin-2", "0", "_rk-pin-2"), }, want: nil, }, @@ -1109,10 +1117,10 @@ func TestGetSessionOrder_invalidJSONReturnsSyntaxError(t *testing.T) { 
} } -// withGroupedSessionTmux starts an isolated tmux server with a "real" session -// containing two windows for the NewGroupedSession integration tests. Skips -// the test if tmux is unavailable. Returns (server, realSession). -func withGroupedSessionTmux(t *testing.T) (string, string) { +// withRealSessionTmux starts an isolated tmux server with a "real" session +// containing two windows. Skips the test if tmux is unavailable. Returns +// (server, realSession). +func withRealSessionTmux(t *testing.T) (string, string) { t.Helper() if _, err := exec.LookPath("tmux"); err != nil { t.Skip("tmux not available — skipping integration test") @@ -1126,7 +1134,6 @@ func withGroupedSessionTmux(t *testing.T) (string, string) { if out, err := cmd.CombinedOutput(); err != nil { t.Skipf("could not start isolated tmux server %q: %v\n%s", server, err, string(out)) } - // Add a second window so we can verify group window membership is shared. addCtx, cancelAdd := context.WithTimeout(context.Background(), 5*time.Second) defer cancelAdd() if out, err := exec.CommandContext(addCtx, "tmux", "-L", server, "new-window", "-t", real, "-n", "win1").CombinedOutput(); err != nil { @@ -1141,66 +1148,14 @@ func withGroupedSessionTmux(t *testing.T) (string, string) { return server, real } -func TestNewGroupedSession_success(t *testing.T) { - server, real := withGroupedSessionTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - ephemeral := "rk-relay-test1234" - if err := NewGroupedSession(ctx, server, real, ephemeral); err != nil { - t.Fatalf("NewGroupedSession: %v", err) - } - - // Ephemeral appears in the raw session list (the user-facing ListSessions - // filters rk-relay-*, so we use the raw helper). 
- names, err := ListRawSessionNames(ctx, server) - if err != nil { - t.Fatalf("ListRawSessionNames: %v", err) - } - found := false - for _, n := range names { - if n == ephemeral { - found = true - break - } - } - if !found { - t.Fatalf("ephemeral %q not in raw session list: %v", ephemeral, names) - } - - // Window membership is shared with the real session. - winLines, err := tmuxExecServer(ctx, server, "list-windows", "-t", ephemeral, "-F", "#{window_index}") - if err != nil { - t.Fatalf("list-windows for ephemeral: %v", err) - } - realWinLines, err := tmuxExecServer(ctx, server, "list-windows", "-t", real, "-F", "#{window_index}") - if err != nil { - t.Fatalf("list-windows for real: %v", err) - } - if len(winLines) != len(realWinLines) { - t.Errorf("ephemeral has %d windows, real has %d (should be equal in a session group)", len(winLines), len(realWinLines)) - } - if len(winLines) < 2 { - t.Errorf("ephemeral has %d windows, expected ≥2 from real session", len(winLines)) - } -} - -// Regression: when a relay's ephemeral session is grouped with the real -// session, every window appears under both sessions. ResolveWindowSession -// must return the real (user-facing) session, not the ephemeral — otherwise -// a fresh relay groups itself against a dying ephemeral and tears down with -// it. -func TestResolveWindowSession_skipsEphemeralGroupMember(t *testing.T) { - server, real := withGroupedSessionTmux(t) +// ResolveWindowSession returns the session a window lives in. In the move-based +// model a window lives in exactly one session (home or `_rk-pin-*`), so a window +// in the real session resolves to that session. 
+func TestResolveWindowSession_findsOwningSession(t *testing.T) { + server, real := withRealSessionTmux(t) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - ephemeral := "rk-relay-resolve-test" - if err := NewGroupedSession(ctx, server, real, ephemeral); err != nil { - t.Fatalf("NewGroupedSession: %v", err) - } - - // Pick a window ID that exists in both group members. lines, err := tmuxExecServer(ctx, server, "list-windows", "-t", real, "-F", "#{window_id}") if err != nil || len(lines) == 0 { t.Fatalf("list-windows on real session: lines=%v err=%v", lines, err) @@ -1212,63 +1167,35 @@ func TestResolveWindowSession_skipsEphemeralGroupMember(t *testing.T) { t.Fatalf("ResolveWindowSession: %v", err) } if got != real { - t.Errorf("ResolveWindowSession(%q) = %q, want %q (ephemeral group member must be skipped)", id, got, real) + t.Errorf("ResolveWindowSession(%q) = %q, want %q", id, got, real) } } -func TestNewGroupedSession_missingRealSessionFails(t *testing.T) { - server, _ := withGroupedSessionTmux(t) +func TestKillSessionCtx_killsSession(t *testing.T) { + server, _ := withRealSessionTmux(t) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - ephemeral := "rk-relay-test5678" - if err := NewGroupedSession(ctx, server, "ghost", ephemeral); err == nil { - t.Fatal("expected error when real session does not exist, got nil") + // Create a throwaway session, then kill it via KillSessionCtx. + if _, err := tmuxExecServer(ctx, server, "new-session", "-d", "-s", "victim"); err != nil { + t.Fatalf("create victim session: %v", err) } - - // Ephemeral must NOT have been created on failure. 
- names, err := ListRawSessionNames(ctx, server) - if err != nil { - t.Fatalf("ListRawSessionNames: %v", err) - } - for _, n := range names { - if n == ephemeral { - t.Errorf("ephemeral %q should not exist after failed NewGroupedSession", ephemeral) - } - } -} - -func TestKillSessionCtx_killsEphemeral(t *testing.T) { - server, real := withGroupedSessionTmux(t) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - ephemeral := "rk-relay-deadbeef" - if err := NewGroupedSession(ctx, server, real, ephemeral); err != nil { - t.Fatalf("NewGroupedSession: %v", err) - } - if err := KillSessionCtx(ctx, server, ephemeral); err != nil { + if err := KillSessionCtx(ctx, server, "victim"); err != nil { t.Fatalf("KillSessionCtx: %v", err) } - names, err := ListRawSessionNames(ctx, server) - if err != nil { - t.Fatalf("ListRawSessionNames: %v", err) - } - for _, n := range names { - if n == ephemeral { - t.Errorf("ephemeral %q still present after KillSessionCtx", ephemeral) - } + if _, err := tmuxExecRawServer(ctx, server, "has-session", "-t", "victim"); err == nil { + t.Errorf("session 'victim' still present after KillSessionCtx") } } -func TestListSessions_filtersRkRelay(t *testing.T) { - server, real := withGroupedSessionTmux(t) +func TestListSessions_filtersPinSessions(t *testing.T) { + server, real := withRealSessionTmux(t) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - ephemeral := "rk-relay-feedface" - if err := NewGroupedSession(ctx, server, real, ephemeral); err != nil { - t.Fatalf("NewGroupedSession: %v", err) + // A `_rk-pin-*` session must be filtered out of the user-facing list. 
+ if _, err := tmuxExecServer(ctx, server, "new-session", "-d", "-s", PinSessionPrefix+"42"); err != nil { + t.Fatalf("create pin session: %v", err) } got, err := ListSessions(ctx, server) @@ -1276,11 +1203,10 @@ func TestListSessions_filtersRkRelay(t *testing.T) { t.Fatalf("ListSessions: %v", err) } for _, s := range got { - if strings.HasPrefix(s.Name, RelaySessionPrefix) { - t.Errorf("ListSessions returned ephemeral %q — should be filtered", s.Name) + if strings.HasPrefix(s.Name, PinSessionPrefix) { + t.Errorf("ListSessions returned pin-session %q — should be filtered", s.Name) } } - // Real session should still be present. foundReal := false for _, s := range got { if s.Name == real { diff --git a/app/frontend/src/api/boards.ts b/app/frontend/src/api/boards.ts index 1a16b2da..385d469a 100644 --- a/app/frontend/src/api/boards.ts +++ b/app/frontend/src/api/boards.ts @@ -16,7 +16,13 @@ export interface BoardSummary { pinCount: number; } -/** A single pinned-window entry on a board, joined with live window data for rendering. */ +/** + * A single pinned-window entry on a board, joined with live window data for + * rendering. In the move-based model the pinned window has been MOVED into its + * own single-window pin-session (`_rk-pin-`) server-side, so `session` is + * that pin-session — the relay resolves it from `windowId` transparently, and + * the component does not need to know it is a pin-session. + */ export interface BoardEntry { server: string; windowId: string; @@ -33,7 +39,12 @@ export interface ReorderResponse { newOrderKey: string; } -/** GET /api/boards — aggregated across all servers, sorted by name. */ +/** + * GET /api/boards — derived from `_rk-pin-*` pin-sessions grouped by their + * `@rk_board` session var, summarized across reachable servers and sorted by + * name. Boards are server-scoped (a pinned window's session lives on one tmux + * server); a board exists only while at least one pin carries its name. 
+ */ export async function listBoards(): Promise { const res = await deduplicatedFetch("/api/boards"); if (!res.ok) await throwOnError(res); diff --git a/app/frontend/src/hooks/use-boards.ts b/app/frontend/src/hooks/use-boards.ts index 234893c5..bbe4db84 100644 --- a/app/frontend/src/hooks/use-boards.ts +++ b/app/frontend/src/hooks/use-boards.ts @@ -23,20 +23,23 @@ interface UseBoardEntriesResult { } /** - * Subscribe to the union of board-changed events across all running tmux - * servers. Returns a function that dispatches the supplied callback when an - * event arrives. Re-subscribes when the server list changes. Reuses - * SessionProvider's EventSource pool via attachServer/subscribeBoardChange - * so we share the per-server SSE connections (boards span servers, so we - * attach all known servers) and stay under the 6-connection cap. + * Subscribe to board-changed events across all running tmux servers. Returns a + * function that dispatches the supplied callback when an event arrives. + * Re-subscribes when the server list changes. Reuses SessionProvider's + * EventSource pool via attachServer/subscribeBoardChange so we share the + * per-server SSE connections and stay under the 6-connection cap. + * + * Boards are server-scoped (a pinned window's pin-session lives on a single + * tmux server), but the board LIST is summarized across every reachable server, + * so we attach all known servers to receive each one's pin/unpin/reorder events. */ function useBoardChangedSubscription(onEvent: () => void): void { const onEventRef = useRef(onEvent); onEventRef.current = onEvent; // Use SessionProvider's EventSource pool instead of opening per-server - // connections here. We attach all known servers so cross-server - // board-changed events arrive (boards are explicitly cross-server). + // connections here. We attach all known servers so each server's + // board-changed events arrive (the board list spans servers). 
const { servers: ctxServers, attachServer, subscribeBoardChange } = useSessionContext(); useEffect(() => { for (const s of ctxServers) attachServer(s.name); @@ -96,7 +99,8 @@ export function useBoards(): UseBoardsResult { /** * useBoardEntries fetches and live-updates a specific board's entries. - * Subscribes to board-changed events on every server (boards span servers). + * Subscribes to board-changed events on every server (the board list spans + * servers, so a pin/unpin on any server may affect this board). */ export function useBoardEntries(name: string): UseBoardEntriesResult { const [entries, setEntries] = useState([]); diff --git a/app/frontend/tests/e2e/boards-desktop-suspend.spec.md b/app/frontend/tests/e2e/boards-desktop-suspend.spec.md index 83491eb7..8982e9d4 100644 --- a/app/frontend/tests/e2e/boards-desktop-suspend.spec.md +++ b/app/frontend/tests/e2e/boards-desktop-suspend.spec.md @@ -19,7 +19,7 @@ carousel's existing `paused={idx !== carouselIndex}` suspension. during the test. - `afterAll` POSTs `/api/boards//unpin` for each tracked entry (best-effort) so the persistent `rk-test-e2e` server doesn't carry stale - `@rk_board` entries into later runs, then kills the test session. + `_rk-pin-*` pin-sessions into later runs, then kills the test session. ## Tests diff --git a/app/frontend/tests/e2e/boards-mobile.spec.md b/app/frontend/tests/e2e/boards-mobile.spec.md index 5eca4e94..eb493158 100644 --- a/app/frontend/tests/e2e/boards-mobile.spec.md +++ b/app/frontend/tests/e2e/boards-mobile.spec.md @@ -14,7 +14,7 @@ slot. pinned during the test. - `afterAll` first POSTs `/api/boards//unpin` for each tracked entry (best-effort) so the persistent `rk-test-e2e` server doesn't carry stale - `@rk_board` entries into later runs, then kills the test session. + `_rk-pin-*` pin-sessions into later runs, then kills the test session. 
## Tests diff --git a/app/frontend/tests/e2e/boards-mobile.spec.ts b/app/frontend/tests/e2e/boards-mobile.spec.ts index ca9c0f50..a9152ff6 100644 --- a/app/frontend/tests/e2e/boards-mobile.spec.ts +++ b/app/frontend/tests/e2e/boards-mobile.spec.ts @@ -28,9 +28,10 @@ test.describe("Boards: mobile carousel", () => { }); test.afterAll(async ({ request }) => { - // Unpin while the tmux server is still alive — `@rk_board` is a server - // option that survives `kill-session`, so stale entries would otherwise - // pollute the persistent `rk-test-e2e` server across runs. + // Unpin while the tmux server is still alive — each pin lives in a + // `_rk-pin-*` session that persists across restarts (and survives killing + // the source session), so stale pin-sessions would otherwise pollute the + // persistent `rk-test-e2e` server across runs. for (const entry of pinnedEntries) { try { await request.post(`/api/boards/${BOARD_NAME}/unpin`, { diff --git a/app/frontend/tests/e2e/boards-multi-server.spec.md b/app/frontend/tests/e2e/boards-multi-server.spec.md index 6efbbcd7..011013c5 100644 --- a/app/frontend/tests/e2e/boards-multi-server.spec.md +++ b/app/frontend/tests/e2e/boards-multi-server.spec.md @@ -1,7 +1,10 @@ # boards-multi-server.spec.ts -Validates that boards aggregate windows across multiple tmux servers — the -core cross-server requirement from the spec. +Validates that the board view aggregates pinned windows sharing a board name +across multiple tmux servers. In the move-based model each pinned window's +pin-session (`_rk-pin-`) lives on a single tmux server (boards are +server-scoped), but `GET /api/boards/{name}` and the board page UNION every +pin-session carrying that `@rk_board` name across all reachable servers. ## Shared setup @@ -13,7 +16,7 @@ core cross-server requirement from the spec. pinned during the test.
- `afterAll` first POSTs `/api/boards//unpin` for each tracked entry (best-effort) so the persistent `rk-test-e2e` server doesn't carry stale - `@rk_board` entries into later runs, then kills the primary session and + `_rk-pin-*` pin-sessions into later runs, then kills the primary session and the secondary tmux server entirely. ## Tests @@ -21,9 +24,9 @@ core cross-server requirement from the spec. ### `a board with windows from two servers shows the union on /board/` **What it proves:** Pinning windows from two different tmux servers to the -same board makes both windows appear on the board page — the -cross-server aggregation contract holds end-to-end through the HTTP API and -the UI render path. +same board name makes both windows appear on the board page — the +cross-server board-name aggregation contract holds end-to-end through the HTTP +API and the UI render path, even though each pin-session is server-local. **Steps:** diff --git a/app/frontend/tests/e2e/boards-multi-server.spec.ts b/app/frontend/tests/e2e/boards-multi-server.spec.ts index 6e371dd9..ca6da77b 100644 --- a/app/frontend/tests/e2e/boards-multi-server.spec.ts +++ b/app/frontend/tests/e2e/boards-multi-server.spec.ts @@ -31,9 +31,10 @@ test.describe("Boards: multi-server union", () => { }); test.afterAll(async ({ request }) => { - // Unpin while servers are still alive — `@rk_board` lives on the tmux - // server and survives `kill-session`, so without this the persistent - // `rk-test-e2e` server would carry stale entries into later runs. + // Unpin while servers are still alive — each pin lives in a `_rk-pin-*` + // session that PERSISTS across restarts (and survives killing the SOURCE + // session), so without this the persistent `rk-test-e2e` server would carry + // stale pin-sessions into later runs. 
for (const entry of pinnedEntries) { try { await request.post(`/api/boards/${BOARD_NAME}/unpin`, { diff --git a/app/frontend/tests/e2e/boards-same-session-multi-pane.spec.md b/app/frontend/tests/e2e/boards-same-session-multi-pane.spec.md index 04492e0d..f47f319a 100644 --- a/app/frontend/tests/e2e/boards-same-session-multi-pane.spec.md +++ b/app/frontend/tests/e2e/boards-same-session-multi-pane.spec.md @@ -1,16 +1,16 @@ # boards-same-session-multi-pane.spec.ts Validates that pinning two windows from the **same** tmux session to one board -renders each pane with its own window's content — the central regression -covered by `260508-hdjr-relay-grouped-sessions-board-panes` (PR #186 shipped -the boards feature with a relay-layer bug where every pane mirrored the same -active window). +renders each pane with its own window's content. -The bug-fix architecture: each WebSocket relay creates a per-connection -ephemeral grouped tmux session (`rk-relay-`), `select-window`s on the -ephemeral, and attaches to the ephemeral. Group members share window membership -but maintain independent active-window state, so each pane's terminal sees -only its targeted window's PTY output. +The move-based architecture (`260602-qn62-move-based-board-pin-sessions`): each +pinned window is MOVED into its own single-window pin-session (`_rk-pin-`), +and a board pane attaches its WebSocket relay DIRECTLY to that pin-session +(whose sole window is permanently active). There is no per-connection ephemeral +grouped session anymore — single-window pin-sessions remove window *sharing*, +which is what the old ephemeral isolation layer existed to work around. Two +windows from one source session therefore become two independent pin-sessions, +each with its own direct relay, so each pane sees only its own window's PTY. ## Shared setup @@ -30,8 +30,8 @@ So the original "scrape the marker text" assertion was unverifiable against the real renderer. 
Per-pane isolation is instead proven at the **relay layer**: each pinned window opens its own `/relay/` WebSocket and each pane mounts its own live `.xterm` instance. Two distinct relay sockets for the two -distinct window ids is the direct connection-level proof that the -grouped-ephemeral relay isolates each pane. This matches the assertion style of +distinct window ids is the direct connection-level proof that each pane attaches +to its own pin-session. This matches the assertion style of `boards-desktop-suspend.spec.ts`. ## Tests @@ -41,9 +41,9 @@ grouped-ephemeral relay isolates each pane. This matches the assertion style of **What it proves:** Pinning two distinct windows of the same tmux session into a single board produces two independent pane terminals — each mounts its own live xterm instance and opens its own per-window relay WebSocket, with no -shared/aliased socket. This is the multi-pane same-session relay-isolation -invariant restored by the grouped-session refactor, verified at the connection -layer (xterm's WebGL canvas exposes no DOM text to scrape). +shared/aliased socket. This is the multi-pane same-session isolation invariant: +each window is moved into its own pin-session and relayed directly, verified at +the connection layer (xterm's WebGL canvas exposes no DOM text to scrape). **Steps:** diff --git a/app/frontend/tests/e2e/boards-same-session-multi-pane.spec.ts b/app/frontend/tests/e2e/boards-same-session-multi-pane.spec.ts index 7d0465db..f4d22506 100644 --- a/app/frontend/tests/e2e/boards-same-session-multi-pane.spec.ts +++ b/app/frontend/tests/e2e/boards-same-session-multi-pane.spec.ts @@ -10,11 +10,13 @@ const BOARD_NAME = `mp${Date.now().toString().slice(-6)}`; // xterm renders glyphs to a WebGL canvas with NO DOM text layer (verified — // `.xterm-rows` is absent and `body.innerText()` never contains terminal // content), so the previous `innerText` assertion could never pass. 
Per-pane -// isolation is instead proven at the relay layer: each pinned window opens its -// OWN `/relay/` WebSocket, and each pane mounts its own live `.xterm` -// instance. Distinct relay sockets for the two distinct window ids is the -// direct connection-level proof that the grouped-ephemeral relay isolates each -// pane (matching the assertion style of boards-desktop-suspend.spec.ts). +// isolation is instead proven at the relay layer: in the move-based model each +// pinned window is MOVED into its own single-window pin-session (`_rk-pin-`) +// and a board pane attaches DIRECTLY to it, opening its OWN `/relay/{windowId}` +// WebSocket and mounting its own live `.xterm`. Two windows from ONE source +// session therefore become two independent pin-sessions, each with its own +// relay socket — distinct sockets for the two window ids are the connection-level +// proof that the panes are isolated (matching boards-desktop-suspend.spec.ts). const WIN_A_MARKER = "PANE_ALPHA_OK"; const WIN_B_MARKER = "PANE_BRAVO_OK"; @@ -70,8 +72,8 @@ test.describe("Boards: same-session multi-pane", () => { expect(winB).toBeTruthy(); // Track which window ids opened a relay WebSocket. A distinct relay per - // window id is the isolation proof: two windows from ONE session each get - // their own grouped-ephemeral relay. + // window id is the isolation proof: two windows from ONE source session are + // each MOVED into their own pin-session and get their own direct relay. const relayWindowIds = new Set(); page.on("websocket", (ws) => { const wid = relayWindowId(ws.url()); diff --git a/docs/memory/run-kit/architecture.md b/docs/memory/run-kit/architecture.md index a6ef183a..467634e8 100644 --- a/docs/memory/run-kit/architecture.md +++ b/docs/memory/run-kit/architecture.md @@ -147,8 +147,8 @@ All endpoints served by the single Go binary on one port. POST-only mutations wi | `/api/settings/theme` | GET | Returns `{"theme": "..."}` — reads via `settings.Load()`.
Returns `"system"` when no settings file exists. Not per-server (no `?server=` param) | | `/api/settings/theme` | POST | Accepts `{"theme": "..."}` — writes via `settings.Save()`. (Migrated PUT→POST by `260529-jad6` per §IX; body/response unchanged.) Returns `{"status": "ok"}` on success, `400` if theme is empty. Not per-server | | `/api/settings/server-color` | GET/POST | Per-server color preference. `POST` accepts `{"server":"...","color":N}` (migrated PUT→POST by `260529-jad6` per §IX; body unchanged). Not per-`?server=` | -| `/api/boards` | GET | Lists pane boards aggregated across every server returned by `tmux.ListServers(ctx)`. Returns `200 [{name, pinCount}]` (alphabetical by `name`); `[]` when empty (never `null`). Not per-server (boards span servers — no `?server=` query) | -| `/api/boards/{name}` | GET | Returns the board's entries joined with live tmux window data: `[{server, windowId, session, windowIndex, windowName, orderKey, panes}]`, sorted by `orderKey`. `{name}` is validated against `^[A-Za-z0-9_-]{1,32}$` (`400` on invalid). Stale entries (window vanished between read and join) are silently dropped. `200 []` when no entries match the name | +| `/api/boards` | GET | Lists pane boards summarized across every server returned by `tmux.ListServers(ctx)` (each board's pins are server-scoped; the list is the union of board names). Derived from `_rk-pin-*` session `@rk_board` vars (since `260602-qn62`). Returns `200 [{name, pinCount}]` (alphabetical by `name`); `[]` when empty (never `null`). Not per-server (no `?server=` query) | +| `/api/boards/{name}` | GET | Returns the board's entries (one per `_rk-pin-*` session carrying this `@rk_board`) joined with live tmux window data: `[{server, windowId, session, windowIndex, windowName, orderKey, panes}]`, sorted by `orderKey`. **Response field shape unchanged** by `260602-qn62` — `session` is now the `_rk-pin-*` pin-session, transparent to the frontend. 
`{name}` validated against `^[A-Za-z0-9_-]{1,32}$` (`400` on invalid). A killed pinned window's session just drops out of the live read (no stale write-back). `200 []` when no entries match the name | | `/api/boards/{name}/pin` | POST | Pin a window to a board. Body `{"server":"...","windowId":"@1234"}`. Validates `name`, `server` (via `validate.ValidateName`), and `windowId` (`^@\d+$`). Returns `404` when the named window does not exist on the named server. Idempotent — re-pinning the same window to the same board is a no-op. Returns `201 {"ok":true}` on success and broadcasts `event: board-changed { change: "pin" }` on the source server's SSE stream | | `/api/boards/{name}/unpin` | POST | Unpin a window from a board. Body `{"server":"...","windowId":"@1234"}`. Returns `200 {"ok":true}`. Tolerant of "entry not present" (also returns `200`). Empty board cannot exist — when this removes the last entry, the board vanishes from `GET /api/boards`. Broadcasts `event: board-changed { change: "unpin" }` | | `/api/boards/{name}/reorder` | POST | Reorder a pin within a board. Body `{"server":"...","windowId":"@1234","before":"@5678"\|null,"after":"@9abc"\|null}`. The new `orderKey` is computed server-side via `tmux.ComputeOrderKey(beforeKey, afterKey)`; the frontend never generates keys. Returns `200 {"ok":true,"newOrderKey":"bm"}` and broadcasts `event: board-changed { change: "reorder", orderKey }` | @@ -198,21 +198,18 @@ All API functions (except `listServers`, `createServer`, `killServer`, `getDirec WebSocket endpoint at `/relay/{windowId}?server=runkit|default` on the same port as the API — no separate relay port. Uses `gorilla/websocket` for WebSocket handling and `creack/pty` for PTY allocation. Implementation in `app/backend/api/relay.go`. The `server` query param determines which tmux server to attach to (defaults to `runkit`). 
The `{windowId}` is validated via `validate.ValidateWindowID` before the WS upgrade — a malformed ID responds `400` (no upgrade, no tmux call). -Per connection: -1. **Resolves the owning session from the window ID** via `s.tmux.ResolveWindowSession(ctx, server, windowID)` (a targeted `tmux [-L server] display-message -t -p '#{session_name}'` with a 5s timeout, O(1)). The grouped-session ephemeral mechanism keys off the *real session name*, so the relay must derive it from the window ID. If resolution fails or returns empty (unknown window ID), the relay closes the WebSocket with code `4004` ("Window not found") -2. Generates an ephemeral session name `rk-relay-<8 hex>` via `crypto/rand` (`newEphemeralRelayName` in `relay.go`); 4B namespace, never derived from user input (constitution I) -3. Creates a per-WebSocket grouped session via `s.tmux.NewGroupedSession(r.Context(), server, session, ephemeral)` (`tmux [-L server] new-session -d -s -t `) — sessions in the same group share window membership but maintain **independent active-window state**, so concurrent relays no longer steal each other's `select-window` -4. `defer s.tmux.KillSessionCtx(context.Background(), server, ephemeral)` reaps the ephemeral on every exit path. Cleanup uses `context.Background()` rather than `r.Context()` because the request context is already cancelled at cleanup time (the trigger for the defer) -5. Stamps `@rk_owner_pid = os.Getpid()` on the ephemeral via `s.tmux.SetSessionOwnerPID(...)` — **after** the create + cleanup-defer, **before** the select/attach below (since `260529-wtg4-isolate-relay-sweep-test-leaks`). The ordering is load-bearing: a sibling startup sweep reaps any unstamped `rk-relay-*`, so an attachable-but-unstamped relay would be wrongly killed. On stamp failure the handler aborts clean (`slog.Warn` + WS close `4001` + return; the deferred `KillSessionCtx` reaps the half-owned ephemeral) -6. 
Selects the target window on the **ephemeral** via `s.tmux.SelectWindow(windowID, server)` — window IDs are shared across grouped sessions, so targeting by `@id` selects the right window on the ephemeral without disturbing other clients; never selects on the real session -7. Spawns `tmux [-L server] attach-session -t ` via `creack/pty`. Attaching to the ephemeral (not the real session) is the linchpin of the fix -8. Relays I/O between WebSocket and pty (goroutine for pty→WS, main loop for WS→pty) -9. Handles resize messages (JSON `{"type":"resize","cols":N,"rows":N}`) via `pty.Setsize` -10. On disconnect: `sync.Once` cleanup cancels context, closes PTY, kills process; the deferred `KillSessionCtx` then reaps the ephemeral. PTY reader goroutine calls `cleanup()` (not `conn.Close()`) on read failure — eliminates concurrent WebSocket close race. WebSocket connection closed only by `defer conn.Close()` on the main goroutine +Per connection (since `260602-qn62-move-based-board-pin-sessions`, the relay attaches **directly** — no ephemeral): +1. **Validates the window ID** via the shared `decodeWindowID(r)` helper (`url.PathUnescape` + `validate.ValidateWindowID`) — a malformed ID is a `400` before the WS upgrade or any tmux call (the same helper the REST path uses, so they cannot drift — bug #205) +2. **Resolves the owning session from the window ID** via `s.tmux.ResolveWindowSession(ctx, server, windowID)` (5s timeout) — the result is the ONE session that owns the window: a normal home session OR a board pin-session (`_rk-pin-*`). Since the move-based model removes window sharing, the first match is authoritative. If resolution fails or returns empty (unknown window ID), the relay closes the WebSocket with code `4004` ("Window not found") +3. **Selects the window** on its real session via a direct bare `s.tmux.SelectWindow(windowID, server)` (`select-window -t @N`) so the attach renders the right window. 
For a pin-session this is a no-op (sole window always active); for a home session it mutates the real session's single active-window pointer — the accepted multi-client collision tradeoff +4. Waits up to 5s for the first `{"type":"resize"}` message to start the PTY at the correct dimensions (falls back to 80×24) +5. Best-effort `tmux.ReloadConfig(server)` so true-color terminal-overrides are active even on a foreign server, then spawns `tmux [-L server] attach-session -t <session>` via `creack/pty` (`forceTERM` sets `TERM=xterm-256color`). Attaches **directly to the resolved owning session** — no ephemeral, no defer-kill; the session is durable and owned by tmux +6. Relays I/O between WebSocket and pty (goroutine for pty→WS, main loop for WS→pty); handles resize via `pty.Setsize` +7. On disconnect: `sync.Once` cleanup cancels context, closes PTY, kills the attach process. PTY reader goroutine calls `cleanup()` (not `conn.Close()`) on read failure — eliminates concurrent WebSocket close race. WebSocket closed only by `defer conn.Close()` on the main goroutine -The grouped-session indirection fixes the central board bug — N board panes targeting different windows of the same real session now show distinct content — and as a side effect closes a latent multi-tab bug where two browser tabs viewing different windows of the same session yanked each other's active window. +**Why no ephemeral** (the `260602-qn62` simplification): the old per-WebSocket grouped ephemeral (`rk-relay-*`) existed only to give each connection its own active-window pointer (a tmux session has exactly ONE, shared across attachments) so board panes pointing at different windows of one session wouldn't collide. 
The move-based board model removes window *sharing* — each board pane's window now lives in its own single-window pin-session whose sole window is permanently active — so the isolation layer has no purpose and was deleted: `newEphemeralRelayName`, `tmux.NewGroupedSession`, the `@rk_owner_pid` stamp (`SetSessionOwnerPID`/`GetSessionOwnerPID`/`OwnerPIDOption`), the ephemeral `defer KillSessionCtx`, the scoped `SelectWindowInSession`-on-ephemeral, `RelaySessionPrefix`, and `ListRawSessionNames` are all gone. **Accepted tradeoffs**: multi-client navigation now mutates a shared home session's active window (two tabs / a navigating viewer yank each other); a no-op for pin-sessions. See `tmux-sessions.md` § Terminal Relay and § Pin Sessions. -**Startup sweep** (owner-PID scoped since `260529-wtg4-isolate-relay-sweep-test-leaks`): `rk serve` reaps orphan `rk-relay-*` sessions across every known tmux server before binding HTTP listeners (`sweepOrphanedRelaySessions` in `app/backend/cmd/rk/serve_sweep.go`, wired into `serveCmd.RunE` after `tmux.EnsureConfig()` and before the `ListenAndServe` goroutine, with a 30s bounded context). The sweep iterates `tmux.ListServers(ctx)` (read scope unchanged) and calls `tmux.ListRawSessionNames(ctx, server)` (the unfiltered variant — the user-facing filter would hide the very ephemerals being reaped), but reaps each `rk-relay-*` session **only** when its `@rk_owner_pid` user option is empty/unstamped or names a dead PID (`pidAlive` via `syscall.Kill(pid,0)`, leak-not-kill bias) — a live sibling's relays (e2e backend, `air` rebuild, second instance) are spared so their open terminals survive. Per-server failures are logged and accumulated without blocking startup. See `tmux-sessions.md` § Per-WebSocket Ephemeral Grouped Sessions for the full convention. +**No startup sweep** (since `260602-qn62`): `cmd/rk/serve_sweep.go` (`sweepOrphanedRelaySessions`/`pidAlive`/`relayOwnerIsDead`) is **deleted** and unwired from `serveCmd.RunE`. 
Relay ephemerals no longer exist, and board pin-sessions (`_rk-pin-*`) are PERSISTENT across rk restarts (a valid state, not an orphan — Constitution VI), so there is no in-server session class to reap. The `_rk-ctl` anchor + `exit-empty off` backstop are untouched. Client-side WebSocket reconnection: exponential backoff (1s, 2s, 4s, 8s, 16s, max 30s) on unexpected close. Shows `[reconnecting...]` in terminal. Re-sends resize on successful reconnect. Skips reconnect on component unmount. On close code `4004` (session/window not found): shows `[session not found]` and navigates to `/` instead of reconnecting. Terminal page connects via `ws://${location.host}/relay/${encodeURIComponent(windowId)}?server={runkit|default}` — same host, window ID (`@N`) in the path, server param from session metadata. The session name no longer appears in the relay URL. @@ -289,7 +286,7 @@ The `api` package owns the bridge between `tmuxctl.Supervisor` and the SSE hub. ### Active-Window Event Derivation (`260530-v6hm-active-window-event-derivation`) -The active-window highlight is **event-derived per session group** rather than read from the base session's `#{window_active}` pointer alone. The base pointer goes stale whenever a per-WebSocket `rk-relay-*` ephemeral (or `rk riff`, or an external `new-window`) moves the active window, because each group member keeps an independent active-window pointer (see `tmux-sessions.md` § Per-WebSocket Ephemeral Grouped Sessions); reading it produced the wrong-highlight bug. The fix corrects the *read signal* — it does not change any window creator or the ephemeral relay model. +The active-window highlight is **event-derived per session group** rather than read from the base session's `#{window_active}` pointer alone. 
The base pointer goes stale whenever an attachment moves the active window while another group member keeps an independent active-window pointer — historically the per-WebSocket `rk-relay-*` ephemeral, and still `rk riff` / an external `new-window`; reading the base pointer produced the wrong-highlight bug. This whole derivation subsystem (`internal/tmuxctl/*`) was scoped *investigate-only, NOT deletable* by `260602-qn62-move-based-board-pin-sessions` (it is driven by the `_rk-ctl` anchor and serves the SESSIONS highlight independent of boards) and is **left untouched** — note that `260602-qn62` removed the `rk-relay-*` ephemeral, so the relay is no longer one of the stale-pointer sources, but the subsystem's internal comments still reference relays (out of scope for that change). The fix corrects the *read signal* — it does not change any window creator. **Tracker** (`internal/tmuxctl/active_window.go`) — `ActiveWindowTracker` is a concurrency-safe, in-memory (Constitution §II) store for ONE server, owned by one `Client` (the read-loop goroutine is the sole writer; the fetch path reads). Two `sync.RWMutex`-guarded maps: `byGroup` (group name → active `@wid`, latest-event-wins via `Set`, bulk re-seeded via `SeedGroups`) and `sidGroup` (`$sid` → group, resolved via `ResolveGroup`, refreshed wholesale via `ReplaceSidGroups`). `Get`/`Snapshot` are the read accessors. @@ -311,19 +308,13 @@ The active-window highlight is **event-derived per session group** rather than r The loop body (`poll` + `waitForNext` + `selectFirst`) blocks on a `select` over (a) one `subscriber.Wait(server, perServerGen[server])` channel per active server and (b) `time.NewTimer(safetyIntervalEffective())`. When a subscriber fires, the winning server's prior generation is refreshed and the server is marked event-driven so the next iteration invalidates its 500ms `cache` entry and observes the post-mutation tmux state immediately. 
Non-winning servers are peeked non-blockingly so their backlog isn't replayed unnecessarily. `selectFirst` reads `timer.C` in the outer select to avoid the goroutine leak that would otherwise occur if a subscriber wins the race and the timer goroutine blocked on `timer.C` forever (`Stop` does not deliver on `C`). Heartbeat is wall-clock-based (`sseHeartbeatPeriod = 15 * time.Second`) — wall-clock cadence prevents the slower safety interval from starving heartbeats. -### Killed-Window Diff in the Snapshot Builder +### No Killed-Window Board Cleanup Diff (removed) -The board stale-entry cleanup logic moved out of the inline poll loop into a pure function called from the snapshot-build entry point: - -```go -func detectKilledWindowIDs(prev, current map[string]bool) []string -``` - -The snapshot builder maintains `prev` and `current` per server (`sseHub.previousWindowIDs`), computes the killed set on every snapshot bump (control-mode-driven OR ticker-driven), and for each killed window id calls `boardFetcher.RemoveAllByWindowID(ctx, server, windowID)` and broadcasts one `event: board-changed { change: "cleanup", server, windowId }` per affected board. `prev` is initialised from `current` on the first snapshot per server — the `hasPrev` guard in `poll` ensures the first snapshot after a Supervisor restart emits no synthetic cleanup events. The diff moved because snapshots are now event-driven; any transition-dependent logic must live at the snapshot-build site, not where snapshots used to be triggered. Without the move, board cleanup latency would track the 12s safety interval rather than the sub-500ms control-mode latency. +Since `260602-qn62-move-based-board-pin-sessions`, the SSE hub no longer detects killed windows to clean up board membership. 
The old `detectKilledWindowIDs(prev, current) []string` pure function, the per-server `sseHub.previousWindowIDs` tracking, the per-tick window-kill cleanup loop, and the `BoardEntriesFetcher.RemoveAllByWindowID` dependency are all **deleted**. In the move-based model board membership is derived live from `_rk-pin-*` sessions, so a killed pinned window's pin-session simply drops out of the next `ListBoardEntries` read — the frontend's refetch on the next session-list change picks it up. There is also no `@rk_board` first-poll bootstrap broadcast (`broadcastBoardBootstrap`/`boardBootstrapPayload`/`previousBoardJSON` removed) and no cached-board send on `addClient`. `BoardEntriesFetcher` is now a one-method interface (`ListBoardEntries`) kept only so tests can stub the tmux dependency; `broadcastBoardChanged` (pin/unpin/reorder) remains and the `"cleanup"`/`"bootstrap"` change strings are gone. ### `rk serve` Wiring -`cmd/rk/serve.go` instantiates the Supervisor after `tmux.EnsureConfig()` and `sweepOrphanedRelaySessions(ctx)` but BEFORE the `server.ListenAndServe()` goroutine. The sweep must run first so it does not observe the `_rk-ctl` anchor as an orphan; the supervisor must run before listen so the SSE hub never races an empty Client map for sockets that already exist on disk. The Supervisor is constructed with `api.NewHubSinkFactory()` (since `260530-v6hm` — the per-socket tracker-bound sink factory; was `api.NewHubSink()`) and started synchronously; per-socket `Open` failures (PTY unavailable, etc.) are logged inside the Supervisor and never block startup. 
Once `Start` returns, `apiServer.SetWindowChangeSubscriber(api.NewSupervisorSubscriber(supervisor))` wires the change-notification bridge and `apiServer.SetActiveWindowProvider(supervisor)` wires the Tier-1 active-window read path (see § Active-Window Event Derivation) — if `Start` itself fails (rare; usually fsnotify allocation), a single `slog.Warn` falls back to safety-net-only mode without aborting the server. +`cmd/rk/serve.go` instantiates the Supervisor after `tmux.EnsureConfig()` but BEFORE the `server.ListenAndServe()` goroutine, so the SSE hub never races an empty Client map for sockets that already exist on disk. (Since `260602-qn62-move-based-board-pin-sessions` there is no longer a `sweepOrphanedRelaySessions(ctx)` step between them — relay ephemerals are gone and pin-sessions are persistent, so `serve_sweep.go` was deleted and the startup ordering dropped to `EnsureConfig → Supervisor.Start → ListenAndServe`.) The Supervisor is constructed with `api.NewHubSinkFactory()` (since `260530-v6hm` — the per-socket tracker-bound sink factory; was `api.NewHubSink()`) and started synchronously; per-socket `Open` failures (PTY unavailable, etc.) are logged inside the Supervisor and never block startup. Once `Start` returns, `apiServer.SetWindowChangeSubscriber(api.NewSupervisorSubscriber(supervisor))` wires the change-notification bridge and `apiServer.SetActiveWindowProvider(supervisor)` wires the Tier-1 active-window read path (see § Active-Window Event Derivation) — if `Start` itself fails (rare; usually fsnotify allocation), a single `slog.Warn` falls back to safety-net-only mode without aborting the server. On SIGINT/SIGTERM, the shutdown handler runs `supervisor.Stop(ctx5s)` BEFORE `server.Shutdown(ctx)`. Stop errors are logged at `slog.Warn` and never block shutdown — matches PR #197's daemon-log graceful-degradation pattern. 
The 5s bounded teardown matches the existing `tmux.TmuxTimeout = 10s` shape (control-mode subprocesses are expected to close cleanly within half that bound). @@ -331,7 +322,7 @@ On SIGINT/SIGTERM, the shutdown handler runs `supervisor.Stop(ctx5s)` BEFORE `se - **§I (Security First)** — all subprocess calls in `tmuxctl/` use `exec.CommandContext` with explicit argument slices; no shell strings. The Client's `tmux -CC` invocation has no overall timeout because the connection is long-lived (per-call timeouts are not appropriate); the context is cancelled by `Close()` and propagates through the goroutine. - **§II (No Database)** — no file is written under `~/.run-kit/`, `~/.cache/rk/`, or anywhere else. The Supervisor's `map[socket]*Client` mirrors the kernel-observable filesystem view of tmux sockets; nothing persists across `rk serve` lifetimes. The only persistent artefact in this neighbourhood is `~/.cache/rk/daemon.log` from PR #197, unchanged by this package. -- **§VI (Tmux Sessions Survive Server Restarts)** — `tmuxctl` observes tmux state changes and pushes them to SSE clients; it never mutates *user* sessions (it creates/reads only the `_rk-ctl` anchor and sets the server-scoped `exit-empty off` option — never touching any window, pane, or user session). Since `260602-a1wo-prevent-exit-empty-server-death` the `_rk-ctl` anchor is created **on every observed server** (including the user's `default` with live sessions), as a permanent session floor — NOT, as before, only on servers with zero pre-existing sessions. 
This is itself the deliberate Constitution VI *prevention*, not a violation: the floor + the server-global `set-option -g exit-empty off` (set imperatively in `productionDial` before the anchor on every dial/reconnect, plus baked into the embedded conf) together stop tmux's default `exit-empty on` from reaping a server when its last real session closes and the remaining `rk-relay-*` ephemerals drain to zero — the recurring failure (`runWork`/`utils`/`kit`, ≥3x) that killed live agent sessions. The anchor is harmless on a server with real sessions: it is filtered from every user-facing list (`tmux.ControlAnchorSessionName` chokepoint in `parseSessions`), ungrouped, and the control-mode connection attaches read-only (`-r`). Managed servers now die only via explicit `kill-server` / `rk reaper`; empty anchor-only servers persist by design (no auto-reaping, no cross-process state per §II). +- **§VI (Tmux Sessions Survive Server Restarts)** — `tmuxctl` observes tmux state changes and pushes them to SSE clients; it never mutates *user* sessions (it creates/reads only the `_rk-ctl` anchor and sets the server-scoped `exit-empty off` option — never touching any window, pane, or user session). Since `260602-a1wo-prevent-exit-empty-server-death` the `_rk-ctl` anchor is created **on every observed server** (including the user's `default` with live sessions), as a permanent session floor — NOT, as before, only on servers with zero pre-existing sessions. This is itself the deliberate Constitution VI *prevention*, not a violation: the floor + the server-global `set-option -g exit-empty off` (set imperatively in `productionDial` before the anchor on every dial/reconnect, plus baked into the embedded conf) together stop tmux's default `exit-empty on` from reaping a server when its session count momentarily hits zero — the recurring failure (`runWork`/`utils`/`kit`, ≥3x) that killed live agent sessions. 
(Pre-`260602-qn62` this happened when the last real session closed and the remaining `rk-relay-*` ephemerals drained to zero; since `qn62` removed those ephemerals, the same backstop now also keeps an *empty home session* alive after its only window is pinned away into a `_rk-pin-*` session — pin-session persistence relies on `exit-empty off`.) The anchor is harmless on a server with real sessions: it is filtered from every user-facing list (`tmux.ControlAnchorSessionName` chokepoint in `parseSessions`), ungrouped, and the control-mode connection attaches read-only (`-r`). Managed servers now die only via explicit `kill-server` / `rk reaper`; empty anchor-only servers persist by design (no auto-reaping, no cross-process state per §II). - **No new network surface** — the Client is a local subprocess attached via PTY. `ss -tlnp` still shows only the existing HTTP listener. ## SPA Static Serving @@ -519,7 +510,7 @@ Install flow: `brew tap sahil87/tap && brew install rk`. Update: `rk update`. ## Boards Feature -Pane boards are named, cross-server collections of pinned tmux windows rendered as a horizontal pane dashboard at `/board/$name`. Storage lives in `@rk_board` (server-scoped tmux user-option) — see `tmux-sessions.md` § `@rk_board` for the full storage contract, value format, order keys, and stale-cleanup rules. +Pane boards are named collections of pinned tmux windows rendered as a horizontal pane dashboard at `/board/$name`. Since `260602-qn62-move-based-board-pin-sessions` the model is **move-based and server-scoped**: a pinned window is physically `move-window`'d into its own single-window session `_rk-pin-<id>`, and a board is the **set of pin-sessions sharing an `@rk_board` SESSION var** (not a tmux session itself, and no longer a `@rk_board` server-option encoding). Boards are server-scoped because `move-window` can't cross tmux servers — the board *list* is summarized across reachable servers, but there is no cross-server window union. 
A board exists only while ≥1 pin carries its name (no empty boards, no registry). See `tmux-sessions.md` § Pin Sessions and § `@rk_board` for the full pin/unpin/reorder contract, the three session vars (`@rk_board`/`@rk_home`/`@rk_board_order`), order keys, and the direct-attach relay. ### Route Placement @@ -533,17 +524,17 @@ Pane boards are named, cross-server collections of pinned tmux windows rendered | Layer | File | Responsibility | |-------|------|---------------| -| Storage | `app/backend/internal/tmux/board.go` | `@rk_board` parser/serializer, `Pin`/`Unpin`/`Reorder`/`GetBoard`/`ListBoards`, `ComputeOrderKey` (lowercase-`a..z` fractional indexing), `RemoveAllByWindowID` (used by SSE eager cleanup), validators (`ValidBoardName`, `ValidWindowID`, `ValidOrderKey`) | -| HTTP handlers | `app/backend/api/boards.go` | `handleBoardsList`, `handleBoardGet`, `handleBoardPin`, `handleBoardUnpin`, `handleBoardReorder` — register in `router.go` `buildRouter()` | -| Interface | `app/backend/api/router.go` `TmuxOps` | `ListBoards`, `GetBoard`, `PinBoard`, `UnpinBoard`, `ReorderBoard` (production wires `prodTmuxOps` to `tmux.*`; tests use `mockTmuxOps`) | -| SSE | `app/backend/api/sse.go` | `BoardEntriesFetcher` interface (parallel to `OrderFetcher`), `previousBoardJSON` per-server cache, `broadcastBoardChanged(server, payload)`, `event: board-changed` (kebab-case, parity with `event: session-order`), bootstrap-on-first-poll, cached-snapshot send to new clients (between `session-order` and `metrics`), and window-kill detection that calls `RemoveAllByWindowID` and broadcasts one `cleanup` event per affected board | +| Storage | `app/backend/internal/tmux/board.go` | move-based `Pin`/`Unpin`/`Reorder`, `pinEntry`, `ListBoardEntries`/`ListBoards`/`GetBoard` (derived from `_rk-pin-*` session vars), `ComputeOrderKey`/`nextAppendKey` (lowercase-`a..z` fractional indexing), `BoardOption`/`HomeOption`/`BoardOrderOption` session-var keys, validators (`ValidBoardName`, 
`ValidWindowID`, `ValidOrderKey`). The old `@rk_board` server-option parser/serializer (`parseBoardValue`/`serializeBoardValue`/`setBoardValue`), cross-server `ListAllBoardEntries`, and `RemoveAllByWindowID` are **deleted** (since `260602-qn62`). Pin-session naming (`PinSessionPrefix`/`PinSessionName`/`WindowIDFromPinSession`/`ListPinSessionNames`) lives in `tmux.go` | +| HTTP handlers | `app/backend/api/boards.go` | `handleBoardsList`, `handleBoardGet`, `handleBoardPin`, `handleBoardUnpin`, `handleBoardReorder` — register in `router.go` `buildRouter()`; each emits its own `board-changed` SSE event | +| Interface | `app/backend/api/router.go` `TmuxOps` | `ListBoards`, `GetBoard`, `ListBoardEntries`, `PinBoard`→`tmux.Pin`, `UnpinBoard`→`tmux.Unpin`, `ReorderBoard` (uses `lookupNeighbourKeys` + `ComputeOrderKey` then `tmux.Reorder`). `260602-qn62` removed `NewGroupedSession` and `SetSessionOwnerPID` from the interface + `prodTmuxOps` + `mockTmuxOps` | +| SSE | `app/backend/api/sse.go` | `BoardEntriesFetcher` (now a one-method `ListBoardEntries` interface for test stubbing), `broadcastBoardChanged(server, payload)`, `event: board-changed` (kebab-case; `change` ∈ `pin`/`unpin`/`reorder` only). Since `260602-qn62`: NO `previousBoardJSON` cache, NO bootstrap-on-first-poll, NO cached-snapshot replay, NO window-kill `cleanup` diff — a killed pinned window's pin-session just disappears from the next live `ListBoardEntries` read | ### Constitution Alignment -- **II (No Database)** — pin state lives only in tmux server-scoped options + browser `localStorage` for view-local pane widths -- **IV (Minimal Surface Area)** — adds exactly one route (`/board/$name`); no settings or admin pages +- **II (No Database)** — pin state lives only in tmux: the `_rk-pin-*` session structure + its `@rk_board`/`@rk_home`/`@rk_board_order` session vars (a deliberate shift since `260602-qn62` from "a tiny derived server-option value" to "tmux session structure IS the record"). 
Browser `localStorage` holds only view-local pane widths +- **IV (Minimal Surface Area)** — adds exactly one route (`/board/$name`); `260602-qn62` net-reduced backend surface (deleted the relay ephemeral subsystem, the `@rk_board` encoding, the startup sweep, and the SSE board-cleanup/bootstrap) while keeping one rendering path (`TerminalClient`/`/relay/{windowId}`) - **V (Keyboard-First)** — pin/unpin/switch/cycle reachable via Cmd+K palette; pane focus cycle via Cmd+[/] -- **VI (Tmux Sessions Survive Server Restarts)** — pin state persists with the tmux server; rk-go restart is bridged by the SSE bootstrap; `tmux kill-server` loses the boards on that server (expected and acceptable) +- **VI (Tmux Sessions Survive Server Restarts)** — pin state persists with the tmux server across rk-go restarts, so there is NO restore-sweep (since `260602-qn62`); `tmux kill-server` loses the boards on that server (expected and acceptable). An emptied home session (its only window pinned away) persists via the `exit-empty off` backstop (`260602-a1wo`) so unpin can restore the window to it - **VII (Convention Over Configuration)** — no new env vars, no new config files; reuses the established `@rk_*` user-option pattern ## Design Decisions @@ -713,3 +704,4 @@ E2E test coverage: create/kill session via UI, SSE stream delivers real data, si | 2026-05-30 | **Active-window event-driven derivation** — the sidebar/URL active-window highlight is now derived from tmux control-mode `%session-window-changed` events per session group, fixing the stale-base-pointer wrong-highlight bug (an `rk-relay-*` ephemeral / `rk riff` / external `new-window` moves a group member's active window without moving the base session pointer the read path used). Fix is purely in the *derivation/read path* — no window creator changed, the ephemeral relay model untouched, frontend unchanged. 
New `internal/tmuxctl/active_window.go` `ActiveWindowTracker` (per-server, per-Client, in-memory §II; `sync.RWMutex` over `byGroup` group→`@wid` + `sidGroup` `$sid`→group; `Set`/`Get`/`ResolveGroup`/`ReplaceSidGroups`/`SeedGroups`/`Snapshot`). `api/tmuxctl_bridge.go`: `NewHubSink()` **removed**, replaced by `NewHubSinkFactory()` returning a per-socket tracker-bound `*hubSink`; `OnSessionWindowChanged` resolves `$sid`→group (O(1) cached map, unknown-sid tolerated) and records `@wid` (latest-event-wins); `OnSessionsChanged` refreshes the map; `OnConnectionEstablished` re-seeds (map + Tier-1 from current `#{window_active}` per group, because tmux does NOT replay `%session-window-changed` on a fresh `-CC` attach) — both re-seed callbacks offloaded to a goroutine to honor the EventSink non-blocking contract; injectable `listSessionGroups`/`listActiveByGroup` query seams for testing. `internal/tmuxctl/supervisor.go`: `NewSupervisor(factory SinkFactory)` (`Supervisor.sink` field removed; `nil` factory ⇒ `NoOpSink`); one tracker per socket allocated in `openSocket`; new `Supervisor.ActiveWindow(server, group) (wid, ok)` server-keyed read accessor. New tmux helpers `ListSessionGroups`/`parseSessionGroups` (`$sid`→group, ungrouped→name fallback) and `ListActiveWindowsByGroup`/`parseActiveWindowsByGroup` (group→active-`@wid`; honors only the leader row since `list-windows -a` reports `window_active=1` per group member) — `tmuxExecServer` + `context.WithTimeout` + pure-parse-split convention (§I, §VI). Two-tier derivation in `internal/sessions/sessions.go`: `FetchSessions(ctx, server, provider)` gains an `ActiveWindowProvider` (satisfied by `Supervisor`, injected via `prodSessionFetcher` in `api/router.go` + `Server.SetActiveWindowProvider`); pure `applyActiveWindow(windows, trackedWid)` — Tier 1 (tracked `@wid` authoritative, single-highlight enforced, stale-wid falls back to Tier 2) over Tier 2 (base `#{window_active}`); nil provider degrades to Tier-2-only. 
Wired in `cmd/rk/serve.go`: `NewSupervisor(NewHubSinkFactory())` then `SetActiveWindowProvider(supervisor)` alongside `SetWindowChangeSubscriber`. Tests: `active_window_test.go` (`-race` concurrent read/write), `tmuxctl_bridge_test.go` (event handling + stubbed re-seed), `internal/sessions/active_window_test.go` (`applyActiveWindow` tiers + single-highlight + stale-wid), `tmux_test.go` (new parsers). Frontend verified no-change (existing `app.tsx` writeback follows once `isActiveWindow` is corrected; boards pin explicit IDs). | `260530-v6hm-active-window-event-derivation` | | 2026-06-02 | **`exit-empty` server-death prevention (Constitution VI).** The `_rk-ctl` control-mode anchor is now created **unconditionally** on every server the tmuxctl Supervisor observes — a permanent session floor that keeps the count above zero so tmux's default `exit-empty on` can't reap the server. Previously `resolveBootstrap` created it only on servers empty at first connect, so servers with real sessions got no floor and collapsed once their last real session closed (relays-only → next disconnect → zero → whole-server death; killed live agent sessions, ≥3×). The floor is decoupled from the attach target (still prefers a real session; `firstSessionName` skips the anchor). `tmux.SetExitEmptyOff` sets `exit-empty off` server-globally, imperatively in `productionDial` before the anchor on every dial/reconnect (ordering closes the restart-window sliver); embedded conf sets it too. **Corrects the prior §VI compliance claim in this file**: tmuxctl now *does* create a session on servers with live user sessions (incl. `default`) — but it remains harmless (filtered from UIs via `ControlAnchorSessionName`, ungrouped, read-only `-r` attach), and `exit-empty off` is the deliberate §VI *prevention*, not a violation. Lifetime is explicit-kill-only (server dies only via `kill-server`/`rk reaper`; anchor-only servers persist by design; no cross-process state, §II). 
Test/e2e sockets still excluded by `isTmuxSocketCandidate`→`IsTestServerName` (no resurrection). | `260602-a1wo-prevent-exit-empty-server-death` | | 2026-06-02 | **Build-time help-dump — emit rk CLI help tree as `help/run-kit.json`** — New hidden Cobra subcommand `help-dump` in `cmd/rk/help_dump.go` (`Hidden: true`, registered in `root.go` `init()`). `captureNode` walks `rootCmd.Commands()` recursively to full depth, dropping `completion`, `help`, and any `Hidden` node at every level via `includeInDump` (self-excludes `help-dump`); leaves serialize `"commands":[]` (non-nil slice, never `null`). Per-node `name`/`path`/`short`/`usage`/`text` from `cmd.Name()`/`CommandPath()`/`Short`/`UseLine()`/`UsageString()` (raw `text`, newlines preserved). Frozen top-level contract `{tool:"rk", version, captured_at (RFC3339 UTC), schema_version:1, root:node}`, mirroring `sahil87/shll.ai` `help/wt.json`. `version` from `displayVersion()` (ldflags, never hardcoded); `captured_at` via injectable package-level `nowUTC` var + pure `buildDump(root, version, now)` builder (test seam, no global time freeze). Writes to `[output-path]` arg via `os.WriteFile` (0o644) else stdout — pure in-process introspection, no subprocess/shell-string (§I Security First). `release.yml` gains a **Publish help tree to shll.ai** step placed LAST (after the GitHub Release + Homebrew tap, so a fatal in-repo failure can't preempt the published release): `dist/rk-linux-amd64/rk help-dump help/run-kit.json` (versioned artifact) + `jq empty` validation (in-repo, fails the job on bad JSON), then a best-effort auto-merge PR (not a direct push) into `sahil87/shll.ai` via `SHLLAI_TOKEN` mirroring the Homebrew-tap token-clone pattern — skips when unchanged, and logs `::warning::` (PR left open) rather than failing the release when shll.ai is unreachable / auto-merge is disabled. rk's slice of a 7-tool "Command reference" rollout; the shll.ai site-side consumer is a separate repo (out of scope). 
Tests: `cmd/rk/help_dump_test.go` (shape, filtering via synthetic + real tree, self-exclusion, full-depth recursion, leaf `[]` not `null`, version-from-`displayVersion()`, injected/parseable `captured_at`). | `260602-a36m-help-dump-shll-ai` | +| 2026-06-02 | **Move-based server-scoped boards (pin sessions); relay ephemerals removed.** Boards switched from per-WebSocket `rk-relay-*` ephemeral isolation + `@rk_board` server-option encoding to a move-based pin-session model. A pinned window is `move-window`'d into its own single-window session `_rk-pin-<id>` (helpers `PinSessionName`/`WindowIDFromPinSession` in `tmux.go`); a board = the set of pin-sessions sharing an `@rk_board` SESSION var (server-scoped), with `@rk_home` (restore) + `@rk_board_order` (fractional `ComputeOrderKey`). `board.go` rewritten: pin-session-derived `ListBoardEntries`/`ListBoards`/`GetBoard` (no write-back), move-based `Pin`/`Unpin`/`Reorder` (Pin stamps 3 vars with `context.Background()`-rooted rollback + double-fault guard; idempotent incl. wrong-board re-stamp; Unpin restores to `@rk_home` or recreates a dead home via `rename-session`, no placeholder); kept `ComputeOrderKey`/`nextAppendKey`/validators/`BoardOption` (now the session-var key); deleted `parseBoardValue`/`serializeBoardValue`/`setBoardValue`/`ListAllBoardEntries`/`RemoveAllByWindowID`. **Relay** (`api/relay.go`) attaches the PTY directly to `ResolveWindowSession`'s result (home or `_rk-pin-*`) — removed the ephemeral allocation, `@rk_owner_pid` stamp, ephemeral select/kill; `ResolveWindowSession` no longer filters `rk-relay-*`. **Deleted**: `cmd/rk/serve_sweep.go` + its `serve.go` wiring; `RelaySessionPrefix`/`OwnerPIDOption`/`NewGroupedSession`/`SetSessionOwnerPID`/`GetSessionOwnerPID`/`ListRawSessionNames` from `tmux.go`; `TmuxOps.NewGroupedSession`/`SetSessionOwnerPID`. `parseSessions` skips `_rk-pin-*`+`_rk-ctl`, NO LONGER `rk-relay-*`. 
**SSE** (`api/sse.go`): dropped `detectKilledWindowIDs`/`previousWindowIDs`/window-kill `board-changed {cleanup}` diff + `RemoveAllByWindowID` dep, `broadcastBoardBootstrap`/`previousBoardJSON`/first-poll bootstrap/cached-board send; only pin/unpin/reorder `board-changed` events remain (`BoardEntriesFetcher` slimmed to one method). Accepted tradeoffs: multi-client active-window collisions on a shared home session; a pinned window disappears from its home session's sidebar until unpinned. Pins persist across rk restarts → no restore-sweep. `_rk-ctl` anchor + `exit-empty off` (`260602-a1wo`) unchanged — pin-session persistence relies on `exit-empty off`; `tmuxctl/` active-window derivation left untouched (investigate-only). Frontend `boards.ts`/`use-boards.ts` doc/contract updated to server-scoped pin-session derivation; `BoardEntry` shape + `board-pane.tsx`/`board-page.tsx` structure unchanged. | `260602-qn62-move-based-board-pin-sessions` | diff --git a/docs/memory/run-kit/index.md b/docs/memory/run-kit/index.md index 3ddfadaa..cde2b9fd 100644 --- a/docs/memory/run-kit/index.md +++ b/docs/memory/run-kit/index.md @@ -3,6 +3,6 @@ | File | Description | |------|-------------| | [architecture.md](architecture.md) | System architecture, component responsibilities, data flow | -| [tmux-sessions.md](tmux-sessions.md) | Session enumeration, group filtering, relay ephemerals, window addressing, unified test-socket naming + `rk reaper`, env-gated `RK_SERVER_ALLOWLIST` test-scoping | +| [tmux-sessions.md](tmux-sessions.md) | Session enumeration, group filtering, direct-attach relay + move-based board pin-sessions (`_rk-pin-*`), window addressing, unified test-socket naming + `rk reaper`, env-gated `RK_SERVER_ALLOWLIST` test-scoping | | [ui-patterns.md](ui-patterns.md) | URL structure, three-way server route guard (view/waiting/not-found) + create-server pending lifecycle, keyboard shortcuts, component conventions | | [rk-riff.md](rk-riff.md) | `rk riff` subcommand — worktree 
+ tmux window + Claude launcher | diff --git a/docs/memory/run-kit/tmux-sessions.md b/docs/memory/run-kit/tmux-sessions.md index c3df0517..819a2543 100644 --- a/docs/memory/run-kit/tmux-sessions.md +++ b/docs/memory/run-kit/tmux-sessions.md @@ -65,55 +65,76 @@ Grouped sessions share the same windows, so displaying both is incorrect — it `session_grouped=1` for ALL members of a group — including the primary session. You cannot simply filter out `grouped=1` sessions without also losing the primaries. The `name === group` check distinguishes primaries from copies. -## Per-WebSocket Ephemeral Grouped Sessions (`rk-relay-*`) +## Terminal Relay — Direct Attach (no ephemeral) -The terminal relay (`app/backend/api/relay.go`) creates one ephemeral grouped session per WebSocket connection. tmux session groups share window membership but maintain **independent active-window state**, so this is the natural unit of isolation for clients that must navigate windows independently — particularly board panes pointing at different windows of the same real session. +Since `260602-qn62-move-based-board-pin-sessions`, the per-WebSocket ephemeral grouped session (`rk-relay-*`) is **gone**. The relay (`app/backend/api/relay.go`) attaches the PTY **directly** to the real session that owns the window. The ephemeral existed solely to give each WebSocket its own active-window pointer (a tmux session has exactly ONE, shared across all attachments) so board panes pointing at different windows of the same session wouldn't collide. The move-based board model removes window *sharing* — each board pane's window now lives in its own single-window pin-session whose sole window is permanently active — so the isolation layer has no remaining purpose and was deleted wholesale. -**Naming convention**: `rk-relay-<8 hex>`. The prefix is exported as `tmux.RelaySessionPrefix = "rk-relay-"` and is reserved by run-kit. 
The 8-hex suffix is read from `crypto/rand` (constitution I — never derived from user input). 4B namespace, collision-free at any realistic scale. +**What was removed** (all gone — do NOT reference as live): `tmux.RelaySessionPrefix = "rk-relay-"`, `newEphemeralRelayName` (the `crypto/rand` 8-hex generator), `tmux.NewGroupedSession`, the `@rk_owner_pid` ownership stamp + `tmux.SetSessionOwnerPID`/`GetSessionOwnerPID` + `tmux.OwnerPIDOption`, the ephemeral `defer KillSessionCtx`, the scoped `SelectWindowInSession`-on-ephemeral, `tmux.ListRawSessionNames`, and the startup `sweepOrphanedRelaySessions` (whole file `cmd/rk/serve_sweep.go` deleted). `parseSessions` **no longer skips** `rk-relay-*`. -**Creation**: `tmux.NewGroupedSession(ctx, server, realSession, ephemeral)` runs `tmux [-L server] new-session -d -s -t `. It first probes `has-session -t ` because tmux's `new-session -t` silently creates an empty group when the target is missing — the explicit probe ensures the caller's `defer KillSessionCtx` is the only path that creates ephemerals. The wrapper applies `context.WithTimeout(ctx, TmuxTimeout)` consistent with sibling helpers. +**`handleRelay` flow now**: `decodeWindowID(r)` validates the `@N` path param (400 before upgrade), `serverFromRequest(r)` picks the server, then `s.tmux.ResolveWindowSession(ctx, server, windowID)` (5s timeout) resolves the owning session — either a normal home session or a `_rk-pin-*` board pin-session. A missing window closes the socket with `4004`. The relay then does a direct `s.tmux.SelectWindow(windowID, server)` (bare `select-window -t @N`) on the real session and `attach-session -t ` via `creack/pty`. There is no defer-kill — the session is durable and owned by tmux. -**Lifecycle**: bound to the WebSocket via `defer s.tmux.KillSessionCtx(context.Background(), server, ephemeral)` in the relay handler. 
Cleanup uses `context.Background()` rather than `r.Context()` because the request context is already cancelled at cleanup time (the trigger for the defer). The defer is placed before `pty.StartWithSize` so a PTY-start failure still reaps the ephemeral. +**Accepted behavioural tradeoffs** (the user explicitly accepted these — `260602-qn62`): the real (home) session has a single active-window pointer shared across attachments, so multi-client navigation now mutates the real session's active window (two tabs on different windows of one session yank each other; a viewer navigating moves the real pointer). For a pin-session this is a no-op — its sole window is permanently active. These were the precise behaviours the ephemeral previously masked. -**Relay ownership stamp** (since `260529-wtg4-isolate-relay-sweep-test-leaks`): each relay is stamped with `@rk_owner_pid = os.Getpid()` (the owning `rk serve` PID) via session-scoped `tmux.SetSessionOwnerPID(ctx, server, ephemeral, pid)` — `set-option -t @rk_owner_pid `, mirroring `SetSessionColor`'s `set-option -t @opt` pattern; the option name is the exported const `tmux.OwnerPIDOption = "@rk_owner_pid"`. Session-scoped so it dies with the ephemeral and never bleeds onto the real session through the group. **Stamp ordering is load-bearing**: the stamp happens AFTER `NewGroupedSession` succeeds and the `defer KillSessionCtx` is registered, but BEFORE `SelectWindowInSession` makes the ephemeral attachable. A sibling sweep reaps any `rk-relay-*` whose `@rk_owner_pid` is empty (treated as an orphan), so an attachable-but-unstamped relay (owner=="") is indistinguishable from an orphan and would be wrongly reaped by the next sweep; stamping before attach guarantees the only unstamped relays a sweep can see are genuine orphans. 
**On stamp failure the handler aborts clean**: `slog.Warn`, write a WebSocket close with the `4001` relay-allocation close code, and `return` — the already-registered `defer KillSessionCtx` reaps the half-owned ephemeral. Keeping an unstampable relay open is a false promise (the next sweep would drop it), so abort-clean mirrors every other setup-step failure in `handleRelay`. +**Filter at the chokepoint**: `parseSessions` in `internal/tmux/tmux.go` early-skips `_rk-pin-*` sessions and the `_rk-ctl` anchor (it NO LONGER skips `rk-relay-*` — those don't exist). See § Pin Sessions and § parseSessions Filter Chokepoint below. -**Filter at the chokepoint**: `parseSessions` in `internal/tmux/tmux.go` early-skips any line whose `#{session_name}` starts with `RelaySessionPrefix`. This is the single chokepoint — every user-facing list (REST `/api/sessions`, SSE `sessions` event, board derivation in `api/boards.go`, server-aggregate `/api/servers`) flows through `ListSessions`, so a single early-skip here guarantees ephemerals never leak into the UI regardless of future consumers (multi-server SessionProvider included). +**Active-window highlight is event-derived per group** (since `260530-v6hm-active-window-event-derivation`, unchanged by `260602-qn62`): the sidebar/URL highlight is derived from tmux control-mode `%session-window-changed` events tracked per session group (two-tier: event-tracked `@wid` authoritative, base `#{window_active}` only as a cold-start/reconnect fallback). The `internal/tmuxctl/*` derivation subsystem was scoped *investigate-only, NOT deletable* by `260602-qn62` (it is driven by the `_rk-ctl` anchor and serves the SESSIONS highlight independent of boards) and is **left untouched** — its internal comments still mention relays, which is out of scope. The `$sid`→group resolution it relies on still does NOT filter `_rk-ctl` (the anchor must resolve to its base group). Design in `architecture.md` § Active-Window Event Derivation. 
-**Startup sweep** (owner-PID scoped since `260529-wtg4-isolate-relay-sweep-test-leaks`): `sweepOrphanedRelaySessions(ctx)` in `app/backend/cmd/rk/serve_sweep.go` reaps orphan `rk-relay-*` ephemerals left by a crashed predecessor. Wired into `serveCmd.RunE` after `tmux.EnsureConfig()` and before the goroutine that calls `server.ListenAndServe()`, with a 30s bounded context. The sweep iterates `tmux.ListServers(ctx)` (read scope unchanged) and calls `tmux.ListRawSessionNames(ctx, server)` (the unfiltered variant — the user-facing filter would hide the very ephemerals being reaped). For each `rk-relay-*` session it now reads `@rk_owner_pid` via `tmux.GetSessionOwnerPID` and reaps (`tmux.KillSessionCtx`) **only** when the owner is empty/unstamped (`relayOwnerIsDead("")→true`) OR names a dead PID. Liveness is `pidAlive(pid)` via `syscall.Kill(pid, 0)`: `nil`→alive (spare), `ESRCH`→dead (reap), `EPERM`→alive (spare), any other error→alive (spare) — a leak-not-kill bias. A non-integer owner is malformed and treated as orphan (reap) defensively. +## Pin Sessions (`_rk-pin-*`) — Move-Based Board Membership -*Why owner-PID scoping*: the old blanket reap killed **every** `rk-relay-*` it found across all servers, with no notion of ownership. Because the sweep runs at every `rk serve` startup, launching the e2e backend, an `air` rebuild under `just dev`, or any second `rk serve` would drop a **live** dashboard's open terminal WebSockets (relay ephemerals) even though the underlying real session survived. PID-ownership scoping spares a live sibling's relays while still reaping genuine orphans whose owner has actually exited. +Since `260602-qn62-move-based-board-pin-sessions`, a window pinned to a board is physically **moved** (`tmux move-window`, not `link-window`) into its **own single-window session** named `_rk-pin-<N>`.
The core invariant: **every window lives in exactly ONE session at a time** — either its home session (visible in SESSIONS) or a pin-session (visible only as a BOARDS pane), never both. A "board" is **not a tmux session** — it is the *set of pin-sessions sharing an `@rk_board` value*. -Per-server failures (list, owner-read, or kill) are logged and accumulated into the aggregate error — they MUST NOT abort the sweep or block startup. The sweep still matches only the fixed `rk-relay-` prefix; user sessions and daemon sessions (`rk` on `rk-daemon`) are never touched (constitution VI), and the `_rk-ctl` anchor is excluded by both the prefix guard and an explicit `name == tmux.ControlAnchorSessionName` guard. +**Reserved naming + reversible helpers** (`internal/tmux/tmux.go`): `tmux.PinSessionPrefix = "_rk-pin-"`. The name is derived deterministically from the window's `@N` id by stripping the leading `@` (tmux session names disallow `@`): `PinSessionName("@42") → ("_rk-pin-42", true)`; the inverse `WindowIDFromPinSession("_rk-pin-42") → ("@42", true)`. Both validate via `ValidWindowID`. The pure, reversible mapping means membership needs **no** name→id lookup table. `ListPinSessionNames(ctx, server)` runs `list-sessions -F '#{session_name}'` and returns every `_rk-pin-*` name (nil, no error, when no server runs) — board membership reads flow through it. -*Read-side stays global*: only the destructive relay reap is scoped. `tmux.ListServers` is unchanged — every real tmux server (including leaked test sockets) stays visible in the UI by design, so this class of leak stays observable rather than masked. (Since `260530-cf3g`, `/api/servers` surfaces *every* server — its test-socket hide was deleted — and the old `IsGoTestServerName` allowlist is gone, replaced by `IsTestServerName`; see § `/api/servers` Lists Every Server and § Unified Test-Socket Naming.) 
+**Membership = session-scoped vars on the pin-session** (`internal/tmux/board.go`), read via `show-options -v -t `: -*Single-uid socket model / EPERM rationale*: `ListServers` scans only `/tmp/tmux-{os.Getuid()}/`, so every server the sweep touches belongs to the current user; a foreign user's tmux server lives under a different uid's socket dir and is invisible to the sweep. EPERM is therefore not an expected owner state — relays are stamped with the same-user `rk serve` PID — and is spared as the benign leak-not-kill direction (a recycled/foreign PID survives one extra cycle rather than wrongly killing a live relay). Note `pidAlive` (sweep, EPERM→spare) intentionally differs from `daemon_portowner.go:processAlive` (EPERM→dead, where a forceful SIGTERM/SIGKILL makes erring toward "dead" the safe direction); both biases are documented at their sites. +| Var (constant) | Meaning | +|----------------|---------| +| `@rk_board` (`tmux.BoardOption`) | which board this pinned window belongs to | +| `@rk_home` (`tmux.HomeOption`) | the home session to restore the window to on unpin | +| `@rk_board_order` (`tmux.BoardOrderOption`) | fractional order key within the board (`ComputeOrderKey`) | -**Why `ListRawSessionNames` exists**: the housekeeping caller (sweep) needs to *see* the ephemerals that the user-facing path *hides*. `ListRawSessionNames(ctx, server)` runs `list-sessions -F '#{session_name}'` and returns every name without applying the group-copy de-duplication or the `rk-relay-*` exclusion that `ListSessions`/`parseSessions` applies. It returns `nil` when no tmux server is running on the socket. Treat it as an internal escape hatch — no other callers should use it. +> **Deliberate retention**: `BoardOption` (`@rk_board`) survives `260602-qn62` — but its meaning changed from a SERVER-option key (the old comma/colon encoding) to a per-pin-session SESSION-var key. 
`ComputeOrderKey` and `nextAppendKey` also survive (ordering); only the bespoke server-option *encoding* helpers were deleted. -**Relay resolves the owning session from the window ID** (since `260529-chgz-window-id-routing`): the WebSocket URL is now `/relay/{windowId}?server={server}` (was `/relay/{session}/{window}`). Because the grouped ephemeral keys off the *real session name*, the relay calls `ResolveWindowSession(ctx, server, windowID)` (a targeted `display-message -t -p '#{session_name}'`, 5s timeout) before `NewGroupedSession`. A malformed window ID is rejected with `400` before the WS upgrade; an unknown ID closes the socket with `4004`. The `select-window` on the ephemeral now targets the window ID directly (`@N` is shared across grouped sessions). Ephemeral names remain purely backend-internal — the frontend never sees an `rk-relay-*` name in URLs, request bodies, response payloads, or SSE frames. +**`Pin(ctx, server, windowID, board)`**: validates `windowID`/`board` (Constitution §I); idempotent — if `_rk-pin-<N>` already exists it is a no-op when the board matches, and **re-stamps `@rk_board`** when it differs (wrong-board re-pin must not silently leave the window on its old board, since the pin-session is the only authoritative place membership lives). Otherwise: resolve the home session via `ResolveWindowSession`; compute the append order key (`nextAppendKey` over the board's existing keys, restricted to this board, computed BEFORE the move); `new-session -d -s _rk-pin-<N>` (starts with a placeholder window); capture the placeholder's `#{window_id}` via `list-windows` (robust to base-index config, not assumed index 0); `move-window` the target window in; `kill-window` the captured placeholder so the moved window is the session's sole window; then stamp `@rk_home`/`@rk_board`/`@rk_board_order`.
+- **Rollback (load-bearing)**: a half-stamped pin is an invisible lost window — `pinEntry` rejects a pin-session with no/invalid `@rk_board` (→ absent from BOARDS) and `parseSessions` filters `_rk-pin-*` (→ absent from SESSIONS). On any post-move `set-option` failure the move is undone (`MoveWindowToSession` back to home) and the pin-session killed. **Double-fault guard**: the pin-session is killed only if the move-back SUCCEEDED — if move-back itself fails the window is still physically inside the pin-session, so killing it would destroy a live window; the (still-named) pin-session is left intact so the window stays recoverable via unpin/re-pin. +- **`context.Background()` for teardown**: all rollback/teardown `KillSessionCtx` calls AND the rollback `MoveWindowToSession` are rooted in `context.Background()`, NOT Pin's ctx — Pin's ctx may be at/near its deadline, and a cancelled parent would make the kill a no-op and orphan the session (same reasoning the old relay teardown used). -**Active-window highlight is event-derived per group** (since `260530-v6hm-active-window-event-derivation`): the independent per-member active-window pointer that makes ephemerals the right isolation unit is also why the sidebar/URL highlight used to lag — when a relay ephemeral (or `rk riff`, or an external `new-window`) moved the active window, the *base* session's `#{window_active}` pointer that the read path consumed stayed stale. The highlight is now derived from tmux control-mode `%session-window-changed` events tracked per session group (two-tier: event-tracked `@wid` authoritative, base `#{window_active}` only as a cold-start/reconnect fallback), so the correct window highlights regardless of which group member moved it. The fix lives entirely in the tmuxctl→SSE derivation path — this relay/ephemeral model is unchanged. 
The `$sid`→group resolution that backs it relies on `rk-relay-*` ephemerals and the `_rk-ctl` anchor sharing their base session's `#{session_group}`, so `tmux.ListSessionGroups` deliberately does NOT filter them. Design in `architecture.md` § Active-Window Event Derivation. +**`Unpin(ctx, server, windowID, board)`**: idempotent — a missing pin-session is a silent success. Reads `@rk_home`; if that home session is alive, `move-window` the window back into it (tmux **appends** at the next free index — no original-position restore) and `killPinSessionIfPresent` (moving a pin-session's sole window out may auto-destroy the now-empty pin-session under tmux's exit-empty, so the explicit kill tolerates an already-gone session). If the home is dead, it is recreated by `rename-session`-ing the single-window pin-session to the home name (so the moved window becomes the recreated home's sole window — **no placeholder**), then the leftover `@rk_board`/`@rk_home`/`@rk_board_order` vars are unset on the recreated home so a future read doesn't mistake it for a pin. A pin-session with no recorded `@rk_home` is an error (Pin always stamps it; this should not happen). + +**Multi-unpin** appends each window in unpin order; there is no stored original index. **Pinning a home's only window** leaves an empty home session that persists via `exit-empty off` (the `260602-a1wo` backstop); unpin later finds it via `@rk_home` and moves the window back. **Pins are persistent across rk restarts** (durable user intent; tmux survives restarts per Constitution VI) → there is **NO restore-sweep** (and no relay sweep — both gone). + +**`Reorder(ctx, server, windowID, board, newOrderKey)`**: rewrites exactly the target pin-session's `@rk_board_order` var (via `setSessionOption`); errors if the pin-session is absent or its `@rk_board` ≠ `board`. No sibling renumber. The new key is computed server-side by `ReorderBoard` in `router.go` via `lookupNeighbourKeys` + `ComputeOrderKey`. 
+ +**Board listing** (all server-scoped — `move-window` can't cross tmux servers): `ListBoardEntries(ctx, server)` enumerates `_rk-pin-*` sessions on one server and derives `[]BoardEntry` via `pinEntry` (a session with no/invalid `@rk_board` is defensively skipped, not an error). `ListBoards(ctx)` iterates `ListServers(ctx)`, groups entries by `@rk_board`, returns an alphabetical `[]BoardSummary` with per-board pin counts. `GetBoard(ctx, name)` filters entries to one board across reachable servers, sorts by order key, with **NO stale write-back** — membership is derived live, so a killed pinned window's session simply disappears from the listing. A board exists only while ≥1 pin-session carries its name (**no empty boards, no name registry**); the last unpin makes the board vanish. + +## parseSessions Filter Chokepoint + +`parseSessions` in `internal/tmux/tmux.go` is the single chokepoint feeding every user-facing session list — REST `/api/sessions`, SSE `event: sessions`, board derivation, server-aggregate `/api/servers`. It early-skips two name classes: + +1. **`_rk-pin-*`** (`strings.HasPrefix(name, PinSessionPrefix)`) — a pinned window is physically MOVED into its pin-session, so it leaves its home session's tab list; the pin-session is never a SESSIONS entry (it renders only as a BOARDS pane). A single early-skip here guarantees no pin-session leaks into the SESSIONS UI regardless of future consumers. +2. **`_rk-ctl`** (`name == ControlAnchorSessionName`) — the tmuxctl control-mode anchor (unchanged). + +Since `260602-qn62-move-based-board-pin-sessions`, `parseSessions` **NO LONGER skips `rk-relay-*`** — those ephemerals were deleted (the relay attaches directly; see § Terminal Relay). A `rk-relay-xxxx` name, if one somehow appeared, would NOT be filtered. 
## `_rk-ctl` Anchor Session (tmuxctl control mode) `app/backend/internal/tmuxctl/` opens a long-running `tmux -CC` control-mode connection per tmux server (one Client per socket; see `architecture.md` § tmux Control-Mode Subscription for the package-level design). `tmux -CC` requires an attached session to emit notifications, so the Client creates a hidden anchor session named `_rk-ctl`. -Since `260602-a1wo-prevent-exit-empty-server-death`, the anchor is created **unconditionally on every observed server** — it is a permanent **session floor**, not a fallback. Earlier it was created only when the server had zero pre-existing sessions; that left a server that had real sessions at attach time with **no floor**, so when its last real session later closed and the remaining `rk-relay-*` ephemerals drained to zero, tmux's default `exit-empty on` reaped the whole server (a recurring Constitution VI violation — `runWork`/`utils`/`kit`, ≥3x). The floor now keeps the session count above zero for the server's lifetime regardless of how many real sessions exist. **The floor is decoupled from the control-mode attach target** — see § Anchored Target Form below. (Test/e2e sockets are still excluded from the control-mode candidate set entirely by `isTmuxSocketCandidate`→`IsTestServerName` in the supervisor, so no anchor is ever created on a leaked `rk-test-*` socket and the always-create path cannot resurrect them — see `architecture.md` § `IsTestServerName`.) +Since `260602-a1wo-prevent-exit-empty-server-death`, the anchor is created **unconditionally on every observed server** — it is a permanent **session floor**, not a fallback. 
Earlier it was created only when the server had zero pre-existing sessions; that left a server that had real sessions at attach time with **no floor**, so when its last real session later closed (and, pre-`260602-qn62`, the remaining `rk-relay-*` ephemerals drained to zero), tmux's default `exit-empty on` reaped the whole server (a recurring Constitution VI violation — `runWork`/`utils`/`kit`, ≥3x). The floor now keeps the session count above zero for the server's lifetime regardless of how many real sessions exist. (Since `260602-qn62` removed the relay ephemerals, the floor + `exit-empty off` is also what keeps an empty home session alive after its only window is pinned away — pin-session persistence relies on this backstop.) **The floor is decoupled from the control-mode attach target** — see § Anchored Target Form below. (Test/e2e sockets are still excluded from the control-mode candidate set entirely by `isTmuxSocketCandidate`→`IsTestServerName` in the supervisor, so no anchor is ever created on a leaked `rk-test-*` socket and the always-create path cannot resurrect them — see `architecture.md` § `IsTestServerName`.) -**Naming convention**: literal name `_rk-ctl`. The leading underscore follows the project's internal-entity convention. The single exported constant `tmux.ControlAnchorSessionName = "_rk-ctl"` lives alongside `tmux.RelaySessionPrefix` in `app/backend/internal/tmux/tmux.go` (it is a literal name rather than a prefix because there is exactly one anchor per tmux server). It is the single source of truth for the literal — `tmuxctl.Client`, `parseSessions`, and `sweepOrphanedRelaySessions` all reference the constant. +**Naming convention**: literal name `_rk-ctl`. The leading underscore follows the project's internal-entity convention. 
The single exported constant `tmux.ControlAnchorSessionName = "_rk-ctl"` lives alongside `tmux.PinSessionPrefix` in `app/backend/internal/tmux/tmux.go` (it is a literal name rather than a prefix because there is exactly one anchor per tmux server; since `260602-qn62` removed `RelaySessionPrefix`, `PinSessionPrefix` is its neighbour). It is the single source of truth for the literal — `tmuxctl.Client` and `parseSessions` reference the constant (the relay sweep that also referenced it was deleted with `serve_sweep.go`). **Keepalive tag**: immediately after `new-session -d`, the Client runs `tmux set-option -t =_rk-ctl @rk_ctl_keepalive 1` (the option name is exported as `tmuxctl.AnchorKeepaliveOption = "@rk_ctl_keepalive"`). The tag is a defensive marker only — v1 has no runtime consumer, but the marker exists so future code can identify the anchor without depending on the literal name. `set-option` is idempotent and safe to re-run when another `rk serve` instance created the anchor first. **Anchored target form**: the Client invokes `tmux [-L ] -CC attach-session -t = -r` when at least one real user session exists, else `-t =_rk-ctl -r`. The `=` prefix forces exact-match targeting and prevents prefix-match collisions, consistent with the convention established in PR #196 (daemon detection's `=rk-daemon:=serve`). The `-r` flag puts the connection in **read-only mode** — restricts input only; notifications still emit. Defensive default — future refactors that accidentally wire commands through the control-mode connection cannot mutate tmux state. -**Floor vs. attach target are decoupled** (since `260602-a1wo-prevent-exit-empty-server-death`). `resolveBootstrap` (`client.go`) does two separate things on every dial: (1) **always** `createAnchor` + `setAnchorKeepalive` (the floor — R1), then (2) pick the attach target — the first real session if one exists, else `_rk-ctl` (R2). 
The attach target is the first name returned by `firstSessionName`, which runs `tmux [-L <server>] list-sessions -F '#{session_name}'` and now **skips `_rk-ctl`** so the always-present anchor is never picked when a real session exists (`_rk-ctl` sorts ahead of a lowercase name like `runkit`, so an unskipped listing would wrongly select it and regress the "prefer a real session" contract). `rk-relay-*` ephemerals are NOT skipped by `firstSessionName` — they share their base session's window membership and are valid attach targets. Attaching control mode to `_rk-ctl` would also be correct (`%session-window-changed` is global on tmux 3.6a — see [[tmux-control-mode-event-scope]]); preferring a real session is a minimal-diff, zero-event-scope-risk choice, not a correctness requirement. +**Floor vs. attach target are decoupled** (since `260602-a1wo-prevent-exit-empty-server-death`). `resolveBootstrap` (`client.go`) does two separate things on every dial: (1) **always** `createAnchor` + `setAnchorKeepalive` (the floor — R1), then (2) pick the attach target — the first real session if one exists, else `_rk-ctl` (R2). The attach target is the first name returned by `firstSessionName`, which runs `tmux [-L <server>] list-sessions -F '#{session_name}'` and now **skips `_rk-ctl`** so the always-present anchor is never picked when a real session exists (`_rk-ctl` sorts ahead of a lowercase name like `runkit`, so an unskipped listing would wrongly select it and regress the "prefer a real session" contract). (Pre-`260602-qn62`, `rk-relay-*` ephemerals were valid attach targets and not skipped; they no longer exist. `_rk-pin-*` pin-sessions are single-window board sessions — `firstSessionName` is in the untouched tmuxctl path, so whether it prefers or skips them is out of scope for `260602-qn62`.) 
Attaching control mode to `_rk-ctl` would also be correct (`%session-window-changed` is global on tmux 3.6a — see [[tmux-control-mode-event-scope]]); preferring a real session is a minimal-diff, zero-event-scope-risk choice, not a correctness requirement. **Concurrent-rk race**: when two `rk serve` instances open a Client against the same fresh tmux server, only one `new-session -d` call succeeds; the loser gets tmux's "duplicate session" error. `tmuxctl.isDuplicateSessionError` treats this as benign and the loser proceeds to attach — multi-rk is supported by construction since `tmux -CC attach` is independent per client. -**Filtering at the chokepoint**: `parseSessions` in `app/backend/internal/tmux/tmux.go` early-skips any line whose `#{session_name}` equals `ControlAnchorSessionName` — parallel to the existing `RelaySessionPrefix` skip. Single chokepoint — every user-facing list (REST `/api/sessions`, SSE `event: sessions`, board derivation, server-aggregate `/api/servers`) flows through `ListSessions`, so the anchor never leaks into the UI. +**Filtering at the chokepoint**: `parseSessions` in `app/backend/internal/tmux/tmux.go` early-skips any line whose `#{session_name}` equals `ControlAnchorSessionName` — parallel to the `_rk-pin-*` pin-session skip (since `260602-qn62`; the old `RelaySessionPrefix` skip was removed with the relay ephemerals). Single chokepoint — every user-facing list (REST `/api/sessions`, SSE `event: sessions`, board derivation, server-aggregate `/api/servers`) flows through `ListSessions`, so the anchor never leaks into the UI. -**Sweep exclusion**: `sweepOrphanedRelaySessions(ctx)` in `app/backend/cmd/rk/serve_sweep.go` only ever considers `rk-relay-`-prefixed sessions for reaping (and only reaps those whose `@rk_owner_pid` is dead/absent — see § Per-WebSocket Ephemeral Grouped Sessions > Startup sweep), so the anchor is excluded by construction. 
A defense-in-depth `name == tmux.ControlAnchorSessionName` guard is in place anyway — the anchor is owned by `tmuxctl`, not the relay, and must never be reaped by the relay sweep. `cmd/rk/serve.go` orders startup as `EnsureConfig → sweepOrphanedRelaySessions → tmuxctl.Supervisor.Start → server.ListenAndServe`, so the sweep runs before any anchor has been created — but the explicit guard protects against future re-orderings. +**No relay sweep to exclude from** (since `260602-qn62`): the startup `sweepOrphanedRelaySessions` and its file `cmd/rk/serve_sweep.go` are deleted, so there is no longer a sweep that could observe the anchor as an orphan. `cmd/rk/serve.go` now orders startup as `EnsureConfig → tmuxctl.Supervisor.Start → server.ListenAndServe` (no sweep step). The `rk reaper` operator command still hard-skips `_rk-ctl` (see § rk reaper). **Lifecycle**: the anchor outlives any single `Client` invocation. `Close()` does not delete it — closing the PTY only ends the subscription; the detached session lives on. This is intentional: subsequent `rk serve` invocations re-attach to the existing anchor rather than re-creating it. The anchor only disappears when the entire tmux server is killed. @@ -135,15 +156,14 @@ All tmux functions accept a `server string` parameter: - `ListSessions(server)` — queries only the specified server - `ListWindows(session, server)` — lists windows for a session on the specified server -- `SelectWindow(windowID, server)` — selects a window on the specified server by its stable window ID via a bare `select-window -t @N` (`@N` is a self-contained `-t` target; no session:index string — since `260529-chgz-window-id-routing`). 
**Caveat (since `260529-jad6`)**: a bare target is ambiguous inside a tmux session group (members share window membership but keep independent active-window state), so both the REST `/select` handler and the relay now use the session-scoped `SelectWindowInSession(session, windowID, server)` (`select-window -t {session}:@N`) instead — `SelectWindow` has no production callers left -- `SelectWindowInSession(session, windowID, server)` — session-scoped select (`select-window -t {session}:@N`). REST `/select` resolves the owning session via `ResolveWindowSession` then calls this; the relay scopes the select to its per-WebSocket ephemeral. The scoped target is what disambiguates which group member gets the active window -- `ResolveWindowSession(ctx, server, windowID)` — returns the owning session name for a window ID via a targeted `display-message -t {windowID} -p '#{session_name}'` (O(1)); used by the relay (to build the grouped ephemeral), the REST `/select` handler (session-scoped select), and `ProjectRoot`. Errors (or empty result) mean "window not found" +- `SelectWindow(windowID, server)` — selects a window on the specified server by its stable window ID via a bare `select-window -t @N` (`@N` is a self-contained `-t` target; no session:index string — since `260529-chgz-window-id-routing`). Since `260602-qn62` the **relay uses this bare select directly** on the resolved real session (the ephemeral that needed the scoped variant is gone). The REST `/select` handler still uses the session-scoped `SelectWindowInSession` to disambiguate inside session groups +- `SelectWindowInSession(session, windowID, server)` — session-scoped select (`select-window -t {session}:@N`). REST `/select` resolves the owning session via `ResolveWindowSession` then calls this.
(Pre-`260602-qn62` the relay also used it to scope the select to its per-WebSocket ephemeral; that path is gone, so REST `/select` is now its sole caller) +- `ResolveWindowSession(ctx, server, windowID)` — returns the owning session name for a window ID via `list-windows -a -F '#{session_name}\t#{window_id}'`, returning the first matching session. Since `260602-qn62` a window lives in exactly ONE session, so the first match is authoritative — the result is a normal home session OR a `_rk-pin-*` pin-session (the function no longer filters `rk-relay-*`). Used by the relay (direct attach), `Pin` (to remember the home), the REST `/select` handler, and `ProjectRoot`. Errors (or empty result) mean "window not found" +- `ListPinSessionNames(ctx, server)` — returns every `_rk-pin-*` session name via `list-sessions -F '#{session_name}'`; board-membership reads (`ListBoardEntries`) flow through it. Returns nil (no error) when no server runs. **Replaces** the deleted `ListRawSessionNames` as the only "raw session-name listing" helper, but it is scoped to pin-sessions, not a general escape hatch - `CreateSession(name, cwd, server)` — creates sessions on the specified server - `ReloadConfig(server)` — hot-reloads config via `source-file` on the specified server - `KillSession(session, server)` — kills the named session on the specified server (thin `context.Background()` wrapper around `KillSessionCtx`) -- `KillSessionCtx(ctx, server, session)` — ctx-accepting variant; relay handler cleanup passes `context.Background()` so the kill survives request-context cancellation (the trigger for the defer) -- `NewGroupedSession(ctx, server, realSession, ephemeral)` — creates `rk-relay-*` ephemeral via `new-session -d -s -t `; probes `has-session -t ` first to avoid leaking an empty-group session when the target is missing -- `ListRawSessionNames(ctx, server)` — unfiltered `list-sessions -F '#{session_name}'`; bypasses `parseSessions`'s group-copy and `rk-relay-*` filters. 
Reserved for housekeeping callers (the startup sweep). Returns nil when no server is running +- `KillSessionCtx(ctx, server, session)` — ctx-accepting variant; `Pin`'s rollback/teardown passes `context.Background()` so the kill survives a near-deadline Pin ctx (an expired parent would make the kill a no-op and orphan the session) - `SendKeys(windowID, keys, server)` — sends keys to the window identified by `windowID` on the specified server - `MoveWindow(windowID, dstIndex, server)` — reorders a window within its own session. Source addressed by stable `windowID`; destination remains positional. Resolves the source's current index from the ID exactly once (via `resolveWindowSessionIndex`, a `display-message -p '#{session_name}\t#{window_index}'` lookup), then (since `260529-jad6`) emits the full adjacent-`swap-window` bubble sequence as a **single `\;`-chained tmux invocation** rather than one subprocess per step — so no other mutation can interleave mid-reorder (insert-before semantics). tmux preserves the window's ID across the swaps - `MoveWindowToSession(windowID, dstSession, server)` — moves a window from its current session to another on the specified server via `tmux move-window -s {windowID} -t {dstSession}:` (window-ID source, session destination). Destination index is auto-assigned by tmux; the window's ID is preserved (tmux contract) @@ -159,7 +179,7 @@ Server management endpoints: ## Server-Scoped User Options -tmux distinguishes window-scoped (`-w`) options from server-scoped (`-s`) options. We use both: window-scoped for per-window state (`@color`, `@rk_type`, `@rk_url`) and server-scoped for state that belongs to the tmux server as a whole. +tmux distinguishes window-scoped (`-w`) options, server-scoped (`-s`) options, and session-scoped user options (the default — `set-option -t `). 
We use all three: window-scoped for per-window state (`@color`, `@rk_type`, `@rk_url`), server-scoped for state belonging to the tmux server as a whole (`@rk_session_order`), and session-scoped on `_rk-pin-*` pin-sessions for board membership (`@rk_board`/`@rk_home`/`@rk_board_order`, since `260602-qn62`) and on `_rk-ctl` for the control-mode keepalive marker. | Option | Scope | Set via | Read via | Owner | |--------|-------|---------|----------|-------| @@ -167,63 +187,46 @@ tmux distinguishes window-scoped (`-w`) options from server-scoped (`-s`) option | `@rk_type` | window (`-w`) | `CreateWindowWithOptions`, `tmux.SetWindowOptions` (both via `appendOptionOps`; since `260529-jad6`) | `ListWindows` format string field 9 | per-window (iframe) | | `@rk_url` | window (`-w`) | `CreateWindowWithOptions`, `tmux.SetWindowOptions` (both via `appendOptionOps`; since `260529-jad6`) | `ListWindows` format string field 10 | per-window (iframe) | | `@rk_session_order` | server (`-s`) | `tmux.SetSessionOrder(ctx, server, order)` | `tmux.GetSessionOrder(ctx, server)` | sidebar reorder | -| `@rk_board` | server (`-s`) | `tmux.Pin` / `tmux.Unpin` / `tmux.Reorder` | `tmux.ListBoardEntries(ctx, server)` | pane boards (cross-server union) | +| `@rk_board` | session-scoped on each `_rk-pin-*` (set via `set-option -t `) | `tmux.Pin` / `tmux.Reorder` (re-stamp on wrong-board re-pin) | `tmux.ListBoardEntries(ctx, server)` (per-pin `show-options -v`) | board membership (which board; const `tmux.BoardOption`) | +| `@rk_home` | session-scoped on each `_rk-pin-*` | `tmux.Pin` (stamped at pin time) | `tmux.Unpin` (restore target) | board pin restore-to-home (const `tmux.HomeOption`) | +| `@rk_board_order` | session-scoped on each `_rk-pin-*` | `tmux.Pin` (append key) / `tmux.Reorder` | `tmux.ListBoardEntries` / `tmux.GetBoard` (sort) | board pin fractional order (const `tmux.BoardOrderOption`; via `ComputeOrderKey`) | | `@rk_ctl_keepalive` | session-scoped on `_rk-ctl` (set via `set-option 
-t =_rk-ctl`) | `tmuxctl.Client.setAnchorKeepalive` | (no runtime consumer; defensive marker) | tmuxctl control-mode anchor | -| `@rk_owner_pid` | session-scoped on each `rk-relay-*` ephemeral (set via `set-option -t `) | `tmux.SetSessionOwnerPID` (const `tmux.OwnerPIDOption`) | `tmux.GetSessionOwnerPID` | relay ownership (startup sweep liveness check) | `@rk_session_order` stores a JSON-encoded array of session names defining the user-preferred sidebar render order. Because the value is server-scoped, it is shared by every client connected to the same tmux server — laptop and phone hitting the same `tmux -L runkit` see the same order. Lifetime matches the tmux server (lost on server kill, NOT on rk-go restart per Constitution VI). Both wrapper functions wrap their context with `context.WithTimeout(ctx, TmuxTimeout)` (10s) and route through `tmuxExecRawServer` (which captures stderr in error messages so callers can pattern-match "invalid option" / "no server running" to distinguish operational empty-state from real failures). The HTTP endpoints `GET /api/sessions/order` and `POST /api/sessions/order` (migrated PUT→POST by `260529-jad6` per §IX; see `architecture.md` § Endpoints) layer over these wrappers. The mutating POST triggers a synchronous SSE broadcast (`event: session-order`) so all connected clients on that server reorder live; the SSE hub also bootstraps the cache once per server on first poll so the order survives an rk-go restart that left tmux running. -### `@rk_board` — Pane Board Membership +### `@rk_board` — Board Membership (pin-session SESSION var) -`@rk_board` stores the per-server portion of pane-board memberships. **Each tmux server stores memberships only for its own windows** — there is no central registry. The aggregate "boards" set is derived by reading `@rk_board` from every server discovered via `tmux.ListServers(ctx)` and unioning the entries. 
Boards are **derived from membership**: there is no separate `@rk_boards` registry option, and empty boards cannot exist (a board materializes on first pin and vanishes on last unpin). +Since `260602-qn62-move-based-board-pin-sessions`, `@rk_board` is **no longer a server-option comma/colon encoding**. It is a **session-scoped var on each `_rk-pin-*` pin-session** holding just the board name. Board membership is derived entirely from the set of pin-sessions and their vars — a **board is the set of pin-sessions sharing an `@rk_board` value**. There is no `@rk_boards` registry; **empty boards cannot exist** (a board appears when its first pin is created and vanishes when its last pin is unpinned). Boards are **server-scoped** — `move-window` can't cross tmux servers, so a pin-session always lives on its window's server; the board *list* is summarized across reachable servers but no cross-server window union exists. -**Value format**: comma-separated entries, each entry colon-separated `{window_id}:{board}:{order_key}`. Empty value or unset option is treated as zero entries (no error). Example: +**Deleted with this change** (the old bespoke serialization — do NOT reference as live): the `@rk_board` server-option value format `{window_id}:{board}:{order_key}`, `parseBoardValue`, `serializeBoardValue`, `setBoardValue`, `ListAllBoardEntries` (cross-server union), `nextAppendKey`'s old behaviour (the helper survives, see below), `RemoveAllByWindowID`, the `boardEntrySep`/`boardFieldSep` separators, and the `GetBoard` lazy stale write-back. The `BoardOption` constant survives as the `@rk_board` SESSION-var key (repurposed in place — see § Pin Sessions for the deliberate retention). -``` -runkit: @rk_board = "@1234:main:a,@5678:main:c,@9abc:deploy:a" -default: @rk_board = "@def0:main:b" -``` - -This reconstructs board `main` as `[@1234@runkit:a, @def0@default:b, @5678@runkit:c]` (sorted by `order_key`).
+**Validators** (unchanged): board name `^[A-Za-z0-9_-]{1,32}$` (`ValidBoardName`), window id `^@\d+$` (`ValidWindowID`), order key `^[a-z]{1,16}$` (`ValidOrderKey`). -**Field separators are reserved**: `,` and `:` MUST NOT appear in board names. The board-name regex `^[A-Za-z0-9_-]{1,32}$` enforces this at validation time. `window_id` matches tmux's `#{window_id}` form (`^@\d+$`). `order_key` is `^[a-z]{1,16}$` (lowercase ASCII only). - -**Read pattern** — parallel `show-options -s -v @rk_board` across `ListServers()`, then union and tag each entry with its source server. The tmux pkg exposes: +**The board API surface** (`internal/tmux/board.go`): | Function | Purpose | |----------|---------| -| `tmux.ListBoardEntries(ctx, server)` | per-server entries (unset/no-server/invalid-option ⇒ `([]BoardEntry{}, nil)`) | -| `tmux.ListAllBoardEntries(ctx)` | aggregate across `ListServers()` | -| `tmux.ListBoards(ctx)` | distinct board names + pin counts (alphabetical) | -| `tmux.GetBoard(ctx, name)` | entries for one board, sorted by `OrderKey`, with lazy stale cleanup | -| `tmux.Pin(ctx, server, windowID, board)` | append-or-noop (idempotent re-pin) | -| `tmux.Unpin(ctx, server, windowID, board)` | remove `(windowID, board)` only; tolerant of missing entries | -| `tmux.Reorder(ctx, server, windowID, board, newKey)` | rewrite the matching entry's `orderKey` | -| `tmux.RemoveAllByWindowID(ctx, server, windowID)` | drop every entry for a window-id; returns the affected board names (used by the SSE hub for window-kill cleanup) | -| `tmux.ComputeOrderKey(before, after)` | fractional indexing helper | - -All wrappers route through `tmuxExecRawServer` and wrap their context with `context.WithTimeout(ctx, TmuxTimeout)` (10s). Reads treat `invalid option`, `unknown option`, `no server running`, and `failed to connect` as the empty-entries case (mirrors `GetSessionOrder`). 
Malformed entries inside a well-formed value are silently skipped with `slog.Warn` — the well-formed entries are still returned. - -**Lexicographic / fractional order keys**: cross-server ordering is achieved without renumbering via Figma/Linear-style fractional indexing. `ComputeOrderKey(before, after)` returns a string strictly greater than `before` and strictly less than `after` in lexicographic order: - -- `(null, "b")` → `"a"` (prepend) -- `("c", null)` → `"d"` (append) -- `("b", "c")` → `"bm"` (insert) -- `("b", "bm")` → `"bg"` (insert between adjacent suffixes) - -The algorithm is pure Go (no external deps). Inserts MUST NOT renumber existing entries. +| `pinEntry(ctx, server, pinSession)` | derive one `BoardEntry` by reading the pin-session's `@rk_board`/`@rk_board_order` vars; a session with no/invalid `@rk_board` is defensively skipped (`ok=false`, no error) | +| `ListBoardEntries(ctx, server)` | per-server entries from `ListPinSessionNames` → `pinEntry` (no-server/absent-option ⇒ `([]BoardEntry{}, nil)`) | +| `ListBoards(ctx)` | iterate `ListServers`, group by `@rk_board`, alphabetical `[]BoardSummary` with per-board pin counts | +| `GetBoard(ctx, name)` | entries for one board across reachable servers, sorted by `OrderKey`, **no write-back** (membership is live; killed pins just disappear) | +| `Pin(ctx, server, windowID, board)` | move-window into `_rk-pin-` + stamp 3 vars; idempotent (re-stamp `@rk_board` on wrong-board re-pin); rollback rooted in `context.Background()` (see § Pin Sessions) | +| `Unpin(ctx, server, windowID, board)` | restore window to `@rk_home` (recreate dead home via rename-session, no placeholder) + kill pin-session; idempotent on missing pin-session | +| `Reorder(ctx, server, windowID, board, newOrderKey)` | rewrite only that pin-session's `@rk_board_order`; errors if absent or `@rk_board` ≠ board | +| `ComputeOrderKey(before, after)` | fractional indexing helper (**retained**) | +| `nextAppendKey(entries)` | append key strictly 
greater than the max existing key, via `ComputeOrderKey` (**retained**, thin wrapper) | -**Lazy stale-entry cleanup at read time** — `GetBoard(ctx, name)` runs `liveWindowIDs(server)` per source server and intersects with the parsed entries. Entries whose `window_id` no longer exists on its source server are omitted from the response and removed from `@rk_board` via a best-effort write-back (`setBoardValue`). Write-back failure does NOT fail the read — the response is still returned with stale entries dropped, and the failure is logged. +Reads route through `tmuxExecRawServer` + `context.WithTimeout(ctx, TmuxTimeout)`; `isAbsentOption` treats `invalid option`/`unknown option`/`no server running`/`failed to connect` as the empty case (mirrors `GetSessionOrder`). A pin-session whose var read fails is logged via `slog.Warn` and skipped — well-formed entries still return. -**Eager cleanup via SSE poll-tick** — `sseHub.poll()` (`api/sse.go`) compares the per-server window-id set across consecutive ticks. For each killed `window_id`, the hub calls `tmux.RemoveAllByWindowID(ctx, server, windowID)` and broadcasts one `event: board-changed` per affected board with `change: "cleanup"`. This closes the gap when a board has not been read recently. +**Lexicographic / fractional order keys** (`ComputeOrderKey`, unchanged, pure Go): returns a string strictly between `before` and `after` lexicographically; an empty neighbour means prepend/append. Examples: `(null,"b")→"a"`, `("c",null)→"d"`, `("b","c")→"bm"`, `("b","bm")→"bg"`. `initialAppendKey = "m"` is the first key on an empty board (midpoint letter leaves prepend headroom — there is no key `< "a"`). Inserts MUST NOT renumber siblings — a reorder rewrites exactly one `@rk_board_order` var. **The key is now stored per pin-session** in `@rk_board_order` rather than embedded in the old encoding. -**SSE event** — `event: board-changed` rides the existing per-server SSE stream (`GET /api/sessions/stream?server=`). 
Payload shape: `{"board":"main","change":"pin"|"unpin"|"reorder"|"cleanup","server":"runkit","windowId":"@1234","orderKey":"bm"}`. `orderKey` is omitted (`omitempty`) for `unpin` / `cleanup`. Frontend clients viewing a board open one SSE connection per server contributing entries — boards span servers, so cross-server fan-out is required (see `architecture.md` § Boards Feature). +**No eager/lazy cleanup, no bootstrap** (deleted with this change): there is no SSE poll-tick window-kill diff and no `RemoveAllByWindowID` consumer — a killed pinned window's pin-session simply drops out of the next `ListBoardEntries` read, picked up by the frontend's refetch on the next session-list change. There is no `@rk_board` first-poll bootstrap broadcast. Membership changes surface only via the explicit pin/unpin/reorder `board-changed` SSE events (see § SSE Board Events below and `architecture.md` § Boards Feature). -**Bootstrap parity with `@rk_session_order`** — The hub reads `@rk_board` once per server on first poll and broadcasts a synthetic `board-changed` event with `change: "bootstrap"` and payload `{"server":"","change":"bootstrap","entries":[...]}`. This survives an rk-go restart that left tmux running (Constitution VI). The cached payload is sent to new SSE clients on connect (positioned between `session-order` and `metrics`). +**SSE event** — `event: board-changed` rides the per-server SSE stream. Payload `{"board":"main","change":"pin"|"unpin"|"reorder","server":"runkit","windowId":"@1234","orderKey":"bm"}` (`orderKey` `omitempty`). The `"cleanup"`/`"bootstrap"` change variants are GONE. Each pin/unpin/reorder handler emits its own event. See `architecture.md` § Boards Feature. -**Window ID stability across `move-window`** — pins follow tmux's documented contract that `move-window` (and `swap-window`) preserves `window_id` (`@N`) and only changes `window_index` (`:N`). 
A pinned window moved between sessions on the same server remains pinned without manual intervention. As of `260529-chgz-window-id-routing`, the window-ID routing migration relies on this same contract: `MoveWindow` (reorder via adjacent `swap-window`) and `MoveWindowToSession` (`move-window -s {windowID} -t {dstSession}:`) keep the window's `@N` stable, so the URL/selection that addresses it by ID survives the move and the post-move navigation targets the unchanged `windowId`. +**Window ID stability across `move-window`** — pins rely on tmux's documented contract that `move-window` preserves `#{window_id}` (`@N`) and only changes `#{window_index}`. This is now load-bearing for the move-based model: Pin (`move-window` into the pin-session) and Unpin (`move-window` back to home) keep the window's `@N` stable, so the URL/selection that addresses it by ID survives the move, and `WindowIDFromPinSession` recovers the same id from the pin-session name. `MoveWindow` (reorder via adjacent `swap-window`) and `MoveWindowToSession` (`move-window -s {windowID} -t {dstSession}:`) likewise preserve `@N`. ## Frontend Server Routing Contract @@ -352,9 +355,9 @@ The **pre-sweep was dropped entirely** (was `sweepDeadTestSockets(); os.Exit(m.R `rk reaper` (introduced `260529-fww2-rk-reaper-command`, rewritten by `260530-cf3g-unify-test-socket-reaping`) is a **top-level, operator-invoked** command — a sibling of `rk serve`/`rk riff`, registered via `rootCmd.AddCommand(reaperCmd)` in `cmd/rk/root.go`. It is **NOT** wired into any startup path. The command body (`cmd/rk/reaper.go`) is thin — flag parsing + summary rendering; all scan/classify/reap logic lives in `internal/tmux/reaper.go` (constitution §III). -### It does NOT replace the relay startup sweep +### There is no longer a relay startup sweep to coexist with -The reaper is orthogonal to `sweepOrphanedRelaySessions`.
That sweep reaps `rk-relay-*` **sessions inside live servers** (see § Per-WebSocket Ephemeral Grouped Sessions) and is untouched by this change — `cmd/rk/serve.go` is unmodified. The reaper reaps **whole test servers and dead/stale sockets/`.lock` files**, and only when an operator runs it. Different scope, different trigger. +When `rk reaper` was introduced (`260529-fww2`) it was orthogonal to the relay startup sweep `sweepOrphanedRelaySessions`. Since `260602-qn62-move-based-board-pin-sessions` that sweep — and its whole file `cmd/rk/serve_sweep.go` — is **deleted**: relay ephemerals no longer exist (the relay attaches directly), and board pin-sessions (`_rk-pin-*`) are PERSISTENT across rk restarts (a valid state, not an orphan), so there is no in-server session class to reap at startup. The reaper remains the operator-only janitor for **whole test servers and dead/stale sockets/`.lock` files** — different scope, different trigger. ### Brute-force-by-prefix — no liveness probe to match @@ -391,7 +394,7 @@ Because the manual reaper has **no live-run protection by design** (no name allo `ReapTestServers(ctx, prefix, act, force) (ReapResult, error)` is the public entry point: it applies the dangerous-prefix guard, scans via `ScanSocketDir`, then delegates per-candidate work to the internal seam `reapCandidates(ctx, dir, prefix, candidates, probe, act)` — passing `socketDirPath()` and `probeServerAlive`. Tests drive `reapCandidates` directly with a temp dir + fake prober (no real tmux server spawned). `ReapResult` carries `Killed []string`, `RemovedSockets []string`, and `DryRunPlan []ReapPlanEntry` (`{Name, Action}` pairs, populated only on a dry-run). -**Partial failure** mirrors `sweepOrphanedRelaySessions`: each kill/remove failure is logged via `slog.Warn` and skipped; iteration continues; a joined aggregate error (`reaper partial failures: …`) is returned at the end (nil when all succeed). 
The command renders the summary *before* surfacing the aggregate error, so the operator sees what was reaped even on partial failure. +**Partial failure** (the pattern the deleted relay sweep also used): each kill/remove failure is logged via `slog.Warn` and skipped; iteration continues; a joined aggregate error (`reaper partial failures: …`) is returned at the end (nil when all succeed). The command renders the summary *before* surfacing the aggregate error, so the operator sees what was reaped even on partial failure. ## `/api/servers` Lists Every Server — the Test-Socket Hide Was Deleted @@ -408,7 +411,7 @@ Since `260531-tmnm-test-scoped-server-enumeration`, `ListServers` applies an **e **Why prefix, not exact**: multi-server e2e specs create secondaries in `beforeAll` named `rk-test-e2e---` (e.g. `rk-test-e2e-multi-*`, `rk-test-e2e-coupling-*`, `rk-test-e2e-msb-*`). Exact match on `rk-test-e2e` would wrongly exclude them and break those specs; prefix admits the primary plus this-run secondaries (cross-worktree-safe via the embedded `process.pid`). The allowlist targets `rk-test-e2e*` specifically — a `rk-test-relay-*` Go-test server under the broader `rk-test-` umbrella is NOT admitted. -**Why the filter lives in `ListServers`, not the `/api/servers` handler**: the board route attaches servers from **two** distinct `ListServers`-rooted paths — (1) `GET /api/servers` (`api/servers.go`) populating `useSessionContext().servers`, and (2) the internal `board.go` board-entry enumeration (`ListAllBoardEntries` / `GetBoard`). Filtering only the HTTP handler leaves path (2) unscoped, so the SSE inflation persists. Placing it in `ListServers` means **all** enumeration consumers inherit the scope when the env is set: `/api/servers`, `board.go`, and the startup relay-orphan sweep (`serve_sweep.go`). This is the intended outcome in the test environment (the only environment that sets the var). 
+**Why the filter lives in `ListServers`, not the `/api/servers` handler**: the board route attaches servers from **two** distinct `ListServers`-rooted paths — (1) `GET /api/servers` (`api/servers.go`) populating `useSessionContext().servers`, and (2) the internal `board.go` board-entry enumeration (`ListBoards` / `GetBoard`, which iterate `ListServers` per-server since `260602-qn62`). Filtering only the HTTP handler leaves path (2) unscoped, so the SSE inflation persists. Placing it in `ListServers` means **all** enumeration consumers inherit the scope when the env is set: `/api/servers` and `board.go`. (Pre-`260602-qn62` the startup relay-orphan sweep `serve_sweep.go` was a third consumer; it is now deleted.) This is the intended outcome in the test environment (the only environment that sets the var). **Why it matters (the bug it fixes)**: on the board route, the frontend attaches **all** known servers and opens **one SSE `EventSource` per server** (boards are cross-server by design). On a busy operator box, N live `kit`/`runWork`/orphan servers → N EventSources → N of the browser's 6 HTTP/1.1 connection slots consumed before any relay WebSocket or xterm chunk fetch. Scoping the backend READ path to `rk-test-e2e*` bounds the board route to one SSE per test server, removing the **load-dependent** connection-pool starvation that made board e2e specs flaky only on busy sessions (the environmental third vector of `e2e-flakiness-board-route-dynamic-import-hang`). 
@@ -429,7 +432,7 @@ The `tmuxctl` supervisor is **unaffected**: it does NOT call `ListServers` — i ## Related Files -- `app/backend/internal/tmux/tmux.go` — `serverArgs()`, `tmuxExecServer()`, `ListSessions()`, `ListServers()` (delegates the raw socket-dir scan to `ScanSocketDir` and the probe to `probeServerAlive`; applies the env-gated `RK_SERVER_ALLOWLIST` filter post-probe), `ServerAllowlistEnv` const + `matchesServerAllowlist(name, allowlist)` pure prefix-match predicate (test-isolation filter; see § `RK_SERVER_ALLOWLIST`), `ScanSocketDir(ctx)`, `socketDirPath()`, `filterSocketEntries()`, `probeServerAlive(ctx, name)`, `IsTestServerName()` (single `HasPrefix("rk-test-")`; consumed only by the tmuxctl supervisor resurrection guard), `LockSocketSuffix`, `ListKeys()`, `KillServer()`, `CreateSession()`, `SelectWindow(windowID, server)`, `ResolveWindowSession()`, `resolveWindowSessionIndex()`, `MoveWindow(windowID, dstIndex, server)`, `MoveWindowToSession(windowID, dstSession, server)`, `ReloadConfig()`, `EnsureConfig()`, `ConfigPath()` +- `app/backend/internal/tmux/tmux.go` — `serverArgs()`, `tmuxExecServer()`, `ListSessions()`, `ListServers()` (delegates the raw socket-dir scan to `ScanSocketDir` and the probe to `probeServerAlive`; applies the env-gated `RK_SERVER_ALLOWLIST` filter post-probe), `ServerAllowlistEnv` const + `matchesServerAllowlist(name, allowlist)` pure prefix-match predicate (test-isolation filter; see § `RK_SERVER_ALLOWLIST`), `ScanSocketDir(ctx)`, `socketDirPath()`, `filterSocketEntries()`, `probeServerAlive(ctx, name)`, `IsTestServerName()` (single `HasPrefix("rk-test-")`; consumed only by the tmuxctl supervisor resurrection guard), `LockSocketSuffix`, `ListKeys()`, `KillServer()`, `CreateSession()`, `SelectWindow(windowID, server)`, `ResolveWindowSession()` (first-match owning session — home or `_rk-pin-*`), `resolveWindowSessionIndex()`, `MoveWindow(windowID, dstIndex, server)`, `MoveWindowToSession(windowID, dstSession, server)`, 
`ReloadConfig()`, `EnsureConfig()`, `ConfigPath()`, plus the pin-session helpers `PinSessionPrefix`/`PinSessionName(windowID)`/`WindowIDFromPinSession(name)`/`ListPinSessionNames(ctx, server)` (since `260602-qn62`; the deleted `RelaySessionPrefix`/`OwnerPIDOption`/`NewGroupedSession`/`SetSessionOwnerPID`/`GetSessionOwnerPID`/`ListRawSessionNames` are gone) - `app/backend/internal/tmux/reaper.go` — brute-force-by-prefix reaper logic: pure `classifyReap(name, prefix, serverLive) ReapAction` (enum `ReapActionSkip`/`ReapActionKill`/`ReapActionRemove`), `probeNeeded(name, prefix)` (gates the kill-vs-remove subprocess probe), the consts `productionDaemonServer = "rk-daemon"` + `minSafePrefixLen = 3`, `ReapResult`/`ReapPlanEntry`, the public `ReapTestServers(ctx, prefix, act, force)` (applies the dangerous-prefix guard), and the test seam `reapCandidates(ctx, dir, prefix, candidates, probe, act)` (log-and-skip per entry via `slog`, aggregate error at end). Tested in `reaper_test.go` - `app/backend/cmd/rk/reaper.go` — thin `reaperCmd` (top-level; `--prefix` default `rk-test`, `--yes`/`--force` action gate, `--dry-run` explicit-alias for the default preview); `act := (yes||force) && !dryRun`; calls `tmux.ReapTestServers(ctx, prefix, act, force)` and renders summary/dry-run (`renderReapSummary`/`renderDryRun`); `Long` help states the brute-force/no-liveness-protection/operating contract; no scan/probe/remove/kill in `cmd/rk` - `app/backend/internal/sessions/sessions.go` — `FetchSessions(server)` builds the dashboard view, `ProjectSession` has `Name` and `Windows` (no `Server` field); pane-map enrichment re-keys from `session:index` to windowID before joining; `ProjectRoot(ctx, windowID, server)` resolves by window ID @@ -438,8 +441,9 @@ The `tmuxctl` supervisor is **unaffected**: it does NOT call `ListServers` — i - `app/backend/api/servers.go` — server list/create/kill handlers - `app/backend/api/keybindings.go` — `GET /api/keybindings` handler (runs `list-keys`, filters 
via whitelist, returns JSON) - `app/backend/api/sse.go` — per-server SSE polling hub -- `app/backend/api/relay.go` — WebSocket relay at `/relay/{windowId}` reads `?server=` query param, validates the window ID, resolves the owning session via `tmux.ResolveWindowSession`, allocates a per-WebSocket ephemeral via `tmux.NewGroupedSession`, stamps `@rk_owner_pid` on the ephemeral via `tmux.SetSessionOwnerPID` (after the create + cleanup-defer, before attach; abort-clean on failure), selects the window by ID on the ephemeral, attaches to the ephemeral (not the real session), and reaps it on disconnect via deferred `KillSessionCtx` -- `app/backend/cmd/rk/serve_sweep.go` — `sweepOrphanedRelaySessions(ctx)` reaps `rk-relay-*` ephemerals across every server returned by `tmux.ListServers(ctx)` before HTTP bind, but **only** when the session's `@rk_owner_pid` (read via `tmux.GetSessionOwnerPID`) is empty/unstamped or names a dead PID — `pidAlive(pid)`/`relayOwnerIsDead(owner)` predicates with a leak-not-kill (EPERM→spare) bias; live-owner relays are spared. Explicit `name == tmux.ControlAnchorSessionName` guard skips the `_rk-ctl` anchor (defense-in-depth) +- `app/backend/api/relay.go` — WebSocket relay at `/relay/{windowId}` reads `?server=` query param, validates the window ID via `decodeWindowID`, resolves the owning session via `tmux.ResolveWindowSession` (home OR `_rk-pin-*`), does a direct bare `tmux.SelectWindow(windowID, server)` on that real session, and runs `attach-session -t <session>` via `creack/pty`. **No ephemeral, no `@rk_owner_pid` stamp, no defer-kill** (all removed by `260602-qn62`) +- `app/backend/internal/tmux/board.go` — move-based board layer: `Pin`/`Unpin`/`Reorder`, `pinEntry`, `ListBoardEntries`/`ListBoards`/`GetBoard` (derived from `_rk-pin-*` session vars), `ComputeOrderKey`/`nextAppendKey`/`initialAppendKey`, validators (`ValidBoardName`/`ValidWindowID`/`ValidOrderKey`), and the `BoardOption`/`HomeOption`/`BoardOrderOption` session-var key constants.
The old `@rk_board` server-option encoding (`parseBoardValue`/`serializeBoardValue`/`setBoardValue`/`ListAllBoardEntries`/`RemoveAllByWindowID`) is deleted +- *(`app/backend/cmd/rk/serve_sweep.go` — DELETED by `260602-qn62`; the relay startup sweep no longer exists)* - `app/backend/internal/tmuxctl/` — control-mode subscription package; `Client` opens `tmux -CC ... -t = -r` per socket, **always** creates the `_rk-ctl` anchor floor (since `260602-a1wo-prevent-exit-empty-server-death`) and tags it with `@rk_ctl_keepalive 1`, and sets `exit-empty off` via `tmux.SetExitEmptyOff` in `productionDial` before the anchor on every dial/reconnect. See `architecture.md` § tmux Control-Mode Subscription - `app/backend/internal/tmux/tmux.conf` — canonical tmux configuration (Go-embedded, written to `~/.run-kit/tmux.conf` on first run) - `app/backend/cmd/rk/riff.go` — `rk riff` subcommand: N-pane `tmux new-window` + `split-window` + `select-layout` + `select-pane` sequence per window on the user's current tmux server (via `tmux.OriginalTMUX` restore in child env), with parallel fan-out + rollback on failure @@ -490,3 +494,4 @@ Established by `260531-tmnm-test-scoped-server-enumeration`: | 2026-05-31 | **Test-scoped server enumeration — env-gated `RK_SERVER_ALLOWLIST` allowlist in `ListServers`.** New const `tmux.ServerAllowlistEnv = "RK_SERVER_ALLOWLIST"` + pure predicate `matchesServerAllowlist(name, allowlist)` (`internal/tmux/tmux.go`). `ListServers` reads the env directly via `os.Getenv` in-package (matching the `RK_TMUX_CONF`/`OriginalTMUX` precedent — NOT via `internal/config`; `ListServers` is a `ctx`-only free fn) and, when set, narrows the post-probe live-server list to names that `HasPrefix` ANY trimmed, non-empty comma-delimited token. Empty/whitespace-only value = treated as UNSET = admits everything (production no-op, byte-for-byte). 
Prefix (not exact) because multi-server e2e specs create `rk-test-e2e---` secondaries in `beforeAll` that exact match would exclude; `rk-test-relay-*` under the broader `rk-test-` umbrella is NOT admitted. Filter lives in `ListServers` (not `handleServersList`) so ALL enumeration consumers inherit the scope — `/api/servers`, `board.go` `ListAllBoardEntries`/`GetBoard`, the `serve_sweep.go` relay-orphan sweep; the `tmuxctl` supervisor is unaffected (uses `os.ReadDir` + `isTmuxSocketCandidate`, never `ListServers`). The new forward allowlist (hides NORMAL servers from tests) is orthogonal to and does not touch the existing `IsTestServerName` denylist (hides TEST servers from normal operation). `scripts/test-e2e.sh` exports `RK_SERVER_ALLOWLIST=$E2E_TMUX_SERVER` into the `setsid … just dev` backend launch — scoping the backend READ path, distinct from `E2E_TMUX_SERVER` (the WRITE socket). Hermetic unit test `TestMatchesServerAllowlist`. **Why:** bounds board-route SSE to one `EventSource` per test server, removing the load-dependent HTTP/1.1 connection-pool starvation (N live operator servers → N EventSources → N of 6 slots) that made board e2e specs flaky only on busy boxes — the environmental third vector of the board-route hang. No prod behavior change (env unset). | `260531-tmnm-test-scoped-server-enumeration` | | 2026-05-30 | **Unified test-socket reaping.** All test tmux-socket names (Go + Playwright) collapsed under one umbrella `rk-test---` (roles `unit`/`relay`/`tmuxctl`/`daemon`/`e2e`/`e2e-multi`/`e2e-coupling`/`e2e-msb`), replacing the old 5-prefix scheme (`rk-test-`, `rk-relay-test-`, `rk-verify-`, fixed `rk-tmuxctl-test`/`rk-daemon-test`) + the separate `rk-e2e` exclusion. `IsGoTestServerName` (5-prefix allowlist) **deleted** → `IsTestServerName(name) = HasPrefix("rk-test-")`, consumed only by the tmuxctl supervisor resurrection guard. 
`parseTestSocketPID` now parses the PID as the **second-to-last** hyphen field (was the field right after the prefix) so hyphenated roles parse; `` is a single hyphen-free token. `TestMain` sweep flipped **pre-sweep → post-sweep** (`code := m.Run(); sweepDeadTestSockets(); os.Exit(code)`) in `internal/tmux` + `api`; still PID-scoped to dead owners only (concurrent `go test` packages spared), pre-sweep dropped. Manual `rk reaper` rewritten **brute-force-by-prefix** (bare ≡ `--prefix rk-test`): no liveness probe / e2e exclusion / `.lock`-inheritance; **dry-run is the default** (both bare and `--prefix`), `--yes`/`--force` to act, only `--force` bypasses the dangerous-prefix guard (empty or ≤3 chars refused); `_rk-ctl` + live `rk-daemon` hard-skipped unconditionally. Operating contract: do NOT run `rk reaper` while tests are running (no live-run protection by design). `/api/servers` test-socket **hide filter deleted** — it now lists every server, incl. leaked `rk-test-*` orphans (accepted cost: one SSE stream per orphan until reaped). E2E (Playwright) socket names embed `process.pid` (was a `Date.now().slice(-6)` epoch); harness server is `rk-test-e2e` (was `rk-e2e`) in `scripts/test-e2e.sh`, `global-teardown.ts`, the `just pw` default, and all specs. **Separate bug fixed (constitution VI):** `scripts/test-e2e.sh` used `kill 0` in its EXIT-trap cleanup, signalling the caller's (non-detached) process group — SIGTERMing live tmux servers / `-CC` clients sharing it when run inline in an interactive/agent session (proven root cause of `kit`/`abbb`/`runWork` dying mid-session, distinct from `exit-empty`). Fixed: dev server launched via `setsid` into its own group; cleanup kills only `kill -- "-$DEV_PGID"`. 
| `260530-cf3g-unify-test-socket-reaping` | | 2026-06-02 | **`exit-empty` server-death prevention — always-on `_rk-ctl` anchor floor + imperative `exit-empty off`.** Fixes the *other* recurring whole-server-death cause (distinct from the `260530-cf3g` `kill 0` grenade): tmux's default `exit-empty on` reaping a server the instant its session count hits zero. Root cause was `tmuxctl.resolveBootstrap` creating the `_rk-ctl` anchor **only** when a server was empty at first control-mode connect — so a server that already had real sessions got no floor, and when its last real session later closed (only `rk-relay-*` ephemerals left) the next relay disconnect drained it to zero and tmux reaped the whole server (no run-kit `kill-server`; killed live agent sessions; ≥3× — runWork/utils/kit). `resolveBootstrap` now **always** creates the anchor as a permanent session floor, decoupled from the control-mode **attach target** (still prefers the first real session; `firstSessionName` now skips `_rk-ctl` so the always-present anchor — which sorts ahead of lowercase names — isn't picked; `%session-window-changed` is global so attach-target choice is a minimal-diff preference, not a correctness need). New `tmux.SetExitEmptyOff(ctx, server)` (`set-option -g exit-empty off`, mirrors `tmuxExecServer`/`serverArgs`, ctx+`TmuxTimeout`) is called in `tmuxctl.productionDial` **BEFORE** `resolveBootstrap`/anchor creation, on the initial dial **and** every reconnect — ordering is load-bearing: it closes the reapable zero-session sliver during the close-then-reopen restart window before the floor is reinstalled. The embedded `configs/tmux/default.conf` also `set -g exit-empty off` for run-kit-created servers (belt-and-suspenders; imperative set covers hand-created/foreign servers the `-f` conf never reaches). 
Server-lifetime contract is now **explicit-kill-only**: a server dies only via `kill-server`/`rk reaper`; empty (anchor-only) servers persist by design (no auto-reap, no cross-process refcounting — §II). Concurrent multi-`rk serve` anchor creation stays benign via `isDuplicateSessionError` (now made actually-reachable + correct: `createAnchor` captures stderr since `cmd.Run()` doesn't populate `ExitError.Stderr`, and the classifier matches both the wrapped message and `ee.Stderr`). Test/e2e sockets are still excluded by `isTmuxSocketCandidate`→`IsTestServerName` so the always-create path can't resurrect them. The `sse.go` "real session disappeared" WARN is kept as defense-in-depth (covers external `kill-session`/OOM/shell-exit that prevention can't). | `260602-a1wo-prevent-exit-empty-server-death` | +| 2026-06-02 | **Move-based server-scoped boards (pin sessions) — relay ephemerals removed.** Boards switched from a per-WebSocket `rk-relay-*` ephemeral isolation layer + `@rk_board` server-option comma/colon encoding to a **move-based pin-session model**. New `tmux.PinSessionPrefix = "_rk-pin-"` + reversible `PinSessionName`/`WindowIDFromPinSession` helpers (strip/restore the `@`) + `ListPinSessionNames`. A pinned window is physically `move-window`'d into its own single-window session `_rk-pin-`; a board = the set of pin-sessions sharing an `@rk_board` SESSION var (server-scoped — `move-window` can't cross servers), with `@rk_home` (restore target) and `@rk_board_order` (fractional `ComputeOrderKey`). `Pin` stamps the 3 vars after the move with `context.Background()`-rooted rollback on partial failure (double-fault guard: kill the pin-session only if move-back succeeded); idempotent incl. wrong-board re-stamp. `Unpin` restores to `@rk_home` (recreating a dead home via `rename-session`, no placeholder) then kills the pin-session; multi-unpin appends. 
`board.go` rewritten: kept `ComputeOrderKey`/`nextAppendKey`/`initialAppendKey`/validators/`BoardOption` (repurposed as the `@rk_board` session-var key); **deleted** `parseBoardValue`/`serializeBoardValue`/`setBoardValue`/`ListAllBoardEntries`/`RemoveAllByWindowID`/the server-option encoding. `ListBoardEntries`/`ListBoards`/`GetBoard` now derive from pin-sessions (no stale write-back). **Relay** (`api/relay.go`) attaches the PTY DIRECTLY to the `ResolveWindowSession` result (home or `_rk-pin-*`) — removed `newEphemeralRelayName`/`NewGroupedSession`/`SetSessionOwnerPID`/ephemeral-`SelectWindowInSession`/ephemeral-`KillSessionCtx`; `ResolveWindowSession` no longer filters `rk-relay-*`. **Deleted**: `cmd/rk/serve_sweep.go` (whole file — `sweepOrphanedRelaySessions`/`pidAlive`/`relayOwnerIsDead`) + its `serve.go` wiring; `RelaySessionPrefix`/`OwnerPIDOption`/`NewGroupedSession`/`SetSessionOwnerPID`/`GetSessionOwnerPID`/`ListRawSessionNames` from `tmux.go`. `parseSessions` now skips `_rk-pin-*` + `_rk-ctl`, NO LONGER skips `rk-relay-*`. **SSE** (`api/sse.go`): dropped the window-kill `board-changed {cleanup}` diff + `RemoveAllByWindowID` dep, `detectKilledWindowIDs`/`previousWindowIDs`, and the `broadcastBoardBootstrap`/`previousBoardJSON` first-poll bootstrap; only explicit pin/unpin/reorder `board-changed` events remain. `TmuxOps` lost `NewGroupedSession`/`SetSessionOwnerPID`. Accepted tradeoffs: multi-client active-window collisions on a shared home session; a pinned window disappears from its home session's sidebar until unpinned. Pins persist across rk restarts → NO restore-sweep. `_rk-ctl` anchor + `exit-empty off` backstop UNCHANGED (pin-session persistence relies on `exit-empty off`); `tmuxctl/` active-window derivation left untouched (investigate-only). Frontend `boards.ts`/`use-boards.ts` doc/contract updated to server-scoped pin-session derivation; response shape + component structure unchanged. 
| `260602-qn62-move-based-board-pin-sessions` | diff --git a/docs/memory/run-kit/ui-patterns.md b/docs/memory/run-kit/ui-patterns.md index 833a07d8..e1ab7258 100644 --- a/docs/memory/run-kit/ui-patterns.md +++ b/docs/memory/run-kit/ui-patterns.md @@ -93,7 +93,11 @@ Clients viewing different `?server=` values are independent — each server's SS ## Boards View -`/board/$name` renders a horizontal pane dashboard for windows pinned to a named board (see `tmux-sessions.md` § `@rk_board` for storage and `architecture.md` § Boards Feature for the route placement rationale). The board view does NOT mount AppShell, but it shares the same root-mounted multi-server `SessionProvider`, the same unified ``, and as of `260509-17m3-rotated-shell-layout` the same `` grid wrapper — the sidebar's per-server session groups stay populated across the route switch because the provider lives at the root. +`/board/$name` renders a horizontal pane dashboard for windows pinned to a named board (see `tmux-sessions.md` § Pin Sessions / § `@rk_board` for storage and `architecture.md` § Boards Feature for the route placement rationale). The board view does NOT mount AppShell, but it shares the same root-mounted multi-server `SessionProvider`, the same unified ``, and as of `260509-17m3-rotated-shell-layout` the same `` grid wrapper — the sidebar's per-server session groups stay populated across the route switch because the provider lives at the root. + +### SESSIONS-vs-BOARDS exclusivity (a pinned window is physically MOVED) + +Since `260602-qn62-move-based-board-pin-sessions`, pinning a window **physically moves** it (`tmux move-window`) out of its home session into its own single-window pin-session (`_rk-pin-`), so a window is in **exactly one** view at a time — SESSIONS (its home session) or BOARDS (a pane on the board), never both. 
A pinned window therefore **disappears from its home session's window list in the SESSIONS sidebar** until it is unpinned (which moves it back to `@rk_home`, appending at the next index). This is intended and is what lets a board pane attach the relay DIRECTLY to the pin-session (no ephemeral). It is also "already true" for the sidebar with no frontend work: the SSE session snapshot no longer lists the moved window under its home session (pin-sessions are filtered at the `parseSessions` chokepoint), so the sidebar reflects it automatically. The pin-icon filled state and the active-board accent (below) are the only board-aware affordances on a SESSIONS row — and a pinned window's row is simply absent from its home group while pinned. ### BoardPage Layout (`app/frontend/src/components/board/board-page.tsx`) @@ -101,7 +105,7 @@ BoardPage uses the shared `` wrapper with `grid-template-areas: "sidebar - **Sidebar** (`gridArea: "sidebar"`) — the unified `` (same component as AppShell). No per-server group is marked current on board routes. Per-server session groups + Boards section + ServerPanel + bottom panels render as on AppShell. Mobile (`< 640px`) renders the sidebar as a Shell-level overlay positioned via `gridRow: "2/4"` (below the topbar) — same overlay implementation as AppShell - **TopBar** (`gridArea: "topbar"`) — ``. Board mode renders `Board ▸ {name} ▾` (the existing `BoardSwitcherDropdown` dropdown listing `← Sessions` + other boards, with `(current)` on the active one — moved into TopBar from BoardPage's pre-rotation inline `
`) followed by inline-info `{N} pane[s] · {M} server[s] · ⌘[⌘] cycle` (singular/plural correct, `text-xs text-text-secondary`, `hidden sm:inline`). The right section's chrome (theme toggle, `FixedWidthToggle`, `⌘K`, compose `>_`) is byte-identical to terminal mode; `FixedWidthToggle` is now route-agnostic and renders even though `currentWindow` is null on the board route -- **Content** (`gridArea: "content"`) — the existing `DesktopRow` (desktop) / `MobileCarousel` (mobile) horizontally-scrollable container of pane "cards" sorted by `orderKey`. Each card is a `BoardPane` with a `BoardHeader` (` · ` + unpin button) and an embedded `TerminalClient` connected via WebSocket to `?server=` (one WS per pane). The horizontal-scroll viewport begins at the `content` grid area's left edge — flush with `sidebar.right` (or page.left when `sidebarOpen === false`); no left gutter for board-level chrome +- **Content** (`gridArea: "content"`) — the existing `DesktopRow` (desktop) / `MobileCarousel` (mobile) horizontally-scrollable container of pane "cards" sorted by `orderKey`. Each card is a `BoardPane` with a `BoardHeader` (` · ` + unpin button) and an embedded `TerminalClient` connected via WebSocket to `/relay/{windowId}?server=` (one WS per pane) — **the same `TerminalClient`/relay path as the normal terminal route**. Since `260602-qn62` the `windowId` resolves to the window's `_rk-pin-*` pin-session server-side (the relay attaches directly to it), so `BoardPane`/`board-page.tsx` need NO structural change — the move is transparent to the component (`BoardEntry` shape unchanged). The horizontal-scroll viewport begins at the `content` grid area's left edge — flush with `sidebar.right` (or page.left when `sidebarOpen === false`); no left gutter for board-level chrome - **BottomBar** (`gridArea: "bottombar"`) — the shared `` (NEW on this route — board route had no BottomBar pre-rotation). 
Byte-identical to AppShell's invocation: same three callbacks (`onOpenCompose`, `onFocusTerminal`, `onScrollLockChange`). `onOpenCompose` calls `setComposeOpen(true)` from `FocusedTerminalContext`; `onFocusTerminal` invokes a ref-tracked `focusFocusedPaneRef.current()` that re-focuses the currently-focused board pane via its `paneRefs[focusedIndex].focus()`; `onScrollLockChange` plumbs through `DesktopRow`/`MobileCarousel` → `BoardPane` → `TerminalClient.scrollLocked`. Input target is the focused pane's wsRef (read from `FocusedTerminalContext.focused?.wsRef`) - **Own `` mount** — BoardPage retains its own palette mount because board-route-only entries (Switch / Leave Board View / Cycle Pane Focus →/←) need a registration site, and the AppShell palette doesn't mount on `/board/`. The mount is preserved through the rotation @@ -196,8 +200,8 @@ The AppShell palette mount carries `Switch to ` + `Pin Current Window` + ` | Hook | File | Returns | |------|------|---------| -| `useBoards()` | `hooks/use-boards.ts` | `{ boards, isLoading, error }`. Initial `listBoards()` on mount; subscribes to `board-changed` SSE on every server returned by `listServers()`; 50ms debounce coalesces rapid events; preserves last good value on transient error | -| `useBoardEntries(name)` | `hooks/use-boards.ts` | `{ entries, isLoading, error }`. Initial `getBoard(name)`; subscribes on all known servers (boards span servers); same debounce + error tolerance | +| `useBoards()` | `hooks/use-boards.ts` | `{ boards, isLoading, error }`. Initial `listBoards()` on mount; subscribes to `board-changed` SSE on every server returned by `listServers()` (boards are server-scoped since `260602-qn62`, but the board LIST is summarized across servers, so it attaches all to catch each one's pin/unpin/reorder events); 50ms debounce coalesces rapid events; preserves last good value on transient error | +| `useBoardEntries(name)` | `hooks/use-boards.ts` | `{ entries, isLoading, error }`. 
Initial `getBoard(name)`; subscribes on all known servers (the board list spans servers, so a pin/unpin on any server may affect this board); same debounce + error tolerance | | `usePinActions(board?)` | `hooks/use-pin-actions.ts` | `{ pin, unpin, reorder }` stable callbacks; toast on error; optimistic — SSE re-broadcast reconciles | | `usePaneWidths(boardName, sidebarWidth)` | `hooks/use-pane-widths.ts` | `{ getWidth, setWidth }`; reads/writes `localStorage["runkit:board-widths:"]`; clamps to `[280, viewport - sidebar]`; default 480px | | `useIsMobile()` | `hooks/use-is-mobile.ts` | `boolean`; `matchMedia("(max-width: 640px)")` listener | @@ -1082,3 +1086,4 @@ The regression test in `app/frontend/src/hooks/use-dialog-state.test.tsx` flips | 2026-05-31 | **Bounded desktop relay WebSockets** — new § Desktop Relay-Connection Suspension. On plaintext origins (`window.location.protocol === "http:"`) `DesktopRow` drives each `BoardPane`'s `paused` prop from an `IntersectionObserver` rooted on `rowRef` (was hardcoded `paused={false}`), unmounting off-screen panes' `TerminalClient` so the `/relay/` WebSocket closes and the connection slot frees — the desktop analogue of the mobile carousel's `paused={idx !== carouselIndex}`. Pre-warm `RELAY_PREWARM_ROOT_MARGIN` (one pane-width horizontal `rootMargin`, no debounce) prevents scroll-past thrash. Live panes capped at `MAX_LIVE_RELAY_PANES = 4` via the pure `selectLivePanes` helper (`select-live-panes.ts`, colocated unit tests): focused pane always live (exempt from visibility-pause and the cap, preserving `Cmd+]`/`Cmd+[` cycling + BottomBar targeting), then most-recently-focused visible panes fill remaining slots, least-recently-focused paused first beyond the cap. Pane elements observed via a `data-paneIndex` + `rootRef` callback prop on `BoardPane`, distinct from the `paneRefs` imperative `BoardPaneHandle`. 
On HTTPS/h2 (production via Tailscale) the feature is OFF — every pane stays live, no observer, no cap — because the ~6-connection ceiling is a plaintext HTTP/1.1 artifact (h2 multiplexes; relay WS limit ~255). Composes with sibling `260531-m3pl-static-xterm-imports` (removes xterm chunk-fetch pressure): together they fit the board route under 6 connections, fixing the plaintext board-route E2E hang. `MobileCarousel` untouched. E2E `boards-desktop-suspend.spec.ts` + companion `.spec.md`. | `260531-rus8-bound-desktop-relay-websockets` | | 2026-05-30 | **`$session` dropped from the route + identity keyed on `@N` alone.** Route shape `/$server/$session/$window` → `/$server/$window` (TanStack `terminalRoute.path: "/$window"`; `parseParams` exposes only `window`). The owning session name is now **derived from the active window's SSE snapshot** (`currentSession = sessions.find(s => s.windows.some(w => w.windowId === windowParam))`, `sessionName = currentSession?.name`) wherever it was previously read from the URL `$session` segment — breadcrumbs, dropdowns, browser title, kill-redirect inputs. `pendingClickRef` holds `{ windowId }` only (dropped `session`) and `urlMatchesPending` is `pending.windowId === windowParam` — so a session rename or cross-session move (where `@N` survives) no longer releases the pending-click suppression early and bounces the selection. Mount-time alignment guard keyed on `${server}|${windowParam}` (window-id-only); URL writeback navigates `{ to: "/$server/$window", params: { server, window } }` (no session param); `navigateToWindow(windowId)` drops the session arg. All `app.tsx` navigate sites target the 2-segment shape. Deep link `/$server/@N` derives the session server-side from the first snapshot for breadcrumb display and aligns tmux to `@N`. Old 3-segment `/$server/$session/$window` URLs are a hard break — they fall through to `NotFoundPage` / the server-dashboard fallback (no redirect shim; constitution §II). 
The `IframeWindow` URL bar's `updateWindowUrl` now routes through the unified `setWindowOptions` → `POST /api/windows/{windowId}/options` (`{"@rk_url": url}`) instead of the removed `PUT /url`; `setWindowColor`/`updateWindowType` likewise delegate to `/options`. E2E specs migrated to the 2-segment shape with sibling `.spec.md` updates (`sidebar-window-sync`, `multi-server-sidebar`, `mobile-touch-scroll`). | `260529-jad6-window-api-stability` | | 2026-06-02 | **Fix non-current sidebar group expand (StrictMode purity)** — `toggleServerSection` made pure: the `localStorage.setItem` write to `runkit-panel-sessions-{server}` and the lazy `attachServer(server)` call moved OUT of the `setServerSectionsOpen` updater. Root cause: React 19 StrictMode double-invokes updaters, and the in-updater `localStorage` write was observed by the second pass (which re-read it via `readServerOpen`), inverting `next` and making a single Expand click on a non-current group a no-op (the group never opened — `multi-server-sidebar.spec.ts:70` failed deterministically). Fix snapshots `current = readServerOpen(server)` once, computes `next`, runs side-effects once outside the updater, then commits a pure functional update deriving `next` from `prev` (fallback to the `current` snapshot for untouched groups) for batch-safety. StrictMode-wrapped click-toggle regression test added in `index.test.tsx`. No backend change; existing coupling/persistence behavior preserved. | `260602-mss7-fix-sidebar-group-expand` | +| 2026-06-02 | **Move-based boards — SESSIONS-vs-BOARDS exclusivity.** New § SESSIONS-vs-BOARDS exclusivity: pinning a window physically MOVES it (`tmux move-window`) into its own `_rk-pin-*` pin-session, so it disappears from its home session's window list in the SESSIONS sidebar until unpinned (which moves it back to `@rk_home`, appending at the next index). 
"Already true" for the sidebar with no frontend work — the SSE snapshot no longer lists the moved window under its home session (pin-sessions filtered at the `parseSessions` chokepoint). Board pane rendering is UNCHANGED: each `BoardPane` still embeds the same `TerminalClient` on `/relay/{windowId}?server=`, and the relay now resolves `windowId` to the pin-session server-side (transparent — `BoardEntry` shape + `board-pane.tsx`/`board-page.tsx` structure untouched). `boards.ts`/`use-boards.ts` doc comments updated from "boards are explicitly cross-server" to server-scoped derivation (pins live on one server; the board LIST is summarized across servers, so `useBoards`/`useBoardEntries` still attach all known servers for `board-changed` events). Frontend-only doc/contract change; pin icon + active-board accent affordances on SESSIONS rows are unchanged. | `260602-qn62-move-based-board-pin-sessions` | diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl b/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl new file mode 100644 index 00000000..28704f82 --- /dev/null +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl @@ -0,0 +1,26 @@ +{"action":"enter","driver":"fab-new","event":"stage-transition","stage":"intake","ts":"2026-06-02T18:37:05Z"} +{"args":"refactor: move-based server-scoped boards — replace per-WebSocket relay ephemeral + @rk_board option encoding with single-location pin sessions","cmd":"fab-new","event":"command","ts":"2026-06-02T18:37:05Z"} +{"delta":"+3.1","event":"confidence","score":3.1,"trigger":"calc-score","ts":"2026-06-02T18:38:37Z"} +{"delta":"+0.0","event":"confidence","score":3.1,"trigger":"calc-score","ts":"2026-06-02T18:38:42Z"} +{"cmd":"fab-clarify","event":"command","ts":"2026-06-02T18:47:36Z"} +{"cmd":"fab-clarify","event":"command","ts":"2026-06-02T18:47:41Z"} +{"delta":"+0.9","event":"confidence","score":4,"trigger":"calc-score","ts":"2026-06-02T18:49:31Z"} 
+{"delta":"+0.0","event":"confidence","score":4,"trigger":"calc-score","ts":"2026-06-02T18:49:36Z"} +{"delta":"+0.0","event":"confidence","score":4,"trigger":"calc-score","ts":"2026-06-02T18:49:42Z"} +{"delta":"+0.0","event":"confidence","score":4,"trigger":"calc-score","ts":"2026-06-02T18:50:03Z"} +{"delta":"+0.0","event":"confidence","score":4,"trigger":"calc-score","ts":"2026-06-02T18:53:19Z"} +{"delta":"+1.0","event":"confidence","score":5,"trigger":"calc-score","ts":"2026-06-02T18:53:25Z"} +{"delta":"+0.0","event":"confidence","score":5,"trigger":"calc-score","ts":"2026-06-02T18:53:33Z"} +{"delta":"+0.0","event":"confidence","score":5,"trigger":"calc-score","ts":"2026-06-02T18:53:37Z"} +{"cmd":"fab-fff","event":"command","ts":"2026-06-02T18:54:08Z"} +{"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"apply","ts":"2026-06-02T19:09:27Z"} +{"cmd":"fab-continue","event":"command","ts":"2026-06-02T19:43:47Z"} +{"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"review","ts":"2026-06-02T19:45:10Z"} +{"event":"review","result":"failed","ts":"2026-06-02T19:58:25Z"} +{"action":"re-entry","driver":"fab-fff","event":"stage-transition","stage":"apply","ts":"2026-06-02T19:58:25Z"} +{"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"review","ts":"2026-06-02T20:08:05Z"} +{"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"hydrate","ts":"2026-06-02T20:08:09Z"} +{"event":"review","result":"passed","ts":"2026-06-02T20:08:09Z"} +{"cmd":"fab-continue","event":"command","ts":"2026-06-02T20:08:59Z"} +{"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"ship","ts":"2026-06-02T20:21:46Z"} +{"cmd":"git-pr","event":"command","ts":"2026-06-02T20:22:41Z"} diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml b/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml new file mode 100644 index 00000000..e7937413 --- /dev/null +++ 
b/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml @@ -0,0 +1,49 @@ +id: qn62 +name: 260602-qn62-move-based-board-pin-sessions +created: 2026-06-02T18:37:05Z +created_by: sahil-noon +change_type: refactor +issues: [] +progress: + intake: done + apply: done + review: done + hydrate: done + ship: active + review-pr: pending +plan: + generated: true + task_count: 21 + acceptance_count: 28 + acceptance_completed: 0 +confidence: + certain: 9 + confident: 0 + tentative: 0 + unresolved: 0 + score: 5.0 + fuzzy: true + dimensions: + signal: 95.7 + reversibility: 63.3 + competence: 84.8 + disambiguation: 84.4 +stage_metrics: + intake: {started_at: "2026-06-02T18:37:05Z", driver: fab-new, iterations: 1, completed_at: "2026-06-02T19:09:27Z"} + apply: {started_at: "2026-06-02T19:58:25Z", driver: fab-fff, iterations: 2, completed_at: "2026-06-02T20:08:05Z"} + review: {started_at: "2026-06-02T20:08:05Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-02T20:08:09Z"} + hydrate: {started_at: "2026-06-02T20:08:09Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-02T20:21:46Z"} + ship: {started_at: "2026-06-02T20:21:46Z", driver: fab-fff, iterations: 1} +prs: [] +true_impact: + added: 0 + deleted: 0 + net: 0 + excluding: + added: 0 + deleted: 0 + net: 0 + computed_at: "2026-06-02T20:21:46Z" + computed_at_stage: hydrate +# true_impact: lazily created on first apply-finish (no placeholder here). 
+last_updated: 2026-06-02T20:21:46Z diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/intake.md b/fab/changes/260602-qn62-move-based-board-pin-sessions/intake.md new file mode 100644 index 00000000..d840922f --- /dev/null +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/intake.md @@ -0,0 +1,281 @@ +# Intake: Move-Based Server-Scoped Boards (Pin Sessions) + +**Change**: 260602-qn62-move-based-board-pin-sessions +**Created**: 2026-06-03 +**Status**: Draft + +## Origin + + + +> Initiated via `/fab-discuss` → free-form, first-principles design conversation. The user asked +> why all run-kit terminal sessions are `rk-relay-*` ephemerals and what reverting to a 1:1 +> session↔tmux mapping would imply. Through a multi-turn exploration (with live tmux probing on +> tmux 3.6a), we converged on a model that removes the relay-ephemeral isolation layer entirely. + +This was a **conversational** design session, not a one-shot. The reasoning chain (each step +verified empirically against a throwaway tmux server) was: + +1. The `rk-relay-*` ephemeral exists because tmux gives each session exactly **one active-window + pointer**, shared across all attachments to that session. run-kit is multi-viewer (two tabs, + laptop+phone, board panes), so two attachments wanting different active windows on one shared + session collide. The ephemeral grouped session gives each WebSocket its own active-window + pointer → isolation. +2. The user accepted the consequences of losing multi-client isolation (#1) and of viewer + navigation mutating the real session (#3), but wanted to keep **boards**. +3. We established (probed) that a board renders **N live panes simultaneously**, each a different + window → needs **N independent active-window pointers** → N sessions. A single shared "board + session" does NOT solve this (probe: selecting A1 then A2 in one session just moves the one + pointer). 
So link-based or option-based membership still requires a per-pane isolation session. +4. **Key unlock (user's idea):** remove window *sharing*. A window lives in exactly ONE place — + either a home session (SESSIONS view) or moved onto a board (BOARDS view), never both. PIN = + `move-window` (not `link-window`) into the window's OWN single-window session. With one window + per session, that session's single active-window pointer is *permanently* that window — a + viewer attaches **directly**, no isolation layer. Probed end-to-end including the dead-home + restore fallback. + +Decisions locked interactively (see Assumptions table for SRAD grades): +- Boards are **server-scoped** (`move-window` can't cross tmux servers). +- Pin **relocates** the window out of its home session (intended — this is what enables the deletion). +- Unpin restores to a **remembered home** (`@rk_home` session var), recreating the home session if it died. +- Pins are **persistent across rk restarts** (durable user intent; tmux survives restarts per Constitution VI) → **no restore-sweep needed**. + +## Why + +**Problem.** Today every terminal WebSocket attaches to a per-connection ephemeral grouped session +(`rk-relay-*`, `app/backend/api/relay.go`). This isolation layer is load-bearing but expensive in +surface area: + +- Ephemeral lifecycle + ownership stamping (`@rk_owner_pid` via `SetSessionOwnerPID`/`GetSessionOwnerPID`). +- A startup PID-liveness sweep (`cmd/rk/serve_sweep.go`: `sweepOrphanedRelaySessions`, `pidAlive`, `relayOwnerIsDead`) to reap orphans left by a crashed predecessor. +- The `rk-relay-*` filter chokepoint in `parseSessions` + the `ListRawSessionNames` escape hatch. +- A large slice of the active-window event-derivation (`260530-v6hm`) that exists *because* the + ephemeral moves the active-window pointer off the base session, leaving `#{window_active}` stale. 
+ +Separately, board membership is a hand-rolled comma/colon encoding inside the `@rk_board` **server +option** (`::` triples), with fractional `ComputeOrderKey` ordering, +cross-server read+union, and lazy + eager stale cleanup (`RemoveAllByWindowID`, SSE poll-tick +window-kill diffing). That is a bespoke serialization layered on top of tmux state. + +**Consequence of not fixing.** The ephemeral isolation layer and the `@rk_board` encoding are two +independent subsystems that together carry the bulk of the relay/board complexity. They are the +reason the "common case" (a single terminal) cannot be the dumb thing — direct attach to a real +session. Every new board/relay feature pays the isolation + encoding tax. + +**Why this approach over alternatives (all explored and rejected in-conversation):** + +- *Revert to 1:1, drop boards.* Rejected — user wants boards. +- *1:1 terminals + keep ephemeral-per-pane for boards only.* Rejected — forks the relay into two + codepaths (`board-pane.tsx` currently shares `TerminalClient`/`/relay/{windowId}` with the normal + terminal); more code, not less; cuts against Constitution IV. +- *One `_rk-board-` session holding all pins (link-window).* Rejected — proven by probe to + still collide (one shared active-window pointer for N panes). +- *Keep ephemeral for everything (status quo).* Viable but does not achieve the "dumb common case" + goal and retains both subsystems. +- **Chosen: move-based single-location pin sessions.** Removes *sharing*, which is the root cause of + the isolation requirement. One window per pin-session → direct attach → no ephemeral. Membership + becomes "where the window lives" + two session vars → no `@rk_board` encoding. This is the most + idiomatic "tmux as source of truth" answer and the only one that lets us delete the ephemeral + while keeping boards. 
+ +## What Changes + +### Core principle + +Every window lives in **exactly one** session at a time: + +- **In SESSIONS view** → it's a window of a normal (home) session. A viewer attaches directly to the + home session. This is the dumb common case — no ephemeral. +- **In BOARDS view** → it has been *moved* into its own single-window session `_rk-pin-{id}`. + A board pane attaches directly to that pin-session. + +Never both. A "board" is **not a tmux session** — it is the *set of pin-sessions that share an +`@rk_board` value*. + +### Reserved naming + +- New reserved session-name prefix `_rk-pin-` (one pin-session per pinned window). Constant lives + alongside `tmux.RelaySessionPrefix`/`tmux.ControlAnchorSessionName` in `internal/tmux/tmux.go`. +- `parseSessions` gains an early-skip for `_rk-pin-` and **loses** the `rk-relay-*` skip (relay + ephemerals are gone). The `_rk-ctl` anchor skip is **unchanged**. + +### Source-of-truth model (all in tmux — Constitution II) + +| Concept | tmux representation | +|---------|---------------------| +| Pinned window | its own single-window session `_rk-pin-{id}` | +| Board membership | session var `@rk_board={name}` set on the pin-session (`set-option -t {pin-session} @rk_board {name}`) | +| Restore target | session var `@rk_home={home-session}` stamped at pin time | +| Order within board | session var on the pin-session (e.g. `@rk_board_order={key}`) | +| The window identity | unchanged `@N` — `move-window` preserves `#{window_id}` (tmux contract) | + +### Operations (full tmux surface) + +**PIN** (window `@id` on server `S`, into board `{board}`): +``` +new-session -d -s _rk-pin-@id # creates with a placeholder window +move-window -s {home}:@id -t _rk-pin-@id: +kill-window -t _rk-pin-@id:{placeholder} # leave only the moved window +set-option -t _rk-pin-@id @rk_home {home} +set-option -t _rk-pin-@id @rk_board {board} +set-option -t _rk-pin-@id @rk_board_order {key} +``` +Refinement (probe-surfaced): construct so the pin-session ends with the moved window as its sole +window — no stray `new-session` placeholder left behind.
+ +**UNPIN** (`@id`): +``` +home = show-options -t _rk-pin-@id -v @rk_home +if has-session =home: move-window -s _rk-pin-@id:@id -t home: +else: recreate home (moved window becomes its only window — no placeholder) +kill-session _rk-pin-@id +``` + +**LIST boards** = `list-sessions` filtered to `_rk-pin-*`, grouped by `@rk_board`. +**LIST a board's pins** = pin-sessions where `@rk_board == name`, ordered by `@rk_board_order`. +**RENDER a board pane** = attach PTY **directly** to `_rk-pin-{id}` (its sole window is permanently +active). No ephemeral, no scoped select needed. +**REORDER** = rewrite `@rk_board_order` on the pin-session (keep the existing fractional +`ComputeOrderKey`? — see Open Questions; index-within-board is no longer meaningful since each pin +is its own session, so ordering must be an explicit key). + +### Relay simplification (`app/backend/api/relay.go`) + +- Remove the per-WebSocket ephemeral allocation: no `newEphemeralRelayName`, no + `NewGroupedSession`, no `SetSessionOwnerPID` stamp, no scoped `SelectWindowInSession` on an + ephemeral, no `defer KillSessionCtx` for an ephemeral. +- The relay resolves the owning session via `ResolveWindowSession(windowID)` (kept) and attaches the + PTY directly to that session (be it a home session or a `_rk-pin-*` session). Active-window + selection for the common case becomes a direct `select-window` on the real session (the accepted + multi-client collision, #1). + +### Deletions (backend) + +- `cmd/rk/serve_sweep.go` — entire file (`sweepOrphanedRelaySessions`, `pidAlive`, + `relayOwnerIsDead`) and its wiring in `cmd/rk/serve.go`. Pins are persistent; ephemerals are gone; + nothing to sweep. (Confirm: no other orphan class needs sweeping.) +- `internal/tmux/tmux.go` — `NewGroupedSession`, `SetSessionOwnerPID`, `GetSessionOwnerPID`, + `ListRawSessionNames`, `RelaySessionPrefix`, `OwnerPIDOption`.
+- `internal/tmux/board.go` — `parseBoardValue`, `serializeBoardValue`, `setBoardValue`, + `ListBoardEntries`, `ListAllBoardEntries`, `nextAppendKey`, `RemoveAllByWindowID`, and the + `@rk_board` server-option format. `ComputeOrderKey` MAY survive if reused for `@rk_board_order` + (see Open Questions). +- `api/sse.go` — eager board-cleanup (`BoardEntriesFetcher.RemoveAllByWindowID`, the per-tick + window-kill diff that emits `board-changed {cleanup}`) and the `@rk_board` bootstrap broadcast. + Board membership now changes only via explicit pin/unpin (which emit their own SSE), and a killed + pinned window simply empties+removes its pin-session. + +### Additions (backend) + +- `_rk-pin-` prefix constant + `parseSessions` skip. +- Pin/unpin/list/reorder rewritten in `internal/tmux/board.go` + `api/boards.go` around + `new-session`/`move-window`/`kill-session` + the `@rk_board`/`@rk_home`/`@rk_board_order` session + vars. `windowExistsOnServer` (kept) still gates pin. +- Recreate-home fallback for a dead `@rk_home` on unpin. + +### Frontend + +- `src/components/board/board-pane.tsx` — still renders ``; + the windowId now resolves to a `_rk-pin-*` session server-side, transparent to the component. +- `src/api/boards.ts`, `src/hooks/use-boards.ts` — board list/pin/unpin/reorder contracts updated to + the new derivation (server-scoped; no cross-server union; ordering via the new key). SESSIONS + sidebar must not show pinned windows (they're physically moved out — already true once the home + session no longer contains them; verify the sidebar reflects the post-move session list). + +### Investigate (NOT assumed deletable) + +The active-window event-derivation subsystem (`internal/tmuxctl/*`, `260530-v6hm`, ~10 files) is +**also** driven by the `_rk-ctl` control-mode anchor and serves the SESSIONS sidebar highlight +*independent of boards*. It is **not** purely downstream of the ephemeral. 
Scope: *investigate* +whether it simplifies once nothing moves the active pointer off the base session for the common +case — but do **not** assume removal. The `_rk-ctl` anchor and the `exit-empty off` backstop +(`260602-a1wo`) are **unaffected and must stay** (Constitution VI). + +## Affected Memory + +- `run-kit/tmux-sessions`: (modify) — §"Per-WebSocket Ephemeral Grouped Sessions (`rk-relay-*`)" + largely removed; add §"Pin Sessions (`_rk-pin-*`)" describing the move-based board model; replace + §"`@rk_board` — Pane Board Membership"; update the §"Startup sweep" content (sweep removed); update + the Server-Scoped User Options table (drop `@rk_owner_pid`/`@rk_board` server-option rows, add + `@rk_board`/`@rk_home`/`@rk_board_order` session-scoped rows on pin-sessions). +- `run-kit/architecture`: (modify) — §"Boards Feature" (server-scoped, move-based), §"Terminal Relay" + (direct attach, no ephemeral), Data Model `@rk_board` row, caching/SSE board-cleanup notes. +- `run-kit/ui-patterns`: (modify) — board pane rendering; SESSIONS-vs-BOARDS exclusivity (a pinned + window leaves its home session's tab list). + +## Impact + +- **Backend**: `app/backend/api/relay.go`, `api/boards.go`, `api/sse.go`, + `internal/tmux/tmux.go`, `internal/tmux/board.go`, `cmd/rk/serve_sweep.go` (delete), + `cmd/rk/serve.go` (unwire sweep). Tests: `internal/tmux/board_test.go`, + `internal/tmux/socketsweep_test.go`, `api/*_test.go` for boards/relay/sse. +- **Frontend**: `src/components/board/*`, `src/api/boards.ts`, `src/hooks/use-boards.ts`, plus their + `.test.ts(x)` and any `*.spec.ts`/`*.spec.md` board e2e companions (Constitution Test Companion Docs). 
+- **Constitution checkpoints**: §I (all new tmux calls via `exec.CommandContext` + timeout, validate + `@id`/board names — reuse `ValidWindowID`/`ValidBoardName`); §II (no DB — membership stays derived + from tmux sessions + vars; flag the philosophical shift from "tiny derived option value" to "tmux + session structure as the record" for review); §IV (one rendering path; fewer subsystems); §VI + (`_rk-ctl` anchor + `exit-empty off` untouched; pins survive restarts); §IX (mutations stay POST). +- **Behavioral changes the user accepted**: multi-client active-window collisions on a shared real + session; viewer navigation mutates the real session's active window; pinned windows disappear from + their home session until unpinned. + +## Open Questions + +All four open questions were resolved during the 2026-06-03 clarify session (see +`## Clarifications`). Resolutions, with the lowest-surface choice taken in each case: + +- **Ordering key** → **Reuse the fractional `ComputeOrderKey`**, stored per pin-session in the + `@rk_board_order` session var. Reorder rewrites exactly one var; no renumbering of sibling pins; + preserves drag-to-insert-between. `ComputeOrderKey` is therefore the one piece of the old + `board.go` that survives the rewrite. + +- **Empty-board semantics** → **No empty boards** — a board is purely the set of `_rk-pin-*` + sessions sharing an `@rk_board` value; when the last pin is unpinned its pin-session is killed and + the board is no longer listed. No board-name registry, no placeholder. Matches today's behavior. + +- **Pin home session's *only* window** → **Empty home session persists.** `exit-empty off` + (already set, `260602-a1wo`) keeps the now-empty home session alive; unpin later finds it via + `@rk_home` and moves the window back. No special-casing on pin. Consistent with the session-floor + model. 
+ +- **Multi-unpin placement** → **Append in unpin order.** Each unpin appends the window to its home + session at tmux's auto-assigned next index — no stored original index, no slot reconstruction. + Return order reflects unpin order, not original tab position. + + +## Clarifications + +### Session 2026-06-03 (bulk confirm) + +| # | Action | Detail | +|---|--------|--------| +| 6 | Confirmed | — | +| 7 | Confirmed | — | +| 8 | Confirmed | — | + +### Session 2026-06-03 (open questions) + +| Q | Question | Answer | +|---|----------|--------| +| 1 | Pin ordering storage | Reuse fractional `ComputeOrderKey` in `@rk_board_order` (one var per reorder, no renumber) — resolves Tentative #9 | +| 2 | Empty-board semantics | No empty boards — board derives from pin-sessions; vanishes on last unpin; no name registry | +| 3 | Pinning a home session's only window | Empty home session persists via `exit-empty off`; unpin restores via `@rk_home` | +| 4 | Multi-unpin placement | Append in unpin order at next index; no stored original position | + +## Assumptions + +| # | Grade | Decision | Rationale | Scores | +|---|-------|----------|-----------|--------| +| 1 | Certain | Boards are server-scoped; `move-window` can't cross tmux servers, so a pin-session lives on its window's server and cross-server union logic is removed | User explicitly decided server-scoped this session; tmux constraint is hard (verified `move-window`/`link-window` are server-local) | S:98 R:70 A:95 D:95 | +| 2 | Certain | PIN moves the window out of its home session (it disappears from SESSIONS until unpinned) | User explicitly confirmed "Yes — that's the point"; it is the mechanism that removes sharing and enables deleting the ephemeral | S:98 R:55 A:90 D:95 | +| 3 | Certain | UNPIN restores to the `@rk_home` session var; if home was killed, recreate it (moved window as its only window) | User selected "Remember home session" with recreate fallback; probed end-to-end including dead-home recreate | S:95 R:65 
A:90 D:90 | +| 4 | Certain | Pins persist across rk restarts → no restore-sweep; the relay startup sweep is deleted | User decided pins are persistent; tmux survives restarts (Constitution VI); a persisted pin is valid state, not an orphan | S:95 R:60 A:85 D:90 | +| 5 | Certain | Each pinned window is its OWN single-window session (`_rk-pin-`), NOT co-located in one per-board session | Proven by live tmux probe: one session has one active-window pointer, so N visible panes require N sessions; co-location collides | S:95 R:75 A:98 D:95 | +| 6 | Certain | Membership = `@rk_board`/`@rk_home`/`@rk_board_order` session vars on pin-sessions; the `@rk_board` server-option encoding (+ fractional cross-server union, lazy/eager cleanup) is removed | Clarified — user confirmed | S:95 R:55 A:80 D:80 | +| 7 | Certain | The relay attaches the PTY directly to the resolved session (home or pin); the `rk-relay-*` ephemeral path and `@rk_owner_pid` stamping are removed | Clarified — user confirmed | S:95 R:50 A:85 D:85 | +| 8 | Certain | The `tmuxctl/` active-window event-derivation is scoped as "investigate, likely simplifiable" — NOT assumed deletable; `_rk-ctl` anchor + `exit-empty off` backstop stay | Clarified — user confirmed | S:95 R:60 A:80 D:75 | +| 9 | Certain | Board ordering reuses the existing fractional `ComputeOrderKey`, stored in `@rk_board_order` per pin-session | Clarified — user confirmed | S:95 R:80 A:60 D:55 | + +9 assumptions (9 certain, 0 confident, 0 tentative, 0 unresolved). 
diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md b/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md new file mode 100644 index 00000000..453ed34a --- /dev/null +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md @@ -0,0 +1,317 @@ +# Plan: Move-Based Server-Scoped Boards (Pin Sessions) + +**Change**: 260602-qn62-move-based-board-pin-sessions +**Status**: In Progress +**Intake**: `intake.md` + +## Requirements + +### Naming & Session Model: Pin Sessions + +#### R1: Reserved `_rk-pin-` prefix and window-id-derived pin-session names +The tmux layer SHALL define a reserved session-name prefix `_rk-pin-` (constant `PinSessionPrefix`) +alongside `ControlAnchorSessionName` in `internal/tmux/tmux.go`. Each pinned window lives in exactly +one single-window session whose name is derived deterministically from the window's `@N` id by +stripping the leading `@` (tmux session names disallow `@`), e.g. `@42` → `_rk-pin-42`. A pure helper +SHALL map a window id to its pin-session name and back (and validate the derived name). + +- **GIVEN** a window id `@42` +- **WHEN** its pin-session name is derived +- **THEN** the result is `_rk-pin-42` +- **AND** the inverse maps `_rk-pin-42` back to window id `@42` + +#### R2: `parseSessions` skips `_rk-pin-*`, no longer skips `rk-relay-*` +`parseSessions` (the single chokepoint feeding every user-facing session list) SHALL early-skip any +session whose name has the `_rk-pin-` prefix, and SHALL NO LONGER skip `rk-relay-*` (relay ephemerals +are removed). The `_rk-ctl` anchor skip SHALL remain unchanged. 
+ +- **GIVEN** a `list-sessions` output containing `dev`, `_rk-pin-42`, and `_rk-ctl` +- **WHEN** `parseSessions` runs +- **THEN** only `dev` is returned (pin-session and anchor filtered) +- **AND** an `rk-relay-xxxx` name, if present, is NOT filtered (the relay-skip is gone) + +### Board Membership: tmux-derived, no `@rk_board` server-option encoding + +#### R3: Membership stored as session vars on pin-sessions +Board membership SHALL be derived entirely from `_rk-pin-*` sessions and their session-scoped user +options: `@rk_board={name}` (which board), `@rk_home={session}` (restore target), and +`@rk_board_order={key}` (order within board). The comma/colon `@rk_board` server-option encoding +(`parseBoardValue`/`serializeBoardValue`/`setBoardValue`/`BoardOption`), fractional cross-server +union (`ListAllBoardEntries`), `nextAppendKey`, and stale-cleanup (`RemoveAllByWindowID`, +`GetBoard` write-back) SHALL be removed. `ComputeOrderKey` SHALL survive and be reused for +`@rk_board_order`. + +- **GIVEN** a pin-session `_rk-pin-42` with `@rk_board=main`, `@rk_home=dev`, `@rk_board_order=m` +- **WHEN** board membership is listed +- **THEN** the entry `{server, windowId:@42, board:main, orderKey:m}` is derived from the session vars +- **AND** no `@rk_board` server option is read or written anywhere + +#### R4: Boards are server-scoped; board list derives from pin-sessions +`ListBoards` SHALL enumerate pin-sessions per reachable server, group by `@rk_board`, and return an +alphabetical `[]BoardSummary` with per-board pin counts. A board exists only while at least one +pin-session carries its `@rk_board` value (no empty boards, no name registry). `GetBoard(name)` SHALL +return the entries whose `@rk_board == name`, sorted by `@rk_board_order`, with NO stale write-back.
+ +- **GIVEN** pin-sessions `_rk-pin-1 (@rk_board=main)`, `_rk-pin-2 (@rk_board=main)`, `_rk-pin-3 (@rk_board=deploy)` +- **WHEN** `ListBoards` runs +- **THEN** it returns `[{deploy,1},{main,2}]` (alphabetical) +- **AND** unpinning the last `deploy` pin removes `deploy` from the list with no placeholder + +### Pin / Unpin / Reorder operations (move-based) + +#### R5: PIN moves the window into its own pin-session +`Pin(server, windowID, board)` SHALL: resolve the window's current (home) session; create +`_rk-pin-{N}`; `move-window` the window into it so the moved window is the pin-session's sole window +(no stray placeholder); and stamp `@rk_home`, `@rk_board`, and `@rk_board_order` (a fresh append key +via `ComputeOrderKey` over the board's existing keys). PIN SHALL be idempotent: if the pin-session +already exists for that window, it is a no-op (no re-move, no order-key churn). All tmux calls SHALL +use `exec.CommandContext` + `context.WithTimeout(ctx, TmuxTimeout)` via the `internal/tmux` exec +helpers, with `ValidWindowID`/`ValidBoardName` validated before use (Constitution §I). + +- **GIVEN** window `@42` in home session `dev` and board `main` with existing pin keyed `m` +- **WHEN** `Pin(server, "@42", "main")` runs +- **THEN** `_rk-pin-42` exists holding only `@42`, with `@rk_home=dev`, `@rk_board=main`, and an order key `> m` +- **AND** `dev` no longer contains `@42` +- **AND** a second identical `Pin` call leaves the pin-session and its order key unchanged + +#### R6: UNPIN moves the window back to its remembered home, recreating it if dead +`Unpin(server, windowID, board)` SHALL: read `@rk_home` from `_rk-pin-{N}`; if that home session +exists, `move-window` the window back into it (tmux appends at the next index); else recreate the +home session so the moved window becomes its only window (no placeholder); then `kill-session` the +now-empty pin-session. UNPIN SHALL be idempotent: a missing pin-session is a silent success.
+ +- **GIVEN** pinned window `@42` in `_rk-pin-42` with `@rk_home=dev` +- **WHEN** `Unpin(server, "@42", "main")` runs and `dev` still exists +- **THEN** `@42` is appended back into `dev` at the next index and `_rk-pin-42` is killed +- **AND** when `dev` was killed first, it is recreated with `@42` as its only window + +#### R7: REORDER rewrites exactly one `@rk_board_order` var +`Reorder` SHALL compute a new order key strictly between the supplied neighbours (via the surviving +`ComputeOrderKey`) and write it to the pin-session's `@rk_board_order` only — no sibling renumbering. + +- **GIVEN** pinned windows with keys `m` and `t` on board `main` +- **WHEN** a window is reordered between them +- **THEN** only that window's `@rk_board_order` changes to a key strictly between `m` and `t` + +### Relay: direct attach, no ephemeral + +#### R8: Relay attaches the PTY directly to the resolved owning session +`handleRelay` SHALL resolve the owning session via `ResolveWindowSession(windowID)` (home or +`_rk-pin-*`) and attach the PTY directly to that session — removing per-WebSocket ephemeral +allocation (`newEphemeralRelayName`, `NewGroupedSession`), the `@rk_owner_pid` stamp +(`SetSessionOwnerPID`), the scoped `SelectWindowInSession` on the ephemeral, and the deferred +ephemeral `KillSessionCtx`. Active-window selection for the common case becomes a direct +`select-window` on the real session. `ResolveWindowSession` SHALL stop filtering `rk-relay-*` +(ephemerals are gone) so a window living in a `_rk-pin-*` session resolves to that pin-session. 
+ +- **GIVEN** a relay connection for window `@42` whose owning session is `dev` +- **WHEN** the WebSocket connects +- **THEN** the PTY attaches directly to `dev` (no `rk-relay-*` session is created) +- **AND** a relay for a pinned window `@42` attaches directly to `_rk-pin-42` + +### SSE: no board cleanup, no bootstrap broadcast + +#### R9: SSE drops eager board cleanup and the board bootstrap broadcast +The SSE hub SHALL remove the per-tick window-kill diff that emitted `board-changed {cleanup}` (and +the `RemoveAllByWindowID` dependency on `BoardEntriesFetcher`) and the `@rk_board` bootstrap +broadcast (`broadcastBoardBootstrap`, `previousBoardJSON`, first-poll board read). Board membership +changes SHALL be surfaced only via the explicit pin/unpin/reorder `board-changed` events the handlers +already emit; a killed pinned window simply empties and removes its pin-session (observed by the +frontend's existing refetch on the next session-list change). + +- **GIVEN** the SSE poll loop is running with board entries present +- **WHEN** a pinned window is killed externally +- **THEN** no `board-changed {cleanup}` event is emitted and no `RemoveAllByWindowID` call is made +- **AND** no `board-changed {bootstrap}` event is emitted on first poll + +### Startup: no relay sweep + +#### R10: Delete the relay startup sweep +`cmd/rk/serve_sweep.go` (`sweepOrphanedRelaySessions`, `pidAlive`, `relayOwnerIsDead`) and its wiring +in `cmd/rk/serve.go` SHALL be deleted. Pins are persistent across rk restarts (Constitution §VI); +ephemerals are gone; there is no orphan class to sweep. `ListRawSessionNames`, `RelaySessionPrefix`, +`OwnerPIDOption`, `NewGroupedSession`, `SetSessionOwnerPID`, `GetSessionOwnerPID` SHALL be removed +from `internal/tmux/tmux.go`. The `_rk-ctl` anchor and `exit-empty off` backstop SHALL stay untouched. 
+ +- **GIVEN** an `rk serve` start +- **WHEN** the process boots +- **THEN** no relay sweep runs and no `rk-relay-*` reaping is attempted +- **AND** existing `_rk-pin-*` sessions from a prior run are left intact (persistent pins) + +### Frontend: transparent pin-session resolution + +#### R11: API response shape unchanged; frontend board contracts updated for server-scoping +The `GET /api/boards/{name}` response (`BoardEntryResponse`: server, windowId, session, windowIndex, +windowName, orderKey, panes) SHALL keep its existing field shape so `board-pane.tsx` and +`board-page.tsx` need no structural change — the `windowId` now resolves to a `_rk-pin-*` session +server-side, transparent to the component. `src/api/boards.ts` and `src/hooks/use-boards.ts` SHALL +reflect the new derivation in comments/contract (server-scoped; membership from pin-sessions). The +SESSIONS sidebar SHALL NOT show pinned windows (already true once the home session no longer contains +the moved window). All mutating endpoints SHALL stay POST (Constitution §IX). + +- **GIVEN** a board with one pinned window +- **WHEN** the board page renders +- **THEN** `BoardPane` receives the same `BoardEntry` shape and renders the same `TerminalClient` on `/relay/{windowId}` +- **AND** the pinned window does not appear in its former home session's sidebar tab list + +### Non-Goals + +- Simplifying the `internal/tmuxctl/*` active-window event-derivation subsystem — intake scopes it as + *investigate only, NOT assumed deletable*. It is driven by the `_rk-ctl` anchor and serves the + SESSIONS sidebar highlight independent of boards. This change leaves it untouched. +- Any DB / persistent store (Constitution §II) — membership stays tmux-derived. +- Restore-sweep on startup — pins are durable, not orphans (Constitution §VI). + +### Design Decisions + +1.
**One window per pin-session (`_rk-pin-<id>`)**: each pinned window is moved into its own + single-window session — *Why*: a tmux session has exactly one active-window pointer, so N visible + board panes require N sessions; a direct attach to a single-window session removes the ephemeral + isolation layer entirely. — *Rejected*: one shared `_rk-board-<name>` session (proven by probe to + collide on the single active-window pointer); keeping ephemerals for board panes only (forks the + relay into two codepaths, more code). +2. **Pin-session name derived by stripping `@`**: `@42` → `_rk-pin-42` — *Why*: deterministic, + reversible, avoids storing a name→id map; tmux session names disallow `@`. — *Rejected*: random + suffix (needs a lookup map; not derivable from the window id). +3. **Keep the API response shape stable**: the move is invisible to the frontend because the relay + resolves the owning session from the window id server-side — *Why*: minimizes frontend churn + (Constitution §IV), the windowId is the stable identity `move-window` preserves. — *Rejected*: + exposing the pin-session name to the client (leaks an implementation detail, more frontend change). +4. **Reuse `ComputeOrderKey` for `@rk_board_order`**: store one fractional key per pin-session — + *Why*: a reorder rewrites exactly one var, no sibling renumber, preserves drag-to-insert-between. + +### Deprecated Requirements + +#### `@rk_board` server-option encoding +**Reason**: replaced by per-pin-session `@rk_board`/`@rk_home`/`@rk_board_order` vars; the bespoke +comma/colon serialization, cross-server union, and lazy/eager stale cleanup are removed. +**Migration**: membership is now the set of `_rk-pin-*` sessions and their session vars. + +#### Per-WebSocket ephemeral relay grouped sessions (`rk-relay-*`) +**Reason**: single-window pin-sessions remove window *sharing*, so the isolation layer is unnecessary; +the relay attaches directly. 
+**Migration**: `handleRelay` attaches the PTY to the resolved owning session directly. + +#### Relay startup sweep +**Reason**: ephemerals are gone and pins are persistent — no orphan class to reap. +**Migration**: N/A (deleted). + +## Tasks + +### Phase 1: tmux layer — naming + helpers (foundation) + +- [x] T001 Add `PinSessionPrefix = "_rk-pin-"` constant and pure helpers `PinSessionName(windowID) (string, bool)` + `WindowIDFromPinSession(name) (string, bool)` in `app/backend/internal/tmux/tmux.go`, validating with `ValidWindowID`. +- [x] T002 Update `parseSessions` in `app/backend/internal/tmux/tmux.go` to early-skip `PinSessionPrefix`, remove the `RelaySessionPrefix` skip; keep the `_rk-ctl` skip. Add `parseSessions` unit-test cases (skip `_rk-pin-*`, do NOT skip `rk-relay-*`, still skip `_rk-ctl`) in `app/backend/internal/tmux/tmux_test.go`. + +### Phase 2: tmux layer — board.go rewrite (move-based) + +- [x] T003 Rewrite `app/backend/internal/tmux/board.go`: keep `BoardEntry`, `BoardSummary`, `ValidBoardName`, `ValidWindowID`, `ValidOrderKey`, `ComputeOrderKey`, `initialAppendKey`. Delete `BoardOption`, `boardEntrySep/boardFieldSep`, `parseBoardValue`, `serializeBoardValue`, `setBoardValue`, `nextAppendKey`, `ListAllBoardEntries`, `RemoveAllByWindowID`, and the `GetBoard` stale write-back. +- [x] T004 Implement pin-session-backed reads in `board.go`: `pinSessionVars(ctx, server, pinSession)` reading `@rk_board`/`@rk_home`/`@rk_board_order` via `show-options -v -t`; `ListBoardEntries(ctx, server)` enumerating `_rk-pin-*` sessions (via `list-sessions -F #{session_name}`) and deriving `[]BoardEntry`; `ListBoards(ctx)` grouping per-server entries by board (alphabetical summary); `GetBoard(ctx, name)` filtering+sorting by order key, NO write-back. 
+- [x] T005 Implement `Pin(ctx, server, windowID, board)` in `board.go`: validate ids; idempotent no-op if `_rk-pin-<id>` exists; resolve home session via `ResolveWindowSession`; `new-session -d -s _rk-pin-<id>`; `move-window` the window in; kill the placeholder window so only the moved window remains; stamp `@rk_home`/`@rk_board`/`@rk_board_order` (append key from existing board keys via `ComputeOrderKey`). All via ctx+timeout exec helpers. +- [x] T006 Implement `Unpin(ctx, server, windowID, board)` in `board.go`: idempotent no-op if pin-session absent; read `@rk_home`; if home `has-session`, `move-window` back; else recreate home with the moved window as sole window (no placeholder); `kill-session _rk-pin-<id>`. +- [x] T007 Implement `Reorder(ctx, server, windowID, board, newOrderKey)` in `board.go`: validate, set `@rk_board_order` on `_rk-pin-<id>` only; error if the pin-session/board does not match. +- [x] T008 Rewrite `app/backend/internal/tmux/board_test.go` for the new model: drop `parseBoardValue`/`serializeBoardValue`/round-trip/`RemoveAllByWindowID`/stale-write-back tests; keep `ValidBoardName`/`ValidWindowID`/`ValidOrderKey`/`ComputeOrderKey` tests; add integration tests (against the existing `withBoardTmux` isolated server) for Pin-moves-window, Pin-idempotent, Unpin-restores-to-home, Unpin-recreates-dead-home, Reorder-one-var, ListBoards-derives-from-pin-sessions, empty-board-vanishes. + +### Phase 3: API layer — relay, router interface, sse + +- [x] T009 Update `ResolveWindowSession` in `app/backend/internal/tmux/tmux.go` to stop filtering `RelaySessionPrefix` (removed); it returns the first non-empty owning session for the window id (home or `_rk-pin-*`). 
+- [x] T010 Rewrite `app/backend/api/relay.go` `handleRelay`: remove `newEphemeralRelayName`, `NewGroupedSession`, `SetSessionOwnerPID`, `SelectWindowInSession`-on-ephemeral, and the deferred ephemeral `KillSessionCtx`; resolve the owning session and attach the PTY directly to it; do a direct `SelectWindow` on the real session for the common case. +- [x] T011 Update `app/backend/api/router.go` `TmuxOps` interface + `prodTmuxOps`: remove `NewGroupedSession` and `SetSessionOwnerPID` methods; keep `PinBoard`/`UnpinBoard`/`ReorderBoard` wired to the rewritten `tmux.Pin`/`Unpin`/`Reorder` (ReorderBoard still uses `lookupNeighbourKeys` + `ComputeOrderKey`). +- [x] T012 Update `app/backend/api/sse.go`: drop `RemoveAllByWindowID` from the `BoardEntriesFetcher` interface + `prodBoardEntriesFetcher`; delete `detectKilledWindowIDs`, `previousWindowIDs`, the window-kill cleanup loop, `broadcastBoardBootstrap`, `boardBootstrapPayload`, `previousBoardJSON`, the board-bootstrap first-poll read, and the `addClient` cached-board send. Keep `broadcastBoardChanged` (pin/unpin/reorder) and the `"cleanup"`/`"bootstrap"` strings only where still emitted (none). +- [x] T013 Update `app/backend/cmd/rk/serve.go`: remove the `sweepOrphanedRelaySessions` call + its ctx and the explanatory comments referencing the sweep ordering. Delete `app/backend/cmd/rk/serve_sweep.go` and `app/backend/cmd/rk/serve_sweep_test.go`. +- [x] T014 Remove `RelaySessionPrefix`, `OwnerPIDOption`, `NewGroupedSession`, `SetSessionOwnerPID`, `GetSessionOwnerPID`, `ListRawSessionNames` from `app/backend/internal/tmux/tmux.go`; update `baseGroupName`/`parseActiveWindowsByGroup`/`realSessionNameSet` references to `RelaySessionPrefix` (these supported relay-group derivation — adjust to filter only the `_rk-ctl` anchor, since relays no longer exist). 
+ +### Phase 4: Test fixups (Go) — mocks, deleted-symbol tests + +- [x] T015 Update `app/backend/api/sessions_test.go` `mockTmuxOps`: remove `NewGroupedSession`/`SetSessionOwnerPID` methods + recorded fields; remove `RemoveAllByWindowID` from `stubBoardFetcher` if the interface no longer requires it. +- [x] T016 Update `app/backend/api/relay_test.go`: drop `TestRelay_EphemeralCleanupOnClose` and `TestRelay_OwnerStampFailureAbortsClean` (ephemeral path removed); rewrite `TestRelay_TwoWindowsTwoRelaysDistinctOutput` to assert each relay attaches directly to its window's session (no `rk-relay-*` created); keep `TestRelay_PercentEncodedAtNot400` and `TestRelay_MissingWindowClose4004` (adjust the latter's `ListRawSessionNames` leak-check, which is removed, to a `ListSessions`/`list-sessions`-based no-pin/no-relay assertion). +- [x] T017 Update `app/backend/api/sse_test.go`: drop `TestSSE_BoardChangedCachedOnConnect`, `TestSSE_BoardBootstrapReadsTmuxOnFirstPoll`, `TestSSE_WindowKillEmitsBoardCleanup`, the `killTrackingFetcher`, and the `stubBoardFetcher.RemoveAllByWindowID` method; keep the pin/unpin/reorder broadcast coverage in `boards_test.go` untouched. +- [x] T018 Update/delete `app/backend/api/socketsweep_test.go` and `app/backend/internal/tmux/socketsweep_test.go` only as needed: these test the rk-test-* socket reaper (TestMain post-sweep), NOT the relay sweep — keep them unless they reference removed symbols (`ListRawSessionNames`, `RelaySessionPrefix`). Adjust any reference to removed symbols. + +### Phase 5: Frontend + +- [x] T019 Update `app/frontend/src/api/boards.ts` doc comments to reflect server-scoped, pin-session-derived membership (no cross-server union); keep the `BoardEntry`/`BoardSummary`/`ReorderResponse` types and function signatures stable. Update `app/frontend/src/hooks/use-boards.ts` comments where they assert "boards are explicitly cross-server" → server-scoped derivation; keep the SSE-refetch behavior. 
Adjust `app/frontend/src/api/boards.test.ts` / `use-boards.test.tsx` only if assertions reference removed cross-server semantics. +- [x] T020 Verify `app/frontend/src/components/board/board-pane.tsx` and `board-page.tsx` need no structural change (same `BoardEntry` shape); run `npx tsc --noEmit` (via just) to confirm no type drift. + +### Phase 6: e2e spec companions + +- [x] T021 Review the board e2e specs (`boards-pin-flow`, `boards-mobile`, `boards-multi-server`, `boards-same-session-multi-pane`, `boards-desktop-suspend`) under `app/frontend/tests/e2e/`: `boards-same-session-multi-pane.spec.ts` asserted the OLD multi-pane-same-session behavior (now each pin is its own session) and `boards-multi-server.spec.ts` asserted cross-server aggregation (now server-scoped) — update those `.spec.ts` to the new model and update their sibling `.spec.md` companions in the same change (Constitution Test Companion Docs). Specs that still hold (pin/unpin a window, render a live pane) stay. + +## Execution Order + +- Phase 1 (T001-T002) is the foundation: the prefix constant + helpers are used by board.go and relay. +- Phase 2 (T003-T008) depends on T001; T004 depends on T003; T005-T007 depend on T004; T008 after T005-T007. +- Phase 3 (T009-T014) depends on Phase 1-2: T009/T010 (relay) need T001; T011 (interface) needs T010; T012-T014 are independent of board.go internals but depend on the symbol removals. +- Phase 4 (T015-T018) depends on Phase 3 (interface/symbol removals). +- Phase 5-6 (T019-T021) are frontend/e2e, independent of Go internals but validated last. + +## Acceptance + +### Functional Completeness + +- [ ] A-001 R1: `PinSessionPrefix`, `PinSessionName`, and `WindowIDFromPinSession` exist in `tmux.go`; `@42` ↔ `_rk-pin-42` round-trips; invalid ids rejected. +- [ ] A-002 R2: `parseSessions` skips `_rk-pin-*` and `_rk-ctl`, does NOT skip `rk-relay-*`; covered by a unit test. 
+- [ ] A-003 R3: Membership is read from pin-session `@rk_board`/`@rk_home`/`@rk_board_order` vars; the old `@rk_board` *server-option encoding* is gone — no `parseBoardValue`/`serializeBoardValue`/`setBoardValue` remain. `ComputeOrderKey` is retained. +- [ ] A-004 R4: `ListBoards` derives an alphabetical per-board count from pin-sessions; `GetBoard` filters+sorts by order key with no write-back; the last unpin removes the board from the list. +- [ ] A-005 R5: `Pin` moves the window into `_rk-pin-` (sole window, no placeholder), stamps the three vars, removes it from the home session, and is idempotent. +- [ ] A-006 R6: `Unpin` restores the window to `@rk_home` (or recreates a dead home as a single-window session), kills the pin-session, and is idempotent on a missing pin-session. +- [ ] A-007 R7: `Reorder` rewrites only the target pin-session's `@rk_board_order` (strictly-between key); no sibling renumber. +- [ ] A-008 R8: `handleRelay` attaches the PTY directly to the resolved session with no `rk-relay-*` creation; `NewGroupedSession`/`SetSessionOwnerPID`/`newEphemeralRelayName`/ephemeral-`SelectWindowInSession`/ephemeral-`KillSessionCtx` are gone; `ResolveWindowSession` no longer filters `rk-relay-*`. +- [ ] A-009 R9: SSE emits no `board-changed {cleanup}` or `{bootstrap}`; `RemoveAllByWindowID`, `broadcastBoardBootstrap`, `previousBoardJSON`, `previousWindowIDs`, `detectKilledWindowIDs`, and the kill-detection loop are removed. +- [ ] A-010 R10: `serve_sweep.go` (+ test) deleted, `sweepOrphanedRelaySessions` unwired from `serve.go`; `ListRawSessionNames`/`RelaySessionPrefix`/`OwnerPIDOption`/`NewGroupedSession`/`SetSessionOwnerPID`/`GetSessionOwnerPID` removed from `tmux.go`. +- [ ] A-011 R11: `GET /api/boards/{name}` keeps the `BoardEntryResponse` field shape; `board-pane.tsx`/`board-page.tsx` unchanged structurally; pinned windows absent from the home session sidebar; mutations stay POST. 
+ +### Behavioral Correctness + +- [ ] A-012 R5: After `Pin`, `tmux list-windows -t dev` no longer lists `@42` and `_rk-pin-42` holds exactly one window (`@42`). +- [ ] A-013 R6: After `Unpin` of a window whose home was killed, the home session is recreated with the moved window as its only window (no extra placeholder window). +- [ ] A-014 R8: Two relays to two different windows each receive only their own window's PTY output (no cross-leak) while attaching directly to the real sessions. + +### Removal Verification + +- [ ] A-015 R3/R8/R9/R10: A repo-wide grep finds no remaining references to `parseBoardValue`, `serializeBoardValue`, `setBoardValue`, `RemoveAllByWindowID`, `newEphemeralRelayName`, `NewGroupedSession`, `SetSessionOwnerPID`, `GetSessionOwnerPID`, `RelaySessionPrefix`, `OwnerPIDOption`, `ListRawSessionNames`, `sweepOrphanedRelaySessions`, `broadcastBoardBootstrap` (outside this plan/intake/memory docs). + +### Scenario Coverage + +- [ ] A-016 R5/R6/R7: Go integration tests cover Pin-moves, Pin-idempotent, Unpin-restore, Unpin-recreate-dead-home, Reorder-one-var, ListBoards-derivation, empty-board-vanishes (board_test.go). +- [ ] A-017 R8: `relay_test.go` proves direct-attach (no ephemeral created) and per-window isolation. +- [ ] A-018 R11: Board e2e specs updated for the move-based/server-scoped model with sibling `.spec.md` updated in the same change. + +### Edge Cases & Error Handling + +- [ ] A-019 R6: Unpin on a missing pin-session is a silent success (idempotent); Pin on an already-pinned window is a no-op. +- [ ] A-020 R4: An unreachable/empty server yields an empty board list (no error), consistent with the existing `isAbsentOption` tolerance. + +### Code Quality + +- [ ] A-021 Pattern consistency: New tmux funcs follow the `tmuxExecServer`/`tmuxExecRawServer` + `context.WithTimeout(ctx, TmuxTimeout)` pattern and the `killAudit` convention for any `kill-session`. 
+- [ ] A-022 No unnecessary duplication: Pin-session name derivation lives in one helper; `ComputeOrderKey` reused (not reimplemented); existing `ResolveWindowSession`/`windowExistsOnServer` reused. +- [ ] A-023 (Go subprocess security, §I): every new tmux call (`new-session`, `move-window`, `kill-session`, `set-option`, `show-options`, `has-session`) uses `exec.CommandContext` with a timeout context and an explicit argument slice — no shell strings; window ids validated with `ValidWindowID`, board names with `ValidBoardName` before use. +- [ ] A-024 (No magic strings, §anti-patterns): `_rk-pin-`, `@rk_board`, `@rk_home`, `@rk_board_order` are named constants in `internal/tmux`. +- [ ] A-025 (Inline tmux construction, §anti-patterns): all new tmux interaction goes through `internal/tmux/` helpers — no tmux command construction in `api/`. +- [ ] A-026 (Derive state from tmux, §principles): membership is derived from pin-sessions + session vars at request time; no in-memory cache, no DB (§II). +- [ ] A-027 (Constitution §VI): the `_rk-ctl` anchor and `exit-empty off` backstop are untouched; pins persist across restarts (no restore-sweep added). + +### Security + +- [ ] A-028 R5/R6/R7: Pin/Unpin/Reorder validate `windowID` (`ValidWindowID`) and `board` (`ValidBoardName`) before any subprocess; all mutating board endpoints remain POST and pass through the existing handler validation. + +## Notes + +- Check items as you review: `- [x]` +- All acceptance items must pass before `/fab-continue` (hydrate) +- If an item is not applicable, mark checked and prefix with **N/A**: `- [x] A-NNN **N/A**: {reason}` +- The intake's "Investigate" item (`internal/tmuxctl/*` simplification) is intentionally a Non-Goal here — left untouched per assumption #8. 
+ +## Assumptions + +| # | Grade | Decision | Rationale | Scores | +|---|-------|----------|-----------|--------| +| 1 | Certain | Pin-session name = `_rk-pin-` + windowID with the leading `@` stripped (`@42`→`_rk-pin-42`); a pure reversible helper derives it | tmux session names disallow `@`; deterministic + reversible avoids a name→id map; intake fixes the `_rk-pin-` shape | S:90 R:80 A:90 D:85 | +| 2 | Certain | Keep the `GET /api/boards/{name}` `BoardEntryResponse` field shape stable so the frontend is structurally unchanged; the move is transparent because the relay resolves the session from the windowId server-side | Intake §Frontend: "windowId now resolves to a `_rk-pin-*` session server-side, transparent to the component"; minimizes surface (§IV) | S:90 R:75 A:90 D:85 | +| 3 | Certain | Remove BOTH the SSE board `{cleanup}` diff and the `{bootstrap}` broadcast; membership changes surface only via explicit pin/unpin/reorder events | Intake §Relay simplification + §Deletions: "eager board-cleanup ... 
and the `@rk_board` bootstrap broadcast" both listed for removal | S:92 R:65 A:85 D:80 | +| 4 | Confident | `baseGroupName`/`parseActiveWindowsByGroup`/`realSessionNameSet` (which referenced `RelaySessionPrefix` for relay-group derivation) are adjusted to filter only the `_rk-ctl` anchor, since `rk-relay-*` no longer exists | These helpers serve the tmuxctl active-window seed which the intake scopes as "untouched/investigate"; the minimal correct edit is to drop the now-dead relay branch while preserving anchor filtering | S:80 R:55 A:75 D:70 | +| 5 | Confident | `Pin` resolves the home session via the existing `ResolveWindowSession` rather than introducing a new lookup; the placeholder window from `new-session -d` is killed after `move-window` so the pin-session ends single-window | Intake PIN recipe explicitly notes "construct so the pin-session ends with the moved window as its sole window — no stray placeholder"; reuses an existing helper (§anti-duplication) | S:88 R:70 A:80 D:75 | +| 6 | Confident | The rk-test-* socket reaper tests (`socketsweep_test.go`, TestMain post-sweep) are KEPT; only the relay sweep (`serve_sweep.go`) is deleted — they are distinct subsystems | Intake §Deletions targets only the relay sweep; the socket reaper is the cross-run SIGKILL cleanup, unrelated to ephemerals | S:85 R:70 A:85 D:80 | + +6 assumptions (3 certain, 3 confident, 0 tentative). 
From 6b91c47ff471f2eb9e5ab68df2c6cc237c64a434 Mon Sep 17 00:00:00 2001 From: Sahil Ahuja Date: Wed, 3 Jun 2026 01:53:57 +0530 Subject: [PATCH 2/4] Update ship status and record PR URL --- .../.history.jsonl | 1 + .../.status.yaml | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl b/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl index 28704f82..bc58b311 100644 --- a/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl @@ -24,3 +24,4 @@ {"cmd":"fab-continue","event":"command","ts":"2026-06-02T20:08:59Z"} {"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"ship","ts":"2026-06-02T20:21:46Z"} {"cmd":"git-pr","event":"command","ts":"2026-06-02T20:22:41Z"} +{"action":"enter","driver":"git-pr","event":"stage-transition","stage":"review-pr","ts":"2026-06-02T20:23:53Z"} diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml b/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml index e7937413..1e1a8ed7 100644 --- a/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml @@ -9,8 +9,8 @@ progress: apply: done review: done hydrate: done - ship: active - review-pr: pending + ship: done + review-pr: active plan: generated: true task_count: 21 @@ -33,8 +33,10 @@ stage_metrics: apply: {started_at: "2026-06-02T19:58:25Z", driver: fab-fff, iterations: 2, completed_at: "2026-06-02T20:08:05Z"} review: {started_at: "2026-06-02T20:08:05Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-02T20:08:09Z"} hydrate: {started_at: "2026-06-02T20:08:09Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-02T20:21:46Z"} - ship: {started_at: "2026-06-02T20:21:46Z", driver: fab-fff, iterations: 1} -prs: [] + ship: {started_at: 
"2026-06-02T20:21:46Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-02T20:23:53Z"} + review-pr: {started_at: "2026-06-02T20:23:53Z", driver: git-pr, iterations: 1} +prs: + - https://github.com/sahil87/run-kit/pull/233 true_impact: added: 0 deleted: 0 @@ -46,4 +48,4 @@ true_impact: computed_at: "2026-06-02T20:21:46Z" computed_at_stage: hydrate # true_impact: lazily created on first apply-finish (no placeholder here). -last_updated: 2026-06-02T20:21:46Z +last_updated: 2026-06-02T20:23:53Z From 3bd1f04e96d55657ba77edd53930cf0800e9938f Mon Sep 17 00:00:00 2001 From: Sahil Ahuja Date: Wed, 3 Jun 2026 03:39:37 +0530 Subject: [PATCH 3/4] fix: Board render join must resolve pinned windows via their pin-session MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handleBoardGet built its live-window join by scanning the user-facing ListSessions, which the move-based board change taught to hide _rk-pin-* sessions. Since pinned windows now LIVE in those hidden sessions, the join matched nothing and GET /api/boards/{name} returned [] — the board rendered zero panes (.xterm count 0). Fixed by joining each board entry against its own pin-session directly via tmux.PinSessionName(windowID) -> ListWindows, a by-name target query not subject to the session-list filter. Also fix windowExistsOnServer to check the pin-session, so re-pinning an already-pinned window to a different board reaches tmux.Pin's re-stamp path instead of a spurious 404. Adds a session-aware mock (listWindowsBySession) and a regression test asserting the board renders its entry when the window lives only in its pin-session — the integration gap that let CI catch what unit mocks could not. Verified: just test-backend green; the 6 previously-failing board/relay e2e specs (boards-pin-flow, boards-same-session-multi-pane, boards-multi-server, boards-mobile, boards-desktop-suspend, shell-rotation) now pass locally. 
--- app/backend/api/boards.go | 97 ++++++++++--------- app/backend/api/boards_test.go | 53 +++++++++- app/backend/api/sessions_test.go | 12 ++- .../.history.jsonl | 2 + .../.status.yaml | 8 +- .../plan.md | 4 + 6 files changed, 121 insertions(+), 55 deletions(-) diff --git a/app/backend/api/boards.go b/app/backend/api/boards.go index 5cd1e304..808b9821 100644 --- a/app/backend/api/boards.go +++ b/app/backend/api/boards.go @@ -50,61 +50,45 @@ func (s *Server) handleBoardGet(w http.ResponseWriter, r *http.Request) { } out := make([]BoardEntryResponse, 0, len(entries)) - // Build per-server windowID -> {session, WindowInfo} maps once so the - // per-entry join below is O(1) instead of O(sessions × windows). - type windowMatch struct { - session string - info tmux.WindowInfo - } - byServer := make(map[string]map[string]windowMatch) - - // First pass: fetch all sessions per server, populating the per-server - // windowID lookup as we go. - serversNeeded := make(map[string]struct{}) - for _, e := range entries { - serversNeeded[e.Server] = struct{}{} - } - for srv := range serversNeeded { - sessions, sErr := s.tmux.ListSessions(r.Context(), srv) - if sErr != nil { - continue - } - serverIndex := make(map[string]windowMatch) - for _, sess := range sessions { - windows, wErr := s.tmux.ListWindows(r.Context(), sess.Name, srv) - if wErr != nil { - continue - } - for _, win := range windows { - // First win wins — duplicate windowIDs across sessions on the - // same tmux server should not occur. - if _, exists := serverIndex[win.WindowID]; exists { - continue - } - serverIndex[win.WindowID] = windowMatch{session: sess.Name, info: win} - } - } - byServer[srv] = serverIndex - } - + // Join each board entry with live window data. A pinned window has been MOVED + // into its own single-window pin-session `_rk-pin-`, so the window lives + // inside a session that the user-facing `ListSessions`/`parseSessions` path + // deliberately HIDES (the `_rk-pin-` skip). 
Enumerating via `ListSessions` + // would therefore never find the pinned window and the board would render + // empty (the CI/e2e failure this replaced). Instead, look the window up in its + // OWN pin-session directly: the entry's WindowID maps deterministically to its + // pin-session name, and `ListWindows -t ` is a by-name target query + // that is NOT subject to the session-list filter. O(entries) targeted lookups. for _, e := range entries { - serverIndex, ok := byServer[e.Server] + pinSession, ok := tmux.PinSessionName(e.WindowID) if !ok { + // Malformed window id (should not occur — entries come from pin + // sessions) — skip defensively. continue } - match, ok := serverIndex[e.WindowID] - if !ok { - // Window vanished between GetBoard and the join — skip. + windows, wErr := s.tmux.ListWindows(r.Context(), pinSession, e.Server) + if wErr != nil || len(windows) == 0 { + // Pin-session vanished between GetBoard and the join (window/pin + // killed) — skip; the board simply shows one fewer pane. continue } + // A pin-session holds exactly one window — its sole window IS the pinned + // window. Match by WindowID defensively in case of an unexpected extra. + win := windows[0] + for _, w := range windows { + if w.WindowID == e.WindowID { + win = w + break + } + } out = append(out, BoardEntryResponse{ Server: e.Server, WindowID: e.WindowID, - Session: match.session, - WindowIndex: match.info.Index, - WindowName: match.info.Name, + Session: pinSession, + WindowIndex: win.Index, + WindowName: win.Name, OrderKey: e.OrderKey, - Panes: match.info.Panes, + Panes: win.Panes, }) } // Stable sort by orderKey to preserve the GetBoard ordering after the join. @@ -279,9 +263,28 @@ func (s *Server) handleBoardReorder(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, map[string]interface{}{"ok": true, "newOrderKey": newKey}) } -// windowExistsOnServer scans every session on the server and returns true if -// the supplied windowID matches a live window. 
+// windowExistsOnServer returns true if the supplied windowID matches a live +// window on the server — whether the window is in a normal (home) session OR +// already moved into its own pin-session. +// +// The pin-session must be checked explicitly: `ListSessions`/`parseSessions` +// HIDES `_rk-pin-*` sessions, so a window that is ALREADY pinned would be +// invisible to the home-session scan alone. Without the pin-session check, a +// re-pin of an already-pinned window (e.g. moving it to a different board) would +// be rejected 404 before reaching tmux.Pin's wrong-board re-stamp path. func (s *Server) windowExistsOnServer(r *http.Request, server, windowID string) bool { + // Fast path: the window's own pin-session (by-name target, not subject to the + // session-list filter). If present, the window is already pinned and live. + if pinSession, ok := tmux.PinSessionName(windowID); ok { + if windows, err := s.tmux.ListWindows(r.Context(), pinSession, server); err == nil { + for _, w := range windows { + if w.WindowID == windowID { + return true + } + } + } + } + // Otherwise scan the visible (home) sessions. sessions, err := s.tmux.ListSessions(r.Context(), server) if err != nil { return false diff --git a/app/backend/api/boards_test.go b/app/backend/api/boards_test.go index cc2ec8a6..c1fcc088 100644 --- a/app/backend/api/boards_test.go +++ b/app/backend/api/boards_test.go @@ -57,13 +57,16 @@ func TestBoards_GET_aggregateAcrossServers(t *testing.T) { } func TestBoard_GET_byName(t *testing.T) { + // In the move-based model a pinned window lives in its own `_rk-pin-` + // session (the handler joins live window data from there, not from a home + // session). The pinned window @1234 → pin-session `_rk-pin-1234`. 
ops := &mockTmuxOps{ getBoardResult: []tmux.BoardEntry{ {Server: "default", WindowID: "@1234", Board: "main", OrderKey: "a"}, }, listSessionsResult: []tmux.SessionInfo{{Name: "dev"}}, - listWindowsResult: []tmux.WindowInfo{ - {Index: 2, WindowID: "@1234", Name: "agent"}, + listWindowsBySession: map[string][]tmux.WindowInfo{ + "_rk-pin-1234": {{Index: 0, WindowID: "@1234", Name: "agent"}}, }, } router := newTestRouter(&mockSessionFetcher{}, ops) @@ -83,11 +86,55 @@ func TestBoard_GET_byName(t *testing.T) { t.Fatalf("got %d entries, want 1", len(got)) } g := got[0] - if g.WindowID != "@1234" || g.Session != "dev" || g.WindowIndex != 2 || g.WindowName != "agent" || g.OrderKey != "a" { + if g.WindowID != "@1234" || g.Session != "_rk-pin-1234" || g.WindowName != "agent" || g.OrderKey != "a" { t.Errorf("got %+v", g) } } +// TestBoard_GET_byName_windowInPinSession is the regression test for the +// CI/e2e failure where a pinned board rendered EMPTY. In the move-based model a +// pinned window is moved into its own `_rk-pin-` session, which the +// user-facing ListSessions/parseSessions path HIDES. handleBoardGet must look the +// window up in its pin-session directly — NOT by scanning ListSessions, which +// would never find it and drop every entry. Here the home session list contains +// only an unrelated empty session; the pinned window @1234 lives ONLY under +// `_rk-pin-1234`. The join must still return the entry with live window data. +func TestBoard_GET_byName_windowInPinSession(t *testing.T) { + ops := &mockTmuxOps{ + getBoardResult: []tmux.BoardEntry{ + {Server: "default", WindowID: "@1234", Board: "main", OrderKey: "a"}, + }, + // Home sessions visible to ListSessions do NOT contain @1234 — it was + // moved out into its pin-session (which ListSessions hides). A scan of + // these would find nothing. 
+ listSessionsResult: []tmux.SessionInfo{{Name: "dev"}}, + listWindowsBySession: map[string][]tmux.WindowInfo{ + "dev": {{Index: 0, WindowID: "@9", Name: "other"}}, + "_rk-pin-1234": {{Index: 0, WindowID: "@1234", Name: "agent"}}, + }, + } + router := newTestRouter(&mockSessionFetcher{}, ops) + + req := httptest.NewRequest(http.MethodGet, "/api/boards/main", nil) + rec := httptest.NewRecorder() + router.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, body=%s", rec.Code, rec.Body.String()) + } + var got []BoardEntryResponse + if err := json.Unmarshal(rec.Body.Bytes(), &got); err != nil { + t.Fatal(err) + } + if len(got) != 1 { + t.Fatalf("got %d entries, want 1 (board must NOT render empty when the window lives in its pin-session); body=%s", len(got), rec.Body.String()) + } + g := got[0] + if g.WindowID != "@1234" || g.Session != "_rk-pin-1234" || g.WindowName != "agent" || g.OrderKey != "a" { + t.Errorf("got %+v, want WindowID=@1234 Session=_rk-pin-1234 WindowName=agent OrderKey=a", g) + } +} + func TestBoard_GET_invalidName_400(t *testing.T) { ops := &mockTmuxOps{} router := newTestRouter(&mockSessionFetcher{}, ops) diff --git a/app/backend/api/sessions_test.go b/app/backend/api/sessions_test.go index dd7a989d..3ee674fa 100644 --- a/app/backend/api/sessions_test.go +++ b/app/backend/api/sessions_test.go @@ -74,7 +74,14 @@ type mockTmuxOps struct { listWindowsResult []tmux.WindowInfo listWindowsErr error - listSessionsResult []tmux.SessionInfo + // listWindowsBySession, when non-nil, makes ListWindows session-aware: + // it returns the windows mapped to the queried session name (empty slice + // for an unmapped session). This is required to faithfully model the + // move-based board world, where a pinned window lives ONLY in its + // `_rk-pin-` session and NOT in any home session — the flat + // listWindowsResult (returned for every session) cannot express that. 
+ listWindowsBySession map[string][]tmux.WindowInfo + listSessionsResult []tmux.SessionInfo listServersResult []string resolveWindowSessionResult string @@ -244,6 +251,9 @@ func (m *mockTmuxOps) SendKeys(windowID, keys, server string) error { return m.err } func (m *mockTmuxOps) ListWindows(ctx context.Context, session, server string) ([]tmux.WindowInfo, error) { + if m.listWindowsBySession != nil { + return m.listWindowsBySession[session], m.listWindowsErr + } return m.listWindowsResult, m.listWindowsErr } func (m *mockTmuxOps) ResolveWindowSession(ctx context.Context, server, windowID string) (string, error) { diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl b/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl index bc58b311..b58421b8 100644 --- a/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/.history.jsonl @@ -25,3 +25,5 @@ {"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"ship","ts":"2026-06-02T20:21:46Z"} {"cmd":"git-pr","event":"command","ts":"2026-06-02T20:22:41Z"} {"action":"enter","driver":"git-pr","event":"stage-transition","stage":"review-pr","ts":"2026-06-02T20:23:53Z"} +{"cmd":"git-pr-review","event":"command","ts":"2026-06-02T20:25:08Z"} +{"event":"review","result":"passed","ts":"2026-06-02T20:25:37Z"} diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml b/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml index 1e1a8ed7..d328b27f 100644 --- a/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml @@ -10,10 +10,10 @@ progress: review: done hydrate: done ship: done - review-pr: active + review-pr: done plan: generated: true - task_count: 21 + task_count: 22 acceptance_count: 28 acceptance_completed: 0 confidence: @@ -34,7 +34,7 @@ stage_metrics: review: {started_at: 
"2026-06-02T20:08:05Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-02T20:08:09Z"} hydrate: {started_at: "2026-06-02T20:08:09Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-02T20:21:46Z"} ship: {started_at: "2026-06-02T20:21:46Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-02T20:23:53Z"} - review-pr: {started_at: "2026-06-02T20:23:53Z", driver: git-pr, iterations: 1} + review-pr: {started_at: "2026-06-02T20:23:53Z", driver: git-pr, iterations: 1, completed_at: "2026-06-02T20:25:37Z"} prs: - https://github.com/sahil87/run-kit/pull/233 true_impact: @@ -48,4 +48,4 @@ true_impact: computed_at: "2026-06-02T20:21:46Z" computed_at_stage: hydrate # true_impact: lazily created on first apply-finish (no placeholder here). -last_updated: 2026-06-02T20:23:53Z +last_updated: 2026-06-02T22:09:10Z diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md b/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md index 453ed34a..267cc0bf 100644 --- a/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md @@ -237,6 +237,10 @@ the relay attaches directly. - [x] T021 Review the board e2e specs (`boards-pin-flow`, `boards-mobile`, `boards-multi-server`, `boards-same-session-multi-pane`, `boards-desktop-suspend`) under `app/frontend/tests/e2e/`: `boards-same-session-multi-pane.spec.ts` asserted the OLD multi-pane-same-session behavior (now each pin is its own session) and `boards-multi-server.spec.ts` asserted cross-server aggregation (now server-scoped) — update those `.spec.ts` to the new model and update their sibling `.spec.md` companions in the same change (Constitution Test Companion Docs). Specs that still hold (pin/unpin a window, render a live pane) stay. 
+### Phase 7: Rework — board-render join through pin-sessions + +- [x] T022 Fix `app/backend/api/boards.go` `handleBoardGet` (and `windowExistsOnServer`) to find pinned windows in their `_rk-pin-` sessions. + ## Execution Order - Phase 1 (T001-T002) is the foundation: the prefix constant + helpers are used by board.go and relay. From 4eed733644c0c262efe52ca79ec5596c02d9596d Mon Sep 17 00:00:00 2001 From: Sahil Ahuja Date: Wed, 3 Jun 2026 03:46:13 +0530 Subject: [PATCH 4/4] fix: Harden Pin/Unpin/relay per Copilot review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Copilot PR-review findings, several of which interacted with the earlier rework: - Pin stamps @rk_home/@rk_board/@rk_board_order BEFORE moving the window into the pin-session. The window can no longer enter a pin-session that lacks @rk_home, which eliminates the prior double-fault 'un-unpinnable' state and removes the rollback-move complexity: a stamp failure simply kills the still-empty pin-session and the window is untouched in its home. - Idempotent re-pin re-stamps @rk_board and repairs a missing @rk_board_order. It does NOT attempt to re-derive @rk_home (impossible once the window lives in its pin-session); durability is guaranteed by stamp-before-move instead. - Unpin no-ops on a board-name mismatch, so the handler never emits a false board-changed SSE event for a board the window was never on. - Unpin recovers an @rk_home-less pin-session by renaming it to a `recovered*` session rather than hard-failing — a window is never stranded. - handleRelay uses session-scoped SelectWindowInSession instead of a bare SelectWindow, so the selected window matches the session it attaches to (group-safe). Adds TestUnpin_BoardMismatchIsNoOp and TestUnpin_HomelessPinRecoversWindow. just test-backend green; the 6 board/relay e2e specs pass locally.
--- app/backend/api/relay.go | 17 +- app/backend/internal/tmux/board.go | 147 +++++++++++------- app/backend/internal/tmux/board_test.go | 72 +++++++++ .../.status.yaml | 4 +- .../plan.md | 4 +- 5 files changed, 177 insertions(+), 67 deletions(-) diff --git a/app/backend/api/relay.go b/app/backend/api/relay.go index d500da38..1534dcb5 100644 --- a/app/backend/api/relay.go +++ b/app/backend/api/relay.go @@ -86,11 +86,18 @@ func (s *Server) handleRelay(w http.ResponseWriter, r *http.Request) { } // Select the window on its real session so the attach renders the right - // window. The accepted tradeoff (#1 in the intake): the real session has a - // single active-window pointer shared across attachments, so multi-client - // navigation mutates the real session's active window. For a pin-session this - // is a no-op — its sole window is permanently active. - if err := s.tmux.SelectWindow(windowID, server); err != nil { + // window. Scope the select to the resolved session (`<session>:@N`) rather + // than a bare `select-window -t @N`: a bare window-id target is ambiguous + // inside a tmux session group (members share window membership but keep + // independent active-window state, so tmux may set the active window on an + // arbitrary member), and `attach-session -t <session>` below attaches to THIS + // session — the two must agree. For a single-window pin-session the select is a + // no-op (its sole window is permanently active); for a multi-window home + // session it pins the active window on the same session we attach to. + // The accepted tradeoff (#1 in the intake): a home session's single + // active-window pointer is shared across attachments, so multi-client + // navigation mutates it.
+ if err := s.tmux.SelectWindowInSession(session, windowID, server); err != nil { slog.Error("select-window failed", "err", err, "session", session, "windowID", windowID) conn.WriteMessage(websocket.CloseMessage, websocket.FormatCloseMessage(4004, "Window not found")) diff --git a/app/backend/internal/tmux/board.go b/app/backend/internal/tmux/board.go index 56500091..8ae13d8c 100644 --- a/app/backend/internal/tmux/board.go +++ b/app/backend/internal/tmux/board.go @@ -269,21 +269,31 @@ func Pin(ctx context.Context, server, windowID, board string) error { } // Idempotency: the pin-session already exists → the window is already pinned. - // A same-board re-pin is a clean no-op. A *different*-board re-pin must NOT - // silently report success while leaving the window on its old board — re-stamp - // @rk_board so the requested board wins (the window has exactly one pin-session, - // so this is the only authoritative place membership lives). + // Re-pin makes the requested board win and repairs the order key if it went + // missing, so the pin is always board-derivable and sortable. We do NOT try to + // repair @rk_home here: once a window lives in its pin-session, its "current + // session" IS the pin-session, so there is no source to re-derive the original + // home from. @rk_home durability is instead guaranteed at creation by the + // stamp-before-move ordering below (it is written while the pin-session is + // still empty, so the window never enters a pin-session lacking @rk_home). + // A window has exactly one pin-session, so this is the sole authoritative + // place its membership lives. 
if _, err := tmuxExecRawServer(ctx, server, "has-session", "-t", pinSession); err == nil { - current, readErr := showSessionOption(ctx, server, pinSession, BoardOption) - if readErr != nil { - return fmt.Errorf("read %s on existing pin %q: %w", BoardOption, pinSession, readErr) - } - if current == board { - return nil - } + // @rk_board: always set to the requested board (different-board re-pin must + // not silently keep the old board; same-board is a harmless idempotent set). if err := setSessionOption(ctx, server, pinSession, BoardOption, board); err != nil { return fmt.Errorf("re-stamp %s on existing pin %q: %w", BoardOption, pinSession, err) } + // @rk_board_order: repair only if missing/invalid so GetBoard can sort it. + curOrder, oerr := showSessionOption(ctx, server, pinSession, BoardOrderOption) + if oerr != nil { + return fmt.Errorf("read %s on existing pin %q: %w", BoardOrderOption, pinSession, oerr) + } + if !ValidOrderKey(curOrder) { + if err := setSessionOption(ctx, server, pinSession, BoardOrderOption, initialAppendKey); err != nil { + return fmt.Errorf("repair %s on existing pin %q: %w", BoardOrderOption, pinSession, err) + } + } return nil } @@ -310,9 +320,8 @@ func Pin(ctx context.Context, server, windowID, board string) error { orderKey := nextAppendKey(boardEntries) // Create the pin-session (starts with one placeholder window) and capture the - // placeholder window's ID by reading the new session's sole window. Move the - // target window in, then kill the captured placeholder by ID so the moved - // window is the session's sole window. Capturing the placeholder ID (rather + // placeholder window's ID so we can kill it after the move, leaving the moved + // window as the session's sole window. Capturing the placeholder ID (rather // than assuming index 0) is robust to base-index config and to the moved // window's landing index. 
if _, err := tmuxExecServer(ctx, server, "new-session", "-d", "-s", pinSession); err != nil { @@ -331,46 +340,43 @@ func Pin(ctx context.Context, server, windowID, board string) error { return fmt.Errorf("read pin placeholder window: pin-session %q reported no windows", pinSession) } placeholderID := strings.TrimSpace(placeholderLines[0]) - if err := MoveWindowToSession(windowID, pinSession, server); err != nil { - // Roll back the empty pin-session so a failed move leaves no orphan. - _ = KillSessionCtx(context.Background(), server, pinSession) - return fmt.Errorf("move window into pin session: %w", err) - } - if _, err := tmuxExecServer(ctx, server, "kill-window", "-t", placeholderID); err != nil { - // Non-fatal: a stray placeholder is cosmetic, but log it loudly. - slog.Warn("board: pin placeholder kill failed", "server", server, "pin", pinSession, "placeholder", placeholderID, "err", err) - } - // The window now physically lives in the pin-session. From here a stamp failure - // must NOT return with the window stranded: pinEntry rejects a pin-session with - // no/invalid @rk_board (→ absent from BOARDS) and parseSessions filters _rk-pin-* - // (→ absent from SESSIONS), so a half-stamped pin is an invisible lost window. - // On any stamp failure, undo the move (window back to its home) and kill the - // pin-session — both rooted in context.Background() for the same reason as above. - // Double-fault guard: only kill the pin-session if the move-back SUCCEEDED. If the - // move-back itself fails the window is still physically inside the pin-session; - // killing it would destroy a live window. Leaving the (still-named) pin-session - // keeps the window recoverable — the pin is unpinnable by window id, and the next - // same-window Pin sees the existing session and re-stamps via the idempotent path. 
- rollbackMove := func(cause error, opt string) error { - if mvErr := MoveWindowToSession(windowID, home, server); mvErr != nil { - slog.Error("board: pin stamp-failure rollback move failed — leaving pin-session intact so the window survives (recoverable via unpin/re-pin)", - "server", server, "window", windowID, "home", home, "pin", pinSession, "err", mvErr) - return fmt.Errorf("set %s: %w (rollback move-back also failed: %v)", opt, cause, mvErr) - } + // STAMP-BEFORE-MOVE: write all three membership vars onto the (still empty) + // pin-session BEFORE moving the target window in. Ordering is load-bearing for + // crash/failure safety: + // - The window has NOT moved yet, so a stamp failure strands nothing — we + // simply kill the empty placeholder-only pin-session and return; the window + // is untouched in its home session. + // - Once the move succeeds (below), @rk_home is already durably present, so + // the window can ALWAYS be unpinned. There is no window-moved-but-unstamped + // state, hence no double-fault rollback, no "un-unpinnable" pin-session, + // and the idempotent recovery story is trivially true. + stampRollback := func(cause error, opt string) error { _ = KillSessionCtx(context.Background(), server, pinSession) - return fmt.Errorf("set %s: %w", opt, cause) + return fmt.Errorf("set %s on new pin %q: %w", opt, pinSession, cause) } - - // Stamp membership vars on the pin-session. if err := setSessionOption(ctx, server, pinSession, HomeOption, home); err != nil { - return rollbackMove(err, HomeOption) + return stampRollback(err, HomeOption) } if err := setSessionOption(ctx, server, pinSession, BoardOption, board); err != nil { - return rollbackMove(err, BoardOption) + return stampRollback(err, BoardOption) } if err := setSessionOption(ctx, server, pinSession, BoardOrderOption, orderKey); err != nil { - return rollbackMove(err, BoardOrderOption) + return stampRollback(err, BoardOrderOption) + } + + // Now move the window in.
The pin-session is fully stamped, so a successful + // move yields a complete, unpinnable pin. A move FAILURE strands nothing (the + // window stays home) — roll back the stamped-but-windowless pin-session. + if err := MoveWindowToSession(windowID, pinSession, server); err != nil { + _ = KillSessionCtx(context.Background(), server, pinSession) + return fmt.Errorf("move window into pin session: %w", err) + } + if _, err := tmuxExecServer(ctx, server, "kill-window", "-t", placeholderID); err != nil { + // Non-fatal: a stray placeholder is cosmetic, but log it loudly. The pin is + // already valid (window moved, vars stamped) — a leftover placeholder window + // in the pin-session does not affect board derivation or unpin. + slog.Warn("board: pin placeholder kill failed", "server", server, "pin", pinSession, "placeholder", placeholderID, "err", err) } return nil } @@ -401,6 +407,22 @@ func Unpin(ctx context.Context, server, windowID, board string) error { return nil } + // Board-match guard: only unpin if the pin actually belongs to the requested + // board. A mismatched `/api/boards/{name}/unpin` (stale or wrong board name) + // must NOT silently unpin the window AND must not cause the handler to emit a + // `board-changed` event referencing a board the window was never on. Treat a + // mismatch as a no-op success — the window stays pinned to its real board, and + // the handler's broadcast (which names the URL board) describes a state that + // did not change, so suppressing the unpin keeps SSE consistent. An unreadable + // @rk_board is a real error. 
+ curBoard, err := showSessionOption(ctx, server, pinSession, BoardOption) + if err != nil { + return fmt.Errorf("read %s: %w", BoardOption, err) + } + if curBoard != board { + return nil + } + home, err := showSessionOption(ctx, server, pinSession, HomeOption) if err != nil { return fmt.Errorf("read %s: %w", HomeOption, err) @@ -425,20 +447,27 @@ func Unpin(ctx context.Context, server, windowID, board string) error { return killPinSessionIfPresent(ctx, server, pinSession) } - // Home is gone (or was never recorded) — recreate it. Rename the pin-session - // to the home name so the moved window becomes the new home session's only - // window (no placeholder). When home is empty, fall back to keeping the - // window in a freshly named session is impossible without a name; use the - // pin-session's window via rename-session to the remembered home. + // No recorded @rk_home. With stamp-before-move (see Pin) this should be + // unreachable — @rk_home is durably set before the window ever enters the + // pin-session — but a legacy/corrupt pin-session could still lack it. Rather + // than hard-failing and stranding the window invisibly (it is filtered from + // SESSIONS as a `_rk-pin-*` name and, once we strip membership, also from + // BOARDS), RECOVER it: rename the pin-session to a deterministic recovered + // home name so the window resurfaces in the SESSIONS sidebar. A window is + // never left unrecoverable. if home == "" { - // No recorded home: leave the window where it is by clearing membership - // so it is no longer a board pin, then it surfaces in SESSIONS only if - // the pin-session is renamed away. Without a target name we cannot - // restore; this should not happen (Pin always stamps @rk_home), so treat - // it as an error rather than silently stranding the window. - return fmt.Errorf("unpin: pin-session %q has no @rk_home to restore to", pinSession) - } - // Recreate home by renaming the (single-window) pin-session to the home name. 
+ recovered := "recovered" + strings.TrimPrefix(pinSession, PinSessionPrefix) + slog.Warn("board: unpin found pin-session with no @rk_home — recovering window into a renamed session", + "server", server, "pin", pinSession, "recovered", recovered) + if err := RenameSession(pinSession, recovered, server); err != nil { + return fmt.Errorf("recover window from @rk_home-less pin %q: %w", pinSession, err) + } + _, _ = tmuxExecRawServer(ctx, server, "set-option", "-u", "-t", recovered, BoardOption) + _, _ = tmuxExecRawServer(ctx, server, "set-option", "-u", "-t", recovered, BoardOrderOption) + return nil + } + // Home is gone — recreate it by renaming the (single-window) pin-session to + // the home name. // This preserves the window as the sole window of the recreated home session // with no placeholder, and atomically removes the `_rk-pin-*` name. if err := RenameSession(pinSession, home, server); err != nil { diff --git a/app/backend/internal/tmux/board_test.go b/app/backend/internal/tmux/board_test.go index 922e2682..af2a5018 100644 --- a/app/backend/internal/tmux/board_test.go +++ b/app/backend/internal/tmux/board_test.go @@ -464,6 +464,78 @@ func TestUnpin_RecreatesDeadHome(t *testing.T) { } } +func TestUnpin_BoardMismatchIsNoOp(t *testing.T) { + server := withBoardTmux(t) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + wid := createHomeWindow(t, server, "home", "agent") + pin, _ := PinSessionName(wid) + + if err := Pin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Pin to main: %v", err) + } + // Unpin with the WRONG board name must be a no-op success: the window stays + // pinned to "main" and the pin-session survives (so the handler emits no + // false board-changed event for "other"). 
+ if err := Unpin(ctx, server, wid, "other"); err != nil { + t.Fatalf("Unpin with mismatched board returned error, want no-op: %v", err) + } + if !hasSession(t, server, pin) { + t.Errorf("mismatched-board Unpin removed the pin-session %s (should be a no-op)", pin) + } + if b, _ := showSessionOption(ctx, server, pin, BoardOption); b != "main" { + t.Errorf("mismatched-board Unpin changed @rk_board to %q, want unchanged 'main'", b) + } + // The correct board name still unpins. + if err := Unpin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Unpin with correct board: %v", err) + } + if hasSession(t, server, pin) { + t.Errorf("pin-session %s survived a correct-board Unpin", pin) + } +} + +func TestUnpin_HomelessPinRecoversWindow(t *testing.T) { + server := withBoardTmux(t) + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + wid := createHomeWindow(t, server, "home", "agent") + pin, _ := PinSessionName(wid) + + if err := Pin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Pin: %v", err) + } + // Simulate a legacy/corrupt pin-session that lost @rk_home (stamp-before-move + // makes this unreachable in practice, but Unpin must still never strand a + // window). Once the window lives in the pin-session there is no source to + // re-derive the original home, so Unpin RECOVERS it by renaming the pin-session + // to a `recovered*` session — the window resurfaces in SESSIONS rather than + // being lost (filtered out of both SESSIONS and BOARDS). 
+ if _, err := tmuxExecRawServer(ctx, server, "set-option", "-t", pin, "-u", HomeOption); err != nil { + t.Fatalf("clear @rk_home: %v", err) + } + if err := Unpin(ctx, server, wid, "main"); err != nil { + t.Fatalf("Unpin of @rk_home-less pin returned error, want recovery: %v", err) + } + if hasSession(t, server, pin) { + t.Errorf("pin-session %s survived recovery Unpin", pin) + } + recovered := "recovered" + strings.TrimPrefix(pin, PinSessionPrefix) + if !hasSession(t, server, recovered) { + t.Fatalf("recovery session %s was not created — window may be stranded", recovered) + } + ids := windowsInSession(t, server, recovered) + if len(ids) != 1 || ids[0] != wid { + t.Errorf("recovered session windows = %v, want [%s]", ids, wid) + } + // The recovered session must carry no board membership (it's a plain session). + if b, _ := showSessionOption(ctx, server, recovered, BoardOption); b != "" { + t.Errorf("recovered session retained @rk_board=%q", b) + } +} + func TestUnpin_Idempotent(t *testing.T) { server := withBoardTmux(t) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml b/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml index d328b27f..8d20dcf5 100644 --- a/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/.status.yaml @@ -13,7 +13,7 @@ progress: review-pr: done plan: generated: true - task_count: 22 + task_count: 23 acceptance_count: 28 acceptance_completed: 0 confidence: @@ -48,4 +48,4 @@ true_impact: computed_at: "2026-06-02T20:21:46Z" computed_at_stage: hydrate # true_impact: lazily created on first apply-finish (no placeholder here). 
-last_updated: 2026-06-02T22:09:10Z +last_updated: 2026-06-02T22:15:54Z diff --git a/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md b/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md index 267cc0bf..d1f68cff 100644 --- a/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md +++ b/fab/changes/260602-qn62-move-based-board-pin-sessions/plan.md @@ -239,7 +239,9 @@ the relay attaches directly. ### Phase 7: Rework — board-render join through pin-sessions -- [x] T022 Fix `app/backend/api/boards.go` `handleBoardGet` (and `windowExistsOnServer`) to find pinned windows in their `_rk-pin-` sessions. +- [x] T022 Fix `app/backend/api/boards.go` `handleBoardGet` (and `windowExistsOnServer`) to find pinned windows in their `_rk-pin-` sessions. + +- [x] T023 Address Copilot PR-review findings on Pin/Unpin/relay. ## Execution Order