Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 50 additions & 47 deletions app/backend/api/boards.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,61 +50,45 @@ func (s *Server) handleBoardGet(w http.ResponseWriter, r *http.Request) {
}
out := make([]BoardEntryResponse, 0, len(entries))

// Build per-server windowID -> {session, WindowInfo} maps once so the
// per-entry join below is O(1) instead of O(sessions × windows).
type windowMatch struct {
session string
info tmux.WindowInfo
}
byServer := make(map[string]map[string]windowMatch)

// First pass: fetch all sessions per server, populating the per-server
// windowID lookup as we go.
serversNeeded := make(map[string]struct{})
for _, e := range entries {
serversNeeded[e.Server] = struct{}{}
}
for srv := range serversNeeded {
sessions, sErr := s.tmux.ListSessions(r.Context(), srv)
if sErr != nil {
continue
}
serverIndex := make(map[string]windowMatch)
for _, sess := range sessions {
windows, wErr := s.tmux.ListWindows(r.Context(), sess.Name, srv)
if wErr != nil {
continue
}
for _, win := range windows {
// First win wins — duplicate windowIDs across sessions on the
// same tmux server should not occur.
if _, exists := serverIndex[win.WindowID]; exists {
continue
}
serverIndex[win.WindowID] = windowMatch{session: sess.Name, info: win}
}
}
byServer[srv] = serverIndex
}

// Join each board entry with live window data. A pinned window has been MOVED
// into its own single-window pin-session `_rk-pin-<id>`, so the window lives
// inside a session that the user-facing `ListSessions`/`parseSessions` path
// deliberately HIDES (the `_rk-pin-` skip). Enumerating via `ListSessions`
// would therefore never find the pinned window and the board would render
// empty (the CI/e2e failure this replaced). Instead, look the window up in its
// OWN pin-session directly: the entry's WindowID maps deterministically to its
// pin-session name, and `ListWindows -t <pinSession>` is a by-name target query
// that is NOT subject to the session-list filter. O(entries) targeted lookups.
for _, e := range entries {
serverIndex, ok := byServer[e.Server]
pinSession, ok := tmux.PinSessionName(e.WindowID)
if !ok {
// Malformed window id (should not occur — entries come from pin
// sessions) — skip defensively.
continue
}
match, ok := serverIndex[e.WindowID]
if !ok {
// Window vanished between GetBoard and the join — skip.
windows, wErr := s.tmux.ListWindows(r.Context(), pinSession, e.Server)
if wErr != nil || len(windows) == 0 {
// Pin-session vanished between GetBoard and the join (window/pin
// killed) — skip; the board simply shows one fewer pane.
continue
}
// A pin-session holds exactly one window — its sole window IS the pinned
// window. Match by WindowID defensively in case of an unexpected extra.
win := windows[0]
for _, w := range windows {
if w.WindowID == e.WindowID {
win = w
break
}
}
out = append(out, BoardEntryResponse{
Server: e.Server,
WindowID: e.WindowID,
Session: match.session,
WindowIndex: match.info.Index,
WindowName: match.info.Name,
Session: pinSession,
WindowIndex: win.Index,
WindowName: win.Name,
OrderKey: e.OrderKey,
Panes: match.info.Panes,
Panes: win.Panes,
})
}
// Stable sort by orderKey to preserve the GetBoard ordering after the join.
Expand Down Expand Up @@ -279,9 +263,28 @@ func (s *Server) handleBoardReorder(w http.ResponseWriter, r *http.Request) {
writeJSON(w, http.StatusOK, map[string]interface{}{"ok": true, "newOrderKey": newKey})
}

// windowExistsOnServer scans every session on the server and returns true if
// the supplied windowID matches a live window.
// windowExistsOnServer returns true if the supplied windowID matches a live
// window on the server — whether the window is in a normal (home) session OR
// already moved into its own pin-session.
//
// The pin-session must be checked explicitly: `ListSessions`/`parseSessions`
// HIDES `_rk-pin-*` sessions, so a window that is ALREADY pinned would be
// invisible to the home-session scan alone. Without the pin-session check, a
// re-pin of an already-pinned window (e.g. moving it to a different board) would
// be rejected 404 before reaching tmux.Pin's wrong-board re-stamp path.
func (s *Server) windowExistsOnServer(r *http.Request, server, windowID string) bool {
// Fast path: the window's own pin-session (by-name target, not subject to the
// session-list filter). If present, the window is already pinned and live.
if pinSession, ok := tmux.PinSessionName(windowID); ok {
if windows, err := s.tmux.ListWindows(r.Context(), pinSession, server); err == nil {
for _, w := range windows {
if w.WindowID == windowID {
return true
}
}
}
}
// Otherwise scan the visible (home) sessions.
sessions, err := s.tmux.ListSessions(r.Context(), server)
if err != nil {
return false
Expand Down
53 changes: 50 additions & 3 deletions app/backend/api/boards_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,16 @@ func TestBoards_GET_aggregateAcrossServers(t *testing.T) {
}

func TestBoard_GET_byName(t *testing.T) {
// In the move-based model a pinned window lives in its own `_rk-pin-<id>`
// session (the handler joins live window data from there, not from a home
// session). The pinned window @1234 → pin-session `_rk-pin-1234`.
ops := &mockTmuxOps{
getBoardResult: []tmux.BoardEntry{
{Server: "default", WindowID: "@1234", Board: "main", OrderKey: "a"},
},
listSessionsResult: []tmux.SessionInfo{{Name: "dev"}},
listWindowsResult: []tmux.WindowInfo{
{Index: 2, WindowID: "@1234", Name: "agent"},
listWindowsBySession: map[string][]tmux.WindowInfo{
"_rk-pin-1234": {{Index: 0, WindowID: "@1234", Name: "agent"}},
},
}
router := newTestRouter(&mockSessionFetcher{}, ops)
Expand All @@ -83,11 +86,55 @@ func TestBoard_GET_byName(t *testing.T) {
t.Fatalf("got %d entries, want 1", len(got))
}
g := got[0]
if g.WindowID != "@1234" || g.Session != "dev" || g.WindowIndex != 2 || g.WindowName != "agent" || g.OrderKey != "a" {
if g.WindowID != "@1234" || g.Session != "_rk-pin-1234" || g.WindowName != "agent" || g.OrderKey != "a" {
t.Errorf("got %+v", g)
}
}

// TestBoard_GET_byName_windowInPinSession is the regression test for the
// CI/e2e failure where a pinned board rendered EMPTY. In the move-based model a
// pinned window is moved into its own `_rk-pin-<id>` session, which the
// user-facing ListSessions/parseSessions path HIDES. handleBoardGet must look the
// window up in its pin-session directly — NOT by scanning ListSessions, which
// would never find it and drop every entry. Here the home session list contains
// only an unrelated empty session; the pinned window @1234 lives ONLY under
// `_rk-pin-1234`. The join must still return the entry with live window data.
func TestBoard_GET_byName_windowInPinSession(t *testing.T) {
ops := &mockTmuxOps{
getBoardResult: []tmux.BoardEntry{
{Server: "default", WindowID: "@1234", Board: "main", OrderKey: "a"},
},
// Home sessions visible to ListSessions do NOT contain @1234 — it was
// moved out into its pin-session (which ListSessions hides). A scan of
// these would find nothing.
listSessionsResult: []tmux.SessionInfo{{Name: "dev"}},
listWindowsBySession: map[string][]tmux.WindowInfo{
"dev": {{Index: 0, WindowID: "@9", Name: "other"}},
"_rk-pin-1234": {{Index: 0, WindowID: "@1234", Name: "agent"}},
},
}
router := newTestRouter(&mockSessionFetcher{}, ops)

req := httptest.NewRequest(http.MethodGet, "/api/boards/main", nil)
rec := httptest.NewRecorder()
router.ServeHTTP(rec, req)

if rec.Code != http.StatusOK {
t.Fatalf("status = %d, body=%s", rec.Code, rec.Body.String())
}
var got []BoardEntryResponse
if err := json.Unmarshal(rec.Body.Bytes(), &got); err != nil {
t.Fatal(err)
}
if len(got) != 1 {
t.Fatalf("got %d entries, want 1 (board must NOT render empty when the window lives in its pin-session); body=%s", len(got), rec.Body.String())
}
g := got[0]
if g.WindowID != "@1234" || g.Session != "_rk-pin-1234" || g.WindowName != "agent" || g.OrderKey != "a" {
t.Errorf("got %+v, want WindowID=@1234 Session=_rk-pin-1234 WindowName=agent OrderKey=a", g)
}
}

func TestBoard_GET_invalidName_400(t *testing.T) {
ops := &mockTmuxOps{}
router := newTestRouter(&mockSessionFetcher{}, ops)
Expand Down
100 changes: 24 additions & 76 deletions app/backend/api/relay.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@ package api

import (
"context"
"crypto/rand"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
Expand All @@ -21,18 +18,6 @@ import (
"rk/internal/tmux"
)

// newEphemeralRelayName returns a unique ephemeral session name of the form
// "rk-relay-<8 hex chars>". The 8-hex suffix is read from crypto/rand and is
// never derived from user input — keeping the surface inside the relay handler
// closed against injection (constitution I).
func newEphemeralRelayName() (string, error) {
var b [4]byte
if _, err := rand.Read(b[:]); err != nil {
return "", err
}
return fmt.Sprintf("%s%s", tmux.RelaySessionPrefix, hex.EncodeToString(b[:])), nil
}

// No timeout for the attach command — it's a long-lived process that stays alive
// for the duration of the WebSocket connection. Cancellation happens via the
// cancel() call in the cleanup function on disconnect.
Expand Down Expand Up @@ -83,10 +68,13 @@ func (s *Server) handleRelay(w http.ResponseWriter, r *http.Request) {
}
defer conn.Close()

// Resolve the owning session from the window ID. The per-WebSocket ephemeral
// grouped-session mechanism keys off the *real session name*, so we derive it
// from the window ID via a targeted display-message lookup. A missing window
// (resolution fails or returns empty) preserves the existing 4004 close code.
// Resolve the owning session from the window ID. In the move-based model a
// window lives in exactly ONE session — either a normal home session or its
// board pin-session (`_rk-pin-*`). The relay attaches the PTY DIRECTLY to that
// real session (no per-WebSocket ephemeral grouped session): single-window
// pin-sessions remove window *sharing*, which was the only reason the
// ephemeral isolation layer existed. A missing window (resolution fails or
// returns empty) preserves the existing 4004 close code.
resolveCtx, resolveCancel := context.WithTimeout(r.Context(), 5*time.Second)
session, err := s.tmux.ResolveWindowSession(resolveCtx, server, windowID)
resolveCancel()
Expand All @@ -97,60 +85,20 @@ func (s *Server) handleRelay(w http.ResponseWriter, r *http.Request) {
return
}

// Allocate a per-WebSocket ephemeral grouped session. tmux session groups
// share window membership but maintain independent active-window state, so
// each relay can SelectWindow on its own ephemeral without disturbing other
// clients attached to the same real session (e.g., other board panes, or
// other browser tabs).
ephemeral, err := newEphemeralRelayName()
if err != nil {
slog.Error("ephemeral name generation failed", "err", err)
conn.WriteMessage(websocket.CloseMessage,
websocket.FormatCloseMessage(4001, "Failed to allocate relay session"))
return
}
if err := s.tmux.NewGroupedSession(r.Context(), server, session, ephemeral); err != nil {
slog.Warn("new-session (grouped) failed", "err", err, "session", session, "ephemeral", ephemeral)
conn.WriteMessage(websocket.CloseMessage,
websocket.FormatCloseMessage(4004, "Session not found"))
return
}
// Best-effort cleanup with a fresh context — r.Context() is cancelled at
// disconnect time (the trigger for this defer), so reusing it would cause
// the kill to be cancelled before tmux can run it.
defer func() {
if err := s.tmux.KillSessionCtx(context.Background(), server, ephemeral); err != nil {
slog.Debug("ephemeral cleanup failed", "err", err, "ephemeral", ephemeral)
}
}()

// Stamp the ephemeral with this rk serve process's PID BEFORE it becomes
// attachable (before SelectWindowInSession). A sibling startup sweep reaps
// any rk-relay-* whose @rk_owner_pid is empty, so an attachable-but-unstamped
// relay is indistinguishable from an orphan and would be wrongly killed.
// Stamping first guarantees the only unstamped relays a sweep can see are
// genuine orphans (owner already exited), never this live instance's relay.
//
// On stamp failure the relay is unprotectable — keeping it open is a false
// promise (the next sweep would reap owner=="" and drop the terminal). So we
// abort cleanly: log, close the WebSocket with the relay-allocation close
// code, and return — the deferred KillSessionCtx above reaps the half-owned
// ephemeral. This mirrors every other setup-step failure in handleRelay.
if err := s.tmux.SetSessionOwnerPID(r.Context(), server, ephemeral, os.Getpid()); err != nil {
slog.Warn("relay owner-pid stamp failed", "err", err, "ephemeral", ephemeral)
conn.WriteMessage(websocket.CloseMessage,
websocket.FormatCloseMessage(4001, "Failed to allocate relay session"))
return
}

// Select the window on the ephemeral, scoped to the ephemeral session. A bare
// window-id target (`select-window -t @N`) is ambiguous inside a session group
// — members share window membership but keep independent active-window state,
// so tmux could set the active window on the real session or another group
// member. Qualifying the target as "<ephemeral>:@N" pins the active window to
// THIS WebSocket's ephemeral, preserving multi-client isolation.
if err := s.tmux.SelectWindowInSession(ephemeral, windowID, server); err != nil {
slog.Error("select-window failed", "err", err, "ephemeral", ephemeral, "windowID", windowID)
// Select the window on its real session so the attach renders the right
// window. Scope the select to the resolved session (`<session>:@N`) rather
// than a bare `select-window -t @N`: a bare window-id target is ambiguous
// inside a tmux session group (members share window membership but keep
// independent active-window state, so tmux may set the active window on an
// arbitrary member), and `attach-session -t <session>` below attaches to THIS
// session — the two must agree. For a single-window pin-session the select is a
// no-op (its sole window is permanently active); for a multi-window home
// session it pins the active window on the same session we attach to.
// The accepted tradeoff (#1 in the intake): a home session's single
// active-window pointer is shared across attachments, so multi-client
// navigation mutates it.
if err := s.tmux.SelectWindowInSession(session, windowID, server); err != nil {
slog.Error("select-window failed", "err", err, "session", session, "windowID", windowID)
Comment on lines +88 to +101
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — handleRelay now uses the session-scoped SelectWindowInSession(session, windowID, server) (the resolved owning session) instead of a bare SelectWindow, so the selected active window matches the session attach-session -t attaches to, group-safe. (4eed733)

conn.WriteMessage(websocket.CloseMessage,
websocket.FormatCloseMessage(4004, "Window not found"))
return
Expand Down Expand Up @@ -189,9 +137,9 @@ func (s *Server) handleRelay(w http.ResponseWriter, r *http.Request) {
slog.Debug("config reload before attach (best-effort)", "server", server, "err", err)
}

// Attach to the ephemeral, not the real session — this is the linchpin of
// the grouped-session fix.
attachArgs = append(attachArgs, "attach-session", "-t", ephemeral)
// Attach DIRECTLY to the resolved owning session (home or `_rk-pin-*`). No
// ephemeral, no defer-kill — the session is durable and owned by tmux.
attachArgs = append(attachArgs, "attach-session", "-t", session)
cmd := exec.CommandContext(ctx, "tmux", attachArgs...)
cmd.Env = forceTERM(os.Environ())

Expand Down
Loading
Loading