From b49528a4879977cfba23becaac811d83c65e2734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=94=B3=E6=99=97?= Date: Wed, 24 Jun 2026 01:24:03 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat(backend):=20PTY=20=E9=80=80=E5=BD=B9?= =?UTF-8?q?=E7=AC=AC=E4=B8=80=E6=AD=A5=20=E2=80=94=E2=80=94=20tmux=20?= =?UTF-8?q?=E4=B8=8D=E5=8F=AF=E7=94=A8=E6=94=B9=E7=A1=AC=20gate=20?= =?UTF-8?q?=E8=80=8C=E9=9D=9E=E9=9D=99=E9=BB=98=E5=9B=9E=E9=80=80=20PTY?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 默认后端恒为 tmux;请求的持久后端(tmux/herdr/zellij)不可用且无存活会话时, 不再静默 spawn 裸 PTY(这是「悄悄跑在 PTY 上、随后撞一堆 PTY 问题」一类 bug 的根源), 改为向话题发一张可操作卡片(原因+安装命令+逃生阀说明)并拒绝启动会话。 PTY 仅保留为显式逃生阀:显式设 BACKEND_TYPE=pty 或 per-bot backendType:pty 才会用。 已存在的 tmux 会话仍照常重连(保留 PR#249 的瞬时探针保护,不误杀)。 - config.ts: detectDefaultBackend() 恒返回 tmux - worker.ts: 三处 silent pty fallback 合并为硬 gate(decideBackendGate + user_notify + throw) - setup/index.ts: tmux 装不上从「退回 PTY/不受影响」改为报错+安装指引 - session-backend-selector.ts: 抽纯函数 decideBackendGate / backendGateUserMessage - 新增 test/backend-gate.test.ts;更新 worker-pipe-initial-screen-order 源码模式断言 --- .../backend/session-backend-selector.ts | 46 +++++++++++++ src/config.ts | 19 +++--- src/setup/index.ts | 24 +++++-- src/worker.ts | 64 ++++++++++++------- test/backend-gate.test.ts | 45 +++++++++++++ test/worker-pipe-initial-screen-order.test.ts | 15 +++-- 6 files changed, 172 insertions(+), 41 deletions(-) create mode 100644 test/backend-gate.test.ts diff --git a/src/adapters/backend/session-backend-selector.ts b/src/adapters/backend/session-backend-selector.ts index e19b8394c..0f518248a 100644 --- a/src/adapters/backend/session-backend-selector.ts +++ b/src/adapters/backend/session-backend-selector.ts @@ -5,6 +5,52 @@ import { TmuxPipeBackend } from './tmux-pipe-backend.js'; import { ZellijBackend } from './zellij-backend.js'; import type { BackendType, SessionBackend } from './types.js'; +export type BackendGateDecision = + | { action: 'spawn' } + | { action: 'gate'; reason: string }; + +/** + * Hard gate (PTY 退役): a requested *persistent* backend (tmux/herdr/zellij) + * that isn't functional on this host no longer silently degrades to raw PTY. + * That silent fallback was the root of the "secretly running on PTY, then + * hitting all of PTY's problems (no survival across daemon restart, etc.)" + * bug class. Instead the worker refuses to spawn and posts an actionable card. + * + * PTY stays reachable ONLY as an explicit opt-in — `BACKEND_TYPE=pty` or a + * per-bot `backendType: 'pty'` — which arrives here as `requested === 'pty'` + * and is always allowed straight through. + * + * An already-running persistent session reattaches regardless of a transient + * probe failure (a disposable "can we start a new server?" probe is far less + * authoritative than a live session — see PR#249): abandoning it would spawn a + * duplicate CLI and orphan the real conversation. + */ +export function decideBackendGate(opts: { + requested: BackendType; + available: boolean; + hasExistingSession: boolean; +}): BackendGateDecision { + if (opts.requested === 'pty') return { action: 'spawn' }; + if (opts.hasExistingSession) return { action: 'spawn' }; + if (opts.available) return { action: 'spawn' }; + return { action: 'gate', reason: `${opts.requested} 后端在本机不可用` }; +} + +/** User-facing card shown when {@link decideBackendGate} gates a session. */ +export function backendGateUserMessage(backend: BackendType, reason: string): string { + const installHint = + backend === 'tmux' + ? 'macOS: brew install tmux | Debian/Ubuntu: sudo apt-get install -y tmux | 其它发行版用对应包管理器安装 tmux' + : `请确认 ${backend} 已正确安装并可用`; + return [ + `⚠️ 本机 ${backend} 不可用,无法启动会话。`, + `原因:${reason}`, + `请安装/修复后重试 —— ${installHint}`, + `(如确需在没有 ${backend} 的环境运行,可显式设置环境变量 BACKEND_TYPE=pty 用 PTY 后端兜底;` + + `但 PTY 会话不跨 daemon 重启存活,仅作应急。)`, + ].join('\n'); +} + export interface SelectedSessionBackend { backend: SessionBackend; isTmuxMode: boolean; diff --git a/src/config.ts b/src/config.ts index 5836284bb..228460284 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,6 +1,5 @@ import { networkInterfaces } from 'node:os'; import type { BackendType } from './adapters/backend/types.js'; -import { probeTmuxFunctional } from './setup/ensure-tmux.js'; import { resolveWorkerHttpHost } from './utils/worker-http.js'; /** Get the first non-loopback IPv4 address, fallback to localhost. */ @@ -26,15 +25,19 @@ export function getDashboardExternalHost(): string { } /** - * Pick the session backend. tmux is preferred (enables /adopt + per-client - * Web terminal attach) but only if it can actually start a server. The old - * check was `tmux -V`, which passes on machines where tmux is installed but - * broken (perms / config / linkage) and leaves the worker spamming "error - * connecting to /tmp/tmux-UID/default" forever. The functional probe filters - * those out so we silently fall back to PTY. + * Default session backend: always tmux (PTY 退役). + * + * PTY is no longer an automatic fallback. It used to be picked here whenever + * the tmux functional probe failed, which meant hosts with a broken/missing + * tmux silently ran on PTY — and then hit PTY's whole problem set (no survival + * across daemon restart, scrollback-relay quirks, …) without the user ever + * knowing they'd been downgraded. Now the default is tmux unconditionally; if + * tmux isn't functional the worker hard-gates the session with an actionable + * card (see decideBackendGate). PTY remains reachable only via an explicit + * BACKEND_TYPE=pty (or per-bot backendType:'pty') opt-in. */ function detectDefaultBackend(): Exclude { - return probeTmuxFunctional().ok ? 'tmux' : 'pty'; + return 'tmux'; } // Computed once: the packaged fallback data dir. The effective dir is read diff --git a/src/setup/index.ts b/src/setup/index.ts index a0c26e713..d83f93812 100644 --- a/src/setup/index.ts +++ b/src/setup/index.ts @@ -58,18 +58,30 @@ function herdrCliIds(): CliId[] { export async function ensureDependencies(): Promise { const platform = detectPlatform(); - // tmux: nice-to-have (enables /adopt + multi-pane Web terminal). Daemon - // still works on PTY backend without it, so failure is a warning, not fatal. + // tmux: REQUIRED (PTY 退役). PTY is no longer an automatic fallback, so a + // host without functional tmux can't start sessions unless the operator + // explicitly opts into the PTY escape hatch (BACKEND_TYPE=pty). Surface this + // loudly instead of pretending "常规对话不受影响" — that was true under the + // old silent-fallback behavior and is now misleading. const tmux = await ensureTmux(platform); + const ptyOptIn = (process.env.BACKEND_TYPE ?? '').toLowerCase() === 'pty'; if (tmux.installed) { if (!tmux.freshInstall) console.log(`✓ tmux ${tmux.version} (existing)`); - } else { + } else if (ptyOptIn) { console.warn(''); - console.warn('⚠️ tmux 不可用,已退回到 PTY backend'); + console.warn('⚠️ tmux 不可用,但已显式设置 BACKEND_TYPE=pty —— 将使用 PTY 后端兜底。'); console.warn(` 原因:${tmux.reason ?? '未知'}`); - if (tmux.manualCommand) console.warn(` 手动尝试:${tmux.manualCommand}`); - console.warn(' 影响:/adopt(接管已有 CLI 会话)和多人 Web 终端不可用;常规对话不受影响。'); + console.warn(' 注意:PTY 会话不跨 daemon 重启存活,/adopt 与多人 Web 终端不可用。'); console.warn(''); + } else { + console.error(''); + console.error('❌ tmux 不可用,botmux 会话将无法启动。'); + console.error(` 原因:${tmux.reason ?? '未知'}`); + if (tmux.manualCommand) console.error(` 请安装:${tmux.manualCommand}`); + console.error(' 安装好 tmux 后重试即可。'); + console.error(' (如确需在没有 tmux 的环境运行,可显式设置环境变量 BACKEND_TYPE=pty 用 PTY 后端兜底,'); + console.error(' 但 PTY 会话不跨 daemon 重启存活,仅作应急。)'); + console.error(''); } // Fonts second — best-effort. diff --git a/src/worker.ts b/src/worker.ts index 55bbe81e0..54bebc461 100644 --- a/src/worker.ts +++ b/src/worker.ts @@ -76,10 +76,10 @@ import { ZellijBackend, ZELLIJ_CONFIG_KDL } from './adapters/backend/zellij-back import { ZellijObserveBackend } from './adapters/backend/zellij-observe-backend.js'; import { zellijEnv } from './setup/ensure-zellij.js'; import { isObserveBackend, type ObserveBackend } from './adapters/backend/types.js'; -import { selectSessionBackend } from './adapters/backend/session-backend-selector.js'; +import { selectSessionBackend, decideBackendGate, backendGateUserMessage } from './adapters/backend/session-backend-selector.js'; import { prepareSandbox, attachSandboxOutbox, startOutboxWatcher, sandboxEnabled, sandboxedClaudeDataDir } from './adapters/backend/sandbox.js'; import type { BackendType, SessionBackend } from './adapters/backend/types.js'; -import { tmuxEnv } from './setup/ensure-tmux.js'; +import { tmuxEnv, probeTmuxFunctional } from './setup/ensure-tmux.js'; import { IdleDetector } from './utils/idle-detector.js'; import { ScreenAnalyzer } from './utils/screen-analyzer.js'; import { captureToPng } from './utils/screenshot-renderer.js'; @@ -3765,28 +3765,48 @@ function spawnCli(cfg: Extract): void { } cliAdapter = createCliAdapterSync(cfg.cliId as any, cfg.cliPathOverride); - // backendType=tmux trust-but-verify: an explicit per-bot config (or - // BACKEND_TYPE=tmux env override) bypasses config.ts's auto-detect, so - // the worker re-probes here. Existing botmux tmux sessions are more - // authoritative than the disposable "can we create a new server?" probe: - // abandoning one after a transient probe failure would spawn a duplicate CLI - // under raw PTY and make later submits invisible to the real Codex history. + // backendType trust-but-verify + HARD GATE (PTY 退役): an explicit per-bot + // config (or BACKEND_TYPE env override) bypasses config.ts's default, so the + // worker re-probes the requested persistent backend here. A requested + // tmux/herdr/zellij backend that isn't functional NO LONGER silently + // degrades to raw PTY — that silent fallback was the root of the "secretly + // running on PTY, then hitting all of PTY's problems" bug class. Instead we + // refuse to spawn and post an actionable card (user_notify) telling the user + // to install the backend, or to explicitly opt into PTY with BACKEND_TYPE=pty. + // + // Existing botmux sessions stay authoritative over the disposable "can we + // create a new server?" probe: a live session reattaches regardless of a + // transient probe failure (PR#249), so it's exempt from the gate. let effectiveBackend = cfg.backendType; - if (effectiveBackend === 'tmux') { - const existingSessionName = TmuxBackend.sessionName(cfg.sessionId); - const hasExistingSession = TmuxBackend.hasSession(existingSessionName); - if (!hasExistingSession && !TmuxBackend.isAvailable()) { - log('tmux backend requested but functional probe failed and no existing session is available — falling back to PTY backend'); - effectiveBackend = 'pty'; + { + let available = true; + let reason = ''; + let hasExistingSession = false; + if (effectiveBackend === 'tmux') { + hasExistingSession = TmuxBackend.hasSession(TmuxBackend.sessionName(cfg.sessionId)); + if (!hasExistingSession) { + const probe = probeTmuxFunctional(); + available = probe.ok; + if (!probe.ok) reason = probe.reason; + } + } else if (effectiveBackend === 'herdr') { + available = HerdrBackend.isAvailable(); + reason = 'herdr 功能性探针失败'; + } else if (effectiveBackend === 'zellij') { + available = ZellijBackend.isAvailable(); + reason = 'zellij 功能性探针失败(需 zellij >= 0.44)'; + } + const decision = decideBackendGate({ requested: effectiveBackend, available, hasExistingSession }); + if (decision.action === 'gate') { + const detail = reason || decision.reason; + log(`${effectiveBackend} backend unavailable and silent PTY fallback is disabled (set BACKEND_TYPE=pty to opt in): ${detail}`); + // user_notify is delivered to the Lark thread by the daemon (type:'error' + // is log-only); send it BEFORE throwing so the card lands. The throw is + // caught by the init handler, which sends type:'error' and exits — the + // IPC channel flushes these small messages before exit. + send({ type: 'user_notify', message: backendGateUserMessage(effectiveBackend, detail), turnId: cfg.turnId }); + throw new Error(`${effectiveBackend} backend unavailable; refusing silent PTY fallback (set BACKEND_TYPE=pty to opt in): ${detail}`); } - } - if (effectiveBackend === 'herdr' && !HerdrBackend.isAvailable()) { - log('herdr backend requested but probe failed — falling back to PTY backend'); - effectiveBackend = 'pty'; - } - if (effectiveBackend === 'zellij' && !ZellijBackend.isAvailable()) { - log('zellij backend requested but functional probe failed (need zellij >= 0.44) — falling back to PTY backend'); - effectiveBackend = 'pty'; } effectiveBackendType = effectiveBackend; const selectedBackend = selectSessionBackend({ sessionId: cfg.sessionId, backendType: effectiveBackend }); diff --git a/test/backend-gate.test.ts b/test/backend-gate.test.ts new file mode 100644 index 000000000..602fb2529 --- /dev/null +++ b/test/backend-gate.test.ts @@ -0,0 +1,45 @@ +import { describe, it, expect } from 'vitest'; +import { + decideBackendGate, + backendGateUserMessage, +} from '../src/adapters/backend/session-backend-selector.js'; + +describe('decideBackendGate (PTY 退役 hard gate)', () => { + it('always spawns when PTY is explicitly requested (escape hatch), even if "unavailable"', () => { + expect( + decideBackendGate({ requested: 'pty', available: false, hasExistingSession: false }), + ).toEqual({ action: 'spawn' }); + }); + + it('spawns tmux when the functional probe passes', () => { + expect( + decideBackendGate({ requested: 'tmux', available: true, hasExistingSession: false }), + ).toEqual({ action: 'spawn' }); + }); + + it('GATES tmux when probe fails and no live session exists (no silent PTY fallback)', () => { + const d = decideBackendGate({ requested: 'tmux', available: false, hasExistingSession: false }); + expect(d.action).toBe('gate'); + }); + + it('reattaches a live tmux session despite a transient probe failure (PR#249 exemption)', () => { + expect( + decideBackendGate({ requested: 'tmux', available: false, hasExistingSession: true }), + ).toEqual({ action: 'spawn' }); + }); + + it('gates herdr / zellij when unavailable instead of degrading to PTY', () => { + expect(decideBackendGate({ requested: 'herdr', available: false, hasExistingSession: false }).action).toBe('gate'); + expect(decideBackendGate({ requested: 'zellij', available: false, hasExistingSession: false }).action).toBe('gate'); + }); +}); + +describe('backendGateUserMessage', () => { + it('includes the reason, an install hint, and the explicit PTY escape hatch', () => { + const msg = backendGateUserMessage('tmux', 'tmux 二进制不在 PATH 上'); + expect(msg).toContain('tmux 不可用'); + expect(msg).toContain('tmux 二进制不在 PATH 上'); + expect(msg).toContain('brew install tmux'); + expect(msg).toContain('BACKEND_TYPE=pty'); + }); +}); diff --git a/test/worker-pipe-initial-screen-order.test.ts b/test/worker-pipe-initial-screen-order.test.ts index 5f51dc8e1..f0ef1049c 100644 --- a/test/worker-pipe-initial-screen-order.test.ts +++ b/test/worker-pipe-initial-screen-order.test.ts @@ -96,16 +96,21 @@ describe('worker pipe initial screen ordering', () => { expect(helper).not.toContain('pendingMessages.length > 0'); }); - it('checks for an existing tmux session before falling back to pty', () => { + it('hard-gates an unavailable persistent backend instead of silently falling back to pty', () => { const source = readFileSync(join(process.cwd(), 'src/worker.ts'), 'utf8'); const guardStart = source.indexOf('let effectiveBackend = cfg.backendType;'); - const guardEnd = source.indexOf("if (effectiveBackend === 'herdr'", guardStart); + const guardEnd = source.indexOf('effectiveBackendType = effectiveBackend;', guardStart); const guard = source.slice(guardStart, guardEnd); expect(guardStart).toBeGreaterThan(-1); expect(guardEnd).toBeGreaterThan(guardStart); - expect(guard).toContain('const hasExistingSession = TmuxBackend.hasSession(existingSessionName);'); - expect(guard).toContain('!hasExistingSession && !TmuxBackend.isAvailable()'); - expect(guard.indexOf('TmuxBackend.hasSession')).toBeLessThan(guard.indexOf('TmuxBackend.isAvailable')); + // A live tmux session is checked before probing so it can reattach (PR#249). + expect(guard).toContain('TmuxBackend.hasSession(TmuxBackend.sessionName(cfg.sessionId))'); + expect(guard.indexOf('TmuxBackend.hasSession')).toBeLessThan(guard.indexOf('probeTmuxFunctional')); + // The decision is made by the pure gate helper, and a gate posts an + // actionable card + throws — it must NOT silently downgrade to pty. + expect(guard).toContain('decideBackendGate('); + expect(guard).toContain("send({ type: 'user_notify'"); + expect(guard).not.toContain("effectiveBackend = 'pty'"); }); }); From 45d47c3aff86b8501009f42dd3887b2668836a19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=94=B3=E6=99=97?= Date: Wed, 24 Jun 2026 01:36:36 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fix(backend):=20=E6=8C=81=E4=B9=85=E5=8C=96?= =?UTF-8?q?=E4=BC=9A=E8=AF=9D=20backendType=EF=BC=8C=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E5=8D=87=E7=BA=A7=E5=90=8E=E6=97=A7=20PTY=20=E4=BC=9A=E8=AF=9D?= =?UTF-8?q?=E8=A2=AB=E8=AF=AF=E5=88=A4=E4=B8=BA=20tmux=20=E5=83=B5?= =?UTF-8?q?=E5=B0=B8=E5=85=B3=E9=97=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review 抓到的 P1:默认后端翻成恒 tmux 后,未持久化 backendType 的旧会话 (在无 tmux 机器上隐式跑 PTY 的) 在 daemon 重启时会被 getSessionPersistentBackendType 按当前默认值推断成 tmux,restore 探到 bmx- pane 不存在便当僵尸关掉,而不是像 PTY 那样保留 active 记录等下一条消息 lazy resume。 - types.ts: Session 新增 backendType,spawn 时落盘 - worker-pool.ts: fork 时把 initMsg.backendType 落到 session(对齐 cliId 落盘) - persistent-backend.ts: getSessionPersistentBackendType 优先读 initConfig/session.backendType, 再读显式 per-bot 配置;三者皆无(=旧会话且 bot 未钉后端)返回 undefined, 不再回落到现默认值 tmux,从而保守保留旧会话走 lazy resume 附带处理 Codex 的两个 minor: - worker.ts: zellij 与 tmux 一样先查 hasSession 再探针(保留 PR#249 重连语义), herdr 的 isAvailable 是无副作用 --version 无需豁免 - session-backend-selector.ts / ensure-zellij.ts: 订正过时的「fall back to PTY」注释 - 新增 test/persistent-backend-type.test.ts;restore-zombie-close 夹具补 backendType 戳记 --- .../backend/session-backend-selector.ts | 11 ++-- src/core/persistent-backend.ts | 31 ++++++---- src/core/worker-pool.ts | 12 ++++ src/setup/ensure-zellij.ts | 7 ++- src/types.ts | 12 ++++ src/worker.ts | 15 ++++- test/persistent-backend-type.test.ts | 56 +++++++++++++++++++ test/restore-zombie-close.test.ts | 5 ++ 8 files changed, 129 insertions(+), 20 deletions(-) create mode 100644 test/persistent-backend-type.test.ts diff --git a/src/adapters/backend/session-backend-selector.ts b/src/adapters/backend/session-backend-selector.ts index 0f518248a..ff8dbc0ba 100644 --- a/src/adapters/backend/session-backend-selector.ts +++ b/src/adapters/backend/session-backend-selector.ts @@ -20,10 +20,13 @@ export type BackendGateDecision = * per-bot `backendType: 'pty'` — which arrives here as `requested === 'pty'` * and is always allowed straight through. * - * An already-running persistent session reattaches regardless of a transient - * probe failure (a disposable "can we start a new server?" probe is far less - * authoritative than a live session — see PR#249): abandoning it would spawn a - * duplicate CLI and orphan the real conversation. + * `hasExistingSession` lets an already-running persistent session reattach + * regardless of a transient probe failure (a disposable "can we start a new + * server?" probe is far less authoritative than a live session — see PR#249): + * abandoning it would spawn a duplicate CLI and orphan the real conversation. + * The caller computes it only for backends whose probe is a disposable + * session (tmux, zellij); herdr's probe is a cheap non-destructive + * `herdr --version`, so it passes `hasExistingSession: false`. */ export function decideBackendGate(opts: { requested: BackendType; diff --git a/src/core/persistent-backend.ts b/src/core/persistent-backend.ts index 9cae05867..e770c7d29 100644 --- a/src/core/persistent-backend.ts +++ b/src/core/persistent-backend.ts @@ -11,7 +11,6 @@ * worker-pool and session-manager import it, and those two already form an * import cycle with each other. */ -import { config } from '../config.js'; import { getBot } from '../bot-registry.js'; import { TmuxBackend } from '../adapters/backend/tmux-backend.js'; import { HerdrBackend } from '../adapters/backend/herdr-backend.js'; @@ -28,19 +27,31 @@ export function isSuspendableBackendType( } /** - * Resolve which persistent backend (if any) backs a session: prefer the - * worker's stored init config — the per-session truth captured at spawn time, - * which survives idle-suspend and tracks bot-config drift — then the bot - * config, then the daemon default (covers lazy-restored sessions that never - * forked a worker, where initConfig is unset). + * Resolve which persistent backend (if any) backs a session. + * + * Precedence, most authoritative first: + * 1. `ds.initConfig?.backendType` — the live worker's resolved backend this run. + * 2. `ds.session.backendType` — the backend stamped on the persisted session + * at spawn time (survives daemon restart; see Session.backendType). + * 3. An explicit per-bot `backendType` — authoritative even for legacy + * sessions, since the bot's choice didn't change across the PTY退役 flip. + * + * If NONE of those resolve, the session predates backendType stamping AND its + * bot pins no backend, so it ran on the OLD probe-based daemon default — which + * could have been PTY on a tmux-less host. We deliberately do NOT fall back to + * the current `config.daemon.backendType` (now always tmux): doing so would + * make `restoreActiveSessions` probe for a `bmx-` pane that never existed, + * find it 'missing', and zombie-close a perfectly recoverable session. Treating + * it as non-persistent keeps the worker-less active record for lazy resume; a + * genuinely surviving tmux pane still reattaches lazily on the next message + * (and gets stamped then). */ export function getSessionPersistentBackendType(ds: DaemonSession): PersistentBackendType | undefined { - let backendType: BackendType | undefined = ds.initConfig?.backendType; + let backendType: BackendType | undefined = ds.initConfig?.backendType ?? ds.session.backendType; if (!backendType) { - backendType = config.daemon.backendType; try { - backendType = getBot(ds.larkAppId).config.backendType ?? backendType; - } catch { /* bot deregistered — keep daemon default */ } + backendType = getBot(ds.larkAppId).config.backendType; + } catch { /* bot deregistered */ } } return isSuspendableBackendType(backendType) ? backendType : undefined; } diff --git a/src/core/worker-pool.ts b/src/core/worker-pool.ts index 19675aa8b..a99ddc1e0 100644 --- a/src/core/worker-pool.ts +++ b/src/core/worker-pool.ts @@ -1679,6 +1679,18 @@ export function forkWorker(ds: DaemonSession, prompt: string, resume = false): v sessionStore.updateSession(ds.session); } + // Stamp the resolved backend on the persisted session. Since PTY退役, the + // worker no longer silently downgrades an unavailable backend (it hard-gates + // instead), so the requested backend here IS the effective one for any + // session that actually runs. Restore reads this back (see + // getSessionPersistentBackendType) so an upgraded daemon doesn't re-derive a + // session's backend from the now-always-tmux default and misclassify a legacy + // PTY session as a tmux zombie. + if (ds.session.backendType !== initMsg.backendType) { + ds.session.backendType = initMsg.backendType; + sessionStore.updateSession(ds.session); + } + // Use shared handler for IPC messages and exit setupWorkerHandlers(ds, worker); diff --git a/src/setup/ensure-zellij.ts b/src/setup/ensure-zellij.ts index 0587d5f65..0cddc7cc8 100644 --- a/src/setup/ensure-zellij.ts +++ b/src/setup/ensure-zellij.ts @@ -3,9 +3,10 @@ * * Zellij is an OPT-IN backend (BACKEND_TYPE=zellij) — there's no auto-install * here (tmux stays the default). We only need a functional probe so the worker - * can fall back to PTY when zellij is requested but unusable, and an env - * sanitiser so nested-session vars don't make our `zellij` calls target the - * wrong server. + * can hard-gate the session (post an actionable card, refuse to start) when + * zellij is requested but unusable — it no longer silently falls back to PTY — + * and an env sanitiser so nested-session vars don't make our `zellij` calls + * target the wrong server. * * The driveable automation surface (action write/dump-screen/list-panes --json, * headless `attach --create-background`) landed in zellij 0.40–0.44; we require diff --git a/src/types.ts b/src/types.ts index 706287711..ab7c665a3 100644 --- a/src/types.ts +++ b/src/types.ts @@ -123,6 +123,18 @@ export interface Session { suspendedColdResume?: boolean; /** CLI used to spawn this session — stamped on every save so closed sessions retain it. */ cliId?: import('./adapters/cli/types.js').CliId; + /** + * Session backend resolved AT SPAWN TIME (tmux/herdr/zellij/pty). Stamped on + * fork so restore can resolve the backend authoritatively from the session + * itself instead of re-deriving it from the live daemon default — which + * changed when PTY stopped being an automatic fallback (default is now always + * tmux). Without this, a session that was created under the old probe-based + * default (e.g. implicit PTY on a tmux-less host) would, after upgrade + + * restart, be misread as tmux and zombie-closed because no `bmx-` pane + * exists. Undefined on sessions persisted before this field existed → treated + * conservatively (see getSessionPersistentBackendType). + */ + backendType?: BackendType; /** * Sandbox decision RECORDED AT SESSION CREATION (overlay file-isolation). The * live bot flag (BotConfig.sandbox) can be toggled later, but a session's diff --git a/src/worker.ts b/src/worker.ts index 54bebc461..b840c226f 100644 --- a/src/worker.ts +++ b/src/worker.ts @@ -3789,12 +3789,21 @@ function spawnCli(cfg: Extract): void { available = probe.ok; if (!probe.ok) reason = probe.reason; } + } else if (effectiveBackend === 'zellij') { + // Like tmux, zellij's probe is a disposable background session, so a + // live named session is more authoritative than a transient probe + // failure (PR#249 semantics) — check it first so we reattach, not gate. + hasExistingSession = ZellijBackend.hasSession(ZellijBackend.sessionName(cfg.sessionId)); + if (!hasExistingSession) { + available = ZellijBackend.isAvailable(); + reason = 'zellij 功能性探针失败(需 zellij >= 0.44)'; + } } else if (effectiveBackend === 'herdr') { + // herdr's isAvailable() is a cheap, non-destructive `herdr --version` + // (not a disposable session probe), so it has no PR#249 false-negative + // risk and needs no existing-session exemption. available = HerdrBackend.isAvailable(); reason = 'herdr 功能性探针失败'; - } else if (effectiveBackend === 'zellij') { - available = ZellijBackend.isAvailable(); - reason = 'zellij 功能性探针失败(需 zellij >= 0.44)'; } const decision = decideBackendGate({ requested: effectiveBackend, available, hasExistingSession }); if (decision.action === 'gate') { diff --git a/test/persistent-backend-type.test.ts b/test/persistent-backend-type.test.ts new file mode 100644 index 000000000..58561c529 --- /dev/null +++ b/test/persistent-backend-type.test.ts @@ -0,0 +1,56 @@ +/** + * getSessionPersistentBackendType precedence + the PTY退役 legacy-safety fix. + * + * Regression target (Codex P1 on PR #289): after the default backend flipped to + * always-tmux, a session created under the OLD probe-based default (implicit PTY + * on a tmux-less host) — with no per-session backendType stamped and a bot that + * pins no backend — must NOT be re-derived as tmux. Otherwise restore probes for + * a `bmx-` pane that never existed and zombie-closes a recoverable session. + * + * Run: pnpm vitest run test/persistent-backend-type.test.ts + */ +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +// Mutable per-test bot backend config the mocked getBot returns. +const bot = vi.hoisted(() => ({ backendType: undefined as string | undefined })); + +vi.mock('../src/bot-registry.js', () => ({ + getBot: vi.fn(() => ({ config: { backendType: bot.backendType } })), +})); + +import { getSessionPersistentBackendType } from '../src/core/persistent-backend.js'; + +function ds(opts: { initBackend?: string; sessionBackend?: string }): any { + return { + larkAppId: 'app1', + initConfig: opts.initBackend ? { backendType: opts.initBackend } : undefined, + session: { sessionId: 'abcdef12', backendType: opts.sessionBackend }, + }; +} + +describe('getSessionPersistentBackendType', () => { + beforeEach(() => { bot.backendType = undefined; }); + + it('prefers the live worker initConfig backend', () => { + expect(getSessionPersistentBackendType(ds({ initBackend: 'tmux', sessionBackend: 'zellij' }))).toBe('tmux'); + }); + + it('falls back to the backend stamped on the persisted session', () => { + expect(getSessionPersistentBackendType(ds({ sessionBackend: 'zellij' }))).toBe('zellij'); + }); + + it('uses an explicit per-bot backend when the session has none stamped', () => { + bot.backendType = 'herdr'; + expect(getSessionPersistentBackendType(ds({}))).toBe('herdr'); + }); + + it('LEGACY SAFETY: unstamped session + bot pins no backend → undefined (not tmux), so restore keeps it for lazy resume instead of zombie-closing', () => { + bot.backendType = undefined; + expect(getSessionPersistentBackendType(ds({}))).toBeUndefined(); + }); + + it('a stamped pty session is not a persistent backend', () => { + expect(getSessionPersistentBackendType(ds({ sessionBackend: 'pty' }))).toBeUndefined(); + expect(getSessionPersistentBackendType(ds({ initBackend: 'pty' }))).toBeUndefined(); + }); +}); diff --git a/test/restore-zombie-close.test.ts b/test/restore-zombie-close.test.ts index bc00263af..639d358b0 100644 --- a/test/restore-zombie-close.test.ts +++ b/test/restore-zombie-close.test.ts @@ -172,6 +172,11 @@ function makeActivePersistentSession(rootMessageId: string) { s.workingDir = '/tmp/proj'; s.cliId = 'claude-code'; s.scope = 'thread'; + // Real tmux sessions now carry their backend stamped at spawn time + // (Session.backendType); getSessionPersistentBackendType reads it back rather + // than re-deriving from the daemon default. Stamp it so this fixture models a + // genuine tmux-backed session. + s.backendType = 'tmux'; sessionStore.updateSession(s); return s; // left active }