diff --git a/docs/agents-development.md b/docs/agents-development.md index afc347be4c..5edfedc011 100644 --- a/docs/agents-development.md +++ b/docs/agents-development.md @@ -147,6 +147,10 @@ All agent packages use `tsdown` for building. The `pnpm dev` command in each sta - **Agent logic changes** (`agents`): Same — restart the entrypoint after rebuild. - **UI changes** (`agents-server-ui`): Vite HMR — changes appear instantly. +## Developing with coding-agents + +For dev iteration without rebuilding the Docker image, spawn coding-agents with `target: 'host'` and a bind-mount workspace. The agent runs `claude` directly on the host with no isolation; the lifecycle, persistence, and resume behavior are otherwise identical to the sandbox target. + ## Working with examples The `examples/deep-survey` example demonstrates a custom agent with its own entity types: diff --git a/docs/superpowers/plans/2026-04-30-coding-agents-mvp.md b/docs/superpowers/plans/2026-04-30-coding-agents-mvp.md new file mode 100644 index 0000000000..25ab5aa3e1 --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-coding-agents-mvp.md @@ -0,0 +1,1221 @@ +# Coding Agents Platform Primitive — MVP Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Ship a minimum viable `@electric-ax/coding-agents` package that proves the core architecture: a Docker sandbox + a stdio bridge to the Claude CLI + a normalized event stream. Validation bar: an integration smoke test that starts a sandbox, runs `claude --print --output-format=stream-json` inside it, parses the JSONL output, and asserts `session_init` and `assistant_message` events were captured. 
+ +**Architecture:** Three modules in a new package — `LocalDockerProvider` (subprocess-driven Docker CLI; no `dockerode` dep to keep it small), `StdioBridge` (parses claude's stream-json output via `agent-session-protocol`'s `normalize`), and a tiny in-memory `Sandbox` lifecycle (start, exec, stop). No runtime API surface, no entity wiring, no UI in this MVP — those come after smoke green. + +**Tech Stack:** TypeScript, Vitest, tsdown, `agent-session-protocol@0.0.2` (already in workspace), Node `child_process`, Docker. + +**Spec scope cuts (intentional, MVP):** + +- Claude only, not Codex. +- No `LifecycleManager` (idle hibernation, pin/release). +- No workspace registry / refcount. +- No `ctx.spawnCodingAgent` API surface on `HandlerContext`. +- No built-in `coding-agent` entity wiring. +- No UI updates. +- No same-kind/cross-kind resume; single-shot turn only. +- Existing `coder` entity stays in place — no removal in MVP. + +These cuts are deliberate. Once the smoke test passes, the broader spec gets implemented in follow-on plans. 
+ +**Reference spec:** `docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md` + +--- + +## File Structure + +``` +packages/coding-agents/ ← NEW package +├── package.json +├── tsconfig.json +├── tsdown.config.ts +├── vitest.config.ts +├── .gitignore +├── src/ +│ ├── index.ts ← public exports +│ ├── types.ts ← all interfaces +│ ├── providers/ +│ │ └── local-docker.ts ← LocalDockerProvider +│ ├── bridge/ +│ │ └── stdio-bridge.ts ← StdioBridge +│ └── log.ts ← pino logger (mirrors agents-runtime/src/log.ts pattern) +├── docker/ +│ ├── Dockerfile ← node + claude installed +│ └── entrypoint.sh ← container PID 1, keeps it alive +└── test/ + ├── unit/ + │ ├── stdio-bridge.test.ts ← unit tests with stubbed exec + │ └── local-docker.test.ts ← unit tests against fake docker bin (post-MVP, optional) + ├── integration/ + │ └── smoke.test.ts ← REAL Docker + REAL Claude CLI + real API key + └── support/ + ├── build-image.ts ← helper to build the test image + └── env.ts ← reads /tmp/.electric-coding-agents-env +``` + +**No changes to other packages in this MVP.** + +--- + +## Phase Plan + +| Phase | Tasks | Parallelism | Depends on | +| ----- | ------------- | ------------------------------- | ---------- | +| 0 | 0.1, 0.2 | sequential | — | +| 1 | 1.A, 1.B, 1.C | parallel (3 independent agents) | Phase 0 | +| 2 | 2.1 | sequential | Phase 1 | +| 3 | iteration | sequential | Phase 2 | + +--- + +## Phase 0 — Foundation (sequential) + +### Task 0.1 — Scaffold package + +**Files to create:** + +- `packages/coding-agents/package.json` +- `packages/coding-agents/tsconfig.json` +- `packages/coding-agents/tsdown.config.ts` +- `packages/coding-agents/vitest.config.ts` +- `packages/coding-agents/.gitignore` + +The patterns mirror `packages/agents-runtime/` exactly. Copy versions of `tsdown`, `vitest`, `typescript`, `@types/node` from there. 
+ +- [ ] **Step 1: Write `packages/coding-agents/package.json`** + +```json +{ + "name": "@electric-ax/coding-agents", + "version": "0.0.1", + "description": "Sandbox + bridge layer for spawning coding agents (Claude Code, Codex) under Electric Agents.", + "repository": { + "type": "git", + "url": "git+https://github.com/electric-sql/electric.git", + "directory": "packages/coding-agents" + }, + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "scripts": { + "build": "tsdown", + "dev": "tsdown --watch", + "test": "vitest run", + "test:watch": "vitest", + "test:integration": "DOCKER=1 vitest run test/integration", + "typecheck": "tsc --noEmit", + "stylecheck": "eslint . --quiet" + }, + "exports": { + ".": { + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + }, + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + } + }, + "./package.json": "./package.json" + }, + "dependencies": { + "agent-session-protocol": "^0.0.2", + "pino": "^10.3.1", + "pino-pretty": "^13.0.0", + "zod": "^4.3.6" + }, + "devDependencies": { + "@types/node": "^22.19.15", + "tsdown": "^0.9.0", + "typescript": "^5.7.0", + "vitest": "^3.2.4" + }, + "files": ["dist", "docker"], + "sideEffects": false, + "license": "Apache-2.0" +} +``` + +- [ ] **Step 2: Write `packages/coding-agents/tsconfig.json`** + +```json +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src", + "types": ["node", "vitest/globals"] + }, + "include": ["src/**/*", "test/**/*"], + "exclude": ["dist", "node_modules"] +} +``` + +If `tsconfig.base.json` does not exist, copy the compilerOptions from `packages/agents-runtime/tsconfig.json` instead. + +- [ ] **Step 3: Write `packages/coding-agents/tsdown.config.ts`** + +Mirror `packages/agents-runtime/tsdown.config.ts`. 
The minimum is: + +```ts +import { defineConfig } from 'tsdown' + +export default defineConfig({ + entry: ['./src/index.ts'], + outDir: 'dist', + format: ['esm', 'cjs'], + dts: true, + clean: true, + sourcemap: true, +}) +``` + +- [ ] **Step 4: Write `packages/coding-agents/vitest.config.ts`** + +```ts +import { defineConfig } from 'vitest/config' + +export default defineConfig({ + test: { + globals: true, + environment: 'node', + testTimeout: 120_000, // integration tests build images, can be slow + }, +}) +``` + +- [ ] **Step 5: Write `packages/coding-agents/.gitignore`** + +``` +dist +node_modules +.vitest-temp +coverage +``` + +- [ ] **Step 6: Run `pnpm install` from repo root** + +``` +pnpm install +``` + +Expect: workspace picks up the new package; no errors. + +- [ ] **Step 7: Verify the package builds (no source yet → typecheck-only)** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean (no `src/` files yet, but typecheck against an empty include shouldn't error). +If it errors due to `include: ["src/**/*"]` matching nothing, add an empty `src/index.ts` with `export {}` first. + +- [ ] **Step 8: Commit** + +``` +git add packages/coding-agents +git commit -m "feat(coding-agents): scaffold @electric-ax/coding-agents package" +``` + +--- + +### Task 0.2 — Define core types & log + +**Files:** + +- Create: `packages/coding-agents/src/types.ts` +- Create: `packages/coding-agents/src/log.ts` +- Create: `packages/coding-agents/src/index.ts` (replace empty version from 0.1.7) + +- [ ] **Step 1: Write `src/log.ts`** + +```ts +import pino from 'pino' + +export const log = pino({ + name: 'coding-agents', + level: process.env.LOG_LEVEL ?? 'info', + ...(process.env.NODE_ENV !== 'production' + ? 
{ + transport: { + target: 'pino-pretty', + options: { colorize: true, translateTime: 'HH:MM:ss.l' }, + }, + } + : {}), +}) +``` + +- [ ] **Step 2: Write `src/types.ts`** + +```ts +import type { NormalizedEvent } from 'agent-session-protocol' + +export type CodingAgentKind = 'claude' | 'codex' + +// ─── Sandbox provider ────────────────────────────────────────────────────── + +export interface SandboxSpec { + /** Stable agent identity (e.g. /<parent>/coding-agent/<id>). */ + agentId: string + kind: CodingAgentKind + workspace: + | { type: 'volume'; name: string } + | { type: 'bindMount'; hostPath: string } + /** Env vars exposed inside the sandbox (ANTHROPIC_API_KEY, etc.). */ + env: Record<string, string> +} + +export interface ExecRequest { + cmd: string[] + cwd?: string + env?: Record<string, string> + stdin?: 'pipe' | 'ignore' +} + +export interface ExecHandle { + /** Async iterables of stdout/stderr lines (UTF-8, newline-stripped). */ + stdout: AsyncIterable<string> + stderr: AsyncIterable<string> + /** Available iff request.stdin === 'pipe'. */ + writeStdin?: (chunk: string) => Promise<void> + closeStdin?: () => Promise<void> + wait(): Promise<{ exitCode: number }> + kill(signal?: NodeJS.Signals): void +} + +export interface SandboxInstance { + instanceId: string + agentId: string + /** Path inside sandbox where the workspace volume / bind-mount is mounted. */ + workspaceMount: string + exec(args: ExecRequest): Promise<ExecHandle> +} + +export interface RecoveredSandbox { + agentId: string + instanceId: string + status: 'running' | 'stopped' +} + +export interface SandboxProvider { + readonly name: string + start(spec: SandboxSpec): Promise<SandboxInstance> + stop(instanceId: string): Promise<void> + destroy(agentId: string): Promise<void> + status(agentId: string): Promise<'running' | 'stopped' | 'unknown'> + /** Discover sandboxes adopted across host restarts. MVP: may return []. 
*/ + recover(): Promise<Array<RecoveredSandbox>> +} + +// ─── Bridge ──────────────────────────────────────────────────────────────── + +export interface RunTurnArgs { + sandbox: SandboxInstance + kind: CodingAgentKind + /** Resume id; undefined for first turn. */ + nativeSessionId?: string + prompt: string + /** Model to pass to the CLI (e.g. 'claude-haiku-4-5-20251001'). */ + model?: string + /** Sink for normalized events as parsed off CLI stdout. */ + onEvent: (e: NormalizedEvent) => void + /** Sink for raw native JSONL lines (tee'd to a sidecar collection). */ + onNativeLine?: (line: string) => void +} + +export interface RunTurnResult { + /** Discovered or provided session id. */ + nativeSessionId?: string + exitCode: number + /** Text of the last assistant_message in the turn (for parent's wake payload). */ + finalText?: string +} + +export interface Bridge { + runTurn(args: RunTurnArgs): Promise<RunTurnResult> +} +``` + +- [ ] **Step 3: Write `src/index.ts`** + +```ts +export type { + CodingAgentKind, + SandboxSpec, + ExecRequest, + ExecHandle, + SandboxInstance, + SandboxProvider, + RecoveredSandbox, + RunTurnArgs, + RunTurnResult, + Bridge, +} from './types' +export { LocalDockerProvider } from './providers/local-docker' +export { StdioBridge } from './bridge/stdio-bridge' +``` + +(Step 3 references modules that don't exist yet; that's fine — tasks in Phase 1 will create them. For the typecheck in Step 4 below, temporarily comment out the two `LocalDockerProvider`/`StdioBridge` re-exports until Phase 1 lands.) + +- [ ] **Step 4: Verify the package typechecks** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean. + +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/src +git commit -m "feat(coding-agents): define core types" +``` + +--- + +## Phase 1 — Independent components (parallel, 3 agents) + +These three tasks touch disjoint files. Dispatch them in parallel. 
+ +### Task 1.A — Dockerfile + entrypoint + +**Files:** + +- Create: `packages/coding-agents/docker/Dockerfile` +- Create: `packages/coding-agents/docker/entrypoint.sh` +- Create: `packages/coding-agents/test/support/build-image.ts` + +**Constraints / notes:** + +- Image must contain: `node` ≥ 22, `npm`, the official Claude CLI from npm, `git`, and `bash`. +- Claude is published as `@anthropic-ai/claude-code` on npm. Install with `npm install -g @anthropic-ai/claude-code`. The bin name is `claude`. +- Use `node:22-bookworm-slim` as the base — it's small enough and has glibc (musl on alpine breaks some npm postinstall scripts). +- The container's PID 1 must stay alive between `docker exec` invocations. Use `tail -f /dev/null`. +- Image tag for tests: `electric-ax/coding-agent-sandbox:test`. + +- [ ] **Step 1: Write `docker/Dockerfile`** + +```dockerfile +FROM node:22-bookworm-slim + +# Install OS deps: git (claude needs it), curl (claude installer occasionally probes), bash, ca-certs. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + git \ + bash \ + tini \ + && rm -rf /var/lib/apt/lists/* + +# Non-root user for the agent. Claude's home is needed for ~/.claude transcript dir. +RUN useradd -m -s /bin/bash -u 1000 agent + +# Install the Claude CLI globally. Pin a recent version to avoid drift; can bump later. +# (Use the floating tag for now; pin in v1.) +RUN npm install -g @anthropic-ai/claude-code@latest \ + && claude --version + +# Workspace mount point. The provider attaches a volume here. 
+RUN mkdir -p /workspace \ + && chown agent:agent /workspace + +USER agent +WORKDIR /workspace + +COPY --chown=agent:agent docker/entrypoint.sh /home/agent/entrypoint.sh +RUN chmod +x /home/agent/entrypoint.sh + +ENTRYPOINT ["/usr/bin/tini", "--", "/home/agent/entrypoint.sh"] +``` + +- [ ] **Step 2: Write `docker/entrypoint.sh`** + +```bash +#!/usr/bin/env bash +set -euo pipefail +# PID 1 just stays alive so docker exec can attach. Real work is done via exec. +exec tail -f /dev/null +``` + +- [ ] **Step 3: Write `test/support/build-image.ts`** + +```ts +import { spawn } from 'node:child_process' +import { dirname, resolve } from 'node:path' +import { fileURLToPath } from 'node:url' + +const here = dirname(fileURLToPath(import.meta.url)) +const PACKAGE_ROOT = resolve(here, '../..') + +export const TEST_IMAGE_TAG = 'electric-ax/coding-agent-sandbox:test' + +/** + * Build the test image. Idempotent: re-runs are cheap if Docker layer cache is warm. + * Throws on non-zero exit. + */ +export async function buildTestImage(): Promise<void> { + await new Promise<void>((resolveBuild, rejectBuild) => { + const child = spawn( + 'docker', + ['build', '-t', TEST_IMAGE_TAG, '-f', 'docker/Dockerfile', '.'], + { cwd: PACKAGE_ROOT, stdio: 'inherit' } + ) + child.on('error', rejectBuild) + child.on('exit', (code) => { + if (code === 0) resolveBuild() + else rejectBuild(new Error(`docker build exited ${code}`)) + }) + }) +} +``` + +- [ ] **Step 4: Build the image to verify it works** + +``` +cd packages/coding-agents +docker build -t electric-ax/coding-agent-sandbox:test -f docker/Dockerfile . +``` + +Expect: succeeds; the build log for the `npm install` layer shows the output of `claude --version`. + +- [ ] **Step 5: Smoke-check Claude inside the container** + +``` +docker run --rm --entrypoint claude electric-ax/coding-agent-sandbox:test --version +``` + +Expect: prints the claude version (e.g. `2.1.116 (Claude Code)`). The `--entrypoint` override is needed because the image's entrypoint script ignores its arguments and only runs `tail -f /dev/null`, so a plain `docker run … claude --version` would hang instead of printing the version. 
+ +- [ ] **Step 6: Commit** + +``` +git add packages/coding-agents/docker packages/coding-agents/test/support +git commit -m "feat(coding-agents): add Dockerfile and image build helper" +``` + +--- + +### Task 1.B — `LocalDockerProvider` + +**Files:** + +- Create: `packages/coding-agents/src/providers/local-docker.ts` +- Create: `packages/coding-agents/test/unit/local-docker.test.ts` (smoke unit; integration coverage is Phase 2) + +**Constraints:** + +- Use Node `child_process.spawn` to drive the `docker` CLI. No `dockerode` dependency. +- `start()` is idempotent: if a container with `electric-ax.agent-id=<agentId>` exists and is running, attach to it. +- Container labels: `electric-ax.agent-id=<agentId>`, `electric-ax.kind=<kind>`, `electric-ax.workspace-name=<name>` (the literal `bind-mount` for bind-mount workspaces). +- Volumes: + - `volume`: ensures `coding-agent-workspace-<name>` exists, mounts at `/workspace`. + - `bindMount`: mounts `realpath(hostPath)` at `/workspace`. +- Exec environment must merge `spec.env` so `ANTHROPIC_API_KEY` flows through. +- `exec` returns line-by-line async iterables and a `wait()` that resolves with the exit code. + +- [ ] **Step 1: Write `src/providers/local-docker.ts`** + +```ts +import { spawn } from 'node:child_process' +import { realpath } from 'node:fs/promises' +import { createInterface } from 'node:readline' +import type { Readable, Writable } from 'node:stream' +import { log } from '../log' +import type { + ExecHandle, + ExecRequest, + RecoveredSandbox, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from '../types' + +const IMAGE = + process.env.CODING_AGENT_IMAGE ?? 'electric-ax/coding-agent-sandbox:test' + +export interface LocalDockerProviderOptions { + /** Override the image tag (default: env CODING_AGENT_IMAGE or test image). */ + image?: string +} + +export class LocalDockerProvider implements SandboxProvider { + readonly name = 'local-docker' + private readonly image: string + + constructor(opts: LocalDockerProviderOptions = {}) { + this.image = opts.image ?? 
IMAGE + } + + async start(spec: SandboxSpec): Promise { + const existing = await this.findContainerByAgentId(spec.agentId) + if (existing && existing.running) { + log.debug( + { agentId: spec.agentId, instanceId: existing.id }, + 'attaching to existing sandbox' + ) + return this.makeInstance(existing.id, spec) + } + if (existing && !existing.running) { + // Stale stopped container with same agentId. Remove it first. + await runDocker(['rm', '-f', existing.id]) + } + + const labels = [ + `electric-ax.agent-id=${spec.agentId}`, + `electric-ax.kind=${spec.kind}`, + `electric-ax.workspace-name=${ + spec.workspace.type === 'volume' ? spec.workspace.name : 'bind-mount' + }`, + ] + + const mount = await this.mountFlag(spec) + + const args = [ + 'run', + '-d', + '--rm=false', + ...labels.flatMap((l) => ['--label', l]), + mount, + this.image, + ] + + const { stdout } = await runDocker(args) + const instanceId = stdout.trim() + log.info({ agentId: spec.agentId, instanceId }, 'started sandbox') + return this.makeInstance(instanceId, spec) + } + + async stop(instanceId: string): Promise { + await runDocker(['stop', '-t', '5', instanceId]).catch((err) => { + log.warn( + { err, instanceId }, + 'docker stop failed (probably already stopped)' + ) + }) + await runDocker(['rm', '-f', instanceId]).catch(() => undefined) + } + + async destroy(agentId: string): Promise { + const c = await this.findContainerByAgentId(agentId) + if (c) await this.stop(c.id) + // Volume cleanup is intentionally NOT done in MVP — tests clean up explicitly. + } + + async status(agentId: string): Promise<'running' | 'stopped' | 'unknown'> { + const c = await this.findContainerByAgentId(agentId) + if (!c) return 'unknown' + return c.running ? 
'running' : 'stopped' + } + + async recover(): Promise> { + const { stdout } = await runDocker([ + 'ps', + '-a', + '--format', + '{{.ID}}\t{{.Label "electric-ax.agent-id"}}\t{{.State}}', + '--filter', + 'label=electric-ax.agent-id', + ]) + return stdout + .trim() + .split('\n') + .filter(Boolean) + .map((line) => { + const [id, agentId, state] = line.split('\t') + return { + instanceId: id ?? '', + agentId: agentId ?? '', + status: state === 'running' ? 'running' : 'stopped', + } + }) + } + + // ── private helpers ── + + private async findContainerByAgentId( + agentId: string + ): Promise<{ id: string; running: boolean } | null> { + const { stdout } = await runDocker([ + 'ps', + '-a', + '--format', + '{{.ID}}\t{{.State}}', + '--filter', + `label=electric-ax.agent-id=${agentId}`, + ]) + const line = stdout + .trim() + .split('\n') + .find((l) => l.length > 0) + if (!line) return null + const [id, state] = line.split('\t') + return { id: id ?? '', running: state === 'running' } + } + + private async mountFlag(spec: SandboxSpec): Promise { + if (spec.workspace.type === 'volume') { + const volName = `coding-agent-workspace-${spec.workspace.name}` + // ensure the volume exists (docker auto-creates on first use, but explicit is friendlier) + await runDocker(['volume', 'create', volName]).catch(() => undefined) + return `--mount=type=volume,source=${volName},target=/workspace` + } + const real = await realpath(spec.workspace.hostPath) + return `--mount=type=bind,source=${real},target=/workspace` + } + + private makeInstance(instanceId: string, spec: SandboxSpec): SandboxInstance { + return { + instanceId, + agentId: spec.agentId, + workspaceMount: '/workspace', + exec: (args) => execInContainer(instanceId, args, spec.env), + } + } +} + +// ── docker CLI helpers ── + +async function runDocker( + args: ReadonlyArray +): Promise<{ stdout: string; stderr: string }> { + return new Promise((resolveCmd, rejectCmd) => { + const child = spawn('docker', args, { stdio: ['ignore', 
'pipe', 'pipe'] }) + let stdout = '' + let stderr = '' + child.stdout.on('data', (d) => (stdout += d.toString())) + child.stderr.on('data', (d) => (stderr += d.toString())) + child.on('error', rejectCmd) + child.on('exit', (code) => { + if (code === 0) resolveCmd({ stdout, stderr }) + else + rejectCmd( + new Error(`docker ${args.join(' ')} exited ${code}: ${stderr}`) + ) + }) + }) +} + +function lineIterator(stream: Readable): AsyncIterable { + const rl = createInterface({ input: stream, crlfDelay: Infinity }) + return rl as unknown as AsyncIterable +} + +async function execInContainer( + containerId: string, + req: ExecRequest, + baseEnv: Record +): Promise { + const env = { ...baseEnv, ...(req.env ?? {}) } + const args: Array = ['exec', '-i'] + if (req.cwd) args.push('-w', req.cwd) + for (const [k, v] of Object.entries(env)) args.push('-e', `${k}=${v}`) + args.push(containerId, ...req.cmd) + + const child = spawn('docker', args, { + stdio: [req.stdin === 'pipe' ? 'pipe' : 'ignore', 'pipe', 'pipe'], + }) + + let exitCode: number | null = null + const exitPromise = new Promise<{ exitCode: number }>( + (resolveWait, rejectWait) => { + child.on('error', rejectWait) + child.on('exit', (code) => { + exitCode = code ?? -1 + resolveWait({ exitCode }) + }) + } + ) + + const stdinStream = child.stdin as Writable | null + + return { + stdout: lineIterator(child.stdout!), + stderr: lineIterator(child.stderr!), + writeStdin: stdinStream + ? async (chunk) => { + await new Promise((res, rej) => { + stdinStream.write(chunk, (err) => (err ? rej(err) : res())) + }) + } + : undefined, + closeStdin: stdinStream + ? 
async () => { + await new Promise((res) => { + stdinStream.end(res) + }) + } + : undefined, + wait: () => exitPromise, + kill: (signal = 'SIGTERM') => { + try { + child.kill(signal) + } catch { + // already dead + } + }, + } +} +``` + +- [ ] **Step 2: Write `test/unit/local-docker.test.ts`** — minimal type-only smoke + +```ts +import { describe, it, expect } from 'vitest' +import { LocalDockerProvider } from '../../src/providers/local-docker' + +describe('LocalDockerProvider construction', () => { + it('exposes name "local-docker"', () => { + const p = new LocalDockerProvider() + expect(p.name).toBe('local-docker') + }) +}) +``` + +- [ ] **Step 3: Run `pnpm -C packages/coding-agents test test/unit/local-docker.test.ts`** + +Expect: PASS. + +- [ ] **Step 4: Commit** + +``` +git add packages/coding-agents/src/providers packages/coding-agents/test/unit/local-docker.test.ts +git commit -m "feat(coding-agents): add LocalDockerProvider" +``` + +--- + +### Task 1.C — `StdioBridge` + +**Files:** + +- Create: `packages/coding-agents/src/bridge/stdio-bridge.ts` +- Create: `packages/coding-agents/test/unit/stdio-bridge.test.ts` + +**Constraints / claude CLI conventions (verified against `claude --help`):** + +- Required flags for streaming JSONL output: `--print --output-format=stream-json --verbose`. The `--verbose` flag is required when combining `--print` with `--output-format=stream-json`. +- `--input-format=stream-json` is for streaming JSON _input_; we just want to send a single prompt, so we either pipe the prompt on stdin (default text input) or pass it on argv. Pipe on stdin to mirror existing patterns. +- `--dangerously-skip-permissions` — required for non-interactive autonomous runs. +- `--model <model>` — pass `'claude-haiku-4-5-20251001'` for cheap test runs. +- Resume: `--resume <session-id>` — out of scope for MVP; bridge ignores `nativeSessionId` for now (logs a warning if set). 
+ +**Event normalization:** + +- `agent-session-protocol` exports `normalize(lines: string[], agent: 'claude'): NormalizedEvent[]`. Use it on each accumulated batch — but we want to emit events per line. The library also ships line-level normalization functions; if they're not directly exposed, we batch internally and call `normalize(batch, 'claude')` on each new line and emit only the events we haven't emitted yet. +- Cleanest first-pass: collect all stdout lines into a buffer, call `normalize(buf, 'claude')` once at end, emit. Streaming-during-turn is a v2 optimization. The smoke test only asserts events are present, not real-time-ness, so batch-at-end is fine for MVP. + +- [ ] **Step 1: Write `src/bridge/stdio-bridge.ts`** + +```ts +import { normalize } from 'agent-session-protocol' +import type { NormalizedEvent } from 'agent-session-protocol' +import { log } from '../log' +import type { Bridge, RunTurnArgs, RunTurnResult } from '../types' + +export class StdioBridge implements Bridge { + async runTurn(args: RunTurnArgs): Promise { + if (args.kind !== 'claude') { + throw new Error( + `StdioBridge MVP supports only 'claude', got '${args.kind}'` + ) + } + if (args.nativeSessionId) { + log.warn( + { nativeSessionId: args.nativeSessionId }, + 'StdioBridge MVP does not implement resume — running fresh turn' + ) + } + + const cliArgs: Array = [ + '--print', + '--output-format=stream-json', + '--verbose', + '--dangerously-skip-permissions', + ] + if (args.model) cliArgs.push('--model', args.model) + + const handle = await args.sandbox.exec({ + cmd: ['claude', ...cliArgs], + cwd: args.sandbox.workspaceMount, + stdin: 'pipe', + }) + + // Pipe prompt on stdin, then close. 
+ if (!handle.writeStdin || !handle.closeStdin) { + throw new Error( + 'StdioBridge requires stdin pipe but ExecHandle lacks one' + ) + } + await handle.writeStdin(args.prompt) + await handle.closeStdin() + + const rawLines: Array = [] + const stderrLines: Array = [] + + const drainStderr = async () => { + for await (const line of handle.stderr) { + stderrLines.push(line) + } + } + const drainStdout = async () => { + for await (const line of handle.stdout) { + if (!line) continue + rawLines.push(line) + if (args.onNativeLine) args.onNativeLine(line) + } + } + + await Promise.all([drainStdout(), drainStderr()]) + const exitInfo = await handle.wait() + + if (exitInfo.exitCode !== 0) { + const stderrPreview = stderrLines.join('\n').slice(0, 800) || '' + throw new Error( + `claude CLI exited ${exitInfo.exitCode}. stderr=${stderrPreview}` + ) + } + + let events: Array = [] + try { + events = normalize(rawLines, 'claude') + } catch (err) { + log.error({ err, sample: rawLines.slice(0, 3) }, 'normalize failed') + throw err + } + + for (const e of events) args.onEvent(e) + + const sessionInit = events.find((e) => e.type === 'session_init') + const lastAssistant = [...events] + .reverse() + .find((e) => e.type === 'assistant_message') + + return { + nativeSessionId: + sessionInit && 'sessionId' in sessionInit + ? (sessionInit as { sessionId?: string }).sessionId + : undefined, + exitCode: exitInfo.exitCode, + finalText: + lastAssistant && 'text' in lastAssistant + ? 
(lastAssistant as { text?: string }).text + : undefined, + } + } +} +``` + +- [ ] **Step 2: Write `test/unit/stdio-bridge.test.ts`** + +```ts +import { describe, expect, it } from 'vitest' +import { StdioBridge } from '../../src/bridge/stdio-bridge' +import type { ExecHandle, ExecRequest, SandboxInstance } from '../../src/types' + +function fakeSandbox(opts: { + stdoutLines: Array + stderrLines?: Array + exitCode?: number + onCmd?: (cmd: ReadonlyArray) => void + onStdin?: (chunk: string) => void +}): SandboxInstance { + return { + instanceId: 'fake', + agentId: '/x/coding-agent/y', + workspaceMount: '/workspace', + async exec(req: ExecRequest): Promise { + opts.onCmd?.(req.cmd) + const stdoutLines = opts.stdoutLines.slice() + const stderrLines = (opts.stderrLines ?? []).slice() + let stdinBuf = '' + return { + stdout: (async function* () { + for (const l of stdoutLines) yield l + })(), + stderr: (async function* () { + for (const l of stderrLines) yield l + })(), + writeStdin: async (chunk) => { + stdinBuf += chunk + opts.onStdin?.(chunk) + }, + closeStdin: async () => undefined, + wait: async () => ({ exitCode: opts.exitCode ?? 
0 }), + kill: () => undefined, + } + }, + } +} + +describe('StdioBridge', () => { + it('rejects non-claude kinds', async () => { + const b = new StdioBridge() + await expect( + b.runTurn({ + sandbox: fakeSandbox({ stdoutLines: [] }), + kind: 'codex' as 'claude', + prompt: 'x', + onEvent: () => undefined, + }) + ).rejects.toThrow(/MVP supports only 'claude'/) + }) + + it('passes the prompt through stdin and runs the right CLI args', async () => { + let cmd: ReadonlyArray = [] + let stdin = '' + const b = new StdioBridge() + await b.runTurn({ + sandbox: fakeSandbox({ + stdoutLines: ['{"type":"system","subtype":"init","session_id":"abc"}'], + onCmd: (c) => (cmd = c), + onStdin: (s) => (stdin = s), + }), + kind: 'claude', + prompt: 'hello world', + model: 'claude-haiku-4-5-20251001', + onEvent: () => undefined, + }) + expect(cmd[0]).toBe('claude') + expect(cmd).toContain('--print') + expect(cmd).toContain('--output-format=stream-json') + expect(cmd).toContain('--verbose') + expect(cmd).toContain('--dangerously-skip-permissions') + expect(cmd).toContain('--model') + expect(cmd).toContain('claude-haiku-4-5-20251001') + expect(stdin).toBe('hello world') + }) + + it('throws with stderr when CLI exits non-zero', async () => { + const b = new StdioBridge() + await expect( + b.runTurn({ + sandbox: fakeSandbox({ + stdoutLines: [], + stderrLines: ['fatal: bad thing'], + exitCode: 1, + }), + kind: 'claude', + prompt: 'x', + onEvent: () => undefined, + }) + ).rejects.toThrow(/claude CLI exited 1.*fatal: bad thing/) + }) +}) +``` + +(Note: the test that depends on real `agent-session-protocol` normalization of synthetic JSONL is omitted — the integration smoke test in Phase 2 covers that path with real CLI output.) + +- [ ] **Step 3: Run `pnpm -C packages/coding-agents test test/unit/stdio-bridge.test.ts`** + +Expect: PASS. 
+ +- [ ] **Step 4: Commit** + +``` +git add packages/coding-agents/src/bridge packages/coding-agents/test/unit/stdio-bridge.test.ts +git commit -m "feat(coding-agents): add StdioBridge" +``` + +--- + +## Phase 2 — Integration smoke (sequential) + +### Task 2.1 — End-to-end smoke test + +**Files:** + +- Create: `packages/coding-agents/test/support/env.ts` +- Create: `packages/coding-agents/test/integration/smoke.test.ts` + +**Validation goal:** + +1. Build the test image. +2. `LocalDockerProvider.start()` a sandbox with a per-test volume and `ANTHROPIC_API_KEY` from the env file. +3. `StdioBridge.runTurn()` runs `claude --print` inside, with prompt `"Reply with the single word: ok"`. +4. Assert: at least one `session_init` event and at least one `assistant_message` event were captured. +5. Cleanup: `provider.destroy(agentId)` removes the container. + +- [ ] **Step 1: Write `test/support/env.ts`** + +```ts +import { readFileSync } from 'node:fs' + +const KEY_FILE = '/tmp/.electric-coding-agents-env' + +export interface TestEnv { + ANTHROPIC_API_KEY: string + ANTHROPIC_MODEL: string +} + +let cached: TestEnv | null = null + +export function loadTestEnv(): TestEnv { + if (cached) return cached + let raw: string + try { + raw = readFileSync(KEY_FILE, 'utf-8') + } catch (e) { + throw new Error( + `Integration tests require ${KEY_FILE} (mode 600) with ANTHROPIC_API_KEY=… and ANTHROPIC_MODEL=…` + ) + } + const out: Partial = {} + for (const line of raw.split('\n')) { + const trimmed = line.trim() + if (!trimmed || trimmed.startsWith('#')) continue + const eq = trimmed.indexOf('=') + if (eq < 0) continue + const k = trimmed.slice(0, eq) + const v = trimmed.slice(eq + 1) + if (k === 'ANTHROPIC_API_KEY' || k === 'ANTHROPIC_MODEL') out[k] = v + } + if (!out.ANTHROPIC_API_KEY) { + throw new Error(`${KEY_FILE} must contain ANTHROPIC_API_KEY=…`) + } + cached = { + ANTHROPIC_API_KEY: out.ANTHROPIC_API_KEY, + ANTHROPIC_MODEL: out.ANTHROPIC_MODEL ?? 
'claude-haiku-4-5-20251001', + } + return cached +} +``` + +- [ ] **Step 2: Write `test/integration/smoke.test.ts`** + +```ts +import { describe, expect, beforeAll, afterAll, it } from 'vitest' +import type { NormalizedEvent } from 'agent-session-protocol' +import { LocalDockerProvider } from '../../src/providers/local-docker' +import { StdioBridge } from '../../src/bridge/stdio-bridge' +import { buildTestImage, TEST_IMAGE_TAG } from '../support/build-image' +import { loadTestEnv } from '../support/env' + +const SHOULD_RUN = process.env.DOCKER === '1' +const describeMaybe = SHOULD_RUN ? describe : describe.skip + +describeMaybe('coding-agents smoke (real Docker + real Claude)', () => { + const provider = new LocalDockerProvider({ image: TEST_IMAGE_TAG }) + const bridge = new StdioBridge() + const agentId = `/test/coding-agent/${Date.now().toString(36)}` + const events: Array<NormalizedEvent> = [] + + beforeAll(async () => { + await buildTestImage() + }, 600_000) + + afterAll(async () => { + await provider.destroy(agentId).catch(() => undefined) + }) + + it('starts a sandbox, runs claude, captures session_init + assistant_message', async () => { + const env = loadTestEnv() + const sandbox = await provider.start({ + agentId, + kind: 'claude', + workspace: { type: 'volume', name: agentId.replace(/[^a-z0-9-]/gi, '-') }, + env: { ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }, + }) + + const result = await bridge.runTurn({ + sandbox, + kind: 'claude', + prompt: 'Reply with the single word: ok', + model: env.ANTHROPIC_MODEL, + onEvent: (e) => events.push(e), + }) + + expect(result.exitCode).toBe(0) + expect(events.find((e) => e.type === 'session_init')).toBeTruthy() + expect(events.find((e) => e.type === 'assistant_message')).toBeTruthy() + // sanity: response text isn't empty + expect(result.finalText && result.finalText.length > 0).toBe(true) + }, 180_000) +}) +``` + +- [ ] **Step 3: Run the smoke test** + +``` +DOCKER=1 pnpm -C packages/coding-agents test:integration +``` + +Expect: PASS
within ~3 minutes (image build + claude invocation). + +If it fails, **iterate** (Phase 3): inspect output, adjust the bridge / dockerfile / provider, re-run. Maximum 5 iterations before declaring blocked and writing the report. + +- [ ] **Step 4: Commit** + +``` +git add packages/coding-agents/test/support/env.ts packages/coding-agents/test/integration +git commit -m "test(coding-agents): integration smoke against real Docker + Claude" +``` + +--- + +## Phase 3 — Iteration (when smoke fails) + +For each failure, follow this protocol (max 5 cycles): + +1. Capture full failure output. +2. Hypothesize 1-3 likely causes (e.g., wrong claude flags, missing env, container exits early). +3. Pick the highest-likelihood fix; apply it. +4. Re-run smoke. +5. If still failing, document in the report (Phase 4) and try the next hypothesis. + +Common failure modes to anticipate: + +- **`claude: not found`** → image install path issue. Check `which claude` inside the container; ensure the npm global bin is in PATH. +- **`ANTHROPIC_API_KEY not set`** → env not piped through `docker exec -e`. Verify `LocalDockerProvider.execInContainer` is forwarding the env. +- **`--verbose required with --output-format=stream-json`** → already accounted for, but if claude version drifts the message may differ. +- **Empty stdout** → Claude may only write JSON when the API key is valid. Check stderr. +- **`normalize` throws** → a line is not valid JSON. Filter empty/non-JSON lines before passing. +- **Container exits before exec lands** → `tini` + `tail -f /dev/null` should keep it alive. Add a `docker logs <container>` call for debugging. +- **Permission errors on volume** → ensure `chown agent:agent /workspace` in Dockerfile. + +After a passing run, even if some flakiness was observed, treat first green as success and proceed to Phase 4. + +If 5 cycles pass without green, **stop** and write the report describing the blocker.
+ +--- + +## Phase 4 — Report + +### Task 4.1 — Write report + +**File:** `docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md` + +- [ ] **Step 1: Write report markdown** + +Include: + +- Goal & validation bar. +- What worked: tasks/phases that landed cleanly on first try. +- What broke: each bug, hypothesis, fix attempt, outcome. +- Token usage / time on wall clock if observable. +- Open questions for the next iteration. +- Recommended next steps to extend the MVP toward the full spec. + +- [ ] **Step 2: Commit** + +``` +git add docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md +git commit -m "docs(coding-agents): MVP run report" +``` + +--- + +## Self-review checklist (post-write) + +- [x] **Spec coverage:** Plan covers a subset of the full spec — explicitly scoped down to "claude in docker via Provider + Bridge". The full spec sections this MVP defers to follow-on plans: + - LifecycleManager, workspace registry / lease, runtime API surface, built-in entity, UI updates, codex support, resume flow, conformance suite, removal of `coder` entity. All listed under "Spec scope cuts". +- [x] **Placeholder scan:** No TBDs / TODOs / "appropriate handling" in the steps. +- [x] **Type consistency:** `RunTurnArgs.kind`, `RunTurnArgs.model`, `RunTurnArgs.onEvent`, `RunTurnArgs.onNativeLine` consistent across `types.ts`, `stdio-bridge.ts`, and the smoke test. +- [x] **Approval:** Pre-approved per user instruction ("approve everything"). Proceeding to dispatch. 
diff --git a/docs/superpowers/plans/2026-04-30-coding-agents-slice-a.md b/docs/superpowers/plans/2026-04-30-coding-agents-slice-a.md new file mode 100644 index 0000000000..3c97bec700 --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-coding-agents-slice-a.md @@ -0,0 +1,2709 @@ +# Coding Agents — Slice A Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Wire the existing `LocalDockerProvider` + `StdioBridge` (from the MVP) into a first-class platform primitive: a built-in `coding-agent` entity, a `LifecycleManager`, a `WorkspaceRegistry`, and the typed `ctx.spawnCodingAgent` / `ctx.observeCodingAgent` API on `HandlerContext`. Validation bar: an integration test that spawns a `coding-agent` from a parent test entity, awaits a `runFinished` wake with the response text, exercises pin/release/idle hibernation, lease-serializes two agents on a shared workspace, simulates server crash mid-turn and asserts orphan reconciliation. + +**Architecture:** New code lives in `@electric-ax/coding-agents/src/{lifecycle-manager.ts, workspace-registry.ts, entity/*}`. The runtime gets typed wrappers (`ctx.spawnCodingAgent` / `ctx.observeCodingAgent`) that desugar to `ctx.spawn('coding-agent', ...)` / `ctx.observe(...)`. The entity handler closes over the LM + WR; collection access uses the StreamDB pattern (`ctx.db.collections.X.get`, `ctx.db.actions.X_insert/X_update`). Server bootstrap (`packages/agents/src/bootstrap.ts`) adds `registerCodingAgent(registry, { provider, bridge })` next to `registerCodingSession(registry)`. Legacy `coder` entity coexists. + +**Spec divergences (resolved from spec's Open Questions section):** + +- **No `onBoot` registry hook.** The runtime's `EntityRegistry.define()` has no `onBoot` parameter. We don't add one in Slice A. 
Instead: first-wake init in the handler seeds `sessionMeta`, and the LM/WR rebuild lazily on first handler invocation (gated by an idempotent in-process flag). Reduces runtime surface area; no behavior loss for Slice A. +- **No `ctx.deleteEntityStream`.** `destroy()` becomes "stop sandbox + drop workspace ref + set `sessionMeta.status='destroyed'` + future inbox messages return early". The entity stream stays as a tombstone. Durable cleanup is Slice B. +- **`workspace.sharedRefs` from a client `ctx`.** Server-only state. Client handles return `sharedRefs: 1`. Documented in `state()` JSDoc. + +**Tech Stack:** TypeScript, Vitest, Node `child_process`, Docker, `agent-session-protocol@0.0.2`, `zod` (collection + inbox schemas). + +**Reference spec:** `docs/superpowers/specs/2026-04-30-coding-agents-slice-a-design.md` + +--- + +## File Structure + +``` +packages/coding-agents/ ← extend existing package +├── src/ +│ ├── index.ts ← +exports for new types and registerCodingAgent +│ ├── types.ts ← +SpawnCodingAgentOptions, CodingAgentStatus, RunSummary +│ ├── lifecycle-manager.ts ← NEW +│ ├── workspace-registry.ts ← NEW +│ ├── entity/ +│ │ ├── collections.ts ← NEW: schemas + wire constants +│ │ ├── messages.ts ← NEW: inbox message schemas +│ │ ├── handler.ts ← NEW: the entity handler +│ │ └── register.ts ← NEW: registerCodingAgent +│ ├── providers/local-docker.ts ← (existing, no changes for Slice A) +│ ├── bridge/stdio-bridge.ts ← (existing, no changes) +│ └── log.ts ← (existing) +└── test/ + ├── unit/ + │ ├── workspace-registry.test.ts ← NEW + │ ├── lifecycle-manager.test.ts ← NEW + │ ├── entity-handler.test.ts ← NEW + │ ├── local-docker.test.ts ← (existing) + │ └── stdio-bridge.test.ts ← (existing) + └── integration/ + ├── slice-a.test.ts ← NEW + ├── smoke.test.ts ← (existing) + └── support/ + ├── build-image.ts ← (existing) + └── env.ts ← (existing) + +packages/agents-runtime/ +└── src/ + ├── types.ts ← +SpawnCodingAgentOptions, CodingAgentHandle, 
HandlerContext.spawnCodingAgent / observeCodingAgent + └── context-factory.ts ← +spawnCodingAgent / observeCodingAgent impls + +packages/agents/ +└── src/bootstrap.ts ← +registerCodingAgent call + +docs/superpowers/specs/notes/ +└── 2026-04-30-coding-agents-slice-a-report.md ← NEW (Phase 5) +``` + +--- + +## Phase Plan + +| Phase | Tasks | Parallelism | Depends on | +| ----- | ------------- | ------------------------------- | ---------- | +| 0 | 0.1, 0.2 | sequential | — | +| 1 | 1.A, 1.B | parallel (2 independent agents) | Phase 0 | +| 2 | 2.1, 2.2, 2.3 | sequential | Phase 1 | +| 3 | 3.1 | sequential | Phase 2 | +| 4 | 4.1 | sequential | Phase 3 | +| 5 | 5.1 (report) | sequential | Phase 4 | + +Total tasks: 8 (excluding report). Estimated wall time per task: 10-30 min. + +--- + +## Phase 0 — Foundation (sequential) + +### Task 0.1 — Wire constants, collection schemas, inbox schemas + +**Files:** + +- Create: `packages/coding-agents/src/entity/collections.ts` +- Create: `packages/coding-agents/src/entity/messages.ts` + +- [ ] **Step 1: Write `src/entity/collections.ts`** + +```ts +import { z } from 'zod' + +export const CODING_AGENT_SESSION_META_COLLECTION_TYPE = + 'coding-agent.sessionMeta' +export const CODING_AGENT_RUNS_COLLECTION_TYPE = 'coding-agent.runs' +export const CODING_AGENT_EVENTS_COLLECTION_TYPE = 'coding-agent.events' +export const CODING_AGENT_LIFECYCLE_COLLECTION_TYPE = 'coding-agent.lifecycle' + +export const codingAgentStatusSchema = z.enum([ + 'cold', + 'starting', + 'idle', + 'running', + 'stopping', + 'error', + 'destroyed', +]) +export type CodingAgentStatus = z.infer + +export const sessionMetaRowSchema = z.object({ + key: z.literal('current'), + status: codingAgentStatusSchema, + kind: z.enum(['claude']), + pinned: z.boolean(), + workspaceIdentity: z.string(), + workspaceSpec: z.discriminatedUnion('type', [ + z.object({ + type: z.literal('volume'), + name: z.string(), + }), + z.object({ + type: z.literal('bindMount'), + hostPath: 
z.string(), + }), + ]), + idleTimeoutMs: z.number(), + keepWarm: z.boolean(), + instanceId: z.string().optional(), + lastError: z.string().optional(), + currentPromptInboxKey: z.string().optional(), +}) +export type SessionMetaRow = z.infer<typeof sessionMetaRowSchema> + +export const runRowSchema = z.object({ + key: z.string(), + startedAt: z.number(), + endedAt: z.number().optional(), + status: z.enum(['running', 'completed', 'failed']), + finishReason: z.string().optional(), + promptInboxKey: z.string(), + responseText: z.string().optional(), +}) +export type RunRow = z.infer<typeof runRowSchema> + +export const eventRowSchema = z.object({ + key: z.string(), + runId: z.string(), + seq: z.number(), + ts: z.number(), + type: z.string(), + payload: z.looseObject({}), +}) +export type EventRow = z.infer<typeof eventRowSchema> + +export const lifecycleRowSchema = z.object({ + key: z.string(), + ts: z.number(), + event: z.enum([ + 'sandbox.starting', + 'sandbox.started', + 'sandbox.stopped', + 'sandbox.failed', + 'pin', + 'release', + 'orphan.detected', + ]), + detail: z.string().optional(), +}) +export type LifecycleRow = z.infer<typeof lifecycleRowSchema> +``` + +- [ ] **Step 2: Write `src/entity/messages.ts`** + +```ts +import { z } from 'zod' + +export const promptMessageSchema = z.object({ + text: z.string(), +}) +export const pinMessageSchema = z.object({}).strict() +export const releaseMessageSchema = z.object({}).strict() +export const stopMessageSchema = z.object({}).strict() +export const destroyMessageSchema = z.object({}).strict() + +export type PromptMessage = z.infer<typeof promptMessageSchema> +``` + +- [ ] **Step 3: Verify typecheck** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean.
+ +- [ ] **Step 4: Commit** + +``` +git add packages/coding-agents/src/entity +git commit -m "feat(coding-agents): collection + inbox message schemas for coding-agent entity" +``` + +--- + +### Task 0.2 — Public types extension + +**Files:** + +- Modify: `packages/coding-agents/src/types.ts` + +- [ ] **Step 1: Append to `src/types.ts`** + +Add after the existing types: + +```ts +import type { CodingAgentStatus } from './entity/collections' + +// ─── Slice A: SpawnCodingAgentOptions / RunSummary ────────────────────────── + +export interface SpawnCodingAgentOptions { + /** Stable id, scoped to the spawning entity. */ + id: string + /** Slice A: 'claude' only. */ + kind: 'claude' + /** + * Workspace mount. Identity is the lease key. + * { type: 'volume', name: 'foo' } → 'volume:foo' + * { type: 'volume' } → 'volume:<agentId>' + * { type: 'bindMount', hostPath: P } → 'bindMount:<realpath(P)>' + */ + workspace: + | { type: 'volume'; name?: string } + | { type: 'bindMount'; hostPath: string } + /** Initial prompt; queued before the first wake. */ + initialPrompt?: string + /** Slice A: 'runFinished' only. */ + wake?: { on: 'runFinished'; includeResponse?: boolean } + /** Lifecycle overrides. */ + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } +} + +export interface RunSummary { + runId: string + startedAt: number + endedAt?: number + status: 'running' | 'completed' | 'failed' + promptInboxKey: string + responseText?: string +} + +export type { CodingAgentStatus } + +/** Defaults applied when a SpawnCodingAgentOptions field is omitted. */ +export const SLICE_A_DEFAULTS = { + idleTimeoutMs: 5 * 60_000, + coldBootBudgetMs: 30_000, + runTimeoutMs: 30 * 60_000, + keepWarm: false, +} as const +``` + +- [ ] **Step 2: Verify typecheck** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean.
+ +- [ ] **Step 3: Commit** + +``` +git add packages/coding-agents/src/types.ts +git commit -m "feat(coding-agents): add SpawnCodingAgentOptions, RunSummary, defaults" +``` + +--- + +## Phase 1 — Pure components (parallel, 2 agents) + +These two tasks touch disjoint files. Dispatch in parallel. + +### Task 1.A — `WorkspaceRegistry` + +**Files:** + +- Create: `packages/coding-agents/src/workspace-registry.ts` +- Create: `packages/coding-agents/test/unit/workspace-registry.test.ts` + +- [ ] **Step 1: Write the failing test first** + +```ts +// test/unit/workspace-registry.test.ts +import { describe, it, expect } from 'vitest' +import { WorkspaceRegistry } from '../../src/workspace-registry' + +describe('WorkspaceRegistry.resolveIdentity', () => { + it('resolves volume:name when name is provided', async () => { + const r = await WorkspaceRegistry.resolveIdentity('/p/coding-agent/x', { + type: 'volume', + name: 'foo', + }) + expect(r.identity).toBe('volume:foo') + expect(r.resolved).toEqual({ type: 'volume', name: 'foo' }) + }) + + it('resolves volume: when name is omitted', async () => { + const r = await WorkspaceRegistry.resolveIdentity('/p/coding-agent/x', { + type: 'volume', + }) + expect(r.identity).toBe('volume:/p/coding-agent/x') + expect(r.resolved).toEqual({ type: 'volume', name: '/p/coding-agent/x' }) + }) + + it('resolves bindMount: for bind mounts', async () => { + const r = await WorkspaceRegistry.resolveIdentity('/p/coding-agent/x', { + type: 'bindMount', + hostPath: '/tmp', + }) + expect(r.identity).toMatch(/^bindMount:\/(private\/)?tmp$/) + }) +}) + +describe('WorkspaceRegistry refcount', () => { + it('tracks refs across register/release', () => { + const wr = new WorkspaceRegistry() + expect(wr.refs('volume:foo')).toBe(0) + wr.register('volume:foo', 'a') + wr.register('volume:foo', 'b') + expect(wr.refs('volume:foo')).toBe(2) + wr.release('volume:foo', 'a') + expect(wr.refs('volume:foo')).toBe(1) + wr.release('volume:foo', 'a') // double-release is 
idempotent + expect(wr.refs('volume:foo')).toBe(1) + wr.release('volume:foo', 'b') + expect(wr.refs('volume:foo')).toBe(0) + }) +}) + +describe('WorkspaceRegistry mutex', () => { + it('serializes acquire calls per identity', async () => { + const wr = new WorkspaceRegistry() + const order: Array = [] + const a = wr.acquire('volume:foo').then((release) => { + order.push('a-acquired') + return new Promise((res) => + setTimeout(() => { + order.push('a-release') + release() + res() + }, 50) + ) + }) + // Make sure b queues behind a + await new Promise((r) => setTimeout(r, 5)) + const b = wr.acquire('volume:foo').then((release) => { + order.push('b-acquired') + release() + }) + await Promise.all([a, b]) + expect(order).toEqual(['a-acquired', 'a-release', 'b-acquired']) + }) + + it('does not serialize across distinct identities', async () => { + const wr = new WorkspaceRegistry() + const order: Array = [] + const a = wr.acquire('volume:foo').then((release) => { + order.push('a-acq') + return new Promise((res) => + setTimeout(() => { + release() + res() + }, 50) + ) + }) + const b = wr.acquire('volume:bar').then((release) => { + order.push('b-acq') + release() + }) + await Promise.all([a, b]) + // b runs before a finishes + expect(order[0]).toBe('a-acq') + expect(order[1]).toBe('b-acq') + }) +}) + +describe('WorkspaceRegistry.rebuild', () => { + it('replays a snapshot from durable state', () => { + const wr = new WorkspaceRegistry() + wr.rebuild([ + { identity: 'volume:foo', agentId: 'a' }, + { identity: 'volume:foo', agentId: 'b' }, + { identity: 'volume:bar', agentId: 'c' }, + ]) + expect(wr.refs('volume:foo')).toBe(2) + expect(wr.refs('volume:bar')).toBe(1) + }) +}) +``` + +- [ ] **Step 2: Run the test to verify it fails** + +``` +pnpm -C packages/coding-agents test test/unit/workspace-registry.test.ts +``` + +Expect: FAIL with module-not-found on `../../src/workspace-registry`. 
+ +- [ ] **Step 3: Write `src/workspace-registry.ts`** + +```ts +import { realpath } from 'node:fs/promises' + +export type ResolvedWorkspaceSpec = + | { type: 'volume'; name: string } + | { type: 'bindMount'; hostPath: string } + +export class WorkspaceRegistry { + private readonly refsByIdentity = new Map<string, Set<string>>() + private readonly chainByIdentity = new Map<string, Promise<void>>() + + static async resolveIdentity( + agentId: string, + spec: + | { type: 'volume'; name?: string } + | { type: 'bindMount'; hostPath: string } + ): Promise<{ identity: string; resolved: ResolvedWorkspaceSpec }> { + if (spec.type === 'volume') { + const name = spec.name ?? agentId + return { + identity: `volume:${name}`, + resolved: { type: 'volume', name }, + } + } + const real = await realpath(spec.hostPath) + return { + identity: `bindMount:${real}`, + resolved: { type: 'bindMount', hostPath: real }, + } + } + + register(identity: string, agentId: string): void { + let set = this.refsByIdentity.get(identity) + if (!set) { + set = new Set<string>() + this.refsByIdentity.set(identity, set) + } + set.add(agentId) + } + + release(identity: string, agentId: string): void { + const set = this.refsByIdentity.get(identity) + if (!set) return + set.delete(agentId) + if (set.size === 0) this.refsByIdentity.delete(identity) + } + + refs(identity: string): number { + return this.refsByIdentity.get(identity)?.size ?? 0 + } + + /** + * Acquire the per-identity mutex. Returns a release fn. + * The mutex chains promises: each acquire waits for the prior chain to settle. + */ + acquire(identity: string): Promise<() => void> { + const prior = this.chainByIdentity.get(identity) ?? Promise.resolve() + let releaseFn: () => void + const next = new Promise<void>((res) => { + releaseFn = res + }) + this.chainByIdentity.set( + identity, + prior.then(() => next) + ) + return prior.then(() => releaseFn!)
+ } + + rebuild(snapshots: Array<{ identity: string; agentId: string }>): void { + this.refsByIdentity.clear() + this.chainByIdentity.clear() + for (const { identity, agentId } of snapshots) { + this.register(identity, agentId) + } + } +} +``` + +- [ ] **Step 4: Run the test, verify it passes** + +``` +pnpm -C packages/coding-agents test test/unit/workspace-registry.test.ts +``` + +Expect: PASS. + +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/src/workspace-registry.ts packages/coding-agents/test/unit/workspace-registry.test.ts +git commit -m "feat(coding-agents): WorkspaceRegistry with identity resolution, refcount, mutex" +``` + +--- + +### Task 1.B — `LifecycleManager` + +**Files:** + +- Create: `packages/coding-agents/src/lifecycle-manager.ts` +- Create: `packages/coding-agents/test/unit/lifecycle-manager.test.ts` + +**Constraints:** + +- LM is constructed with `{ provider, bridge }`. +- LM exposes: `ensureRunning(spec)`, `stop(agentId)`, `destroy(agentId)`, `armIdleTimer(agentId, ms, onFire)`, `cancelIdleTimer(agentId)`, `pin(agentId)`, `release(agentId)`, `pinCount(agentId)`, `resetPinCount(agentId)`, `adoptRunningContainers()`. +- LM exposes `startedAtMs: number` (captured in constructor). +- Idle timer is a `Map`. Pin count is `Map`. +- Pin count semantics: `pin` increments and cancels active idle timer; `release` decrements (clamped at 0). 
+ +- [ ] **Step 1: Write the failing test** + +```ts +// test/unit/lifecycle-manager.test.ts +import { describe, it, expect, vi } from 'vitest' +import { LifecycleManager } from '../../src/lifecycle-manager' +import type { + Bridge, + ExecHandle, + ExecRequest, + RecoveredSandbox, + RunTurnArgs, + RunTurnResult, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from '../../src/types' + +function fakeProvider(): SandboxProvider & { + starts: Array + stops: Array +} { + const stub: SandboxInstance = { + instanceId: 'inst-1', + agentId: '', + workspaceMount: '/workspace', + async exec(_req: ExecRequest): Promise { + throw new Error('not used') + }, + } + const fp: any = { + name: 'fake', + starts: [] as Array, + stops: [] as Array, + async start(spec: SandboxSpec): Promise { + fp.starts.push(spec) + return { ...stub, agentId: spec.agentId } + }, + async stop(instanceId: string): Promise { + fp.stops.push(instanceId) + }, + async destroy(_id: string): Promise {}, + async status(_id: string): Promise<'running' | 'stopped' | 'unknown'> { + return 'running' + }, + async recover(): Promise> { + return [] + }, + } + return fp +} + +const fakeBridge: Bridge = { + async runTurn(_args: RunTurnArgs): Promise { + return { exitCode: 0 } + }, +} + +describe('LifecycleManager pin refcount', () => { + it('increments and decrements with a floor at 0', () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + expect(lm.pinCount('a')).toBe(0) + expect(lm.pin('a').count).toBe(1) + expect(lm.pin('a').count).toBe(2) + expect(lm.release('a').count).toBe(1) + expect(lm.release('a').count).toBe(0) + // Extra release is clamped + expect(lm.release('a').count).toBe(0) + }) + + it('resetPinCount clears to 0', () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + lm.pin('a') + lm.pin('a') + lm.resetPinCount('a') + expect(lm.pinCount('a')).toBe(0) + }) +}) + +describe('LifecycleManager idle timer', () 
=> { + it('arms and fires onFire after ms elapses', async () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const onFire = vi.fn() + lm.armIdleTimer('a', 20, onFire) + await new Promise((r) => setTimeout(r, 50)) + expect(onFire).toHaveBeenCalledTimes(1) + }) + + it('cancelIdleTimer prevents fire', async () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const onFire = vi.fn() + lm.armIdleTimer('a', 20, onFire) + lm.cancelIdleTimer('a') + await new Promise((r) => setTimeout(r, 50)) + expect(onFire).not.toHaveBeenCalled() + }) + + it('arming twice cancels prior timer', async () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const first = vi.fn() + const second = vi.fn() + lm.armIdleTimer('a', 20, first) + lm.armIdleTimer('a', 20, second) + await new Promise((r) => setTimeout(r, 50)) + expect(first).not.toHaveBeenCalled() + expect(second).toHaveBeenCalled() + }) +}) + +describe('LifecycleManager ensureRunning', () => { + it('forwards to provider.start', async () => { + const fp = fakeProvider() + const lm = new LifecycleManager({ provider: fp, bridge: fakeBridge }) + await lm.ensureRunning({ + agentId: '/x/coding-agent/y', + kind: 'claude', + workspace: { type: 'volume', name: 'w' }, + env: { K: 'v' }, + }) + expect(fp.starts).toHaveLength(1) + expect(fp.starts[0]!.agentId).toBe('/x/coding-agent/y') + }) +}) + +describe('LifecycleManager.startedAtMs', () => { + it('captures a timestamp at construction', () => { + const before = Date.now() + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const after = Date.now() + expect(lm.startedAtMs).toBeGreaterThanOrEqual(before) + expect(lm.startedAtMs).toBeLessThanOrEqual(after) + }) +}) +``` + +- [ ] **Step 2: Run the test, verify it fails** + +``` +pnpm -C packages/coding-agents test test/unit/lifecycle-manager.test.ts +``` + +Expect: 
FAIL on module-not-found. + +- [ ] **Step 3: Write `src/lifecycle-manager.ts`** + +```ts +import { log } from './log' +import type { + Bridge, + RecoveredSandbox, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from './types' + +export interface LifecycleManagerDeps { + provider: SandboxProvider + bridge: Bridge +} + +export class LifecycleManager { + readonly provider: SandboxProvider + readonly bridge: Bridge + /** Wall-clock ms captured at construction. Used to detect orphan runs. */ + readonly startedAtMs: number + + private readonly idleTimers = new Map() + private readonly pinCounts = new Map() + + constructor(deps: LifecycleManagerDeps) { + this.provider = deps.provider + this.bridge = deps.bridge + this.startedAtMs = Date.now() + } + + // ── sandbox lifecycle ── + + async ensureRunning(spec: SandboxSpec): Promise { + return this.provider.start(spec) + } + + async stop(agentId: string): Promise { + this.cancelIdleTimer(agentId) + // The provider.destroy/stop interface is keyed by instanceId, not agentId. + // We rely on provider.destroy(agentId) which finds + removes by label. 
+ await this.provider.destroy(agentId).catch((err) => { + log.warn( + { err, agentId }, + 'lifecycleManager.stop: provider.destroy failed' + ) + }) + } + + async destroy(agentId: string): Promise { + await this.stop(agentId) + this.pinCounts.delete(agentId) + } + + async adoptRunningContainers(): Promise> { + return this.provider.recover() + } + + // ── idle timer ── + + armIdleTimer(agentId: string, ms: number, onFire: () => void): void { + this.cancelIdleTimer(agentId) + const handle = setTimeout(() => { + this.idleTimers.delete(agentId) + try { + onFire() + } catch (err) { + log.warn({ err, agentId }, 'idle timer onFire threw') + } + }, ms) + this.idleTimers.set(agentId, handle) + } + + cancelIdleTimer(agentId: string): void { + const handle = this.idleTimers.get(agentId) + if (handle) { + clearTimeout(handle) + this.idleTimers.delete(agentId) + } + } + + // ── pin refcount ── + + pin(agentId: string): { count: number } { + const next = (this.pinCounts.get(agentId) ?? 0) + 1 + this.pinCounts.set(agentId, next) + if (next === 1) this.cancelIdleTimer(agentId) + return { count: next } + } + + release(agentId: string): { count: number } { + const cur = this.pinCounts.get(agentId) ?? 0 + const next = Math.max(0, cur - 1) + if (next === 0) this.pinCounts.delete(agentId) + else this.pinCounts.set(agentId, next) + return { count: next } + } + + pinCount(agentId: string): number { + return this.pinCounts.get(agentId) ?? 0 + } + + resetPinCount(agentId: string): void { + this.pinCounts.delete(agentId) + } +} +``` + +- [ ] **Step 4: Run the test, verify it passes** + +``` +pnpm -C packages/coding-agents test test/unit/lifecycle-manager.test.ts +``` + +Expect: PASS. 
+ +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/src/lifecycle-manager.ts packages/coding-agents/test/unit/lifecycle-manager.test.ts +git commit -m "feat(coding-agents): LifecycleManager with idle timer and pin refcount" +``` + +--- + +## Phase 2 — Entity (sequential) + +### Task 2.1 — Entity handler + +**Files:** + +- Create: `packages/coding-agents/src/entity/handler.ts` +- Create: `packages/coding-agents/test/unit/entity-handler.test.ts` + +**Constraints:** + +- The handler is a function `makeCodingAgentHandler(lm, wr, options)` returning an async `(ctx, wake) => void`. +- `options: { defaults: { idleTimeoutMs, coldBootBudgetMs, runTimeoutMs }, env: () => Record }`. +- The handler reads/writes the StreamDB pattern: `ctx.db.collections.X.get`, `ctx.db.actions.X_insert/X_update`. +- Inbox messages: pending messages are ones with `key > sessionMeta.lastInboxKey`. Slice A reuses `sessionMeta` to track this since we don't have a separate `cursorState`. Add a `lastInboxKey?: string` field. +- Reconcile rules from spec table apply on every entry (after first-wake init). + +- [ ] **Step 1: Add `lastInboxKey` to the meta schema** + +Modify `packages/coding-agents/src/entity/collections.ts`. Add `lastInboxKey: z.string().optional()` to `sessionMetaRowSchema`: + +```ts +export const sessionMetaRowSchema = z.object({ + // ... existing fields ... 
+ lastInboxKey: z.string().optional(), +}) +``` + +- [ ] **Step 2: Write the failing test** + +```ts +// test/unit/entity-handler.test.ts +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { z } from 'zod' +import { makeCodingAgentHandler } from '../../src/entity/handler' +import { LifecycleManager } from '../../src/lifecycle-manager' +import { WorkspaceRegistry } from '../../src/workspace-registry' +import type { + Bridge, + RunTurnArgs, + RunTurnResult, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from '../../src/types' + +// ── Fakes ── + +interface InboxRow { + key: string + payload?: unknown + message_type?: string +} + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k: string) { + return rows.get(k) + }, + get toArray(): Array { + return Array.from(rows.values()) + }, + } +} + +function makeFakeCtx(opts: { + entityUrl: string + args?: Record + inbox?: Array + meta?: any + runs?: Array +}) { + const sessionMeta = makeCollection() + const runs = makeCollection() + const events = makeCollection() + const lifecycle = makeCollection() + const inbox = makeCollection() + + if (opts.meta) sessionMeta.rows.set('current', opts.meta) + for (const r of opts.runs ?? []) runs.rows.set(r.key, r) + for (const i of opts.inbox ?? []) inbox.rows.set(i.key, i) + + const recordedRuns: Array<{ + key: string + status?: string + response?: string + }> = [] + let runCounter = 0 + + const ctx: any = { + entityUrl: opts.entityUrl, + entityType: 'coding-agent', + args: opts.args ?? 
{}, + tags: {}, + firstWake: false, + db: { + collections: { sessionMeta, runs, events, lifecycle, inbox }, + actions: { + sessionMeta_insert: ({ row }: { row: any }) => + sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ + key, + updater, + }: { + key: string + updater: (d: any) => void + }) => { + const cur = sessionMeta.rows.get(key) + if (cur) updater(cur) + }, + runs_insert: ({ row }: { row: any }) => runs.rows.set(row.key, row), + runs_update: ({ + key, + updater, + }: { + key: string + updater: (d: any) => void + }) => { + const cur = runs.rows.get(key) + if (cur) updater(cur) + }, + events_insert: ({ row }: { row: any }) => events.rows.set(row.key, row), + lifecycle_insert: ({ row }: { row: any }) => + lifecycle.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent = { key, status: undefined as string | undefined, response: '' } + recordedRuns.push(ent) + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: vi.fn(), + } + + return { ctx, recordedRuns } +} + +function makeFakeProvider( + initialStatus: 'running' | 'stopped' | 'unknown' = 'stopped' +) { + const stub: SandboxInstance = { + instanceId: 'inst-1', + agentId: '', + workspaceMount: '/workspace', + async exec() { + throw new Error('not used') + }, + } + const fp: any = { + name: 'fake', + statusReturn: initialStatus, + async start(spec: SandboxSpec): Promise { + return { ...stub, agentId: spec.agentId } + }, + async stop(_id: string) {}, + async destroy(_id: string) {}, + async status() { + return fp.statusReturn + }, + async recover() { + return [] + }, + } + return fp +} + +describe('entity handler — first-wake init', () => { + it('seeds sessionMeta when none exists, using args', async () => { + const lm = new LifecycleManager({ + provider: makeFakeProvider(), + bridge: { + async runTurn() { + return { 
exitCode: 0 } + }, + }, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({}), + }) + + const { ctx } = makeFakeCtx({ + entityUrl: '/test/coding-agent/x', + args: { + kind: 'claude', + workspace: { type: 'volume', name: 'w' }, + }, + }) + + await handler(ctx, { type: 'message_received' } as any) + + const meta = ctx.db.collections.sessionMeta.get('current') + expect(meta).toBeDefined() + expect(meta.status).toBe('cold') + expect(meta.kind).toBe('claude') + expect(meta.workspaceIdentity).toBe('volume:w') + expect(meta.pinned).toBe(false) + }) +}) + +describe('entity handler — pin/release', () => { + it('pin sets pinned=true and cancels timer', async () => { + const lm = new LifecycleManager({ + provider: makeFakeProvider('running'), + bridge: { + async runTurn() { + return { exitCode: 0 } + }, + }, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({}), + }) + const meta = { + key: 'current', + status: 'idle', + kind: 'claude', + pinned: false, + workspaceIdentity: 'volume:w', + workspaceSpec: { type: 'volume', name: 'w' }, + idleTimeoutMs: 1000, + keepWarm: false, + } + const { ctx } = makeFakeCtx({ + entityUrl: '/t/coding-agent/x', + meta, + inbox: [{ key: 'i1', message_type: 'pin' }], + }) + await handler(ctx, { type: 'message_received' } as any) + expect(ctx.db.collections.sessionMeta.get('current').pinned).toBe(true) + expect(lm.pinCount('/t/coding-agent/x')).toBe(1) + }) +}) + +describe('entity handler — reconcile orphan run', () => { + it('marks orphan run failed when meta=running and run.startedAt < lm.startedAtMs', async () => { + const lm = new LifecycleManager({ + provider: makeFakeProvider('stopped'), + bridge: { + async runTurn() { + return { exitCode: 0 } + }, + }, 
+ }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({}), + }) + const oldStart = lm.startedAtMs - 10_000 + const meta = { + key: 'current', + status: 'running', + kind: 'claude', + pinned: false, + workspaceIdentity: 'volume:w', + workspaceSpec: { type: 'volume', name: 'w' }, + idleTimeoutMs: 1000, + keepWarm: false, + instanceId: 'old-inst', + } + const orphanRun = { + key: 'run-old', + startedAt: oldStart, + status: 'running', + promptInboxKey: 'i0', + } + const { ctx } = makeFakeCtx({ + entityUrl: '/t/coding-agent/x', + meta, + runs: [orphanRun], + }) + await handler(ctx, { type: 'message_received' } as any) + const updated = ctx.db.collections.runs.get('run-old') + expect(updated.status).toBe('failed') + expect(updated.finishReason).toBe('orphaned') + expect(ctx.db.collections.sessionMeta.get('current').status).toBe('cold') + }) +}) + +describe('entity handler — processPrompt happy path', () => { + it('runs a turn, records events, ends run completed', async () => { + const events: Array = [ + { type: 'session_init', sessionId: 'abc', ts: 1 }, + { type: 'assistant_message', text: 'hello', ts: 2 }, + ] + const bridge: Bridge = { + async runTurn(args: RunTurnArgs): Promise { + for (const e of events) args.onEvent(e as any) + return { exitCode: 0, finalText: 'hello' } + }, + } + const lm = new LifecycleManager({ + provider: makeFakeProvider('stopped'), + bridge, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({ ANTHROPIC_API_KEY: 'sk-test' }), + }) + const meta = { + key: 'current', + status: 'cold', + kind: 'claude', + pinned: false, + workspaceIdentity: 'volume:w', + workspaceSpec: { type: 'volume', name: 'w' }, + idleTimeoutMs: 1000, + keepWarm: false, + } + const { ctx, 
recordedRuns } = makeFakeCtx({ + entityUrl: '/t/coding-agent/x', + meta, + inbox: [{ key: 'i1', message_type: 'prompt', payload: { text: 'hi' } }], + }) + await handler(ctx, { type: 'message_received' } as any) + + expect(recordedRuns).toHaveLength(1) + expect(recordedRuns[0]!.status).toBe('completed') + expect(recordedRuns[0]!.response).toBe('hello') + + const finalMeta = ctx.db.collections.sessionMeta.get('current') + expect(finalMeta.status).toBe('idle') + + const runs = Array.from(ctx.db.collections.runs.rows.values()) + expect(runs).toHaveLength(1) + expect((runs[0] as any).status).toBe('completed') + + const eventRows = Array.from(ctx.db.collections.events.rows.values()) + expect(eventRows).toHaveLength(2) + }) +}) +``` + +- [ ] **Step 3: Run the test, verify it fails** + +``` +pnpm -C packages/coding-agents test test/unit/entity-handler.test.ts +``` + +Expect: FAIL on missing module. + +- [ ] **Step 4: Write `src/entity/handler.ts`** + +```ts +import type { NormalizedEvent } from 'agent-session-protocol' +import { log } from '../log' +import { WorkspaceRegistry } from '../workspace-registry' +import type { LifecycleManager } from '../lifecycle-manager' +import type { + RunRow, + SessionMetaRow, + EventRow, + LifecycleRow, +} from './collections' +import { promptMessageSchema } from './messages' + +export interface CodingAgentHandlerOptions { + defaults: { + idleTimeoutMs: number + coldBootBudgetMs: number + runTimeoutMs: number + } + /** Called per-turn to source CLI env (e.g. ANTHROPIC_API_KEY). 
*/ + env: () => Record<string, string> +} + +interface InboxRow { + key: string + payload?: unknown + message_type?: string +} + +const NS_MAX = String(Number.MAX_SAFE_INTEGER).length + +function nextRunId(existing: ReadonlyArray<{ key: string }>): string { + // Deterministic: run-N where N = count + 1 + return `run-${existing.length + 1}` +} + +/** Builds an event row key `<runId>:<seq>`; seq is zero-padded to NS_MAX digits so keys sort in sequence order. */ +function eventKey(runId: string, seq: number): string { + return `${runId}:${String(seq).padStart(NS_MAX, '0')}` +} + +// Per-process counter: keeps lifecycle keys distinct when several rows are written in the same millisecond. +let lifecycleSeq = 0 + +/** Builds a lifecycle row key `<label>:<epoch ms>-<n>`. */ +function lifecycleKey(label: string): string { + return `${label}:${Date.now()}-${lifecycleSeq++}` +} + +/** + * Settles with `p`, or rejects with an Error named `TimeoutError` when `p` has + * not settled within `ms` milliseconds. The timer is cleared once `p` settles. + * `p` itself is not cancelled on timeout. + */ +function raceTimeout<T>(p: Promise<T>, ms: number): Promise<T> { + return new Promise<T>((resolve, reject) => { + const handle = setTimeout(() => { + const e = new Error('TimeoutError') + e.name = 'TimeoutError' + reject(e) + }, ms) + p.then( + (v) => { + clearTimeout(handle) + resolve(v) + }, + (err) => { + clearTimeout(handle) + reject(err) + } + ) + }) +} + +export function makeCodingAgentHandler( + lm: LifecycleManager, + wr: WorkspaceRegistry, + options: CodingAgentHandlerOptions +) { + return async function handleCodingAgentEntity( + ctx: any, + _wake: any + ): Promise<void> { + const agentId = ctx.entityUrl as string + const sessionMetaCol = ctx.db.collections.sessionMeta + const runsCol = ctx.db.collections.runs + const inboxCol = ctx.db.collections.inbox + + // ─── 1) FIRST-WAKE INIT ──────────────────────────────────────────────── + + let meta = sessionMetaCol.get('current') as SessionMetaRow | undefined + if (!meta) { + const args = ctx.args as { + kind?: 'claude' + workspace?: any + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } + } + const ws = args.workspace ?? { type: 'volume' } + const resolved = await WorkspaceRegistry.resolveIdentity(agentId, ws) + const idleTimeoutMs = + args.lifecycle?.idleTimeoutMs ?? options.defaults.idleTimeoutMs + const keepWarm = args.lifecycle?.keepWarm ?? 
false + const initial: SessionMetaRow = { + key: 'current', + status: 'cold', + kind: args.kind ?? 'claude', + pinned: false, + workspaceIdentity: resolved.identity, + workspaceSpec: resolved.resolved, + idleTimeoutMs, + keepWarm, + } + ctx.db.actions.sessionMeta_insert({ row: initial }) + wr.register(resolved.identity, agentId) + meta = initial + } + + if (meta.status === 'destroyed') { + // Tombstoned. Ignore everything. + return + } + + // ─── 2) RECONCILE ────────────────────────────────────────────────────── + + const providerStatus = await lm.provider.status(agentId) + const openRun = (runsCol.toArray as Array).find( + (r) => r.status === 'running' + ) + const isOrphaned = openRun && openRun.startedAt < lm.startedAtMs + + if (meta.status === 'running' && providerStatus !== 'running') { + if (openRun) { + ctx.db.actions.runs_update({ + key: openRun.key, + updater: (d: RunRow) => { + d.status = 'failed' + d.finishReason = 'orphaned' + d.endedAt = Date.now() + }, + }) + } + ctx.db.actions.lifecycle_insert({ + row: { + key: lifecycleKey('orphan'), + ts: Date.now(), + event: 'orphan.detected', + } satisfies LifecycleRow, + }) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'cold' + d.instanceId = undefined + }, + }) + meta = sessionMetaCol.get('current')! + } else if ( + meta.status === 'running' && + providerStatus === 'running' && + isOrphaned + ) { + ctx.db.actions.runs_update({ + key: openRun!.key, + updater: (d: RunRow) => { + d.status = 'failed' + d.finishReason = 'orphaned' + d.endedAt = Date.now() + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: lifecycleKey('orphan'), + ts: Date.now(), + event: 'orphan.detected', + } satisfies LifecycleRow, + }) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'idle' + }, + }) + meta = sessionMetaCol.get('current')! 
+ } else if (meta.status === 'idle' && providerStatus === 'stopped') { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'cold' + d.instanceId = undefined + }, + }) + meta = sessionMetaCol.get('current')! + } else if ( + (meta.status === 'starting' || meta.status === 'stopping') && + providerStatus !== 'running' + ) { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'cold' + }, + }) + meta = sessionMetaCol.get('current')! + } else if ( + (meta.status === 'starting' || meta.status === 'stopping') && + providerStatus === 'running' + ) { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'idle' + }, + }) + meta = sessionMetaCol.get('current')! + } + + // ─── 3) PROCESS PENDING INBOX ────────────────────────────────────────── + + const inboxRows = (inboxCol.toArray as Array) + .slice() + .sort((a, b) => (a.key < b.key ? -1 : a.key > b.key ? 1 : 0)) + const lastKey = meta.lastInboxKey ?? '' + const pending = inboxRows.filter((m) => m.key > lastKey) + + for (const inboxMsg of pending) { + try { + await dispatchInboxMessage(ctx, lm, wr, options, inboxMsg) + } catch (err) { + log.error({ err, inboxMsg }, 'coding-agent handler dispatch threw') + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'error' + d.lastError = err instanceof Error ? err.message : String(err) + }, + }) + } + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.lastInboxKey = inboxMsg.key + }, + }) + meta = sessionMetaCol.get('current')! + if (meta.status === 'destroyed') return + } + } +} + +async function dispatchInboxMessage( + ctx: any, + lm: LifecycleManager, + wr: WorkspaceRegistry, + options: CodingAgentHandlerOptions, + inboxMsg: InboxRow +): Promise { + const type = inboxMsg.message_type ?? 
'prompt' + switch (type) { + case 'prompt': + return processPrompt(ctx, lm, wr, options, inboxMsg) + case 'pin': + return processPin(ctx, lm) + case 'release': + return processRelease(ctx, lm) + case 'stop': + return processStop(ctx, lm) + case 'destroy': + return processDestroy(ctx, lm, wr) + default: + log.warn({ type }, 'coding-agent: unknown inbox message type') + } +} + +/** + * Handles one `prompt` inbox message: ensures the sandbox is running (bounded + * by `coldBootBudgetMs`), then runs a single turn through the bridge while + * holding the workspace lease. Invalid payloads are logged and skipped. + */ +async function processPrompt( + ctx: any, + lm: LifecycleManager, + wr: WorkspaceRegistry, + options: CodingAgentHandlerOptions, + inboxMsg: InboxRow +): Promise<void> { + const parsed = promptMessageSchema.safeParse(inboxMsg.payload) + if (!parsed.success) { + // Surface malformed prompts instead of dropping them silently. + log.warn( + { inboxKey: inboxMsg.key, issues: parsed.error.issues }, + 'coding-agent: ignoring prompt with invalid payload' + ) + return + } + const promptText = parsed.data.text + const agentId = ctx.entityUrl as string + const sessionMetaCol = ctx.db.collections.sessionMeta + + let meta = sessionMetaCol.get('current') as SessionMetaRow + + // Cold-boot: ensure sandbox up + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'starting' + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + // lifecycleKey() adds a disambiguating suffix; a bare `boot:<ms>` key collides when two boot rows land in the same millisecond. + key: lifecycleKey('boot'), + ts: Date.now(), + event: 'sandbox.starting', + } satisfies LifecycleRow, + }) + + let sandbox + try { + sandbox = await raceTimeout( + lm.ensureRunning({ + agentId, + kind: meta.kind, + workspace: meta.workspaceSpec, + env: options.env(), + }), + options.defaults.coldBootBudgetMs + ) + } catch (err) { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'error' + d.lastError = err instanceof Error ? err.message : String(err) + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: lifecycleKey('boot'), + ts: Date.now(), + event: 'sandbox.failed', + detail: err instanceof Error ? 
err.message : String(err), + } satisfies LifecycleRow, + }) + return + } + + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'idle' + d.instanceId = sandbox.instanceId + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `boot:${Date.now()}`, + ts: Date.now(), + event: 'sandbox.started', + } satisfies LifecycleRow, + }) + + meta = sessionMetaCol.get('current')! + const releaseLease = await wr.acquire(meta.workspaceIdentity) + try { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'running' + d.currentPromptInboxKey = inboxMsg.key + }, + }) + + const recordedRun = ctx.recordRun() + const runId = recordedRun.key + ctx.db.actions.runs_insert({ + row: { + key: runId, + startedAt: Date.now(), + status: 'running', + promptInboxKey: inboxMsg.key, + } satisfies RunRow, + }) + + let seq = 0 + let finalText: string | undefined + try { + const result = await raceTimeout( + lm.bridge.runTurn({ + sandbox, + kind: meta.kind, + prompt: promptText, + onEvent: (e: NormalizedEvent) => { + ctx.db.actions.events_insert({ + row: { + key: eventKey(runId, seq), + runId, + seq, + ts: Date.now(), + type: e.type, + payload: e as unknown as Record, + } satisfies EventRow, + }) + seq++ + }, + }), + options.defaults.runTimeoutMs + ) + finalText = result.finalText + ctx.db.actions.runs_update({ + key: runId, + updater: (d: RunRow) => { + d.status = 'completed' + d.endedAt = Date.now() + d.responseText = finalText + }, + }) + if (finalText) recordedRun.attachResponse(finalText) + recordedRun.end({ status: 'completed' }) + } catch (err) { + const reason = + err instanceof Error && err.name === 'TimeoutError' + ? 'timeout' + : `cli-exit:${(err instanceof Error ? 
err.message : String(err)).slice(0, 200)}` + ctx.db.actions.runs_update({ + key: runId, + updater: (d: RunRow) => { + d.status = 'failed' + d.endedAt = Date.now() + d.finishReason = reason + }, + }) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'error' + d.lastError = err instanceof Error ? err.message : String(err) + }, + }) + recordedRun.end({ status: 'failed', finishReason: reason }) + return + } + + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'idle' + d.currentPromptInboxKey = undefined + }, + }) + + if (!meta.keepWarm && lm.pinCount(agentId) === 0) { + lm.armIdleTimer(agentId, meta.idleTimeoutMs, () => { + // Fire-and-forget: provider.destroy is keyed by agentId. + void lm.provider.destroy(agentId).catch((err) => { + log.warn({ err, agentId }, 'idle stop failed') + }) + }) + } + } finally { + releaseLease() + } +} + +function processPin(ctx: any, lm: LifecycleManager): void { + const agentId = ctx.entityUrl as string + const { count } = lm.pin(agentId) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.pinned = true + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `pin:${Date.now()}`, + ts: Date.now(), + event: 'pin', + detail: `count=${count}`, + } satisfies LifecycleRow, + }) +} + +function processRelease(ctx: any, lm: LifecycleManager): void { + const agentId = ctx.entityUrl as string + const { count } = lm.release(agentId) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.pinned = count > 0 + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `release:${Date.now()}`, + ts: Date.now(), + event: 'release', + detail: `count=${count}`, + } satisfies LifecycleRow, + }) + if (count === 0) { + const meta = ctx.db.collections.sessionMeta.get('current') as SessionMetaRow + if (!meta.keepWarm && meta.status === 'idle') { + 
lm.armIdleTimer(agentId, meta.idleTimeoutMs, () => { + void lm.provider.destroy(agentId).catch(() => undefined) + }) + } + } +} + +async function processStop(ctx: any, lm: LifecycleManager): Promise { + const agentId = ctx.entityUrl as string + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'stopping' + }, + }) + await lm.stop(agentId) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'cold' + d.instanceId = undefined + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `stop:${Date.now()}`, + ts: Date.now(), + event: 'sandbox.stopped', + } satisfies LifecycleRow, + }) +} + +async function processDestroy( + ctx: any, + lm: LifecycleManager, + wr: WorkspaceRegistry +): Promise { + const agentId = ctx.entityUrl as string + const meta = ctx.db.collections.sessionMeta.get('current') as SessionMetaRow + await lm.destroy(agentId) + if (meta) wr.release(meta.workspaceIdentity, agentId) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'destroyed' + d.instanceId = undefined + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `destroy:${Date.now()}`, + ts: Date.now(), + event: 'sandbox.stopped', + detail: 'destroyed', + } satisfies LifecycleRow, + }) +} +``` + +- [ ] **Step 5: Run the test, verify it passes** + +``` +pnpm -C packages/coding-agents test test/unit/entity-handler.test.ts +``` + +Expect: PASS (4 tests). + +- [ ] **Step 6: Run full unit test suite to confirm no regressions** + +``` +pnpm -C packages/coding-agents test +``` + +Expect: all unit tests pass. 
+ +- [ ] **Step 7: Commit** + +``` +git add packages/coding-agents/src/entity/handler.ts packages/coding-agents/src/entity/collections.ts packages/coding-agents/test/unit/entity-handler.test.ts +git commit -m "feat(coding-agents): entity handler with reconcile, prompt/pin/release/stop/destroy" +``` + +--- + +### Task 2.2 — `registerCodingAgent` + +**Files:** + +- Create: `packages/coding-agents/src/entity/register.ts` +- Modify: `packages/coding-agents/src/index.ts` + +- [ ] **Step 1: Write `src/entity/register.ts`** + +```ts +import type { EntityRegistry } from '@electric-ax/agents-runtime' +import { LifecycleManager } from '../lifecycle-manager' +import { WorkspaceRegistry } from '../workspace-registry' +import { SLICE_A_DEFAULTS } from '../types' +import type { Bridge, SandboxProvider } from '../types' +import { + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + eventRowSchema, + lifecycleRowSchema, + runRowSchema, + sessionMetaRowSchema, +} from './collections' +import { + destroyMessageSchema, + pinMessageSchema, + promptMessageSchema, + releaseMessageSchema, + stopMessageSchema, +} from './messages' +import { makeCodingAgentHandler } from './handler' +import { z } from 'zod' + +export interface RegisterCodingAgentDeps { + provider: SandboxProvider + bridge: Bridge + /** Override defaults; used by tests. */ + defaults?: Partial<{ + idleTimeoutMs: number + coldBootBudgetMs: number + runTimeoutMs: number + }> + /** Per-turn env supplier. Defaults to forwarding ANTHROPIC_API_KEY from process.env. 
*/ + env?: () => Record +} + +const creationArgsSchema = z.object({ + kind: z.enum(['claude']).optional(), + workspace: z + .union([ + z.object({ + type: z.literal('volume'), + name: z.string().optional(), + }), + z.object({ + type: z.literal('bindMount'), + hostPath: z.string(), + }), + ]) + .optional(), + lifecycle: z + .object({ + idleTimeoutMs: z.number().optional(), + keepWarm: z.boolean().optional(), + }) + .optional(), +}) + +export function registerCodingAgent( + registry: EntityRegistry, + deps: RegisterCodingAgentDeps +): void { + const lm = new LifecycleManager(deps) + const wr = new WorkspaceRegistry() + const defaults = { + idleTimeoutMs: + deps.defaults?.idleTimeoutMs ?? SLICE_A_DEFAULTS.idleTimeoutMs, + coldBootBudgetMs: + deps.defaults?.coldBootBudgetMs ?? SLICE_A_DEFAULTS.coldBootBudgetMs, + runTimeoutMs: deps.defaults?.runTimeoutMs ?? SLICE_A_DEFAULTS.runTimeoutMs, + } + const env = + deps.env ?? + (() => { + const out: Record = {} + const k = process.env.ANTHROPIC_API_KEY + if (k) out.ANTHROPIC_API_KEY = k + return out + }) + + registry.define('coding-agent', { + description: + 'Runs a Claude Code CLI session inside a Docker sandbox. 
Manages lifecycle (cold/idle/running) and workspace lease.', + creationSchema: creationArgsSchema, + inboxSchemas: { + prompt: promptMessageSchema, + pin: pinMessageSchema, + release: releaseMessageSchema, + stop: stopMessageSchema, + destroy: destroyMessageSchema, + }, + state: { + sessionMeta: { + schema: sessionMetaRowSchema, + type: CODING_AGENT_SESSION_META_COLLECTION_TYPE, + primaryKey: 'key', + }, + runs: { + schema: runRowSchema, + type: CODING_AGENT_RUNS_COLLECTION_TYPE, + primaryKey: 'key', + }, + events: { + schema: eventRowSchema, + type: CODING_AGENT_EVENTS_COLLECTION_TYPE, + primaryKey: 'key', + }, + lifecycle: { + schema: lifecycleRowSchema, + type: CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + primaryKey: 'key', + }, + }, + handler: makeCodingAgentHandler(lm, wr, { defaults, env }), + }) +} + +/** Test-only accessor for asserting workspace registry state from outside. */ +export interface CodingAgentInternals { + lifecycleManager: LifecycleManager + workspaceRegistry: WorkspaceRegistry +} +``` + +- [ ] **Step 2: Update `src/index.ts`** + +Replace contents: + +```ts +export type { + CodingAgentKind, + SandboxSpec, + ExecRequest, + ExecHandle, + SandboxInstance, + SandboxProvider, + RecoveredSandbox, + RunTurnArgs, + RunTurnResult, + Bridge, + SpawnCodingAgentOptions, + RunSummary, + CodingAgentStatus, +} from './types' +export { LocalDockerProvider } from './providers/local-docker' +export { StdioBridge } from './bridge/stdio-bridge' +export { LifecycleManager } from './lifecycle-manager' +export { WorkspaceRegistry } from './workspace-registry' +export { + registerCodingAgent, + type RegisterCodingAgentDeps, +} from './entity/register' +export { + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, +} from './entity/collections' +``` + +- [ ] **Step 3: Run typecheck** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean. 
+ +- [ ] **Step 4: Run all unit tests** + +``` +pnpm -C packages/coding-agents test +``` + +Expect: all pass. + +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/src/entity/register.ts packages/coding-agents/src/index.ts +git commit -m "feat(coding-agents): registerCodingAgent helper" +``` + +--- + +### Task 2.3 — Runtime API surface (`ctx.spawnCodingAgent` / `observeCodingAgent`) + +**Files:** + +- Modify: `packages/agents-runtime/src/types.ts` (add types and HandlerContext methods) +- Modify: `packages/agents-runtime/src/context-factory.ts` (add impl) + +- [ ] **Step 1: Read the existing `useCodingAgent` impl as a reference** + +Already known location: `packages/agents-runtime/src/context-factory.ts:561-629`. New helpers will be placed alongside it. + +- [ ] **Step 2: Add types in `packages/agents-runtime/src/types.ts`** + +Find the existing `CodingSessionHandle` interface (~line 800). Insert these new types **after** it: + +```ts +// ─── Coding Agent (Slice A) ─────────────────────────────────────────────── + +export type CodingAgentSliceAStatus = + | 'cold' + | 'starting' + | 'idle' + | 'running' + | 'stopping' + | 'error' + | 'destroyed' + +export interface SpawnCodingAgentOptions { + id: string + kind: 'claude' + workspace: + | { type: 'volume'; name?: string } + | { type: 'bindMount'; hostPath: string } + initialPrompt?: string + wake?: { on: 'runFinished'; includeResponse?: boolean } + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } +} + +export interface CodingAgentRunSummary { + runId: string + startedAt: number + endedAt?: number + status: 'running' | 'completed' | 'failed' + promptInboxKey: string + responseText?: string +} + +export interface CodingAgentState { + status: CodingAgentSliceAStatus + pinned: boolean + workspace: { identity: string; sharedRefs: number } + lastError?: string + runs: ReadonlyArray +} + +export interface CodingAgentHandle { + readonly url: string + readonly kind: 'claude' + send(prompt: string): 
Promise<{ runId: string }> + events(opts?: { since?: 'start' | 'now' }): AsyncIterable<unknown> + state(): CodingAgentState + pin(): Promise<void> + release(): Promise<void> + stop(): Promise<void> + destroy(): Promise<void> +} +``` + +Then **add to the `HandlerContext` interface** (the one defined ~line 882). Insert these two methods after `useCodingAgent`: + +```ts +/** + * Spawn (or attach to) a `coding-agent` entity that runs a CLI inside a + * Docker sandbox with managed lifecycle (cold/idle/running, idle hibernation, + * pin/release, workspace lease). Requires `registerCodingAgent` to have been + * called on the runtime's registry. + */ +spawnCodingAgent: (opts: SpawnCodingAgentOptions) => Promise<CodingAgentHandle> +observeCodingAgent: (id: string) => Promise<CodingAgentHandle> +``` + +- [ ] **Step 3: Implement in `packages/agents-runtime/src/context-factory.ts`** + +Find `async useCodingAgent(...)` (line ~561). Insert these two new methods immediately after it (before `send(...)`): + +```ts + async spawnCodingAgent( + opts: SpawnCodingAgentOptions + ): Promise<CodingAgentHandle> { + const spawnArgs: Record<string, unknown> = { + kind: opts.kind, + workspace: opts.workspace, + } + if (opts.lifecycle !== undefined) spawnArgs.lifecycle = opts.lifecycle + + const initialMessage = + opts.initialPrompt !== undefined + ? { type: 'prompt' as const, payload: { text: opts.initialPrompt } } + : undefined + + // `runFinished` is the only wake used here; `opts.wake.includeResponse` is not forwarded yet. + const wake: Wake = `runFinished` + + const entityHandle = await config.doSpawn( + 'coding-agent', + opts.id, + spawnArgs, + { + observe: true, + wake, + ...(initialMessage ? 
{ initialMessage } : {}), + } + ) + + return makeCodingAgentHandle( + config, + entityHandle.url, + entityHandle + ) + }, + async observeCodingAgent(id: string): Promise { + const url = `${entityUrl}/coding-agent/${id}` + const entityHandle = await (config.doObserve as any)({ + sourceType: 'entity', + path: url, + }) + return makeCodingAgentHandle(config, url, entityHandle) + }, +``` + +Then add this helper at the bottom of the same file (above the closing return of `createContextFactory` or whatever exports it — find the right scope by reading file context): + +```ts +function makeCodingAgentHandle( + config: any, + url: string, + entityHandle: any +): CodingAgentHandle { + const sendInbox = ( + payload: unknown, + type: string + ): Promise<{ runId: string }> => { + config.executeSend({ + targetUrl: url, + payload, + type, + }) + // The inbox key isn't known to the caller; surface a synthetic id. + return Promise.resolve({ runId: `run-pending-${Date.now()}` }) + } + + const readMeta = (): any => { + const c = entityHandle.db?.collections?.sessionMeta + return c?.get?.('current') + } + const readRuns = (): Array => { + const c = entityHandle.db?.collections?.runs + if (!c) return [] + const rows = (c as { toArray?: unknown }).toArray + if (!Array.isArray(rows)) return [] + return rows.map((r: any) => ({ + runId: r.key, + startedAt: r.startedAt, + endedAt: r.endedAt, + status: r.status, + promptInboxKey: r.promptInboxKey, + responseText: r.responseText, + })) + } + + return { + url, + kind: 'claude', + send: (text: string) => { + config.executeSend({ + targetUrl: url, + payload: { text }, + type: 'prompt', + }) + return Promise.resolve({ runId: `run-pending-${Date.now()}` }) + }, + pin: () => sendInbox({}, 'pin').then(() => undefined), + release: () => sendInbox({}, 'release').then(() => undefined), + stop: () => sendInbox({}, 'stop').then(() => undefined), + destroy: () => sendInbox({}, 'destroy').then(() => undefined), + state(): CodingAgentState { + const meta = 
readMeta() + return { + status: meta?.status ?? 'cold', + pinned: meta?.pinned ?? false, + workspace: { + identity: meta?.workspaceIdentity ?? '', + sharedRefs: 1, // server-only state; see Slice A spec + }, + lastError: meta?.lastError, + runs: readRuns(), + } + }, + events(opts?: { since?: 'start' | 'now' }) { + // Slice A: simple async iterator that yields current rows then stops. + // Live tailing is added with the UI in Slice C. + const since = opts?.since ?? 'now' + const c = entityHandle.db?.collections?.events + const rows: Array<{ payload: unknown }> = + c && Array.isArray((c as any).toArray) ? (c as any).toArray : [] + const initial = since === 'start' ? rows.slice() : [] + return (async function* () { + for (const r of initial) { + yield r.payload + } + })() + }, + } +} +``` + +Imports needed at the top of the file (verify they aren't already imported): + +```ts +import type { + SpawnCodingAgentOptions, + CodingAgentHandle, + CodingAgentState, + CodingAgentRunSummary, +} from './types' +``` + +- [ ] **Step 4: Add a runtime unit test** + +Create `packages/agents-runtime/test/spawn-coding-agent.test.ts`: + +```ts +import { describe, it, expect, vi } from 'vitest' +// NOTE: This test calls into the context factory at a low level. The real +// runtime test suite verifies the broader integration. Slice A only asserts +// the desugaring contract. + +import type { CodingAgentHandle, SpawnCodingAgentOptions } from '../src/types' + +describe('ctx.spawnCodingAgent desugaring', () => { + // Lightweight contract test: importing the runtime's types confirms the + // public surface compiles. Runtime-level integration coverage is in + // packages/coding-agents/test/integration/slice-a.test.ts. 
+ it('exports SpawnCodingAgentOptions', () => { + const opts: SpawnCodingAgentOptions = { + id: 'x', + kind: 'claude', + workspace: { type: 'volume' }, + } + expect(opts.kind).toBe('claude') + }) + it('exports CodingAgentHandle shape', () => { + const noopHandle: CodingAgentHandle = { + url: '/x', + kind: 'claude', + send: async () => ({ runId: 'r' }), + events: async function* () {}, + state: () => ({ + status: 'cold', + pinned: false, + workspace: { identity: '', sharedRefs: 1 }, + runs: [], + }), + pin: async () => undefined, + release: async () => undefined, + stop: async () => undefined, + destroy: async () => undefined, + } + expect(noopHandle.kind).toBe('claude') + }) +}) +``` + +- [ ] **Step 5: Run runtime typecheck and tests** + +``` +pnpm -C packages/agents-runtime typecheck +pnpm -C packages/agents-runtime test test/spawn-coding-agent.test.ts +``` + +Expect: clean typecheck; test passes. + +If the file `packages/agents-runtime/test/` doesn't exist or vitest config is different, look at existing tests in `packages/agents-runtime/` for the right path. + +- [ ] **Step 6: Commit** + +``` +git add packages/agents-runtime/src/types.ts packages/agents-runtime/src/context-factory.ts packages/agents-runtime/test/spawn-coding-agent.test.ts +git commit -m "feat(agents-runtime): ctx.spawnCodingAgent / observeCodingAgent typed primitives" +``` + +--- + +## Phase 3 — Server wiring (sequential) + +### Task 3.1 — Bootstrap call + +**Files:** + +- Modify: `packages/agents/src/bootstrap.ts` + +- [ ] **Step 1: Read the existing bootstrap, locate the `registerCodingSession` call** + +The line is `packages/agents/src/bootstrap.ts:119`. Confirm by `grep -n registerCodingSession packages/agents/src/bootstrap.ts`. 
+ +- [ ] **Step 2: Modify `bootstrap.ts`** + +Add imports at the top (next to the existing `registerCodingSession` import): + +```ts +import { + LocalDockerProvider, + StdioBridge, + registerCodingAgent, +} from '@electric-ax/coding-agents' +``` + +After the existing `registerCodingSession(...)` line (line 119), add: + +```ts +registerCodingSession(registry, { defaultWorkingDirectory: cwd }) +typeNames.push(`coder`) + +// NEW for Slice A: +registerCodingAgent(registry, { + provider: new LocalDockerProvider(), + bridge: new StdioBridge(), +}) +typeNames.push(`coding-agent`) +``` + +- [ ] **Step 3: Add `@electric-ax/coding-agents` to `packages/agents/package.json` dependencies** if not already present. + +Check first: + +``` +grep '"@electric-ax/coding-agents"' packages/agents/package.json +``` + +If missing, add to `dependencies`: + +```json + "@electric-ax/coding-agents": "workspace:*", +``` + +Then re-install: + +``` +pnpm install +``` + +- [ ] **Step 4: Verify everything builds** + +``` +pnpm -C packages/agents typecheck +pnpm -C packages/agents-runtime typecheck +pnpm -C packages/coding-agents typecheck +``` + +Expect: all clean. + +- [ ] **Step 5: Run all package unit tests** + +``` +pnpm -C packages/coding-agents test +pnpm -C packages/agents-runtime test +pnpm -C packages/agents test +``` + +Expect: all pass (no regressions in legacy `coder` flows). + +- [ ] **Step 6: Commit** + +``` +git add packages/agents/src/bootstrap.ts packages/agents/package.json pnpm-lock.yaml +git commit -m "feat(agents): wire registerCodingAgent into bootstrap" +``` + +--- + +## Phase 4 — Integration smoke (sequential) + +### Task 4.1 — End-to-end Slice A test + +**Files:** + +- Create: `packages/coding-agents/test/integration/slice-a.test.ts` + +**Validation goals (one test, eight assertions):** + +1. Build/load the test image (existing helper). +2. Spawn the `coding-agent` entity via the runtime registry directly (no full `agents-server`; we drive it with a minimal harness). +3. 
Send a prompt; assert the `runs` collection ends with `status='completed'`, `responseText` non-empty. +4. Pin; sleep past `idleTimeoutMs=2000`; assert `provider.status` returns `'running'`. +5. Release; sleep past idle; assert `provider.status` returns `'stopped'`. +6. Send another prompt; assert cold-boot path executes; response received. +7. Spawn second agent on same workspace name; concurrently send to both; assert run order via `runs` collection timestamps (lease-serialized). +8. Crash recovery: tear down LM/WR/handler, re-`registerCodingAgent` with the same provider, observe entity state, send prompt; assert the prior `runs` row was reconciled to `failed: orphaned`, new run completes. +9. Destroy; assert `meta.status='destroyed'`, container removed. + +**This is a lot for one test file.** Acceptable: the spec called for one e2e test. Internally, organize it as `describe('Slice A integration', ...)` with one big `it('full flow', ...)` so wall time is amortized over a single image build + sandbox lifecycle. + +The "minimal harness" is the tricky bit. Slice A doesn't need a full `agents-server`; the unit tests already use a fake ctx. For integration, we need real StreamDB collections + the real handler invocation. Two options: + +- **Option A (preferred):** Reuse `packages/agents-runtime/test/` infrastructure if it exposes a test harness. (Read `packages/agents-runtime/test/` to confirm.) +- **Option B:** Write a minimal harness in `test/integration/support/test-runtime.ts` that builds the StreamDB + executes the handler. + +If neither is feasible within this task's time budget, the implementer should fall back to a reduced test that exercises the entity handler against fake-but-real-enough collections (with a real Docker provider and real bridge), and document this as a Phase 5 follow-up. 
+ +- [ ] **Step 1: Locate existing runtime test harness** + +``` +ls packages/agents-runtime/test +grep -r 'createRuntimeHandler\|defineEntity' packages/agents-runtime/test/ | head -20 +``` + +If a clean test harness exists (e.g. an in-memory runtime that drives entity handlers end-to-end), use it. If not, proceed with the option B fallback below. + +- [ ] **Step 2: Write the integration test (Option B fallback)** + +```ts +// packages/coding-agents/test/integration/slice-a.test.ts +import { describe, it, expect, beforeAll, afterAll } from 'vitest' +import { + LocalDockerProvider, + StdioBridge, + WorkspaceRegistry, + LifecycleManager, +} from '../../src' +import { makeCodingAgentHandler } from '../../src/entity/handler' +import { + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_SESSION_META_COLLECTION_TYPE, +} from '../../src/entity/collections' +import { buildTestImage, TEST_IMAGE_TAG } from '../support/build-image' +import { loadTestEnv } from '../support/env' + +const SHOULD_RUN = process.env.DOCKER === '1' +const describeMaybe = SHOULD_RUN ? 
describe : describe.skip + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k) { + return rows.get(k) + }, + get toArray() { + return Array.from(rows.values()) + }, + } +} + +interface FakeCtxState { + sessionMeta: CollectionStub + runs: CollectionStub + events: CollectionStub + lifecycle: CollectionStub + inbox: CollectionStub + recordedRuns: Array<{ key: string; status?: string; response: string }> +} + +function makeFakeCtx(entityUrl: string, args: Record) { + const state: FakeCtxState = { + sessionMeta: makeCollection(), + runs: makeCollection(), + events: makeCollection(), + lifecycle: makeCollection(), + inbox: makeCollection(), + recordedRuns: [], + } + let runCounter = 0 + const ctx: any = { + entityUrl, + entityType: 'coding-agent', + args, + tags: {}, + firstWake: false, + db: { + collections: state, + actions: { + sessionMeta_insert: ({ row }: any) => + state.sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ key, updater }: any) => { + const r = state.sessionMeta.rows.get(key) + if (r) updater(r) + }, + runs_insert: ({ row }: any) => state.runs.rows.set(row.key, row), + runs_update: ({ key, updater }: any) => { + const r = state.runs.rows.get(key) + if (r) updater(r) + }, + events_insert: ({ row }: any) => state.events.rows.set(row.key, row), + lifecycle_insert: ({ row }: any) => + state.lifecycle.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent = { key, status: undefined as string | undefined, response: '' } + state.recordedRuns.push(ent) + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: () => undefined, + } + return { ctx, state } +} + +function pushInbox( + state: FakeCtxState, + key: string, + message_type: string, + 
payload: any = {} +) { + state.inbox.rows.set(key, { key, message_type, payload }) +} + +describeMaybe('Slice A — full integration', () => { + beforeAll(async () => { + await buildTestImage() + }, 600_000) + + it('spawns, runs prompt, lease-serializes, recovers from crash, destroys', async () => { + const env = loadTestEnv() + const provider = new LocalDockerProvider({ image: TEST_IMAGE_TAG }) + const bridge = new StdioBridge() + const wr = new WorkspaceRegistry() + let lm = new LifecycleManager({ provider, bridge }) + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 2000, + coldBootBudgetMs: 30_000, + runTimeoutMs: 120_000, + }, + env: () => ({ ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }), + }) + + const agentA = `/test/coding-agent/a-${Date.now().toString(36)}` + const sharedName = `slice-a-shared-${Date.now().toString(36)}` + const args = { + kind: 'claude', + workspace: { type: 'volume', name: sharedName }, + lifecycle: { idleTimeoutMs: 2000 }, + } + const { ctx: ctxA, state: stateA } = makeFakeCtx(agentA, args) + + // 1) First-wake init + await handler(ctxA, { type: 'message_received' }) + expect(stateA.sessionMeta.get('current').status).toBe('cold') + + // 2) Send prompt; cold boot + run + pushInbox(stateA, 'i1', 'prompt', { + text: 'Reply with the single word: ok', + }) + await handler(ctxA, { type: 'message_received' }) + + const metaA1 = stateA.sessionMeta.get('current') + expect(metaA1.status).toBe('idle') + const runsA = Array.from(stateA.runs.rows.values()) as any[] + expect(runsA).toHaveLength(1) + expect(runsA[0].status).toBe('completed') + expect(runsA[0].responseText?.length ?? 
0).toBeGreaterThan(0) + + // 3) Pin + idle wait + pushInbox(stateA, 'i2', 'pin') + await handler(ctxA, { type: 'message_received' }) + expect(stateA.sessionMeta.get('current').pinned).toBe(true) + + await new Promise((r) => setTimeout(r, 2500)) + expect(await provider.status(agentA)).toBe('running') + + // 4) Release + idle wait => sandbox stops + pushInbox(stateA, 'i3', 'release') + await handler(ctxA, { type: 'message_received' }) + await new Promise((r) => setTimeout(r, 2500)) + expect(await provider.status(agentA)).toBe('unknown') + + // 5) Second prompt: cold-boot path + pushInbox(stateA, 'i4', 'prompt', { text: 'Reply: again' }) + await handler(ctxA, { type: 'message_received' }) + const runsA2 = Array.from(stateA.runs.rows.values()) as any[] + expect(runsA2).toHaveLength(2) + expect(runsA2[1].status).toBe('completed') + + // 6) Second agent on same workspace, lease-serialized + const agentB = `/test/coding-agent/b-${Date.now().toString(36)}` + const { ctx: ctxB, state: stateB } = makeFakeCtx(agentB, args) + await handler(ctxB, { type: 'message_received' }) // first-wake init + pushInbox(stateB, 'j1', 'prompt', { text: 'Reply: B' }) + pushInbox(stateA, 'i5', 'prompt', { text: 'Reply: A' }) + await Promise.all([ + handler(ctxA, { type: 'message_received' }), + handler(ctxB, { type: 'message_received' }), + ]) + const runsAFinal = Array.from(stateA.runs.rows.values()) as any[] + const runsBFinal = Array.from(stateB.runs.rows.values()) as any[] + expect(runsAFinal[runsAFinal.length - 1].status).toBe('completed') + expect(runsBFinal[0].status).toBe('completed') + // Lease serialization: A's last run and B's run intervals don't overlap. + const lastA = runsAFinal[runsAFinal.length - 1] + const firstB = runsBFinal[0] + const noOverlap = + lastA.endedAt <= firstB.startedAt || firstB.endedAt <= lastA.startedAt + expect(noOverlap).toBe(true) + + // 7) Crash-recovery sim: re-register LM with the same provider; verify + // a stale running row gets reconciled. 
+ // Manually inject a stale 'running' row predating the new lm. + const oldRunStart = Date.now() - 60_000 + stateA.runs.rows.set('stale', { + key: 'stale', + startedAt: oldRunStart, + status: 'running', + promptInboxKey: 'fake', + } as any) + stateA.sessionMeta.rows.set('current', { + ...stateA.sessionMeta.get('current'), + status: 'running', + }) + const lm2 = new LifecycleManager({ provider, bridge }) + const handler2 = makeCodingAgentHandler(lm2, wr, { + defaults: { + idleTimeoutMs: 2000, + coldBootBudgetMs: 30_000, + runTimeoutMs: 120_000, + }, + env: () => ({ ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }), + }) + pushInbox(stateA, 'i6', 'prompt', { text: 'after crash' }) + await handler2(ctxA, { type: 'message_received' }) + expect((stateA.runs.get('stale') as any).status).toBe('failed') + expect((stateA.runs.get('stale') as any).finishReason).toBe('orphaned') + const newRuns = (Array.from(stateA.runs.rows.values()) as any[]).filter( + (r) => r.status === 'completed' && r.key !== 'stale' + ) + expect(newRuns.length).toBeGreaterThan(0) + + // 8) Destroy + pushInbox(stateA, 'i7', 'destroy') + await handler2(ctxA, { type: 'message_received' }) + expect(stateA.sessionMeta.get('current').status).toBe('destroyed') + expect(await provider.status(agentA)).toBe('unknown') + + // Cleanup B + await provider.destroy(agentB).catch(() => undefined) + }, 360_000) +}) +``` + +- [ ] **Step 3: Run the integration test** + +``` +DOCKER=1 pnpm -C packages/coding-agents test test/integration/slice-a.test.ts +``` + +Expect: PASS within ~6 minutes (image cached + 3-4 real claude invocations). + +If it fails, **iterate** (max 5 cycles): + +1. Capture failure output. +2. Form a hypothesis (most likely: timing on idle, lease ordering, image name mismatch, env not piped through). +3. Apply fix. +4. Re-run. + +Common pitfalls: + +- **`provider.status` returns `unknown` (not `stopped`).** Adjust assertion: `expect(['stopped', 'unknown']).toContain(s)`. 
+- **Lease lock-up due to never-completing first prompt.** Verify ANTHROPIC_API_KEY is being piped (`docker logs <container>` for the bridge's stderr). +- **Second prompt after pin/release fails because container idle-killed mid-flight.** Increase the wait between events. + +After 5 unsuccessful cycles, write a Phase 5 report describing the blocker and stop. + +- [ ] **Step 4: Run all tests one last time** + +``` +pnpm -C packages/coding-agents test +``` + +Expect: all pass (unit + integration). + +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/test/integration/slice-a.test.ts +git commit -m "test(coding-agents): Slice A integration smoke (entity, lifecycle, lease, recovery)" +``` + +--- + +## Phase 5 — Report + +### Task 5.1 — Run report + +**Files:** + +- Create: `docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md` + +- [ ] **Step 1: Write report markdown** + +Cover: + +- Validation bar + outcome. +- Per-task: what landed cleanly, what required iteration, fix details. +- Known gaps versus the spec (the two divergences declared up-top: no `onBoot` hook, no `deleteEntityStream`). +- Time + token usage for the run. +- Recommended Slice B priorities (resume + remove-coder + Horton tools). 
+ +- [ ] **Step 2: Commit** + +``` +git add docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md +git commit -m "docs(coding-agents): Slice A run report" +``` + +--- + +## Self-review checklist + +- [x] **Spec coverage:** + - Built-in entity → Task 2.1, 2.2 ✓ + - LifecycleManager → Task 1.B ✓ + - WorkspaceRegistry → Task 1.A ✓ + - `ctx.spawnCodingAgent` / `observeCodingAgent` → Task 2.3 ✓ + - Pin/release/stop/destroy → Task 2.1 ✓ + - Crash recovery → Task 2.1 (reconcile rules) + Task 4.1 (validation) ✓ + - Workspace lease serialization → Task 1.A + Task 4.1 (validation) ✓ + - Server bootstrap → Task 3.1 ✓ + - Integration test → Task 4.1 ✓ + - Spec divergences (no onBoot, no deleteEntityStream) declared at plan top ✓ +- [x] **Placeholder scan:** No "TBD", "TODO", "appropriate handling" left in steps. The Phase 4 fallback explicitly admits the harness-design choice may be revisited; that's a known trade-off, not a placeholder. +- [x] **Type consistency:** + - `CodingAgentStatus` includes `'destroyed'` (added because `destroy()` tombstones). + - `SessionMetaRow.lastInboxKey` declared in Task 2.1 Step 1 before being used in handler. + - `CodingAgentHandle.events()` returns `AsyncIterable` in runtime types (Slice A) since the runtime can't depend on `agent-session-protocol` types directly. Documented. +- [x] **Approval:** Pre-approved per user's "implemnt" message. diff --git a/docs/superpowers/plans/2026-04-30-coding-agents-slice-b.md b/docs/superpowers/plans/2026-04-30-coding-agents-slice-b.md new file mode 100644 index 0000000000..cd4281cf78 --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-coding-agents-slice-b.md @@ -0,0 +1,3030 @@ +# Coding Agents — Slice B Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Complete the coding-agent platform-primitive migration: wire resume (nativeJsonl collection + `--resume` flag), swap Horton from legacy `coder` to `coding-agent`, delete the legacy `coder` entity and all legacy runtime types, and ship a `CodingAgentView` / `CodingAgentTimeline` / `CodingAgentSpawnDialog` UI surface wired to the new entity's collections. Validation bar: unit tests for resume materialisation, Horton tool swap verified by handler unit test, and an integration test that sends two prompts to the same `coding-agent` and asserts the second run's response references the first prompt's content (proving resume is lossless). + +**Architecture:** `nativeJsonl` is a new fifth collection on the `coding-agent` entity. The handler tees each raw JSONL line from `bridge.runTurn` into the collection via `onNativeLine`. On cold-boot of an agent with prior `nativeJsonl` rows, the handler calls `sandbox.exec` to write the lines into `/tmp/resume.jsonl`, extracts `nativeSessionId` from `sessionMeta`, and passes `--resume <nativeSessionId>` to `StdioBridge.runTurn`. `StdioBridge` no longer warns; it passes the id through. Horton's `createHortonTools` switches from `createSpawnCoderTool` / `createPromptCoderTool` (legacy `coder`) to new `createSpawnCodingAgentTool` / `createPromptCodingAgentTool` (new `coding-agent`). Legacy files (`coding-session.ts`, `spawn-coder.ts`) and their runtime types are deleted. UI adds `CodingAgentView`, `useCodingAgent`, `CodingAgentTimeline`, `CodingAgentSpawnDialog`; router and sidebar switch on `'coding-agent'` instead of `CODING_SESSION_ENTITY_TYPE`. + +**Spec divergences (resolved):** + +- **`onNativeLine` already wired in `StdioBridge`.** Lines 51-56 of `bridge/stdio-bridge.ts` already call `args.onNativeLine(line)` in `drainStdout`. Task 1.1 needs only a unit test (not a re-implementation). Task 1.2 adds the actual `--resume` argument. 
+- **Horton tool validation string in `prompt_coding_agent`.** Legacy `prompt_coder` validated `coder_url.startsWith('/coder/')`. New tool validates `coding_agent_url.startsWith('/coding-agent/')`. +- **UI "Pin/Release/Stop" buttons ship as message sends**, not as a special RPC. They call `ctx.db.actions` on the inbox of the entity to send `pin`, `release`, or `stop` messages (same as the test's `pushInbox`). The `EntityHeader` receives the `db` object when `entity.type === 'coding-agent'`. +- **E2E test uses the FakeCtx pattern** from `test/integration/slice-a.test.ts` extended with a `nativeJsonl` collection stub, not the `agents-server` docker-compose harness. The `agents-server` harness requires an external postgres+electric stack and is out of scope for Slice B. + +**Tech Stack:** TypeScript, Vitest, React, `@radix-ui/themes`, `lucide-react`, `zod`, Docker (integration test only). + +**Reference spec:** `docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md` + +--- + +## File Structure + +``` +packages/coding-agents/ ← extend +├── src/ +│ ├── index.ts ← +CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE +│ ├── entity/ +│ │ ├── collections.ts ← +nativeJsonl schema, +nativeSessionId on sessionMeta +│ │ ├── handler.ts ← +tee onNativeLine, +resume materialisation, +nativeSessionId capture +│ │ └── register.ts ← +nativeJsonl state entry +│ └── bridge/stdio-bridge.ts ← remove warning, add --resume when nativeSessionId present +└── test/ + ├── unit/ + │ ├── stdio-bridge-resume.test.ts ← NEW: --resume arg wired unit test + │ └── handler-resume.test.ts ← NEW: tee + materialise unit tests + └── integration/ + └── slice-b.test.ts ← NEW: lossless resume integration test + +packages/agents/src/ +├── bootstrap.ts ← remove registerCodingSession + 'coder' push +├── tools/ +│ ├── spawn-coder.ts ← DELETE (legacy) +│ ├── spawn-coding-agent.ts ← NEW +│ └── prompt-coding-agent.ts ← NEW +└── agents/ + ├── coding-session.ts ← DELETE (legacy) + └── horton.ts ← swap imports + tool 
list + system prompt + +packages/agents-runtime/src/ +├── types.ts ← delete legacy Coding Session types/interface +├── context-factory.ts ← delete useCodingAgent impl +└── index.ts ← remove legacy exports + +packages/agents-server-ui/src/ +├── components/ +│ ├── StatusDot.tsx ← +coding-agent status colors +│ ├── EntityHeader.tsx ← +Pin/Release/Stop for coding-agent +│ ├── ToolCallView.tsx ← +spawn_coding_agent, prompt_coding_agent cases +│ ├── CodingAgentView.tsx ← NEW +│ ├── CodingAgentTimeline.tsx ← NEW +│ └── CodingAgentSpawnDialog.tsx ← NEW +├── hooks/ +│ └── useCodingAgent.ts ← NEW +└── router.tsx ← swap CODING_SESSION_ENTITY_TYPE → 'coding-agent' + +packages/agents-server-ui/src/components/Sidebar.tsx ← swap coder dialog → CodingAgentSpawnDialog + +docs/superpowers/specs/notes/ +└── 2026-04-30-coding-agents-slice-b-report.md ← NEW (Phase 8) +``` + +--- + +## Phase Plan + +| Phase | Tasks | Parallelism | Depends on | +| ----- | ------------------ | ---------------------------------------------------- | ---------- | +| 0 | 0.1 | sequential | — | +| 1 | 1.1, 1.2, 1.3, 1.4 | 1.1 + 1.2 parallel; 1.3 after 1.1+1.2; 1.4 after 1.3 | Phase 0 | +| 2 | 2.1, 2.2, 2.3 | sequential | Phase 1 | +| 3 | 3.1, 3.2 | parallel (2 independent agents) | Phase 2 | +| 4 | 4.1, 4.2, 4.3, 4.4 | 4.1–4.3 parallel; 4.4 after all | Phase 3 | +| 5 | 5.1 | sequential | Phase 4 | +| 6 | 6.1 | sequential | Phase 5 | +| 7 | 7.1 | sequential | Phase 6 | +| 8 | 8.1 (report) | sequential | Phase 7 | + +Total tasks: 17 (excluding report). Estimated wall time per task: 15-40 min. 
+ +--- + +## Phase 0 — Extend collections + sessionMeta schema (sequential) + +### Task 0.1 — Add `nativeJsonl` collection and `nativeSessionId` to `sessionMeta` + +**Files:** + +- Modify: `packages/coding-agents/src/entity/collections.ts` +- Modify: `packages/coding-agents/src/index.ts` + +- [ ] **Step 1: Edit `packages/coding-agents/src/entity/collections.ts`** + +Add the constant, schema, and type after the existing `lifecycleRowSchema`. Also add `nativeSessionId` to `sessionMetaRowSchema`. + +```ts +// packages/coding-agents/src/entity/collections.ts +import { z } from 'zod' + +export const CODING_AGENT_SESSION_META_COLLECTION_TYPE = `coding-agent.sessionMeta` +export const CODING_AGENT_RUNS_COLLECTION_TYPE = `coding-agent.runs` +export const CODING_AGENT_EVENTS_COLLECTION_TYPE = `coding-agent.events` +export const CODING_AGENT_LIFECYCLE_COLLECTION_TYPE = `coding-agent.lifecycle` +export const CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE = `coding-agent.nativeJsonl` + +export const codingAgentStatusSchema = z.enum([ + `cold`, + `starting`, + `idle`, + `running`, + `stopping`, + `error`, + `destroyed`, +]) +export type CodingAgentStatus = z.infer + +export const sessionMetaRowSchema = z.object({ + key: z.literal(`current`), + status: codingAgentStatusSchema, + kind: z.enum([`claude`]), + pinned: z.boolean(), + workspaceIdentity: z.string(), + workspaceSpec: z.discriminatedUnion(`type`, [ + z.object({ + type: z.literal(`volume`), + name: z.string(), + }), + z.object({ + type: z.literal(`bindMount`), + hostPath: z.string(), + }), + ]), + idleTimeoutMs: z.number(), + keepWarm: z.boolean(), + instanceId: z.string().optional(), + lastError: z.string().optional(), + currentPromptInboxKey: z.string().optional(), + lastInboxKey: z.string().optional(), + nativeSessionId: z.string().optional(), // ← NEW in Slice B +}) +export type SessionMetaRow = z.infer + +export const runRowSchema = z.object({ + key: z.string(), + startedAt: z.number(), + endedAt: z.number().optional(), + 
status: z.enum([`running`, `completed`, `failed`]), + finishReason: z.string().optional(), + promptInboxKey: z.string(), + responseText: z.string().optional(), +}) +export type RunRow = z.infer + +export const eventRowSchema = z.object({ + key: z.string(), + runId: z.string(), + seq: z.number(), + ts: z.number(), + type: z.string(), + payload: z.looseObject({}), +}) +export type EventRow = z.infer + +export const lifecycleRowSchema = z.object({ + key: z.string(), + ts: z.number(), + event: z.enum([ + `sandbox.starting`, + `sandbox.started`, + `sandbox.stopped`, + `sandbox.failed`, + `pin`, + `release`, + `orphan.detected`, + `resume.restored`, // ← NEW in Slice B + ]), + detail: z.string().optional(), +}) +export type LifecycleRow = z.infer + +// ─── nativeJsonl — NEW in Slice B ──────────────────────────────────────────── + +export const nativeJsonlRowSchema = z.object({ + key: z.string(), // `${runId}:${seq}` — sortable + runId: z.string(), + seq: z.number(), + line: z.string(), // raw JSONL line from claude CLI stdout +}) +export type NativeJsonlRow = z.infer +``` + +- [ ] **Step 2: Edit `packages/coding-agents/src/index.ts`** + +Add `CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE` to the existing collection-type re-exports: + +```ts +export { + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE, // ← add this line +} from './entity/collections' +``` + +- [ ] **Step 3: Verify TypeScript compiles** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/coding-agents/src/entity/collections.ts packages/coding-agents/src/index.ts +git commit -m "feat(coding-agents): add nativeJsonl collection schema and nativeSessionId to sessionMeta" +``` + +--- + +## Phase 1 — StdioBridge resume wiring + handler tee + capture + materialise (sequential-ish) + +### Task 1.1 — Unit test for 
existing `onNativeLine` wiring (already implemented) + +**Context:** `onNativeLine` is already wired in `bridge/stdio-bridge.ts` lines 51-56: + +```ts +if (args.onNativeLine) args.onNativeLine(line) +``` + +This task only adds a unit test to lock the behaviour. + +**Files:** + +- Create: `packages/coding-agents/test/unit/stdio-bridge-resume.test.ts` + +- [ ] **Step 1: Write the unit test** + +```ts +// packages/coding-agents/test/unit/stdio-bridge-resume.test.ts +import { describe, it, expect, vi } from 'vitest' +import { StdioBridge } from '../../src/bridge/stdio-bridge' +import type { SandboxInstance, RunTurnArgs } from '../../src/types' + +/** + * Minimal sandbox double: exec returns a fake handle whose stdout + * yields the lines we supply, stderr is empty, and wait() returns 0. + */ +function makeFakeSandbox(stdoutLines: string[]): SandboxInstance { + const handle = { + stdout: (async function* () { + for (const l of stdoutLines) yield l + })(), + stderr: (async function* () {})(), + writeStdin: vi.fn().mockResolvedValue(undefined), + closeStdin: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue({ exitCode: 0 }), + } + return { + instanceId: `fake-instance`, + workspaceMount: `/workspace`, + exec: vi.fn().mockResolvedValue(handle), + destroy: vi.fn(), + } as unknown as SandboxInstance +} + +describe(`StdioBridge — onNativeLine`, () => { + it(`calls onNativeLine for every non-empty stdout line`, async () => { + // Minimal valid claude stream-json: session_init + result line. 
+ const lines = [ + JSON.stringify({ + type: `system`, + subtype: `init`, + session_id: `sess-1`, + tools: [], + mcp_servers: [], + }), + JSON.stringify({ + type: `result`, + subtype: `success`, + result: `ok`, + session_id: `sess-1`, + is_error: false, + }), + ] + const sandbox = makeFakeSandbox(lines) + const bridge = new StdioBridge() + const received: string[] = [] + + await bridge.runTurn({ + sandbox, + kind: `claude`, + prompt: `hello`, + onEvent: () => undefined, + onNativeLine: (l) => received.push(l), + } as RunTurnArgs) + + expect(received).toEqual(lines) + }) + + it(`does not call onNativeLine for empty lines`, async () => { + const lines = [ + ``, + JSON.stringify({ + type: `result`, + subtype: `success`, + result: `ok`, + session_id: `s`, + is_error: false, + }), + ] + const sandbox = makeFakeSandbox(lines) + const bridge = new StdioBridge() + const received: string[] = [] + + await bridge.runTurn({ + sandbox, + kind: `claude`, + prompt: `hi`, + onEvent: () => undefined, + onNativeLine: (l) => received.push(l), + } as RunTurnArgs) + + // Empty string should have been skipped by the `if (!line) continue` guard. 
+ expect(received.every((l) => l.length > 0)).toBe(true) + }) +}) +``` + +- [ ] **Step 2: Run the unit test to confirm it passes** + +```bash +cd packages/coding-agents && npx vitest run test/unit/stdio-bridge-resume.test.ts +``` + +**Commit:** + +``` +git add packages/coding-agents/test/unit/stdio-bridge-resume.test.ts +git commit -m "test(coding-agents): unit test — onNativeLine already wired in StdioBridge" +``` + +--- + +### Task 1.2 — Wire `--resume ` in `StdioBridge` + +**Files:** + +- Modify: `packages/coding-agents/src/bridge/stdio-bridge.ts` + +- [ ] **Step 1: Replace the warning block and add `--resume` to `cliArgs`** + +Current code (lines 13-18): + +```ts +if (args.nativeSessionId) { + log.warn( + { nativeSessionId: args.nativeSessionId }, + `StdioBridge MVP does not implement resume — running fresh turn` + ) +} +``` + +Replace with nothing (delete the block), and after the `cliArgs` array definition add: + +```ts +if (args.nativeSessionId) cliArgs.push(`--resume`, args.nativeSessionId) +``` + +Full resulting file: + +```ts +// packages/coding-agents/src/bridge/stdio-bridge.ts +import { normalize } from 'agent-session-protocol' +import type { NormalizedEvent } from 'agent-session-protocol' +import { log } from '../log' +import type { Bridge, RunTurnArgs, RunTurnResult } from '../types' + +export class StdioBridge implements Bridge { + async runTurn(args: RunTurnArgs): Promise { + if (args.kind !== `claude`) { + throw new Error( + `StdioBridge MVP supports only 'claude', got '${args.kind}'` + ) + } + + const cliArgs: Array = [ + `--print`, + `--output-format=stream-json`, + `--verbose`, + `--dangerously-skip-permissions`, + ] + if (args.model) cliArgs.push(`--model`, args.model) + if (args.nativeSessionId) cliArgs.push(`--resume`, args.nativeSessionId) + + const handle = await args.sandbox.exec({ + cmd: [`claude`, ...cliArgs], + cwd: args.sandbox.workspaceMount, + stdin: `pipe`, + }) + + if (!handle.writeStdin || !handle.closeStdin) { + throw new Error( 
+ `StdioBridge requires stdin pipe but ExecHandle lacks one` + ) + } + await handle.writeStdin(args.prompt) + await handle.closeStdin() + + const rawLines: Array = [] + const stderrLines: Array = [] + + const drainStderr = async () => { + for await (const line of handle.stderr) { + stderrLines.push(line) + } + } + const drainStdout = async () => { + for await (const line of handle.stdout) { + if (!line) continue + rawLines.push(line) + if (args.onNativeLine) args.onNativeLine(line) + } + } + + await Promise.all([drainStdout(), drainStderr()]) + const exitInfo = await handle.wait() + + if (exitInfo.exitCode !== 0) { + const stderrPreview = stderrLines.join(`\n`).slice(0, 800) || `` + throw new Error( + `claude CLI exited ${exitInfo.exitCode}. stderr=${stderrPreview}` + ) + } + + let events: Array = [] + try { + events = normalize(rawLines, `claude`) + } catch (err) { + log.error({ err, sample: rawLines.slice(0, 3) }, `normalize failed`) + throw err + } + + for (const e of events) args.onEvent(e) + + const sessionInit = events.find((e) => e.type === `session_init`) + const lastAssistant = [...events] + .reverse() + .find((e) => e.type === `assistant_message`) + + return { + nativeSessionId: + sessionInit && `sessionId` in sessionInit + ? (sessionInit as { sessionId?: string }).sessionId + : undefined, + exitCode: exitInfo.exitCode, + finalText: + lastAssistant && `text` in lastAssistant + ? 
(lastAssistant as { text?: string }).text + : undefined, + } + } +} +``` + +- [ ] **Step 2: Add unit test for `--resume` arg in `stdio-bridge-resume.test.ts`** + +Append this test to the existing `stdio-bridge-resume.test.ts`: + +```ts +describe(`StdioBridge — --resume`, () => { + it(`passes --resume to exec cmd when nativeSessionId is provided`, async () => { + const lines = [ + JSON.stringify({ + type: `result`, + subtype: `success`, + result: `ok`, + session_id: `s`, + is_error: false, + }), + ] + const sandbox = makeFakeSandbox(lines) + const bridge = new StdioBridge() + + await bridge.runTurn({ + sandbox, + kind: `claude`, + prompt: `hi`, + onEvent: () => undefined, + nativeSessionId: `native-sess-abc`, + } as RunTurnArgs) + + const execCall = (sandbox.exec as ReturnType).mock.calls[0][0] + expect(execCall.cmd).toContain(`--resume`) + expect(execCall.cmd).toContain(`native-sess-abc`) + }) + + it(`does not pass --resume when nativeSessionId is absent`, async () => { + const lines = [ + JSON.stringify({ + type: `result`, + subtype: `success`, + result: `ok`, + session_id: `s`, + is_error: false, + }), + ] + const sandbox = makeFakeSandbox(lines) + const bridge = new StdioBridge() + + await bridge.runTurn({ + sandbox, + kind: `claude`, + prompt: `hi`, + onEvent: () => undefined, + } as RunTurnArgs) + + const execCall = (sandbox.exec as ReturnType).mock.calls[0][0] + expect(execCall.cmd).not.toContain(`--resume`) + }) +}) +``` + +- [ ] **Step 3: Run all stdio-bridge tests** + +```bash +cd packages/coding-agents && npx vitest run test/unit/stdio-bridge-resume.test.ts +``` + +**Commit:** + +``` +git add packages/coding-agents/src/bridge/stdio-bridge.ts packages/coding-agents/test/unit/stdio-bridge-resume.test.ts +git commit -m "feat(coding-agents): wire --resume in StdioBridge" +``` + +--- + +### Task 1.3 — Handler: tee `onNativeLine` into `nativeJsonl` collection + capture `nativeSessionId` + +**Files:** + +- Modify: `packages/coding-agents/src/entity/handler.ts` + 
+The changes are in `processPrompt`. There are two distinct changes: + +**A) Tee raw lines into `nativeJsonl` inside the `runTurn` call.** + +Replace the `runTurn` call (currently lines 371-389 of the original) with a version that adds `onNativeLine`: + +```ts +// Inside processPrompt, in the try block after runs_insert: +let nativeLineSeq = 0 +const result = await raceTimeout( + lm.bridge.runTurn({ + sandbox, + kind: meta.kind, + prompt: promptText, + nativeSessionId: meta.nativeSessionId, // pass stored id (may be undefined on first run) + onNativeLine: (line: string) => { + ctx.db.actions.nativeJsonl_insert({ + row: { + key: eventKey(runId, nativeLineSeq), + runId, + seq: nativeLineSeq, + line, + } satisfies NativeJsonlRow, + }) + nativeLineSeq++ + }, + onEvent: (e: NormalizedEvent) => { + ctx.db.actions.events_insert({ + row: { + key: eventKey(runId, seq), + runId, + seq, + ts: Date.now(), + type: e.type, + payload: e as unknown as Record, + } satisfies EventRow, + }) + seq++ + }, + }), + options.defaults.runTimeoutMs +) +``` + +**B) Capture `nativeSessionId` from the result and persist it in `sessionMeta`.** + +After the `result = await raceTimeout(...)` resolves and before the `runs_update completed` block: + +```ts +// Persist nativeSessionId from this turn if we don't have one yet. 
+if (result.nativeSessionId && !meta.nativeSessionId) { + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.nativeSessionId = result.nativeSessionId + }, + }) +} +``` + +- [ ] **Step 1: Add `NativeJsonlRow` import at top of handler.ts** + +```ts +import type { + RunRow, + SessionMetaRow, + EventRow, + LifecycleRow, + NativeJsonlRow, // ← add +} from './collections' +``` + +- [ ] **Step 2: Apply changes A and B to `processPrompt`** + +The full updated `processPrompt` run block (replacing from `let seq = 0` to the `recordedRun.end({ status: 'completed' })` call): + +```ts +let seq = 0 +let nativeLineSeq = 0 +let finalText: string | undefined +try { + const result = await raceTimeout( + lm.bridge.runTurn({ + sandbox, + kind: meta.kind, + prompt: promptText, + nativeSessionId: meta.nativeSessionId, + onNativeLine: (line: string) => { + ctx.db.actions.nativeJsonl_insert({ + row: { + key: eventKey(runId, nativeLineSeq), + runId, + seq: nativeLineSeq, + line, + } satisfies NativeJsonlRow, + }) + nativeLineSeq++ + }, + onEvent: (e: NormalizedEvent) => { + ctx.db.actions.events_insert({ + row: { + key: eventKey(runId, seq), + runId, + seq, + ts: Date.now(), + type: e.type, + payload: e as unknown as Record, + } satisfies EventRow, + }) + seq++ + }, + }), + options.defaults.runTimeoutMs + ) + finalText = result.finalText + + // Persist nativeSessionId from this turn if we don't have one yet. + if (result.nativeSessionId && !meta.nativeSessionId) { + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.nativeSessionId = result.nativeSessionId + }, + }) + } + + ctx.db.actions.runs_update({ + key: runId, + updater: (d: RunRow) => { + d.status = `completed` + d.endedAt = Date.now() + d.responseText = finalText + }, + }) + if (finalText) recordedRun.attachResponse(finalText) + recordedRun.end({ status: `completed` }) +} catch (err) { + // ... 
(rest of catch block unchanged) +``` + +- [ ] **Step 3: TypeScript check** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/coding-agents/src/entity/handler.ts +git commit -m "feat(coding-agents): tee onNativeLine into nativeJsonl and capture nativeSessionId per turn" +``` + +--- + +### Task 1.4 — Handler: cold-boot materialise prior `nativeJsonl` for resume + +**Files:** + +- Modify: `packages/coding-agents/src/entity/handler.ts` + +On cold-boot, before calling `lm.bridge.runTurn`, if `meta.nativeSessionId` is set and `nativeJsonl` rows exist, write them into the sandbox at the path where the claude CLI looks up session files, and pass the session id to `--resume` via the already-wired `nativeSessionId` field. + +**Note on path:** `claude --resume` expects the native session id (the UUID), not a file path. The CLI looks for the session's JSONL file in `~/.claude/projects/<sanitised-cwd>/`. The sanitised form of a working directory is the absolute path with every `/` replaced by `-` and nothing stripped or prepended, so `/workspace` becomes `-workspace` (the leading `-` is kept). So we must write the materialized file to `~/.claude/projects/-workspace/<nativeSessionId>.jsonl` inside the container. 
+ +The exec command to materialise: + +``` +sandbox.exec({ cmd: ['sh', '-c', `mkdir -p ~/.claude/projects/-workspace && cat > ~/.claude/projects/-workspace/<nativeSessionId>.jsonl <<'__JSONL__'\n<lines>\n__JSONL__`] }) +``` + +Because the lines may contain special characters, it is safer to write the file via a base64-encoded payload piped through `base64 -d`: + +```ts +const b64 = Buffer.from(lines.join('\n') + '\n').toString('base64') +await sandbox.exec({ + cmd: [ + 'sh', + '-c', + `mkdir -p ~/.claude/projects/-workspace && printf '%s' '${b64}' | base64 -d > ~/.claude/projects/-workspace/${nativeSessionId}.jsonl`, + ], + cwd: sandbox.workspaceMount, +}) +``` + +- [ ] **Step 1: Add materialise helper function at the top of `handler.ts` (after imports)** + +```ts +/** + * Sanitise an absolute path for use as the claude project directory name + * under ~/.claude/projects/. The CLI replaces every `/` with `-`, producing + * e.g. `/workspace` → `-workspace`. + */ +function sanitiseCwd(cwd: string): string { + return cwd.replace(/\//g, `-`) +} + +/** + * Materialise nativeJsonl rows into the container's ~/.claude/projects/ so + * that `claude --resume <nativeSessionId>` finds its session file. + */ +async function materialiseResume( + sandbox: SandboxInstance, + nativeSessionId: string, + lines: string[] +): Promise<void> { + if (lines.length === 0) return + const projectDir = sanitiseCwd(sandbox.workspaceMount) + const jsonlContent = lines.join(`\n`) + `\n` + // Base64-encode to avoid quoting issues with special chars in JSONL lines. + const b64 = Buffer.from(jsonlContent).toString(`base64`) + await sandbox.exec({ + cmd: [ + `sh`, + `-c`, + `mkdir -p ~/.claude/projects/${projectDir} && printf '%s' '${b64}' | base64 -d > ~/.claude/projects/${projectDir}/${nativeSessionId}.jsonl`, + ], + cwd: sandbox.workspaceMount, + }) +} +``` + +- [ ] **Step 2: Add `SandboxInstance` import** + +The handler already imports from lifecycle-manager and workspace-registry. 
Add the `SandboxInstance` type import: + +```ts +import type { SandboxInstance } from '../types' +``` + +- [ ] **Step 3: Call `materialiseResume` inside `processPrompt`, after the sandbox is up** + +After the `ctx.db.actions.lifecycle_insert` for `sandbox.started` and before `wr.acquire`: + +```ts +// Resume materialisation: if we have a prior nativeSessionId and nativeJsonl +// rows, write them into the container so --resume finds the session file. +if (meta.nativeSessionId) { + const nativeJsonlCol = ctx.db.collections.nativeJsonl + const allLines: string[] = (nativeJsonlCol.toArray as Array) + .slice() + .sort((a, b) => (a.key < b.key ? -1 : a.key > b.key ? 1 : 0)) + .map((r) => r.line) + + if (allLines.length > 0) { + await materialiseResume(sandbox, meta.nativeSessionId, allLines) + ctx.db.actions.lifecycle_insert({ + row: { + key: lifecycleKey(`resume`), + ts: Date.now(), + event: `resume.restored`, + detail: `lines=${allLines.length}`, + } satisfies LifecycleRow, + }) + } +} +``` + +- [ ] **Step 4: TypeScript check** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +- [ ] **Step 5: Write unit test for materialise** + +Create `packages/coding-agents/test/unit/handler-resume.test.ts`: + +```ts +// packages/coding-agents/test/unit/handler-resume.test.ts +import { describe, it, expect, vi } from 'vitest' + +// Pull the helper via a small re-export shim if it's not exported, +// or test it indirectly via the handler. Here we test it indirectly +// by asserting that sandbox.exec receives the right cmd. + +// Since materialiseResume is not exported, we exercise it through +// processPrompt via makeFakeCtx (adapted from slice-a.test.ts). 
+ +import { makeCodingAgentHandler } from '../../src/entity/handler' +import type { LifecycleManager } from '../../src/lifecycle-manager' +import type { SandboxInstance } from '../../src/types' +import type { + NativeJsonlRow, + SessionMetaRow, +} from '../../src/entity/collections' + +// ---------- minimal doubles -------------------------------------------------- + +function makeExecHandle(stdoutLines: string[]) { + return { + stdout: (async function* () { + for (const l of stdoutLines) yield l + })(), + stderr: (async function* () {})(), + writeStdin: vi.fn().mockResolvedValue(undefined), + closeStdin: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue({ exitCode: 0 }), + } +} + +function makeSandbox( + stdoutLines: string[] +): SandboxInstance & { execCalls: any[] } { + const execCalls: any[] = [] + return { + instanceId: `inst-1`, + workspaceMount: `/workspace`, + exec: vi.fn(async (req) => { + execCalls.push(req) + return makeExecHandle(stdoutLines) + }), + destroy: vi.fn(), + execCalls, + } as any +} + +function makeMinimalLm(sandbox: SandboxInstance) { + const lm = { + startedAtMs: Date.now(), + provider: { + status: vi.fn().mockResolvedValue(`stopped`), + destroy: vi.fn().mockResolvedValue(undefined), + }, + bridge: { + runTurn: vi.fn().mockResolvedValue({ + nativeSessionId: `native-1`, + finalText: `reply`, + exitCode: 0, + }), + }, + ensureRunning: vi.fn().mockResolvedValue(sandbox), + stop: vi.fn().mockResolvedValue(undefined), + destroy: vi.fn().mockResolvedValue(undefined), + pin: vi.fn().mockReturnValue({ count: 1 }), + release: vi.fn().mockReturnValue({ count: 0 }), + pinCount: vi.fn().mockReturnValue(0), + armIdleTimer: vi.fn(), + } + return lm as unknown as LifecycleManager +} + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k: string) { + return rows.get(k) + }, + get toArray(): Array { + return 
Array.from(rows.values()) + }, + } +} + +function makeFakeCtx(entityUrl: string, args: Record) { + const state = { + sessionMeta: makeCollection(), + runs: makeCollection(), + events: makeCollection(), + lifecycle: makeCollection(), + nativeJsonl: makeCollection(), + inbox: makeCollection(), + } + let runCounter = 0 + const ctx: any = { + entityUrl, + entityType: `coding-agent`, + args, + tags: {}, + firstWake: false, + db: { + collections: state, + actions: { + sessionMeta_insert: ({ row }: any) => + state.sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ key, updater }: any) => { + const r = state.sessionMeta.rows.get(key) + if (r) updater(r) + }, + runs_insert: ({ row }: any) => state.runs.rows.set(row.key, row), + runs_update: ({ key, updater }: any) => { + const r = state.runs.rows.get(key) + if (r) updater(r) + }, + events_insert: ({ row }: any) => state.events.rows.set(row.key, row), + lifecycle_insert: ({ row }: any) => + state.lifecycle.rows.set(row.key, row), + nativeJsonl_insert: ({ row }: any) => + state.nativeJsonl.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent: any = { key, status: undefined, response: `` } + state.runs.rows.set(key, ent) + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: () => undefined, + } + return { ctx, state } +} + +// ---------- tests ------------------------------------------------------------ + +describe(`handler resume materialisation`, () => { + it(`calls sandbox.exec to materialise nativeJsonl rows on cold-boot when nativeSessionId is set`, async () => { + const sandbox = makeSandbox([]) + const lm = makeMinimalLm(sandbox) + + // Pre-seed nativeJsonl rows and sessionMeta with a nativeSessionId. 
+ const { ctx, state } = makeFakeCtx(`/test/ca/resume-1`, { + kind: `claude`, + workspaceType: `volume`, + workspaceName: `vol-1`, + }) + const { WorkspaceRegistry } = await import('../../src/workspace-registry') + const wr = new WorkspaceRegistry() + + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 500, + coldBootBudgetMs: 30_000, + runTimeoutMs: 60_000, + }, + env: () => ({}), + }) + + // First wake — initialises sessionMeta (status: cold) + await handler(ctx, { type: `message_received` }) + + // Manually inject nativeSessionId and nativeJsonl rows (simulating a prior run). + state.sessionMeta.rows.set(`current`, { + ...(state.sessionMeta.get(`current`) as SessionMetaRow), + nativeSessionId: `native-sess-xyz`, + }) + const fakeJsonlLine = JSON.stringify({ + type: `result`, + subtype: `success`, + result: `prior`, + session_id: `native-sess-xyz`, + is_error: false, + }) + state.nativeJsonl.rows.set(`run-1:000000000000000`, { + key: `run-1:000000000000000`, + runId: `run-1`, + seq: 0, + line: fakeJsonlLine, + } satisfies NativeJsonlRow) + + // Second wake with a prompt — should trigger materialise. + state.inbox.rows.set(`i1`, { + key: `i1`, + message_type: `prompt`, + payload: { text: `second prompt` }, + }) + await handler(ctx, { type: `message_received` }) + + // sandbox.exec should have been called at least twice: + // once for materialise, once for the claude CLI invocation. + // The materialise call has a shell command containing base64. 
+ const shellCalls = ( + sandbox.exec as ReturnType + ).mock.calls.filter((c: any[]) => c[0]?.cmd?.[0] === `sh`) + expect(shellCalls.length).toBeGreaterThan(0) + const cmd = shellCalls[0][0].cmd.join(` `) + expect(cmd).toContain(`native-sess-xyz.jsonl`) + expect(cmd).toContain(`base64`) + }) + + it(`adds a resume.restored lifecycle row after materialisation`, async () => { + const sandbox = makeSandbox([]) + const lm = makeMinimalLm(sandbox) + const { ctx, state } = makeFakeCtx(`/test/ca/resume-2`, { + kind: `claude`, + workspaceType: `volume`, + workspaceName: `vol-2`, + }) + const { WorkspaceRegistry } = await import('../../src/workspace-registry') + const wr = new WorkspaceRegistry() + + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 500, + coldBootBudgetMs: 30_000, + runTimeoutMs: 60_000, + }, + env: () => ({}), + }) + + await handler(ctx, { type: `message_received` }) + + state.sessionMeta.rows.set(`current`, { + ...(state.sessionMeta.get(`current`) as SessionMetaRow), + nativeSessionId: `native-sess-abc`, + }) + state.nativeJsonl.rows.set(`run-1:0`, { + key: `run-1:0`, + runId: `run-1`, + seq: 0, + line: `{"type":"result","subtype":"success","result":"x","session_id":"native-sess-abc","is_error":false}`, + } satisfies NativeJsonlRow) + + state.inbox.rows.set(`i1`, { + key: `i1`, + message_type: `prompt`, + payload: { text: `hello again` }, + }) + await handler(ctx, { type: `message_received` }) + + const lifecycleRows = Array.from(state.lifecycle.rows.values()) as any[] + const resumeRow = lifecycleRows.find((r) => r.event === `resume.restored`) + expect(resumeRow).toBeDefined() + expect(resumeRow.detail).toMatch(/lines=1/) + }) +}) +``` + +- [ ] **Step 6: Run unit tests** + +```bash +cd packages/coding-agents && npx vitest run test/unit/handler-resume.test.ts +``` + +**Commit:** + +``` +git add packages/coding-agents/src/entity/handler.ts packages/coding-agents/test/unit/handler-resume.test.ts +git commit -m 
"feat(coding-agents): materialise nativeJsonl on cold-boot for --resume" +``` + +--- + +## Phase 2 — Add `nativeJsonl` to `register.ts` + update `FakeCtx` helper (sequential) + +### Task 2.1 — Register `nativeJsonl` collection in entity definition + +**Files:** + +- Modify: `packages/coding-agents/src/entity/register.ts` + +- [ ] **Step 1: Add `CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE` and `nativeJsonlRowSchema` imports** + +```ts +import { + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE, // ← add + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + eventRowSchema, + lifecycleRowSchema, + nativeJsonlRowSchema, // ← add + runRowSchema, + sessionMetaRowSchema, +} from './collections' +``` + +- [ ] **Step 2: Add `nativeJsonl` entry to the `state` object in `registry.define`** + +```ts +state: { + sessionMeta: { + schema: sessionMetaRowSchema, + type: CODING_AGENT_SESSION_META_COLLECTION_TYPE, + primaryKey: `key`, + }, + runs: { + schema: runRowSchema, + type: CODING_AGENT_RUNS_COLLECTION_TYPE, + primaryKey: `key`, + }, + events: { + schema: eventRowSchema, + type: CODING_AGENT_EVENTS_COLLECTION_TYPE, + primaryKey: `key`, + }, + lifecycle: { + schema: lifecycleRowSchema, + type: CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + primaryKey: `key`, + }, + nativeJsonl: { // ← NEW + schema: nativeJsonlRowSchema, + type: CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE, + primaryKey: `key`, + }, +}, +``` + +- [ ] **Step 3: TypeScript check** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/coding-agents/src/entity/register.ts +git commit -m "feat(coding-agents): register nativeJsonl collection in coding-agent entity definition" +``` + +--- + +### Task 2.2 — Integration test: lossless resume (Docker-gated) + +**Files:** + +- Create: `packages/coding-agents/test/integration/slice-b.test.ts` + +This test extends the FakeCtx pattern 
from `slice-a.test.ts` with `nativeJsonl` collection support. It is Docker-gated (`DOCKER=1`). + +The test verifies: after a first prompt completes and the sandbox goes idle, a second prompt on the same agent (which triggers a cold-boot) references the prior response — proving `--resume` is working. + +- [ ] **Step 1: Write the test** + +```ts +// packages/coding-agents/test/integration/slice-b.test.ts +import { describe, it, expect, beforeAll } from 'vitest' +import { + LocalDockerProvider, + StdioBridge, + WorkspaceRegistry, + LifecycleManager, +} from '../../src' +import { makeCodingAgentHandler } from '../../src/entity/handler' +import { buildTestImage, TEST_IMAGE_TAG } from '../support/build-image' +import { loadTestEnv } from '../support/env' + +const SHOULD_RUN = process.env.DOCKER === `1` +const describeMaybe = SHOULD_RUN ? describe : describe.skip + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k: string) { + return rows.get(k) + }, + get toArray(): Array { + return Array.from(rows.values()) + }, + } +} + +function makeFakeCtx(entityUrl: string, args: Record) { + const state = { + sessionMeta: makeCollection(), + runs: makeCollection(), + events: makeCollection(), + lifecycle: makeCollection(), + nativeJsonl: makeCollection(), + inbox: makeCollection(), + } + let runCounter = 0 + const ctx: any = { + entityUrl, + entityType: `coding-agent`, + args, + tags: {}, + firstWake: false, + db: { + collections: state, + actions: { + sessionMeta_insert: ({ row }: any) => + state.sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ key, updater }: any) => { + const r = state.sessionMeta.rows.get(key) + if (r) updater(r) + }, + runs_insert: ({ row }: any) => state.runs.rows.set(row.key, row), + runs_update: ({ key, updater }: any) => { + const r = state.runs.rows.get(key) + if (r) updater(r) + }, + events_insert: ({ row }: any) 
=> state.events.rows.set(row.key, row), + lifecycle_insert: ({ row }: any) => + state.lifecycle.rows.set(row.key, row), + nativeJsonl_insert: ({ row }: any) => + state.nativeJsonl.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent: any = { key, status: undefined, response: `` } + state.runs.rows.set(key, ent) + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: () => undefined, + } + return { ctx, state } +} + +describeMaybe(`Slice B — resume integration`, () => { + beforeAll(async () => { + await buildTestImage() + }, 600_000) + + it(`second prompt references prior turn content (lossless resume)`, async () => { + const env = loadTestEnv() + const provider = new LocalDockerProvider({ image: TEST_IMAGE_TAG }) + const bridge = new StdioBridge() + const wr = new WorkspaceRegistry() + const lm = new LifecycleManager({ provider, bridge }) + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1500, + coldBootBudgetMs: 60_000, + runTimeoutMs: 120_000, + }, + env: () => ({ ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }), + }) + + const agentId = `/test/coding-agent/resume-${Date.now().toString(36)}` + const args = { + kind: `claude`, + workspaceType: `volume`, + workspaceName: `slice-b-resume-${Date.now().toString(36)}`, + idleTimeoutMs: 1500, + } + const { ctx, state } = makeFakeCtx(agentId, args) + + // ── First wake: init ────────────────────────────────────────────────────── + await handler(ctx, { type: `message_received` }) + expect(state.sessionMeta.get(`current`).status).toBe(`cold`) + + // ── First prompt: establish a memorable fact ─────────────────────────────── + state.inbox.rows.set(`i1`, { + key: `i1`, + message_type: `prompt`, + payload: { + text: `Remember the secret code word: BANANA. 
Reply with "Acknowledged: BANANA" and nothing else.`, + }, + }) + await handler(ctx, { type: `message_received` }) + + const meta1 = state.sessionMeta.get(`current`) + expect(meta1.status).toBe(`idle`) + expect(meta1.nativeSessionId).toBeDefined() + + const runs1 = Array.from(state.runs.rows.values()) as any[] + expect(runs1).toHaveLength(1) + expect(runs1[0].status).toBe(`completed`) + + // Verify nativeJsonl rows were collected. + const nativeRows = Array.from(state.nativeJsonl.rows.values()) as any[] + expect(nativeRows.length).toBeGreaterThan(0) + + // ── Wait past idle timeout so sandbox stops ─────────────────────────────── + await new Promise((r) => setTimeout(r, 2500)) + expect([`stopped`, `unknown`]).toContain(await provider.status(agentId)) + + // ── Second prompt: ask about the fact from the first turn ───────────────── + state.inbox.rows.set(`i2`, { + key: `i2`, + message_type: `prompt`, + payload: { + text: `What was the secret code word I asked you to remember? Reply with just the word.`, + }, + }) + await handler(ctx, { type: `message_received` }) + + const runs2 = Array.from(state.runs.rows.values()) as any[] + expect(runs2.length).toBeGreaterThanOrEqual(2) + const lastRun = runs2[runs2.length - 1] + expect(lastRun.status).toBe(`completed`) + + // ── Assert lossless resume: response must contain BANANA ────────────────── + expect(lastRun.responseText?.toUpperCase()).toContain(`BANANA`) + + // ── Verify resume.restored lifecycle row was emitted ───────────────────── + const lifecycleRows = Array.from(state.lifecycle.rows.values()) as any[] + const resumeRow = lifecycleRows.find( + (r: any) => r.event === `resume.restored` + ) + expect(resumeRow).toBeDefined() + + // Cleanup + await provider.destroy(agentId).catch(() => undefined) + }, 360_000) +}) +``` + +- [ ] **Step 2: Run (skip if not in Docker environment)** + +```bash +# Without Docker (skips): +cd packages/coding-agents && npx vitest run test/integration/slice-b.test.ts + +# With Docker (real 
run): +cd packages/coding-agents && DOCKER=1 npx vitest run test/integration/slice-b.test.ts +``` + +**Commit:** + +``` +git add packages/coding-agents/test/integration/slice-b.test.ts +git commit -m "test(coding-agents): integration test for lossless resume (Slice B)" +``` + +--- + +### Task 2.3 — Full coding-agents test suite pass + +- [ ] **Step 1: Run all unit tests** + +```bash +cd packages/coding-agents && npx vitest run test/unit/ +``` + +- [ ] **Step 2: Verify no TypeScript errors across the package** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +**Commit:** (no new files; fix any failures discovered) + +--- + +## Phase 3 — Horton tool migration (parallel agents) + +### Task 3.1 — Create `spawn-coding-agent.ts` and `prompt-coding-agent.ts` + +**Files:** + +- Create: `packages/agents/src/tools/spawn-coding-agent.ts` +- Create: `packages/agents/src/tools/prompt-coding-agent.ts` + +- [ ] **Step 1: Write `spawn-coding-agent.ts`** + +```ts +// packages/agents/src/tools/spawn-coding-agent.ts +import { Type } from '@sinclair/typebox' +import { nanoid } from 'nanoid' +import { serverLog } from '../log' +import type { AgentTool } from '@mariozechner/pi-agent-core' +import type { HandlerContext } from '@electric-ax/agents-runtime' + +export function createSpawnCodingAgentTool(ctx: HandlerContext): AgentTool { + return { + name: `spawn_coding_agent`, + label: `Spawn Coding Agent`, + description: `Spawn a coding-agent subagent that drives a Claude Code CLI session inside a Docker sandbox with its own persistent workspace. Use when the user asks for code changes, file edits, debugging, or any task that benefits from a real coding agent with full tool access. The coding-agent is long-lived — its URL stays valid across many turns, so keep prompting it via prompt_coding_agent without re-spawning. 
End your turn after spawning; you'll be woken when the coding-agent finishes its first reply.`, + parameters: Type.Object({ + prompt: Type.String({ + description: `First user message sent to the coding agent. This kicks off the run — be concrete: describe the task, mention the files/paths involved, and what form of answer you want back.`, + }), + workspace_name: Type.Optional( + Type.String({ + description: `Optional stable name for the Docker volume workspace. If omitted, a name is derived from the agent id. Reuse the same name across sessions to persist state.`, + }) + ), + idle_timeout_ms: Type.Optional( + Type.Number({ + description: `Milliseconds of inactivity after which the sandbox is hibernated. Defaults to 300000 (5 min). The workspace persists; the next prompt cold-boots the container.`, + }) + ), + }), + execute: async (_toolCallId, params) => { + const { prompt, workspace_name, idle_timeout_ms } = params as { + prompt: string + workspace_name?: string + idle_timeout_ms?: number + } + if (typeof prompt !== `string` || prompt.length === 0) { + return { + content: [ + { + type: `text` as const, + text: `Error: prompt is required and must be a non-empty string.`, + }, + ], + details: { spawned: false }, + } + } + + const id = nanoid(10) + const spawnArgs: Record = { + kind: `claude`, + workspaceType: `volume`, + } + if (workspace_name) spawnArgs.workspaceName = workspace_name + if (idle_timeout_ms != null) spawnArgs.idleTimeoutMs = idle_timeout_ms + + try { + const handle = await ctx.spawn(`coding-agent`, id, spawnArgs, { + initialMessage: { text: prompt }, + wake: { on: `runFinished`, includeResponse: true }, + }) + const agentUrl = handle.entityUrl + + return { + content: [ + { + type: `text` as const, + text: `Coding agent dispatched at ${agentUrl}. End your turn — when the coding agent finishes its current reply you'll be woken with the response. 
To send follow-up prompts to the same agent, call prompt_coding_agent with this URL.`, + }, + ], + details: { spawned: true, agentUrl }, + } + } catch (err) { + serverLog.warn( + `[spawn_coding_agent tool] failed to spawn coding-agent ${id}: ${err instanceof Error ? err.message : String(err)}`, + err instanceof Error ? err : undefined + ) + return { + content: [ + { + type: `text` as const, + text: `Error spawning coding agent: ${err instanceof Error ? err.message : `Unknown error`}`, + }, + ], + details: { spawned: false }, + } + } + }, + } +} +``` + +- [ ] **Step 2: Write `prompt-coding-agent.ts`** + +```ts +// packages/agents/src/tools/prompt-coding-agent.ts +import { Type } from '@sinclair/typebox' +import { serverLog } from '../log' +import type { AgentTool } from '@mariozechner/pi-agent-core' +import type { HandlerContext } from '@electric-ax/agents-runtime' + +export function createPromptCodingAgentTool(ctx: HandlerContext): AgentTool { + return { + name: `prompt_coding_agent`, + label: `Prompt Coding Agent`, + description: `Send a follow-up prompt to a coding agent you previously spawned. The prompt is queued on the agent's inbox and runs as the next CLI turn (resuming from prior context). End your turn after calling — you'll be woken when the agent's reply lands.`, + parameters: Type.Object({ + coding_agent_url: Type.String({ + description: `Entity URL returned by spawn_coding_agent, e.g. "/coding-agent/abc123". Must be the URL of a coding agent you previously spawned in this conversation.`, + }), + prompt: Type.String({ + description: `Follow-up message to send to the coding agent. 
Reference earlier context the agent already saw rather than restating it from scratch.`, + }), + }), + execute: async (_toolCallId, params) => { + const { coding_agent_url, prompt } = params as { + coding_agent_url: string + prompt: string + } + if ( + typeof coding_agent_url !== `string` || + !coding_agent_url.startsWith(`/coding-agent/`) + ) { + return { + content: [ + { + type: `text` as const, + text: `Error: coding_agent_url must be a path like "/coding-agent/".`, + }, + ], + details: { sent: false }, + } + } + if (typeof prompt !== `string` || prompt.length === 0) { + return { + content: [ + { + type: `text` as const, + text: `Error: prompt is required and must be a non-empty string.`, + }, + ], + details: { sent: false }, + } + } + + try { + ctx.send(coding_agent_url, { text: prompt }) + return { + content: [ + { + type: `text` as const, + text: `Prompt queued for ${coding_agent_url}. End your turn — you'll be woken when the coding agent's reply lands.`, + }, + ], + details: { sent: true, agentUrl: coding_agent_url }, + } + } catch (err) { + serverLog.warn( + `[prompt_coding_agent tool] failed to send to ${coding_agent_url}: ${err instanceof Error ? err.message : String(err)}`, + err instanceof Error ? err : undefined + ) + return { + content: [ + { + type: `text` as const, + text: `Error sending prompt to coding agent: ${err instanceof Error ? 
err.message : `Unknown error`}`, + }, + ], + details: { sent: false }, + } + } + }, + } +} +``` + +**Commit:** + +``` +git add packages/agents/src/tools/spawn-coding-agent.ts packages/agents/src/tools/prompt-coding-agent.ts +git commit -m "feat(agents): add spawn_coding_agent and prompt_coding_agent tools" +``` + +--- + +### Task 3.2 — Update Horton: swap tool list + system prompt + imports + +**Files:** + +- Modify: `packages/agents/src/agents/horton.ts` + +- [ ] **Step 1: Replace legacy import** + +Old: + +```ts +import { + createPromptCoderTool, + createSpawnCoderTool, +} from '../tools/spawn-coder' +``` + +New: + +```ts +import { createSpawnCodingAgentTool } from '../tools/spawn-coding-agent' +import { createPromptCodingAgentTool } from '../tools/prompt-coding-agent' +``` + +- [ ] **Step 2: Update `createHortonTools` return array** + +Old: + +```ts +createSpawnCoderTool(ctx), +createPromptCoderTool(ctx), +``` + +New: + +```ts +createSpawnCodingAgentTool(ctx), +createPromptCodingAgentTool(ctx), +``` + +- [ ] **Step 3: Update system prompt tool list (lines ~218-219)** + +Old: + +``` +- spawn_coder: spawn a long-lived coding agent (Claude Code or Codex CLI) for code changes, file edits, debugging +- prompt_coder: send a follow-up prompt to a coder you previously spawned +``` + +New: + +``` +- spawn_coding_agent: spawn a long-lived coding agent (Claude Code CLI) in a Docker sandbox for code changes, file edits, debugging +- prompt_coding_agent: send a follow-up prompt to a coding agent you previously spawned +``` + +- [ ] **Step 4: Update "When to spawn a coder" section (~lines 247-252)** + +Old: + +``` +# When to spawn a coder +Spawn a coder when the user asks for code changes, file edits, debugging, or any task that benefits from a real coding agent with full tool access (bash, file edits, etc.). A coder runs Claude Code or Codex CLI under the hood. + +Unlike a worker, a coder is **long-lived**: its URL stays valid across many turns. 
Spawn once with spawn_coder, then keep prompting it via prompt_coder for follow-ups — don't spawn a new coder for each turn. Treat the coder URL like a chat handle. + +After calling spawn_coder or prompt_coder, end your turn. When the coder's reply lands, you'll be woken with the response in the wake message — relay it (or a summary) back to the user, and call prompt_coder again if there's a follow-up. +``` + +New: + +``` +# When to spawn a coding agent +Spawn a coding agent when the user asks for code changes, file edits, debugging, or any task that benefits from a real coding agent with full tool access (bash, file edits, etc.). A coding agent runs Claude Code CLI inside a Docker sandbox with a persistent workspace. + +Unlike a worker, a coding agent is **long-lived**: its URL stays valid across many turns and its session context carries over (via resume). Spawn once with spawn_coding_agent, then keep prompting it via prompt_coding_agent for follow-ups — don't spawn a new agent for each turn. Treat the coding agent URL like a chat handle. + +After calling spawn_coding_agent or prompt_coding_agent, end your turn. When the agent's reply lands, you'll be woken with the response in the wake message — relay it (or a summary) back to the user, and call prompt_coding_agent again if there's a follow-up. 
+``` + +- [ ] **Step 5: TypeScript check** + +```bash +cd packages/agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/agents/src/agents/horton.ts +git commit -m "feat(agents): migrate Horton from spawn_coder/prompt_coder to spawn_coding_agent/prompt_coding_agent" +``` + +--- + +## Phase 4 — Legacy deletion (parallel agents) + +### Task 4.1 — Delete `coding-session.ts` and `spawn-coder.ts` + +**Files:** + +- Delete: `packages/agents/src/agents/coding-session.ts` +- Delete: `packages/agents/src/tools/spawn-coder.ts` + +- [ ] **Step 1: Delete files** + +```bash +rm packages/agents/src/agents/coding-session.ts +rm packages/agents/src/tools/spawn-coder.ts +``` + +- [ ] **Step 2: Remove `registerCodingSession` from `bootstrap.ts`** + +In `packages/agents/src/bootstrap.ts`: + +Remove line 12: + +```ts +import { registerCodingSession } from './agents/coding-session' +``` + +Remove line 124: + +```ts +registerCodingSession(registry, { defaultWorkingDirectory: cwd }) +``` + +Remove line 125: + +```ts +typeNames.push('coder') +``` + +- [ ] **Step 3: TypeScript check** + +```bash +cd packages/agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/agents/src/bootstrap.ts +git rm packages/agents/src/agents/coding-session.ts packages/agents/src/tools/spawn-coder.ts +git commit -m "feat(agents): remove legacy coder entity (coding-session.ts, spawn-coder.ts) and unregister from bootstrap" +``` + +--- + +### Task 4.2 — Remove legacy runtime types from `agents-runtime` + +**Files:** + +- Modify: `packages/agents-runtime/src/types.ts` +- Modify: `packages/agents-runtime/src/context-factory.ts` +- Modify: `packages/agents-runtime/src/index.ts` + +The legacy types to remove from `types.ts` (lines 734-818 in the current file): + +- `CodingSessionStatus` +- `CodingSessionEventRow` +- `CodingSessionMeta` +- `CodingSessionMetaRow` +- `UseCodingAgentOptions` +- `CodingSessionHandle` + +The `HandlerContext` interface method to remove (`useCodingAgent` at 
line 1002). + +The `useCodingAgent` implementation in `context-factory.ts` (lines 566-634). + +- [ ] **Step 1: Delete legacy type blocks from `types.ts`** + +Remove the entire block from `export type CodingSessionStatus` through the closing `}` of `CodingSessionHandle`. Keep everything from `// ─── Coding Agent (Slice A) ───` onward. + +- [ ] **Step 2: Remove `useCodingAgent` from `HandlerContext` interface in `types.ts`** + +Find and remove the `useCodingAgent(id: string, opts: UseCodingAgentOptions): CodingSessionHandle` line (and any JSDoc above it) from the `HandlerContext` interface. + +- [ ] **Step 3: Remove `useCodingAgent` implementation from `context-factory.ts`** + +Remove the `useCodingAgent` function body (lines 566-634) and its surrounding infrastructure. Also remove the imports of `CodingSessionEventRow`, `CodingSessionHandle`, `CodingSessionMeta`, `CodingSessionStatus`, `UseCodingAgentOptions` from the types import at the top of `context-factory.ts`. + +Remove `CODING_SESSION_ENTITY_TYPE` and `codingSessionEntityUrl` imports from `context-factory.ts` if they are only used by `useCodingAgent`. + +- [ ] **Step 4: Remove legacy exports from `index.ts`** + +In `packages/agents-runtime/src/index.ts`: + +Remove from the type export block (lines 24-41 area): + +- `CodingSessionEventRow` +- `CodingSessionHandle` +- `CodingSessionMeta` +- `CodingSessionMetaRow` +- `CodingSessionStatus` +- `UseCodingAgentOptions` + +Remove from the observation-sources export block (lines 198-210 area): + +- `CODING_SESSION_ENTITY_TYPE` +- `CODING_SESSION_META_COLLECTION_TYPE` +- `CODING_SESSION_CURSOR_COLLECTION_TYPE` +- `CODING_SESSION_EVENT_COLLECTION_TYPE` +- `codingSession` +- `codingSessionEntityUrl` + +**Note:** Keep `CODING_SESSION_*` constants in `observation-sources.ts` itself for now (they may be referenced by existing entity streams in the database). Only remove them from the public re-export in `index.ts`. 
+ +- [ ] **Step 5: TypeScript check across all affected packages** + +Run from the repository root; each check runs in a subshell so the working directory is unchanged for the next command: + +```bash +(cd packages/agents-runtime && npx tsc --noEmit) +(cd packages/agents && npx tsc --noEmit) +``` + +**Commit:** + +``` +git add packages/agents-runtime/src/types.ts packages/agents-runtime/src/context-factory.ts packages/agents-runtime/src/index.ts +git commit -m "feat(agents-runtime): remove legacy CodingSession types and useCodingAgent implementation" +``` + +--- + +### Task 4.3 — UI: extend `StatusDot` + `ToolCallView` + +**Files:** + +- Modify: `packages/agents-server-ui/src/components/StatusDot.tsx` +- Modify: `packages/agents-server-ui/src/components/EntityHeader.tsx` +- Modify: `packages/agents-server-ui/src/components/ToolCallView.tsx` + +- [ ] **Step 1: Add coding-agent status colors to `StatusDot.tsx`** + +```ts +const STATUS_COLORS: Record<string, string> = { + active: `#3b82f6`, + running: `#3b82f6`, + idle: `#22c55e`, + spawning: `#eab308`, + stopped: `#cbd5e1`, + // coding-agent statuses (Slice B) + cold: `#9ca3af`, + starting: `#eab308`, + stopping: `#eab308`, + error: `#ef4444`, + destroyed: `#6b7280`, +} +``` + +Also update `STATUS_COLOR` in `EntityHeader.tsx` to match: + +```ts +const STATUS_COLOR: Record< + string, + `blue` | `green` | `amber` | `gray` | `red` +> = { + active: `blue`, + running: `blue`, + idle: `green`, + spawning: `amber`, + stopped: `gray`, + cold: `gray`, + starting: `amber`, + stopping: `amber`, + error: `red`, + destroyed: `gray`, +} +``` + +- [ ] **Step 2: Add `spawn_coding_agent` and `prompt_coding_agent` cases to `ToolCallView.tsx`** + +In `getSummary`, after the `prompt_coder` case: + +```ts +case `spawn_coding_agent`: +case `prompt_coding_agent`: + return truncate((args.prompt as string) ?? 
``, 60) +``` + +**Commit:** + +``` +git add packages/agents-server-ui/src/components/StatusDot.tsx packages/agents-server-ui/src/components/EntityHeader.tsx packages/agents-server-ui/src/components/ToolCallView.tsx +git commit -m "feat(agents-server-ui): extend status colors for coding-agent states and add new tool cases" +``` + +--- + +### Task 4.4 — UI: create `CodingAgentView`, `useCodingAgent`, `CodingAgentTimeline`, `CodingAgentSpawnDialog` + +**Files:** + +- Create: `packages/agents-server-ui/src/hooks/useCodingAgent.ts` +- Create: `packages/agents-server-ui/src/components/CodingAgentView.tsx` +- Create: `packages/agents-server-ui/src/components/CodingAgentTimeline.tsx` +- Create: `packages/agents-server-ui/src/components/CodingAgentSpawnDialog.tsx` + +- [ ] **Step 1: Write `useCodingAgent.ts`** + +```ts +// packages/agents-server-ui/src/hooks/useCodingAgent.ts +import { useEffect, useMemo, useRef, useState } from 'react' +import { useLiveQuery } from '@tanstack/react-db' +import { + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, +} from '@electric-ax/coding-agents' +import { connectEntityStream } from '../lib/entity-connection' +import type { EntityStreamDBWithActions } from '@electric-ax/agents-runtime' + +export type CodingAgentSliceAStatus = + | `cold` + | `starting` + | `idle` + | `running` + | `stopping` + | `error` + | `destroyed` + +export interface SessionMetaRow { + key: string + status: CodingAgentSliceAStatus + kind: `claude` + pinned: boolean + workspaceIdentity: string + idleTimeoutMs: number + keepWarm: boolean + instanceId?: string + lastError?: string + nativeSessionId?: string +} + +export interface RunRow { + key: string + startedAt: number + endedAt?: number + status: `running` | `completed` | `failed` + finishReason?: string + promptInboxKey: string + responseText?: string +} + +export interface EventRow { + key: string + 
runId: string + seq: number + ts: number + type: string + payload: Record<string, unknown> +} + +export interface LifecycleRow { + key: string + ts: number + event: string + detail?: string +} + +const CODING_AGENT_STATE = { + sessionMeta: { + type: CODING_AGENT_SESSION_META_COLLECTION_TYPE, + primaryKey: `key`, + }, + runs: { + type: CODING_AGENT_RUNS_COLLECTION_TYPE, + primaryKey: `key`, + }, + events: { + type: CODING_AGENT_EVENTS_COLLECTION_TYPE, + primaryKey: `key`, + }, + lifecycle: { + type: CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + primaryKey: `key`, + }, +} as const + +export interface UseCodingAgentResult { + db: EntityStreamDBWithActions | null + meta: SessionMetaRow | undefined + runs: Array<RunRow> + events: Array<EventRow> + lifecycle: Array<LifecycleRow> + loading: boolean + error: string | null +} + +export function useCodingAgent( + baseUrl: string | null, + entityUrl: string | null +): UseCodingAgentResult { + const [db, setDb] = useState<EntityStreamDBWithActions | null>(null) + const [loading, setLoading] = useState(false) + const [error, setError] = useState<string | null>(null) + const closeRef = useRef<(() => void) | null>(null) + + useEffect(() => { + setDb(null) + setError(null) + + if (!baseUrl || !entityUrl) { + setLoading(false) + return + } + + let cancelled = false + setLoading(true) + + connectEntityStream({ + baseUrl, + entityUrl, + customState: CODING_AGENT_STATE, + }) + .then((result) => { + if (cancelled) { + result.close() + return + } + closeRef.current = result.close + setDb(result.db) + setLoading(false) + }) + .catch((err) => { + if (!cancelled) { + console.error(`Failed to connect coding-agent stream`, { + baseUrl, + entityUrl, + error: err, + }) + setError(err instanceof Error ? 
err.message : String(err)) + setLoading(false) + } + }) + + return () => { + cancelled = true + closeRef.current?.() + closeRef.current = null + } + }, [baseUrl, entityUrl]) + + const metaCollection = db?.collections.sessionMeta + const runsCollection = db?.collections.runs + const eventsCollection = db?.collections.events + const lifecycleCollection = db?.collections.lifecycle + + const { data: metaRows = [] } = useLiveQuery( + (q) => (metaCollection ? q.from({ m: metaCollection }) : undefined), + [metaCollection] + ) + const { data: runRows = [] } = useLiveQuery( + (q) => + runsCollection + ? q.from({ r: runsCollection }).orderBy(({ r }) => r.$key, `asc`) + : undefined, + [runsCollection] + ) + const { data: eventRows = [] } = useLiveQuery( + (q) => + eventsCollection + ? q.from({ e: eventsCollection }).orderBy(({ e }) => e.$key, `asc`) + : undefined, + [eventsCollection] + ) + const { data: lifecycleRows = [] } = useLiveQuery( + (q) => + lifecycleCollection + ? q.from({ l: lifecycleCollection }).orderBy(({ l }) => l.$key, `asc`) + : undefined, + [lifecycleCollection] + ) + + const meta = useMemo( + () => (metaRows as unknown as Array<SessionMetaRow>)[0], + [metaRows] + ) + const runs = useMemo(() => runRows as unknown as Array<RunRow>, [runRows]) + const events = useMemo( + () => eventRows as unknown as Array<EventRow>, + [eventRows] + ) + const lifecycle = useMemo( + () => lifecycleRows as unknown as Array<LifecycleRow>, + [lifecycleRows] + ) + + return { db, meta, runs, events, lifecycle, loading, error } +} +``` + +- [ ] **Step 2: Write `CodingAgentTimeline.tsx`** + +```tsx +// packages/agents-server-ui/src/components/CodingAgentTimeline.tsx +import { memo, useMemo, useState } from 'react' +import { Badge, Flex, ScrollArea, Text } from '@radix-ui/themes' +import { Streamdown } from 'streamdown' +import { createCodePlugin } from '../lib/codeHighlighter' +import type { + SessionMetaRow, + RunRow, + EventRow, + LifecycleRow, +} from '../hooks/useCodingAgent' + +const codePluginSingleton = createCodePlugin() 
+const streamdownPlugins = { code: codePluginSingleton } + +export function CodingAgentTimeline({ + meta, + runs, + events, + lifecycle, + loading, + error, +}: { + meta: SessionMetaRow | undefined + runs: Array + events: Array + lifecycle: Array + loading: boolean + error: string | null +}): React.ReactElement { + const items = useMemo( + () => renderItems(events, lifecycle), + [events, lifecycle] + ) + + return ( + + + {meta && } + {error && ( + + {error} + + )} + {!loading && + events.length === 0 && + lifecycle.length === 0 && + !error && ( + + No events yet. Send a prompt to start the agent. + + )} + {items} + + + ) +} + +function AgentMetaRow({ + meta, + runs, +}: { + meta: SessionMetaRow + runs: Array +}): React.ReactElement { + const completedRuns = runs.filter((r) => r.status === `completed`).length + const failedRuns = runs.filter((r) => r.status === `failed`).length + return ( + + + {meta.kind} + + + {meta.workspaceIdentity} + + {completedRuns > 0 && ( + + {completedRuns} run{completedRuns !== 1 ? `s` : ``} + + )} + {failedRuns > 0 && ( + + {failedRuns} failed + + )} + {meta.pinned && ( + + pinned + + )} + + ) +} + +function renderItems( + events: Array, + lifecycle: Array +): Array { + // Pair tool_call with tool_result by callId. + const resultsByCallId = new Map() + const callsByCallId = new Map() + for (const e of events) { + const callId = e.payload.callId as string | undefined + if (!callId) continue + if (e.type === `tool_result`) resultsByCallId.set(callId, e) + else if (e.type === `tool_call`) callsByCallId.set(callId, e) + } + + const rendered = new Set() + const items: Array = [] + + // Merge events + lifecycle, sorted by timestamp. 
+ type MergedItem = + | { kind: `event`; ts: number; key: string; e: EventRow } + | { kind: `lifecycle`; ts: number; key: string; l: LifecycleRow } + + const merged: MergedItem[] = [ + ...events.map((e) => ({ + kind: `event` as const, + ts: e.ts, + key: `e:${e.key}`, + e, + })), + ...lifecycle.map((l) => ({ + kind: `lifecycle` as const, + ts: l.ts, + key: `l:${l.key}`, + l, + })), + ].sort((a, b) => a.ts - b.ts) + + for (const item of merged) { + if (item.kind === `lifecycle`) { + items.push() + continue + } + + const e = item.e + const key = e.key + if (rendered.has(key)) continue + + switch (e.type) { + case `session_init`: + items.push() + rendered.add(key) + break + case `user_message`: + items.push() + rendered.add(key) + break + case `assistant_message`: + items.push() + rendered.add(key) + break + case `tool_call`: { + const callId = e.payload.callId as string | undefined + const result = callId ? resultsByCallId.get(callId) : undefined + if (result) rendered.add(result.key) + items.push() + rendered.add(key) + break + } + case `tool_result`: { + const callId = e.payload.callId as string | undefined + if (callId && callsByCallId.has(callId)) { + // Will be rendered with its tool_call. + rendered.add(key) + break + } + // Orphan result (call is before tail cursor). + items.push() + rendered.add(key) + break + } + case `turn_complete`: + case `session_end`: + case `compaction`: + items.push() + rendered.add(key) + break + default: + rendered.add(key) + } + } + + return items +} + +function LifecycleEventRow({ row }: { row: LifecycleRow }): React.ReactElement { + const label: Record = { + 'sandbox.starting': `Sandbox starting`, + 'sandbox.started': `Sandbox started`, + 'sandbox.stopped': `Sandbox stopped`, + 'sandbox.failed': `Sandbox failed`, + pin: `Pinned`, + release: `Released`, + 'orphan.detected': `Orphan detected`, + 'resume.restored': `Session resumed`, + } + return ( + + + {new Date(row.ts).toLocaleTimeString()} + + + {label[row.event] ?? 
row.event} + {row.detail ? ` — ${row.detail}` : ``} + + + ) +} + +function SessionInitRow({ event }: { event: EventRow }): React.ReactElement { + const sessionId = event.payload.sessionId as string | undefined + return ( + + + Session started{sessionId ? ` (${sessionId.slice(0, 8)}…)` : ``} + + + ) +} + +const AssistantMessageRow = memo(function AssistantMessageRow({ + event, +}: { + event: EventRow +}): React.ReactElement { + const text = (event.payload.text as string | undefined) ?? `` + return ( + + + Assistant + +
+ +
+
+ ) +}) + +function UserMessageRow({ event }: { event: EventRow }): React.ReactElement { + const text = (event.payload.text as string | undefined) ?? `` + const pending = !!event.payload._pending + return ( + + + You{pending ? ` (queued)` : ``} + +
+ {text} +
+
+ ) +} + +function ToolCallRow({ + call, + result, +}: { + call: EventRow + result: EventRow | undefined +}): React.ReactElement { + const [open, setOpen] = useState(false) + const toolName = (call.payload.toolName as string | undefined) ?? `tool` + const args = call.payload.args as Record | undefined + return ( + setOpen((o) => !o)} + > + + + {toolName} + + {result && ( + + done + + )} + + {open && ( +
+          {JSON.stringify(args, null, 2)}
+        
+ )} +
+ ) +} + +function OrphanResultRow({ event }: { event: EventRow }): React.ReactElement { + return ( + + + Tool result (call before window) + + + ) +} + +function SystemEventRow({ event }: { event: EventRow }): React.ReactElement { + const label: Record = { + turn_complete: `Turn complete`, + session_end: `Session ended`, + compaction: `Context compacted`, + } + return ( + + + {label[event.type] ?? event.type} + + + ) +} +``` + +- [ ] **Step 3: Write `CodingAgentView.tsx`** + +```tsx +// packages/agents-server-ui/src/components/CodingAgentView.tsx +import { Flex } from '@radix-ui/themes' +import { useCodingAgent } from '../hooks/useCodingAgent' +import { CodingAgentTimeline } from './CodingAgentTimeline' +import { MessageInput } from './MessageInput' + +export function CodingAgentView({ + baseUrl, + entityUrl, + entityStopped, +}: { + baseUrl: string + entityUrl: string + entityStopped: boolean +}): React.ReactElement { + const { db, meta, runs, events, lifecycle, loading, error } = useCodingAgent( + baseUrl, + entityUrl + ) + + return ( + + + + + ) +} +``` + +- [ ] **Step 4: Write `CodingAgentSpawnDialog.tsx`** + +```tsx +// packages/agents-server-ui/src/components/CodingAgentSpawnDialog.tsx +import { useCallback, useMemo, useState } from 'react' +import { Button, Dialog, Flex, Text } from '@radix-ui/themes' + +type WorkspaceMode = `volume` | `bindMount` + +interface CodingAgentSpawnDialogProps { + open: boolean + onOpenChange: (open: boolean) => void + onSpawn: (args: Record) => void +} + +export function CodingAgentSpawnDialog({ + open, + onOpenChange, + onSpawn, +}: CodingAgentSpawnDialogProps): React.ReactElement { + const [workspaceMode, setWorkspaceMode] = useState(`volume`) + const [workspaceName, setWorkspaceName] = useState(``) + const [hostPath, setHostPath] = useState(``) + const [initialPrompt, setInitialPrompt] = useState(``) + + const canSubmit = useMemo(() => { + if (workspaceMode === `bindMount`) return hostPath.trim().length > 0 + return true + }, 
[workspaceMode, hostPath]) + + const handleSubmit = useCallback( + (e: React.FormEvent) => { + e.preventDefault() + if (!canSubmit) return + const args: Record = { + kind: `claude`, + workspaceType: workspaceMode, + } + if (workspaceMode === `volume` && workspaceName.trim()) { + args.workspaceName = workspaceName.trim() + } + if (workspaceMode === `bindMount`) { + args.workspaceHostPath = hostPath.trim() + } + if (initialPrompt.trim()) { + args._initialPrompt = initialPrompt.trim() + } + onSpawn(args) + }, + [canSubmit, workspaceMode, workspaceName, hostPath, initialPrompt, onSpawn] + ) + + const inputStyle: React.CSSProperties = { + width: `100%`, + padding: `6px 8px`, + borderRadius: `var(--radius-2)`, + border: `1px solid var(--gray-a7)`, + background: `var(--gray-a2)`, + fontSize: `var(--font-size-2)`, + fontFamily: `var(--default-font-family)`, + color: `var(--gray-12)`, + boxSizing: `border-box`, + } + + return ( + + + New coding agent + + Spawn a Claude Code CLI session inside a Docker sandbox with a + persistent workspace. + + +
+ + + + Workspace type + + + + + + + + {workspaceMode === `volume` && ( + + + Volume name{` `} + + (optional — leave blank to auto-generate) + + + setWorkspaceName(e.target.value)} + placeholder="my-project" + /> + + )} + + {workspaceMode === `bindMount` && ( + + + Host path{` `} + + * + + + setHostPath(e.target.value)} + placeholder="/Users/me/my-project" + /> + + )} + + + + Initial prompt{` `} + + (optional) + + +