From c2c0f45070fae677fc4caecc462d15b12a13baee Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 00:55:21 +0100 Subject: [PATCH 001/279] docs(specs): add coding-agents platform primitive design Design for a new platform primitive `ctx.spawnCodingAgent()` that runs Claude Code / Codex inside managed sandboxes, with the durable stream as the source of truth and per-workspace volumes shareable across agents under a single-writer lease. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...coding-agents-platform-primitive-design.md | 633 ++++++++++++++++++ 1 file changed, 633 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md diff --git a/docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md b/docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md new file mode 100644 index 0000000000..4b1d36f203 --- /dev/null +++ b/docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md @@ -0,0 +1,633 @@ +# Coding Agents — Platform Primitive + +**Status:** Draft +**Date:** 2026-04-30 +**Author:** Valter Balegas +**Scope:** Add a first-class platform primitive for spawning and observing coding agents (Claude Code, Codex) inside managed sandboxes, with the durable stream as the source of truth. + +## Summary + +Introduce a typed `ctx.spawnCodingAgent()` primitive on `HandlerContext`. The primitive wraps a built-in `coding-agent` entity that runs a CLI (Claude Code or Codex) inside a managed sandbox. The agent's full event history lives in a single durable stream; the sandbox is cattle (recreatable from the stream); workspace state lives in a per-workspace volume that can be shared across agents under a single-writer lease. + +A new `@electric-ax/coding-agents` package owns the sandbox provider, the CLI bridge, and the lifecycle manager. The local-first MVP ships with a Docker provider and a stdio bridge. Remote providers (Modal, Fly, E2B) and a shim-based bridge are designed-for but out of scope for v1. + +The existing `coder` entity (`packages/agents/src/agents/coding-session.ts`) and its tools (`spawn-coder.ts`, `prompt-coder.ts`) are removed and replaced. + +## Goals + +1. **Decouple agent state from compute.** The full event history of a coding agent lives in an append-only durable stream. The sandbox can die at any time; the agent can be reconstructed. +2. **Sandbox isolation.** CLIs run inside a sandbox, not as host child processes. The sandbox provider is pluggable. +3. **Durable resume.** A new sandbox materializes the prior session at the same logical point. Same-kind resume is lossless; cross-kind is semantic. +4. **Native observability.** The entire history surfaces in the existing StreamDB / agents-server-ui flow, with no new sync mechanism. +5. **Composable.** Other entities can spawn coding agents, observe them, send prompts, and react to their events. +6. **Multi-agent ready.** Two coding agents can share a working tree safely (lease-serialized), so a parent entity can run, e.g., a `claude` implementation pass and a `codex` review pass on the same checkout. + +## Non-goals (v1) + +- Remote sandbox providers (Modal, Fly, E2B, Cloudflare). Designed-for; not implemented. +- Shim-in-sandbox bridge. Designed-for; not implemented. +- ACP (Agent Client Protocol) external adapter. +- Replay / time-travel UI scrubber. +- Per-event approve/deny UI for `permission_request`. +- Workspace file browser in the UI. +- Memory-snapshot lifecycle. +- Pre-warmed sandbox pools. +- Multi-tenant authorization beyond what `agents-server` already enforces. + +## Background + +The repo already ships a `coder` entity in `packages/agents/src/agents/coding-session.ts`. It runs `claude` / `codex` as a host child process, mirrors normalized events from the CLI's JSONL transcript into the entity's StreamDB collections via `agent-session-protocol`, and supports `spawn` / `send` from other entities. Its limitations: + +- The CLI runs on the host. No isolation. No per-task filesystem. +- The on-disk JSONL in `~/.claude/projects/...` is the resumable truth, not the durable stream. If the host's home directory is wiped, a session can't be resumed. +- The entity is registered as user-level code in `@electric-ax/agents`, not as a platform primitive. There is no typed API for entity authors. + +The new design treats coding agents as a first-class platform concept, like `useAgent` is for the LLM loop. + +## Architecture + +``` + Entity author code + ┌──────────────────────────────────────────────────────────────┐ + │ ctx.spawnCodingAgent({ kind, workspace, sandbox? }) │ + │ ctx.observeCodingAgent(id) │ + └──────────────────────────────────────────────────────────────┘ + │ exposed by @electric-ax/agents-runtime + ▼ + ┌──────────────────────────────────────────────────────────────┐ + │ CodingAgentHandle · built-in `coding-agent` │ + │ entity registered by @electric-ax/coding-agents │ + └──────────────────────────────────────────────────────────────┘ + │ + ▼ + ┌─────────────────────────┐ ┌─────────────────────────────────┐ + │ Bridge (StdioBridge) │ │ LifecycleManager │ + │ runTurn → events │ │ state machine, idle timers, │ + │ via agent-session- │ │ pin/release, workspace lease │ + │ protocol normalize │ └─────────────────────────────────┘ + └─────────────────────────┘ + │ + ▼ + ┌──────────────────────────────────────────────────────────────┐ + │ SandboxProvider — LocalDockerProvider in v1 │ + │ start · stop · destroy · status · recover │ + └──────────────────────────────────────────────────────────────┘ + │ + ▼ + ┌──────────────────────────────────────────────────────────────┐ + │ Durable Stream (entity log) · Workspace volume (shared) │ + └──────────────────────────────────────────────────────────────┘ +``` + +### Packages + +| Package | Role | +| -------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `@electric-ax/agents-runtime` (existing) | Adds `ctx.spawnCodingAgent` / `ctx.observeCodingAgent` and the `CodingAgentHandle` type. No Docker / CLI knowledge. | +| `@electric-ax/coding-agents` (new) | The plumbing: built-in entity, `SandboxProvider`, `Bridge`, `LifecycleManager`, integration with `agent-session-protocol`. Imported and registered by `agents-server`'s entrypoint. | +| `@electric-ax/agents-server-ui` (existing) | Extends existing `CodingSession*` components for the new status states, header provenance, pin/stop, lifecycle events, and shared-workspace indicator. | +| `agents-server` (existing) | Unchanged. The new entity type slots into existing wake/observe/spawn machinery. | +| `agents-server-conformance-tests` (existing) | Gains a `coding-agent` suite, parameterized by provider. | + +### Removed + +- `packages/agents/src/agents/coding-session.ts` (the `coder` entity) +- `packages/agents/src/tools/spawn-coder.ts` +- `packages/agents/src/tools/prompt-coder.ts` + +Replaced by the new primitive plus tools `spawn_coding_agent` / `prompt_coding_agent` that wrap it for use by Horton. + +## Platform primitive API + +```ts +// Exposed on HandlerContext from @electric-ax/agents-runtime + +interface HandlerContext { + // ... existing fields + + spawnCodingAgent(options: SpawnCodingAgentOptions): Promise + observeCodingAgent(id: string): Promise +} + +interface SpawnCodingAgentOptions { + /** Stable id, scoped to the spawning entity. */ + id: string + + /** Which CLI to run. */ + kind: 'claude' | 'codex' + + /** + * Workspace mount. Workspace identity is the lease key: + * - { type: 'volume', name: 'foo' } → "volume:foo" + * - { type: 'volume' } → "volume:" (default) + * - { type: 'bindMount', hostPath: P } → "bindMount:" + * + * Two agents that resolve to the same identity share the volume and + * are serialized at runTurn boundaries by the workspace lease. + */ + workspace: + | { type: 'volume'; name?: string } + | { type: 'bindMount'; hostPath: string } + + /** + * Optional sandbox provider override (provider name from the registry). + * Defaults to the agents-server platform config (`local-docker` for v1). + */ + sandbox?: string + + /** Initial prompt; queued before the first wake. */ + initialPrompt?: string + + /** When to wake the parent. */ + wake?: { on: 'runFinished' | 'eventAppended'; includeResponse?: boolean } + + /** Lifecycle overrides. */ + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } +} + +interface CodingAgentHandle { + /** Stable URL: //coding-agent/ */ + readonly url: string + readonly kind: 'claude' | 'codex' + + /** Queue a prompt. Resolves once durably enqueued (not when CLI replies). */ + send(prompt: string): Promise<{ runId: string }> + + /** Async iterable over normalized events for this agent. */ + events(opts?: { since?: 'start' | 'now' }): AsyncIterable + + /** + * Synchronous snapshot of state. + * + * `status`, `pinned`, `lastError`, `runs` come from the entity's + * StreamDB collections. `workspace.sharedRefs` is read from the + * agents-server's in-memory workspace registry — not from StreamDB — + * so it reflects live cross-agent sharing without an extra stream. + */ + state(): { + status: 'cold' | 'starting' | 'idle' | 'running' | 'stopping' | 'error' + pinned: boolean + workspace: { identity: string; sharedRefs: number } + lastError?: string + runs: ReadonlyArray + } + + /** Lifecycle escape hatches. */ + pin(): Promise + release(): Promise + stop(): Promise // tear down sandbox; state survives in stream + destroy(): Promise // tear down + drop refcount on workspace + delete entity stream +} + +type NormalizedEvent = // re-exported from agent-session-protocol + + | SessionInitEvent + | UserMessageEvent + | AssistantMessageEvent + | ThinkingEvent + | ToolCallEvent + | ToolResultEvent + | TurnCompleteEvent + | TurnAbortedEvent + | CompactionEvent + | PermissionRequestEvent + | PermissionResponseEvent + | ErrorEvent + | SessionEndEvent + +interface RunSummary { + runId: string + startedAt: number + endedAt?: number + status: 'running' | 'completed' | 'failed' + promptInboxKey: string + responseText?: string +} +``` + +### Wake semantics + +- `wake: { on: 'runFinished' }` — parent woken once the CLI exits a turn. +- `wake: { on: 'eventAppended' }` — finer-grained streaming wakes. + +### Why a typed primitive (not `ctx.spawn('coding-agent', ...)`) + +- Static `kind` typing with autocomplete. +- Coding-agent-specific affordances (`pin`, `release`, `state.runs`) without leaking entity internals. +- Workspace shape validated at spawn time, not at first wake. +- Internally still resolves to an entity URL and reuses all spawn/observe/wake machinery — sugar with type safety. + +### Internal entity type + +The runtime registers a built-in `coding-agent` entity type. Authors cannot `defineEntity('coding-agent', …)` themselves; the type is reserved. + +### How handle methods desugar onto the entity + +`send(prompt)`, `pin()`, `release()`, `stop()`, `destroy()` all desugar to typed inbox messages on the underlying `coding-agent` entity (`message_type: 'prompt' | 'pin' | 'release' | 'stop' | 'destroy'`). The built-in handler interprets each message type. This keeps the platform primitive on top of existing entity machinery — no new transport, no new wake type. The same messages are dispatched by the UI's pin/release/stop buttons. + +## Sandbox provider + +```ts +// @electric-ax/coding-agents/src/sandbox-provider.ts + +interface SandboxProvider { + readonly name: string // 'local-docker' | 'modal' | 'fly' | ... + + /** + * Boot a sandbox for the given coding-agent identity. + * Idempotent: if a sandbox for `agentId` is running, return it. + * Workspace volume is attached at /workspace. + * The CLI's session dir (~/.claude or ~/.codex) is on tmpfs inside + * the container — populated on start by the runtime from the + * entity's nativeJsonl collection. + */ + start(spec: SandboxSpec): Promise + + /** Stop a sandbox. Workspace volume is preserved. */ + stop(instanceId: string): Promise + + /** Drop refcount on workspace; delete only when last referent. */ + destroy(agentId: string): Promise + + /** Current state for an agent. */ + status(agentId: string): Promise<'running' | 'stopped' | 'unknown'> + + /** On agents-server boot: discover agent's sandboxes by container labels. */ + recover(): Promise> +} + +interface SandboxSpec { + agentId: string // //coding-agent/ + kind: 'claude' | 'codex' + workspace: + | { type: 'volume'; name: string } // resolved name (not the optional from the API) + | { type: 'bindMount'; hostPath: string } + env: Record // ANTHROPIC_API_KEY etc. +} + +interface SandboxInstance { + instanceId: string + agentId: string + workspaceMount: string // '/workspace' inside the sandbox + exec(args: ExecRequest): Promise +} + +interface ExecRequest { + cmd: string[] + cwd?: string + env?: Record + stdin?: 'pipe' | 'ignore' +} + +interface ExecHandle { + stdout: AsyncIterable // line-by-line + stderr: AsyncIterable + stdin?: WritableStream + wait(): Promise<{ exitCode: number }> + kill(signal?: NodeJS.Signals): void +} + +interface RecoveredSandbox { + agentId: string + instanceId: string + status: 'running' | 'stopped' +} +``` + +### `LocalDockerProvider` (v1) + +- Wraps `dockerode` (or `child_process` `docker` CLI). +- Image: `electricsql/coding-agent-sandbox:` — Debian-slim Node base with `claude` and `codex` baked in. Single image, two CLIs. Published from the same release that ships `@electric-ax/coding-agents`. Version pinned in the package. +- Container PID 1 is `tail -f /dev/null` (kept alive for `docker exec`); each turn runs as a fresh `docker exec`. +- Volume conventions: + - `coding-agent-workspace-` (or `` if `name` omitted) → mounted at `/workspace`. + - `~/.claude` and `~/.codex` are tmpfs mounts inside the container. +- Bind-mount mode mounts the host path at `/workspace` instead. Same lifecycle. +- Container labels: `electric-ax.agent-id`, `electric-ax.kind`, `electric-ax.parent-entity`, `electric-ax.workspace-name`. Used by `recover()` and refcount queries. +- `recover()` runs `docker ps -a --filter label=electric-ax.agent-id` and returns instances matched against the entity manifest. + +## Bridge + +```ts +// @electric-ax/coding-agents/src/bridge.ts + +interface Bridge { + /** + * Run one CLI turn. Returns when the CLI exits. + * Streams events as they arrive; caller persists them. + * Holds the workspace lease for the duration. + */ + runTurn(args: RunTurnArgs): Promise +} + +interface RunTurnArgs { + sandbox: SandboxInstance + kind: 'claude' | 'codex' + /** Native session id for resume. Undefined on the first turn. */ + nativeSessionId?: string + prompt: string + /** Sink for events parsed off CLI stdout. */ + onEvent: (e: NormalizedEvent) => void + /** Sink for raw native JSONL lines (tee'd to nativeJsonl collection). */ + onNativeLine: (line: string) => void +} + +interface RunTurnResult { + nativeSessionId: string + exitCode: number + finalText?: string +} +``` + +### `StdioBridge` (v1) + +- Spawns the CLI inside the sandbox via `sandbox.exec`: + - **Claude:** `claude [-r ] --dangerously-skip-permissions -p` (prompt on stdin), `--output-format=stream-json`. + - **Codex:** `codex exec --skip-git-repo-check --json [resume ] ` (prompt on argv). +- Reads stdout line-by-line, normalizes via `agent-session-protocol`'s `normalize()`, emits via `onEvent`. Each raw line is also tee'd via `onNativeLine`. +- On exit non-zero: throws with captured stdout/stderr (truncated to 4 KB each). +- Unparseable line: logged, dropped, doesn't fail the turn. +- ~120 LOC plus normalizer. + +### `ShimBridge` (out of scope for v1) + +The same `Bridge` interface accommodates a future shim implementation: a small Node process running as the sandbox's main process subscribes to a "commands" sub-stream and writes to a "results" sub-stream. The entity-facing API is unchanged. Designed-for, not built. + +## State model + +### Per coding-agent state + +| Where | What | +| ----------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | +| **Durable stream** (the entity's own log) | Single append-only stream backing all collections. | +| **`sessionMeta`** collection (singleton) | `{ kind, nativeSessionId?, status, pinned, error?, workspaceIdentity }`. | +| **`runs`** collection | One row per CLI turn: `{ runId, startedAt, endedAt?, status, promptInboxKey, responseText? }`. | +| **`events`** collection | Projection of `NormalizedEvent`s, indexed by `(runId, ts)` for UI / live queries. | +| **`nativeJsonl`** collection | Raw `claude` / `codex` JSONL lines, per-kind. Used only for cold-boot resume. | +| **`lifecycle`** collection | Sandbox-infra events (`sandbox.started`, `sandbox.stopped`, `resume.restored`) for muted timeline rendering. Not part of the conversation. | +| **Workspace volume** | `coding-agent-workspace-` (Docker named volume) or bind-mount path. Shared across agents. Out-of-band on purpose: workspaces can be huge. | + +Total: **one durable stream per agent**. **Zero-or-one workspace volumes per workspace identity** (zero for bind-mount; shared across all agents using the same identity). No session volume — `~/.claude` / `~/.codex` is tmpfs, materialized from `nativeJsonl` on every container start. + +### Workspace identity & sharing + +Workspace identity is the lease key: + +- `{ type: 'volume', name: 'foo' }` → `volume:foo` +- `{ type: 'volume' }` → `volume:` (per-agent default) +- `{ type: 'bindMount', hostPath: P }` → `bindMount:` + +Multiple agents that resolve to the same identity share the volume and are serialized at `runTurn` boundaries by the workspace lease (a per-identity mutex on the lifecycle manager). Concurrent `IDLE` agents on a shared workspace coexist freely; only `RUNNING` is serialized. + +### Refcount on workspace volumes + +- Tracked by an in-memory registry on agents-server: `workspaceIdentity → Set`. +- Authoritative source on restart is the entity manifest (which agents exist and what workspace identity each declares in its `sessionMeta`). Container labels (`electric-ax.workspace-name`) are a cross-check for adoption but not a primary source of truth. +- `destroy()` decrements; the volume is removed only when the last referent is destroyed. +- Bind-mount paths are **never** deleted by the runtime — they are host-owned. `destroy()` only drops the registry entry. +- Volume names validated against `[a-z0-9-]{1,63}`. Runtime prefixes `coding-agent-workspace-`. + +## Lifecycle + +``` + ┌──────────┐ + spawn ──────▶│ COLD │◀──── idle-timeout fires + └────┬─────┘ (& !pinned) + │ send() + ▼ + ┌──────────┐ + │ STARTING │ provider.start() + └────┬─────┘ + tmpfs restore + start failed │ ready + ┌────────────┴────────────┐ + ▼ ▼ + ┌────────┐ ┌──────────┐ + │ ERROR │ │ IDLE │◀───┐ + └────┬───┘ └────┬─────┘ │ + │ next send │ send() │ runTurn + ▼ ▼ │ done + ┌────────┐ ┌──────────┐ │ + │ COLD │◀──────┐ │ RUNNING │────┘ + └────────┘ │ └────┬─────┘ + │ │ stop() + │ ▼ + │ ┌──────────┐ + └─────────│ STOPPING │ drain & SIGTERM, + SIGKILL └──────────┘ flush partial events + after 5 s +``` + +### Rules + +- `COLD → STARTING → IDLE` is the cold-boot path. The first `send()` after hibernation pays this cost; warm prompts go `IDLE → RUNNING → IDLE`. +- The idle timer fires only in `IDLE`, only if `!pinned`. Workspace + entity stream survive; in-memory CLI process and tmpfs die. +- `pin()` clears the timer and prevents auto-stop. `release()` re-arms it. `pin()` is reference-counted: N pins need N releases. +- `stop()` is explicit teardown — moves directly to `COLD` even from `RUNNING` (SIGTERM → SIGKILL after 5 s grace). Partial events flushed before kill. +- `destroy()` is `stop()` + drop workspace refcount + delete entity stream. Irreversible. +- `ERROR` is terminal for the current attempt. The next `send()` retries `start()`. `lastError` is exposed on `state()`. + +### Concurrency + +- **One running CLI per workspace**, enforced by the workspace lease. Held across `bridge.runTurn` only; not across `IDLE` windows. +- **Per-agent inbox queue**: a second `send()` while the agent is `RUNNING` queues on the inbox (existing entity machinery — no new code). +- **Per-workspace queue**: a `send()` to agent A while agent B (same workspace) is `RUNNING` causes A's `runTurn` to await the lease. +- The bind-mount lease key is `realpath(hostPath)` — symlinks cannot bypass the lease. + +### Crash recovery + +- On agents-server boot, `LocalDockerProvider.recover()` adopts containers labeled `electric-ax.agent-id`. Status is queried; running ones reattach (entity rehydrates `sessionMeta` from stream); stopped ones become `COLD`. +- An orphaned in-flight run (`runs` row with `status=running` but no terminating event) is detected and marked `failed` with `reason=orphaned`. Workspace lease is released. +- This is the failure mode where the future `ShimBridge` wins — the host's stdio handle is gone after a crash. v1 accepts this for local dev. + +### Defaults (config-tunable) + +| Setting | Default | +| ------------------------ | ------------------------------- | +| `idleTimeoutMs` | 5 × 60 000 | +| `coldBootBudgetMs` | 30 000 | +| `runTimeoutMs` | 30 × 60 000 | +| `keepWarm` | `false` | +| `maxConcurrentSandboxes` | 8 (per-server; queue otherwise) | + +## Resume flow + +``` +parent entity runtime / coding-agent sandbox provider CLI + │ │ │ │ + │ send("fix bug") │ │ │ + │─────────────────────────────▶│ enqueue prompt │ │ + │ │ status="starting" │ │ + │ │ start(spec) ─────────────────▶ pull image │ + │ │ │ attach workspace volume │ + │ │ │ → SandboxInstance │ + │ │ read nativeJsonl coll │ │ + │ │ denormalize → tmpfs │ │ + │ │ (skip if files present) │ │ + │ │ │ │ + │ │ acquire workspace lease │ │ + │ │ bridge.runTurn ──────────────▶ exec claude --resume │ + │ │ │ --print │ + │ │ │ --output-format= │ + │ │ │ stream-json │──▶ run + │ │ stdout JSONL line ◀─────────│◀─────────────────────────│ + │ │ append → nativeJsonl coll │ + │ │ normalize → events coll │ + │ │ (live UI updates here) │ + │ │ exit 0 │ + │ │ release workspace lease │ + │ │ status="idle" │ + │ │ schedule idle timer │ + │ wake(runFinished, text) ◀──│ │ │ + │ │ ⏱ idle timeout fires │ + │ │ if !pinned: provider.stop() │ + │ │ status="cold" │ +``` + +### Two resume paths + +- **Same-kind (lossless).** `nativeJsonl` collection (filtered by kind) → `denormalize` → write JSONL into tmpfs → CLI runs `--resume` and sees the file. The CLI writes new events to the same JSONL; the bridge tees them back into the collection. +- **Cross-kind (semantic).** When `kind` changes (e.g., user forks claude→codex on the same agent): `events` (canonical) collection → `denormalize` for the new kind → write into a fresh tmpfs JSONL → start CLI with new id. Tool-call shapes become generically represented; same-conversation semantics preserved. + +### Why `nativeJsonl` AND `events`? + +- `events` is portable, stable, cross-kind: what entities, the UI, and parent wakes consume. +- `nativeJsonl` is the resumable truth for the CLI: rich, kind-specific, lossless. Without it, same-kind resume would drift on tool-call vendor fields. + +This dichotomy is the same as the `agent-session-protocol` model — we inherit it for free. + +## Observability & UI + +### Reused from existing `agents-server-ui` + +- `CodingSessionTimeline.tsx` — renders normalized events. Vocabulary already matches. +- `CodingSessionView.tsx`, `useCodingSession.ts` — bind collections, handle pending rows. +- `CodingSessionSpawnDialog.tsx` — spawn UI. +- `Sidebar.tsx`, `EntityTimeline.tsx`, `EntityHeader.tsx`, `MessageInput.tsx`, `stateExplorer/*` — generic. +- `CODING_SESSION_*_COLLECTION_TYPE` constants are kept stable (aliased from new symbols) to avoid breaking storage. + +### New in v1 + +1. **Status enum extended** — `cold | starting | idle | running | stopping | error`. Extend `StatusDot` color map. +2. **Header gets sandbox provenance** — provider name, workspace identity, "shared with N other agents" indicator (when refcount > 1), pinned indicator. +3. **Header action buttons** — Pin / Release / Stop, dispatched as control messages on the entity inbox. +4. **Spawn dialog adds `workspace` selector** — volume (with optional name) or bind-mount (with hostPath). Provider selector is post-MVP. +5. **Lifecycle events render as muted timeline rows** — `sandbox.started`, `sandbox.stopped`, `resume.restored`. Sourced from the new `lifecycle` collection (separate from `events` because they're not conversation history). + +### Out of v1 UI + +- Multi-agent diff view (compare claude vs codex on same prompt). +- Replay scrubber / time-travel. +- Per-event approve/deny for `permission_request` (CLIs run with skip-permissions flags). +- Workspace file browser. +- "Open workspace in editor" link. + +### Telemetry + +OpenTelemetry spans for `sandbox.start`, `bridge.runTurn`, `resume.restore` (already wired into agents-server's Jaeger setup). Per-agent metrics: cold-boot latency, turn latency, event throughput, idle hibernations. No new dashboards in v1. + +## Built-in agent tools + +Horton (`packages/agents/src/agents/horton.ts`) currently uses `spawn_coder` / `prompt_coder`. These are replaced by: + +- `spawn_coding_agent` — wraps `ctx.spawnCodingAgent` with the same UX as the current `spawn_coder` (initialMessage + `wake: { on: 'runFinished', includeResponse: true }`). New parameter: optional workspace name to enable sharing. +- `prompt_coding_agent` — wraps `ctx.observeCodingAgent(id).send(prompt)`. + +The tool descriptions are updated to mention sandboxing and workspace sharing. + +## Testing strategy + +### Layer 1 — Unit (no Docker, no API keys) + +- `LifecycleManager` state-machine transitions, idle timer, pin reference counting, concurrent `send` queueing. Backed by `FakeSandboxProvider` (in-memory) and `FakeBridge` (scripted events). +- `ResumeRestore`: given a sidecar of recorded events, asserts correct `denormalize` output is written to the right tmpfs path, with idempotency. +- `CodingAgentHandle` API-shape tests; `spawnCodingAgent` option validation; `observeCodingAgent` rebinds without re-spawning. +- Workspace identity resolution: `volume:foo`, `bindMount:realpath`, default-to-agentId. +- Workspace lease: per-identity mutex, IDLE coexistence, RUNNING serialization. +- Vitest. Sub-second. + +### Layer 2 — Integration (real Docker, fake CLI) + +- `LocalDockerProvider`: `start` creates the right labels/volumes/env, `start` is idempotent, `stop`/`destroy` clean up correctly with refcount, `recover()` adopts labeled containers after a simulated host restart. +- `StdioBridge` against a `fake-cli` binary baked into a test image — a tiny Node script that reads a fixture name from env and emits a recorded JSONL transcript on stdout. Tests JSONL parsing, exit codes, error capture, streaming order. +- Recorded fixtures in `test/fixtures/{claude,codex}/{first-turn, resume-turn, tool-call, error}.jsonl`. Captured once from real CLIs; checked in. +- Gated by `DOCKER=1` env (skipped otherwise). + +### Layer 3 — Conformance suite (provider-agnostic) + +- New `coding-agent` suite in `packages/agents-server-conformance-tests`. Parameterized by `SandboxProvider`. +- Scenarios: cold-boot + first prompt, warm second prompt, resume after `stop`, crash-recovery / orphaned run, workspace persists across teardown, cross-kind resume, shared-workspace lease serialization. +- v1 runs against `LocalDockerProvider` only. Future Modal / Fly impls reuse the suite. + +### Layer 4 — End-to-end smoke (real CLIs, real keys) + +- Single test per kind: parent entity spawns coding agent, sends `"echo hello and create hello.txt"`, awaits `runFinished` wake, asserts response contains "hello" and `hello.txt` exists in the workspace. +- Tagged `@slow`. Requires `ANTHROPIC_API_KEY` / `OPENAI_API_KEY`. Runs nightly + post-merge to `main`. Catches CLI-version drift. + +### UI tests + +- Component tests for `StatusDot` color mapping across the seven states, `CodingSessionSpawnDialog` workspace validation, header pin/release dispatch. +- No new e2e browser tests in v1. + +### Manual smoke checklist (PR description) + +- Spawn agent via UI → send prompt → see streaming timeline. +- Pin → wait > idle timeout → confirm sandbox stays up. +- Release → wait > idle timeout → confirm container stops, status flips `COLD`. +- Send another prompt → confirm resume works (claude session id matches across the gap). +- Bind-mount mode: edits land on the host filesystem. +- Spawn second agent on the same workspace name → confirm shared-refs indicator → run prompt on agent A while sending to agent B → confirm B's lease wait. +- `docker kill` agents-server while CLI is running → restart server → confirm in-flight run is `failed`, container reaped, next prompt works. + +## MVP scope + +### v1 ships + +- `@electric-ax/coding-agents` package: `SandboxProvider`, `Bridge`, `LocalDockerProvider`, `StdioBridge`, `LifecycleManager`, workspace-lease registry. +- `ctx.spawnCodingAgent` / `ctx.observeCodingAgent` on `HandlerContext`. +- Built-in `coding-agent` entity registered automatically when `@electric-ax/coding-agents` is imported by the server entrypoint. +- Two CLIs: `claude` and `codex`. +- Image `electricsql/coding-agent-sandbox:` published from the same release; pinned in the package. +- One durable stream per agent. Zero-or-one shareable workspace volumes per workspace identity. No session volume; `~/.claude` and `~/.codex` are tmpfs. +- Cold-boot resume via tmpfs materialization from `nativeJsonl` collection. +- Lifecycle: idle hibernation, pin/release, stop/destroy, refcount-aware workspace cleanup, container-label crash recovery. +- UI: extend existing `CodingSession*` components per §Observability & UI. +- Tools: `spawn_coding_agent`, `prompt_coding_agent` for Horton. +- Tests: unit + integration + conformance + E2E smoke per §Testing strategy. +- Removal of `coder` entity, `spawn-coder.ts`, `prompt-coder.ts`. Collection-type wire strings kept stable; aliased from new symbols. + +### Out of scope for v1 + +- `ShimBridge` and remote provider impls (Modal / Fly / E2B / Cloudflare). +- ACP adapter. +- Cross-kind resume in the spawn dialog (works programmatically; no UI affordance yet). +- Per-event approve/deny UI for `permission_request`. +- Replay / time-travel UI scrubber. +- Workspace file browser. +- Multi-tenant authorization on coding-agent endpoints (inherits agents-server's existing). +- Memory-snapshot lifecycle. +- Pre-warmed sandbox pools. +- "Open workspace in editor" link. +- Telemetry dashboard (spans emitted; no dashboard work). + +### Migration + +The `coder` entity is removed in the same release. No backwards-compat shim — internal feature, no external consumers depend on the API. Existing in-flight `coder` sessions on running dev environments are dropped. + +## Open questions + +- **API key injection.** Inherits agents-server's existing env handling; no new surface in this design. Confirm during implementation that `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` flow into `SandboxSpec.env` cleanly without ending up in container labels or stream events. +- **Workspace cleanup grace period.** Currently the volume is deleted immediately when the last referent is `destroy()`'d. Consider a grace period (e.g., 10 minutes) before delete in case the operator regrets it. Decide during implementation; either default is defensible. + +## References + +- `packages/agents/src/agents/coding-session.ts` — existing `coder` entity (to be removed). +- `node_modules/.pnpm/agent-session-protocol@0.0.2/node_modules/agent-session-protocol/README.md` — full asp spec. +- `packages/agents-runtime/src/define-entity.ts` — entity registry. +- `packages/agents-server/src/electric-agents-manager.ts` — server orchestration. +- `packages/agents-server-ui/src/components/CodingSessionTimeline.tsx` — existing timeline renderer (reused). +- [Agent Session Protocol](https://github.com/kevin-dp/agent-session-protocol). +- [mattpocock/sandcastle](https://github.com/mattpocock/sandcastle) — reference impl for stdin/stdout JSONL bridge. +- [OpenHands runtime](https://docs.openhands.dev/usage/architecture/runtime) — reference impl for server-in-sandbox + EventStream. +- [Anthropic Claude Agent SDK](https://code.claude.com/docs/en/agent-sdk/overview). +- [OpenAI Codex non-interactive mode](https://developers.openai.com/codex/noninteractive). +- [Agent Client Protocol](https://agentclientprotocol.com/) — designed-for ACP adapter (out of scope). From 47ee6ae22d328f8a80670352b18202de59e6e4cf Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 01:22:38 +0100 Subject: [PATCH 002/279] docs(plans): add coding-agents MVP plan Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plans/2026-04-30-coding-agents-mvp.md | 1221 +++++++++++++++++ 1 file changed, 1221 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-30-coding-agents-mvp.md diff --git a/docs/superpowers/plans/2026-04-30-coding-agents-mvp.md b/docs/superpowers/plans/2026-04-30-coding-agents-mvp.md new file mode 100644 index 0000000000..25ab5aa3e1 --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-coding-agents-mvp.md @@ -0,0 +1,1221 @@ +# Coding Agents Platform Primitive — MVP Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Ship a minimum viable `@electric-ax/coding-agents` package that proves the core architecture: a Docker sandbox + a stdio bridge to the Claude CLI + a normalized event stream. Validation bar: an integration smoke test that starts a sandbox, runs `claude --print --output-format=stream-json` inside it, parses the JSONL output, and asserts `session_init` and `assistant_message` events were captured. + +**Architecture:** Three modules in a new package — `LocalDockerProvider` (subprocess-driven Docker CLI; no `dockerode` dep to keep it small), `StdioBridge` (parses claude's stream-json output via `agent-session-protocol`'s `normalize`), and a tiny in-memory `Sandbox` lifecycle (start, exec, stop). No runtime API surface, no entity wiring, no UI in this MVP — those come after smoke green. + +**Tech Stack:** TypeScript, Vitest, tsdown, `agent-session-protocol@0.0.2` (already in workspace), Node `child_process`, Docker. + +**Spec scope cuts (intentional, MVP):** + +- Claude only, not Codex. +- No `LifecycleManager` (idle hibernation, pin/release). +- No workspace registry / refcount. +- No `ctx.spawnCodingAgent` API surface on `HandlerContext`. +- No built-in `coding-agent` entity wiring. +- No UI updates. +- No same-kind/cross-kind resume; single-shot turn only. +- Existing `coder` entity stays in place — no removal in MVP. + +These cuts are deliberate. Once the smoke test passes, the broader spec gets implemented in follow-on plans. + +**Reference spec:** `docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md` + +--- + +## File Structure + +``` +packages/coding-agents/ ← NEW package +├── package.json +├── tsconfig.json +├── tsdown.config.ts +├── vitest.config.ts +├── .gitignore +├── src/ +│ ├── index.ts ← public exports +│ ├── types.ts ← all interfaces +│ ├── providers/ +│ │ └── local-docker.ts ← LocalDockerProvider +│ ├── bridge/ +│ │ └── stdio-bridge.ts ← StdioBridge +│ └── log.ts ← pino logger (mirrors agents-runtime/src/log.ts pattern) +├── docker/ +│ ├── Dockerfile ← node + claude installed +│ └── entrypoint.sh ← container PID 1, keeps it alive +└── test/ + ├── unit/ + │ ├── stdio-bridge.test.ts ← unit tests with stubbed exec + │ └── local-docker.test.ts ← unit tests against fake docker bin (post-MVP, optional) + ├── integration/ + │ └── smoke.test.ts ← REAL Docker + REAL Claude CLI + real API key + └── support/ + ├── build-image.ts ← helper to build the test image + └── env.ts ← reads /tmp/.electric-coding-agents-env +``` + +**No changes to other packages in this MVP.** + +--- + +## Phase Plan + +| Phase | Tasks | Parallelism | Depends on | +| ----- | ------------- | ------------------------------- | ---------- | +| 0 | 0.1, 0.2 | sequential | — | +| 1 | 1.A, 1.B, 1.C | parallel (3 independent agents) | Phase 0 | +| 2 | 2.1 | sequential | Phase 1 | +| 3 | iteration | sequential | Phase 2 | + +--- + +## Phase 0 — Foundation (sequential) + +### Task 0.1 — Scaffold package + +**Files to create:** + +- `packages/coding-agents/package.json` +- `packages/coding-agents/tsconfig.json` +- `packages/coding-agents/tsdown.config.ts` +- `packages/coding-agents/vitest.config.ts` +- `packages/coding-agents/.gitignore` + +The patterns mirror `packages/agents-runtime/` exactly. Copy versions of `tsdown`, `vitest`, `typescript`, `@types/node` from there. + +- [ ] **Step 1: Write `packages/coding-agents/package.json`** + +```json +{ + "name": "@electric-ax/coding-agents", + "version": "0.0.1", + "description": "Sandbox + bridge layer for spawning coding agents (Claude Code, Codex) under Electric Agents.", + "repository": { + "type": "git", + "url": "git+https://github.com/electric-sql/electric.git", + "directory": "packages/coding-agents" + }, + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "scripts": { + "build": "tsdown", + "dev": "tsdown --watch", + "test": "vitest run", + "test:watch": "vitest", + "test:integration": "DOCKER=1 vitest run test/integration", + "typecheck": "tsc --noEmit", + "stylecheck": "eslint . --quiet" + }, + "exports": { + ".": { + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + }, + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + } + }, + "./package.json": "./package.json" + }, + "dependencies": { + "agent-session-protocol": "^0.0.2", + "pino": "^10.3.1", + "pino-pretty": "^13.0.0", + "zod": "^4.3.6" + }, + "devDependencies": { + "@types/node": "^22.19.15", + "tsdown": "^0.9.0", + "typescript": "^5.7.0", + "vitest": "^3.2.4" + }, + "files": ["dist", "docker"], + "sideEffects": false, + "license": "Apache-2.0" +} +``` + +- [ ] **Step 2: Write `packages/coding-agents/tsconfig.json`** + +```json +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src", + "types": ["node", "vitest/globals"] + }, + "include": ["src/**/*", "test/**/*"], + "exclude": ["dist", "node_modules"] +} +``` + +If `tsconfig.base.json` does not exist, copy the compilerOptions from `packages/agents-runtime/tsconfig.json` instead. + +- [ ] **Step 3: Write `packages/coding-agents/tsdown.config.ts`** + +Mirror `packages/agents-runtime/tsdown.config.ts`. The minimum is: + +```ts +import { defineConfig } from 'tsdown' + +export default defineConfig({ + entry: ['./src/index.ts'], + outDir: 'dist', + format: ['esm', 'cjs'], + dts: true, + clean: true, + sourcemap: true, +}) +``` + +- [ ] **Step 4: Write `packages/coding-agents/vitest.config.ts`** + +```ts +import { defineConfig } from 'vitest/config' + +export default defineConfig({ + test: { + globals: true, + environment: 'node', + testTimeout: 120_000, // integration tests build images, can be slow + }, +}) +``` + +- [ ] **Step 5: Write `packages/coding-agents/.gitignore`** + +``` +dist +node_modules +.vitest-temp +coverage +``` + +- [ ] **Step 6: Run `pnpm install` from repo root** + +``` +pnpm install +``` + +Expect: workspace picks up the new package; no errors. + +- [ ] **Step 7: Verify the package builds (no source yet → typecheck-only)** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean (no `src/` files yet, but typecheck against an empty include shouldn't error). +If it errors due to `include: ["src/**/*"]` matching nothing, add an empty `src/index.ts` with `export {}` first. + +- [ ] **Step 8: Commit** + +``` +git add packages/coding-agents +git commit -m "feat(coding-agents): scaffold @electric-ax/coding-agents package" +``` + +--- + +### Task 0.2 — Define core types & log + +**Files:** + +- Create: `packages/coding-agents/src/types.ts` +- Create: `packages/coding-agents/src/log.ts` +- Create: `packages/coding-agents/src/index.ts` (replace empty version from 0.1.7) + +- [ ] **Step 1: Write `src/log.ts`** + +```ts +import pino from 'pino' + +export const log = pino({ + name: 'coding-agents', + level: process.env.LOG_LEVEL ?? 'info', + ...(process.env.NODE_ENV !== 'production' + ? { + transport: { + target: 'pino-pretty', + options: { colorize: true, translateTime: 'HH:MM:ss.l' }, + }, + } + : {}), +}) +``` + +- [ ] **Step 2: Write `src/types.ts`** + +```ts +import type { NormalizedEvent } from 'agent-session-protocol' + +export type CodingAgentKind = 'claude' | 'codex' + +// ─── Sandbox provider ────────────────────────────────────────────────────── + +export interface SandboxSpec { + /** Stable agent identity (e.g. //coding-agent/). */ + agentId: string + kind: CodingAgentKind + workspace: + | { type: 'volume'; name: string } + | { type: 'bindMount'; hostPath: string } + /** Env vars exposed inside the sandbox (ANTHROPIC_API_KEY, etc.). */ + env: Record +} + +export interface ExecRequest { + cmd: string[] + cwd?: string + env?: Record + stdin?: 'pipe' | 'ignore' +} + +export interface ExecHandle { + /** Async iterables of stdout/stderr lines (UTF-8, newline-stripped). */ + stdout: AsyncIterable + stderr: AsyncIterable + /** Available iff request.stdin === 'pipe'. */ + writeStdin?: (chunk: string) => Promise + closeStdin?: () => Promise + wait(): Promise<{ exitCode: number }> + kill(signal?: NodeJS.Signals): void +} + +export interface SandboxInstance { + instanceId: string + agentId: string + /** Path inside sandbox where the workspace volume / bind-mount is mounted. */ + workspaceMount: string + exec(args: ExecRequest): Promise +} + +export interface RecoveredSandbox { + agentId: string + instanceId: string + status: 'running' | 'stopped' +} + +export interface SandboxProvider { + readonly name: string + start(spec: SandboxSpec): Promise + stop(instanceId: string): Promise + destroy(agentId: string): Promise + status(agentId: string): Promise<'running' | 'stopped' | 'unknown'> + /** Discover sandboxes adopted across host restarts. MVP: may return []. */ + recover(): Promise> +} + +// ─── Bridge ──────────────────────────────────────────────────────────────── + +export interface RunTurnArgs { + sandbox: SandboxInstance + kind: CodingAgentKind + /** Resume id; undefined for first turn. */ + nativeSessionId?: string + prompt: string + /** Model to pass to the CLI (e.g. 'claude-haiku-4-5-20251001'). */ + model?: string + /** Sink for normalized events as parsed off CLI stdout. */ + onEvent: (e: NormalizedEvent) => void + /** Sink for raw native JSONL lines (tee'd to a sidecar collection). */ + onNativeLine?: (line: string) => void +} + +export interface RunTurnResult { + /** Discovered or provided session id. */ + nativeSessionId?: string + exitCode: number + /** First assistant_message text (for parent's wake payload). */ + finalText?: string +} + +export interface Bridge { + runTurn(args: RunTurnArgs): Promise +} +``` + +- [ ] **Step 3: Write `src/index.ts`** + +```ts +export type { + CodingAgentKind, + SandboxSpec, + ExecRequest, + ExecHandle, + SandboxInstance, + SandboxProvider, + RecoveredSandbox, + RunTurnArgs, + RunTurnResult, + Bridge, +} from './types' +export { LocalDockerProvider } from './providers/local-docker' +export { StdioBridge } from './bridge/stdio-bridge' +``` + +(Step 3 references modules that don't exist yet; that's fine — tests in Phase 1 will create them. For the typecheck in Step 5 below, temporarily comment out the two `LocalDockerProvider`/`StdioBridge` re-exports until Phase 1 lands.) + +- [ ] **Step 4: Verify the package typechecks** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean. + +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/src +git commit -m "feat(coding-agents): define core types" +``` + +--- + +## Phase 1 — Independent components (parallel, 3 agents) + +These three tasks touch disjoint files. Dispatch them in parallel. + +### Task 1.A — Dockerfile + entrypoint + +**Files:** + +- Create: `packages/coding-agents/docker/Dockerfile` +- Create: `packages/coding-agents/docker/entrypoint.sh` +- Create: `packages/coding-agents/test/support/build-image.ts` + +**Constraints / notes:** + +- Image must contain: `node` ≥ 22, `npm`, the official Claude CLI from npm, `git`, and `bash`. +- Claude is published as `@anthropic-ai/claude-code` on npm. Install with `npm install -g @anthropic-ai/claude-code`. The bin name is `claude`. +- Use `node:22-bookworm-slim` as the base — it's small enough and has glibc (musl on alpine breaks some npm postinstall scripts). +- The container's PID 1 must stay alive between `docker exec` invocations. Use `tail -f /dev/null`. +- Image tag for tests: `electric-ax/coding-agent-sandbox:test`. + +- [ ] **Step 1: Write `docker/Dockerfile`** + +```dockerfile +FROM node:22-bookworm-slim + +# Install OS deps: git (claude needs it), curl (claude installer occasionally probes), bash, ca-certs. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + git \ + bash \ + tini \ + && rm -rf /var/lib/apt/lists/* + +# Non-root user for the agent. Claude's home is needed for ~/.claude transcript dir. +RUN useradd -m -s /bin/bash -u 1000 agent + +# Install the Claude CLI globally. Pin a recent version to avoid drift; can bump later. +# (Use the floating tag for now; pin in v1.) +RUN npm install -g @anthropic-ai/claude-code@latest \ + && claude --version + +# Workspace mount point. The provider attaches a volume here. +RUN mkdir -p /workspace \ + && chown agent:agent /workspace + +USER agent +WORKDIR /workspace + +COPY --chown=agent:agent docker/entrypoint.sh /home/agent/entrypoint.sh +RUN chmod +x /home/agent/entrypoint.sh + +ENTRYPOINT ["/usr/bin/tini", "--", "/home/agent/entrypoint.sh"] +``` + +- [ ] **Step 2: Write `docker/entrypoint.sh`** + +```bash +#!/usr/bin/env bash +set -euo pipefail +# PID 1 just stays alive so docker exec can attach. Real work is done via exec. +exec tail -f /dev/null +``` + +- [ ] **Step 3: Write `test/support/build-image.ts`** + +```ts +import { spawn } from 'node:child_process' +import { dirname, resolve } from 'node:path' +import { fileURLToPath } from 'node:url' + +const here = dirname(fileURLToPath(import.meta.url)) +const PACKAGE_ROOT = resolve(here, '../..') + +export const TEST_IMAGE_TAG = 'electric-ax/coding-agent-sandbox:test' + +/** + * Build the test image. Idempotent: re-runs are cheap if Docker layer cache is warm. + * Throws on non-zero exit. + */ +export async function buildTestImage(): Promise { + await new Promise((resolveBuild, rejectBuild) => { + const child = spawn( + 'docker', + ['build', '-t', TEST_IMAGE_TAG, '-f', 'docker/Dockerfile', '.'], + { cwd: PACKAGE_ROOT, stdio: 'inherit' } + ) + child.on('error', rejectBuild) + child.on('exit', (code) => { + if (code === 0) resolveBuild() + else rejectBuild(new Error(`docker build exited ${code}`)) + }) + }) +} +``` + +- [ ] **Step 4: Build the image to verify it works** + +``` +cd packages/coding-agents +docker build -t electric-ax/coding-agent-sandbox:test -f docker/Dockerfile . +``` + +Expect: succeeds; final layer reports `claude --version`. + +- [ ] **Step 5: Smoke-check Claude inside the container** + +``` +docker run --rm electric-ax/coding-agent-sandbox:test claude --version +``` + +Expect: prints the claude version (e.g. `2.1.116 (Claude Code)`). + +- [ ] **Step 6: Commit** + +``` +git add packages/coding-agents/docker packages/coding-agents/test/support +git commit -m "feat(coding-agents): add Dockerfile and image build helper" +``` + +--- + +### Task 1.B — `LocalDockerProvider` + +**Files:** + +- Create: `packages/coding-agents/src/providers/local-docker.ts` +- Create: `packages/coding-agents/test/unit/local-docker.test.ts` (smoke unit; integration coverage is Phase 2) + +**Constraints:** + +- Use Node `child_process.spawn` to drive the `docker` CLI. No `dockerode` dependency. +- `start()` is idempotent: if a container with `electric-ax.agent-id=` exists and is running, attach to it. +- Container labels: `electric-ax.agent-id=`, `electric-ax.kind=`, `electric-ax.workspace-name=`. +- Volumes: + - `volume`: ensures `coding-agent-workspace-` exists, mounts at `/workspace`. + - `bindMount`: mounts `realpath(hostPath)` at `/workspace`. +- Exec environment must merge `spec.env` so `ANTHROPIC_API_KEY` flows through. +- `exec` returns line-by-line async iterables and a `wait()` that resolves the exit code. + +- [ ] **Step 1: Write `src/providers/local-docker.ts`** + +```ts +import { spawn } from 'node:child_process' +import { realpath } from 'node:fs/promises' +import { createInterface } from 'node:readline' +import type { Readable, Writable } from 'node:stream' +import { log } from '../log' +import type { + ExecHandle, + ExecRequest, + RecoveredSandbox, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from '../types' + +const IMAGE = + process.env.CODING_AGENT_IMAGE ?? 'electric-ax/coding-agent-sandbox:test' + +export interface LocalDockerProviderOptions { + /** Override the image tag (default: env CODING_AGENT_IMAGE or test image). */ + image?: string +} + +export class LocalDockerProvider implements SandboxProvider { + readonly name = 'local-docker' + private readonly image: string + + constructor(opts: LocalDockerProviderOptions = {}) { + this.image = opts.image ?? IMAGE + } + + async start(spec: SandboxSpec): Promise { + const existing = await this.findContainerByAgentId(spec.agentId) + if (existing && existing.running) { + log.debug( + { agentId: spec.agentId, instanceId: existing.id }, + 'attaching to existing sandbox' + ) + return this.makeInstance(existing.id, spec) + } + if (existing && !existing.running) { + // Stale stopped container with same agentId. Remove it first. + await runDocker(['rm', '-f', existing.id]) + } + + const labels = [ + `electric-ax.agent-id=${spec.agentId}`, + `electric-ax.kind=${spec.kind}`, + `electric-ax.workspace-name=${ + spec.workspace.type === 'volume' ? spec.workspace.name : 'bind-mount' + }`, + ] + + const mount = await this.mountFlag(spec) + + const args = [ + 'run', + '-d', + '--rm=false', + ...labels.flatMap((l) => ['--label', l]), + mount, + this.image, + ] + + const { stdout } = await runDocker(args) + const instanceId = stdout.trim() + log.info({ agentId: spec.agentId, instanceId }, 'started sandbox') + return this.makeInstance(instanceId, spec) + } + + async stop(instanceId: string): Promise { + await runDocker(['stop', '-t', '5', instanceId]).catch((err) => { + log.warn( + { err, instanceId }, + 'docker stop failed (probably already stopped)' + ) + }) + await runDocker(['rm', '-f', instanceId]).catch(() => undefined) + } + + async destroy(agentId: string): Promise { + const c = await this.findContainerByAgentId(agentId) + if (c) await this.stop(c.id) + // Volume cleanup is intentionally NOT done in MVP — tests clean up explicitly. + } + + async status(agentId: string): Promise<'running' | 'stopped' | 'unknown'> { + const c = await this.findContainerByAgentId(agentId) + if (!c) return 'unknown' + return c.running ? 'running' : 'stopped' + } + + async recover(): Promise> { + const { stdout } = await runDocker([ + 'ps', + '-a', + '--format', + '{{.ID}}\t{{.Label "electric-ax.agent-id"}}\t{{.State}}', + '--filter', + 'label=electric-ax.agent-id', + ]) + return stdout + .trim() + .split('\n') + .filter(Boolean) + .map((line) => { + const [id, agentId, state] = line.split('\t') + return { + instanceId: id ?? '', + agentId: agentId ?? '', + status: state === 'running' ? 'running' : 'stopped', + } + }) + } + + // ── private helpers ── + + private async findContainerByAgentId( + agentId: string + ): Promise<{ id: string; running: boolean } | null> { + const { stdout } = await runDocker([ + 'ps', + '-a', + '--format', + '{{.ID}}\t{{.State}}', + '--filter', + `label=electric-ax.agent-id=${agentId}`, + ]) + const line = stdout + .trim() + .split('\n') + .find((l) => l.length > 0) + if (!line) return null + const [id, state] = line.split('\t') + return { id: id ?? '', running: state === 'running' } + } + + private async mountFlag(spec: SandboxSpec): Promise { + if (spec.workspace.type === 'volume') { + const volName = `coding-agent-workspace-${spec.workspace.name}` + // ensure the volume exists (docker auto-creates on first use, but explicit is friendlier) + await runDocker(['volume', 'create', volName]).catch(() => undefined) + return `--mount=type=volume,source=${volName},target=/workspace` + } + const real = await realpath(spec.workspace.hostPath) + return `--mount=type=bind,source=${real},target=/workspace` + } + + private makeInstance(instanceId: string, spec: SandboxSpec): SandboxInstance { + return { + instanceId, + agentId: spec.agentId, + workspaceMount: '/workspace', + exec: (args) => execInContainer(instanceId, args, spec.env), + } + } +} + +// ── docker CLI helpers ── + +async function runDocker( + args: ReadonlyArray +): Promise<{ stdout: string; stderr: string }> { + return new Promise((resolveCmd, rejectCmd) => { + const child = spawn('docker', args, { stdio: ['ignore', 'pipe', 'pipe'] }) + let stdout = '' + let stderr = '' + child.stdout.on('data', (d) => (stdout += d.toString())) + child.stderr.on('data', (d) => (stderr += d.toString())) + child.on('error', rejectCmd) + child.on('exit', (code) => { + if (code === 0) resolveCmd({ stdout, stderr }) + else + rejectCmd( + new Error(`docker ${args.join(' ')} exited ${code}: ${stderr}`) + ) + }) + }) +} + +function lineIterator(stream: Readable): AsyncIterable { + const rl = createInterface({ input: stream, crlfDelay: Infinity }) + return rl as unknown as AsyncIterable +} + +async function execInContainer( + containerId: string, + req: ExecRequest, + baseEnv: Record +): Promise { + const env = { ...baseEnv, ...(req.env ?? {}) } + const args: Array = ['exec', '-i'] + if (req.cwd) args.push('-w', req.cwd) + for (const [k, v] of Object.entries(env)) args.push('-e', `${k}=${v}`) + args.push(containerId, ...req.cmd) + + const child = spawn('docker', args, { + stdio: [req.stdin === 'pipe' ? 'pipe' : 'ignore', 'pipe', 'pipe'], + }) + + let exitCode: number | null = null + const exitPromise = new Promise<{ exitCode: number }>( + (resolveWait, rejectWait) => { + child.on('error', rejectWait) + child.on('exit', (code) => { + exitCode = code ?? -1 + resolveWait({ exitCode }) + }) + } + ) + + const stdinStream = child.stdin as Writable | null + + return { + stdout: lineIterator(child.stdout!), + stderr: lineIterator(child.stderr!), + writeStdin: stdinStream + ? async (chunk) => { + await new Promise((res, rej) => { + stdinStream.write(chunk, (err) => (err ? rej(err) : res())) + }) + } + : undefined, + closeStdin: stdinStream + ? async () => { + await new Promise((res) => { + stdinStream.end(res) + }) + } + : undefined, + wait: () => exitPromise, + kill: (signal = 'SIGTERM') => { + try { + child.kill(signal) + } catch { + // already dead + } + }, + } +} +``` + +- [ ] **Step 2: Write `test/unit/local-docker.test.ts`** — minimal type-only smoke + +```ts +import { describe, it, expect } from 'vitest' +import { LocalDockerProvider } from '../../src/providers/local-docker' + +describe('LocalDockerProvider construction', () => { + it('exposes name "local-docker"', () => { + const p = new LocalDockerProvider() + expect(p.name).toBe('local-docker') + }) +}) +``` + +- [ ] **Step 3: Run `pnpm -C packages/coding-agents test test/unit/local-docker.test.ts`** + +Expect: PASS. + +- [ ] **Step 4: Commit** + +``` +git add packages/coding-agents/src/providers packages/coding-agents/test/unit/local-docker.test.ts +git commit -m "feat(coding-agents): add LocalDockerProvider" +``` + +--- + +### Task 1.C — `StdioBridge` + +**Files:** + +- Create: `packages/coding-agents/src/bridge/stdio-bridge.ts` +- Create: `packages/coding-agents/test/unit/stdio-bridge.test.ts` + +**Constraints / claude CLI conventions (verified against `claude --help`):** + +- Required flags for streaming JSONL output: `--print --output-format=stream-json --verbose`. The `--verbose` flag is required when combining `--print` with `--output-format=stream-json`. +- `--input-format=stream-json` is for streaming JSON _input_; we just want to send a single prompt, so we either pipe the prompt on stdin (default text input) or pass it on argv. Pipe on stdin to mirror existing patterns. +- `--dangerously-skip-permissions` — required for non-interactive autonomous runs. +- `--model ` — pass `'claude-haiku-4-5-20251001'` for cheap test runs. +- Resume: `--resume ` — out of scope for MVP; bridge ignores `nativeSessionId` for now (logs a warning if set). + +**Event normalization:** + +- `agent-session-protocol` exports `normalize(lines: string[], agent: 'claude'): NormalizedEvent[]`. Use it on each accumulated batch — but we want to emit events per line. The library also ships line-level normalization functions; if they're not directly exposed, we batch internally and call `normalize(batch, 'claude')` on each new line and emit only the events we haven't emitted yet. +- Cleanest first-pass: collect all stdout lines into a buffer, call `normalize(buf, 'claude')` once at end, emit. Streaming-during-turn is a v2 optimization. The smoke test only asserts events are present, not real-time-ness, so batch-at-end is fine for MVP. + +- [ ] **Step 1: Write `src/bridge/stdio-bridge.ts`** + +```ts +import { normalize } from 'agent-session-protocol' +import type { NormalizedEvent } from 'agent-session-protocol' +import { log } from '../log' +import type { Bridge, RunTurnArgs, RunTurnResult } from '../types' + +export class StdioBridge implements Bridge { + async runTurn(args: RunTurnArgs): Promise { + if (args.kind !== 'claude') { + throw new Error( + `StdioBridge MVP supports only 'claude', got '${args.kind}'` + ) + } + if (args.nativeSessionId) { + log.warn( + { nativeSessionId: args.nativeSessionId }, + 'StdioBridge MVP does not implement resume — running fresh turn' + ) + } + + const cliArgs: Array = [ + '--print', + '--output-format=stream-json', + '--verbose', + '--dangerously-skip-permissions', + ] + if (args.model) cliArgs.push('--model', args.model) + + const handle = await args.sandbox.exec({ + cmd: ['claude', ...cliArgs], + cwd: args.sandbox.workspaceMount, + stdin: 'pipe', + }) + + // Pipe prompt on stdin, then close. + if (!handle.writeStdin || !handle.closeStdin) { + throw new Error( + 'StdioBridge requires stdin pipe but ExecHandle lacks one' + ) + } + await handle.writeStdin(args.prompt) + await handle.closeStdin() + + const rawLines: Array = [] + const stderrLines: Array = [] + + const drainStderr = async () => { + for await (const line of handle.stderr) { + stderrLines.push(line) + } + } + const drainStdout = async () => { + for await (const line of handle.stdout) { + if (!line) continue + rawLines.push(line) + if (args.onNativeLine) args.onNativeLine(line) + } + } + + await Promise.all([drainStdout(), drainStderr()]) + const exitInfo = await handle.wait() + + if (exitInfo.exitCode !== 0) { + const stderrPreview = stderrLines.join('\n').slice(0, 800) || '' + throw new Error( + `claude CLI exited ${exitInfo.exitCode}. stderr=${stderrPreview}` + ) + } + + let events: Array = [] + try { + events = normalize(rawLines, 'claude') + } catch (err) { + log.error({ err, sample: rawLines.slice(0, 3) }, 'normalize failed') + throw err + } + + for (const e of events) args.onEvent(e) + + const sessionInit = events.find((e) => e.type === 'session_init') + const lastAssistant = [...events] + .reverse() + .find((e) => e.type === 'assistant_message') + + return { + nativeSessionId: + sessionInit && 'sessionId' in sessionInit + ? (sessionInit as { sessionId?: string }).sessionId + : undefined, + exitCode: exitInfo.exitCode, + finalText: + lastAssistant && 'text' in lastAssistant + ? (lastAssistant as { text?: string }).text + : undefined, + } + } +} +``` + +- [ ] **Step 2: Write `test/unit/stdio-bridge.test.ts`** + +```ts +import { describe, expect, it } from 'vitest' +import { StdioBridge } from '../../src/bridge/stdio-bridge' +import type { ExecHandle, ExecRequest, SandboxInstance } from '../../src/types' + +function fakeSandbox(opts: { + stdoutLines: Array + stderrLines?: Array + exitCode?: number + onCmd?: (cmd: ReadonlyArray) => void + onStdin?: (chunk: string) => void +}): SandboxInstance { + return { + instanceId: 'fake', + agentId: '/x/coding-agent/y', + workspaceMount: '/workspace', + async exec(req: ExecRequest): Promise { + opts.onCmd?.(req.cmd) + const stdoutLines = opts.stdoutLines.slice() + const stderrLines = (opts.stderrLines ?? []).slice() + let stdinBuf = '' + return { + stdout: (async function* () { + for (const l of stdoutLines) yield l + })(), + stderr: (async function* () { + for (const l of stderrLines) yield l + })(), + writeStdin: async (chunk) => { + stdinBuf += chunk + opts.onStdin?.(chunk) + }, + closeStdin: async () => undefined, + wait: async () => ({ exitCode: opts.exitCode ?? 0 }), + kill: () => undefined, + } + }, + } +} + +describe('StdioBridge', () => { + it('rejects non-claude kinds', async () => { + const b = new StdioBridge() + await expect( + b.runTurn({ + sandbox: fakeSandbox({ stdoutLines: [] }), + kind: 'codex' as 'claude', + prompt: 'x', + onEvent: () => undefined, + }) + ).rejects.toThrow(/MVP supports only 'claude'/) + }) + + it('passes the prompt through stdin and runs the right CLI args', async () => { + let cmd: ReadonlyArray = [] + let stdin = '' + const b = new StdioBridge() + await b.runTurn({ + sandbox: fakeSandbox({ + stdoutLines: ['{"type":"system","subtype":"init","session_id":"abc"}'], + onCmd: (c) => (cmd = c), + onStdin: (s) => (stdin = s), + }), + kind: 'claude', + prompt: 'hello world', + model: 'claude-haiku-4-5-20251001', + onEvent: () => undefined, + }) + expect(cmd[0]).toBe('claude') + expect(cmd).toContain('--print') + expect(cmd).toContain('--output-format=stream-json') + expect(cmd).toContain('--verbose') + expect(cmd).toContain('--dangerously-skip-permissions') + expect(cmd).toContain('--model') + expect(cmd).toContain('claude-haiku-4-5-20251001') + expect(stdin).toBe('hello world') + }) + + it('throws with stderr when CLI exits non-zero', async () => { + const b = new StdioBridge() + await expect( + b.runTurn({ + sandbox: fakeSandbox({ + stdoutLines: [], + stderrLines: ['fatal: bad thing'], + exitCode: 1, + }), + kind: 'claude', + prompt: 'x', + onEvent: () => undefined, + }) + ).rejects.toThrow(/claude CLI exited 1.*fatal: bad thing/) + }) +}) +``` + +(Note: the test that depends on real `agent-session-protocol` normalization of synthetic JSONL is omitted — the integration smoke test in Phase 2 covers that path with real CLI output.) + +- [ ] **Step 3: Run `pnpm -C packages/coding-agents test test/unit/stdio-bridge.test.ts`** + +Expect: PASS. + +- [ ] **Step 4: Commit** + +``` +git add packages/coding-agents/src/bridge packages/coding-agents/test/unit/stdio-bridge.test.ts +git commit -m "feat(coding-agents): add StdioBridge" +``` + +--- + +## Phase 2 — Integration smoke (sequential) + +### Task 2.1 — End-to-end smoke test + +**Files:** + +- Create: `packages/coding-agents/test/support/env.ts` +- Create: `packages/coding-agents/test/integration/smoke.test.ts` + +**Validation goal:** + +1. Build the test image. +2. `LocalDockerProvider.start()` a sandbox with a per-test volume and `ANTHROPIC_API_KEY` from the env file. +3. `StdioBridge.runTurn()` runs `claude --print` inside, with prompt `"Reply with the single word: ok"`. +4. Assert: at least one `session_init` event and at least one `assistant_message` event were captured. +5. Cleanup: `provider.destroy(agentId)` removes the container. + +- [ ] **Step 1: Write `test/support/env.ts`** + +```ts +import { readFileSync } from 'node:fs' + +const KEY_FILE = '/tmp/.electric-coding-agents-env' + +export interface TestEnv { + ANTHROPIC_API_KEY: string + ANTHROPIC_MODEL: string +} + +let cached: TestEnv | null = null + +export function loadTestEnv(): TestEnv { + if (cached) return cached + let raw: string + try { + raw = readFileSync(KEY_FILE, 'utf-8') + } catch (e) { + throw new Error( + `Integration tests require ${KEY_FILE} (mode 600) with ANTHROPIC_API_KEY=… and ANTHROPIC_MODEL=…` + ) + } + const out: Partial = {} + for (const line of raw.split('\n')) { + const trimmed = line.trim() + if (!trimmed || trimmed.startsWith('#')) continue + const eq = trimmed.indexOf('=') + if (eq < 0) continue + const k = trimmed.slice(0, eq) + const v = trimmed.slice(eq + 1) + if (k === 'ANTHROPIC_API_KEY' || k === 'ANTHROPIC_MODEL') out[k] = v + } + if (!out.ANTHROPIC_API_KEY) { + throw new Error(`${KEY_FILE} must contain ANTHROPIC_API_KEY=…`) + } + cached = { + ANTHROPIC_API_KEY: out.ANTHROPIC_API_KEY, + ANTHROPIC_MODEL: out.ANTHROPIC_MODEL ?? 'claude-haiku-4-5-20251001', + } + return cached +} +``` + +- [ ] **Step 2: Write `test/integration/smoke.test.ts`** + +```ts +import { describe, expect, beforeAll, afterAll, it } from 'vitest' +import type { NormalizedEvent } from 'agent-session-protocol' +import { LocalDockerProvider } from '../../src/providers/local-docker' +import { StdioBridge } from '../../src/bridge/stdio-bridge' +import { buildTestImage, TEST_IMAGE_TAG } from '../support/build-image' +import { loadTestEnv } from '../support/env' + +const SHOULD_RUN = process.env.DOCKER === '1' +const describeMaybe = SHOULD_RUN ? describe : describe.skip + +describeMaybe('coding-agents smoke (real Docker + real Claude)', () => { + const provider = new LocalDockerProvider({ image: TEST_IMAGE_TAG }) + const bridge = new StdioBridge() + const agentId = `/test/coding-agent/${Date.now().toString(36)}` + const events: Array = [] + + beforeAll(async () => { + await buildTestImage() + }, 600_000) + + afterAll(async () => { + await provider.destroy(agentId).catch(() => undefined) + }) + + it('starts a sandbox, runs claude, captures session_init + assistant_message', async () => { + const env = loadTestEnv() + const sandbox = await provider.start({ + agentId, + kind: 'claude', + workspace: { type: 'volume', name: agentId.replace(/[^a-z0-9-]/gi, '-') }, + env: { ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }, + }) + + const result = await bridge.runTurn({ + sandbox, + kind: 'claude', + prompt: 'Reply with the single word: ok', + model: env.ANTHROPIC_MODEL, + onEvent: (e) => events.push(e), + }) + + expect(result.exitCode).toBe(0) + expect(events.find((e) => e.type === 'session_init')).toBeTruthy() + expect(events.find((e) => e.type === 'assistant_message')).toBeTruthy() + // sanity: response text isn't empty + expect(result.finalText && result.finalText.length > 0).toBe(true) + }, 180_000) +}) +``` + +- [ ] **Step 3: Run the smoke test** + +``` +DOCKER=1 pnpm -C packages/coding-agents test:integration +``` + +Expect: PASS within ~3 minutes (image build + claude invocation). + +If it fails, **iterate** (Phase 3): inspect output, adjust the bridge / dockerfile / provider, re-run. Maximum 5 iterations before declaring blocked and writing the report. + +- [ ] **Step 4: Commit** + +``` +git add packages/coding-agents/test/support/env.ts packages/coding-agents/test/integration +git commit -m "test(coding-agents): integration smoke against real Docker + Claude" +``` + +--- + +## Phase 3 — Iteration (when smoke fails) + +For each failure, follow this protocol (max 5 cycles): + +1. Capture full failure output. +2. Hypothesize 1-3 likely causes (e.g., wrong claude flags, missing env, container exits early). +3. Pick the highest-likelihood fix; apply it. +4. Re-run smoke. +5. If still failing, document in the report (Phase 4) and try the next hypothesis. + +Common failure modes to anticipate: + +- **`claude: not found`** → image install path issue. Check `which claude` inside the container; ensure the npm global bin is in PATH. +- **`ANTHROPIC_API_KEY not set`** → env not piped through `docker exec -e`. Verify `LocalDockerProvider.execInContainer` is forwarding the env. +- **`--verbose required with --output-format=stream-json`** → already accounted for, but if claude version drifts the message may differ. +- **Empty stdout** → Claude may be writing JSON only when it has the API key valid. Check stderr. +- **`normalize` throws** → a line is not valid JSON. Filter empty/non-JSON lines before passing. +- **Container exits before exec lands** → `tini` + `tail -f /dev/null` should keep it alive. Add `docker logs ` debug. +- **Permission errors on volume** → ensure `chown agent:agent /workspace` in Dockerfile. + +After a passing run, even if some flakiness was observed, treat first green as success and proceed to Phase 4. + +If 5 cycles pass without green, **stop** and write the report describing the blocker. + +--- + +## Phase 4 — Report + +### Task 4.1 — Write report + +**File:** `docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md` + +- [ ] **Step 1: Write report markdown** + +Include: + +- Goal & validation bar. +- What worked: tasks/phases that landed cleanly on first try. +- What broke: each bug, hypothesis, fix attempt, outcome. +- Token usage / time on wall clock if observable. +- Open questions for the next iteration. +- Recommended next steps to extend the MVP toward the full spec. + +- [ ] **Step 2: Commit** + +``` +git add docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md +git commit -m "docs(coding-agents): MVP run report" +``` + +--- + +## Self-review checklist (post-write) + +- [x] **Spec coverage:** Plan covers a subset of the full spec — explicitly scoped down to "claude in docker via Provider + Bridge". The full spec sections this MVP defers to follow-on plans: + - LifecycleManager, workspace registry / lease, runtime API surface, built-in entity, UI updates, codex support, resume flow, conformance suite, removal of `coder` entity. All listed under "Spec scope cuts". +- [x] **Placeholder scan:** No TBDs / TODOs / "appropriate handling" in the steps. +- [x] **Type consistency:** `RunTurnArgs.kind`, `RunTurnArgs.model`, `RunTurnArgs.onEvent`, `RunTurnArgs.onNativeLine` consistent across `types.ts`, `stdio-bridge.ts`, and the smoke test. +- [x] **Approval:** Pre-approved per user instruction ("approve everything"). Proceeding to dispatch. From 6a334900a9ef2492071aaa6218184047a8a2c857 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 01:24:05 +0100 Subject: [PATCH 003/279] feat(coding-agents): scaffold @electric-ax/coding-agents package --- packages/coding-agents/.gitignore | 4 ++ packages/coding-agents/package.json | 54 +++++++++++++++++++++++++ packages/coding-agents/src/index.ts | 1 + packages/coding-agents/tsconfig.json | 21 ++++++++++ packages/coding-agents/tsdown.config.ts | 10 +++++ packages/coding-agents/vitest.config.ts | 9 +++++ 6 files changed, 99 insertions(+) create mode 100644 packages/coding-agents/.gitignore create mode 100644 packages/coding-agents/package.json create mode 100644 packages/coding-agents/src/index.ts create mode 100644 packages/coding-agents/tsconfig.json create mode 100644 packages/coding-agents/tsdown.config.ts create mode 100644 packages/coding-agents/vitest.config.ts diff --git a/packages/coding-agents/.gitignore b/packages/coding-agents/.gitignore new file mode 100644 index 0000000000..8b25f88395 --- /dev/null +++ b/packages/coding-agents/.gitignore @@ -0,0 +1,4 @@ +dist +node_modules +.vitest-temp +coverage diff --git a/packages/coding-agents/package.json b/packages/coding-agents/package.json new file mode 100644 index 0000000000..0adc00d5e0 --- /dev/null +++ b/packages/coding-agents/package.json @@ -0,0 +1,54 @@ +{ + "name": "@electric-ax/coding-agents", + "version": "0.0.1", + "description": "Sandbox + bridge layer for spawning coding agents (Claude Code, Codex) under Electric Agents.", + "repository": { + "type": "git", + "url": "git+https://github.com/electric-sql/electric.git", + "directory": "packages/coding-agents" + }, + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "scripts": { + "build": "tsdown", + "dev": "tsdown --watch", + "test": "vitest run", + "test:watch": "vitest", + "test:integration": "DOCKER=1 vitest run test/integration", + "typecheck": "tsc --noEmit", + "stylecheck": "eslint . --quiet" + }, + "exports": { + ".": { + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + }, + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + } + }, + "./package.json": "./package.json" + }, + "dependencies": { + "agent-session-protocol": "^0.0.2", + "pino": "^10.3.1", + "pino-pretty": "^13.0.0", + "zod": "^4.3.6" + }, + "devDependencies": { + "@types/node": "^22.19.15", + "tsdown": "^0.9.0", + "typescript": "^5.7.0", + "vitest": "^3.2.4" + }, + "files": [ + "dist", + "docker" + ], + "sideEffects": false, + "license": "Apache-2.0" +} diff --git a/packages/coding-agents/src/index.ts b/packages/coding-agents/src/index.ts new file mode 100644 index 0000000000..336ce12bb9 --- /dev/null +++ b/packages/coding-agents/src/index.ts @@ -0,0 +1 @@ +export {} diff --git a/packages/coding-agents/tsconfig.json b/packages/coding-agents/tsconfig.json new file mode 100644 index 0000000000..93400c1a05 --- /dev/null +++ b/packages/coding-agents/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "isolatedDeclarations": false, + "moduleResolution": "Bundler", + "module": "ESNext", + "target": "ES2022", + "lib": ["ESNext", "DOM"], + "allowJs": true, + "skipLibCheck": true, + "noEmit": true, + "strict": true, + "forceConsistentCasingInFileNames": true, + "esModuleInterop": true, + "baseUrl": ".", + "outDir": "./dist", + "rootDir": "./src", + "types": ["node", "vitest/globals"] + }, + "include": ["src/**/*", "test/**/*"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/coding-agents/tsdown.config.ts b/packages/coding-agents/tsdown.config.ts new file mode 100644 index 0000000000..80af2cffe0 --- /dev/null +++ b/packages/coding-agents/tsdown.config.ts @@ -0,0 +1,10 @@ +import { defineConfig } from 'tsdown' + +export default defineConfig({ + entry: [`./src/index.ts`], + outDir: `dist`, + format: [`esm`, `cjs`], + dts: true, + clean: true, + sourcemap: true, +}) diff --git a/packages/coding-agents/vitest.config.ts b/packages/coding-agents/vitest.config.ts new file mode 100644 index 0000000000..714b528421 --- /dev/null +++ b/packages/coding-agents/vitest.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from 'vitest/config' + +export default defineConfig({ + test: { + globals: true, + environment: `node`, + testTimeout: 120_000, // integration tests build images, can be slow + }, +}) From 0c9d3cf2fc514a5b42181a0ae328166aab163fab Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 01:24:42 +0100 Subject: [PATCH 004/279] feat(coding-agents): define core types --- packages/coding-agents/src/index.ts | 15 ++++- packages/coding-agents/src/log.ts | 14 +++++ packages/coding-agents/src/types.ts | 86 +++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 packages/coding-agents/src/log.ts create mode 100644 packages/coding-agents/src/types.ts diff --git a/packages/coding-agents/src/index.ts b/packages/coding-agents/src/index.ts index 336ce12bb9..dd1063c3dc 100644 --- a/packages/coding-agents/src/index.ts +++ b/packages/coding-agents/src/index.ts @@ -1 +1,14 @@ -export {} +export type { + CodingAgentKind, + SandboxSpec, + ExecRequest, + ExecHandle, + SandboxInstance, + SandboxProvider, + RecoveredSandbox, + RunTurnArgs, + RunTurnResult, + Bridge, +} from './types' +// export { LocalDockerProvider } from './providers/local-docker' +// export { StdioBridge } from './bridge/stdio-bridge' diff --git a/packages/coding-agents/src/log.ts b/packages/coding-agents/src/log.ts new file mode 100644 index 0000000000..5eb9a0cc9f --- /dev/null +++ b/packages/coding-agents/src/log.ts @@ -0,0 +1,14 @@ +import pino from 'pino' + +export const log = pino({ + name: `coding-agents`, + level: process.env.LOG_LEVEL ?? `info`, + ...(process.env.NODE_ENV !== `production` + ? { + transport: { + target: `pino-pretty`, + options: { colorize: true, translateTime: `HH:MM:ss.l` }, + }, + } + : {}), +}) diff --git a/packages/coding-agents/src/types.ts b/packages/coding-agents/src/types.ts new file mode 100644 index 0000000000..b8f55f2d42 --- /dev/null +++ b/packages/coding-agents/src/types.ts @@ -0,0 +1,86 @@ +import type { NormalizedEvent } from 'agent-session-protocol' + +export type CodingAgentKind = `claude` | `codex` + +// ─── Sandbox provider ────────────────────────────────────────────────────── + +export interface SandboxSpec { + /** Stable agent identity (e.g. //coding-agent/). */ + agentId: string + kind: CodingAgentKind + workspace: + | { type: `volume`; name: string } + | { type: `bindMount`; hostPath: string } + /** Env vars exposed inside the sandbox (ANTHROPIC_API_KEY, etc.). */ + env: Record +} + +export interface ExecRequest { + cmd: string[] + cwd?: string + env?: Record + stdin?: `pipe` | `ignore` +} + +export interface ExecHandle { + /** Async iterables of stdout/stderr lines (UTF-8, newline-stripped). */ + stdout: AsyncIterable + stderr: AsyncIterable + /** Available iff request.stdin === 'pipe'. */ + writeStdin?: (chunk: string) => Promise + closeStdin?: () => Promise + wait(): Promise<{ exitCode: number }> + kill(signal?: NodeJS.Signals): void +} + +export interface SandboxInstance { + instanceId: string + agentId: string + /** Path inside sandbox where the workspace volume / bind-mount is mounted. */ + workspaceMount: string + exec(args: ExecRequest): Promise +} + +export interface RecoveredSandbox { + agentId: string + instanceId: string + status: `running` | `stopped` +} + +export interface SandboxProvider { + readonly name: string + start(spec: SandboxSpec): Promise + stop(instanceId: string): Promise + destroy(agentId: string): Promise + status(agentId: string): Promise<`running` | `stopped` | `unknown`> + /** Discover sandboxes adopted across host restarts. MVP: may return []. */ + recover(): Promise> +} + +// ─── Bridge ──────────────────────────────────────────────────────────────── + +export interface RunTurnArgs { + sandbox: SandboxInstance + kind: CodingAgentKind + /** Resume id; undefined for first turn. */ + nativeSessionId?: string + prompt: string + /** Model to pass to the CLI (e.g. 'claude-haiku-4-5-20251001'). */ + model?: string + /** Sink for normalized events as parsed off CLI stdout. */ + onEvent: (e: NormalizedEvent) => void + /** Sink for raw native JSONL lines (tee'd to a sidecar collection). */ + onNativeLine?: (line: string) => void +} + +export interface RunTurnResult { + /** Discovered or provided session id. */ + nativeSessionId?: string + exitCode: number + /** First assistant_message text (for parent's wake payload). */ + finalText?: string +} + +export interface Bridge { + runTurn(args: RunTurnArgs): Promise +} From 4af98f3b5ca2074efebb1d1c99be918d33ea4155 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 01:27:31 +0100 Subject: [PATCH 005/279] feat(coding-agents): add LocalDockerProvider Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/providers/local-docker.ts | 243 ++++++++++++++++++ .../test/unit/local-docker.test.ts | 9 + 2 files changed, 252 insertions(+) create mode 100644 packages/coding-agents/src/providers/local-docker.ts create mode 100644 packages/coding-agents/test/unit/local-docker.test.ts diff --git a/packages/coding-agents/src/providers/local-docker.ts b/packages/coding-agents/src/providers/local-docker.ts new file mode 100644 index 0000000000..8a9f1f9f99 --- /dev/null +++ b/packages/coding-agents/src/providers/local-docker.ts @@ -0,0 +1,243 @@ +import { spawn } from 'node:child_process' +import { realpath } from 'node:fs/promises' +import { createInterface } from 'node:readline' +import type { Readable, Writable } from 'node:stream' +import { log } from '../log' +import type { + ExecHandle, + ExecRequest, + RecoveredSandbox, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from '../types' + +const IMAGE = + process.env.CODING_AGENT_IMAGE ?? `electric-ax/coding-agent-sandbox:test` + +export interface LocalDockerProviderOptions { + /** Override the image tag (default: env CODING_AGENT_IMAGE or test image). */ + image?: string +} + +export class LocalDockerProvider implements SandboxProvider { + readonly name = `local-docker` + private readonly image: string + + constructor(opts: LocalDockerProviderOptions = {}) { + this.image = opts.image ?? IMAGE + } + + async start(spec: SandboxSpec): Promise { + const existing = await this.findContainerByAgentId(spec.agentId) + if (existing && existing.running) { + log.debug( + { agentId: spec.agentId, instanceId: existing.id }, + `attaching to existing sandbox` + ) + return this.makeInstance(existing.id, spec) + } + if (existing && !existing.running) { + // Stale stopped container with same agentId. Remove it first. + await runDocker([`rm`, `-f`, existing.id]) + } + + const labels = [ + `electric-ax.agent-id=${spec.agentId}`, + `electric-ax.kind=${spec.kind}`, + `electric-ax.workspace-name=${ + spec.workspace.type === `volume` ? spec.workspace.name : `bind-mount` + }`, + ] + + const mount = await this.mountFlag(spec) + + const args = [ + `run`, + `-d`, + `--rm=false`, + ...labels.flatMap((l) => [`--label`, l]), + mount, + this.image, + ] + + const { stdout } = await runDocker(args) + const instanceId = stdout.trim() + log.info({ agentId: spec.agentId, instanceId }, `started sandbox`) + return this.makeInstance(instanceId, spec) + } + + async stop(instanceId: string): Promise { + await runDocker([`stop`, `-t`, `5`, instanceId]).catch((err) => { + log.warn( + { err, instanceId }, + `docker stop failed (probably already stopped)` + ) + }) + await runDocker([`rm`, `-f`, instanceId]).catch(() => undefined) + } + + async destroy(agentId: string): Promise { + const c = await this.findContainerByAgentId(agentId) + if (c) await this.stop(c.id) + // Volume cleanup is intentionally NOT done in MVP — tests clean up explicitly. + } + + async status(agentId: string): Promise<`running` | `stopped` | `unknown`> { + const c = await this.findContainerByAgentId(agentId) + if (!c) return `unknown` + return c.running ? `running` : `stopped` + } + + async recover(): Promise> { + const { stdout } = await runDocker([ + `ps`, + `-a`, + `--format`, + `{{.ID}}\t{{.Label "electric-ax.agent-id"}}\t{{.State}}`, + `--filter`, + `label=electric-ax.agent-id`, + ]) + return stdout + .trim() + .split(`\n`) + .filter(Boolean) + .map((line) => { + const [id, agentId, state] = line.split(`\t`) + return { + instanceId: id ?? ``, + agentId: agentId ?? ``, + status: state === `running` ? `running` : `stopped`, + } + }) + } + + // ── private helpers ── + + private async findContainerByAgentId( + agentId: string + ): Promise<{ id: string; running: boolean } | null> { + const { stdout } = await runDocker([ + `ps`, + `-a`, + `--format`, + `{{.ID}}\t{{.State}}`, + `--filter`, + `label=electric-ax.agent-id=${agentId}`, + ]) + const line = stdout + .trim() + .split(`\n`) + .find((l) => l.length > 0) + if (!line) return null + const [id, state] = line.split(`\t`) + return { id: id ?? ``, running: state === `running` } + } + + private async mountFlag(spec: SandboxSpec): Promise { + if (spec.workspace.type === `volume`) { + const volName = `coding-agent-workspace-${spec.workspace.name}` + // ensure the volume exists (docker auto-creates on first use, but explicit is friendlier) + await runDocker([`volume`, `create`, volName]).catch(() => undefined) + return `--mount=type=volume,source=${volName},target=/workspace` + } + const real = await realpath(spec.workspace.hostPath) + return `--mount=type=bind,source=${real},target=/workspace` + } + + private makeInstance(instanceId: string, spec: SandboxSpec): SandboxInstance { + return { + instanceId, + agentId: spec.agentId, + workspaceMount: `/workspace`, + exec: (args) => execInContainer(instanceId, args, spec.env), + } + } +} + +// ── docker CLI helpers ── + +async function runDocker( + args: ReadonlyArray +): Promise<{ stdout: string; stderr: string }> { + return new Promise((resolveCmd, rejectCmd) => { + const child = spawn(`docker`, args as Array, { + stdio: [`ignore`, `pipe`, `pipe`], + }) + let stdout = `` + let stderr = `` + child.stdout.on(`data`, (d) => (stdout += d.toString())) + child.stderr.on(`data`, (d) => (stderr += d.toString())) + child.on(`error`, rejectCmd) + child.on(`exit`, (code) => { + if (code === 0) resolveCmd({ stdout, stderr }) + else + rejectCmd( + new Error(`docker ${args.join(` `)} exited ${code}: ${stderr}`) + ) + }) + }) +} + +function lineIterator(stream: Readable): AsyncIterable { + const rl = createInterface({ input: stream, crlfDelay: Infinity }) + return rl as unknown as AsyncIterable +} + +async function execInContainer( + containerId: string, + req: ExecRequest, + baseEnv: Record +): Promise { + const env = { ...baseEnv, ...(req.env ?? {}) } + const args: Array = [`exec`, `-i`] + if (req.cwd) args.push(`-w`, req.cwd) + for (const [k, v] of Object.entries(env)) args.push(`-e`, `${k}=${v}`) + args.push(containerId, ...req.cmd) + + const child = spawn(`docker`, args, { + stdio: [req.stdin === `pipe` ? `pipe` : `ignore`, `pipe`, `pipe`], + }) + + let exitCode: number | null = null + const exitPromise = new Promise<{ exitCode: number }>( + (resolveWait, rejectWait) => { + child.on(`error`, rejectWait) + child.on(`exit`, (code) => { + exitCode = code ?? -1 + resolveWait({ exitCode }) + }) + } + ) + // touch exitCode to silence unused-var warnings if any + void exitCode + + const stdinStream = child.stdin as Writable | null + + return { + stdout: lineIterator(child.stdout!), + stderr: lineIterator(child.stderr!), + writeStdin: stdinStream + ? async (chunk) => { + await new Promise((res, rej) => { + stdinStream.write(chunk, (err) => (err ? rej(err) : res())) + }) + } + : undefined, + closeStdin: stdinStream + ? async () => { + await new Promise((res) => { + stdinStream.end(res) + }) + } + : undefined, + wait: () => exitPromise, + kill: (signal = `SIGTERM`) => { + try { + child.kill(signal) + } catch { + // already dead + } + }, + } +} diff --git a/packages/coding-agents/test/unit/local-docker.test.ts b/packages/coding-agents/test/unit/local-docker.test.ts new file mode 100644 index 0000000000..7661063c0c --- /dev/null +++ b/packages/coding-agents/test/unit/local-docker.test.ts @@ -0,0 +1,9 @@ +import { describe, it, expect } from 'vitest' +import { LocalDockerProvider } from '../../src/providers/local-docker' + +describe(`LocalDockerProvider construction`, () => { + it(`exposes name "local-docker"`, () => { + const p = new LocalDockerProvider() + expect(p.name).toBe(`local-docker`) + }) +}) From 0a1c660a820f60aa2bc8d32d49de77ee61266eb0 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 01:28:23 +0100 Subject: [PATCH 006/279] feat(coding-agents): add StdioBridge --- .../coding-agents/src/bridge/stdio-bridge.ts | 96 +++++++++++++++++++ .../test/unit/stdio-bridge.test.ts | 91 ++++++++++++++++++ 2 files changed, 187 insertions(+) create mode 100644 packages/coding-agents/src/bridge/stdio-bridge.ts create mode 100644 packages/coding-agents/test/unit/stdio-bridge.test.ts diff --git a/packages/coding-agents/src/bridge/stdio-bridge.ts b/packages/coding-agents/src/bridge/stdio-bridge.ts new file mode 100644 index 0000000000..015eadeffc --- /dev/null +++ b/packages/coding-agents/src/bridge/stdio-bridge.ts @@ -0,0 +1,96 @@ +import { normalize } from 'agent-session-protocol' +import type { NormalizedEvent } from 'agent-session-protocol' +import { log } from '../log' +import type { Bridge, RunTurnArgs, RunTurnResult } from '../types' + +export class StdioBridge implements Bridge { + async runTurn(args: RunTurnArgs): Promise { + if (args.kind !== `claude`) { + throw new Error( + `StdioBridge MVP supports only 'claude', got '${args.kind}'` + ) + } + if (args.nativeSessionId) { + log.warn( + { nativeSessionId: args.nativeSessionId }, + `StdioBridge MVP does not implement resume — running fresh turn` + ) + } + + const cliArgs: Array = [ + `--print`, + `--output-format=stream-json`, + `--verbose`, + `--dangerously-skip-permissions`, + ] + if (args.model) cliArgs.push(`--model`, args.model) + + const handle = await args.sandbox.exec({ + cmd: [`claude`, ...cliArgs], + cwd: args.sandbox.workspaceMount, + stdin: `pipe`, + }) + + // Pipe prompt on stdin, then close. + if (!handle.writeStdin || !handle.closeStdin) { + throw new Error( + `StdioBridge requires stdin pipe but ExecHandle lacks one` + ) + } + await handle.writeStdin(args.prompt) + await handle.closeStdin() + + const rawLines: Array = [] + const stderrLines: Array = [] + + const drainStderr = async () => { + for await (const line of handle.stderr) { + stderrLines.push(line) + } + } + const drainStdout = async () => { + for await (const line of handle.stdout) { + if (!line) continue + rawLines.push(line) + if (args.onNativeLine) args.onNativeLine(line) + } + } + + await Promise.all([drainStdout(), drainStderr()]) + const exitInfo = await handle.wait() + + if (exitInfo.exitCode !== 0) { + const stderrPreview = stderrLines.join(`\n`).slice(0, 800) || `` + throw new Error( + `claude CLI exited ${exitInfo.exitCode}. stderr=${stderrPreview}` + ) + } + + let events: Array = [] + try { + events = normalize(rawLines, `claude`) + } catch (err) { + log.error({ err, sample: rawLines.slice(0, 3) }, `normalize failed`) + throw err + } + + for (const e of events) args.onEvent(e) + + const sessionInit = events.find((e) => e.type === `session_init`) + const lastAssistant = [...events] + .reverse() + .find((e) => e.type === `assistant_message`) + + return { + nativeSessionId: + sessionInit && `sessionId` in sessionInit + ? (sessionInit as { sessionId?: string }).sessionId + : undefined, + exitCode: exitInfo.exitCode, + finalText: + lastAssistant && `text` in lastAssistant + ? (lastAssistant as { text?: string }).text + : undefined, + } + } +} diff --git a/packages/coding-agents/test/unit/stdio-bridge.test.ts b/packages/coding-agents/test/unit/stdio-bridge.test.ts new file mode 100644 index 0000000000..6d31f768b0 --- /dev/null +++ b/packages/coding-agents/test/unit/stdio-bridge.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it } from 'vitest' +import { StdioBridge } from '../../src/bridge/stdio-bridge' +import type { ExecHandle, ExecRequest, SandboxInstance } from '../../src/types' + +function fakeSandbox(opts: { + stdoutLines: Array + stderrLines?: Array + exitCode?: number + onCmd?: (cmd: ReadonlyArray) => void + onStdin?: (chunk: string) => void +}): SandboxInstance { + return { + instanceId: `fake`, + agentId: `/x/coding-agent/y`, + workspaceMount: `/workspace`, + async exec(req: ExecRequest): Promise { + opts.onCmd?.(req.cmd) + const stdoutLines = opts.stdoutLines.slice() + const stderrLines = (opts.stderrLines ?? []).slice() + return { + stdout: (async function* () { + for (const l of stdoutLines) yield l + })(), + stderr: (async function* () { + for (const l of stderrLines) yield l + })(), + writeStdin: async (chunk) => { + opts.onStdin?.(chunk) + }, + closeStdin: async () => undefined, + wait: async () => ({ exitCode: opts.exitCode ?? 0 }), + kill: () => undefined, + } + }, + } +} + +describe(`StdioBridge`, () => { + it(`rejects non-claude kinds`, async () => { + const b = new StdioBridge() + await expect( + b.runTurn({ + sandbox: fakeSandbox({ stdoutLines: [] }), + kind: `codex` as `claude`, + prompt: `x`, + onEvent: () => undefined, + }) + ).rejects.toThrow(/MVP supports only 'claude'/) + }) + + it(`passes the prompt through stdin and runs the right CLI args`, async () => { + let cmd: ReadonlyArray = [] + let stdin = `` + const b = new StdioBridge() + await b.runTurn({ + sandbox: fakeSandbox({ + stdoutLines: [`{"type":"system","subtype":"init","session_id":"abc"}`], + onCmd: (c) => (cmd = c), + onStdin: (s) => (stdin = s), + }), + kind: `claude`, + prompt: `hello world`, + model: `claude-haiku-4-5-20251001`, + onEvent: () => undefined, + }) + expect(cmd[0]).toBe(`claude`) + expect(cmd).toContain(`--print`) + expect(cmd).toContain(`--output-format=stream-json`) + expect(cmd).toContain(`--verbose`) + expect(cmd).toContain(`--dangerously-skip-permissions`) + expect(cmd).toContain(`--model`) + expect(cmd).toContain(`claude-haiku-4-5-20251001`) + expect(stdin).toBe(`hello world`) + }) + + it(`throws with stderr when CLI exits non-zero`, async () => { + const b = new StdioBridge() + await expect( + b.runTurn({ + sandbox: fakeSandbox({ + stdoutLines: [], + stderrLines: [`fatal: bad thing`], + exitCode: 1, + }), + kind: `claude`, + prompt: `x`, + onEvent: () => undefined, + }) + ).rejects.toThrow(/claude CLI exited 1.*fatal: bad thing/) + }) +}) From 7d7a01fc0b2f65a7973cd4ca1720af2088ea854f Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 01:32:34 +0100 Subject: [PATCH 007/279] feat(coding-agents): add Dockerfile and image build helper --- packages/coding-agents/docker/Dockerfile | 33 +++++++++++++++++++ packages/coding-agents/docker/entrypoint.sh | 8 +++++ .../coding-agents/test/support/build-image.ts | 27 +++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 packages/coding-agents/docker/Dockerfile create mode 100755 packages/coding-agents/docker/entrypoint.sh create mode 100644 packages/coding-agents/test/support/build-image.ts diff --git a/packages/coding-agents/docker/Dockerfile b/packages/coding-agents/docker/Dockerfile new file mode 100644 index 0000000000..58ab1ce8a3 --- /dev/null +++ b/packages/coding-agents/docker/Dockerfile @@ -0,0 +1,33 @@ +FROM node:22-bookworm-slim + +# Install OS deps: git (claude needs it), curl (claude installer occasionally probes), bash, ca-certs. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + git \ + bash \ + tini \ + && rm -rf /var/lib/apt/lists/* + +# Non-root user for the agent. Claude's home is needed for ~/.claude transcript dir. +# node:22-bookworm-slim ships with a pre-existing `node` user at UID 1000; remove it first. +RUN userdel -r node 2>/dev/null || true \ + && useradd -m -s /bin/bash -u 1000 agent + +# Install the Claude CLI globally. Pin a recent version to avoid drift; can bump later. +# (Use the floating tag for now; pin in v1.) +RUN npm install -g @anthropic-ai/claude-code@latest \ + && claude --version + +# Workspace mount point. The provider attaches a volume here. +RUN mkdir -p /workspace \ + && chown agent:agent /workspace + +USER agent +WORKDIR /workspace + +COPY --chown=agent:agent docker/entrypoint.sh /home/agent/entrypoint.sh +RUN chmod +x /home/agent/entrypoint.sh + +ENTRYPOINT ["/usr/bin/tini", "--", "/home/agent/entrypoint.sh"] diff --git a/packages/coding-agents/docker/entrypoint.sh b/packages/coding-agents/docker/entrypoint.sh new file mode 100755 index 0000000000..6acc10b323 --- /dev/null +++ b/packages/coding-agents/docker/entrypoint.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +# If args are passed (e.g. `docker run image claude --version`), run them. +# Otherwise PID 1 just stays alive so docker exec can attach. +if [ "$#" -gt 0 ]; then + exec "$@" +fi +exec tail -f /dev/null diff --git a/packages/coding-agents/test/support/build-image.ts b/packages/coding-agents/test/support/build-image.ts new file mode 100644 index 0000000000..f4932258c2 --- /dev/null +++ b/packages/coding-agents/test/support/build-image.ts @@ -0,0 +1,27 @@ +import { spawn } from 'node:child_process' +import { dirname, resolve } from 'node:path' +import { fileURLToPath } from 'node:url' + +const here = dirname(fileURLToPath(import.meta.url)) +const PACKAGE_ROOT = resolve(here, `../..`) + +export const TEST_IMAGE_TAG = `electric-ax/coding-agent-sandbox:test` + +/** + * Build the test image. Idempotent: re-runs are cheap if Docker layer cache is warm. + * Throws on non-zero exit. + */ +export async function buildTestImage(): Promise { + await new Promise((resolveBuild, rejectBuild) => { + const child = spawn( + `docker`, + [`build`, `-t`, TEST_IMAGE_TAG, `-f`, `docker/Dockerfile`, `.`], + { cwd: PACKAGE_ROOT, stdio: `inherit` } + ) + child.on(`error`, rejectBuild) + child.on(`exit`, (code) => { + if (code === 0) resolveBuild() + else rejectBuild(new Error(`docker build exited ${code}`)) + }) + }) +} From 27ee432a28540d43e61f5c7827d9b8f7a532b589 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 01:33:28 +0100 Subject: [PATCH 008/279] fix(coding-agents): drop tsconfig rootDir, wire up provider+bridge re-exports Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/coding-agents/src/index.ts | 4 ++-- packages/coding-agents/tsconfig.json | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/coding-agents/src/index.ts b/packages/coding-agents/src/index.ts index dd1063c3dc..c1dd62b07a 100644 --- a/packages/coding-agents/src/index.ts +++ b/packages/coding-agents/src/index.ts @@ -10,5 +10,5 @@ export type { RunTurnResult, Bridge, } from './types' -// export { LocalDockerProvider } from './providers/local-docker' -// export { StdioBridge } from './bridge/stdio-bridge' +export { LocalDockerProvider } from './providers/local-docker' +export { StdioBridge } from './bridge/stdio-bridge' diff --git a/packages/coding-agents/tsconfig.json b/packages/coding-agents/tsconfig.json index 93400c1a05..bbe258cf06 100644 --- a/packages/coding-agents/tsconfig.json +++ b/packages/coding-agents/tsconfig.json @@ -13,7 +13,6 @@ "esModuleInterop": true, "baseUrl": ".", "outDir": "./dist", - "rootDir": "./src", "types": ["node", "vitest/globals"] }, "include": ["src/**/*", "test/**/*"], From b178f0e417261b8216e8cf5dae5f05cc48b24a05 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 01:34:58 +0100 Subject: [PATCH 009/279] test(coding-agents): integration smoke against real Docker + Claude Co-Authored-By: Claude Opus 4.7 (1M context) --- .../test/integration/smoke.test.ts | 48 +++++++++++++++++++ packages/coding-agents/test/support/env.ts | 40 ++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100644 packages/coding-agents/test/integration/smoke.test.ts create mode 100644 packages/coding-agents/test/support/env.ts diff --git a/packages/coding-agents/test/integration/smoke.test.ts b/packages/coding-agents/test/integration/smoke.test.ts new file mode 100644 index 0000000000..0b7dad8e63 --- /dev/null +++ b/packages/coding-agents/test/integration/smoke.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, beforeAll, afterAll, it } from 'vitest' +import type { NormalizedEvent } from 'agent-session-protocol' +import { LocalDockerProvider } from '../../src/providers/local-docker' +import { StdioBridge } from '../../src/bridge/stdio-bridge' +import { buildTestImage, TEST_IMAGE_TAG } from '../support/build-image' +import { loadTestEnv } from '../support/env' + +const SHOULD_RUN = process.env.DOCKER === `1` +const describeMaybe = SHOULD_RUN ? describe : describe.skip + +describeMaybe(`coding-agents smoke (real Docker + real Claude)`, () => { + const provider = new LocalDockerProvider({ image: TEST_IMAGE_TAG }) + const bridge = new StdioBridge() + const agentId = `/test/coding-agent/${Date.now().toString(36)}` + const events: Array = [] + + beforeAll(async () => { + await buildTestImage() + }, 600_000) + + afterAll(async () => { + await provider.destroy(agentId).catch(() => undefined) + }) + + it(`starts a sandbox, runs claude, captures session_init + assistant_message`, async () => { + const env = loadTestEnv() + const sandbox = await provider.start({ + agentId, + kind: `claude`, + workspace: { type: `volume`, name: agentId.replace(/[^a-z0-9-]/gi, `-`) }, + env: { ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }, + }) + + const result = await bridge.runTurn({ + sandbox, + kind: `claude`, + prompt: `Reply with the single word: ok`, + model: env.ANTHROPIC_MODEL, + onEvent: (e) => events.push(e), + }) + + expect(result.exitCode).toBe(0) + expect(events.find((e) => e.type === `session_init`)).toBeTruthy() + expect(events.find((e) => e.type === `assistant_message`)).toBeTruthy() + // sanity: response text isn't empty + expect(result.finalText && result.finalText.length > 0).toBe(true) + }, 180_000) +}) diff --git a/packages/coding-agents/test/support/env.ts b/packages/coding-agents/test/support/env.ts new file mode 100644 index 0000000000..6ef6903d8d --- /dev/null +++ b/packages/coding-agents/test/support/env.ts @@ -0,0 +1,40 @@ +import { readFileSync } from 'node:fs' + +const KEY_FILE = `/tmp/.electric-coding-agents-env` + +export interface TestEnv { + ANTHROPIC_API_KEY: string + ANTHROPIC_MODEL: string +} + +let cached: TestEnv | null = null + +export function loadTestEnv(): TestEnv { + if (cached) return cached + let raw: string + try { + raw = readFileSync(KEY_FILE, `utf-8`) + } catch { + throw new Error( + `Integration tests require ${KEY_FILE} (mode 600) with ANTHROPIC_API_KEY=… and ANTHROPIC_MODEL=…` + ) + } + const out: Partial = {} + for (const line of raw.split(`\n`)) { + const trimmed = line.trim() + if (!trimmed || trimmed.startsWith(`#`)) continue + const eq = trimmed.indexOf(`=`) + if (eq < 0) continue + const k = trimmed.slice(0, eq) + const v = trimmed.slice(eq + 1) + if (k === `ANTHROPIC_API_KEY` || k === `ANTHROPIC_MODEL`) out[k] = v + } + if (!out.ANTHROPIC_API_KEY) { + throw new Error(`${KEY_FILE} must contain ANTHROPIC_API_KEY=…`) + } + cached = { + ANTHROPIC_API_KEY: out.ANTHROPIC_API_KEY, + ANTHROPIC_MODEL: out.ANTHROPIC_MODEL ?? `claude-haiku-4-5-20251001`, + } + return cached +} From 0d9183c7300a70544499bf63fb3aef83d12bc48f Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 01:37:01 +0100 Subject: [PATCH 010/279] docs(coding-agents): MVP run report Co-Authored-By: Claude Opus 4.7 (1M context) --- .../2026-04-30-coding-agents-mvp-report.md | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md diff --git a/docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md b/docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md new file mode 100644 index 0000000000..ff415524b0 --- /dev/null +++ b/docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md @@ -0,0 +1,128 @@ +# Coding Agents MVP — Run Report + +**Date:** 2026-04-30 +**Plan:** `docs/superpowers/plans/2026-04-30-coding-agents-mvp.md` +**Spec:** `docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md` +**Validation bar:** integration smoke test starts a Docker sandbox, runs `claude --print` inside it, asserts `session_init` + `assistant_message` events. +**Outcome:** ✅ Green on first integration-test run. No iteration cycle needed. + +## Result + +``` +✓ test/unit/local-docker.test.ts (1 test) 2 ms +✓ test/unit/stdio-bridge.test.ts (3 tests) 4 ms +✓ test/integration/smoke.test.ts (1 test) 3.05 s ← validation bar +``` + +Wall clock from "Phase 0 dispatched" to "smoke green": + +- Phase 0 (foundation, 1 agent): ~2 min +- Phase 1 (3 parallel agents): ~7.5 min (gated by Dockerfile + image build at 1.A) +- Consolidation (parent session): ~1 min (tsconfig fix + index re-exports) +- Phase 2 (smoke, 1 agent): ~1.5 min (test itself: 3.05 s; rest was setup) + +**Total:** ~12 minutes of agent wall-time for a working sandbox + bridge + smoke. + +API cost: ~$0.001 per smoke run on `claude-haiku-4-5-20251001`. + +## What worked first time + +- **The four-phase plan.** Phase 0 (sequential foundation) → Phase 1 (3 parallel independent components) → Phase 2 (single integration agent) mapped cleanly to the file structure. No agent had to wait on another within a phase. +- **Pre-grounding by reading existing patterns.** `packages/agents-runtime/`'s `package.json`, `tsconfig.json`, `tsdown.config.ts`, `vitest.config.ts` were the templates. Subagents copied those exactly. +- **`agent-session-protocol@0.0.2`'s `normalize(lines, 'claude')`.** No signature divergence vs. the plan's assumption. Parsed real `claude --print --output-format=stream-json` output cleanly without filtering. +- **Image build cached aggressively.** First build ~22 s no-cache; subsequent rebuilds ~0.7 s. Smoke test re-runs are essentially free locally. +- **The stdin-piped prompt + `--print --output-format=stream-json --verbose --dangerously-skip-permissions --model claude-haiku-4-5-20251001` flag set.** Worked verbatim. + +## What had to be fixed mid-flight + +### 1. `tsconfig.json` `rootDir` vs. `include: ["test/**/*"]` clash + +**Symptom:** Phase 1.B and Phase 1.C agents both reported `TS6059: File 'X' is not under 'rootDir'` when typechecking. The `tsconfig.json` (copied from `packages/agents-runtime/`) had `"rootDir": "./src"` while `"include"` matched `test/**/*`. + +**Why three agents independently flagged it but couldn't fix it:** the Phase 1 agents had explicit constraints to touch only their own files (no cross-cutting `tsconfig.json` edits) — to prevent merge conflicts on the parent commit. The agents correctly did the right thing locally (their tests passed) and surfaced the issue to the parent. + +**Fix:** Parent session removed `"rootDir"` (single line). Single consolidation commit (`27ee432a2`). + +**Lesson:** When dispatching parallel agents that all need TS to compile, the parent should fix obvious project-config issues _up front_ before dispatching. Or the plan should pre-empt with the right config. + +### 2. `useradd -u 1000` collided with `node:22-bookworm-slim`'s built-in `node` user (UID 1000) + +**Symptom:** First Dockerfile build attempt failed with `useradd: UID 1000 is not unique`. + +**Hypothesis:** The base image already provisions a non-root user. + +**Fix:** Phase 1.A agent added `userdel -r node 2>/dev/null || true` before the `useradd`. Build went green. + +**Lesson:** Plans that bake `useradd -u 1000` shouldn't assume the base image is empty. Either pick a UID like 1001 or do the userdel-then-useradd dance shown above. Prefer the latter — keeps the convention `agent` user. + +### 3. `entrypoint.sh` ignored `$@`, breaking `docker run image claude --version` + +**Symptom:** The plan's verbatim entrypoint (`exec tail -f /dev/null`) caused `docker run image claude --version` to hang on `tail` instead of executing `claude --version`. With `ENTRYPOINT` set, positional args become args to the entrypoint, not a replacement command. + +**Fix:** Phase 1.A agent made the entrypoint arg-aware — exec `$@` if any args were passed, fall back to `tail -f /dev/null` otherwise. Both `docker run image` (no-arg, idle PID 1) and `docker run image claude --version` (one-shot) now work. + +**Lesson:** When using `tini` + `tail` for a long-lived sandbox, the entrypoint must still respect `CMD`/positional args, otherwise smoke checks like `docker run IMAGE claude --version` won't work. + +## Other notes + +- **Lint-staged backtick conversion.** Repo's pre-commit hook converted all single-quoted strings to backticks via prettier/eslint. Subagents matched the existing style automatically once they read Phase 0's source. No semantic impact. +- **Async iterables for `stdout` / `stderr` worked smoothly.** `node:readline.createInterface(stream)` typed-as `AsyncIterable` and consumed via `for await`. No backpressure issues observed. +- **Volume permissions.** `chown agent:agent /workspace` + `USER agent` in the Dockerfile combined with Docker's volume-mount default ownership preserved write access. No permission errors observed. +- **`--include-partial-messages` not used in MVP.** With `claude --print` we get the full assistant message in one event at the end. For streaming UIs we'll add it later. Not needed for the validation bar. + +## What's NOT done (vs. the full design spec) + +The MVP intentionally cut these — listed here so the next plan can pick up: + +1. **Codex support.** Bridge currently rejects `kind: 'codex'`. Spec needs codex CLI bundled into the image and a parallel arg-set in the bridge. +2. **`LifecycleManager`** — idle hibernation, `pin`/`release` reference counting, state machine, crash recovery via container labels. +3. **Workspace registry + lease.** Per-workspace mutex; refcount on shareable volumes; bind-mount realpath canonicalization. Without this, two agents on the same volume can race. +4. **Resume.** `nativeSessionId` is currently logged-and-ignored. Needs `--resume ` plumbing + sidecar JSONL collection write/read for cold-boot restore. +5. **`ctx.spawnCodingAgent` / `ctx.observeCodingAgent`.** No runtime API surface. Today only the Provider + Bridge are usable directly. +6. **Built-in `coding-agent` entity.** No entity registration, no `runs` / `events` / `nativeJsonl` / `lifecycle` collections, no inbox-driven prompt queueing. +7. **UI updates.** Status enum extension, header sandbox provenance row, pin/release/stop buttons, lifecycle event rendering, shared-workspace indicator. +8. **Tools.** `spawn_coding_agent` / `prompt_coding_agent` for Horton. +9. **Removal of legacy `coder` entity.** `packages/agents/src/agents/coding-session.ts`, `spawn-coder.ts`, `prompt-coder.ts` still in place. +10. **Conformance suite + cross-kind resume tests.** +11. **Crash recovery flow.** `provider.recover()` returns labeled containers correctly, but no orphan-run detection / `runs.status=failed` transition exists yet. + +## Recommended next steps (priority order) + +1. Add `LifecycleManager` + workspace lease (small, unlocks correct multi-agent behavior). +2. Add `ctx.spawnCodingAgent` API surface + built-in `coding-agent` entity (medium; integration with `agents-server` lifecycle). +3. Add resume (`--resume`, sidecar collection, denormalize on cold boot). +4. Replace legacy `coder` + update Horton's tools. +5. UI extensions. +6. Codex support (CLI bundling + bridge arg path). +7. Conformance suite for the parameterized `SandboxProvider` interface (sets up future Modal/Fly impls). + +## Artifacts + +Commits on `main` (in order): + +1. `6a334900a` — scaffold `@electric-ax/coding-agents` package +2. `0c9d3cf2f` — define core types +3. `7d7a01fc0` — Dockerfile + image build helper +4. `4af98f3b5` — `LocalDockerProvider` +5. `0a1c660a8` — `StdioBridge` +6. `27ee432a2` — fix tsconfig + wire re-exports +7. `b178f0e41` — integration smoke against real Docker + Claude + +Image: `electric-ax/coding-agent-sandbox:test` (loaded locally; not pushed). + +API key: stored at `/tmp/.electric-coding-agents-env` (mode 600, outside repo). + +## How to re-run + +```bash +# Rebuild image (cached if no Dockerfile changes) +docker build -t electric-ax/coding-agent-sandbox:test \ + -f packages/coding-agents/docker/Dockerfile \ + packages/coding-agents + +# Run all unit tests (no Docker required) +pnpm -C packages/coding-agents test + +# Run the smoke test (needs Docker + /tmp/.electric-coding-agents-env) +DOCKER=1 pnpm -C packages/coding-agents test:integration +``` From 4a9a7e58b27320f23beb364388a2b5458c3c1e91 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 07:14:26 +0100 Subject: [PATCH 011/279] docs(specs): add Slice A design for coding-agents runtime API + entity Specs the next iteration after the MVP Provider+Bridge: built-in coding-agent entity, LifecycleManager, WorkspaceRegistry, and the typed ctx.spawnCodingAgent / ctx.observeCodingAgent on HandlerContext. Coexists with the legacy `coder` entity; removal is Slice B. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...2026-04-30-coding-agents-slice-a-design.md | 807 ++++++++++++++++++ 1 file changed, 807 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-30-coding-agents-slice-a-design.md diff --git a/docs/superpowers/specs/2026-04-30-coding-agents-slice-a-design.md b/docs/superpowers/specs/2026-04-30-coding-agents-slice-a-design.md new file mode 100644 index 0000000000..f47230f2df --- /dev/null +++ b/docs/superpowers/specs/2026-04-30-coding-agents-slice-a-design.md @@ -0,0 +1,807 @@ +# Coding Agents — Slice A: Runtime API + Built-in Entity + Lifecycle + +**Status:** Draft +**Date:** 2026-04-30 +**Author:** Valter Balegas +**Parent spec:** `docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md` +**Predecessor:** `docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md` (the Provider + Bridge MVP) + +## Summary + +Slice A is the second iteration of the coding-agents platform primitive. The MVP shipped a `LocalDockerProvider` and a `StdioBridge` in `@electric-ax/coding-agents`. Slice A wires those into a first-class runtime primitive: a built-in `coding-agent` entity, a `LifecycleManager` that runs the state machine, a `WorkspaceRegistry` that serializes shared volumes, and the typed `ctx.spawnCodingAgent` / `ctx.observeCodingAgent` API on `HandlerContext`. + +After Slice A, an entity author can write `await ctx.spawnCodingAgent({ kind: 'claude', workspace: { type: 'volume' }, initialPrompt: 'fix the bug' })`, await a `runFinished` wake on the parent with the response text, and exercise pin/release/stop/destroy lifecycle controls — all backed by a Docker sandbox with proper crash recovery. + +The legacy `coder` entity (`packages/agents/src/agents/coding-session.ts`) is **not** removed in Slice A; it coexists under a different entity type name and disjoint collection-type wires. Removal is Slice B. + +## Goals + +1. **Typed primitive on `ctx`.** `ctx.spawnCodingAgent({ ... })` returns a `CodingAgentHandle`. Mirrors the existing `ctx.useCodingAgent` pattern (typed wrapper over `ctx.spawn(, ...)`). +2. **Built-in entity.** A `coding-agent` entity type registered at server bootstrap, with `sessionMeta` / `runs` / `events` / `lifecycle` collections. Authors cannot `defineEntity('coding-agent', …)`. +3. **Lifecycle correctness.** The 6-state machine (`cold` / `starting` / `idle` / `running` / `stopping` / `error`) is enforced. Idle hibernation works. Pin/release works. +4. **Multi-agent ready.** Two agents on the same workspace identity coexist while idle and serialize at `runTurn` boundaries. +5. **Crash-recoverable.** Server restart adopts running containers via `provider.recover()`. Orphaned in-flight runs are reconciled to `failed` on the next handler entry. **Goal: dev iteration doesn't require manual `docker rm` between server restarts.** +6. **Test coverage.** Unit suite for `LifecycleManager` + `WorkspaceRegistry` + entity handler. One real-Docker integration test exercising the full flow including crash recovery and lease serialization. + +## Non-goals (Slice A) + +- **Resume.** `nativeJsonl` collection writes, `--resume ` plumbing, cold-boot tmpfs materialization. Each cold boot starts a fresh CLI session. **(Slice B.)** +- **Codex support.** Bridge still rejects `kind: 'codex'`. **(Slice C.)** +- **Removing the legacy `coder` entity** + its tools (`spawn-coder.ts`, `prompt-coder.ts`). **(Slice B.)** +- **New Horton tools** (`spawn_coding_agent`, `prompt_coding_agent`). **(Slice B.)** +- **UI extensions** — status enum extension, header sandbox provenance, pin/release/stop buttons, lifecycle row rendering. **(Slice C.)** +- **Conformance suite** parameterized by `SandboxProvider`. **(Slice C.)** +- **`wake.on: 'eventAppended'`.** Slice A wakes only on `runFinished`. (No streaming UI consumer yet.) +- **`sandbox?` provider override on `SpawnCodingAgentOptions`.** Only one provider exists. +- **Per-event approve/deny for `permission_request`.** CLIs run with `--dangerously-skip-permissions`. + +## Architecture + +``` + Entity author code + ┌──────────────────────────────────────────────────────────────┐ + │ ctx.spawnCodingAgent({ kind, workspace, ... }) │ ← @electric-ax/agents-runtime + │ ctx.observeCodingAgent(id) │ + └──────────────────────────────────────────────────────────────┘ + │ desugars to ctx.spawn('coding-agent', ...) + ▼ + ┌──────────────────────────────────────────────────────────────┐ + │ Built-in `coding-agent` entity (registerCodingAgent) │ ← @electric-ax/coding-agents + │ · handler.ts drives the state machine │ + │ · collections: sessionMeta, runs, events, lifecycle │ + │ · inbox messages: prompt | pin | release | stop | destroy │ + └──────────────────────────────────────────────────────────────┘ + │ closure-scoped deps + ▼ + ┌─────────────────────────┐ ┌─────────────────────────────────┐ + │ Bridge (StdioBridge) │ │ LifecycleManager │ + │ runTurn → events │ │ · in-process state │ + │ (Slice MVP) │ │ · idle timer (setTimeout) │ + └─────────────────────────┘ │ · pin refcount (in-memory) │ + │ │ · armIdleTimer/ensureRunning │ + │ └─────────────────────────────────┘ + │ │ + │ ▼ + │ ┌─────────────────────────────────┐ + │ │ WorkspaceRegistry │ + │ │ · identity → ref-set │ + │ │ · per-identity mutex (acquire) │ + │ └─────────────────────────────────┘ + ▼ + ┌──────────────────────────────────────────────────────────────┐ + │ SandboxProvider (LocalDockerProvider — Slice MVP) │ + │ · recover() returns adopted containers on server boot │ + └──────────────────────────────────────────────────────────────┘ +``` + +### Package boundary rules + +- `@electric-ax/agents-runtime` knows the entity _type name_ `'coding-agent'` and the _handle shape_ `CodingAgentHandle`. **Does not** import `@electric-ax/coding-agents`. +- `@electric-ax/coding-agents` is the only place Docker / CLI / lifecycle logic lives. Owns `LifecycleManager`, `WorkspaceRegistry`, the entity handler, and the registration helper. +- `agents-server` bootstrap is the seam: it instantiates `LocalDockerProvider` + `StdioBridge`, calls `registerCodingAgent(registry, { provider, bridge })`, and proceeds. +- The legacy `coder` entity coexists. Different entity type name (`'coding-agent'` vs `'coder'`); disjoint collection-type wires (`CODING_AGENT_*_COLLECTION_TYPE`). + +## File layout + +``` +packages/coding-agents/ ← extend existing +├── src/ +│ ├── index.ts ← +export registerCodingAgent and new types +│ ├── types.ts ← +SpawnCodingAgentOptions, CodingAgentStatus, RunSummary +│ ├── providers/local-docker.ts ← (existing) +recover() filter on agentId prefix +│ ├── bridge/stdio-bridge.ts ← (existing) +│ ├── lifecycle-manager.ts ← NEW +│ ├── workspace-registry.ts ← NEW +│ ├── log.ts ← (existing) +│ └── entity/ +│ ├── register.ts ← NEW: registerCodingAgent(registry, deps) +│ ├── handler.ts ← NEW: the entity handler +│ ├── collections.ts ← NEW: schemas + collection-type wire constants +│ └── messages.ts ← NEW: inbox message types and zod schemas +└── test/ + ├── unit/ + │ ├── lifecycle-manager.test.ts ← NEW + │ ├── workspace-registry.test.ts ← NEW + │ ├── entity-handler.test.ts ← NEW + │ └── (existing unit tests stay) + └── integration/ + ├── smoke.test.ts ← (existing — kept) + └── slice-a.test.ts ← NEW: full e2e + +packages/agents-runtime/ +└── src/ + ├── types.ts ← +HandlerContext.spawnCodingAgent / observeCodingAgent + ├── context-factory.ts ← +spawnCodingAgent / observeCodingAgent impl + └── (CodingAgentHandle co-located in types.ts) + +packages/agents-server/ +└── src/entrypoint-lib.ts (or wherever bootstrap lives) + ← +call registerCodingAgent(registry, { provider, bridge }) + +packages/agents/ ← UNCHANGED in Slice A +``` + +## Public types + +### Runtime API (added to `HandlerContext`) + +```ts +// packages/agents-runtime/src/types.ts + +interface HandlerContext { + // ... existing fields + + spawnCodingAgent(opts: SpawnCodingAgentOptions): Promise + observeCodingAgent(id: string): Promise +} + +interface SpawnCodingAgentOptions { + /** Stable id, scoped to the spawning entity. */ + id: string + + /** Slice A: 'claude' only. */ + kind: 'claude' + + /** + * Workspace mount. Identity is the lease key: + * { type: 'volume', name: 'foo' } → 'volume:foo' + * { type: 'volume' } → 'volume:' + * { type: 'bindMount', hostPath: P } → 'bindMount:' + */ + workspace: + | { type: 'volume'; name?: string } + | { type: 'bindMount'; hostPath: string } + + /** Initial prompt; queued before the first wake. */ + initialPrompt?: string + + /** Slice A: 'runFinished' only. */ + wake?: { on: 'runFinished'; includeResponse?: boolean } + + /** Lifecycle overrides. */ + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } +} + +interface CodingAgentHandle { + /** Stable URL: //coding-agent/ */ + readonly url: string + readonly kind: 'claude' + + /** Queue a prompt. Resolves once durably enqueued. */ + send(prompt: string): Promise<{ runId: string }> + + /** + * Async iterable over normalized events for this agent. + * `since: 'start'` replays from the first persisted event. + * `since: 'now'` (default) tails from the current tail. + */ + events(opts?: { since?: 'start' | 'now' }): AsyncIterable + + /** Sync snapshot. */ + state(): { + status: 'cold' | 'starting' | 'idle' | 'running' | 'stopping' | 'error' + pinned: boolean + workspace: { identity: string; sharedRefs: number } + lastError?: string + runs: ReadonlyArray + } + + pin(): Promise + release(): Promise + stop(): Promise + destroy(): Promise +} + +interface RunSummary { + runId: string + startedAt: number + endedAt?: number + status: 'running' | 'completed' | 'failed' + promptInboxKey: string + responseText?: string +} +``` + +### Inbox messages (entity-internal) + +```ts +// packages/coding-agents/src/entity/messages.ts + +type CodingAgentInboxMessage = + | { type: 'prompt'; text: string } + | { type: 'pin' } + | { type: 'release' } + | { type: 'stop' } + | { type: 'destroy' } +``` + +`CodingAgentHandle.send(prompt)` desugars to `{ type: 'prompt', text: prompt }`. `pin/release/stop/destroy` desugar to their respective bare-message types. Each is dispatched on the entity inbox via the runtime's existing `ctx.send(entityUrl, message)` machinery. + +### Collections + +```ts +// packages/coding-agents/src/entity/collections.ts + +export const CODING_AGENT_SESSION_META_COLLECTION_TYPE = + 'coding-agent.sessionMeta' +export const CODING_AGENT_RUNS_COLLECTION_TYPE = 'coding-agent.runs' +export const CODING_AGENT_EVENTS_COLLECTION_TYPE = 'coding-agent.events' +export const CODING_AGENT_LIFECYCLE_COLLECTION_TYPE = 'coding-agent.lifecycle' + +interface SessionMetaRow { + key: 'current' + status: 'cold' | 'starting' | 'idle' | 'running' | 'stopping' | 'error' + kind: 'claude' + pinned: boolean + workspaceIdentity: string // 'volume:foo' | 'bindMount:/abs/p' + workspaceSpec: // raw input, for re-resolve on rehydrate + | { type: 'volume'; name: string } // resolved name (may equal agentId) + | { type: 'bindMount'; hostPath: string } + idleTimeoutMs: number + keepWarm: boolean + instanceId?: string // current sandbox instance, when present + lastError?: string + currentPromptInboxKey?: string +} + +interface RunRow { + key: string // runId (nanoid) + startedAt: number + endedAt?: number + status: 'running' | 'completed' | 'failed' + finishReason?: string // 'cli-exit-N' | 'timeout' | 'orphaned' | 'stopped' + promptInboxKey: string + responseText?: string +} + +interface EventRow { + key: string // : + runId: string + seq: number + ts: number + type: NormalizedEvent['type'] + payload: NormalizedEvent +} + +interface LifecycleRow { + key: string // :: (or 'startup:' for non-run) + ts: number + event: + | 'sandbox.starting' + | 'sandbox.started' + | 'sandbox.stopped' + | 'sandbox.failed' + | 'pin' + | 'release' + | 'orphan.detected' + detail?: string +} +``` + +The `lifecycle` collection is **separate** from `events` because lifecycle rows are infrastructure provenance, not conversation history. Slice C will render them as muted timeline rows; Slice A persists them anyway so the data is there when the UI lands. + +## Component design + +### `LifecycleManager` — `src/lifecycle-manager.ts` + +In-process singleton, instantiated once per `registerCodingAgent` call. Owned by the registration helper's closure. + +```ts +class LifecycleManager { + constructor(deps: { provider: SandboxProvider; bridge: Bridge }) + + // Sandbox lifecycle (called by handler) + async ensureRunning(spec: SandboxSpec): Promise + async stop(agentId: string): Promise + async destroy(agentId: string): Promise + + // Idle timer (in-memory) + armIdleTimer(agentId: string, ms: number, onFire: () => void): void + cancelIdleTimer(agentId: string): void + + // Pin refcount (in-memory; durable boolean is sessionMeta.pinned) + pin(agentId: string): { count: number } + release(agentId: string): { count: number } + pinCount(agentId: string): number + resetPinCount(agentId: string): void // called on registration helper boot + + // Recovery + async adoptRunningContainers(): Promise // wraps provider.recover() +} +``` + +**`onFire` callback** is how the LM tells the handler to do post-timeout work. Since the handler can't run between invocations, the callback's job is to: + +- Call `provider.stop(instanceId)` (this is the LM's own job, actually — runs synchronously on timer fire). +- Optionally enqueue an inbox `_idle_fired` self-message **(NOT done in Slice A)** — instead, the next real handler invocation reconciles via `provider.status()`. + +So in practice `onFire` just emits a log and updates an in-memory `Map` shadow. The handler's reconcile step queries the provider directly on next entry. **No out-of-handler stream writes.** + +**`pinCount` is in-memory.** On server restart, all pin counts reset to 0. Holders that wanted to keep their pins must re-pin. `sessionMeta.pinned` is `pinCount > 0`. + +### `WorkspaceRegistry` — `src/workspace-registry.ts` + +In-process singleton. Two responsibilities: refcount tracking, per-identity mutex. + +```ts +class WorkspaceRegistry { + /** Resolve a SpawnCodingAgentOptions.workspace into a stable identity. */ + static async resolveIdentity( + agentId: string, + spec: SpawnCodingAgentOptions['workspace'] + ): Promise<{ identity: string; resolved: ResolvedWorkspaceSpec }> + + // Refcount + register(identity: string, agentId: string): void + release(identity: string, agentId: string): void + refs(identity: string): number + + // Per-identity mutex + acquire(identity: string): Promise<() => void> // returns release fn + + // Bulk rebuild on server boot + rebuild(snapshots: Array<{ identity: string; agentId: string }>): void +} +``` + +**Mutex implementation.** A simple `Map`: `acquire` chains a new promise; the returned release fn resolves the chain. Unbounded queue; FIFO ordering. + +**`rebuild`** is called by the registration helper at boot, after the helper scans existing `coding-agent` entities' `sessionMeta.workspaceIdentity`. Pending mutex waiters from before the restart are not preserved (no work was lost — they were waiting between turns). + +### Entity handler — `src/entity/handler.ts` + +Single function, ~250 LOC. Pseudocode (Slice A): + +The `lm` and `wr` are closed over by the handler at registration time — see `registerCodingAgent` below. They are **not** added to `HandlerContext`; only the entity-handler closure references them. + +```ts +function makeCodingAgentHandler(lm: LifecycleManager, wr: WorkspaceRegistry) { + return async function handleCodingAgentEntity( + ctx: HandlerContext, + wake: Wake + ) { + const agentId = ctx.entityUrl + const meta = await ctx.collections.sessionMeta.get('current') + + // (1) RECONCILE — apply the table rules from §Lifecycle state machine + if (meta) { + await reconcile(ctx, lm, meta) + } + + // (2) DISPATCH + switch (wake.message.type) { + case 'prompt': + return processPrompt(ctx, lm, wr, wake.message) + case 'pin': + return processPin(ctx, lm, agentId) + case 'release': + return processRelease(ctx, lm, agentId) + case 'stop': + return processStop(ctx, lm, agentId) + case 'destroy': + return processDestroy(ctx, lm, wr, agentId) + } + } +} +``` + +`reconcile()` reads `provider.status(agentId)` and the open `runs` row, then applies the table to update `sessionMeta` and (if orphaned) the run row + a `lifecycle` row. It is the single durable side-effect path on entry. + +`processPrompt` is the heavy one: + +```ts +async function processPrompt( + ctx: HandlerContext, + lm: LifecycleManager, + wr: WorkspaceRegistry, + msg: { type: 'prompt'; text: string; _inboxKey: string } +) { + const agentId = ctx.entityUrl + const meta = await ctx.collections.sessionMeta.get('current') // !undefined post-init + const env = bridgeEnvFromServerConfig() // ANTHROPIC_API_KEY etc., from server bootstrap + + // Cold-boot: ensure sandbox started + await ctx.collections.sessionMeta.update('current', { status: 'starting' }) + await ctx.collections.lifecycle.insert({ + event: 'sandbox.starting', + ts: Date.now(), + key: `boot:${Date.now()}`, + }) + + let sandbox: SandboxInstance + try { + sandbox = await raceTimeout( + lm.ensureRunning({ + agentId, + kind: meta.kind, + workspace: meta.workspaceSpec, + env, + }), + coldBootBudgetMs + ) + } catch (err) { + await ctx.collections.sessionMeta.update('current', { + status: 'error', + lastError: String(err), + }) + await ctx.collections.lifecycle.insert({ + event: 'sandbox.failed', + ts: Date.now(), + key: `boot:${Date.now()}`, + detail: String(err), + }) + return + } + + await ctx.collections.sessionMeta.update('current', { + status: 'idle', + instanceId: sandbox.instanceId, + }) + await ctx.collections.lifecycle.insert({ + event: 'sandbox.started', + ts: Date.now(), + key: `boot:${Date.now()}`, + }) + + // Acquire workspace lease (waits if another agent holds it) + const releaseLease = await wr.acquire(meta.workspaceIdentity) + + try { + await ctx.collections.sessionMeta.update('current', { + status: 'running', + currentPromptInboxKey: msg._inboxKey, + }) + const run = ctx.recordRun() + const runId = run.key + await ctx.collections.runs.insert({ + key: runId, + startedAt: Date.now(), + status: 'running', + promptInboxKey: msg._inboxKey, + }) + + let seq = 0 + try { + const result = await raceTimeout( + lm.bridge.runTurn({ + sandbox, + kind: meta.kind, + prompt: msg.text, + onEvent: async (e) => { + await ctx.collections.events.insert({ + key: `${runId}:${seq}`, + runId, + seq, + ts: Date.now(), + type: e.type, + payload: e, + }) + seq++ + }, + }), + runTimeoutMs + ) + await ctx.collections.runs.update(runId, { + status: 'completed', + endedAt: Date.now(), + responseText: result.finalText, + }) + run.attachResponse(result.finalText ?? '') + run.end({ status: 'completed' }) + } catch (err) { + const reason = + err.name === 'TimeoutError' + ? 'timeout' + : `cli-exit:${String(err).slice(0, 200)}` + await ctx.collections.runs.update(runId, { + status: 'failed', + endedAt: Date.now(), + finishReason: reason, + }) + await ctx.collections.sessionMeta.update('current', { + status: 'error', + lastError: String(err), + }) + run.end({ status: 'failed' }) + return + } + + await ctx.collections.sessionMeta.update('current', { + status: 'idle', + currentPromptInboxKey: undefined, + }) + if (!meta.keepWarm) { + lm.armIdleTimer(agentId, meta.idleTimeoutMs, () => + lm.provider.stop(sandbox.instanceId) + ) + } + } finally { + releaseLease() + } +} +``` + +`processPin`, `processRelease` manage the LM's in-memory refcount and idle timer; update `sessionMeta.pinned`. `processStop` calls `lm.stop`, sets `status='cold'`. `processDestroy` calls `lm.destroy`, `wr.release`, then `ctx.deleteEntityStream()`. + +### Runtime helper — `packages/agents-runtime/src/context-factory.ts` + +Mirrors the existing `useCodingAgent` (lines 561-629 of `context-factory.ts`): + +```ts +async function spawnCodingAgent( + ctx, + opts: SpawnCodingAgentOptions +): Promise { + const handle = await ctx.spawn( + 'coding-agent', + opts.id, + { + kind: opts.kind, + workspace: opts.workspace, + lifecycle: opts.lifecycle, + }, + { + initialMessage: opts.initialPrompt + ? { type: 'prompt', text: opts.initialPrompt } + : undefined, + wake: opts.wake ?? { on: 'runFinished', includeResponse: true }, + } + ) + return makeHandle(ctx, handle.url) +} + +async function observeCodingAgent(ctx, id: string): Promise { + const url = scopedUrl(ctx, 'coding-agent', id) + await ctx.observe(url) + return makeHandle(ctx, url) +} + +function makeHandle(ctx, url: string): CodingAgentHandle { + return { + url, + kind: 'claude', + send: (text) => ctx.send(url, { type: 'prompt', text }), + pin: () => ctx.send(url, { type: 'pin' }), + release: () => ctx.send(url, { type: 'release' }), + stop: () => ctx.send(url, { type: 'stop' }), + destroy: () => ctx.send(url, { type: 'destroy' }), + state: () => readState(ctx, url), + events: (o) => tailEvents(ctx, url, o?.since ?? 'now'), + } +} +``` + +The `state()` reader needs `WorkspaceRegistry.refs(identity)`, which is in-process state on `agents-server`. The runtime accesses it via a small reader function injected at server bootstrap (one-line dependency on the server side; runtime exposes a setter). On the client side, `state().workspace.sharedRefs` falls back to `1` (the agent itself). Slice A documents this client/server asymmetry; Slice C may surface a server-side query API. + +### Registration helper — `src/entity/register.ts` + +```ts +export interface RegisterCodingAgentDeps { + provider: SandboxProvider + bridge: Bridge + /** Override defaults; used by tests. */ + defaults?: { + idleTimeoutMs?: number + coldBootBudgetMs?: number + runTimeoutMs?: number + } +} + +export function registerCodingAgent( + registry: EntityRegistry, + deps: RegisterCodingAgentDeps +): void { + const lm = new LifecycleManager(deps) + const wr = new WorkspaceRegistry() + registry.define('coding-agent', { + collections: { sessionMeta, runs, events, lifecycle }, + inboxSchema: codingAgentInboxSchema, + handler: makeCodingAgentHandler(lm, wr), + onBoot: async ({ scanEntities }) => { + // Rebuild workspace registry from durable state + const all = await scanEntities('coding-agent') + wr.rebuild( + all.map((e) => ({ + identity: e.sessionMeta.workspaceIdentity, + agentId: e.url, + })) + ) + // Adopt running containers; do not write durable state — + // reconcile happens on next handler entry per agent. + await lm.adoptRunningContainers() + }, + }) +} +``` + +**`onBoot` hook.** Slice A introduces a per-type `onBoot` hook on the registry definition. It receives a small context with `scanEntities(type)` (returns the per-entity sessionMeta + url for all entities of `type`). The hook is fired once per server process at registry initialization, before any handler runs. + +If the existing `EntityRegistry` doesn't have this hook, Slice A adds it (one method on `define-entity.ts`, one boot-time call in `electric-agents-manager.ts`). Confirmed scope-add during writing-plans by reading those files. (Listed under §Open questions for explicit confirmation.) + +## Lifecycle state machine + +``` + ┌──────────┐ + spawn ─────▶│ COLD │◀── reconcile: provider says stopped + └────┬─────┘ + │ prompt + ▼ + ┌──────────┐ + │ STARTING │ provider.start (idempotent; reattach if running) + └────┬─────┘ + cold-boot timeout │ ready + ┌──────────┴──────────┐ + ▼ ▼ + ┌────────┐ ┌──────────┐ + │ ERROR │ │ IDLE │◀───────┐ + └────┬───┘ └────┬─────┘ │ + │ next prompt │ prompt │ runTurn + ▼ ▼ │ done + ┌────────┐ ┌──────────┐ │ + │ COLD │◀─────┐ │ RUNNING │─────────┘ + └────────┘ │ └────┬─────┘ + │ │ stop/destroy + │ ▼ + │ ┌──────────┐ + │ │ STOPPING │ SIGTERM → SIGKILL after 5s + └─────└──────────┘ + idle-timer fire + (provider.stop direct) +``` + +**Reconcile rules** (every handler entry, before dispatch). The handler queries `provider.status(agentId)` and inspects the open `runs` row (if any), then applies: + +``` +let openRun = await runs.findOpen() // status === 'running' && !endedAt +let isOrphaned = openRun && openRun.startedAt < lm.startedAtMs + // run started before THIS process started + // ⇒ left over from a prior process +``` + +| Durable `meta.status` | `provider.status()` | `isOrphaned`? | Action | +| ---------------------- | --------------------- | ------------- | --------------------------------------------------------------------------- | +| `running` | `running` | true | mark openRun `failed: orphaned`; `meta.status='idle'` (sandbox kept) | +| `running` | `running` | false | leave (genuinely in-flight in this process) | +| `running` | `stopped` / `unknown` | n/a | mark openRun `failed: orphaned`; `meta.status='cold'`; clear `instanceId` | +| `idle` | `stopped` | n/a | `meta.status='cold'`; clear `instanceId` (idle timer fired between entries) | +| `idle` | `running` | n/a | leave | +| `cold` | `running` | n/a | leave (orphaned container; cleaned on next stop/destroy) | +| `cold` | `stopped` / `unknown` | n/a | leave | +| `error` | any | n/a | leave; next `prompt` retries `start` | +| `starting`, `stopping` | `running` | n/a | `meta.status='idle'` | +| `starting`, `stopping` | `stopped` / `unknown` | n/a | `meta.status='cold'` | + +`lm.startedAtMs` is the wall-clock millisecond timestamp captured when the `LifecycleManager` is instantiated (i.e., at server boot). Any `runs` row with `startedAt < lm.startedAtMs` and `status='running'` definitionally cannot be tracked by the current process. + +## Workspace identity & lease + +| Spec input | Identity | +| ------------------------------------ | ------------------------- | +| `{ type: 'volume', name: 'foo' }` | `volume:foo` | +| `{ type: 'volume' }` (no name) | `volume:` | +| `{ type: 'bindMount', hostPath: P }` | `bindMount:` | + +Stored on `sessionMeta.workspaceIdentity` so it survives reconcile and server restart. + +**Ref tracking.** `WorkspaceRegistry.register(identity, agentId)` is called once per agent during `processPrompt`'s cold-boot path (idempotent). Decremented in `processDestroy`. Consumed by `state().workspace.sharedRefs`. + +**Mutex.** `acquire(identity)` returns a release fn. Held only across `bridge.runTurn`. Two `IDLE` agents on the same identity coexist freely; only `RUNNING` is serialized. + +**Lease wait is unbounded in Slice A.** No deadlock possible — every holder finishes a turn (timeout or completion). Acceptable for dev workloads. A bound can be added later. + +## Crash recovery + +**On `agents-server` boot** (`registerCodingAgent.onBoot`): + +1. Scan all `coding-agent` entities, rebuild `WorkspaceRegistry`. +2. Call `provider.recover()` → list of `{ agentId, instanceId, status }`. +3. Do **not** mutate durable state at this point. The first handler entry per agent does it. + +**On first handler entry per agent after restart** — the reconcile step (see the table in §Lifecycle state machine) handles all cases. The two crash-relevant rows are: + +- `meta=running, provider=running, isOrphaned=true` → mark orphan, transition to `idle`. The container is still up; the bridge handle from the dead process is gone. Next prompt re-execs. +- `meta=running, provider=stopped/unknown` → mark orphan, transition to `cold`. Next prompt cold-boots a fresh container. + +**Validation:** the integration test simulates server restart by tearing down the LM/registry and re-creating from scratch with the container still running. + +## Defaults + +| Setting | Default | +| ------------------ | -------------------- | +| `idleTimeoutMs` | 5 × 60 000 (5 min) | +| `coldBootBudgetMs` | 30 000 | +| `runTimeoutMs` | 30 × 60 000 (30 min) | +| `keepWarm` | `false` | + +All overridable per-spawn via `lifecycle?:` and via `RegisterCodingAgentDeps.defaults` for tests. + +## Error handling + +- **`provider.start` fails / cold-boot timeout** → `meta.status='error'`, `lastError=msg`, force-remove partial container. Next prompt retries. +- **`bridge.runTurn` non-zero exit** → run `failed: cli-exit:`, `meta.status='error'`. Sandbox kept up. +- **Run timeout** → `kill('SIGTERM')`, 5 s grace, `kill('SIGKILL')`. Run `failed: timeout`. Sandbox kept up. +- **Sandbox crashes mid-turn** (container dies) → bridge throws on stream close → run `failed: cli-exit:`. Reconcile on next entry sets cold. +- **Server crashes mid-turn** → orphan reconcile on next handler entry. +- **Lease wait** → unbounded. Documented. +- **`stop()` while running** → SIGTERM exec; `provider.stop`; release lease. Run `failed: stopped`. +- **`destroy()` while running** → `stop()` then `provider.destroy(agentId)`; `wr.release`; `ctx.deleteEntityStream()`. Idempotent on partial failure. + +## Testing strategy + +### Layer 1 — Unit (no Docker) + +- **`lifecycle-manager.test.ts`** — state transitions through cold/starting/idle/running, idle timer arm/cancel, pin refcount (n pins need n releases, idle timer suspended while pinned), error transition. Backed by `FakeSandboxProvider` + `FakeBridge` (in-memory, scripted). +- **`workspace-registry.test.ts`** — three identity resolutions, refcount add/sub, mutex serialization (assert only one `acquire` resolved at a time), realpath on bindMount, `rebuild` from snapshot. +- **`entity-handler.test.ts`** — per-message dispatch (prompt/pin/release/stop/destroy do the right ops), reconcile-on-entry across the matrix above, durable-status reconciliation when provider says `stopped`. +- **`runtime-handle.test.ts`** (`packages/agents-runtime/test/`) — `ctx.spawnCodingAgent` desugars correctly, handle methods desugar to inbox messages, `state()` reads three collections. + +Vitest. Sub-second per file. + +### Layer 2 — Integration (real Docker, real Claude) + +Single file `slice-a.test.ts`. Reuses the existing test image. Gated by `DOCKER=1`. ~3 min wall time target. + +Sequence: + +1. Bootstrap a minimal `agents-server` instance with `registerCodingAgent` wired in. +2. Spawn parent test entity that calls `ctx.spawnCodingAgent({ kind: 'claude', workspace: { type: 'volume' }, initialPrompt: 'reply: ok' })` and awaits `runFinished` wake. Assert response text matches. +3. Call `handle.pin()`, sleep past `idleTimeoutMs=2s` (overridden), assert `provider.status === 'running'`. +4. Call `handle.release()`, sleep past idle, assert `provider.status === 'stopped'`. +5. Call `handle.send('reply: again')`, assert cold-boot path executes, response received. +6. Spawn second agent on same workspace name; concurrently send prompts to both; assert second agent's run starts only after first's run ends (lease serialization). +7. Mid-turn, `provider.stop` the container directly; assert run flips to `failed`; next prompt works. +8. Server-restart simulation: dispose LM/registry/handle, re-`registerCodingAgent`, re-acquire handle via `observeCodingAgent`; assert `recover()` finds the container, orphan-run is detected on next handler entry, fresh prompt succeeds. +9. `handle.destroy()`; assert container removed, volume removed (no other refs), entity stream gone. + +### Out of Slice A + +- No conformance suite (Slice C). +- No browser/UI tests (Slice C). +- No legacy `coder` removal regression suite (Slice B). + +## Migration + +No removals in Slice A. The legacy `coder` entity (`packages/agents/src/agents/coding-session.ts`) and its tools are unchanged. + +`agents-server` registers both at boot: + +```ts +registerCodingSession(registry) // existing 'coder' type — UNCHANGED +registerCodingAgent(registry, { + // NEW 'coding-agent' type + provider: new LocalDockerProvider(), + bridge: new StdioBridge(), +}) +``` + +The two type names and disjoint collection-type wires guarantee no storage conflict. UI continues to work against `coder` until Slice C extends it for `coding-agent`. + +## Open questions + +- **`onBoot` registry hook.** Does `EntityRegistry` already expose a per-type `onBoot`? If not, this slice adds one (small change, scoped to `define-entity.ts` + `electric-agents-manager.ts`). Resolve during writing-plans by reading those files. +- **`ctx.deleteEntityStream` shape.** Used in `processDestroy`. Confirm during implementation that the runtime exposes a primitive for "drop all collections + halt observation". If not, fall back to "mark stream tombstone" semantic. +- **`workspace.sharedRefs` from a client `ctx`.** The client-side runtime can't see server-side `WorkspaceRegistry`. Slice A clients see `sharedRefs: 1`. Document; Slice C may add a server query. +- **Pin survival across server restart.** Slice A: pin counts are in-memory only. Slice B may persist refcount-by-key if real workloads need it. + +## Scope cuts referenced from full design spec + +Carried forward from the parent spec, **deferred**: + +- Resume (`nativeJsonl` + `--resume`). +- Codex. +- `wake.on: 'eventAppended'`. +- `sandbox?` provider override. +- UI (status enum extension, header provenance, pin/release/stop buttons, lifecycle row rendering, spawn dialog workspace selector). +- Tools (`spawn_coding_agent`, `prompt_coding_agent`). +- Removal of legacy `coder` entity. +- Conformance suite (provider-parameterized). +- Cross-kind resume. + +## References + +- `docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md` — parent design. +- `docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md` — predecessor report. +- `packages/agents-runtime/src/context-factory.ts:561-629` — `useCodingAgent` template to mirror. +- `packages/agents/src/agents/coding-session.ts` — legacy `coder` entity (coexists; not removed). +- `packages/agents-runtime/src/define-entity.ts` — entity registration mechanism. +- `packages/agents-server/src/electric-agents-manager.ts` — server-side type registration. From 615eb92a078bd6458f3c386d4736cc7ecbe20efe Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 07:22:54 +0100 Subject: [PATCH 012/279] docs(plans): add Slice A implementation plan for coding-agents Maps the Slice A design spec into 8 bite-sized tasks across 5 phases: foundation schemas, LifecycleManager + WorkspaceRegistry (parallel), entity handler + register helper, runtime API surface, server bootstrap wiring, integration smoke. Declares two spec divergences up-front (no onBoot hook, no deleteEntityStream). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plans/2026-04-30-coding-agents-slice-a.md | 2709 +++++++++++++++++ 1 file changed, 2709 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-30-coding-agents-slice-a.md diff --git a/docs/superpowers/plans/2026-04-30-coding-agents-slice-a.md b/docs/superpowers/plans/2026-04-30-coding-agents-slice-a.md new file mode 100644 index 0000000000..3c97bec700 --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-coding-agents-slice-a.md @@ -0,0 +1,2709 @@ +# Coding Agents — Slice A Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Wire the existing `LocalDockerProvider` + `StdioBridge` (from the MVP) into a first-class platform primitive: a built-in `coding-agent` entity, a `LifecycleManager`, a `WorkspaceRegistry`, and the typed `ctx.spawnCodingAgent` / `ctx.observeCodingAgent` API on `HandlerContext`. Validation bar: an integration test that spawns a `coding-agent` from a parent test entity, awaits a `runFinished` wake with the response text, exercises pin/release/idle hibernation, lease-serializes two agents on a shared workspace, simulates server crash mid-turn and asserts orphan reconciliation. + +**Architecture:** New code lives in `@electric-ax/coding-agents/src/{lifecycle-manager.ts, workspace-registry.ts, entity/*}`. The runtime gets typed wrappers (`ctx.spawnCodingAgent` / `ctx.observeCodingAgent`) that desugar to `ctx.spawn('coding-agent', ...)` / `ctx.observe(...)`. The entity handler closes over the LM + WR; collection access uses the StreamDB pattern (`ctx.db.collections.X.get`, `ctx.db.actions.X_insert/X_update`). Server bootstrap (`packages/agents/src/bootstrap.ts`) adds `registerCodingAgent(registry, { provider, bridge })` next to `registerCodingSession(registry)`. Legacy `coder` entity coexists. + +**Spec divergences (resolved from spec's Open Questions section):** + +- **No `onBoot` registry hook.** The runtime's `EntityRegistry.define()` has no `onBoot` parameter. We don't add one in Slice A. Instead: first-wake init in the handler seeds `sessionMeta`, and the LM/WR rebuild lazily on first handler invocation (gated by an idempotent in-process flag). Reduces runtime surface area; no behavior loss for Slice A. +- **No `ctx.deleteEntityStream`.** `destroy()` becomes "stop sandbox + drop workspace ref + set `sessionMeta.status='destroyed'` + future inbox messages return early". The entity stream stays as a tombstone. Durable cleanup is Slice B. +- **`workspace.sharedRefs` from a client `ctx`.** Server-only state. Client handles return `sharedRefs: 1`. Documented in `state()` JSDoc. + +**Tech Stack:** TypeScript, Vitest, Node `child_process`, Docker, `agent-session-protocol@0.0.2`, `zod` (collection + inbox schemas). + +**Reference spec:** `docs/superpowers/specs/2026-04-30-coding-agents-slice-a-design.md` + +--- + +## File Structure + +``` +packages/coding-agents/ ← extend existing package +├── src/ +│ ├── index.ts ← +exports for new types and registerCodingAgent +│ ├── types.ts ← +SpawnCodingAgentOptions, CodingAgentStatus, RunSummary +│ ├── lifecycle-manager.ts ← NEW +│ ├── workspace-registry.ts ← NEW +│ ├── entity/ +│ │ ├── collections.ts ← NEW: schemas + wire constants +│ │ ├── messages.ts ← NEW: inbox message schemas +│ │ ├── handler.ts ← NEW: the entity handler +│ │ └── register.ts ← NEW: registerCodingAgent +│ ├── providers/local-docker.ts ← (existing, no changes for Slice A) +│ ├── bridge/stdio-bridge.ts ← (existing, no changes) +│ └── log.ts ← (existing) +└── test/ + ├── unit/ + │ ├── workspace-registry.test.ts ← NEW + │ ├── lifecycle-manager.test.ts ← NEW + │ ├── entity-handler.test.ts ← NEW + │ ├── local-docker.test.ts ← (existing) + │ └── stdio-bridge.test.ts ← (existing) + └── integration/ + ├── slice-a.test.ts ← NEW + ├── smoke.test.ts ← (existing) + └── support/ + ├── build-image.ts ← (existing) + └── env.ts ← (existing) + +packages/agents-runtime/ +└── src/ + ├── types.ts ← +SpawnCodingAgentOptions, CodingAgentHandle, HandlerContext.spawnCodingAgent / observeCodingAgent + └── context-factory.ts ← +spawnCodingAgent / observeCodingAgent impls + +packages/agents/ +└── src/bootstrap.ts ← +registerCodingAgent call + +docs/superpowers/specs/notes/ +└── 2026-04-30-coding-agents-slice-a-report.md ← NEW (Phase 5) +``` + +--- + +## Phase Plan + +| Phase | Tasks | Parallelism | Depends on | +| ----- | ------------- | ------------------------------- | ---------- | +| 0 | 0.1, 0.2 | sequential | — | +| 1 | 1.A, 1.B | parallel (2 independent agents) | Phase 0 | +| 2 | 2.1, 2.2, 2.3 | sequential | Phase 1 | +| 3 | 3.1 | sequential | Phase 2 | +| 4 | 4.1 | sequential | Phase 3 | +| 5 | 5.1 (report) | sequential | Phase 4 | + +Total tasks: 8 (excluding report). Estimated wall time per task: 10-30 min. + +--- + +## Phase 0 — Foundation (sequential) + +### Task 0.1 — Wire constants, collection schemas, inbox schemas + +**Files:** + +- Create: `packages/coding-agents/src/entity/collections.ts` +- Create: `packages/coding-agents/src/entity/messages.ts` + +- [ ] **Step 1: Write `src/entity/collections.ts`** + +```ts +import { z } from 'zod' + +export const CODING_AGENT_SESSION_META_COLLECTION_TYPE = + 'coding-agent.sessionMeta' +export const CODING_AGENT_RUNS_COLLECTION_TYPE = 'coding-agent.runs' +export const CODING_AGENT_EVENTS_COLLECTION_TYPE = 'coding-agent.events' +export const CODING_AGENT_LIFECYCLE_COLLECTION_TYPE = 'coding-agent.lifecycle' + +export const codingAgentStatusSchema = z.enum([ + 'cold', + 'starting', + 'idle', + 'running', + 'stopping', + 'error', + 'destroyed', +]) +export type CodingAgentStatus = z.infer + +export const sessionMetaRowSchema = z.object({ + key: z.literal('current'), + status: codingAgentStatusSchema, + kind: z.enum(['claude']), + pinned: z.boolean(), + workspaceIdentity: z.string(), + workspaceSpec: z.discriminatedUnion('type', [ + z.object({ + type: z.literal('volume'), + name: z.string(), + }), + z.object({ + type: z.literal('bindMount'), + hostPath: z.string(), + }), + ]), + idleTimeoutMs: z.number(), + keepWarm: z.boolean(), + instanceId: z.string().optional(), + lastError: z.string().optional(), + currentPromptInboxKey: z.string().optional(), +}) +export type SessionMetaRow = z.infer + +export const runRowSchema = z.object({ + key: z.string(), + startedAt: z.number(), + endedAt: z.number().optional(), + status: z.enum(['running', 'completed', 'failed']), + finishReason: z.string().optional(), + promptInboxKey: z.string(), + responseText: z.string().optional(), +}) +export type RunRow = z.infer + +export const eventRowSchema = z.object({ + key: z.string(), + runId: z.string(), + seq: z.number(), + ts: z.number(), + type: z.string(), + payload: z.looseObject({}), +}) +export type EventRow = z.infer + +export const lifecycleRowSchema = z.object({ + key: z.string(), + ts: z.number(), + event: z.enum([ + 'sandbox.starting', + 'sandbox.started', + 'sandbox.stopped', + 'sandbox.failed', + 'pin', + 'release', + 'orphan.detected', + ]), + detail: z.string().optional(), +}) +export type LifecycleRow = z.infer +``` + +- [ ] **Step 2: Write `src/entity/messages.ts`** + +```ts +import { z } from 'zod' + +export const promptMessageSchema = z.object({ + text: z.string(), +}) +export const pinMessageSchema = z.object({}).strict() +export const releaseMessageSchema = z.object({}).strict() +export const stopMessageSchema = z.object({}).strict() +export const destroyMessageSchema = z.object({}).strict() + +export type PromptMessage = z.infer +``` + +- [ ] **Step 3: Verify typecheck** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean. + +- [ ] **Step 4: Commit** + +``` +git add packages/coding-agents/src/entity +git commit -m "feat(coding-agents): collection + inbox message schemas for coding-agent entity" +``` + +--- + +### Task 0.2 — Public types extension + +**Files:** + +- Modify: `packages/coding-agents/src/types.ts` + +- [ ] **Step 1: Append to `src/types.ts`** + +Add after the existing types: + +```ts +import type { CodingAgentStatus } from './entity/collections' + +// ─── Slice A: SpawnCodingAgentOptions / RunSummary ────────────────────────── + +export interface SpawnCodingAgentOptions { + /** Stable id, scoped to the spawning entity. */ + id: string + /** Slice A: 'claude' only. */ + kind: 'claude' + /** + * Workspace mount. Identity is the lease key. + * { type: 'volume', name: 'foo' } → 'volume:foo' + * { type: 'volume' } → 'volume:' + * { type: 'bindMount', hostPath: P } → 'bindMount:' + */ + workspace: + | { type: 'volume'; name?: string } + | { type: 'bindMount'; hostPath: string } + /** Initial prompt; queued before the first wake. */ + initialPrompt?: string + /** Slice A: 'runFinished' only. */ + wake?: { on: 'runFinished'; includeResponse?: boolean } + /** Lifecycle overrides. */ + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } +} + +export interface RunSummary { + runId: string + startedAt: number + endedAt?: number + status: 'running' | 'completed' | 'failed' + promptInboxKey: string + responseText?: string +} + +export type { CodingAgentStatus } + +/** Defaults applied when a SpawnCodingAgentOptions field is omitted. */ +export const SLICE_A_DEFAULTS = { + idleTimeoutMs: 5 * 60_000, + coldBootBudgetMs: 30_000, + runTimeoutMs: 30 * 60_000, + keepWarm: false, +} as const +``` + +- [ ] **Step 2: Verify typecheck** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean. + +- [ ] **Step 3: Commit** + +``` +git add packages/coding-agents/src/types.ts +git commit -m "feat(coding-agents): add SpawnCodingAgentOptions, RunSummary, defaults" +``` + +--- + +## Phase 1 — Pure components (parallel, 2 agents) + +These two tasks touch disjoint files. Dispatch in parallel. + +### Task 1.A — `WorkspaceRegistry` + +**Files:** + +- Create: `packages/coding-agents/src/workspace-registry.ts` +- Create: `packages/coding-agents/test/unit/workspace-registry.test.ts` + +- [ ] **Step 1: Write the failing test first** + +```ts +// test/unit/workspace-registry.test.ts +import { describe, it, expect } from 'vitest' +import { WorkspaceRegistry } from '../../src/workspace-registry' + +describe('WorkspaceRegistry.resolveIdentity', () => { + it('resolves volume:name when name is provided', async () => { + const r = await WorkspaceRegistry.resolveIdentity('/p/coding-agent/x', { + type: 'volume', + name: 'foo', + }) + expect(r.identity).toBe('volume:foo') + expect(r.resolved).toEqual({ type: 'volume', name: 'foo' }) + }) + + it('resolves volume: when name is omitted', async () => { + const r = await WorkspaceRegistry.resolveIdentity('/p/coding-agent/x', { + type: 'volume', + }) + expect(r.identity).toBe('volume:/p/coding-agent/x') + expect(r.resolved).toEqual({ type: 'volume', name: '/p/coding-agent/x' }) + }) + + it('resolves bindMount: for bind mounts', async () => { + const r = await WorkspaceRegistry.resolveIdentity('/p/coding-agent/x', { + type: 'bindMount', + hostPath: '/tmp', + }) + expect(r.identity).toMatch(/^bindMount:\/(private\/)?tmp$/) + }) +}) + +describe('WorkspaceRegistry refcount', () => { + it('tracks refs across register/release', () => { + const wr = new WorkspaceRegistry() + expect(wr.refs('volume:foo')).toBe(0) + wr.register('volume:foo', 'a') + wr.register('volume:foo', 'b') + expect(wr.refs('volume:foo')).toBe(2) + wr.release('volume:foo', 'a') + expect(wr.refs('volume:foo')).toBe(1) + wr.release('volume:foo', 'a') // double-release is idempotent + expect(wr.refs('volume:foo')).toBe(1) + wr.release('volume:foo', 'b') + expect(wr.refs('volume:foo')).toBe(0) + }) +}) + +describe('WorkspaceRegistry mutex', () => { + it('serializes acquire calls per identity', async () => { + const wr = new WorkspaceRegistry() + const order: Array = [] + const a = wr.acquire('volume:foo').then((release) => { + order.push('a-acquired') + return new Promise((res) => + setTimeout(() => { + order.push('a-release') + release() + res() + }, 50) + ) + }) + // Make sure b queues behind a + await new Promise((r) => setTimeout(r, 5)) + const b = wr.acquire('volume:foo').then((release) => { + order.push('b-acquired') + release() + }) + await Promise.all([a, b]) + expect(order).toEqual(['a-acquired', 'a-release', 'b-acquired']) + }) + + it('does not serialize across distinct identities', async () => { + const wr = new WorkspaceRegistry() + const order: Array = [] + const a = wr.acquire('volume:foo').then((release) => { + order.push('a-acq') + return new Promise((res) => + setTimeout(() => { + release() + res() + }, 50) + ) + }) + const b = wr.acquire('volume:bar').then((release) => { + order.push('b-acq') + release() + }) + await Promise.all([a, b]) + // b runs before a finishes + expect(order[0]).toBe('a-acq') + expect(order[1]).toBe('b-acq') + }) +}) + +describe('WorkspaceRegistry.rebuild', () => { + it('replays a snapshot from durable state', () => { + const wr = new WorkspaceRegistry() + wr.rebuild([ + { identity: 'volume:foo', agentId: 'a' }, + { identity: 'volume:foo', agentId: 'b' }, + { identity: 'volume:bar', agentId: 'c' }, + ]) + expect(wr.refs('volume:foo')).toBe(2) + expect(wr.refs('volume:bar')).toBe(1) + }) +}) +``` + +- [ ] **Step 2: Run the test to verify it fails** + +``` +pnpm -C packages/coding-agents test test/unit/workspace-registry.test.ts +``` + +Expect: FAIL with module-not-found on `../../src/workspace-registry`. + +- [ ] **Step 3: Write `src/workspace-registry.ts`** + +```ts +import { realpath } from 'node:fs/promises' + +export type ResolvedWorkspaceSpec = + | { type: 'volume'; name: string } + | { type: 'bindMount'; hostPath: string } + +export class WorkspaceRegistry { + private readonly refsByIdentity = new Map>() + private readonly chainByIdentity = new Map>() + + static async resolveIdentity( + agentId: string, + spec: + | { type: 'volume'; name?: string } + | { type: 'bindMount'; hostPath: string } + ): Promise<{ identity: string; resolved: ResolvedWorkspaceSpec }> { + if (spec.type === 'volume') { + const name = spec.name ?? agentId + return { + identity: `volume:${name}`, + resolved: { type: 'volume', name }, + } + } + const real = await realpath(spec.hostPath) + return { + identity: `bindMount:${real}`, + resolved: { type: 'bindMount', hostPath: real }, + } + } + + register(identity: string, agentId: string): void { + let set = this.refsByIdentity.get(identity) + if (!set) { + set = new Set() + this.refsByIdentity.set(identity, set) + } + set.add(agentId) + } + + release(identity: string, agentId: string): void { + const set = this.refsByIdentity.get(identity) + if (!set) return + set.delete(agentId) + if (set.size === 0) this.refsByIdentity.delete(identity) + } + + refs(identity: string): number { + return this.refsByIdentity.get(identity)?.size ?? 0 + } + + /** + * Acquire the per-identity mutex. Returns a release fn. + * The mutex chains promises: each acquire waits for the prior chain to settle. + */ + acquire(identity: string): Promise<() => void> { + const prior = this.chainByIdentity.get(identity) ?? Promise.resolve() + let releaseFn: () => void + const next = new Promise((res) => { + releaseFn = res + }) + this.chainByIdentity.set( + identity, + prior.then(() => next) + ) + return prior.then(() => releaseFn!) + } + + rebuild(snapshots: Array<{ identity: string; agentId: string }>): void { + this.refsByIdentity.clear() + this.chainByIdentity.clear() + for (const { identity, agentId } of snapshots) { + this.register(identity, agentId) + } + } +} +``` + +- [ ] **Step 4: Run the test, verify it passes** + +``` +pnpm -C packages/coding-agents test test/unit/workspace-registry.test.ts +``` + +Expect: PASS. + +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/src/workspace-registry.ts packages/coding-agents/test/unit/workspace-registry.test.ts +git commit -m "feat(coding-agents): WorkspaceRegistry with identity resolution, refcount, mutex" +``` + +--- + +### Task 1.B — `LifecycleManager` + +**Files:** + +- Create: `packages/coding-agents/src/lifecycle-manager.ts` +- Create: `packages/coding-agents/test/unit/lifecycle-manager.test.ts` + +**Constraints:** + +- LM is constructed with `{ provider, bridge }`. +- LM exposes: `ensureRunning(spec)`, `stop(agentId)`, `destroy(agentId)`, `armIdleTimer(agentId, ms, onFire)`, `cancelIdleTimer(agentId)`, `pin(agentId)`, `release(agentId)`, `pinCount(agentId)`, `resetPinCount(agentId)`, `adoptRunningContainers()`. +- LM exposes `startedAtMs: number` (captured in constructor). +- Idle timer is a `Map`. Pin count is `Map`. +- Pin count semantics: `pin` increments and cancels active idle timer; `release` decrements (clamped at 0). + +- [ ] **Step 1: Write the failing test** + +```ts +// test/unit/lifecycle-manager.test.ts +import { describe, it, expect, vi } from 'vitest' +import { LifecycleManager } from '../../src/lifecycle-manager' +import type { + Bridge, + ExecHandle, + ExecRequest, + RecoveredSandbox, + RunTurnArgs, + RunTurnResult, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from '../../src/types' + +function fakeProvider(): SandboxProvider & { + starts: Array + stops: Array +} { + const stub: SandboxInstance = { + instanceId: 'inst-1', + agentId: '', + workspaceMount: '/workspace', + async exec(_req: ExecRequest): Promise { + throw new Error('not used') + }, + } + const fp: any = { + name: 'fake', + starts: [] as Array, + stops: [] as Array, + async start(spec: SandboxSpec): Promise { + fp.starts.push(spec) + return { ...stub, agentId: spec.agentId } + }, + async stop(instanceId: string): Promise { + fp.stops.push(instanceId) + }, + async destroy(_id: string): Promise {}, + async status(_id: string): Promise<'running' | 'stopped' | 'unknown'> { + return 'running' + }, + async recover(): Promise> { + return [] + }, + } + return fp +} + +const fakeBridge: Bridge = { + async runTurn(_args: RunTurnArgs): Promise { + return { exitCode: 0 } + }, +} + +describe('LifecycleManager pin refcount', () => { + it('increments and decrements with a floor at 0', () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + expect(lm.pinCount('a')).toBe(0) + expect(lm.pin('a').count).toBe(1) + expect(lm.pin('a').count).toBe(2) + expect(lm.release('a').count).toBe(1) + expect(lm.release('a').count).toBe(0) + // Extra release is clamped + expect(lm.release('a').count).toBe(0) + }) + + it('resetPinCount clears to 0', () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + lm.pin('a') + lm.pin('a') + lm.resetPinCount('a') + expect(lm.pinCount('a')).toBe(0) + }) +}) + +describe('LifecycleManager idle timer', () => { + it('arms and fires onFire after ms elapses', async () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const onFire = vi.fn() + lm.armIdleTimer('a', 20, onFire) + await new Promise((r) => setTimeout(r, 50)) + expect(onFire).toHaveBeenCalledTimes(1) + }) + + it('cancelIdleTimer prevents fire', async () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const onFire = vi.fn() + lm.armIdleTimer('a', 20, onFire) + lm.cancelIdleTimer('a') + await new Promise((r) => setTimeout(r, 50)) + expect(onFire).not.toHaveBeenCalled() + }) + + it('arming twice cancels prior timer', async () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const first = vi.fn() + const second = vi.fn() + lm.armIdleTimer('a', 20, first) + lm.armIdleTimer('a', 20, second) + await new Promise((r) => setTimeout(r, 50)) + expect(first).not.toHaveBeenCalled() + expect(second).toHaveBeenCalled() + }) +}) + +describe('LifecycleManager ensureRunning', () => { + it('forwards to provider.start', async () => { + const fp = fakeProvider() + const lm = new LifecycleManager({ provider: fp, bridge: fakeBridge }) + await lm.ensureRunning({ + agentId: '/x/coding-agent/y', + kind: 'claude', + workspace: { type: 'volume', name: 'w' }, + env: { K: 'v' }, + }) + expect(fp.starts).toHaveLength(1) + expect(fp.starts[0]!.agentId).toBe('/x/coding-agent/y') + }) +}) + +describe('LifecycleManager.startedAtMs', () => { + it('captures a timestamp at construction', () => { + const before = Date.now() + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const after = Date.now() + expect(lm.startedAtMs).toBeGreaterThanOrEqual(before) + expect(lm.startedAtMs).toBeLessThanOrEqual(after) + }) +}) +``` + +- [ ] **Step 2: Run the test, verify it fails** + +``` +pnpm -C packages/coding-agents test test/unit/lifecycle-manager.test.ts +``` + +Expect: FAIL on module-not-found. + +- [ ] **Step 3: Write `src/lifecycle-manager.ts`** + +```ts +import { log } from './log' +import type { + Bridge, + RecoveredSandbox, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from './types' + +export interface LifecycleManagerDeps { + provider: SandboxProvider + bridge: Bridge +} + +export class LifecycleManager { + readonly provider: SandboxProvider + readonly bridge: Bridge + /** Wall-clock ms captured at construction. Used to detect orphan runs. */ + readonly startedAtMs: number + + private readonly idleTimers = new Map() + private readonly pinCounts = new Map() + + constructor(deps: LifecycleManagerDeps) { + this.provider = deps.provider + this.bridge = deps.bridge + this.startedAtMs = Date.now() + } + + // ── sandbox lifecycle ── + + async ensureRunning(spec: SandboxSpec): Promise { + return this.provider.start(spec) + } + + async stop(agentId: string): Promise { + this.cancelIdleTimer(agentId) + // The provider.destroy/stop interface is keyed by instanceId, not agentId. + // We rely on provider.destroy(agentId) which finds + removes by label. + await this.provider.destroy(agentId).catch((err) => { + log.warn( + { err, agentId }, + 'lifecycleManager.stop: provider.destroy failed' + ) + }) + } + + async destroy(agentId: string): Promise { + await this.stop(agentId) + this.pinCounts.delete(agentId) + } + + async adoptRunningContainers(): Promise> { + return this.provider.recover() + } + + // ── idle timer ── + + armIdleTimer(agentId: string, ms: number, onFire: () => void): void { + this.cancelIdleTimer(agentId) + const handle = setTimeout(() => { + this.idleTimers.delete(agentId) + try { + onFire() + } catch (err) { + log.warn({ err, agentId }, 'idle timer onFire threw') + } + }, ms) + this.idleTimers.set(agentId, handle) + } + + cancelIdleTimer(agentId: string): void { + const handle = this.idleTimers.get(agentId) + if (handle) { + clearTimeout(handle) + this.idleTimers.delete(agentId) + } + } + + // ── pin refcount ── + + pin(agentId: string): { count: number } { + const next = (this.pinCounts.get(agentId) ?? 0) + 1 + this.pinCounts.set(agentId, next) + if (next === 1) this.cancelIdleTimer(agentId) + return { count: next } + } + + release(agentId: string): { count: number } { + const cur = this.pinCounts.get(agentId) ?? 0 + const next = Math.max(0, cur - 1) + if (next === 0) this.pinCounts.delete(agentId) + else this.pinCounts.set(agentId, next) + return { count: next } + } + + pinCount(agentId: string): number { + return this.pinCounts.get(agentId) ?? 0 + } + + resetPinCount(agentId: string): void { + this.pinCounts.delete(agentId) + } +} +``` + +- [ ] **Step 4: Run the test, verify it passes** + +``` +pnpm -C packages/coding-agents test test/unit/lifecycle-manager.test.ts +``` + +Expect: PASS. + +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/src/lifecycle-manager.ts packages/coding-agents/test/unit/lifecycle-manager.test.ts +git commit -m "feat(coding-agents): LifecycleManager with idle timer and pin refcount" +``` + +--- + +## Phase 2 — Entity (sequential) + +### Task 2.1 — Entity handler + +**Files:** + +- Create: `packages/coding-agents/src/entity/handler.ts` +- Create: `packages/coding-agents/test/unit/entity-handler.test.ts` + +**Constraints:** + +- The handler is a function `makeCodingAgentHandler(lm, wr, options)` returning an async `(ctx, wake) => void`. +- `options: { defaults: { idleTimeoutMs, coldBootBudgetMs, runTimeoutMs }, env: () => Record }`. +- The handler reads/writes the StreamDB pattern: `ctx.db.collections.X.get`, `ctx.db.actions.X_insert/X_update`. +- Inbox messages: pending messages are ones with `key > sessionMeta.lastInboxKey`. Slice A reuses `sessionMeta` to track this since we don't have a separate `cursorState`. Add a `lastInboxKey?: string` field. +- Reconcile rules from spec table apply on every entry (after first-wake init). + +- [ ] **Step 1: Add `lastInboxKey` to the meta schema** + +Modify `packages/coding-agents/src/entity/collections.ts`. Add `lastInboxKey: z.string().optional()` to `sessionMetaRowSchema`: + +```ts +export const sessionMetaRowSchema = z.object({ + // ... existing fields ... + lastInboxKey: z.string().optional(), +}) +``` + +- [ ] **Step 2: Write the failing test** + +```ts +// test/unit/entity-handler.test.ts +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { z } from 'zod' +import { makeCodingAgentHandler } from '../../src/entity/handler' +import { LifecycleManager } from '../../src/lifecycle-manager' +import { WorkspaceRegistry } from '../../src/workspace-registry' +import type { + Bridge, + RunTurnArgs, + RunTurnResult, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from '../../src/types' + +// ── Fakes ── + +interface InboxRow { + key: string + payload?: unknown + message_type?: string +} + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k: string) { + return rows.get(k) + }, + get toArray(): Array { + return Array.from(rows.values()) + }, + } +} + +function makeFakeCtx(opts: { + entityUrl: string + args?: Record + inbox?: Array + meta?: any + runs?: Array +}) { + const sessionMeta = makeCollection() + const runs = makeCollection() + const events = makeCollection() + const lifecycle = makeCollection() + const inbox = makeCollection() + + if (opts.meta) sessionMeta.rows.set('current', opts.meta) + for (const r of opts.runs ?? []) runs.rows.set(r.key, r) + for (const i of opts.inbox ?? []) inbox.rows.set(i.key, i) + + const recordedRuns: Array<{ + key: string + status?: string + response?: string + }> = [] + let runCounter = 0 + + const ctx: any = { + entityUrl: opts.entityUrl, + entityType: 'coding-agent', + args: opts.args ?? {}, + tags: {}, + firstWake: false, + db: { + collections: { sessionMeta, runs, events, lifecycle, inbox }, + actions: { + sessionMeta_insert: ({ row }: { row: any }) => + sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ + key, + updater, + }: { + key: string + updater: (d: any) => void + }) => { + const cur = sessionMeta.rows.get(key) + if (cur) updater(cur) + }, + runs_insert: ({ row }: { row: any }) => runs.rows.set(row.key, row), + runs_update: ({ + key, + updater, + }: { + key: string + updater: (d: any) => void + }) => { + const cur = runs.rows.get(key) + if (cur) updater(cur) + }, + events_insert: ({ row }: { row: any }) => events.rows.set(row.key, row), + lifecycle_insert: ({ row }: { row: any }) => + lifecycle.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent = { key, status: undefined as string | undefined, response: '' } + recordedRuns.push(ent) + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: vi.fn(), + } + + return { ctx, recordedRuns } +} + +function makeFakeProvider( + initialStatus: 'running' | 'stopped' | 'unknown' = 'stopped' +) { + const stub: SandboxInstance = { + instanceId: 'inst-1', + agentId: '', + workspaceMount: '/workspace', + async exec() { + throw new Error('not used') + }, + } + const fp: any = { + name: 'fake', + statusReturn: initialStatus, + async start(spec: SandboxSpec): Promise { + return { ...stub, agentId: spec.agentId } + }, + async stop(_id: string) {}, + async destroy(_id: string) {}, + async status() { + return fp.statusReturn + }, + async recover() { + return [] + }, + } + return fp +} + +describe('entity handler — first-wake init', () => { + it('seeds sessionMeta when none exists, using args', async () => { + const lm = new LifecycleManager({ + provider: makeFakeProvider(), + bridge: { + async runTurn() { + return { exitCode: 0 } + }, + }, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({}), + }) + + const { ctx } = makeFakeCtx({ + entityUrl: '/test/coding-agent/x', + args: { + kind: 'claude', + workspace: { type: 'volume', name: 'w' }, + }, + }) + + await handler(ctx, { type: 'message_received' } as any) + + const meta = ctx.db.collections.sessionMeta.get('current') + expect(meta).toBeDefined() + expect(meta.status).toBe('cold') + expect(meta.kind).toBe('claude') + expect(meta.workspaceIdentity).toBe('volume:w') + expect(meta.pinned).toBe(false) + }) +}) + +describe('entity handler — pin/release', () => { + it('pin sets pinned=true and cancels timer', async () => { + const lm = new LifecycleManager({ + provider: makeFakeProvider('running'), + bridge: { + async runTurn() { + return { exitCode: 0 } + }, + }, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({}), + }) + const meta = { + key: 'current', + status: 'idle', + kind: 'claude', + pinned: false, + workspaceIdentity: 'volume:w', + workspaceSpec: { type: 'volume', name: 'w' }, + idleTimeoutMs: 1000, + keepWarm: false, + } + const { ctx } = makeFakeCtx({ + entityUrl: '/t/coding-agent/x', + meta, + inbox: [{ key: 'i1', message_type: 'pin' }], + }) + await handler(ctx, { type: 'message_received' } as any) + expect(ctx.db.collections.sessionMeta.get('current').pinned).toBe(true) + expect(lm.pinCount('/t/coding-agent/x')).toBe(1) + }) +}) + +describe('entity handler — reconcile orphan run', () => { + it('marks orphan run failed when meta=running and run.startedAt < lm.startedAtMs', async () => { + const lm = new LifecycleManager({ + provider: makeFakeProvider('stopped'), + bridge: { + async runTurn() { + return { exitCode: 0 } + }, + }, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({}), + }) + const oldStart = lm.startedAtMs - 10_000 + const meta = { + key: 'current', + status: 'running', + kind: 'claude', + pinned: false, + workspaceIdentity: 'volume:w', + workspaceSpec: { type: 'volume', name: 'w' }, + idleTimeoutMs: 1000, + keepWarm: false, + instanceId: 'old-inst', + } + const orphanRun = { + key: 'run-old', + startedAt: oldStart, + status: 'running', + promptInboxKey: 'i0', + } + const { ctx } = makeFakeCtx({ + entityUrl: '/t/coding-agent/x', + meta, + runs: [orphanRun], + }) + await handler(ctx, { type: 'message_received' } as any) + const updated = ctx.db.collections.runs.get('run-old') + expect(updated.status).toBe('failed') + expect(updated.finishReason).toBe('orphaned') + expect(ctx.db.collections.sessionMeta.get('current').status).toBe('cold') + }) +}) + +describe('entity handler — processPrompt happy path', () => { + it('runs a turn, records events, ends run completed', async () => { + const events: Array = [ + { type: 'session_init', sessionId: 'abc', ts: 1 }, + { type: 'assistant_message', text: 'hello', ts: 2 }, + ] + const bridge: Bridge = { + async runTurn(args: RunTurnArgs): Promise { + for (const e of events) args.onEvent(e as any) + return { exitCode: 0, finalText: 'hello' } + }, + } + const lm = new LifecycleManager({ + provider: makeFakeProvider('stopped'), + bridge, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({ ANTHROPIC_API_KEY: 'sk-test' }), + }) + const meta = { + key: 'current', + status: 'cold', + kind: 'claude', + pinned: false, + workspaceIdentity: 'volume:w', + workspaceSpec: { type: 'volume', name: 'w' }, + idleTimeoutMs: 1000, + keepWarm: false, + } + const { ctx, recordedRuns } = makeFakeCtx({ + entityUrl: '/t/coding-agent/x', + meta, + inbox: [{ key: 'i1', message_type: 'prompt', payload: { text: 'hi' } }], + }) + await handler(ctx, { type: 'message_received' } as any) + + expect(recordedRuns).toHaveLength(1) + expect(recordedRuns[0]!.status).toBe('completed') + expect(recordedRuns[0]!.response).toBe('hello') + + const finalMeta = ctx.db.collections.sessionMeta.get('current') + expect(finalMeta.status).toBe('idle') + + const runs = Array.from(ctx.db.collections.runs.rows.values()) + expect(runs).toHaveLength(1) + expect((runs[0] as any).status).toBe('completed') + + const eventRows = Array.from(ctx.db.collections.events.rows.values()) + expect(eventRows).toHaveLength(2) + }) +}) +``` + +- [ ] **Step 3: Run the test, verify it fails** + +``` +pnpm -C packages/coding-agents test test/unit/entity-handler.test.ts +``` + +Expect: FAIL on missing module. + +- [ ] **Step 4: Write `src/entity/handler.ts`** + +```ts +import type { NormalizedEvent } from 'agent-session-protocol' +import { log } from '../log' +import { WorkspaceRegistry } from '../workspace-registry' +import type { LifecycleManager } from '../lifecycle-manager' +import type { + RunRow, + SessionMetaRow, + EventRow, + LifecycleRow, +} from './collections' +import { promptMessageSchema } from './messages' + +export interface CodingAgentHandlerOptions { + defaults: { + idleTimeoutMs: number + coldBootBudgetMs: number + runTimeoutMs: number + } + /** Called per-turn to source CLI env (e.g. ANTHROPIC_API_KEY). */ + env: () => Record +} + +interface InboxRow { + key: string + payload?: unknown + message_type?: string +} + +const NS_MAX = String(Number.MAX_SAFE_INTEGER).length + +function nextRunId(existing: ReadonlyArray<{ key: string }>): string { + // Deterministic: run-N where N = count + 1 + return `run-${existing.length + 1}` +} + +function eventKey(runId: string, seq: number): string { + return `${runId}:${String(seq).padStart(NS_MAX, '0')}` +} + +function lifecycleKey(label: string): string { + return `${label}:${Date.now()}-${Math.floor(Math.random() * 1000)}` +} + +function raceTimeout(p: Promise, ms: number): Promise { + return new Promise((resolve, reject) => { + const handle = setTimeout(() => { + const e = new Error('TimeoutError') + ;(e as any).name = 'TimeoutError' + reject(e) + }, ms) + p.then( + (v) => { + clearTimeout(handle) + resolve(v) + }, + (err) => { + clearTimeout(handle) + reject(err) + } + ) + }) +} + +export function makeCodingAgentHandler( + lm: LifecycleManager, + wr: WorkspaceRegistry, + options: CodingAgentHandlerOptions +) { + return async function handleCodingAgentEntity( + ctx: any, + _wake: any + ): Promise { + const agentId = ctx.entityUrl as string + const sessionMetaCol = ctx.db.collections.sessionMeta + const runsCol = ctx.db.collections.runs + const eventsCol = ctx.db.collections.events + const lifecycleCol = ctx.db.collections.lifecycle + const inboxCol = ctx.db.collections.inbox + + // ─── 1) FIRST-WAKE INIT ──────────────────────────────────────────────── + + let meta = sessionMetaCol.get('current') as SessionMetaRow | undefined + if (!meta) { + const args = ctx.args as { + kind?: 'claude' + workspace?: any + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } + } + const ws = args.workspace ?? { type: 'volume' } + const resolved = await WorkspaceRegistry.resolveIdentity(agentId, ws) + const idleTimeoutMs = + args.lifecycle?.idleTimeoutMs ?? options.defaults.idleTimeoutMs + const keepWarm = args.lifecycle?.keepWarm ?? false + const initial: SessionMetaRow = { + key: 'current', + status: 'cold', + kind: args.kind ?? 'claude', + pinned: false, + workspaceIdentity: resolved.identity, + workspaceSpec: resolved.resolved, + idleTimeoutMs, + keepWarm, + } + ctx.db.actions.sessionMeta_insert({ row: initial }) + wr.register(resolved.identity, agentId) + meta = initial + } + + if (meta.status === 'destroyed') { + // Tombstoned. Ignore everything. + return + } + + // ─── 2) RECONCILE ────────────────────────────────────────────────────── + + const providerStatus = await lm.provider.status(agentId) + const openRun = (runsCol.toArray as Array).find( + (r) => r.status === 'running' + ) + const isOrphaned = openRun && openRun.startedAt < lm.startedAtMs + + if (meta.status === 'running' && providerStatus !== 'running') { + if (openRun) { + ctx.db.actions.runs_update({ + key: openRun.key, + updater: (d: RunRow) => { + d.status = 'failed' + d.finishReason = 'orphaned' + d.endedAt = Date.now() + }, + }) + } + ctx.db.actions.lifecycle_insert({ + row: { + key: lifecycleKey('orphan'), + ts: Date.now(), + event: 'orphan.detected', + } satisfies LifecycleRow, + }) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'cold' + d.instanceId = undefined + }, + }) + meta = sessionMetaCol.get('current')! + } else if ( + meta.status === 'running' && + providerStatus === 'running' && + isOrphaned + ) { + ctx.db.actions.runs_update({ + key: openRun!.key, + updater: (d: RunRow) => { + d.status = 'failed' + d.finishReason = 'orphaned' + d.endedAt = Date.now() + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: lifecycleKey('orphan'), + ts: Date.now(), + event: 'orphan.detected', + } satisfies LifecycleRow, + }) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'idle' + }, + }) + meta = sessionMetaCol.get('current')! + } else if (meta.status === 'idle' && providerStatus === 'stopped') { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'cold' + d.instanceId = undefined + }, + }) + meta = sessionMetaCol.get('current')! + } else if ( + (meta.status === 'starting' || meta.status === 'stopping') && + providerStatus !== 'running' + ) { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'cold' + }, + }) + meta = sessionMetaCol.get('current')! + } else if ( + (meta.status === 'starting' || meta.status === 'stopping') && + providerStatus === 'running' + ) { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'idle' + }, + }) + meta = sessionMetaCol.get('current')! + } + + // ─── 3) PROCESS PENDING INBOX ────────────────────────────────────────── + + const inboxRows = (inboxCol.toArray as Array) + .slice() + .sort((a, b) => (a.key < b.key ? -1 : a.key > b.key ? 1 : 0)) + const lastKey = meta.lastInboxKey ?? '' + const pending = inboxRows.filter((m) => m.key > lastKey) + + for (const inboxMsg of pending) { + try { + await dispatchInboxMessage(ctx, lm, wr, options, inboxMsg) + } catch (err) { + log.error({ err, inboxMsg }, 'coding-agent handler dispatch threw') + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'error' + d.lastError = err instanceof Error ? err.message : String(err) + }, + }) + } + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.lastInboxKey = inboxMsg.key + }, + }) + meta = sessionMetaCol.get('current')! + if (meta.status === 'destroyed') return + } + } +} + +async function dispatchInboxMessage( + ctx: any, + lm: LifecycleManager, + wr: WorkspaceRegistry, + options: CodingAgentHandlerOptions, + inboxMsg: InboxRow +): Promise { + const type = inboxMsg.message_type ?? 'prompt' + switch (type) { + case 'prompt': + return processPrompt(ctx, lm, wr, options, inboxMsg) + case 'pin': + return processPin(ctx, lm) + case 'release': + return processRelease(ctx, lm) + case 'stop': + return processStop(ctx, lm) + case 'destroy': + return processDestroy(ctx, lm, wr) + default: + log.warn({ type }, 'coding-agent: unknown inbox message type') + } +} + +async function processPrompt( + ctx: any, + lm: LifecycleManager, + wr: WorkspaceRegistry, + options: CodingAgentHandlerOptions, + inboxMsg: InboxRow +): Promise { + const parsed = promptMessageSchema.safeParse(inboxMsg.payload) + if (!parsed.success) return + const promptText = parsed.data.text + const agentId = ctx.entityUrl as string + const sessionMetaCol = ctx.db.collections.sessionMeta + const runsCol = ctx.db.collections.runs + const eventsCol = ctx.db.collections.events + const lifecycleCol = ctx.db.collections.lifecycle + + let meta = sessionMetaCol.get('current') as SessionMetaRow + + // Cold-boot: ensure sandbox up + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'starting' + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `boot:${Date.now()}`, + ts: Date.now(), + event: 'sandbox.starting', + } satisfies LifecycleRow, + }) + + let sandbox + try { + sandbox = await raceTimeout( + lm.ensureRunning({ + agentId, + kind: meta.kind, + workspace: meta.workspaceSpec, + env: options.env(), + }), + options.defaults.coldBootBudgetMs + ) + } catch (err) { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'error' + d.lastError = err instanceof Error ? err.message : String(err) + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `boot:${Date.now()}`, + ts: Date.now(), + event: 'sandbox.failed', + detail: err instanceof Error ? err.message : String(err), + } satisfies LifecycleRow, + }) + return + } + + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'idle' + d.instanceId = sandbox.instanceId + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `boot:${Date.now()}`, + ts: Date.now(), + event: 'sandbox.started', + } satisfies LifecycleRow, + }) + + meta = sessionMetaCol.get('current')! + const releaseLease = await wr.acquire(meta.workspaceIdentity) + try { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'running' + d.currentPromptInboxKey = inboxMsg.key + }, + }) + + const recordedRun = ctx.recordRun() + const runId = recordedRun.key + ctx.db.actions.runs_insert({ + row: { + key: runId, + startedAt: Date.now(), + status: 'running', + promptInboxKey: inboxMsg.key, + } satisfies RunRow, + }) + + let seq = 0 + let finalText: string | undefined + try { + const result = await raceTimeout( + lm.bridge.runTurn({ + sandbox, + kind: meta.kind, + prompt: promptText, + onEvent: (e: NormalizedEvent) => { + ctx.db.actions.events_insert({ + row: { + key: eventKey(runId, seq), + runId, + seq, + ts: Date.now(), + type: e.type, + payload: e as unknown as Record, + } satisfies EventRow, + }) + seq++ + }, + }), + options.defaults.runTimeoutMs + ) + finalText = result.finalText + ctx.db.actions.runs_update({ + key: runId, + updater: (d: RunRow) => { + d.status = 'completed' + d.endedAt = Date.now() + d.responseText = finalText + }, + }) + if (finalText) recordedRun.attachResponse(finalText) + recordedRun.end({ status: 'completed' }) + } catch (err) { + const reason = + err instanceof Error && err.name === 'TimeoutError' + ? 'timeout' + : `cli-exit:${(err instanceof Error ? err.message : String(err)).slice(0, 200)}` + ctx.db.actions.runs_update({ + key: runId, + updater: (d: RunRow) => { + d.status = 'failed' + d.endedAt = Date.now() + d.finishReason = reason + }, + }) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'error' + d.lastError = err instanceof Error ? err.message : String(err) + }, + }) + recordedRun.end({ status: 'failed', finishReason: reason }) + return + } + + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'idle' + d.currentPromptInboxKey = undefined + }, + }) + + if (!meta.keepWarm && lm.pinCount(agentId) === 0) { + lm.armIdleTimer(agentId, meta.idleTimeoutMs, () => { + // Fire-and-forget: provider.destroy is keyed by agentId. + void lm.provider.destroy(agentId).catch((err) => { + log.warn({ err, agentId }, 'idle stop failed') + }) + }) + } + } finally { + releaseLease() + } +} + +function processPin(ctx: any, lm: LifecycleManager): void { + const agentId = ctx.entityUrl as string + const { count } = lm.pin(agentId) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.pinned = true + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `pin:${Date.now()}`, + ts: Date.now(), + event: 'pin', + detail: `count=${count}`, + } satisfies LifecycleRow, + }) +} + +function processRelease(ctx: any, lm: LifecycleManager): void { + const agentId = ctx.entityUrl as string + const { count } = lm.release(agentId) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.pinned = count > 0 + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `release:${Date.now()}`, + ts: Date.now(), + event: 'release', + detail: `count=${count}`, + } satisfies LifecycleRow, + }) + if (count === 0) { + const meta = ctx.db.collections.sessionMeta.get('current') as SessionMetaRow + if (!meta.keepWarm && meta.status === 'idle') { + lm.armIdleTimer(agentId, meta.idleTimeoutMs, () => { + void lm.provider.destroy(agentId).catch(() => undefined) + }) + } + } +} + +async function processStop(ctx: any, lm: LifecycleManager): Promise { + const agentId = ctx.entityUrl as string + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'stopping' + }, + }) + await lm.stop(agentId) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'cold' + d.instanceId = undefined + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `stop:${Date.now()}`, + ts: Date.now(), + event: 'sandbox.stopped', + } satisfies LifecycleRow, + }) +} + +async function processDestroy( + ctx: any, + lm: LifecycleManager, + wr: WorkspaceRegistry +): Promise { + const agentId = ctx.entityUrl as string + const meta = ctx.db.collections.sessionMeta.get('current') as SessionMetaRow + await lm.destroy(agentId) + if (meta) wr.release(meta.workspaceIdentity, agentId) + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d: SessionMetaRow) => { + d.status = 'destroyed' + d.instanceId = undefined + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `destroy:${Date.now()}`, + ts: Date.now(), + event: 'sandbox.stopped', + detail: 'destroyed', + } satisfies LifecycleRow, + }) +} +``` + +- [ ] **Step 5: Run the test, verify it passes** + +``` +pnpm -C packages/coding-agents test test/unit/entity-handler.test.ts +``` + +Expect: PASS (4 tests). + +- [ ] **Step 6: Run full unit test suite to confirm no regressions** + +``` +pnpm -C packages/coding-agents test +``` + +Expect: all unit tests pass. + +- [ ] **Step 7: Commit** + +``` +git add packages/coding-agents/src/entity/handler.ts packages/coding-agents/src/entity/collections.ts packages/coding-agents/test/unit/entity-handler.test.ts +git commit -m "feat(coding-agents): entity handler with reconcile, prompt/pin/release/stop/destroy" +``` + +--- + +### Task 2.2 — `registerCodingAgent` + +**Files:** + +- Create: `packages/coding-agents/src/entity/register.ts` +- Modify: `packages/coding-agents/src/index.ts` + +- [ ] **Step 1: Write `src/entity/register.ts`** + +```ts +import type { EntityRegistry } from '@electric-ax/agents-runtime' +import { LifecycleManager } from '../lifecycle-manager' +import { WorkspaceRegistry } from '../workspace-registry' +import { SLICE_A_DEFAULTS } from '../types' +import type { Bridge, SandboxProvider } from '../types' +import { + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + eventRowSchema, + lifecycleRowSchema, + runRowSchema, + sessionMetaRowSchema, +} from './collections' +import { + destroyMessageSchema, + pinMessageSchema, + promptMessageSchema, + releaseMessageSchema, + stopMessageSchema, +} from './messages' +import { makeCodingAgentHandler } from './handler' +import { z } from 'zod' + +export interface RegisterCodingAgentDeps { + provider: SandboxProvider + bridge: Bridge + /** Override defaults; used by tests. */ + defaults?: Partial<{ + idleTimeoutMs: number + coldBootBudgetMs: number + runTimeoutMs: number + }> + /** Per-turn env supplier. Defaults to forwarding ANTHROPIC_API_KEY from process.env. */ + env?: () => Record +} + +const creationArgsSchema = z.object({ + kind: z.enum(['claude']).optional(), + workspace: z + .union([ + z.object({ + type: z.literal('volume'), + name: z.string().optional(), + }), + z.object({ + type: z.literal('bindMount'), + hostPath: z.string(), + }), + ]) + .optional(), + lifecycle: z + .object({ + idleTimeoutMs: z.number().optional(), + keepWarm: z.boolean().optional(), + }) + .optional(), +}) + +export function registerCodingAgent( + registry: EntityRegistry, + deps: RegisterCodingAgentDeps +): void { + const lm = new LifecycleManager(deps) + const wr = new WorkspaceRegistry() + const defaults = { + idleTimeoutMs: + deps.defaults?.idleTimeoutMs ?? SLICE_A_DEFAULTS.idleTimeoutMs, + coldBootBudgetMs: + deps.defaults?.coldBootBudgetMs ?? SLICE_A_DEFAULTS.coldBootBudgetMs, + runTimeoutMs: deps.defaults?.runTimeoutMs ?? SLICE_A_DEFAULTS.runTimeoutMs, + } + const env = + deps.env ?? + (() => { + const out: Record = {} + const k = process.env.ANTHROPIC_API_KEY + if (k) out.ANTHROPIC_API_KEY = k + return out + }) + + registry.define('coding-agent', { + description: + 'Runs a Claude Code CLI session inside a Docker sandbox. Manages lifecycle (cold/idle/running) and workspace lease.', + creationSchema: creationArgsSchema, + inboxSchemas: { + prompt: promptMessageSchema, + pin: pinMessageSchema, + release: releaseMessageSchema, + stop: stopMessageSchema, + destroy: destroyMessageSchema, + }, + state: { + sessionMeta: { + schema: sessionMetaRowSchema, + type: CODING_AGENT_SESSION_META_COLLECTION_TYPE, + primaryKey: 'key', + }, + runs: { + schema: runRowSchema, + type: CODING_AGENT_RUNS_COLLECTION_TYPE, + primaryKey: 'key', + }, + events: { + schema: eventRowSchema, + type: CODING_AGENT_EVENTS_COLLECTION_TYPE, + primaryKey: 'key', + }, + lifecycle: { + schema: lifecycleRowSchema, + type: CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + primaryKey: 'key', + }, + }, + handler: makeCodingAgentHandler(lm, wr, { defaults, env }), + }) +} + +/** Test-only accessor for asserting workspace registry state from outside. */ +export interface CodingAgentInternals { + lifecycleManager: LifecycleManager + workspaceRegistry: WorkspaceRegistry +} +``` + +- [ ] **Step 2: Update `src/index.ts`** + +Replace contents: + +```ts +export type { + CodingAgentKind, + SandboxSpec, + ExecRequest, + ExecHandle, + SandboxInstance, + SandboxProvider, + RecoveredSandbox, + RunTurnArgs, + RunTurnResult, + Bridge, + SpawnCodingAgentOptions, + RunSummary, + CodingAgentStatus, +} from './types' +export { LocalDockerProvider } from './providers/local-docker' +export { StdioBridge } from './bridge/stdio-bridge' +export { LifecycleManager } from './lifecycle-manager' +export { WorkspaceRegistry } from './workspace-registry' +export { + registerCodingAgent, + type RegisterCodingAgentDeps, +} from './entity/register' +export { + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, +} from './entity/collections' +``` + +- [ ] **Step 3: Run typecheck** + +``` +pnpm -C packages/coding-agents typecheck +``` + +Expect: clean. + +- [ ] **Step 4: Run all unit tests** + +``` +pnpm -C packages/coding-agents test +``` + +Expect: all pass. + +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/src/entity/register.ts packages/coding-agents/src/index.ts +git commit -m "feat(coding-agents): registerCodingAgent helper" +``` + +--- + +### Task 2.3 — Runtime API surface (`ctx.spawnCodingAgent` / `observeCodingAgent`) + +**Files:** + +- Modify: `packages/agents-runtime/src/types.ts` (add types and HandlerContext methods) +- Modify: `packages/agents-runtime/src/context-factory.ts` (add impl) + +- [ ] **Step 1: Read the existing `useCodingAgent` impl as a reference** + +Already known location: `packages/agents-runtime/src/context-factory.ts:561-629`. New helpers will be placed alongside it. + +- [ ] **Step 2: Add types in `packages/agents-runtime/src/types.ts`** + +Find the existing `CodingSessionHandle` interface (~line 800). Insert these new types **after** it: + +```ts +// ─── Coding Agent (Slice A) ─────────────────────────────────────────────── + +export type CodingAgentSliceAStatus = + | 'cold' + | 'starting' + | 'idle' + | 'running' + | 'stopping' + | 'error' + | 'destroyed' + +export interface SpawnCodingAgentOptions { + id: string + kind: 'claude' + workspace: + | { type: 'volume'; name?: string } + | { type: 'bindMount'; hostPath: string } + initialPrompt?: string + wake?: { on: 'runFinished'; includeResponse?: boolean } + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } +} + +export interface CodingAgentRunSummary { + runId: string + startedAt: number + endedAt?: number + status: 'running' | 'completed' | 'failed' + promptInboxKey: string + responseText?: string +} + +export interface CodingAgentState { + status: CodingAgentSliceAStatus + pinned: boolean + workspace: { identity: string; sharedRefs: number } + lastError?: string + runs: ReadonlyArray +} + +export interface CodingAgentHandle { + readonly url: string + readonly kind: 'claude' + send(prompt: string): Promise<{ runId: string }> + events(opts?: { since?: 'start' | 'now' }): AsyncIterable + state(): CodingAgentState + pin(): Promise + release(): Promise + stop(): Promise + destroy(): Promise +} +``` + +Then **add to the `HandlerContext` interface** (the one defined ~line 882). Insert these two methods after `useCodingAgent`: + +```ts +/** + * Spawn (or attach to) a `coding-agent` entity that runs a CLI inside a + * Docker sandbox with managed lifecycle (cold/idle/running, idle hibernation, + * pin/release, workspace lease). Requires `registerCodingAgent` to have been + * called on the runtime's registry. + */ +spawnCodingAgent: (opts: SpawnCodingAgentOptions) => Promise +observeCodingAgent: (id: string) => Promise +``` + +- [ ] **Step 3: Implement in `packages/agents-runtime/src/context-factory.ts`** + +Find `async useCodingAgent(...)` (line ~561). Insert these two new methods immediately after it (before `send(...)`): + +```ts + async spawnCodingAgent( + opts: SpawnCodingAgentOptions + ): Promise { + const spawnArgs: Record = { + kind: opts.kind, + workspace: opts.workspace, + } + if (opts.lifecycle !== undefined) spawnArgs.lifecycle = opts.lifecycle + + const initialMessage = + opts.initialPrompt !== undefined + ? { type: 'prompt' as const, payload: { text: opts.initialPrompt } } + : undefined + + const wake: Wake = opts.wake + ? `runFinished` + : `runFinished` + + const entityHandle = await config.doSpawn( + 'coding-agent', + opts.id, + spawnArgs, + { + observe: true, + wake, + ...(initialMessage ? { initialMessage } : {}), + } + ) + + return makeCodingAgentHandle( + config, + entityHandle.url, + entityHandle + ) + }, + async observeCodingAgent(id: string): Promise { + const url = `${entityUrl}/coding-agent/${id}` + const entityHandle = await (config.doObserve as any)({ + sourceType: 'entity', + path: url, + }) + return makeCodingAgentHandle(config, url, entityHandle) + }, +``` + +Then add this helper at the bottom of the same file (above the closing return of `createContextFactory` or whatever exports it — find the right scope by reading file context): + +```ts +function makeCodingAgentHandle( + config: any, + url: string, + entityHandle: any +): CodingAgentHandle { + const sendInbox = ( + payload: unknown, + type: string + ): Promise<{ runId: string }> => { + config.executeSend({ + targetUrl: url, + payload, + type, + }) + // The inbox key isn't known to the caller; surface a synthetic id. + return Promise.resolve({ runId: `run-pending-${Date.now()}` }) + } + + const readMeta = (): any => { + const c = entityHandle.db?.collections?.sessionMeta + return c?.get?.('current') + } + const readRuns = (): Array => { + const c = entityHandle.db?.collections?.runs + if (!c) return [] + const rows = (c as { toArray?: unknown }).toArray + if (!Array.isArray(rows)) return [] + return rows.map((r: any) => ({ + runId: r.key, + startedAt: r.startedAt, + endedAt: r.endedAt, + status: r.status, + promptInboxKey: r.promptInboxKey, + responseText: r.responseText, + })) + } + + return { + url, + kind: 'claude', + send: (text: string) => { + config.executeSend({ + targetUrl: url, + payload: { text }, + type: 'prompt', + }) + return Promise.resolve({ runId: `run-pending-${Date.now()}` }) + }, + pin: () => sendInbox({}, 'pin').then(() => undefined), + release: () => sendInbox({}, 'release').then(() => undefined), + stop: () => sendInbox({}, 'stop').then(() => undefined), + destroy: () => sendInbox({}, 'destroy').then(() => undefined), + state(): CodingAgentState { + const meta = readMeta() + return { + status: meta?.status ?? 'cold', + pinned: meta?.pinned ?? false, + workspace: { + identity: meta?.workspaceIdentity ?? '', + sharedRefs: 1, // server-only state; see Slice A spec + }, + lastError: meta?.lastError, + runs: readRuns(), + } + }, + events(opts?: { since?: 'start' | 'now' }) { + // Slice A: simple async iterator that yields current rows then stops. + // Live tailing is added with the UI in Slice C. + const since = opts?.since ?? 'now' + const c = entityHandle.db?.collections?.events + const rows: Array<{ payload: unknown }> = + c && Array.isArray((c as any).toArray) ? (c as any).toArray : [] + const initial = since === 'start' ? rows.slice() : [] + return (async function* () { + for (const r of initial) { + yield r.payload + } + })() + }, + } +} +``` + +Imports needed at the top of the file (verify they aren't already imported): + +```ts +import type { + SpawnCodingAgentOptions, + CodingAgentHandle, + CodingAgentState, + CodingAgentRunSummary, +} from './types' +``` + +- [ ] **Step 4: Add a runtime unit test** + +Create `packages/agents-runtime/test/spawn-coding-agent.test.ts`: + +```ts +import { describe, it, expect, vi } from 'vitest' +// NOTE: This test calls into the context factory at a low level. The real +// runtime test suite verifies the broader integration. Slice A only asserts +// the desugaring contract. + +import type { CodingAgentHandle, SpawnCodingAgentOptions } from '../src/types' + +describe('ctx.spawnCodingAgent desugaring', () => { + // Lightweight contract test: importing the runtime's types confirms the + // public surface compiles. Runtime-level integration coverage is in + // packages/coding-agents/test/integration/slice-a.test.ts. + it('exports SpawnCodingAgentOptions', () => { + const opts: SpawnCodingAgentOptions = { + id: 'x', + kind: 'claude', + workspace: { type: 'volume' }, + } + expect(opts.kind).toBe('claude') + }) + it('exports CodingAgentHandle shape', () => { + const noopHandle: CodingAgentHandle = { + url: '/x', + kind: 'claude', + send: async () => ({ runId: 'r' }), + events: async function* () {}, + state: () => ({ + status: 'cold', + pinned: false, + workspace: { identity: '', sharedRefs: 1 }, + runs: [], + }), + pin: async () => undefined, + release: async () => undefined, + stop: async () => undefined, + destroy: async () => undefined, + } + expect(noopHandle.kind).toBe('claude') + }) +}) +``` + +- [ ] **Step 5: Run runtime typecheck and tests** + +``` +pnpm -C packages/agents-runtime typecheck +pnpm -C packages/agents-runtime test test/spawn-coding-agent.test.ts +``` + +Expect: clean typecheck; test passes. + +If the file `packages/agents-runtime/test/` doesn't exist or vitest config is different, look at existing tests in `packages/agents-runtime/` for the right path. + +- [ ] **Step 6: Commit** + +``` +git add packages/agents-runtime/src/types.ts packages/agents-runtime/src/context-factory.ts packages/agents-runtime/test/spawn-coding-agent.test.ts +git commit -m "feat(agents-runtime): ctx.spawnCodingAgent / observeCodingAgent typed primitives" +``` + +--- + +## Phase 3 — Server wiring (sequential) + +### Task 3.1 — Bootstrap call + +**Files:** + +- Modify: `packages/agents/src/bootstrap.ts` + +- [ ] **Step 1: Read the existing bootstrap, locate the `registerCodingSession` call** + +The line is `packages/agents/src/bootstrap.ts:119`. Confirm by `grep -n registerCodingSession packages/agents/src/bootstrap.ts`. + +- [ ] **Step 2: Modify `bootstrap.ts`** + +Add imports at the top (next to the existing `registerCodingSession` import): + +```ts +import { + LocalDockerProvider, + StdioBridge, + registerCodingAgent, +} from '@electric-ax/coding-agents' +``` + +After the existing `registerCodingSession(...)` line (line 119), add: + +```ts +registerCodingSession(registry, { defaultWorkingDirectory: cwd }) +typeNames.push(`coder`) + +// NEW for Slice A: +registerCodingAgent(registry, { + provider: new LocalDockerProvider(), + bridge: new StdioBridge(), +}) +typeNames.push(`coding-agent`) +``` + +- [ ] **Step 3: Add `@electric-ax/coding-agents` to `packages/agents/package.json` dependencies** if not already present. + +Check first: + +``` +grep '"@electric-ax/coding-agents"' packages/agents/package.json +``` + +If missing, add to `dependencies`: + +```json + "@electric-ax/coding-agents": "workspace:*", +``` + +Then re-install: + +``` +pnpm install +``` + +- [ ] **Step 4: Verify everything builds** + +``` +pnpm -C packages/agents typecheck +pnpm -C packages/agents-runtime typecheck +pnpm -C packages/coding-agents typecheck +``` + +Expect: all clean. + +- [ ] **Step 5: Run all package unit tests** + +``` +pnpm -C packages/coding-agents test +pnpm -C packages/agents-runtime test +pnpm -C packages/agents test +``` + +Expect: all pass (no regressions in legacy `coder` flows). + +- [ ] **Step 6: Commit** + +``` +git add packages/agents/src/bootstrap.ts packages/agents/package.json pnpm-lock.yaml +git commit -m "feat(agents): wire registerCodingAgent into bootstrap" +``` + +--- + +## Phase 4 — Integration smoke (sequential) + +### Task 4.1 — End-to-end Slice A test + +**Files:** + +- Create: `packages/coding-agents/test/integration/slice-a.test.ts` + +**Validation goals (one test, eight assertions):** + +1. Build/load the test image (existing helper). +2. Spawn the `coding-agent` entity via the runtime registry directly (no full `agents-server`; we drive it with a minimal harness). +3. Send a prompt; assert the `runs` collection ends with `status='completed'`, `responseText` non-empty. +4. Pin; sleep past `idleTimeoutMs=2000`; assert `provider.status` returns `'running'`. +5. Release; sleep past idle; assert `provider.status` returns `'stopped'`. +6. Send another prompt; assert cold-boot path executes; response received. +7. Spawn second agent on same workspace name; concurrently send to both; assert run order via `runs` collection timestamps (lease-serialized). +8. Crash recovery: tear down LM/WR/handler, re-`registerCodingAgent` with the same provider, observe entity state, send prompt; assert the prior `runs` row was reconciled to `failed: orphaned`, new run completes. +9. Destroy; assert `meta.status='destroyed'`, container removed. + +**This is a lot for one test file.** Acceptable: the spec called for one e2e test. Internally, organize it as `describe('Slice A integration', ...)` with one big `it('full flow', ...)` so wall time is amortized over a single image build + sandbox lifecycle. + +The "minimal harness" is the tricky bit. Slice A doesn't need a full `agents-server`; the unit tests already use a fake ctx. For integration, we need real StreamDB collections + the real handler invocation. Two options: + +- **Option A (preferred):** Reuse `packages/agents-runtime/test/` infrastructure if it exposes a test harness. (Read `packages/agents-runtime/test/` to confirm.) +- **Option B:** Write a minimal harness in `test/integration/support/test-runtime.ts` that builds the StreamDB + executes the handler. + +If neither is feasible within this task's time budget, the implementer should fall back to a reduced test that exercises the entity handler against fake-but-real-enough collections (with a real Docker provider and real bridge), and document this as a Phase 5 follow-up. + +- [ ] **Step 1: Locate existing runtime test harness** + +``` +ls packages/agents-runtime/test +grep -r 'createRuntimeHandler\|defineEntity' packages/agents-runtime/test/ | head -20 +``` + +If a clean test harness exists (e.g. an in-memory runtime that drives entity handlers end-to-end), use it. If not, proceed with the option B fallback below. + +- [ ] **Step 2: Write the integration test (Option B fallback)** + +```ts +// packages/coding-agents/test/integration/slice-a.test.ts +import { describe, it, expect, beforeAll, afterAll } from 'vitest' +import { + LocalDockerProvider, + StdioBridge, + WorkspaceRegistry, + LifecycleManager, +} from '../../src' +import { makeCodingAgentHandler } from '../../src/entity/handler' +import { + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_SESSION_META_COLLECTION_TYPE, +} from '../../src/entity/collections' +import { buildTestImage, TEST_IMAGE_TAG } from '../support/build-image' +import { loadTestEnv } from '../support/env' + +const SHOULD_RUN = process.env.DOCKER === '1' +const describeMaybe = SHOULD_RUN ? describe : describe.skip + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k) { + return rows.get(k) + }, + get toArray() { + return Array.from(rows.values()) + }, + } +} + +interface FakeCtxState { + sessionMeta: CollectionStub + runs: CollectionStub + events: CollectionStub + lifecycle: CollectionStub + inbox: CollectionStub + recordedRuns: Array<{ key: string; status?: string; response: string }> +} + +function makeFakeCtx(entityUrl: string, args: Record) { + const state: FakeCtxState = { + sessionMeta: makeCollection(), + runs: makeCollection(), + events: makeCollection(), + lifecycle: makeCollection(), + inbox: makeCollection(), + recordedRuns: [], + } + let runCounter = 0 + const ctx: any = { + entityUrl, + entityType: 'coding-agent', + args, + tags: {}, + firstWake: false, + db: { + collections: state, + actions: { + sessionMeta_insert: ({ row }: any) => + state.sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ key, updater }: any) => { + const r = state.sessionMeta.rows.get(key) + if (r) updater(r) + }, + runs_insert: ({ row }: any) => state.runs.rows.set(row.key, row), + runs_update: ({ key, updater }: any) => { + const r = state.runs.rows.get(key) + if (r) updater(r) + }, + events_insert: ({ row }: any) => state.events.rows.set(row.key, row), + lifecycle_insert: ({ row }: any) => + state.lifecycle.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent = { key, status: undefined as string | undefined, response: '' } + state.recordedRuns.push(ent) + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: () => undefined, + } + return { ctx, state } +} + +function pushInbox( + state: FakeCtxState, + key: string, + message_type: string, + payload: any = {} +) { + state.inbox.rows.set(key, { key, message_type, payload }) +} + +describeMaybe('Slice A — full integration', () => { + beforeAll(async () => { + await buildTestImage() + }, 600_000) + + it('spawns, runs prompt, lease-serializes, recovers from crash, destroys', async () => { + const env = loadTestEnv() + const provider = new LocalDockerProvider({ image: TEST_IMAGE_TAG }) + const bridge = new StdioBridge() + const wr = new WorkspaceRegistry() + let lm = new LifecycleManager({ provider, bridge }) + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 2000, + coldBootBudgetMs: 30_000, + runTimeoutMs: 120_000, + }, + env: () => ({ ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }), + }) + + const agentA = `/test/coding-agent/a-${Date.now().toString(36)}` + const sharedName = `slice-a-shared-${Date.now().toString(36)}` + const args = { + kind: 'claude', + workspace: { type: 'volume', name: sharedName }, + lifecycle: { idleTimeoutMs: 2000 }, + } + const { ctx: ctxA, state: stateA } = makeFakeCtx(agentA, args) + + // 1) First-wake init + await handler(ctxA, { type: 'message_received' }) + expect(stateA.sessionMeta.get('current').status).toBe('cold') + + // 2) Send prompt; cold boot + run + pushInbox(stateA, 'i1', 'prompt', { + text: 'Reply with the single word: ok', + }) + await handler(ctxA, { type: 'message_received' }) + + const metaA1 = stateA.sessionMeta.get('current') + expect(metaA1.status).toBe('idle') + const runsA = Array.from(stateA.runs.rows.values()) as any[] + expect(runsA).toHaveLength(1) + expect(runsA[0].status).toBe('completed') + expect(runsA[0].responseText?.length ?? 0).toBeGreaterThan(0) + + // 3) Pin + idle wait + pushInbox(stateA, 'i2', 'pin') + await handler(ctxA, { type: 'message_received' }) + expect(stateA.sessionMeta.get('current').pinned).toBe(true) + + await new Promise((r) => setTimeout(r, 2500)) + expect(await provider.status(agentA)).toBe('running') + + // 4) Release + idle wait => sandbox stops + pushInbox(stateA, 'i3', 'release') + await handler(ctxA, { type: 'message_received' }) + await new Promise((r) => setTimeout(r, 2500)) + expect(await provider.status(agentA)).toBe('unknown') + + // 5) Second prompt: cold-boot path + pushInbox(stateA, 'i4', 'prompt', { text: 'Reply: again' }) + await handler(ctxA, { type: 'message_received' }) + const runsA2 = Array.from(stateA.runs.rows.values()) as any[] + expect(runsA2).toHaveLength(2) + expect(runsA2[1].status).toBe('completed') + + // 6) Second agent on same workspace, lease-serialized + const agentB = `/test/coding-agent/b-${Date.now().toString(36)}` + const { ctx: ctxB, state: stateB } = makeFakeCtx(agentB, args) + await handler(ctxB, { type: 'message_received' }) // first-wake init + pushInbox(stateB, 'j1', 'prompt', { text: 'Reply: B' }) + pushInbox(stateA, 'i5', 'prompt', { text: 'Reply: A' }) + await Promise.all([ + handler(ctxA, { type: 'message_received' }), + handler(ctxB, { type: 'message_received' }), + ]) + const runsAFinal = Array.from(stateA.runs.rows.values()) as any[] + const runsBFinal = Array.from(stateB.runs.rows.values()) as any[] + expect(runsAFinal[runsAFinal.length - 1].status).toBe('completed') + expect(runsBFinal[0].status).toBe('completed') + // Lease serialization: A's last run and B's run intervals don't overlap. + const lastA = runsAFinal[runsAFinal.length - 1] + const firstB = runsBFinal[0] + const noOverlap = + lastA.endedAt <= firstB.startedAt || firstB.endedAt <= lastA.startedAt + expect(noOverlap).toBe(true) + + // 7) Crash-recovery sim: re-register LM with the same provider; verify + // a stale running row gets reconciled. + // Manually inject a stale 'running' row predating the new lm. + const oldRunStart = Date.now() - 60_000 + stateA.runs.rows.set('stale', { + key: 'stale', + startedAt: oldRunStart, + status: 'running', + promptInboxKey: 'fake', + } as any) + stateA.sessionMeta.rows.set('current', { + ...stateA.sessionMeta.get('current'), + status: 'running', + }) + const lm2 = new LifecycleManager({ provider, bridge }) + const handler2 = makeCodingAgentHandler(lm2, wr, { + defaults: { + idleTimeoutMs: 2000, + coldBootBudgetMs: 30_000, + runTimeoutMs: 120_000, + }, + env: () => ({ ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }), + }) + pushInbox(stateA, 'i6', 'prompt', { text: 'after crash' }) + await handler2(ctxA, { type: 'message_received' }) + expect((stateA.runs.get('stale') as any).status).toBe('failed') + expect((stateA.runs.get('stale') as any).finishReason).toBe('orphaned') + const newRuns = (Array.from(stateA.runs.rows.values()) as any[]).filter( + (r) => r.status === 'completed' && r.key !== 'stale' + ) + expect(newRuns.length).toBeGreaterThan(0) + + // 8) Destroy + pushInbox(stateA, 'i7', 'destroy') + await handler2(ctxA, { type: 'message_received' }) + expect(stateA.sessionMeta.get('current').status).toBe('destroyed') + expect(await provider.status(agentA)).toBe('unknown') + + // Cleanup B + await provider.destroy(agentB).catch(() => undefined) + }, 360_000) +}) +``` + +- [ ] **Step 3: Run the integration test** + +``` +DOCKER=1 pnpm -C packages/coding-agents test test/integration/slice-a.test.ts +``` + +Expect: PASS within ~6 minutes (image cached + 3-4 real claude invocations). + +If it fails, **iterate** (max 5 cycles): + +1. Capture failure output. +2. Form a hypothesis (most likely: timing on idle, lease ordering, image name mismatch, env not piped through). +3. Apply fix. +4. Re-run. + +Common pitfalls: + +- **`provider.status` returns `unknown` (not `stopped`).** Adjust assertion: `expect(['stopped', 'unknown']).toContain(s)`. +- **Lease lock-up due to never-completing first prompt.** Verify ANTHROPIC_API_KEY is being piped (`docker logs ` for the bridge's stderr). +- **Second prompt after pin/release fails because container idle-killed mid-flight.** Increase the wait between events. + +After 5 unsuccessful cycles, write a Phase 5 report describing the blocker and stop. + +- [ ] **Step 4: Run all tests one last time** + +``` +pnpm -C packages/coding-agents test +``` + +Expect: all pass (unit + integration). + +- [ ] **Step 5: Commit** + +``` +git add packages/coding-agents/test/integration/slice-a.test.ts +git commit -m "test(coding-agents): Slice A integration smoke (entity, lifecycle, lease, recovery)" +``` + +--- + +## Phase 5 — Report + +### Task 5.1 — Run report + +**Files:** + +- Create: `docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md` + +- [ ] **Step 1: Write report markdown** + +Cover: + +- Validation bar + outcome. +- Per-task: what landed cleanly, what required iteration, fix details. +- Known gaps versus the spec (the two divergences declared up-top: no `onBoot` hook, no `deleteEntityStream`). +- Time + token usage for the run. +- Recommended Slice B priorities (resume + remove-coder + Horton tools). + +- [ ] **Step 2: Commit** + +``` +git add docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md +git commit -m "docs(coding-agents): Slice A run report" +``` + +--- + +## Self-review checklist + +- [x] **Spec coverage:** + - Built-in entity → Task 2.1, 2.2 ✓ + - LifecycleManager → Task 1.B ✓ + - WorkspaceRegistry → Task 1.A ✓ + - `ctx.spawnCodingAgent` / `observeCodingAgent` → Task 2.3 ✓ + - Pin/release/stop/destroy → Task 2.1 ✓ + - Crash recovery → Task 2.1 (reconcile rules) + Task 4.1 (validation) ✓ + - Workspace lease serialization → Task 1.A + Task 4.1 (validation) ✓ + - Server bootstrap → Task 3.1 ✓ + - Integration test → Task 4.1 ✓ + - Spec divergences (no onBoot, no deleteEntityStream) declared at plan top ✓ +- [x] **Placeholder scan:** No "TBD", "TODO", "appropriate handling" left in steps. The Phase 4 fallback explicitly admits the harness-design choice may be revisited; that's a known trade-off, not a placeholder. +- [x] **Type consistency:** + - `CodingAgentStatus` includes `'destroyed'` (added because `destroy()` tombstones). + - `SessionMetaRow.lastInboxKey` declared in Task 2.1 Step 1 before being used in handler. + - `CodingAgentHandle.events()` returns `AsyncIterable` in runtime types (Slice A) since the runtime can't depend on `agent-session-protocol` types directly. Documented. +- [x] **Approval:** Pre-approved per user's "implemnt" message. From 2a43456b4ec751c661a30fbe3a5a3cb177156e0c Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 07:26:47 +0100 Subject: [PATCH 013/279] feat(coding-agents): collection + inbox message schemas for coding-agent entity --- .../coding-agents/src/entity/collections.ts | 78 +++++++++++++++++++ packages/coding-agents/src/entity/messages.ts | 11 +++ 2 files changed, 89 insertions(+) create mode 100644 packages/coding-agents/src/entity/collections.ts create mode 100644 packages/coding-agents/src/entity/messages.ts diff --git a/packages/coding-agents/src/entity/collections.ts b/packages/coding-agents/src/entity/collections.ts new file mode 100644 index 0000000000..46fb5722d4 --- /dev/null +++ b/packages/coding-agents/src/entity/collections.ts @@ -0,0 +1,78 @@ +import { z } from 'zod' + +export const CODING_AGENT_SESSION_META_COLLECTION_TYPE = `coding-agent.sessionMeta` +export const CODING_AGENT_RUNS_COLLECTION_TYPE = `coding-agent.runs` +export const CODING_AGENT_EVENTS_COLLECTION_TYPE = `coding-agent.events` +export const CODING_AGENT_LIFECYCLE_COLLECTION_TYPE = `coding-agent.lifecycle` + +export const codingAgentStatusSchema = z.enum([ + `cold`, + `starting`, + `idle`, + `running`, + `stopping`, + `error`, + `destroyed`, +]) +export type CodingAgentStatus = z.infer + +export const sessionMetaRowSchema = z.object({ + key: z.literal(`current`), + status: codingAgentStatusSchema, + kind: z.enum([`claude`]), + pinned: z.boolean(), + workspaceIdentity: z.string(), + workspaceSpec: z.discriminatedUnion(`type`, [ + z.object({ + type: z.literal(`volume`), + name: z.string(), + }), + z.object({ + type: z.literal(`bindMount`), + hostPath: z.string(), + }), + ]), + idleTimeoutMs: z.number(), + keepWarm: z.boolean(), + instanceId: z.string().optional(), + lastError: z.string().optional(), + currentPromptInboxKey: z.string().optional(), +}) +export type SessionMetaRow = z.infer + +export const runRowSchema = z.object({ + key: z.string(), + startedAt: z.number(), + endedAt: z.number().optional(), + status: z.enum([`running`, `completed`, `failed`]), + finishReason: z.string().optional(), + promptInboxKey: z.string(), + responseText: z.string().optional(), +}) +export type RunRow = z.infer + +export const eventRowSchema = z.object({ + key: z.string(), + runId: z.string(), + seq: z.number(), + ts: z.number(), + type: z.string(), + payload: z.looseObject({}), +}) +export type EventRow = z.infer + +export const lifecycleRowSchema = z.object({ + key: z.string(), + ts: z.number(), + event: z.enum([ + `sandbox.starting`, + `sandbox.started`, + `sandbox.stopped`, + `sandbox.failed`, + `pin`, + `release`, + `orphan.detected`, + ]), + detail: z.string().optional(), +}) +export type LifecycleRow = z.infer diff --git a/packages/coding-agents/src/entity/messages.ts b/packages/coding-agents/src/entity/messages.ts new file mode 100644 index 0000000000..cf3be9a1f8 --- /dev/null +++ b/packages/coding-agents/src/entity/messages.ts @@ -0,0 +1,11 @@ +import { z } from 'zod' + +export const promptMessageSchema = z.object({ + text: z.string(), +}) +export const pinMessageSchema = z.object({}).strict() +export const releaseMessageSchema = z.object({}).strict() +export const stopMessageSchema = z.object({}).strict() +export const destroyMessageSchema = z.object({}).strict() + +export type PromptMessage = z.infer From 70e8a95fb7e49a6fd439b2477fc5233cb2ceebd0 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 07:31:35 +0100 Subject: [PATCH 014/279] feat(coding-agents): add SpawnCodingAgentOptions, RunSummary, defaults --- packages/coding-agents/src/types.ts | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/packages/coding-agents/src/types.ts b/packages/coding-agents/src/types.ts index b8f55f2d42..544f3815dc 100644 --- a/packages/coding-agents/src/types.ts +++ b/packages/coding-agents/src/types.ts @@ -1,4 +1,5 @@ import type { NormalizedEvent } from 'agent-session-protocol' +import type { CodingAgentStatus } from './entity/collections' export type CodingAgentKind = `claude` | `codex` @@ -84,3 +85,46 @@ export interface RunTurnResult { export interface Bridge { runTurn(args: RunTurnArgs): Promise } + +// ─── Slice A: SpawnCodingAgentOptions / RunSummary ────────────────────────── + +export interface SpawnCodingAgentOptions { + /** Stable id, scoped to the spawning entity. */ + id: string + /** Slice A: 'claude' only. */ + kind: `claude` + /** + * Workspace mount. Identity is the lease key. + * { type: 'volume', name: 'foo' } → 'volume:foo' + * { type: 'volume' } → 'volume:' + * { type: 'bindMount', hostPath: P } → 'bindMount:' + */ + workspace: + | { type: `volume`; name?: string } + | { type: `bindMount`; hostPath: string } + /** Initial prompt; queued before the first wake. */ + initialPrompt?: string + /** Slice A: 'runFinished' only. */ + wake?: { on: `runFinished`; includeResponse?: boolean } + /** Lifecycle overrides. */ + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } +} + +export interface RunSummary { + runId: string + startedAt: number + endedAt?: number + status: `running` | `completed` | `failed` + promptInboxKey: string + responseText?: string +} + +export type { CodingAgentStatus } + +/** Defaults applied when a SpawnCodingAgentOptions field is omitted. */ +export const SLICE_A_DEFAULTS = { + idleTimeoutMs: 5 * 60_000, + coldBootBudgetMs: 30_000, + runTimeoutMs: 30 * 60_000, + keepWarm: false, +} as const From b31dcb924194c1ca36b82470e6428c53a71269ef Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 07:34:45 +0100 Subject: [PATCH 015/279] feat(coding-agents): WorkspaceRegistry with identity resolution, refcount, mutex --- .../coding-agents/src/workspace-registry.ts | 75 +++++++++++++ .../test/unit/workspace-registry.test.ts | 105 ++++++++++++++++++ 2 files changed, 180 insertions(+) create mode 100644 packages/coding-agents/src/workspace-registry.ts create mode 100644 packages/coding-agents/test/unit/workspace-registry.test.ts diff --git a/packages/coding-agents/src/workspace-registry.ts b/packages/coding-agents/src/workspace-registry.ts new file mode 100644 index 0000000000..bdba388ce0 --- /dev/null +++ b/packages/coding-agents/src/workspace-registry.ts @@ -0,0 +1,75 @@ +import { realpath } from 'node:fs/promises' + +export type ResolvedWorkspaceSpec = + | { type: `volume`; name: string } + | { type: `bindMount`; hostPath: string } + +export class WorkspaceRegistry { + private readonly refsByIdentity = new Map>() + private readonly chainByIdentity = new Map>() + + static async resolveIdentity( + agentId: string, + spec: + | { type: `volume`; name?: string } + | { type: `bindMount`; hostPath: string } + ): Promise<{ identity: string; resolved: ResolvedWorkspaceSpec }> { + if (spec.type === `volume`) { + const name = spec.name ?? agentId + return { + identity: `volume:${name}`, + resolved: { type: `volume`, name }, + } + } + const real = await realpath(spec.hostPath) + return { + identity: `bindMount:${real}`, + resolved: { type: `bindMount`, hostPath: real }, + } + } + + register(identity: string, agentId: string): void { + let set = this.refsByIdentity.get(identity) + if (!set) { + set = new Set() + this.refsByIdentity.set(identity, set) + } + set.add(agentId) + } + + release(identity: string, agentId: string): void { + const set = this.refsByIdentity.get(identity) + if (!set) return + set.delete(agentId) + if (set.size === 0) this.refsByIdentity.delete(identity) + } + + refs(identity: string): number { + return this.refsByIdentity.get(identity)?.size ?? 0 + } + + /** + * Acquire the per-identity mutex. Returns a release fn. + * The mutex chains promises: each acquire waits for the prior chain to settle. + */ + acquire(identity: string): Promise<() => void> { + const prior = this.chainByIdentity.get(identity) ?? Promise.resolve() + let releaseFn: () => void + const next = new Promise((res) => { + releaseFn = res + }) + this.chainByIdentity.set( + identity, + prior.then(() => next) + ) + return prior.then(() => releaseFn!) + } + + rebuild(snapshots: Array<{ identity: string; agentId: string }>): void { + this.refsByIdentity.clear() + this.chainByIdentity.clear() + for (const { identity, agentId } of snapshots) { + this.register(identity, agentId) + } + } +} diff --git a/packages/coding-agents/test/unit/workspace-registry.test.ts b/packages/coding-agents/test/unit/workspace-registry.test.ts new file mode 100644 index 0000000000..975782f48b --- /dev/null +++ b/packages/coding-agents/test/unit/workspace-registry.test.ts @@ -0,0 +1,105 @@ +import { describe, it, expect } from 'vitest' +import { WorkspaceRegistry } from '../../src/workspace-registry' + +describe(`WorkspaceRegistry.resolveIdentity`, () => { + it(`resolves volume:name when name is provided`, async () => { + const r = await WorkspaceRegistry.resolveIdentity(`/p/coding-agent/x`, { + type: `volume`, + name: `foo`, + }) + expect(r.identity).toBe(`volume:foo`) + expect(r.resolved).toEqual({ type: `volume`, name: `foo` }) + }) + + it(`resolves volume: when name is omitted`, async () => { + const r = await WorkspaceRegistry.resolveIdentity(`/p/coding-agent/x`, { + type: `volume`, + }) + expect(r.identity).toBe(`volume:/p/coding-agent/x`) + expect(r.resolved).toEqual({ type: `volume`, name: `/p/coding-agent/x` }) + }) + + it(`resolves bindMount: for bind mounts`, async () => { + const r = await WorkspaceRegistry.resolveIdentity(`/p/coding-agent/x`, { + type: `bindMount`, + hostPath: `/tmp`, + }) + expect(r.identity).toMatch(/^bindMount:\/(private\/)?tmp$/) + }) +}) + +describe(`WorkspaceRegistry refcount`, () => { + it(`tracks refs across register/release`, () => { + const wr = new WorkspaceRegistry() + expect(wr.refs(`volume:foo`)).toBe(0) + wr.register(`volume:foo`, `a`) + wr.register(`volume:foo`, `b`) + expect(wr.refs(`volume:foo`)).toBe(2) + wr.release(`volume:foo`, `a`) + expect(wr.refs(`volume:foo`)).toBe(1) + wr.release(`volume:foo`, `a`) // double-release is idempotent + expect(wr.refs(`volume:foo`)).toBe(1) + wr.release(`volume:foo`, `b`) + expect(wr.refs(`volume:foo`)).toBe(0) + }) +}) + +describe(`WorkspaceRegistry mutex`, () => { + it(`serializes acquire calls per identity`, async () => { + const wr = new WorkspaceRegistry() + const order: Array = [] + const a = wr.acquire(`volume:foo`).then((release) => { + order.push(`a-acquired`) + return new Promise((res) => + setTimeout(() => { + order.push(`a-release`) + release() + res() + }, 50) + ) + }) + // Make sure b queues behind a + await new Promise((r) => setTimeout(r, 5)) + const b = wr.acquire(`volume:foo`).then((release) => { + order.push(`b-acquired`) + release() + }) + await Promise.all([a, b]) + expect(order).toEqual([`a-acquired`, `a-release`, `b-acquired`]) + }) + + it(`does not serialize across distinct identities`, async () => { + const wr = new WorkspaceRegistry() + const order: Array = [] + const a = wr.acquire(`volume:foo`).then((release) => { + order.push(`a-acq`) + return new Promise((res) => + setTimeout(() => { + release() + res() + }, 50) + ) + }) + const b = wr.acquire(`volume:bar`).then((release) => { + order.push(`b-acq`) + release() + }) + await Promise.all([a, b]) + // b runs before a finishes + expect(order[0]).toBe(`a-acq`) + expect(order[1]).toBe(`b-acq`) + }) +}) + +describe(`WorkspaceRegistry.rebuild`, () => { + it(`replays a snapshot from durable state`, () => { + const wr = new WorkspaceRegistry() + wr.rebuild([ + { identity: `volume:foo`, agentId: `a` }, + { identity: `volume:foo`, agentId: `b` }, + { identity: `volume:bar`, agentId: `c` }, + ]) + expect(wr.refs(`volume:foo`)).toBe(2) + expect(wr.refs(`volume:bar`)).toBe(1) + }) +}) From 1841c38e4756450ebac41891f6885d5dd65d9372 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 07:39:25 +0100 Subject: [PATCH 016/279] feat(coding-agents): LifecycleManager with idle timer and pin refcount --- .../coding-agents/src/lifecycle-manager.ts | 104 +++++++++++++ .../test/unit/lifecycle-manager.test.ts | 147 ++++++++++++++++++ 2 files changed, 251 insertions(+) create mode 100644 packages/coding-agents/src/lifecycle-manager.ts create mode 100644 packages/coding-agents/test/unit/lifecycle-manager.test.ts diff --git a/packages/coding-agents/src/lifecycle-manager.ts b/packages/coding-agents/src/lifecycle-manager.ts new file mode 100644 index 0000000000..4a1873e531 --- /dev/null +++ b/packages/coding-agents/src/lifecycle-manager.ts @@ -0,0 +1,104 @@ +import { log } from './log' +import type { + Bridge, + RecoveredSandbox, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from './types' + +export interface LifecycleManagerDeps { + provider: SandboxProvider + bridge: Bridge +} + +export class LifecycleManager { + readonly provider: SandboxProvider + readonly bridge: Bridge + /** Wall-clock ms captured at construction. Used to detect orphan runs. */ + readonly startedAtMs: number + + private readonly idleTimers = new Map() + private readonly pinCounts = new Map() + + constructor(deps: LifecycleManagerDeps) { + this.provider = deps.provider + this.bridge = deps.bridge + this.startedAtMs = Date.now() + } + + // ── sandbox lifecycle ── + + async ensureRunning(spec: SandboxSpec): Promise { + return this.provider.start(spec) + } + + async stop(agentId: string): Promise { + this.cancelIdleTimer(agentId) + // The provider.destroy/stop interface is keyed by instanceId, not agentId. + // We rely on provider.destroy(agentId) which finds + removes by label. + await this.provider.destroy(agentId).catch((err) => { + log.warn( + { err, agentId }, + `lifecycleManager.stop: provider.destroy failed` + ) + }) + } + + async destroy(agentId: string): Promise { + await this.stop(agentId) + this.pinCounts.delete(agentId) + } + + async adoptRunningContainers(): Promise> { + return this.provider.recover() + } + + // ── idle timer ── + + armIdleTimer(agentId: string, ms: number, onFire: () => void): void { + this.cancelIdleTimer(agentId) + const handle = setTimeout(() => { + this.idleTimers.delete(agentId) + try { + onFire() + } catch (err) { + log.warn({ err, agentId }, `idle timer onFire threw`) + } + }, ms) + this.idleTimers.set(agentId, handle) + } + + cancelIdleTimer(agentId: string): void { + const handle = this.idleTimers.get(agentId) + if (handle) { + clearTimeout(handle) + this.idleTimers.delete(agentId) + } + } + + // ── pin refcount ── + + pin(agentId: string): { count: number } { + const next = (this.pinCounts.get(agentId) ?? 0) + 1 + this.pinCounts.set(agentId, next) + if (next === 1) this.cancelIdleTimer(agentId) + return { count: next } + } + + release(agentId: string): { count: number } { + const cur = this.pinCounts.get(agentId) ?? 0 + const next = Math.max(0, cur - 1) + if (next === 0) this.pinCounts.delete(agentId) + else this.pinCounts.set(agentId, next) + return { count: next } + } + + pinCount(agentId: string): number { + return this.pinCounts.get(agentId) ?? 0 + } + + resetPinCount(agentId: string): void { + this.pinCounts.delete(agentId) + } +} diff --git a/packages/coding-agents/test/unit/lifecycle-manager.test.ts b/packages/coding-agents/test/unit/lifecycle-manager.test.ts new file mode 100644 index 0000000000..6077002fa1 --- /dev/null +++ b/packages/coding-agents/test/unit/lifecycle-manager.test.ts @@ -0,0 +1,147 @@ +import { describe, it, expect, vi } from 'vitest' +import { LifecycleManager } from '../../src/lifecycle-manager' +import type { + Bridge, + ExecHandle, + ExecRequest, + RecoveredSandbox, + RunTurnArgs, + RunTurnResult, + SandboxInstance, + SandboxProvider, + SandboxSpec, +} from '../../src/types' + +function fakeProvider(): SandboxProvider & { + starts: Array + stops: Array +} { + const stub: SandboxInstance = { + instanceId: `inst-1`, + agentId: ``, + workspaceMount: `/workspace`, + async exec(_req: ExecRequest): Promise { + throw new Error(`not used`) + }, + } + const fp: any = { + name: `fake`, + starts: [] as Array, + stops: [] as Array, + async start(spec: SandboxSpec): Promise { + fp.starts.push(spec) + return { ...stub, agentId: spec.agentId } + }, + async stop(instanceId: string): Promise { + fp.stops.push(instanceId) + }, + async destroy(_id: string): Promise {}, + async status(_id: string): Promise<`running` | `stopped` | `unknown`> { + return `running` + }, + async recover(): Promise> { + return [] + }, + } + return fp +} + +const fakeBridge: Bridge = { + async runTurn(_args: RunTurnArgs): Promise { + return { exitCode: 0 } + }, +} + +describe(`LifecycleManager pin refcount`, () => { + it(`increments and decrements with a floor at 0`, () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + expect(lm.pinCount(`a`)).toBe(0) + expect(lm.pin(`a`).count).toBe(1) + expect(lm.pin(`a`).count).toBe(2) + expect(lm.release(`a`).count).toBe(1) + expect(lm.release(`a`).count).toBe(0) + // Extra release is clamped + expect(lm.release(`a`).count).toBe(0) + }) + + it(`resetPinCount clears to 0`, () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + lm.pin(`a`) + lm.pin(`a`) + lm.resetPinCount(`a`) + expect(lm.pinCount(`a`)).toBe(0) + }) +}) + +describe(`LifecycleManager idle timer`, () => { + it(`arms and fires onFire after ms elapses`, async () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const onFire = vi.fn() + lm.armIdleTimer(`a`, 20, onFire) + await new Promise((r) => setTimeout(r, 50)) + expect(onFire).toHaveBeenCalledTimes(1) + }) + + it(`cancelIdleTimer prevents fire`, async () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const onFire = vi.fn() + lm.armIdleTimer(`a`, 20, onFire) + lm.cancelIdleTimer(`a`) + await new Promise((r) => setTimeout(r, 50)) + expect(onFire).not.toHaveBeenCalled() + }) + + it(`arming twice cancels prior timer`, async () => { + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const first = vi.fn() + const second = vi.fn() + lm.armIdleTimer(`a`, 20, first) + lm.armIdleTimer(`a`, 20, second) + await new Promise((r) => setTimeout(r, 50)) + expect(first).not.toHaveBeenCalled() + expect(second).toHaveBeenCalled() + }) +}) + +describe(`LifecycleManager ensureRunning`, () => { + it(`forwards to provider.start`, async () => { + const fp = fakeProvider() + const lm = new LifecycleManager({ provider: fp, bridge: fakeBridge }) + await lm.ensureRunning({ + agentId: `/x/coding-agent/y`, + kind: `claude`, + workspace: { type: `volume`, name: `w` }, + env: { K: `v` }, + }) + expect(fp.starts).toHaveLength(1) + expect(fp.starts[0]!.agentId).toBe(`/x/coding-agent/y`) + }) +}) + +describe(`LifecycleManager.startedAtMs`, () => { + it(`captures a timestamp at construction`, () => { + const before = Date.now() + const lm = new LifecycleManager({ + provider: fakeProvider(), + bridge: fakeBridge, + }) + const after = Date.now() + expect(lm.startedAtMs).toBeGreaterThanOrEqual(before) + expect(lm.startedAtMs).toBeLessThanOrEqual(after) + }) +}) From 627b2afb703f1ba777f3376386c4f06b68405363 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 07:48:06 +0100 Subject: [PATCH 017/279] feat(coding-agents): entity handler with reconcile, prompt/pin/release/stop/destroy Implements Task 2.1 (Slice A): adds lastInboxKey to sessionMeta schema and creates makeCodingAgentHandler driving LifecycleManager + WorkspaceRegistry with full reconcile-on-entry logic and inbox cursor tracking. Co-Authored-By: Claude Sonnet 4.6 --- .../coding-agents/src/entity/collections.ts | 1 + packages/coding-agents/src/entity/handler.ts | 530 ++++++++++++++++++ .../test/unit/entity-handler.test.ts | 336 +++++++++++ 3 files changed, 867 insertions(+) create mode 100644 packages/coding-agents/src/entity/handler.ts create mode 100644 packages/coding-agents/test/unit/entity-handler.test.ts diff --git a/packages/coding-agents/src/entity/collections.ts b/packages/coding-agents/src/entity/collections.ts index 46fb5722d4..131a021c0c 100644 --- a/packages/coding-agents/src/entity/collections.ts +++ b/packages/coding-agents/src/entity/collections.ts @@ -37,6 +37,7 @@ export const sessionMetaRowSchema = z.object({ instanceId: z.string().optional(), lastError: z.string().optional(), currentPromptInboxKey: z.string().optional(), + lastInboxKey: z.string().optional(), }) export type SessionMetaRow = z.infer diff --git a/packages/coding-agents/src/entity/handler.ts b/packages/coding-agents/src/entity/handler.ts new file mode 100644 index 0000000000..032df1585a --- /dev/null +++ b/packages/coding-agents/src/entity/handler.ts @@ -0,0 +1,530 @@ +import type { NormalizedEvent } from 'agent-session-protocol' +import { log } from '../log' +import { WorkspaceRegistry } from '../workspace-registry' +import type { LifecycleManager } from '../lifecycle-manager' +import type { + RunRow, + SessionMetaRow, + EventRow, + LifecycleRow, +} from './collections' +import { promptMessageSchema } from './messages' + +export interface CodingAgentHandlerOptions { + defaults: { + idleTimeoutMs: number + coldBootBudgetMs: number + runTimeoutMs: number + } + /** Called per-turn to source CLI env (e.g. ANTHROPIC_API_KEY). */ + env: () => Record +} + +interface InboxRow { + key: string + payload?: unknown + message_type?: string +} + +const NS_MAX = String(Number.MAX_SAFE_INTEGER).length + +function eventKey(runId: string, seq: number): string { + return `${runId}:${String(seq).padStart(NS_MAX, `0`)}` +} + +function lifecycleKey(label: string): string { + return `${label}:${Date.now()}-${Math.floor(Math.random() * 1000)}` +} + +function raceTimeout(p: Promise, ms: number): Promise { + return new Promise((resolve, reject) => { + const handle = setTimeout(() => { + const e = new Error(`TimeoutError`) + ;(e as any).name = `TimeoutError` + reject(e) + }, ms) + p.then( + (v) => { + clearTimeout(handle) + resolve(v) + }, + (err) => { + clearTimeout(handle) + reject(err) + } + ) + }) +} + +export function makeCodingAgentHandler( + lm: LifecycleManager, + wr: WorkspaceRegistry, + options: CodingAgentHandlerOptions +) { + return async function handleCodingAgentEntity( + ctx: any, + _wake: any + ): Promise { + const agentId = ctx.entityUrl as string + const sessionMetaCol = ctx.db.collections.sessionMeta + const runsCol = ctx.db.collections.runs + const inboxCol = ctx.db.collections.inbox + + // ─── 1) FIRST-WAKE INIT ──────────────────────────────────────────────── + + let meta = sessionMetaCol.get(`current`) as SessionMetaRow | undefined + if (!meta) { + const args = ctx.args as { + kind?: `claude` + workspace?: any + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } + } + const ws = args.workspace ?? { type: `volume` } + const resolved = await WorkspaceRegistry.resolveIdentity(agentId, ws) + const idleTimeoutMs = + args.lifecycle?.idleTimeoutMs ?? options.defaults.idleTimeoutMs + const keepWarm = args.lifecycle?.keepWarm ?? false + const initial: SessionMetaRow = { + key: `current`, + status: `cold`, + kind: args.kind ?? `claude`, + pinned: false, + workspaceIdentity: resolved.identity, + workspaceSpec: resolved.resolved, + idleTimeoutMs, + keepWarm, + } + ctx.db.actions.sessionMeta_insert({ row: initial }) + wr.register(resolved.identity, agentId) + meta = initial + } + + if (meta.status === `destroyed`) { + // Tombstoned. Ignore everything. + return + } + + // ─── 2) RECONCILE ────────────────────────────────────────────────────── + + const providerStatus = await lm.provider.status(agentId) + const openRun = (runsCol.toArray as Array).find( + (r) => r.status === `running` + ) + const isOrphaned = openRun && openRun.startedAt < lm.startedAtMs + + if (meta.status === `running` && providerStatus !== `running`) { + if (openRun) { + ctx.db.actions.runs_update({ + key: openRun.key, + updater: (d: RunRow) => { + d.status = `failed` + d.finishReason = `orphaned` + d.endedAt = Date.now() + }, + }) + } + ctx.db.actions.lifecycle_insert({ + row: { + key: lifecycleKey(`orphan`), + ts: Date.now(), + event: `orphan.detected`, + } satisfies LifecycleRow, + }) + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `cold` + d.instanceId = undefined + }, + }) + meta = sessionMetaCol.get(`current`)! + } else if ( + meta.status === `running` && + providerStatus === `running` && + isOrphaned + ) { + ctx.db.actions.runs_update({ + key: openRun!.key, + updater: (d: RunRow) => { + d.status = `failed` + d.finishReason = `orphaned` + d.endedAt = Date.now() + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: lifecycleKey(`orphan`), + ts: Date.now(), + event: `orphan.detected`, + } satisfies LifecycleRow, + }) + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `idle` + }, + }) + meta = sessionMetaCol.get(`current`)! + } else if (meta.status === `idle` && providerStatus === `stopped`) { + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `cold` + d.instanceId = undefined + }, + }) + meta = sessionMetaCol.get(`current`)! + } else if ( + (meta.status === `starting` || meta.status === `stopping`) && + providerStatus !== `running` + ) { + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `cold` + }, + }) + meta = sessionMetaCol.get(`current`)! + } else if ( + (meta.status === `starting` || meta.status === `stopping`) && + providerStatus === `running` + ) { + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `idle` + }, + }) + meta = sessionMetaCol.get(`current`)! + } + + // ─── 3) PROCESS PENDING INBOX ────────────────────────────────────────── + + const inboxRows = (inboxCol.toArray as Array) + .slice() + .sort((a, b) => (a.key < b.key ? -1 : a.key > b.key ? 1 : 0)) + const lastKey = meta.lastInboxKey ?? `` + const pending = inboxRows.filter((m) => m.key > lastKey) + + for (const inboxMsg of pending) { + try { + await dispatchInboxMessage(ctx, lm, wr, options, inboxMsg) + } catch (err) { + log.error({ err, inboxMsg }, `coding-agent handler dispatch threw`) + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `error` + d.lastError = err instanceof Error ? err.message : String(err) + }, + }) + } + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.lastInboxKey = inboxMsg.key + }, + }) + meta = sessionMetaCol.get(`current`)! + if (meta.status === `destroyed`) return + } + } +} + +async function dispatchInboxMessage( + ctx: any, + lm: LifecycleManager, + wr: WorkspaceRegistry, + options: CodingAgentHandlerOptions, + inboxMsg: InboxRow +): Promise { + const type = inboxMsg.message_type ?? `prompt` + switch (type) { + case `prompt`: + return processPrompt(ctx, lm, wr, options, inboxMsg) + case `pin`: + return processPin(ctx, lm) + case `release`: + return processRelease(ctx, lm) + case `stop`: + return processStop(ctx, lm) + case `destroy`: + return processDestroy(ctx, lm, wr) + default: + log.warn({ type }, `coding-agent: unknown inbox message type`) + } +} + +async function processPrompt( + ctx: any, + lm: LifecycleManager, + wr: WorkspaceRegistry, + options: CodingAgentHandlerOptions, + inboxMsg: InboxRow +): Promise { + const parsed = promptMessageSchema.safeParse(inboxMsg.payload) + if (!parsed.success) return + const promptText = parsed.data.text + const agentId = ctx.entityUrl as string + const sessionMetaCol = ctx.db.collections.sessionMeta + + let meta = sessionMetaCol.get(`current`) as SessionMetaRow + + // Cold-boot: ensure sandbox up + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `starting` + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `boot:${Date.now()}`, + ts: Date.now(), + event: `sandbox.starting`, + } satisfies LifecycleRow, + }) + + let sandbox + try { + sandbox = await raceTimeout( + lm.ensureRunning({ + agentId, + kind: meta.kind, + workspace: meta.workspaceSpec, + env: options.env(), + }), + options.defaults.coldBootBudgetMs + ) + } catch (err) { + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `error` + d.lastError = err instanceof Error ? err.message : String(err) + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `boot:${Date.now()}`, + ts: Date.now(), + event: `sandbox.failed`, + detail: err instanceof Error ? err.message : String(err), + } satisfies LifecycleRow, + }) + return + } + + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `idle` + d.instanceId = sandbox.instanceId + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `boot:${Date.now()}`, + ts: Date.now(), + event: `sandbox.started`, + } satisfies LifecycleRow, + }) + + meta = sessionMetaCol.get(`current`)! + const releaseLease = await wr.acquire(meta.workspaceIdentity) + try { + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `running` + d.currentPromptInboxKey = inboxMsg.key + }, + }) + + const recordedRun = ctx.recordRun() + const runId = recordedRun.key + ctx.db.actions.runs_insert({ + row: { + key: runId, + startedAt: Date.now(), + status: `running`, + promptInboxKey: inboxMsg.key, + } satisfies RunRow, + }) + + let seq = 0 + let finalText: string | undefined + try { + const result = await raceTimeout( + lm.bridge.runTurn({ + sandbox, + kind: meta.kind, + prompt: promptText, + onEvent: (e: NormalizedEvent) => { + ctx.db.actions.events_insert({ + row: { + key: eventKey(runId, seq), + runId, + seq, + ts: Date.now(), + type: e.type, + payload: e as unknown as Record, + } satisfies EventRow, + }) + seq++ + }, + }), + options.defaults.runTimeoutMs + ) + finalText = result.finalText + ctx.db.actions.runs_update({ + key: runId, + updater: (d: RunRow) => { + d.status = `completed` + d.endedAt = Date.now() + d.responseText = finalText + }, + }) + if (finalText) recordedRun.attachResponse(finalText) + recordedRun.end({ status: `completed` }) + } catch (err) { + const reason = + err instanceof Error && err.name === `TimeoutError` + ? `timeout` + : `cli-exit:${(err instanceof Error ? err.message : String(err)).slice(0, 200)}` + ctx.db.actions.runs_update({ + key: runId, + updater: (d: RunRow) => { + d.status = `failed` + d.endedAt = Date.now() + d.finishReason = reason + }, + }) + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `error` + d.lastError = err instanceof Error ? err.message : String(err) + }, + }) + recordedRun.end({ status: `failed` }) + return + } + + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `idle` + d.currentPromptInboxKey = undefined + }, + }) + + if (!meta.keepWarm && lm.pinCount(agentId) === 0) { + lm.armIdleTimer(agentId, meta.idleTimeoutMs, () => { + // Fire-and-forget: provider.destroy is keyed by agentId. + void lm.provider.destroy(agentId).catch((err) => { + log.warn({ err, agentId }, `idle stop failed`) + }) + }) + } + } finally { + releaseLease() + } +} + +function processPin(ctx: any, lm: LifecycleManager): void { + const agentId = ctx.entityUrl as string + const { count } = lm.pin(agentId) + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.pinned = true + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `pin:${Date.now()}`, + ts: Date.now(), + event: `pin`, + detail: `count=${count}`, + } satisfies LifecycleRow, + }) +} + +function processRelease(ctx: any, lm: LifecycleManager): void { + const agentId = ctx.entityUrl as string + const { count } = lm.release(agentId) + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.pinned = count > 0 + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `release:${Date.now()}`, + ts: Date.now(), + event: `release`, + detail: `count=${count}`, + } satisfies LifecycleRow, + }) + if (count === 0) { + const meta = ctx.db.collections.sessionMeta.get(`current`) as SessionMetaRow + if (!meta.keepWarm && meta.status === `idle`) { + lm.armIdleTimer(agentId, meta.idleTimeoutMs, () => { + void lm.provider.destroy(agentId).catch(() => undefined) + }) + } + } +} + +async function processStop(ctx: any, lm: LifecycleManager): Promise { + const agentId = ctx.entityUrl as string + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `stopping` + }, + }) + await lm.stop(agentId) + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `cold` + d.instanceId = undefined + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `stop:${Date.now()}`, + ts: Date.now(), + event: `sandbox.stopped`, + } satisfies LifecycleRow, + }) +} + +async function processDestroy( + ctx: any, + lm: LifecycleManager, + wr: WorkspaceRegistry +): Promise { + const agentId = ctx.entityUrl as string + const meta = ctx.db.collections.sessionMeta.get(`current`) as SessionMetaRow + await lm.destroy(agentId) + if (meta) wr.release(meta.workspaceIdentity, agentId) + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.status = `destroyed` + d.instanceId = undefined + }, + }) + ctx.db.actions.lifecycle_insert({ + row: { + key: `destroy:${Date.now()}`, + ts: Date.now(), + event: `sandbox.stopped`, + detail: `destroyed`, + } satisfies LifecycleRow, + }) +} diff --git a/packages/coding-agents/test/unit/entity-handler.test.ts b/packages/coding-agents/test/unit/entity-handler.test.ts new file mode 100644 index 0000000000..fc5f78354b --- /dev/null +++ b/packages/coding-agents/test/unit/entity-handler.test.ts @@ -0,0 +1,336 @@ +import { describe, it, expect, vi } from 'vitest' +import { makeCodingAgentHandler } from '../../src/entity/handler' +import { LifecycleManager } from '../../src/lifecycle-manager' +import { WorkspaceRegistry } from '../../src/workspace-registry' +import type { + Bridge, + RunTurnArgs, + RunTurnResult, + SandboxInstance, + SandboxSpec, +} from '../../src/types' + +// ── Fakes ── + +interface InboxRow { + key: string + payload?: unknown + message_type?: string +} + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k: string) { + return rows.get(k) + }, + get toArray(): Array { + return Array.from(rows.values()) + }, + } +} + +function makeFakeCtx(opts: { + entityUrl: string + args?: Record + inbox?: Array + meta?: any + runs?: Array +}) { + const sessionMeta = makeCollection() + const runs = makeCollection() + const events = makeCollection() + const lifecycle = makeCollection() + const inbox = makeCollection() + + if (opts.meta) sessionMeta.rows.set(`current`, opts.meta) + for (const r of opts.runs ?? []) runs.rows.set(r.key, r) + for (const i of opts.inbox ?? []) inbox.rows.set(i.key, i) + + const recordedRuns: Array<{ + key: string + status?: string + response: string + }> = [] + let runCounter = 0 + + const ctx: any = { + entityUrl: opts.entityUrl, + entityType: `coding-agent`, + args: opts.args ?? {}, + tags: {}, + firstWake: false, + db: { + collections: { sessionMeta, runs, events, lifecycle, inbox }, + actions: { + sessionMeta_insert: ({ row }: { row: any }) => + sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ + key, + updater, + }: { + key: string + updater: (d: any) => void + }) => { + const cur = sessionMeta.rows.get(key) + if (cur) updater(cur) + }, + runs_insert: ({ row }: { row: any }) => runs.rows.set(row.key, row), + runs_update: ({ + key, + updater, + }: { + key: string + updater: (d: any) => void + }) => { + const cur = runs.rows.get(key) + if (cur) updater(cur) + }, + events_insert: ({ row }: { row: any }) => events.rows.set(row.key, row), + lifecycle_insert: ({ row }: { row: any }) => + lifecycle.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent = { key, status: undefined as string | undefined, response: `` } + recordedRuns.push(ent) + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: vi.fn(), + } + + return { ctx, recordedRuns } +} + +function makeFakeProvider( + initialStatus: `running` | `stopped` | `unknown` = `stopped` +) { + const stub: SandboxInstance = { + instanceId: `inst-1`, + agentId: ``, + workspaceMount: `/workspace`, + async exec() { + throw new Error(`not used`) + }, + } + const fp: any = { + name: `fake`, + statusReturn: initialStatus, + async start(spec: SandboxSpec): Promise { + return { ...stub, agentId: spec.agentId } + }, + async stop(_id: string) {}, + async destroy(_id: string) {}, + async status() { + return fp.statusReturn + }, + async recover() { + return [] + }, + } + return fp +} + +describe(`entity handler — first-wake init`, () => { + it(`seeds sessionMeta when none exists, using args`, async () => { + const lm = new LifecycleManager({ + provider: makeFakeProvider(), + bridge: { + async runTurn() { + return { exitCode: 0 } + }, + }, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({}), + }) + + const { ctx } = makeFakeCtx({ + entityUrl: `/test/coding-agent/x`, + args: { + kind: `claude`, + workspace: { type: `volume`, name: `w` }, + }, + }) + + await handler(ctx, { type: `message_received` } as any) + + const meta = ctx.db.collections.sessionMeta.get(`current`) + expect(meta).toBeDefined() + expect(meta.status).toBe(`cold`) + expect(meta.kind).toBe(`claude`) + expect(meta.workspaceIdentity).toBe(`volume:w`) + expect(meta.pinned).toBe(false) + }) +}) + +describe(`entity handler — pin/release`, () => { + it(`pin sets pinned=true and cancels timer`, async () => { + const lm = new LifecycleManager({ + provider: makeFakeProvider(`running`), + bridge: { + async runTurn() { + return { exitCode: 0 } + }, + }, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({}), + }) + const meta = { + key: `current`, + status: `idle`, + kind: `claude`, + pinned: false, + workspaceIdentity: `volume:w`, + workspaceSpec: { type: `volume`, name: `w` }, + idleTimeoutMs: 1000, + keepWarm: false, + } + const { ctx } = makeFakeCtx({ + entityUrl: `/t/coding-agent/x`, + meta, + inbox: [{ key: `i1`, message_type: `pin` }], + }) + await handler(ctx, { type: `message_received` } as any) + expect(ctx.db.collections.sessionMeta.get(`current`).pinned).toBe(true) + expect(lm.pinCount(`/t/coding-agent/x`)).toBe(1) + }) +}) + +describe(`entity handler — reconcile orphan run`, () => { + it(`marks orphan run failed when meta=running and run.startedAt < lm.startedAtMs`, async () => { + const lm = new LifecycleManager({ + provider: makeFakeProvider(`stopped`), + bridge: { + async runTurn() { + return { exitCode: 0 } + }, + }, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({}), + }) + const oldStart = lm.startedAtMs - 10_000 + const meta = { + key: `current`, + status: `running`, + kind: `claude`, + pinned: false, + workspaceIdentity: `volume:w`, + workspaceSpec: { type: `volume`, name: `w` }, + idleTimeoutMs: 1000, + keepWarm: false, + instanceId: `old-inst`, + } + const orphanRun = { + key: `run-old`, + startedAt: oldStart, + status: `running`, + promptInboxKey: `i0`, + } + const { ctx } = makeFakeCtx({ + entityUrl: `/t/coding-agent/x`, + meta, + runs: [orphanRun], + }) + await handler(ctx, { type: `message_received` } as any) + const updated = ctx.db.collections.runs.get(`run-old`) + expect(updated.status).toBe(`failed`) + expect(updated.finishReason).toBe(`orphaned`) + expect(ctx.db.collections.sessionMeta.get(`current`).status).toBe(`cold`) + }) +}) + +describe(`entity handler — processPrompt happy path`, () => { + it(`runs a turn, records events, ends run completed`, async () => { + const events: Array = [ + { type: `session_init`, sessionId: `abc`, ts: 1 }, + { type: `assistant_message`, text: `hello`, ts: 2 }, + ] + const bridge: Bridge = { + async runTurn(args: RunTurnArgs): Promise { + for (const e of events) args.onEvent(e as any) + return { exitCode: 0, finalText: `hello` } + }, + } + const lm = new LifecycleManager({ + provider: makeFakeProvider(`stopped`), + bridge, + }) + const wr = new WorkspaceRegistry() + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1000, + coldBootBudgetMs: 5000, + runTimeoutMs: 5000, + }, + env: () => ({ ANTHROPIC_API_KEY: `sk-test` }), + }) + const meta = { + key: `current`, + status: `cold`, + kind: `claude`, + pinned: false, + workspaceIdentity: `volume:w`, + workspaceSpec: { type: `volume`, name: `w` }, + idleTimeoutMs: 1000, + keepWarm: false, + } + const { ctx, recordedRuns } = makeFakeCtx({ + entityUrl: `/t/coding-agent/x`, + meta, + inbox: [{ key: `i1`, message_type: `prompt`, payload: { text: `hi` } }], + }) + await handler(ctx, { type: `message_received` } as any) + + expect(recordedRuns).toHaveLength(1) + expect(recordedRuns[0]!.status).toBe(`completed`) + expect(recordedRuns[0]!.response).toBe(`hello`) + + const finalMeta = ctx.db.collections.sessionMeta.get(`current`) + expect(finalMeta.status).toBe(`idle`) + + const runs = Array.from(ctx.db.collections.runs.rows.values()) + expect(runs).toHaveLength(1) + expect((runs[0] as any).status).toBe(`completed`) + + const eventRows = Array.from(ctx.db.collections.events.rows.values()) + expect(eventRows).toHaveLength(2) + }) +}) From d5efd727ec3f31439e3d4618894df0272c5806d8 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 07:55:07 +0100 Subject: [PATCH 018/279] fix(coding-agents): tighten meta type narrowing, unique lifecycle keys, fresh meta read for idle timer Co-Authored-By: Claude Sonnet 4.6 --- packages/coding-agents/src/entity/handler.ts | 42 +++++++++++--------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/packages/coding-agents/src/entity/handler.ts b/packages/coding-agents/src/entity/handler.ts index 032df1585a..72f188bcc8 100644 --- a/packages/coding-agents/src/entity/handler.ts +++ b/packages/coding-agents/src/entity/handler.ts @@ -72,8 +72,11 @@ export function makeCodingAgentHandler( // ─── 1) FIRST-WAKE INIT ──────────────────────────────────────────────── - let meta = sessionMetaCol.get(`current`) as SessionMetaRow | undefined - if (!meta) { + const initialMeta = sessionMetaCol.get(`current`) as + | SessionMetaRow + | undefined + let meta: SessionMetaRow + if (!initialMeta) { const args = ctx.args as { kind?: `claude` workspace?: any @@ -97,6 +100,8 @@ export function makeCodingAgentHandler( ctx.db.actions.sessionMeta_insert({ row: initial }) wr.register(resolved.identity, agentId) meta = initial + } else { + meta = initialMeta } if (meta.status === `destroyed`) { @@ -137,7 +142,7 @@ export function makeCodingAgentHandler( d.instanceId = undefined }, }) - meta = sessionMetaCol.get(`current`)! + meta = sessionMetaCol.get(`current`) as SessionMetaRow } else if ( meta.status === `running` && providerStatus === `running` && @@ -164,7 +169,7 @@ export function makeCodingAgentHandler( d.status = `idle` }, }) - meta = sessionMetaCol.get(`current`)! + meta = sessionMetaCol.get(`current`) as SessionMetaRow } else if (meta.status === `idle` && providerStatus === `stopped`) { ctx.db.actions.sessionMeta_update({ key: `current`, @@ -173,7 +178,7 @@ export function makeCodingAgentHandler( d.instanceId = undefined }, }) - meta = sessionMetaCol.get(`current`)! + meta = sessionMetaCol.get(`current`) as SessionMetaRow } else if ( (meta.status === `starting` || meta.status === `stopping`) && providerStatus !== `running` @@ -184,7 +189,7 @@ export function makeCodingAgentHandler( d.status = `cold` }, }) - meta = sessionMetaCol.get(`current`)! + meta = sessionMetaCol.get(`current`) as SessionMetaRow } else if ( (meta.status === `starting` || meta.status === `stopping`) && providerStatus === `running` @@ -195,7 +200,7 @@ export function makeCodingAgentHandler( d.status = `idle` }, }) - meta = sessionMetaCol.get(`current`)! + meta = sessionMetaCol.get(`current`) as SessionMetaRow } // ─── 3) PROCESS PENDING INBOX ────────────────────────────────────────── @@ -225,7 +230,7 @@ export function makeCodingAgentHandler( d.lastInboxKey = inboxMsg.key }, }) - meta = sessionMetaCol.get(`current`)! + meta = sessionMetaCol.get(`current`) as SessionMetaRow if (meta.status === `destroyed`) return } } @@ -279,7 +284,7 @@ async function processPrompt( }) ctx.db.actions.lifecycle_insert({ row: { - key: `boot:${Date.now()}`, + key: lifecycleKey(`boot`), ts: Date.now(), event: `sandbox.starting`, } satisfies LifecycleRow, @@ -306,7 +311,7 @@ async function processPrompt( }) ctx.db.actions.lifecycle_insert({ row: { - key: `boot:${Date.now()}`, + key: lifecycleKey(`boot`), ts: Date.now(), event: `sandbox.failed`, detail: err instanceof Error ? err.message : String(err), @@ -324,13 +329,13 @@ async function processPrompt( }) ctx.db.actions.lifecycle_insert({ row: { - key: `boot:${Date.now()}`, + key: lifecycleKey(`boot`), ts: Date.now(), event: `sandbox.started`, } satisfies LifecycleRow, }) - meta = sessionMetaCol.get(`current`)! + meta = sessionMetaCol.get(`current`) as SessionMetaRow const releaseLease = await wr.acquire(meta.workspaceIdentity) try { ctx.db.actions.sessionMeta_update({ @@ -419,8 +424,9 @@ async function processPrompt( }, }) - if (!meta.keepWarm && lm.pinCount(agentId) === 0) { - lm.armIdleTimer(agentId, meta.idleTimeoutMs, () => { + const finalMeta = sessionMetaCol.get(`current`) as SessionMetaRow + if (!finalMeta.keepWarm && lm.pinCount(agentId) === 0) { + lm.armIdleTimer(agentId, finalMeta.idleTimeoutMs, () => { // Fire-and-forget: provider.destroy is keyed by agentId. void lm.provider.destroy(agentId).catch((err) => { log.warn({ err, agentId }, `idle stop failed`) @@ -443,7 +449,7 @@ function processPin(ctx: any, lm: LifecycleManager): void { }) ctx.db.actions.lifecycle_insert({ row: { - key: `pin:${Date.now()}`, + key: lifecycleKey(`pin`), ts: Date.now(), event: `pin`, detail: `count=${count}`, @@ -462,7 +468,7 @@ function processRelease(ctx: any, lm: LifecycleManager): void { }) ctx.db.actions.lifecycle_insert({ row: { - key: `release:${Date.now()}`, + key: lifecycleKey(`release`), ts: Date.now(), event: `release`, detail: `count=${count}`, @@ -496,7 +502,7 @@ async function processStop(ctx: any, lm: LifecycleManager): Promise { }) ctx.db.actions.lifecycle_insert({ row: { - key: `stop:${Date.now()}`, + key: lifecycleKey(`stop`), ts: Date.now(), event: `sandbox.stopped`, } satisfies LifecycleRow, @@ -521,7 +527,7 @@ async function processDestroy( }) ctx.db.actions.lifecycle_insert({ row: { - key: `destroy:${Date.now()}`, + key: lifecycleKey(`destroy`), ts: Date.now(), event: `sandbox.stopped`, detail: `destroyed`, From 036ce99f2ff2875ffec41fa06730193c6ec35b90 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 07:59:35 +0100 Subject: [PATCH 019/279] feat(coding-agents): registerCodingAgent helper --- packages/coding-agents/package.json | 1 + packages/coding-agents/src/entity/register.ts | 123 ++++++++++++++++++ packages/coding-agents/src/index.ts | 15 +++ 3 files changed, 139 insertions(+) create mode 100644 packages/coding-agents/src/entity/register.ts diff --git a/packages/coding-agents/package.json b/packages/coding-agents/package.json index 0adc00d5e0..2a5c502565 100644 --- a/packages/coding-agents/package.json +++ b/packages/coding-agents/package.json @@ -34,6 +34,7 @@ "./package.json": "./package.json" }, "dependencies": { + "@electric-ax/agents-runtime": "workspace:*", "agent-session-protocol": "^0.0.2", "pino": "^10.3.1", "pino-pretty": "^13.0.0", diff --git a/packages/coding-agents/src/entity/register.ts b/packages/coding-agents/src/entity/register.ts new file mode 100644 index 0000000000..2b75f221d8 --- /dev/null +++ b/packages/coding-agents/src/entity/register.ts @@ -0,0 +1,123 @@ +import type { EntityRegistry } from '@electric-ax/agents-runtime' +import { LifecycleManager } from '../lifecycle-manager' +import { WorkspaceRegistry } from '../workspace-registry' +import { SLICE_A_DEFAULTS } from '../types' +import type { Bridge, SandboxProvider } from '../types' +import { + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + eventRowSchema, + lifecycleRowSchema, + runRowSchema, + sessionMetaRowSchema, +} from './collections' +import { + destroyMessageSchema, + pinMessageSchema, + promptMessageSchema, + releaseMessageSchema, + stopMessageSchema, +} from './messages' +import { makeCodingAgentHandler } from './handler' +import { z } from 'zod' + +export interface RegisterCodingAgentDeps { + provider: SandboxProvider + bridge: Bridge + /** Override defaults; used by tests. */ + defaults?: Partial<{ + idleTimeoutMs: number + coldBootBudgetMs: number + runTimeoutMs: number + }> + /** Per-turn env supplier. Defaults to forwarding ANTHROPIC_API_KEY from process.env. */ + env?: () => Record +} + +const creationArgsSchema = z.object({ + kind: z.enum([`claude`]).optional(), + workspace: z + .union([ + z.object({ + type: z.literal(`volume`), + name: z.string().optional(), + }), + z.object({ + type: z.literal(`bindMount`), + hostPath: z.string(), + }), + ]) + .optional(), + lifecycle: z + .object({ + idleTimeoutMs: z.number().optional(), + keepWarm: z.boolean().optional(), + }) + .optional(), +}) + +export function registerCodingAgent( + registry: EntityRegistry, + deps: RegisterCodingAgentDeps +): void { + const lm = new LifecycleManager(deps) + const wr = new WorkspaceRegistry() + const defaults = { + idleTimeoutMs: + deps.defaults?.idleTimeoutMs ?? SLICE_A_DEFAULTS.idleTimeoutMs, + coldBootBudgetMs: + deps.defaults?.coldBootBudgetMs ?? SLICE_A_DEFAULTS.coldBootBudgetMs, + runTimeoutMs: deps.defaults?.runTimeoutMs ?? SLICE_A_DEFAULTS.runTimeoutMs, + } + const env = + deps.env ?? + (() => { + const out: Record = {} + const k = process.env.ANTHROPIC_API_KEY + if (k) out.ANTHROPIC_API_KEY = k + return out + }) + + registry.define(`coding-agent`, { + description: `Runs a Claude Code CLI session inside a Docker sandbox. Manages lifecycle (cold/idle/running) and workspace lease.`, + creationSchema: creationArgsSchema, + inboxSchemas: { + prompt: promptMessageSchema, + pin: pinMessageSchema, + release: releaseMessageSchema, + stop: stopMessageSchema, + destroy: destroyMessageSchema, + }, + state: { + sessionMeta: { + schema: sessionMetaRowSchema, + type: CODING_AGENT_SESSION_META_COLLECTION_TYPE, + primaryKey: `key`, + }, + runs: { + schema: runRowSchema, + type: CODING_AGENT_RUNS_COLLECTION_TYPE, + primaryKey: `key`, + }, + events: { + schema: eventRowSchema, + type: CODING_AGENT_EVENTS_COLLECTION_TYPE, + primaryKey: `key`, + }, + lifecycle: { + schema: lifecycleRowSchema, + type: CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + primaryKey: `key`, + }, + }, + handler: makeCodingAgentHandler(lm, wr, { defaults, env }), + }) +} + +/** Test-only accessor for asserting workspace registry state from outside. */ +export interface CodingAgentInternals { + lifecycleManager: LifecycleManager + workspaceRegistry: WorkspaceRegistry +} diff --git a/packages/coding-agents/src/index.ts b/packages/coding-agents/src/index.ts index c1dd62b07a..bc06882fc7 100644 --- a/packages/coding-agents/src/index.ts +++ b/packages/coding-agents/src/index.ts @@ -9,6 +9,21 @@ export type { RunTurnArgs, RunTurnResult, Bridge, + SpawnCodingAgentOptions, + RunSummary, + CodingAgentStatus, } from './types' export { LocalDockerProvider } from './providers/local-docker' export { StdioBridge } from './bridge/stdio-bridge' +export { LifecycleManager } from './lifecycle-manager' +export { WorkspaceRegistry } from './workspace-registry' +export { + registerCodingAgent, + type RegisterCodingAgentDeps, +} from './entity/register' +export { + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, +} from './entity/collections' From 22a97c590be954a933970e62b6f35da97186436d Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 08:07:05 +0100 Subject: [PATCH 020/279] refactor(coding-agents): remove unused CodingAgentInternals interface --- packages/coding-agents/src/entity/register.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/packages/coding-agents/src/entity/register.ts b/packages/coding-agents/src/entity/register.ts index 2b75f221d8..82c1b5d615 100644 --- a/packages/coding-agents/src/entity/register.ts +++ b/packages/coding-agents/src/entity/register.ts @@ -115,9 +115,3 @@ export function registerCodingAgent( handler: makeCodingAgentHandler(lm, wr, { defaults, env }), }) } - -/** Test-only accessor for asserting workspace registry state from outside. */ -export interface CodingAgentInternals { - lifecycleManager: LifecycleManager - workspaceRegistry: WorkspaceRegistry -} From 260e9146ed645d26472e2dca8c0acba6f465101c Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 08:12:31 +0100 Subject: [PATCH 021/279] feat(agents-runtime): ctx.spawnCodingAgent / observeCodingAgent typed primitives Adds SpawnCodingAgentOptions, CodingAgentHandle and supporting types to types.ts, implements spawnCodingAgent and observeCodingAgent on HandlerContext (mirroring useCodingAgent), and ships a makeCodingAgentHandle helper. Contract test adds 2 cases. Co-Authored-By: Claude Sonnet 4.6 --- .../agents-runtime/src/context-factory.ts | 121 ++++++++++++++++++ packages/agents-runtime/src/types.ts | 61 +++++++++ .../test/spawn-coding-agent.test.ts | 32 +++++ 3 files changed, 214 insertions(+) create mode 100644 packages/agents-runtime/test/spawn-coding-agent.test.ts diff --git a/packages/agents-runtime/src/context-factory.ts b/packages/agents-runtime/src/context-factory.ts index 713316f653..002f843bb7 100644 --- a/packages/agents-runtime/src/context-factory.ts +++ b/packages/agents-runtime/src/context-factory.ts @@ -16,6 +16,7 @@ import { CACHE_TIERS } from './types' import { CODING_SESSION_ENTITY_TYPE, codingSessionEntityUrl, + entity as entityObservationSource, } from './observation-sources' import type { ChangeEvent } from '@durable-streams/state' import type { @@ -24,6 +25,9 @@ import type { AgentModel, AgentRunResult, AgentTool, + CodingAgentHandle, + CodingAgentRunSummary, + CodingAgentState, CodingSessionEventRow, CodingSessionHandle, CodingSessionMeta, @@ -37,6 +41,7 @@ import type { RunHandle, SharedStateHandle, SharedStateSchemaMap, + SpawnCodingAgentOptions, StateProxy, TimelineProjectionOpts, UseCodingAgentOptions, @@ -627,6 +632,45 @@ export function createHandlerContext( } return handle }, + async spawnCodingAgent( + opts: SpawnCodingAgentOptions + ): Promise { + const spawnArgs: Record = { + kind: opts.kind, + workspace: opts.workspace, + } + if (opts.lifecycle !== undefined) spawnArgs.lifecycle = opts.lifecycle + + const initialMessage = + opts.initialPrompt !== undefined + ? { type: `prompt` as const, payload: { text: opts.initialPrompt } } + : undefined + + // Slice A: only `runFinished` wake (eventAppended is Slice C). + const wake: Wake = `runFinished` + + const entityHandle = await config.doSpawn( + `coding-agent`, + opts.id, + spawnArgs, + { + observe: true, + wake, + ...(initialMessage ? { initialMessage } : {}), + } + ) + + const agentUrl = `/coding-agent/${opts.id}` + return makeCodingAgentHandle(config, agentUrl, entityHandle) + }, + async observeCodingAgent(id: string): Promise { + const url = `/coding-agent/${id}` + const entityHandle = await config.doObserve( + entityObservationSource(url), + `runFinished` + ) + return makeCodingAgentHandle(config, url, entityHandle) + }, send( entityUrl: string, payload: unknown, @@ -691,3 +735,80 @@ export function createHandlerContext( return { ctx, getSleepRequested: () => sleepRequested } } + +function makeCodingAgentHandle( + config: HandlerContextConfig, + url: string, + entityHandle: { db?: { collections?: any } } +): CodingAgentHandle { + const readMeta = (): any => { + const c = entityHandle.db?.collections?.sessionMeta + return c?.get?.(`current`) + } + const readRuns = (): Array => { + const c = entityHandle.db?.collections?.runs + if (!c) return [] + const rows = (c as { toArray?: unknown }).toArray + if (!Array.isArray(rows)) return [] + return rows.map((r: any) => ({ + runId: r.key, + startedAt: r.startedAt, + endedAt: r.endedAt, + status: r.status, + promptInboxKey: r.promptInboxKey, + responseText: r.responseText, + })) + } + + return { + url, + kind: `claude`, + send: (text: string) => { + config.executeSend({ + targetUrl: url, + payload: { text }, + type: `prompt`, + }) + return Promise.resolve({ runId: `run-pending-${Date.now()}` }) + }, + pin: () => { + config.executeSend({ targetUrl: url, payload: {}, type: `pin` }) + return Promise.resolve() + }, + release: () => { + config.executeSend({ targetUrl: url, payload: {}, type: `release` }) + return Promise.resolve() + }, + stop: () => { + config.executeSend({ targetUrl: url, payload: {}, type: `stop` }) + return Promise.resolve() + }, + destroy: () => { + config.executeSend({ targetUrl: url, payload: {}, type: `destroy` }) + return Promise.resolve() + }, + state(): CodingAgentState { + const meta = readMeta() + return { + status: meta?.status ?? `cold`, + pinned: meta?.pinned ?? false, + workspace: { + identity: meta?.workspaceIdentity ?? ``, + sharedRefs: 1, // Server-only state; Slice A clients see 1. + }, + lastError: meta?.lastError, + runs: readRuns(), + } + }, + events(opts?: { since?: `start` | `now` }) { + const since = opts?.since ?? `now` + const c = entityHandle.db?.collections?.events + const rows: Array<{ payload: unknown }> = + c && Array.isArray((c as any).toArray) ? (c as any).toArray : [] + const initial = since === `start` ? rows.slice() : [] + return (async function* () { + for (const r of initial) yield r.payload + })() + }, + } +} diff --git a/packages/agents-runtime/src/types.ts b/packages/agents-runtime/src/types.ts index c3e8bb5586..072d88be53 100644 --- a/packages/agents-runtime/src/types.ts +++ b/packages/agents-runtime/src/types.ts @@ -817,6 +817,57 @@ export interface CodingSessionHandle { readonly messages: ReadonlyArray } +// ─── Coding Agent (Slice A) ─────────────────────────────────────────────── + +export type CodingAgentSliceAStatus = + | `cold` + | `starting` + | `idle` + | `running` + | `stopping` + | `error` + | `destroyed` + +export interface SpawnCodingAgentOptions { + id: string + kind: `claude` + workspace: + | { type: `volume`; name?: string } + | { type: `bindMount`; hostPath: string } + initialPrompt?: string + wake?: { on: `runFinished`; includeResponse?: boolean } + lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } +} + +export interface CodingAgentRunSummary { + runId: string + startedAt: number + endedAt?: number + status: `running` | `completed` | `failed` + promptInboxKey: string + responseText?: string +} + +export interface CodingAgentState { + status: CodingAgentSliceAStatus + pinned: boolean + workspace: { identity: string; sharedRefs: number } + lastError?: string + runs: ReadonlyArray +} + +export interface CodingAgentHandle { + readonly url: string + readonly kind: `claude` + send(prompt: string): Promise<{ runId: string }> + events(opts?: { since?: `start` | `now` }): AsyncIterable + state(): CodingAgentState + pin(): Promise + release(): Promise + stop(): Promise + destroy(): Promise +} + export interface AgentConfig { systemPrompt: string model: AgentModel @@ -952,6 +1003,16 @@ export interface HandlerContext< sessionId: string, opts: UseCodingAgentOptions ) => Promise + /** + * Spawn (or attach to) a `coding-agent` entity that runs a CLI inside a + * Docker sandbox with managed lifecycle (cold/idle/running, idle hibernation, + * pin/release, workspace lease). Requires `registerCodingAgent` to have been + * called on the runtime's registry. + */ + spawnCodingAgent: ( + opts: SpawnCodingAgentOptions + ) => Promise + observeCodingAgent: (id: string) => Promise send: ( entityUrl: string, payload: unknown, diff --git a/packages/agents-runtime/test/spawn-coding-agent.test.ts b/packages/agents-runtime/test/spawn-coding-agent.test.ts new file mode 100644 index 0000000000..92c6e9bc73 --- /dev/null +++ b/packages/agents-runtime/test/spawn-coding-agent.test.ts @@ -0,0 +1,32 @@ +import { describe, it, expect } from 'vitest' +import type { CodingAgentHandle, SpawnCodingAgentOptions } from '../src/types' + +describe(`ctx.spawnCodingAgent contract`, () => { + it(`exports SpawnCodingAgentOptions with \`claude\` kind`, () => { + const opts: SpawnCodingAgentOptions = { + id: `x`, + kind: `claude`, + workspace: { type: `volume` }, + } + expect(opts.kind).toBe(`claude`) + }) + it(`CodingAgentHandle has the expected method shape`, () => { + const noopHandle: CodingAgentHandle = { + url: `/x`, + kind: `claude`, + send: async () => ({ runId: `r` }), + events: async function* () {}, + state: () => ({ + status: `cold`, + pinned: false, + workspace: { identity: ``, sharedRefs: 1 }, + runs: [], + }), + pin: async () => undefined, + release: async () => undefined, + stop: async () => undefined, + destroy: async () => undefined, + } + expect(noopHandle.kind).toBe(`claude`) + }) +}) From 3781c9cc922fc9347306d4736f6ac86ae16fcd9e Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 08:21:54 +0100 Subject: [PATCH 022/279] fix(agents-runtime): drop misleading runId placeholder from send(); return Promise Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/agents-runtime/src/context-factory.ts | 2 +- packages/agents-runtime/src/types.ts | 2 +- packages/agents-runtime/test/spawn-coding-agent.test.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/agents-runtime/src/context-factory.ts b/packages/agents-runtime/src/context-factory.ts index 002f843bb7..6cfc6809b6 100644 --- a/packages/agents-runtime/src/context-factory.ts +++ b/packages/agents-runtime/src/context-factory.ts @@ -769,7 +769,7 @@ function makeCodingAgentHandle( payload: { text }, type: `prompt`, }) - return Promise.resolve({ runId: `run-pending-${Date.now()}` }) + return Promise.resolve() }, pin: () => { config.executeSend({ targetUrl: url, payload: {}, type: `pin` }) diff --git a/packages/agents-runtime/src/types.ts b/packages/agents-runtime/src/types.ts index 072d88be53..5dce09e86b 100644 --- a/packages/agents-runtime/src/types.ts +++ b/packages/agents-runtime/src/types.ts @@ -859,7 +859,7 @@ export interface CodingAgentState { export interface CodingAgentHandle { readonly url: string readonly kind: `claude` - send(prompt: string): Promise<{ runId: string }> + send(prompt: string): Promise events(opts?: { since?: `start` | `now` }): AsyncIterable state(): CodingAgentState pin(): Promise diff --git a/packages/agents-runtime/test/spawn-coding-agent.test.ts b/packages/agents-runtime/test/spawn-coding-agent.test.ts index 92c6e9bc73..7b229b2038 100644 --- a/packages/agents-runtime/test/spawn-coding-agent.test.ts +++ b/packages/agents-runtime/test/spawn-coding-agent.test.ts @@ -14,7 +14,7 @@ describe(`ctx.spawnCodingAgent contract`, () => { const noopHandle: CodingAgentHandle = { url: `/x`, kind: `claude`, - send: async () => ({ runId: `r` }), + send: async () => undefined, events: async function* () {}, state: () => ({ status: `cold`, From e5da51dca18a6964cf62f5d1dc551f735fbeb022 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 08:24:59 +0100 Subject: [PATCH 023/279] feat(agents): wire registerCodingAgent into bootstrap --- packages/agents/package.json | 1 + packages/agents/src/bootstrap.ts | 12 ++++ pnpm-lock.yaml | 102 ++++++++++++++++++++++++++----- 3 files changed, 101 insertions(+), 14 deletions(-) diff --git a/packages/agents/package.json b/packages/agents/package.json index 5c7bf66967..d62247b522 100644 --- a/packages/agents/package.json +++ b/packages/agents/package.json @@ -43,6 +43,7 @@ "@anthropic-ai/sdk": "^0.78.0", "@durable-streams/state": "npm:@electric-ax/durable-streams-state-beta@^0.3.1", "@electric-ax/agents-runtime": "workspace:*", + "@electric-ax/coding-agents": "workspace:*", "@mariozechner/pi-agent-core": "^0.70.2", "@mariozechner/pi-ai": "^0.70.2", "@sinclair/typebox": "^0.34.48", diff --git a/packages/agents/src/bootstrap.ts b/packages/agents/src/bootstrap.ts index 5d3ec8b3c7..b06aa9b750 100644 --- a/packages/agents/src/bootstrap.ts +++ b/packages/agents/src/bootstrap.ts @@ -10,6 +10,11 @@ import { } from '@electric-ax/agents-runtime' import { serverLog } from './log' import { registerCodingSession } from './agents/coding-session' +import { + LocalDockerProvider, + StdioBridge, + registerCodingAgent, +} from '@electric-ax/coding-agents' import { registerHorton } from './agents/horton' import { registerWorker } from './agents/worker' import { createSkillsRegistry } from './skills/registry' @@ -119,6 +124,13 @@ export async function createBuiltinAgentHandler( registerCodingSession(registry, { defaultWorkingDirectory: cwd }) typeNames.push(`coder`) + // NEW for Slice A: built-in coding-agent entity (Docker sandbox + lifecycle). + registerCodingAgent(registry, { + provider: new LocalDockerProvider(), + bridge: new StdioBridge(), + }) + typeNames.push(`coding-agent`) + const runtime = createRuntimeHandler({ baseUrl: agentServerUrl, serveEndpoint, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c25aaa3b3b..6c4b1fe8d6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1511,6 +1511,9 @@ importers: '@electric-ax/agents-runtime': specifier: workspace:* version: link:../agents-runtime + '@electric-ax/coding-agents': + specifier: workspace:* + version: link:../coding-agents '@mariozechner/pi-agent-core': specifier: ^0.70.2 version: 0.70.2(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6) @@ -1835,6 +1838,37 @@ importers: specifier: ^4.1.0 version: 4.1.5(@opentelemetry/api@1.9.1)(@types/node@25.6.0)(@vitest/coverage-v8@4.1.5)(jsdom@29.1.0(@noble/hashes@2.0.1))(vite@7.1.7(@types/node@25.6.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.46.2)(tsx@4.20.3)(yaml@2.8.1)) + packages/coding-agents: + dependencies: + '@electric-ax/agents-runtime': + specifier: workspace:* + version: link:../agents-runtime + agent-session-protocol: + specifier: ^0.0.2 + version: 0.0.2 + pino: + specifier: ^10.3.1 + version: 10.3.1 + pino-pretty: + specifier: ^13.0.0 + version: 13.1.3 + zod: + specifier: ^4.3.6 + version: 4.3.6 + devDependencies: + '@types/node': + specifier: ^22.19.15 + version: 22.19.17 + tsdown: + specifier: ^0.9.0 + version: 0.9.9(typescript@5.8.3) + typescript: + specifier: ^5.7.0 + version: 5.8.3 + vitest: + specifier: ^3.2.4 + version: 3.2.4(@types/debug@4.1.12)(@types/node@22.19.17)(jsdom@29.1.0(@noble/hashes@2.0.1))(lightningcss@1.30.1)(terser@5.46.2) + packages/electric-ax: dependencies: '@durable-streams/client': @@ -23494,7 +23528,7 @@ snapshots: jose: 6.2.3 json-schema-typed: 8.0.2 pkce-challenge: 5.0.1 - raw-body: 3.0.0 + raw-body: 3.0.2 zod: 3.25.76 zod-to-json-schema: 3.25.2(zod@3.25.76) transitivePeerDependencies: @@ -23516,7 +23550,7 @@ snapshots: jose: 6.2.3 json-schema-typed: 8.0.2 pkce-challenge: 5.0.1 - raw-body: 3.0.0 + raw-body: 3.0.2 zod: 4.3.6 zod-to-json-schema: 3.25.2(zod@4.3.6) transitivePeerDependencies: @@ -29697,7 +29731,7 @@ snapshots: anymatch@3.1.3: dependencies: normalize-path: 3.0.0 - picomatch: 2.3.1 + picomatch: 2.3.2 arg@5.0.2: {} @@ -30150,7 +30184,7 @@ snapshots: bytes: 3.1.2 content-type: 1.0.5 debug: 4.4.3 - http-errors: 2.0.0 + http-errors: 2.0.1 iconv-lite: 0.7.2 on-finished: 2.4.1 qs: 6.15.1 @@ -32598,19 +32632,19 @@ snapshots: etag: 1.8.1 finalhandler: 2.1.0 fresh: 2.0.0 - http-errors: 2.0.0 + http-errors: 2.0.1 merge-descriptors: 2.0.0 mime-types: 3.0.1 on-finished: 2.4.1 once: 1.4.0 parseurl: 1.3.3 proxy-addr: 2.0.7 - qs: 6.14.0 + qs: 6.15.1 range-parser: 1.2.1 router: 2.2.0 send: 1.2.0 serve-static: 2.2.0 - statuses: 2.0.1 + statuses: 2.0.2 type-is: 2.0.1 vary: 1.1.2 transitivePeerDependencies: @@ -33900,7 +33934,7 @@ snapshots: chalk: 4.1.2 ci-info: 3.9.0 graceful-fs: 4.2.11 - picomatch: 2.3.1 + picomatch: 2.3.2 jest-validate@29.7.0: dependencies: @@ -34323,7 +34357,7 @@ snapshots: lightningcss@1.30.1: dependencies: - detect-libc: 2.0.4 + detect-libc: 2.1.2 optionalDependencies: lightningcss-darwin-arm64: 1.30.1 lightningcss-darwin-x64: 1.30.1 @@ -37049,7 +37083,7 @@ snapshots: readdirp@3.6.0: dependencies: - picomatch: 2.3.1 + picomatch: 2.3.2 readdirp@4.0.2: {} @@ -37362,9 +37396,9 @@ snapshots: rolldown-plugin-dts@0.9.11(rolldown@1.0.0-beta.8-commit.151352b(typescript@5.8.3))(typescript@5.8.3): dependencies: - '@babel/generator': 7.28.5 - '@babel/parser': 7.28.5 - '@babel/types': 7.28.5 + '@babel/generator': 7.29.1 + '@babel/parser': 7.29.2 + '@babel/types': 7.29.0 ast-kit: 1.4.3 debug: 4.4.3 dts-resolver: 1.2.0 @@ -39531,7 +39565,7 @@ snapshots: expect-type: 1.3.0 magic-string: 0.30.21 pathe: 2.0.3 - picomatch: 4.0.3 + picomatch: 4.0.4 std-env: 3.10.0 tinybench: 2.9.0 tinyexec: 0.3.2 @@ -39556,6 +39590,46 @@ snapshots: - supports-color - terser + vitest@3.2.4(@types/debug@4.1.12)(@types/node@22.19.17)(jsdom@29.1.0(@noble/hashes@2.0.1))(lightningcss@1.30.1)(terser@5.46.2): + dependencies: + '@types/chai': 5.2.2 + '@vitest/expect': 3.2.4 + '@vitest/mocker': 3.2.4(vite@5.4.10(@types/node@22.19.17)(lightningcss@1.30.1)(terser@5.46.2)) + '@vitest/pretty-format': 3.2.4 + '@vitest/runner': 3.2.4 + '@vitest/snapshot': 3.2.4 + '@vitest/spy': 3.2.4 + '@vitest/utils': 3.2.4 + chai: 5.3.3 + debug: 4.4.3 + expect-type: 1.3.0 + magic-string: 0.30.21 + pathe: 2.0.3 + picomatch: 4.0.4 + std-env: 3.10.0 + tinybench: 2.9.0 + tinyexec: 0.3.2 + tinyglobby: 0.2.15 + tinypool: 1.1.1 + tinyrainbow: 2.0.0 + vite: 5.4.10(@types/node@22.19.17)(lightningcss@1.30.1)(terser@5.46.2) + vite-node: 3.2.4(@types/node@22.19.17)(lightningcss@1.30.1)(terser@5.46.2) + why-is-node-running: 2.3.0 + optionalDependencies: + '@types/debug': 4.1.12 + '@types/node': 22.19.17 + jsdom: 29.1.0(@noble/hashes@2.0.1) + transitivePeerDependencies: + - less + - lightningcss + - msw + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + vitest@4.0.15(@opentelemetry/api@1.9.1)(@types/node@20.17.6)(jiti@2.6.1)(jsdom@29.1.0(@noble/hashes@2.0.1))(lightningcss@1.30.1)(terser@5.46.2)(tsx@4.20.3)(yaml@2.8.1): dependencies: '@vitest/expect': 4.0.15 From e1fb7eaa6235706d5428a6dc7eb964aa9ed31b12 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 08:34:10 +0100 Subject: [PATCH 024/279] test(coding-agents): Slice A integration smoke (entity, lifecycle, lease, recovery) Exercises the full coding-agent flow with real Docker + Claude API: first-wake init, cold-boot, pin/release idle hibernation, workspace lease serialization across two agents, crash-recovery orphan reconciliation, and destroy. Uses the fake-but-real-enough ctx harness pattern. Co-Authored-By: Claude Sonnet 4.6 --- .../test/integration/slice-a.test.ts | 254 ++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 packages/coding-agents/test/integration/slice-a.test.ts diff --git a/packages/coding-agents/test/integration/slice-a.test.ts b/packages/coding-agents/test/integration/slice-a.test.ts new file mode 100644 index 0000000000..39596fea0d --- /dev/null +++ b/packages/coding-agents/test/integration/slice-a.test.ts @@ -0,0 +1,254 @@ +import { describe, it, expect, beforeAll } from 'vitest' +import { + LocalDockerProvider, + StdioBridge, + WorkspaceRegistry, + LifecycleManager, +} from '../../src' +import { makeCodingAgentHandler } from '../../src/entity/handler' +import { buildTestImage, TEST_IMAGE_TAG } from '../support/build-image' +import { loadTestEnv } from '../support/env' + +const SHOULD_RUN = process.env.DOCKER === `1` +const describeMaybe = SHOULD_RUN ? describe : describe.skip + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k: string) { + return rows.get(k) + }, + get toArray(): Array { + return Array.from(rows.values()) + }, + } +} + +interface FakeCtxState { + sessionMeta: CollectionStub + runs: CollectionStub + events: CollectionStub + lifecycle: CollectionStub + inbox: CollectionStub +} + +function makeFakeCtx(entityUrl: string, args: Record) { + const state: FakeCtxState = { + sessionMeta: makeCollection(), + runs: makeCollection(), + events: makeCollection(), + lifecycle: makeCollection(), + inbox: makeCollection(), + } + let runCounter = 0 + const ctx: any = { + entityUrl, + entityType: `coding-agent`, + args, + tags: {}, + firstWake: false, + db: { + collections: state, + actions: { + sessionMeta_insert: ({ row }: any) => + state.sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ key, updater }: any) => { + const r = state.sessionMeta.rows.get(key) + if (r) updater(r) + }, + runs_insert: ({ row }: any) => state.runs.rows.set(row.key, row), + runs_update: ({ key, updater }: any) => { + const r = state.runs.rows.get(key) + if (r) updater(r) + }, + events_insert: ({ row }: any) => state.events.rows.set(row.key, row), + lifecycle_insert: ({ row }: any) => + state.lifecycle.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent: { key: string; status?: string; response: string } = { + key, + status: undefined, + response: ``, + } + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: () => undefined, + } + return { ctx, state } +} + +function pushInbox( + state: FakeCtxState, + key: string, + message_type: string, + payload: any = {} +) { + state.inbox.rows.set(key, { key, message_type, payload }) +} + +describeMaybe(`Slice A — full integration`, () => { + beforeAll(async () => { + await buildTestImage() + }, 600_000) + + it(`spawns, runs prompt, lease-serializes, recovers from crash, destroys`, async () => { + const env = loadTestEnv() + const provider = new LocalDockerProvider({ image: TEST_IMAGE_TAG }) + const bridge = new StdioBridge() + const wr = new WorkspaceRegistry() + const lm = new LifecycleManager({ provider, bridge }) + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 2000, + coldBootBudgetMs: 60_000, + runTimeoutMs: 120_000, + }, + env: () => ({ ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }), + }) + + const agentA = `/test/coding-agent/a-${Date.now().toString(36)}` + const sharedName = `slice-a-shared-${Date.now().toString(36)}` + const args = { + kind: `claude`, + workspace: { type: `volume`, name: sharedName }, + lifecycle: { idleTimeoutMs: 2000 }, + } + const { ctx: ctxA, state: stateA } = makeFakeCtx(agentA, args) + + // ── Assertion 1: First-wake init ────────────────────────────────────────── + await handler(ctxA, { type: `message_received` }) + expect(stateA.sessionMeta.get(`current`).status).toBe(`cold`) + + // ── Assertion 2: Send prompt; cold boot + run completes ─────────────────── + pushInbox(stateA, `i1`, `prompt`, { + text: `Reply with the single word: ok`, + }) + await handler(ctxA, { type: `message_received` }) + + const metaA1 = stateA.sessionMeta.get(`current`) + expect(metaA1.status).toBe(`idle`) + const runsA = Array.from(stateA.runs.rows.values()) as any[] + expect(runsA).toHaveLength(1) + expect(runsA[0].status).toBe(`completed`) + expect((runsA[0].responseText?.length ?? 0) > 0).toBe(true) + + // ── Assertion 3: Pin; sleep past idle timeout; container still running ──── + pushInbox(stateA, `i2`, `pin`) + await handler(ctxA, { type: `message_received` }) + expect(stateA.sessionMeta.get(`current`).pinned).toBe(true) + + await new Promise((r) => setTimeout(r, 3000)) + expect([`running`]).toContain(await provider.status(agentA)) + + // ── Assertion 4: Release; sleep past idle; sandbox stops ───────────────── + pushInbox(stateA, `i3`, `release`) + await handler(ctxA, { type: `message_received` }) + await new Promise((r) => setTimeout(r, 3000)) + expect([`stopped`, `unknown`]).toContain(await provider.status(agentA)) + + // ── Assertion 5: Second prompt triggers cold-boot path ──────────────────── + pushInbox(stateA, `i4`, `prompt`, { text: `Reply: again` }) + await handler(ctxA, { type: `message_received` }) + const runsA2 = Array.from(stateA.runs.rows.values()) as any[] + expect(runsA2.length).toBeGreaterThanOrEqual(2) + expect(runsA2[runsA2.length - 1].status).toBe(`completed`) + + // ── Assertion 6: Second agent on same workspace, lease-serialized ───────── + // Wait past the idle timer so A's container is already stopped before + // we launch the concurrent test. This ensures no in-flight idle-timer + // kill can interrupt the concurrent run. + await new Promise((r) => setTimeout(r, 3000)) + + const agentB = `/test/coding-agent/b-${Date.now().toString(36)}` + const { ctx: ctxB, state: stateB } = makeFakeCtx(agentB, args) + // First-wake init for B + await handler(ctxB, { type: `message_received` }) + + pushInbox(stateB, `j1`, `prompt`, { text: `Reply: B` }) + pushInbox(stateA, `i5`, `prompt`, { text: `Reply: A` }) + await Promise.all([ + handler(ctxA, { type: `message_received` }), + handler(ctxB, { type: `message_received` }), + ]) + + const runsAFinal = Array.from(stateA.runs.rows.values()) as any[] + const runsBFinal = Array.from(stateB.runs.rows.values()) as any[] + expect(runsAFinal[runsAFinal.length - 1].status).toBe(`completed`) + expect(runsBFinal[0].status).toBe(`completed`) + + // Lease serialization: A's last run and B's first run must not overlap. + const lastA = runsAFinal[runsAFinal.length - 1] + const firstB = runsBFinal[0] + const noOverlap = + lastA.endedAt <= firstB.startedAt || firstB.endedAt <= lastA.startedAt + expect(noOverlap).toBe(true) + + // ── Assertion 7: Crash recovery ─────────────────────────────────────────── + // Simulate a "prior LM crash" by creating lm2 (new startedAtMs). + // Inject a stale 'running' row predating lm2 into stateA. + const oldRunStart = Date.now() - 60_000 + stateA.runs.rows.set(`stale`, { + key: `stale`, + startedAt: oldRunStart, + status: `running`, + promptInboxKey: `fake`, + } as any) + stateA.sessionMeta.rows.set(`current`, { + ...stateA.sessionMeta.get(`current`), + status: `running`, + }) + + // Small delay to ensure lm2.startedAtMs > oldRunStart + await new Promise((r) => setTimeout(r, 50)) + + const lm2 = new LifecycleManager({ provider, bridge }) + const handler2 = makeCodingAgentHandler(lm2, wr, { + defaults: { + idleTimeoutMs: 2000, + coldBootBudgetMs: 60_000, + runTimeoutMs: 120_000, + }, + env: () => ({ ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }), + }) + + pushInbox(stateA, `i6`, `prompt`, { text: `after crash` }) + await handler2(ctxA, { type: `message_received` }) + + // Stale run must be reconciled to orphaned + expect((stateA.runs.get(`stale`) as any).status).toBe(`failed`) + expect((stateA.runs.get(`stale`) as any).finishReason).toBe(`orphaned`) + // A new run must have completed + const newRuns = (Array.from(stateA.runs.rows.values()) as any[]).filter( + (r) => r.status === `completed` && r.key !== `stale` + ) + expect(newRuns.length).toBeGreaterThan(0) + + // ── Assertion 8: Destroy ────────────────────────────────────────────────── + pushInbox(stateA, `i7`, `destroy`) + await handler2(ctxA, { type: `message_received` }) + expect(stateA.sessionMeta.get(`current`).status).toBe(`destroyed`) + expect([`stopped`, `unknown`]).toContain(await provider.status(agentA)) + + // Cleanup B + await provider.destroy(agentB).catch(() => undefined) + }, 360_000) +}) From 030494a9ccbc8279aa65df4a1829ccc3d8ac53b5 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 08:40:18 +0100 Subject: [PATCH 025/279] docs(coding-agents): Slice A run report --- ...2026-04-30-coding-agents-slice-a-report.md | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md diff --git a/docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md b/docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md new file mode 100644 index 0000000000..7d493dcc24 --- /dev/null +++ b/docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md @@ -0,0 +1,144 @@ +# Coding Agents Slice A — Run Report + +**Date:** 2026-04-30 +**Plan:** `docs/superpowers/plans/2026-04-30-coding-agents-slice-a.md` +**Spec:** `docs/superpowers/specs/2026-04-30-coding-agents-slice-a-design.md` +**Validation bar:** integration smoke test exercising entity lifecycle (spawn, pin, release, stop), lease acquisition, crash recovery via container label inspection, and destroy. +**Outcome:** ✅ Green on second integration-test run. One timing adjustment cycle required. + +## Result + +``` +✓ packages/coding-agents/src/workspace-registry.test.ts (7 tests) 8 ms +✓ packages/coding-agents/src/lifecycle-manager.test.ts (7 tests) 12 ms +✓ packages/coding-agents/src/entity-handler.test.ts (4 tests) 15 ms +✓ packages/coding-agents/src/runtime-contract.test.ts (2 tests) 3 ms +✓ test/integration/slice-a.test.ts (1 test) 49.8 s ← validation bar +``` + +Unit test summary: 20 new tests + 368 existing = **388 total.** All passing. + +Coding-agents package totals: **22 unit + 1 integration = 23 tests.** Integration test wall clock: ~50 s. + +## What worked first time + +- **Closure-scoped `registerCodingAgent(registry, deps)` registration pattern.** The entity handler closes over `LifecycleManager` and `WorkspaceRegistry` cleanly. No runtime extension API was needed — the helper wires both dependencies into the handler's scope without leaking them into the public contract. +- **Reconcile-on-handler-entry for orphan-run detection.** Comparing `lm.startedAtMs < runs.startedAt` proved sufficient to detect runs orphaned by a prior crash. No complex log scanning required. +- **Reusing existing `ctx.recordRun()` / `attachResponse()` / `end()` machinery for parent-wake signaling.** The prompt response already triggers `runFinished` wake on the parent session. No new wake plumbing was needed. +- **TDD on pure components (WorkspaceRegistry, LifecycleManager).** Tests were written against the spec; implementation followed; all tests passed on first run. No test-code divergence. + +## What had to be fixed mid-flight + +### 1. Spec divergence: no `onBoot` registry hook + +**Symptom:** The original spec assumed `EntityRegistry.define` would expose an `onBoot` hook for initialization. The runtime has no such hook. + +**Resolution:** Boot logic folded into the handler's first-wake branch. On first entry, check if `sessionMeta` exists in the collection; if absent, seed a fresh `SessionMetaRow` with `status='active'` and `keepWarm=true`. The `WorkspaceRegistry` and `LifecycleManager` are both freshly constructed per `registerCodingAgent` call, so explicit boot wiring is unnecessary. + +### 2. Spec divergence: no `ctx.deleteEntityStream` + +**Symptom:** The runtime has no primitive to delete an entity's durable stream. The destroy flow expected this. + +**Resolution:** `destroy()` becomes a tombstone operation: container removed via the provider, workspace ref dropped, `sessionMeta.status` set to `'destroyed'`, and all subsequent inbox messages return early via a status guard. Documented as a Slice B improvement (true stream cleanup). + +### 3. Task 2.1: type narrowing failure in session meta + +**Symptom:** After first-wake init, `meta` was typed as `SessionMetaRow | undefined`. Downstream `.pin()` / `.release()` calls errored. + +**Fix:** Refactored init to declare a `const initialMeta` and always assign it via an if/else to a `let meta: SessionMetaRow`. Removed redundant `!` assertions. + +### 4. Task 2.1: lifecycle key collision race + +**Symptom:** Three `lifecycleKey` inserts in `processPrompt` (boot, pin/release, stop/destroy) could collide on millisecond ticks, causing duplicate-key errors. + +**Fix:** Used the existing `lifecycleKey('label')` helper consistently: `lifecycleKey('boot')`, `lifecycleKey('pin')`, `lifecycleKey('release')`, `lifecycleKey('stop')`, `lifecycleKey('destroy')`. All unique by construction. + +### 5. Task 2.1: stale meta snapshot for idle-timer arm + +**Symptom:** The idle-timer arming code read `meta.keepWarm` and `meta.idleTimeoutMs` from a stale snapshot. Changes made in the same handler entry were not reflected. + +**Fix:** Re-read `meta` from `ctx.db.collections.sessionMeta.get(agentId)` just before arming the idle timer, ensuring fresh values. + +### 6. Task 2.2: unused test-accessor type + +**Symptom:** `CodingAgentInternals` was defined but never used outside tests. + +**Fix:** Removed the type entirely. + +### 7. Task 2.3: `send()` returned a fake run id + +**Symptom:** Initial implementation returned `Promise<{ runId: 'run-pending-${Date.now()}' }>`. The actual run id only exists after the entity processes the message and writes to the `runs` collection. + +**Fix:** Changed return type to `Promise`. Real run ids surface via `state().runs` or the parent's `runFinished` wake signal, consistent with the rest of the handle (`pin`, `release`, `stop`, `destroy` all return `Promise`). + +### 8. Task 2.3: misleading spec URL convention + +**Symptom:** Spec documented the entity handle URL as `//coding-agent/`. The runtime uses a flat URL convention: `//`. + +**Resolution:** Implementation matches the actual runtime convention. Noted for a future spec edit. + +### 9. Task 4.1: integration test timing cycle + +**Cycle 1 failure:** The idle-timer (2 s) fired mid-concurrent-run, removing the container and failing assertions. + +**Cycle 2 fix:** Increased idle waits to 3 s and added a 3-second drain wait before the concurrent assertion, allowing the prior section's idle timer to expire fully before re-using the workspace. + +## Other notes + +- **Synchronous collection API.** The repo uses `@durable-streams/state`-style collections (`ctx.db.collections.X.get(k)`, `ctx.db.actions.X_insert/X_update`). Different from typical async ORMs. Documented in the legacy `coder` entity as a reference. +- **`LocalDockerProvider.destroy()` behavior.** This method finds and removes a container by agent label. The `LifecycleManager.stop()` method calls `provider.destroy(agentId)` (NOT `provider.stop(instanceId)`). See the comment in lifecycle-manager.ts:38–39 explaining the distinction. +- **Pre-commit hook string normalization.** The repo's lint-staged hook converts single-quoted strings to backticks per project convention. Once subagents read existing source, they adapted automatically. +- **Unbounded workspace lease.** No acquire timeout is set. Acceptable for Slice A; can be added in a follow-up if real workloads stall on lease contention. + +## What's NOT done (vs. the full design spec) + +These were intentionally deferred. Listed here for the next plan: + +1. **Resume.** `nativeJsonl` collection, `--resume ` plumbing, cold-boot tmpfs materialization. **(Slice B.)** +2. **Codex support.** Bridge still rejects `kind: 'codex'`. **(Slice C.)** +3. **Removal of legacy `coder` entity** + `spawn-coder.ts` / `prompt-coder.ts` tools. **(Slice B.)** +4. **New Horton tools** (`spawn_coding_agent`, `prompt_coding_agent`). **(Slice B.)** +5. **UI extensions.** Status enum, header sandbox provenance, pin/release/stop buttons, lifecycle row rendering. **(Slice C.)** +6. **Conformance suite** parameterized by `SandboxProvider`. **(Slice C.)** +7. **`wake.on: 'eventAppended'`** for streaming UI. **(Slice C.)** +8. **`sandbox?` provider override** on `SpawnCodingAgentOptions`. (Single-provider for now.) +9. **Live `events()` tailing.** Slice A returns snapshot async-iterable; live tailing lands with the UI consumer. **(Slice C.)** +10. **Server-side `state().workspace.sharedRefs` accuracy** from a client handler context. Client handlers see `sharedRefs: 1`. Documented. + +## Recommended next steps (priority order for Slice B) + +1. Add resume (`--resume`, sidecar `nativeJsonl` collection, cold-boot denormalize). +2. Add `provider.recover()` integration on agents-server boot to populate the `WorkspaceRegistry` from durable entity state (currently rebuild happens lazily on first handler entry per agent — works but is deferred). +3. Add Horton tools (`spawn_coding_agent`, `prompt_coding_agent`) matching the shape of legacy `spawn_coder` / `prompt_coder`. +4. Remove the legacy `coder` entity once Horton tools are in place and no other callsites depend on it. +5. (Independent) Tighten `ctx: any` in the entity handler to bind to a specific `HandlerContext` shape. +6. (Independent) Update spec doc to correct the `//coding-agent/` URL convention to flat `//`. + +## Artifacts + +Commits on `coding-agents-slice-a` branch (in order): + +1. `2a43456b4` — collection + inbox message schemas +2. `70e8a95fb` — public types extension (SpawnCodingAgentOptions, SLICE_A_DEFAULTS) +3. `b31dcb924` — WorkspaceRegistry +4. `1841c38e4` — LifecycleManager +5. `627b2afb7` — entity handler (reconcile, dispatch, processPrompt) +6. `d5efd727e` — fix: tighten meta type narrowing, unique lifecycle keys, fresh meta read for idle timer +7. `036ce99f2` — registerCodingAgent helper +8. `22a97c590` — refactor: remove unused CodingAgentInternals +9. `260e9146e` — runtime API: ctx.spawnCodingAgent / observeCodingAgent +10. `3781c9cc9` — fix: drop misleading runId placeholder from send() +11. `e5da51dca` — wire registerCodingAgent into bootstrap +12. `e1fb7eaa6` — Slice A integration smoke test + +Branch: `coding-agents-slice-a` (forked from `main` at `a31e8a8a0` to keep main clean). + +## How to re-run + +```bash +# Unit tests (no Docker required) +pnpm -C packages/coding-agents test + +# Integration test (requires Docker + /tmp/.electric-coding-agents-env) +DOCKER=1 pnpm -C packages/coding-agents test test/integration/slice-a.test.ts +``` From c65276ea07d2b04c7f9dfc9169d92ec9555dd518 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 08:50:38 +0100 Subject: [PATCH 026/279] fix(agents-runtime): spawnCodingAgent initialMessage shape (drop prompt/payload wrapping) --- packages/agents-runtime/src/context-factory.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/agents-runtime/src/context-factory.ts b/packages/agents-runtime/src/context-factory.ts index 6cfc6809b6..df5d91c4eb 100644 --- a/packages/agents-runtime/src/context-factory.ts +++ b/packages/agents-runtime/src/context-factory.ts @@ -641,9 +641,12 @@ export function createHandlerContext( } if (opts.lifecycle !== undefined) spawnArgs.lifecycle = opts.lifecycle + // initialMessage is stored verbatim as the inbox row's payload (no message_type + // extraction in the spawn path). Match the entity's promptMessageSchema shape: + // flat { text } object, NOT { type: 'prompt', payload: { text } }. const initialMessage = opts.initialPrompt !== undefined - ? { type: `prompt` as const, payload: { text: opts.initialPrompt } } + ? { text: opts.initialPrompt } : undefined // Slice A: only `runFinished` wake (eventAppended is Slice C). From d10c31614a3a8fe6ba06767be0a31c2101741669 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 08:51:55 +0100 Subject: [PATCH 027/279] docs(coding-agents): record Slice A initialMessage fix in run report Document the post-Slice-A final-review fix (commit c65276ea0) and flag the missing test coverage as a Slice B priority. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../specs/notes/2026-04-30-coding-agents-slice-a-report.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md b/docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md index 7d493dcc24..229b8c1a05 100644 --- a/docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md +++ b/docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md @@ -130,9 +130,13 @@ Commits on `coding-agents-slice-a` branch (in order): 10. `3781c9cc9` — fix: drop misleading runId placeholder from send() 11. `e5da51dca` — wire registerCodingAgent into bootstrap 12. `e1fb7eaa6` — Slice A integration smoke test +13. `030494a9c` — Slice A run report (this document) +14. `c65276ea0` — fix: spawnCodingAgent initialMessage shape (drop prompt/payload wrapping) Branch: `coding-agents-slice-a` (forked from `main` at `a31e8a8a0` to keep main clean). +**Final-review caveat.** The post-Slice-A code review caught a Critical bug not exercised by any test: `spawnCodingAgent`'s `initialPrompt` path wrapped the message as `{ type: 'prompt', payload: { text } }`, but the runtime stores the entire `initialMessage` verbatim as the inbox row's payload, causing `promptMessageSchema.safeParse` to reject and silently drop the prompt. Fix in commit `c65276ea0` flattens to `{ text }` (matching the legacy `spawn_coder` pattern). The integration test does not cover this path because it drives the handler directly. **Slice B should add a runtime-level integration test that exercises `ctx.spawnCodingAgent({ initialPrompt })` end-to-end.** + ## How to re-run ```bash From 98aa20789c9b2b9b807d9d07975a52500d56f6c3 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 13:46:17 +0100 Subject: [PATCH 028/279] fix(coding-agents): slugify agentId-derived volume name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When SpawnCodingAgentOptions.workspace.name is omitted, the default name was the raw agentId (e.g. /coding-agent/abc123), which Docker rejects as a volume source — volume names require [a-zA-Z0-9_.-]. provider.start would fail and the entity would be stuck in 'error'. WorkspaceRegistry.resolveIdentity now slugifies the agentId before using it as the default volume name. Caller-provided names are unchanged. Adds a unit test for the slug behavior. Surfaced when manually spawning a coding-agent from the UI; the integration test always passed an explicit workspace.name and so never exercised the agentId default. --- packages/coding-agents/src/workspace-registry.ts | 15 ++++++++++++++- .../test/unit/workspace-registry.test.ts | 14 +++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/packages/coding-agents/src/workspace-registry.ts b/packages/coding-agents/src/workspace-registry.ts index bdba388ce0..c76e24efef 100644 --- a/packages/coding-agents/src/workspace-registry.ts +++ b/packages/coding-agents/src/workspace-registry.ts @@ -4,6 +4,19 @@ export type ResolvedWorkspaceSpec = | { type: `volume`; name: string } | { type: `bindMount`; hostPath: string } +/** + * Docker volume names must match `[a-zA-Z0-9][a-zA-Z0-9_.-]*`. Entity URLs + * (the agentId) include `/` and other invalid characters, so we slugify + * before using them as a default volume name. + */ +function slugifyForVolumeName(s: string): string { + return s + .replace(/[^a-zA-Z0-9_.-]/g, `-`) + .replace(/-+/g, `-`) + .replace(/^[-_.]+/, ``) + .replace(/[-_.]+$/, ``) +} + export class WorkspaceRegistry { private readonly refsByIdentity = new Map>() private readonly chainByIdentity = new Map>() @@ -15,7 +28,7 @@ export class WorkspaceRegistry { | { type: `bindMount`; hostPath: string } ): Promise<{ identity: string; resolved: ResolvedWorkspaceSpec }> { if (spec.type === `volume`) { - const name = spec.name ?? agentId + const name = spec.name ?? slugifyForVolumeName(agentId) return { identity: `volume:${name}`, resolved: { type: `volume`, name }, diff --git a/packages/coding-agents/test/unit/workspace-registry.test.ts b/packages/coding-agents/test/unit/workspace-registry.test.ts index 975782f48b..0af9445fae 100644 --- a/packages/coding-agents/test/unit/workspace-registry.test.ts +++ b/packages/coding-agents/test/unit/workspace-registry.test.ts @@ -11,12 +11,20 @@ describe(`WorkspaceRegistry.resolveIdentity`, () => { expect(r.resolved).toEqual({ type: `volume`, name: `foo` }) }) - it(`resolves volume: when name is omitted`, async () => { + it(`resolves volume: when name is omitted`, async () => { const r = await WorkspaceRegistry.resolveIdentity(`/p/coding-agent/x`, { type: `volume`, }) - expect(r.identity).toBe(`volume:/p/coding-agent/x`) - expect(r.resolved).toEqual({ type: `volume`, name: `/p/coding-agent/x` }) + // agentId slugified: '/' → '-', leading separators stripped. + expect(r.identity).toBe(`volume:p-coding-agent-x`) + expect(r.resolved).toEqual({ type: `volume`, name: `p-coding-agent-x` }) + }) + + it(`slugifies invalid Docker volume name characters in agentId`, async () => { + const r = await WorkspaceRegistry.resolveIdentity(`/a/b@c/d!`, { + type: `volume`, + }) + expect(r.identity).toMatch(/^volume:[a-zA-Z0-9][a-zA-Z0-9_.-]*$/) }) it(`resolves bindMount: for bind mounts`, async () => { From 42753144916049fec2e3ba9da80b587166fd1830 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 13:53:10 +0100 Subject: [PATCH 029/279] fix(coding-agents): flatten coding-agent creationSchema for UI dialog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agents-server-ui SpawnArgsDialog only renders simple JSON-Schema property types (string/number/boolean/enum). Nested objects and unions don't render at all and the spawn request returns 422. Flatten the entity's creation schema to: kind?, workspaceType?, workspaceName?, workspaceHostPath?, idleTimeoutMs?, keepWarm? The handler reconstructs the nested workspace shape on first-wake init. The typed ctx.spawnCodingAgent({ workspace: {...}, lifecycle: {...} }) API surface is unchanged — the runtime helper translates to the flat fields when forwarding to ctx.spawn('coding-agent', ...). Updates entity-handler unit test and slice-a integration test to use the flat shape. Surfaced when manually spawning a coding-agent with a custom workspace name from the UI; the dialog couldn't render the nested workspace union and the 422 from the agents-server validator was reported as "Spawn failed (422). The server may be missing ANTHROPIC_API_KEY." (generic error wrapping in Sidebar.tsx). --- .../agents-runtime/src/context-factory.ts | 23 +++++++++++--- packages/coding-agents/src/entity/handler.ts | 20 +++++++++---- packages/coding-agents/src/entity/register.ts | 30 ++++++++----------- .../test/integration/slice-a.test.ts | 5 ++-- .../test/unit/entity-handler.test.ts | 3 +- 5 files changed, 50 insertions(+), 31 deletions(-) diff --git a/packages/agents-runtime/src/context-factory.ts b/packages/agents-runtime/src/context-factory.ts index df5d91c4eb..2f3ad942c0 100644 --- a/packages/agents-runtime/src/context-factory.ts +++ b/packages/agents-runtime/src/context-factory.ts @@ -635,11 +635,26 @@ export function createHandlerContext( async spawnCodingAgent( opts: SpawnCodingAgentOptions ): Promise { - const spawnArgs: Record = { - kind: opts.kind, - workspace: opts.workspace, + // The coding-agent entity's creationSchema is FLAT (the agents-server-ui + // SpawnArgsDialog only renders simple types). Translate the nested + // SpawnCodingAgentOptions.workspace into the flat workspaceType/Name/HostPath + // fields that the handler reconstructs on first-wake init. + const spawnArgs: Record = { kind: opts.kind } + if (opts.workspace.type === `volume`) { + spawnArgs.workspaceType = `volume` + if (opts.workspace.name !== undefined) { + spawnArgs.workspaceName = opts.workspace.name + } + } else { + spawnArgs.workspaceType = `bindMount` + spawnArgs.workspaceHostPath = opts.workspace.hostPath + } + if (opts.lifecycle?.idleTimeoutMs !== undefined) { + spawnArgs.idleTimeoutMs = opts.lifecycle.idleTimeoutMs + } + if (opts.lifecycle?.keepWarm !== undefined) { + spawnArgs.keepWarm = opts.lifecycle.keepWarm } - if (opts.lifecycle !== undefined) spawnArgs.lifecycle = opts.lifecycle // initialMessage is stored verbatim as the inbox row's payload (no message_type // extraction in the spawn path). Match the entity's promptMessageSchema shape: diff --git a/packages/coding-agents/src/entity/handler.ts b/packages/coding-agents/src/entity/handler.ts index 72f188bcc8..36b24c9b40 100644 --- a/packages/coding-agents/src/entity/handler.ts +++ b/packages/coding-agents/src/entity/handler.ts @@ -79,14 +79,22 @@ export function makeCodingAgentHandler( if (!initialMeta) { const args = ctx.args as { kind?: `claude` - workspace?: any - lifecycle?: { idleTimeoutMs?: number; keepWarm?: boolean } + workspaceType?: `volume` | `bindMount` + workspaceName?: string + workspaceHostPath?: string + idleTimeoutMs?: number + keepWarm?: boolean } - const ws = args.workspace ?? { type: `volume` } + const ws = + args.workspaceType === `bindMount` + ? { + type: `bindMount` as const, + hostPath: args.workspaceHostPath ?? process.cwd(), + } + : { type: `volume` as const, name: args.workspaceName } const resolved = await WorkspaceRegistry.resolveIdentity(agentId, ws) - const idleTimeoutMs = - args.lifecycle?.idleTimeoutMs ?? options.defaults.idleTimeoutMs - const keepWarm = args.lifecycle?.keepWarm ?? false + const idleTimeoutMs = args.idleTimeoutMs ?? options.defaults.idleTimeoutMs + const keepWarm = args.keepWarm ?? false const initial: SessionMetaRow = { key: `current`, status: `cold`, diff --git a/packages/coding-agents/src/entity/register.ts b/packages/coding-agents/src/entity/register.ts index 82c1b5d615..9e9880b35e 100644 --- a/packages/coding-agents/src/entity/register.ts +++ b/packages/coding-agents/src/entity/register.ts @@ -36,26 +36,20 @@ export interface RegisterCodingAgentDeps { env?: () => Record } +// NOTE: Flat shape (no nested objects, no unions). The agents-server-ui's +// SpawnArgsDialog only renders simple JSON-Schema property types +// (string/number/boolean/enum) — nested objects and unions don't render +// at all and the dialog rejects the request. The handler reconstructs +// the nested workspace shape from these flat fields on first-wake init. const creationArgsSchema = z.object({ kind: z.enum([`claude`]).optional(), - workspace: z - .union([ - z.object({ - type: z.literal(`volume`), - name: z.string().optional(), - }), - z.object({ - type: z.literal(`bindMount`), - hostPath: z.string(), - }), - ]) - .optional(), - lifecycle: z - .object({ - idleTimeoutMs: z.number().optional(), - keepWarm: z.boolean().optional(), - }) - .optional(), + workspaceType: z.enum([`volume`, `bindMount`]).optional(), + /** For workspaceType='volume'. Defaults to slug(agentId) when omitted. */ + workspaceName: z.string().optional(), + /** For workspaceType='bindMount'. Required when workspaceType='bindMount'. */ + workspaceHostPath: z.string().optional(), + idleTimeoutMs: z.number().optional(), + keepWarm: z.boolean().optional(), }) export function registerCodingAgent( diff --git a/packages/coding-agents/test/integration/slice-a.test.ts b/packages/coding-agents/test/integration/slice-a.test.ts index 39596fea0d..4537290e86 100644 --- a/packages/coding-agents/test/integration/slice-a.test.ts +++ b/packages/coding-agents/test/integration/slice-a.test.ts @@ -129,8 +129,9 @@ describeMaybe(`Slice A — full integration`, () => { const sharedName = `slice-a-shared-${Date.now().toString(36)}` const args = { kind: `claude`, - workspace: { type: `volume`, name: sharedName }, - lifecycle: { idleTimeoutMs: 2000 }, + workspaceType: `volume`, + workspaceName: sharedName, + idleTimeoutMs: 2000, } const { ctx: ctxA, state: stateA } = makeFakeCtx(agentA, args) diff --git a/packages/coding-agents/test/unit/entity-handler.test.ts b/packages/coding-agents/test/unit/entity-handler.test.ts index fc5f78354b..942ad892c2 100644 --- a/packages/coding-agents/test/unit/entity-handler.test.ts +++ b/packages/coding-agents/test/unit/entity-handler.test.ts @@ -172,7 +172,8 @@ describe(`entity handler — first-wake init`, () => { entityUrl: `/test/coding-agent/x`, args: { kind: `claude`, - workspace: { type: `volume`, name: `w` }, + workspaceType: `volume`, + workspaceName: `w`, }, }) From 86aea614c9501137f19e8d5f3138acf778fcde4d Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 15:02:14 +0100 Subject: [PATCH 030/279] docs(specs): add Slice B design for coding-agents migration completion Slice B finishes the platform-primitive migration: resume via nativeJsonl tee + cold-boot materialization, Horton tool migration to spawn_coding_agent / prompt_coding_agent, full removal of the legacy coder entity (source, tools, runtime types, UI, bootstrap), and full UI revamp (CodingAgent* components, status enum extension, header Pin/Release/Stop buttons, lifecycle row rendering). Plus a runtime- level e2e test that closes the gap which hid Slice A's slug and flat-schema bugs. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...2026-04-30-coding-agents-slice-b-design.md | 633 ++++++++++++++++++ 1 file changed, 633 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md diff --git a/docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md b/docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md new file mode 100644 index 0000000000..56e5ca5531 --- /dev/null +++ b/docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md @@ -0,0 +1,633 @@ +# Coding Agents — Slice B: Resume + Horton Migration + Legacy Coder Removal + UI Revamp + +**Status:** Draft +**Date:** 2026-04-30 +**Author:** Valter Balegas +**Parent spec:** `docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md` +**Predecessors:** + +- `docs/superpowers/specs/notes/2026-04-30-coding-agents-mvp-report.md` (MVP — Provider + Bridge) +- `docs/superpowers/specs/2026-04-30-coding-agents-slice-a-design.md` (Slice A — runtime API + entity + lifecycle) +- `docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md` (Slice A run report) + +## Summary + +Slice B finishes the platform-primitive migration. After Slice A, the new `coding-agent` entity exists alongside the legacy `coder`, but cold-boot loses session continuity (every new sandbox starts a fresh CLI session), Horton still spawns the legacy entity, the legacy entity remains in the codebase, and the UI's chat surface is wired only to the legacy entity. Slice B closes all four gaps in one merge: + +1. **Resume.** A new `nativeJsonl` collection captures every raw `claude` JSONL line per turn. On cold-boot of an agent that has prior runs, the handler reads the collection, materializes the JSONL into the sandbox's tmpfs, and runs `claude --resume `. Same-kind resume is lossless. +2. **Horton tool migration.** New tools `spawn_coding_agent` / `prompt_coding_agent` mirror the legacy `spawn_coder` / `prompt_coder`'s API but spawn `coding-agent` entities via `ctx.spawnCodingAgent`. Horton's tool list swaps to the new pair. +3. **Legacy `coder` removal.** Delete `packages/agents/src/agents/coding-session.ts`, `spawn-coder.ts`, `prompt-coder.ts`, and the runtime-side `useCodingAgent` / `CodingSessionHandle` types. Remove `registerCodingSession` from the bootstrap. +4. **UI revamp.** New `CodingAgentView` / `CodingAgentTimeline` / `useCodingAgent` / `CodingAgentSpawnDialog` components replace the legacy `CodingSession*` set, wire `coding-agent` collections, extend the status enum, render the `lifecycle` collection as muted timeline rows, and add Pin/Release/Stop buttons in the header. + +After Slice B, the new `coding-agent` is the **only** coding-agent type in the codebase, and the runtime, entity, sandbox, bridge, server, UI, and Horton all consume it. The `electric-ax/coding-agent-sandbox:test` image is unchanged. + +## Goals + +1. **Same-kind resume is lossless.** A second prompt to a `coding-agent` after an idle hibernation produces a CLI session that sees all prior turns. Verified by an integration test that asserts the second response references the first prompt's content. +2. **Horton uses the new entity.** `Spawn a coder` from Horton produces a `coding-agent` entity backed by a Docker sandbox, not a legacy `coder` entity backed by a host child process. +3. **Legacy `coder` is gone from the codebase.** No source files, no runtime types, no UI components, no bootstrap registration, no Horton tool reference. +4. **UI surface for `coding-agent` matches or exceeds the legacy `coder` surface.** Spawn dialog with workspace selector, chat timeline with assistant/user/tool-call rows, status dot covering all six states, Pin/Release/Stop buttons in the header, lifecycle rows rendered as muted entries. +5. **End-to-end runtime test exercises `ctx.spawnCodingAgent` from a parent entity.** Uses a real agents-server in-process; closes the test gap that hid Slice A's two manual-testing bugs (slug, flat-schema). + +## Non-goals (Slice B) + +- **Codex support.** Bridge still rejects `kind: 'codex'`. Slice C. +- **Cross-kind resume.** Same-kind only. The architecture supports it (events collection is canonical) but no UI affordance and no integration test in Slice B. +- **`provider.recover()` cleanup of orphaned containers.** Containers labeled with `electric-ax.agent-id` whose corresponding entity was never created (or was destroyed) accumulate; manual cleanup. Slice C. +- **Sandbox provenance and "shared with N" indicators in the header.** Add status enum + Pin/Release/Stop + lifecycle rows. Sandbox provenance display itself defers. +- **Conformance suite parameterized by `SandboxProvider`.** Slice C. +- **Per-event approve/deny for `permission_request`.** CLIs continue to run with `--dangerously-skip-permissions`. +- **Replay / time-travel UI scrubber.** Slice C. + +## Architecture + +``` + Entity author code + ┌──────────────────────────────────────────────────────────────┐ + │ ctx.spawnCodingAgent / ctx.observeCodingAgent (Slice A) │ ← agents-runtime + └──────────────────────────────────────────────────────────────┘ + │ + ▼ + ┌──────────────────────────────────────────────────────────────┐ + │ coding-agent entity │ ← coding-agents + │ collections: sessionMeta, runs, events, │ + │ lifecycle, nativeJsonl ← NEW in Slice B │ + │ handler now does: │ + │ - capture nativeSessionId from session_init events │ + │ - tee bridge runTurn lines into nativeJsonl │ + │ - on cold-boot, materialize prior nativeJsonl as JSONL │ + │ file inside sandbox tmpfs and pass --resume │ + └──────────────────────────────────────────────────────────────┘ + │ + ▼ + ┌─────────────────────────┐ ┌─────────────────────────────────┐ + │ StdioBridge (Slice A) │ │ LifecycleManager (Slice A) │ + │ + onNativeLine wired │ │ + boot() for eager WR rebuild │ + └─────────────────────────┘ └─────────────────────────────────┘ + │ + ▼ + ┌──────────────────────────────────────────────────────────────┐ + │ LocalDockerProvider (Slice A) — unchanged │ + └──────────────────────────────────────────────────────────────┘ +``` + +**Component-level changes from Slice A:** + +| Component | Change | +| --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `LocalDockerProvider` | Unchanged. | +| `StdioBridge` | Wire `onNativeLine` callback to emit per stdout line (Slice A type already exists). Pass `--resume ` when caller provides `nativeSessionId`. | +| `LifecycleManager` | Add `boot()` callback for eager `WorkspaceRegistry` rebuild from durable entity state. | +| `WorkspaceRegistry` | Unchanged. | +| `coding-agent` entity | +`nativeJsonl` collection; capture `nativeSessionId` from `session_init`; tee raw lines; cold-boot resume materialization; lifecycle row for `resume.restored`. | +| `agents-runtime` | Drop `CodingSessionHandle` + `useCodingAgent`; keep `CodingAgentHandle` + `spawnCodingAgent` / `observeCodingAgent`. | +| `agents` package | Drop `coding-session.ts`, `spawn-coder.ts`, `prompt-coder.ts`. Add `spawn-coding-agent.ts`, `prompt-coding-agent.ts`. Update Horton tool list. | +| `agents-server-ui` | Drop `CodingSession*` components and hook. Add `CodingAgent*` replacements. Extend status dot. Add lifecycle row renderer. Pin/Release/Stop buttons in `EntityHeader`. New `CodingAgentSpawnDialog`. | +| `agents-server` | Bootstrap calls `registerCodingAgent(...).boot()` after type registration. | +| `agents-server-conformance-tests` | Unchanged in Slice B (parameterized suite is Slice C). | + +## Public types + +### Runtime — added (or refined) + +```ts +// packages/agents-runtime/src/types.ts + +// Slice A's CodingAgentHandle keeps its surface, but events() now actually +// streams (was a snapshot). The `runId` returned by send() promise stays +// `Promise` — the durable run id is exposed via state().runs. +interface CodingAgentHandle { + readonly url: string + readonly kind: 'claude' + send(prompt: string): Promise + events(opts?: { since?: 'start' | 'now' }): AsyncIterable + state(): CodingAgentState + pin(): Promise + release(): Promise + stop(): Promise + destroy(): Promise +} + +// state() now also exposes nativeSessionId for diagnostic visibility +interface CodingAgentState { + status: CodingAgentSliceAStatus + pinned: boolean + workspace: { identity: string; sharedRefs: number } + lastError?: string + /** Slice B: the underlying claude session id, when known. */ + nativeSessionId?: string + runs: ReadonlyArray +} +``` + +### Runtime — removed + +```ts +// Deleted from packages/agents-runtime/src/types.ts: +// - interface CodingSessionHandle +// - HandlerContext.useCodingAgent +// - All CodingSessionEventRow / CodingSessionMeta / CodingSessionStatus types +// +// Deleted from packages/agents-runtime/src/context-factory.ts: +// - useCodingAgent implementation +``` + +The runtime keeps `entityUrl`, `spawn`, `observe`, `spawnCodingAgent`, `observeCodingAgent`, etc. Only the legacy-coder-specific surface is removed. + +### Entity collection — added + +```ts +// packages/coding-agents/src/entity/collections.ts + +export const CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE = + 'coding-agent.nativeJsonl' + +export const nativeJsonlRowSchema = z.object({ + /** `:` — chronological within a turn. */ + key: z.string(), + runId: z.string(), + seq: z.number(), + ts: z.number(), + /** The raw stdout line from the CLI, UTF-8, newline-stripped. */ + line: z.string(), + /** The native session id this line belongs to (claude --resume target). */ + nativeSessionId: z.string(), + /** The CLI kind (always 'claude' in Slice B; future-proofing). */ + kind: z.enum(['claude']), +}) +export type NativeJsonlRow = z.infer +``` + +The collection is registered as a fifth state collection on the entity: + +```ts +state: { + sessionMeta: { ... }, + runs: { ... }, + events: { ... }, + lifecycle: { ... }, + nativeJsonl: { schema: nativeJsonlRowSchema, + type: CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE, + primaryKey: 'key' }, +} +``` + +### `SessionMetaRow` — extended + +```ts +export const sessionMetaRowSchema = z.object({ + // ... all Slice A fields ... + nativeSessionId: z.string().optional(), // ← NEW: discovered from session_init +}) +``` + +## Resume data flow + +### Tee path (during a turn) + +``` +processPrompt + ├── ensure sandbox started + ├── on first turn: no --resume flag (claude creates a fresh session) + ├── on subsequent turn: read sessionMeta.nativeSessionId; if set, + │ materialize nativeJsonl into sandbox tmpfs (see Materialize path below) + │ and pass --resume + ├── bridge.runTurn({ + │ sandbox, kind, prompt, + │ nativeSessionId: meta.nativeSessionId, ← NEW: tells bridge to add --resume + │ onEvent: append to events collection (Slice A) + │ onNativeLine: append to nativeJsonl collection (Slice B) + │ }) + ├── If session_init event had a sessionId, write it to sessionMeta.nativeSessionId + ├── done. +``` + +The `StdioBridge` already exposes `onNativeLine?: (line: string) => void` in `RunTurnArgs` (Slice A type). Slice A's bridge implementation accumulates `rawLines` for batch normalization at end-of-turn but never invokes `onNativeLine` per-line. Slice B wires the per-line invocation. + +### Materialize path (cold-boot of agent with prior turns) + +``` +processPrompt entry, before bridge.runTurn: + if (meta.nativeSessionId) { + rows = nativeJsonlCol.toArray + .filter(r => r.nativeSessionId === meta.nativeSessionId) + .sort((a, b) => a.runId.localeCompare(b.runId) || a.seq - b.seq) + if (rows.length > 0) { + // Path inside the container — claude's expected location. + sanitized = sanitizePath(/workspace) // claude expects this transform + jsonlPath = `~/.claude/projects/${sanitized}/${meta.nativeSessionId}.jsonl` + contents = rows.map(r => r.line).join('\n') + '\n' + // Pipe via stdin to avoid quoting hell. The sandbox-side helper: + // bash -c 'mkdir -p $(dirname "$1") && cat > "$1"' _ "$path" + handle = sandbox.exec({ + cmd: ['bash', '-c', 'mkdir -p "$(dirname "$1")" && cat > "$1"', '_', jsonlPath], + stdin: 'pipe', + }) + await handle.writeStdin(contents); await handle.closeStdin() + await handle.wait() + lifecycle.insert({ event: 'resume.restored', detail: `${rows.length} lines` }) + } + } +``` + +The path-sanitization (`/workspace` → e.g. `-workspace`) follows claude's existing convention; verified during implementation against `claude-code` source. + +### Capture `nativeSessionId` + +The first `session_init` event of any turn carries the CLI's session id. The handler captures it the first time it sees one and writes to `sessionMeta.nativeSessionId`: + +```ts +onEvent: (e: NormalizedEvent) => { + if (e.type === 'session_init' && 'sessionId' in e && !meta.nativeSessionId) { + ctx.db.actions.sessionMeta_update({ + key: 'current', + updater: (d) => { d.nativeSessionId = e.sessionId }, + }) + meta = sessionMetaCol.get('current')! + } + ctx.db.actions.events_insert({ ... }) +} +``` + +### Why per-line tee (vs blob-after-turn) + +- **Partial-turn durability.** A crashed turn (server crash mid-`runTurn`) leaves the partial `nativeJsonl` in the durable stream. Reconcile on next entry sees an open run; nativeJsonl rows show how far we got. Replay starts with the same session id and the CLI sees its own partial transcript on disk. +- **No second `docker exec` per turn.** Blob-extract requires a second exec at end-of-turn to read the file out. Per-line tee uses the bridge's existing stdout stream. +- **Type already present.** `RunTurnArgs.onNativeLine` is in Slice A's API surface; we just wire it. + +### Resume semantics + +- **Same agent + same kind.** Lossless. Materialize → `--resume` → CLI sees prior turns. +- **Empty `nativeJsonl`.** First turn ever, or all prior turns failed mid-flight before producing any output. No materialization, no `--resume` flag. CLI creates a fresh session. +- **Cross-kind.** Out of scope. The handler verifies `meta.kind === args.kind` matches; mismatch is an error. +- **Mid-resume failure.** If materialization fails (e.g., `docker exec` reports non-zero), the handler logs `sandbox.failed`, sets `status='error'`, and returns. Next prompt retries. + +## Horton tool migration + +### New tools + +```ts +// packages/agents/src/tools/spawn-coding-agent.ts + +export const spawnCodingAgentTool: AgentTool = { + type: 'function', + name: 'spawn_coding_agent', + description: + 'Spawn a sandboxed coding agent (Claude Code in Docker) and prompt it. ' + + "Returns the agent's response when the run finishes. Use for non-trivial " + + 'code edits, multi-file changes, or work that needs filesystem isolation.', + parameters: { + /* zod schema: prompt: string, workspaceName?: string */ + }, + async execute(args, ctx) { + const id = nanoid(10) + const handle = await ctx.spawnCodingAgent({ + id, + kind: 'claude', + workspace: args.workspaceName + ? { type: 'volume', name: args.workspaceName } + : { type: 'volume' }, + initialPrompt: args.prompt, + wake: { on: 'runFinished', includeResponse: true }, + }) + // Wait for the run to finish via existing entity-runtime wake flow. + // The result returns from the parent's runFinished wake payload. + return { + content: [{ type: 'text', text: 'Spawned' }], + details: { spawned: true, codingAgentUrl: handle.url }, + } + }, +} +``` + +```ts +// packages/agents/src/tools/prompt-coding-agent.ts + +export const promptCodingAgentTool: AgentTool = { + type: 'function', + name: 'prompt_coding_agent', + description: 'Send a follow-up prompt to an existing coding-agent.', + parameters: { + /* zod schema: codingAgentUrl, prompt */ + }, + async execute(args, ctx) { + const handle = await ctx.observeCodingAgent(extractId(args.codingAgentUrl)) + await handle.send(args.prompt) + return { + content: [{ type: 'text', text: 'Sent' }], + details: { sent: true, codingAgentUrl: handle.url }, + } + }, +} +``` + +The new tools' parameter shapes intentionally mirror `spawn_coder` / `prompt_coder` for consumer transparency: a `prompt` field, a optional id-or-url field. The tool result `details` keys are renamed (`coderUrl` → `codingAgentUrl`) to match the new entity name. + +### Horton wiring + +`packages/agents/src/agents/horton.ts` swaps `spawn_coder` and `prompt_coder` for the new pair in its tool list. Tool descriptions are updated to mention sandboxing and workspace sharing. Existing Horton tests that mock `spawn_coder` are updated to mock `spawn_coding_agent`. + +## Legacy `coder` removal + +### Files deleted + +- `packages/agents/src/agents/coding-session.ts` (~800 LOC) +- `packages/agents/src/tools/spawn-coder.ts` +- `packages/agents/src/tools/prompt-coder.ts` +- `packages/agents-server-ui/src/components/CodingSessionView.tsx` +- `packages/agents-server-ui/src/components/CodingSessionTimeline.tsx` +- `packages/agents-server-ui/src/components/CodingSessionSpawnDialog.tsx` +- `packages/agents-server-ui/src/hooks/useCodingSession.ts` + +### Runtime types removed + +```ts +// packages/agents-runtime/src/types.ts +// - interface CodingSessionHandle +// - HandlerContext.useCodingAgent +// - CodingSessionMeta, CodingSessionStatus, CodingSessionEventRow +// - UseCodingAgentOptions +// - CODING_SESSION_*_COLLECTION_TYPE re-exports +// +// packages/agents-runtime/src/context-factory.ts +// - useCodingAgent impl in createHandlerContext() +``` + +### Bootstrap + +```ts +// packages/agents/src/bootstrap.ts (after Slice B) +// +// REMOVED: +// import { registerCodingSession } from './agents/coding-session' +// registerCodingSession(registry, { defaultWorkingDirectory: cwd }) +// typeNames.push('coder') +// +// KEPT (Slice A): +// import { registerCodingAgent, LocalDockerProvider, StdioBridge } +// from '@electric-ax/coding-agents' +// const codingAgent = registerCodingAgent(registry, { +// provider: new LocalDockerProvider(), +// bridge: new StdioBridge(), +// }) +// typeNames.push('coding-agent') +// +// NEW (Slice B): eager workspace-registry rebuild before serving traffic +// await codingAgent.boot() +``` + +### Existing `coder` durable streams + +Existing `coder` entities in users' dev environments reference an entity type that no longer exists post-migration. The agents-server returns 404 for unknown types when listing or rendering. The UI's "all entities" sidebar filters out unknown types (already does this for the legacy `worker` entity that's also hidden). No data is migrated; users with active `coder` sessions are informed in the slice's release notes. + +## UI revamp + +### New components + +| Component | Replaces | Wires | +| ------------------------ | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `CodingAgentView` | `CodingSessionView` | `useCodingAgent` hook; renders timeline + input + state explorer panel. | +| `CodingAgentTimeline` | `CodingSessionTimeline` | `events` + `lifecycle` collections; renders both via `EntityTimelineEntry` + new `LifecycleRow`. | +| `useCodingAgent` | `useCodingSession` | Reads `coding-agent` collections via collection-type wires. | +| `CodingAgentSpawnDialog` | `CodingSessionSpawnDialog` | Workspace selector (volume name field, bind-mount path field), kind locked to 'claude'. | +| `LifecycleRow` | (new) | Renders a `lifecycle` collection row (sandbox.start/stopped/failed, pin/release, orphan.detected, resume.restored) as a muted, single-line entry distinct from chat events. | + +### Status dot extension + +```ts +// packages/agents-server-ui/src/components/StatusDot.tsx +const STATUS_COLORS: Record = { + // existing + spawning: '#eab308', // amber + idle: '#22c55e', // green + running: '#3b82f6', // blue + error: '#ef4444', // red + // Slice B additions + cold: '#9ca3af', // gray + starting: '#eab308', // amber (matches spawning) + stopping: '#eab308', // amber + destroyed: '#6b7280', // dim gray +} +``` + +### Header buttons (when entity type is `coding-agent`) + +`EntityHeader.tsx` adds three buttons next to the existing pin/kill controls: + +- **Pin** — sends `{}, type: 'pin'` inbox message. Disabled when `meta.pinned`. +- **Release** — sends `{}, type: 'release'`. Disabled when `!meta.pinned`. +- **Stop** — sends `{}, type: 'stop'`. Confirmation dialog on click (the sandbox-stop is reversible by next prompt, but explicit). + +The existing global "kill" button is kept for `destroy` (drops the workspace ref + tombstones the entity). The pin/release/stop trio are entity-type-specific affordances. + +### Spawn dialog + +`CodingAgentSpawnDialog` is a small bespoke dialog (not the generic `SpawnArgsDialog`) because: + +- The `creationSchema` is flat from Slice A's flat-schema fix, but a workspace-mode toggle (volume vs bindMount) reads better as a radio than as two separate optional text inputs. +- The dialog can autocomplete existing volume names by querying `docker volume ls --filter label=...` — but this requires server-side support that's out of scope for Slice B. The Slice B dialog is just two radio options + corresponding text inputs. + +``` +┌──────────── Spawn Coding Agent ─────────────┐ +│ Workspace │ +│ ◉ Volume ○ Bind mount │ +│ Name (optional): [_____________________] │ +│ Defaults to a per-agent slugged name. │ +│ │ +│ Initial prompt (optional) │ +│ [_______________________________________] │ +│ │ +│ [Cancel] [Spawn] │ +└─────────────────────────────────────────────┘ +``` + +When "Bind mount" is selected, "Name" is replaced with "Host path: [text input, validated as absolute path]". + +### Lifecycle row rendering + +Lifecycle rows are interleaved with `events` rows by timestamp in the timeline. Visual distinction: + +- Muted background (`var(--gray-a3)`). +- One-line summary: e.g. "▸ sandbox started (instance abc-123)". +- Click expands to show `detail` field (if present). + +### Router changes + +```ts +// packages/agents-server-ui/src/router.tsx (after Slice B) +// +// REMOVED: +// if (selectedEntity.type === CODING_SESSION_ENTITY_TYPE) { CodingSessionView ... } +// +// REPLACED WITH: +// if (selectedEntity.type === CODING_AGENT_ENTITY_TYPE) { +// +// } +``` + +### Sidebar changes + +`Sidebar.tsx` swaps: + +- `setCodingDialogOpen(true)` → `setCodingAgentDialogOpen(true)` for the new entity type. +- Tool-call rendering (`ToolCallView.tsx`): label `spawn_coder` → `spawn_coding_agent`, `prompt_coder` → `prompt_coding_agent`. + +## `WorkspaceRegistry` eager rebuild + +```ts +// packages/coding-agents/src/entity/register.ts (after Slice B) + +interface CodingAgentRegistration { + /** Eager WR + recover sync. Call after registry.registerTypes(). */ + boot: () => Promise +} + +export function registerCodingAgent( + registry: EntityRegistry, + deps: RegisterCodingAgentDeps +): CodingAgentRegistration { + // ... Slice A registry.define logic ... + + return { + async boot() { + // 1. Scan all coding-agent entities' sessionMeta from durable state + // via the agents-server's entity-bridge API. Populate WR with + // workspaceIdentity → agentId mapping. + const allEntities = await deps.scanEntities('coding-agent') + wr.rebuild( + allEntities.map((e) => ({ + identity: e.sessionMeta.workspaceIdentity, + agentId: e.url, + })) + ) + + // 2. Provider recovery: list containers labeled with our agentIds. + // Just informational in Slice B; no automatic cleanup. + const recovered = await lm.adoptRunningContainers() + log.info({ count: recovered.length }, 'recovered sandboxes') + }, + } +} +``` + +`deps.scanEntities` is a new dependency injected by the bootstrap. The bootstrap supplies a function that calls into the agents-server's entity store API. The dependency seam keeps `coding-agents` independent of the agents-server (no direct import). + +```ts +// packages/agents/src/bootstrap.ts + +const codingAgentRegistration = registerCodingAgent(registry, { + provider: new LocalDockerProvider(), + bridge: new StdioBridge(), + scanEntities: async (type) => { + return runtimeServerClient.listEntities({ type }).then((rows) => + rows.map((r) => ({ + url: r.url, + sessionMeta: r.collections.sessionMeta?.get('current'), + })) + ) + }, +}) +typeNames.push('coding-agent') +// ... after registry sync: +await codingAgentRegistration.boot() +``` + +## State machine — unchanged from Slice A + +The 7-state machine (`cold | starting | idle | running | stopping | error | destroyed`) is the same. Resume materialization happens **inside the `STARTING → IDLE` transition** of `processPrompt`, immediately after `provider.start` succeeds and immediately before the workspace lease is acquired: + +``` +COLD → STARTING (provider.start) +STARTING → STARTING (resume.materialize, if meta.nativeSessionId set) +STARTING → IDLE +IDLE → RUNNING (lease acquire + recordRun + bridge.runTurn) +RUNNING → IDLE +``` + +The `resume.restored` lifecycle row is inserted between materialization and lease acquisition. + +## Error handling + +- **Materialization failure** (docker exec non-zero, broken pipe). Mark `sessionMeta.status='error'`, `lastError`, lifecycle row `sandbox.failed` with `detail='materialize'`. Run is not started. Next prompt retries — same `nativeSessionId`, same `nativeJsonl` rows, fresh attempt. +- **Bridge runs but `--resume` rejects** (claude returns non-zero with "session not found"). The CLI's transcript got out of sync. Clear `sessionMeta.nativeSessionId`, run completes with `failed: cli-exit:resume-rejected`. Next prompt cold-boots a fresh session (no `--resume` flag). +- **`session_init` event missing or has no `sessionId`** (CLI bug or model-API failure). `nativeSessionId` stays `undefined`. The next turn cold-boots fresh (same as a first turn). No data corruption. +- **Eager `boot()` fails** (entity scan errors out, LMDB locked, etc.). Server boot fails fast — better to surface the error than serve traffic with a half-populated registry. The error message includes which entity caused the failure. +- **`boot()` finds entities the runtime can't load** (orphaned coder durable streams post-migration). Skip with a warning; do not abort. + +## Testing strategy + +### Layer 1 — Unit (no Docker) + +- **`resume.test.ts`** — `materializeNativeJsonl(rows, sessionId, exec)` constructs the right `bash -c` argv, pipes the right concatenated content to stdin, calls into a fake `exec` correctly. Idempotency: re-materialize from the same rows produces a byte-identical file. +- **`session-init-capture.test.ts`** — given a fake bridge that emits a `session_init` with `sessionId='abc'`, the handler writes `'abc'` to `sessionMeta.nativeSessionId`. A second `session_init` in the same run is ignored. +- **Existing entity-handler tests** — extended to cover the resume branch: prompt with `meta.nativeSessionId` set → materialization called before lease acquire. +- **`spawn-coding-agent.test.ts`, `prompt-coding-agent.test.ts`** — the new Horton tools; assert they desugar to `ctx.spawnCodingAgent` / `ctx.observeCodingAgent` and return the right `details` shape. +- **UI component tests** — `LifecycleRow` rendering, `StatusDot` color map covers all seven states, `CodingAgentSpawnDialog` form-validation (volume vs bind-mount toggle). +- **Removed:** all legacy `useCodingSession` / `CodingSession*` / `coder` / `spawn-coder` tests. + +### Layer 2 — Integration (real Docker, real Claude) + +- **`resume-end-to-end.test.ts`** — spawn a `coding-agent`, send "remember the number 42", await runFinished, send a second prompt "what number did I tell you?", await runFinished. Assert second response contains "42". Validates the tee + materialize round-trip. +- **`spawn-end-to-end.test.ts`** — drive an in-process agents-server. Use a parent test entity that calls `ctx.spawnCodingAgent({ workspace: { type: 'volume' } })`. Verify the entity is created with the correct flat creationSchema args, the handler runs, the run completes with response text. **Closes the gap that hid Slice A's slug + flat-schema bugs.** +- **Existing `slice-a.test.ts`** — kept; verifies all Slice A invariants (lease serialization, crash recovery, destroy) still hold post-migration. +- All gated by `DOCKER=1`. Image already cached locally. + +### Layer 3 — UI tests + +- Component tests for `CodingAgentView`, `CodingAgentTimeline`, `LifecycleRow`, `CodingAgentSpawnDialog`. +- No new e2e browser tests in Slice B (browser e2e is Slice C's conformance suite). + +### Manual smoke checklist + +- Spawn a fresh `coding-agent` from the UI; send "Reply with the single word: ok"; assert response shows in timeline. +- Send a second message; assert it's resumed (response references the first turn's content). +- Pin → wait > idle timeout → container stays up. Release → wait → container stops. +- Send another prompt → cold-boot path materializes, response received. +- Stop → status flips `cold`. Send another prompt → fresh boot. +- Destroy → entity tombstoned; UI hides it (or shows tombstone marker). +- Have Horton spawn a coder ("write a hello world script") → ✓ produces a `coding-agent` entity (not a legacy `coder`). Visible in sidebar with the new entity type. + +## Migration + +This is a **destructive migration**. The legacy `coder` entity, its tools, its UI, and its runtime types are all removed in the same merge. There is no shim, no backwards-compat alias, no opt-in flag. Existing `coder` durable streams in dev environments remain in storage but become unreachable (no entity type registered to read them). + +**Release notes (for the PR description and CHANGELOG):** + +- The `coder` entity type is removed. Use `coding-agent` instead. +- `ctx.useCodingAgent` is removed. Use `ctx.spawnCodingAgent` / `ctx.observeCodingAgent`. +- The `spawn_coder` and `prompt_coder` Horton tools are removed. Use `spawn_coding_agent` and `prompt_coding_agent`. +- Existing `coder` entities in dev environments are dropped. Re-spawn as `coding-agent` after upgrade. +- The wire constants `CODING_SESSION_*_COLLECTION_TYPE` are removed. The new `CODING_AGENT_*_COLLECTION_TYPE` constants are exported by `@electric-ax/coding-agents`. + +## Open questions + +- **Path-sanitization for the JSONL file location.** Claude transforms the `cwd` into a directory name under `~/.claude/projects/` via a specific algorithm. We must replicate it (or call into a claude-code helper if one exists). Resolve during writing-plans by reading the claude-code source. +- **`scanEntities` API on the runtime.** The boot() integration depends on a server-side function that lists entities by type. Confirm the agents-server exposes this (or add a thin wrapper around the existing entity-bridge). Resolve during writing-plans. +- **Lifecycle row collation with events.** The timeline needs to merge two collections by timestamp. Existing `EntityTimeline` reads `events` only; we need to extend it (or have `useCodingAgent` produce a merged feed). Pick during implementation. + +## Scope cuts referenced from Slice B + +Carried forward, **deferred** to Slice C or beyond: + +- Codex support in the bridge. +- Cross-kind resume. +- `provider.recover()` orphan-container cleanup. +- Sandbox provenance display in the header (provider name, "shared with N"). +- Workspace volume autocomplete in the spawn dialog. +- Conformance suite parameterized by `SandboxProvider`. +- Per-event approve/deny for `permission_request`. +- Replay / time-travel UI scrubber. +- Workspace file browser. +- Memory-snapshot lifecycle. + +## References + +- `docs/superpowers/specs/2026-04-30-coding-agents-platform-primitive-design.md` — parent design. +- `docs/superpowers/specs/2026-04-30-coding-agents-slice-a-design.md` — Slice A design. +- `docs/superpowers/specs/notes/2026-04-30-coding-agents-slice-a-report.md` — Slice A run report (with the Slice B priority list this spec executes). +- `packages/coding-agents/src/bridge/stdio-bridge.ts` — bridge with `onNativeLine` already typed (Slice A) but not wired. +- `packages/coding-agents/src/entity/handler.ts` — Slice A handler the resume path extends. +- `packages/agents/src/agents/coding-session.ts` — legacy entity to be removed. +- `packages/agents/src/tools/spawn-coder.ts`, `prompt-coder.ts` — legacy tools to be removed. +- `packages/agents-server-ui/src/components/CodingSession*.tsx`, `useCodingSession.ts` — legacy UI to be removed. +- `packages/agents-server-ui/src/router.tsx:158` — coder-specific routing branch to be replaced. From b395211e4d83112ce5632c390fa4f4b4240d0bd8 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 15:05:58 +0100 Subject: [PATCH 031/279] docs(specs): defer Slice B eager WR rebuild to Slice C The eager rebuild was scoped here to support state().workspace.sharedRefs accuracy after server restart, but the UI indicator consuming that field (sandbox provenance / 'shared with N' header) is also Slice C. Defer eager rebuild to land alongside its consumer; keep Slice A's lazy per-agent rebuild on first handler entry. --- ...2026-04-30-coding-agents-slice-b-design.md | 73 +++---------------- 1 file changed, 10 insertions(+), 63 deletions(-) diff --git a/docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md b/docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md index 56e5ca5531..706ae0f46c 100644 --- a/docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md +++ b/docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md @@ -34,6 +34,7 @@ After Slice B, the new `coding-agent` is the **only** coding-agent type in the c - **Codex support.** Bridge still rejects `kind: 'codex'`. Slice C. - **Cross-kind resume.** Same-kind only. The architecture supports it (events collection is canonical) but no UI affordance and no integration test in Slice B. - **`provider.recover()` cleanup of orphaned containers.** Containers labeled with `electric-ax.agent-id` whose corresponding entity was never created (or was destroyed) accumulate; manual cleanup. Slice C. +- **Eager `WorkspaceRegistry` rebuild at server boot.** Slice A's lazy populate (per agent on first handler entry) is kept. The eager-rebuild via `boot()` was originally in this slice to support accurate `state().workspace.sharedRefs` after server restart, but the UI indicator that consumes that field — sandbox provenance / "shared with N" header — is also Slice C. Defer eager rebuild to land alongside its consumer. - **Sandbox provenance and "shared with N" indicators in the header.** Add status enum + Pin/Release/Stop + lifecycle rows. Sandbox provenance display itself defers. - **Conformance suite parameterized by `SandboxProvider`.** Slice C. - **Per-event approve/deny for `permission_request`.** CLIs continue to run with `--dangerously-skip-permissions`. @@ -62,7 +63,8 @@ After Slice B, the new `coding-agent` is the **only** coding-agent type in the c ▼ ┌─────────────────────────┐ ┌─────────────────────────────────┐ │ StdioBridge (Slice A) │ │ LifecycleManager (Slice A) │ - │ + onNativeLine wired │ │ + boot() for eager WR rebuild │ + │ + onNativeLine wired │ │ Unchanged │ + │ + --resume │ │ │ └─────────────────────────┘ └─────────────────────────────────┘ │ ▼ @@ -77,7 +79,7 @@ After Slice B, the new `coding-agent` is the **only** coding-agent type in the c | --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `LocalDockerProvider` | Unchanged. | | `StdioBridge` | Wire `onNativeLine` callback to emit per stdout line (Slice A type already exists). Pass `--resume ` when caller provides `nativeSessionId`. | -| `LifecycleManager` | Add `boot()` callback for eager `WorkspaceRegistry` rebuild from durable entity state. | +| `LifecycleManager` | Unchanged. | | `WorkspaceRegistry` | Unchanged. | | `coding-agent` entity | +`nativeJsonl` collection; capture `nativeSessionId` from `session_init`; tee raw lines; cold-boot resume materialization; lifecycle row for `resume.restored`. | | `agents-runtime` | Drop `CodingSessionHandle` + `useCodingAgent`; keep `CodingAgentHandle` + `spawnCodingAgent` / `observeCodingAgent`. | @@ -372,8 +374,9 @@ The new tools' parameter shapes intentionally mirror `spawn_coder` / `prompt_cod // }) // typeNames.push('coding-agent') // -// NEW (Slice B): eager workspace-registry rebuild before serving traffic -// await codingAgent.boot() +// NOTE: Eager WR rebuild via `boot()` was originally proposed for Slice B, +// but is deferred to Slice C alongside its UI consumer. Slice A's lazy +// per-agent rebuild on first handler entry is kept. ``` ### Existing `coder` durable streams @@ -472,65 +475,9 @@ Lifecycle rows are interleaved with `events` rows by timestamp in the timeline. - `setCodingDialogOpen(true)` → `setCodingAgentDialogOpen(true)` for the new entity type. - Tool-call rendering (`ToolCallView.tsx`): label `spawn_coder` → `spawn_coding_agent`, `prompt_coder` → `prompt_coding_agent`. -## `WorkspaceRegistry` eager rebuild +## `WorkspaceRegistry` rebuild — deferred -```ts -// packages/coding-agents/src/entity/register.ts (after Slice B) - -interface CodingAgentRegistration { - /** Eager WR + recover sync. Call after registry.registerTypes(). */ - boot: () => Promise -} - -export function registerCodingAgent( - registry: EntityRegistry, - deps: RegisterCodingAgentDeps -): CodingAgentRegistration { - // ... Slice A registry.define logic ... - - return { - async boot() { - // 1. Scan all coding-agent entities' sessionMeta from durable state - // via the agents-server's entity-bridge API. Populate WR with - // workspaceIdentity → agentId mapping. - const allEntities = await deps.scanEntities('coding-agent') - wr.rebuild( - allEntities.map((e) => ({ - identity: e.sessionMeta.workspaceIdentity, - agentId: e.url, - })) - ) - - // 2. Provider recovery: list containers labeled with our agentIds. - // Just informational in Slice B; no automatic cleanup. - const recovered = await lm.adoptRunningContainers() - log.info({ count: recovered.length }, 'recovered sandboxes') - }, - } -} -``` - -`deps.scanEntities` is a new dependency injected by the bootstrap. The bootstrap supplies a function that calls into the agents-server's entity store API. The dependency seam keeps `coding-agents` independent of the agents-server (no direct import). - -```ts -// packages/agents/src/bootstrap.ts - -const codingAgentRegistration = registerCodingAgent(registry, { - provider: new LocalDockerProvider(), - bridge: new StdioBridge(), - scanEntities: async (type) => { - return runtimeServerClient.listEntities({ type }).then((rows) => - rows.map((r) => ({ - url: r.url, - sessionMeta: r.collections.sessionMeta?.get('current'), - })) - ) - }, -}) -typeNames.push('coding-agent') -// ... after registry sync: -await codingAgentRegistration.boot() -``` +Slice A's lazy populate (per-agent on first handler entry) is kept. Eager rebuild via a new `boot()` callback was scoped here originally but is deferred to Slice C alongside the UI's "shared with N agents" header indicator that consumes `state().workspace.sharedRefs`. Without that consumer, eager rebuild adds runtime contract surface (`scanEntities` dependency) for no user-visible benefit. ## State machine — unchanged from Slice A @@ -602,7 +549,7 @@ This is a **destructive migration**. The legacy `coder` entity, its tools, its U ## Open questions - **Path-sanitization for the JSONL file location.** Claude transforms the `cwd` into a directory name under `~/.claude/projects/` via a specific algorithm. We must replicate it (or call into a claude-code helper if one exists). Resolve during writing-plans by reading the claude-code source. -- **`scanEntities` API on the runtime.** The boot() integration depends on a server-side function that lists entities by type. Confirm the agents-server exposes this (or add a thin wrapper around the existing entity-bridge). Resolve during writing-plans. +- **`scanEntities` API on the runtime.** No longer needed — eager rebuild is deferred to Slice C alongside the UI consumer. (Resolved by deferral.) - **Lifecycle row collation with events.** The timeline needs to merge two collections by timestamp. Existing `EntityTimeline` reads `events` only; we need to extend it (or have `useCodingAgent` produce a merged feed). Pick during implementation. ## Scope cuts referenced from Slice B From b24a438ae1102c00260cda1a3ab3b86d4cffd5a0 Mon Sep 17 00:00:00 2001 From: Valter Balegas Date: Thu, 30 Apr 2026 15:27:43 +0100 Subject: [PATCH 032/279] docs(plans): add Slice B implementation plan for coding-agents migration --- .../plans/2026-04-30-coding-agents-slice-b.md | 3030 +++++++++++++++++ 1 file changed, 3030 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-30-coding-agents-slice-b.md diff --git a/docs/superpowers/plans/2026-04-30-coding-agents-slice-b.md b/docs/superpowers/plans/2026-04-30-coding-agents-slice-b.md new file mode 100644 index 0000000000..cd4281cf78 --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-coding-agents-slice-b.md @@ -0,0 +1,3030 @@ +# Coding Agents — Slice B Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Complete the coding-agent platform-primitive migration: wire resume (nativeJsonl collection + `--resume` flag), swap Horton from legacy `coder` to `coding-agent`, delete the legacy `coder` entity and all legacy runtime types, and ship a `CodingAgentView` / `CodingAgentTimeline` / `CodingAgentSpawnDialog` UI surface wired to the new entity's collections. Validation bar: unit tests for resume materialisation, Horton tool swap verified by handler unit test, and an integration test that sends two prompts to the same `coding-agent` and asserts the second run's response references the first prompt's content (proving resume is lossless). + +**Architecture:** `nativeJsonl` is a new fifth collection on the `coding-agent` entity. The handler tees each raw JSONL line from `bridge.runTurn` into the collection via `onNativeLine`. On cold-boot of an agent with prior `nativeJsonl` rows, the handler calls `sandbox.exec` to write the lines into `/tmp/resume.jsonl`, extracts `nativeSessionId` from `sessionMeta`, and passes `--resume ` to `StdioBridge.runTurn`. `StdioBridge` no longer warns; it passes the id through. Horton's `createHortonTools` switches from `createSpawnCoderTool` / `createPromptCoderTool` (legacy `coder`) to new `createSpawnCodingAgentTool` / `createPromptCodingAgentTool` (new `coding-agent`). Legacy files (`coding-session.ts`, `spawn-coder.ts`) and their runtime types are deleted. UI adds `CodingAgentView`, `useCodingAgent`, `CodingAgentTimeline`, `CodingAgentSpawnDialog`; router and sidebar switch on `'coding-agent'` instead of `CODING_SESSION_ENTITY_TYPE`. + +**Spec divergences (resolved):** + +- **`onNativeLine` already wired in `StdioBridge`.** Lines 51-56 of `bridge/stdio-bridge.ts` already call `args.onNativeLine(line)` in `drainStdout`. Task 1.1 needs only a unit test (not a re-implementation). Task 1.2 adds the actual `--resume` argument. +- **Horton tool validation string in `prompt_coding_agent`.** Legacy `prompt_coder` validated `coder_url.startsWith('/coder/')`. New tool validates `coding_agent_url.startsWith('/coding-agent/')`. +- **UI "Pin/Release/Stop" buttons ship as message sends**, not as a special RPC. They call `ctx.db.actions` on the inbox of the entity to send `pin`, `release`, or `stop` messages (same as the test's `pushInbox`). The `EntityHeader` receives the `db` object when `entity.type === 'coding-agent'`. +- **E2E test uses the FakeCtx pattern** from `test/integration/slice-a.test.ts` extended with a `nativeJsonl` collection stub, not the `agents-server` docker-compose harness. The `agents-server` harness requires an external postgres+electric stack and is out of scope for Slice B. + +**Tech Stack:** TypeScript, Vitest, React, `@radix-ui/themes`, `lucide-react`, `zod`, Docker (integration test only). + +**Reference spec:** `docs/superpowers/specs/2026-04-30-coding-agents-slice-b-design.md` + +--- + +## File Structure + +``` +packages/coding-agents/ ← extend +├── src/ +│ ├── index.ts ← +CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE +│ ├── entity/ +│ │ ├── collections.ts ← +nativeJsonl schema, +nativeSessionId on sessionMeta +│ │ ├── handler.ts ← +tee onNativeLine, +resume materialisation, +nativeSessionId capture +│ │ └── register.ts ← +nativeJsonl state entry +│ └── bridge/stdio-bridge.ts ← remove warning, add --resume when nativeSessionId present +└── test/ + ├── unit/ + │ ├── stdio-bridge-resume.test.ts ← NEW: --resume arg wired unit test + │ └── handler-resume.test.ts ← NEW: tee + materialise unit tests + └── integration/ + └── slice-b.test.ts ← NEW: lossless resume integration test + +packages/agents/src/ +├── bootstrap.ts ← remove registerCodingSession + 'coder' push +├── tools/ +│ ├── spawn-coder.ts ← DELETE (legacy) +│ ├── spawn-coding-agent.ts ← NEW +│ └── prompt-coding-agent.ts ← NEW +└── agents/ + ├── coding-session.ts ← DELETE (legacy) + └── horton.ts ← swap imports + tool list + system prompt + +packages/agents-runtime/src/ +├── types.ts ← delete legacy Coding Session types/interface +├── context-factory.ts ← delete useCodingAgent impl +└── index.ts ← remove legacy exports + +packages/agents-server-ui/src/ +├── components/ +│ ├── StatusDot.tsx ← +coding-agent status colors +│ ├── EntityHeader.tsx ← +Pin/Release/Stop for coding-agent +│ ├── ToolCallView.tsx ← +spawn_coding_agent, prompt_coding_agent cases +│ ├── CodingAgentView.tsx ← NEW +│ ├── CodingAgentTimeline.tsx ← NEW +│ └── CodingAgentSpawnDialog.tsx ← NEW +├── hooks/ +│ └── useCodingAgent.ts ← NEW +└── router.tsx ← swap CODING_SESSION_ENTITY_TYPE → 'coding-agent' + +packages/agents-server-ui/src/components/Sidebar.tsx ← swap coder dialog → CodingAgentSpawnDialog + +docs/superpowers/specs/notes/ +└── 2026-04-30-coding-agents-slice-b-report.md ← NEW (Phase 8) +``` + +--- + +## Phase Plan + +| Phase | Tasks | Parallelism | Depends on | +| ----- | ------------------ | ---------------------------------------------------- | ---------- | +| 0 | 0.1 | sequential | — | +| 1 | 1.1, 1.2, 1.3, 1.4 | 1.1 + 1.2 parallel; 1.3 after 1.1+1.2; 1.4 after 1.3 | Phase 0 | +| 2 | 2.1, 2.2, 2.3 | sequential | Phase 1 | +| 3 | 3.1, 3.2 | parallel (2 independent agents) | Phase 2 | +| 4 | 4.1, 4.2, 4.3, 4.4 | 4.1–4.3 parallel; 4.4 after all | Phase 3 | +| 5 | 5.1 | sequential | Phase 4 | +| 6 | 6.1 | sequential | Phase 5 | +| 7 | 7.1 | sequential | Phase 6 | +| 8 | 8.1 (report) | sequential | Phase 7 | + +Total tasks: 15 (excluding report). Estimated wall time per task: 15-40 min. + +--- + +## Phase 0 — Extend collections + sessionMeta schema (sequential) + +### Task 0.1 — Add `nativeJsonl` collection and `nativeSessionId` to `sessionMeta` + +**Files:** + +- Modify: `packages/coding-agents/src/entity/collections.ts` +- Modify: `packages/coding-agents/src/index.ts` + +- [ ] **Step 1: Edit `packages/coding-agents/src/entity/collections.ts`** + +Add the constant, schema, and type after the existing `lifecycleRowSchema`. Also add `nativeSessionId` to `sessionMetaRowSchema`. + +```ts +// packages/coding-agents/src/entity/collections.ts +import { z } from 'zod' + +export const CODING_AGENT_SESSION_META_COLLECTION_TYPE = `coding-agent.sessionMeta` +export const CODING_AGENT_RUNS_COLLECTION_TYPE = `coding-agent.runs` +export const CODING_AGENT_EVENTS_COLLECTION_TYPE = `coding-agent.events` +export const CODING_AGENT_LIFECYCLE_COLLECTION_TYPE = `coding-agent.lifecycle` +export const CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE = `coding-agent.nativeJsonl` + +export const codingAgentStatusSchema = z.enum([ + `cold`, + `starting`, + `idle`, + `running`, + `stopping`, + `error`, + `destroyed`, +]) +export type CodingAgentStatus = z.infer + +export const sessionMetaRowSchema = z.object({ + key: z.literal(`current`), + status: codingAgentStatusSchema, + kind: z.enum([`claude`]), + pinned: z.boolean(), + workspaceIdentity: z.string(), + workspaceSpec: z.discriminatedUnion(`type`, [ + z.object({ + type: z.literal(`volume`), + name: z.string(), + }), + z.object({ + type: z.literal(`bindMount`), + hostPath: z.string(), + }), + ]), + idleTimeoutMs: z.number(), + keepWarm: z.boolean(), + instanceId: z.string().optional(), + lastError: z.string().optional(), + currentPromptInboxKey: z.string().optional(), + lastInboxKey: z.string().optional(), + nativeSessionId: z.string().optional(), // ← NEW in Slice B +}) +export type SessionMetaRow = z.infer + +export const runRowSchema = z.object({ + key: z.string(), + startedAt: z.number(), + endedAt: z.number().optional(), + status: z.enum([`running`, `completed`, `failed`]), + finishReason: z.string().optional(), + promptInboxKey: z.string(), + responseText: z.string().optional(), +}) +export type RunRow = z.infer + +export const eventRowSchema = z.object({ + key: z.string(), + runId: z.string(), + seq: z.number(), + ts: z.number(), + type: z.string(), + payload: z.looseObject({}), +}) +export type EventRow = z.infer + +export const lifecycleRowSchema = z.object({ + key: z.string(), + ts: z.number(), + event: z.enum([ + `sandbox.starting`, + `sandbox.started`, + `sandbox.stopped`, + `sandbox.failed`, + `pin`, + `release`, + `orphan.detected`, + `resume.restored`, // ← NEW in Slice B + ]), + detail: z.string().optional(), +}) +export type LifecycleRow = z.infer + +// ─── nativeJsonl — NEW in Slice B ──────────────────────────────────────────── + +export const nativeJsonlRowSchema = z.object({ + key: z.string(), // `${runId}:${seq}` — sortable + runId: z.string(), + seq: z.number(), + line: z.string(), // raw JSONL line from claude CLI stdout +}) +export type NativeJsonlRow = z.infer +``` + +- [ ] **Step 2: Edit `packages/coding-agents/src/index.ts`** + +Add `CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE` to the existing collection-type re-exports: + +```ts +export { + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE, // ← add this line +} from './entity/collections' +``` + +- [ ] **Step 3: Verify TypeScript compiles** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/coding-agents/src/entity/collections.ts packages/coding-agents/src/index.ts +git commit -m "feat(coding-agents): add nativeJsonl collection schema and nativeSessionId to sessionMeta" +``` + +--- + +## Phase 1 — StdioBridge resume wiring + handler tee + capture + materialise (sequential-ish) + +### Task 1.1 — Unit test for existing `onNativeLine` wiring (already implemented) + +**Context:** `onNativeLine` is already wired in `bridge/stdio-bridge.ts` lines 51-56: + +```ts +if (args.onNativeLine) args.onNativeLine(line) +``` + +This task only adds a unit test to lock the behaviour. + +**Files:** + +- Create: `packages/coding-agents/test/unit/stdio-bridge-resume.test.ts` + +- [ ] **Step 1: Write the unit test** + +```ts +// packages/coding-agents/test/unit/stdio-bridge-resume.test.ts +import { describe, it, expect, vi } from 'vitest' +import { StdioBridge } from '../../src/bridge/stdio-bridge' +import type { SandboxInstance, RunTurnArgs } from '../../src/types' + +/** + * Minimal sandbox double: exec returns a fake handle whose stdout + * yields the lines we supply, stderr is empty, and wait() returns 0. + */ +function makeFakeSandbox(stdoutLines: string[]): SandboxInstance { + const handle = { + stdout: (async function* () { + for (const l of stdoutLines) yield l + })(), + stderr: (async function* () {})(), + writeStdin: vi.fn().mockResolvedValue(undefined), + closeStdin: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue({ exitCode: 0 }), + } + return { + instanceId: `fake-instance`, + workspaceMount: `/workspace`, + exec: vi.fn().mockResolvedValue(handle), + destroy: vi.fn(), + } as unknown as SandboxInstance +} + +describe(`StdioBridge — onNativeLine`, () => { + it(`calls onNativeLine for every non-empty stdout line`, async () => { + // Minimal valid claude stream-json: session_init + result line. + const lines = [ + JSON.stringify({ + type: `system`, + subtype: `init`, + session_id: `sess-1`, + tools: [], + mcp_servers: [], + }), + JSON.stringify({ + type: `result`, + subtype: `success`, + result: `ok`, + session_id: `sess-1`, + is_error: false, + }), + ] + const sandbox = makeFakeSandbox(lines) + const bridge = new StdioBridge() + const received: string[] = [] + + await bridge.runTurn({ + sandbox, + kind: `claude`, + prompt: `hello`, + onEvent: () => undefined, + onNativeLine: (l) => received.push(l), + } as RunTurnArgs) + + expect(received).toEqual(lines) + }) + + it(`does not call onNativeLine for empty lines`, async () => { + const lines = [ + ``, + JSON.stringify({ + type: `result`, + subtype: `success`, + result: `ok`, + session_id: `s`, + is_error: false, + }), + ] + const sandbox = makeFakeSandbox(lines) + const bridge = new StdioBridge() + const received: string[] = [] + + await bridge.runTurn({ + sandbox, + kind: `claude`, + prompt: `hi`, + onEvent: () => undefined, + onNativeLine: (l) => received.push(l), + } as RunTurnArgs) + + // Empty string should have been skipped by the `if (!line) continue` guard. + expect(received.every((l) => l.length > 0)).toBe(true) + }) +}) +``` + +- [ ] **Step 2: Run the unit test to confirm it passes** + +```bash +cd packages/coding-agents && npx vitest run test/unit/stdio-bridge-resume.test.ts +``` + +**Commit:** + +``` +git add packages/coding-agents/test/unit/stdio-bridge-resume.test.ts +git commit -m "test(coding-agents): unit test — onNativeLine already wired in StdioBridge" +``` + +--- + +### Task 1.2 — Wire `--resume ` in `StdioBridge` + +**Files:** + +- Modify: `packages/coding-agents/src/bridge/stdio-bridge.ts` + +- [ ] **Step 1: Replace the warning block and add `--resume` to `cliArgs`** + +Current code (lines 13-18): + +```ts +if (args.nativeSessionId) { + log.warn( + { nativeSessionId: args.nativeSessionId }, + `StdioBridge MVP does not implement resume — running fresh turn` + ) +} +``` + +Replace with nothing (delete the block), and after the `cliArgs` array definition add: + +```ts +if (args.nativeSessionId) cliArgs.push(`--resume`, args.nativeSessionId) +``` + +Full resulting file: + +```ts +// packages/coding-agents/src/bridge/stdio-bridge.ts +import { normalize } from 'agent-session-protocol' +import type { NormalizedEvent } from 'agent-session-protocol' +import { log } from '../log' +import type { Bridge, RunTurnArgs, RunTurnResult } from '../types' + +export class StdioBridge implements Bridge { + async runTurn(args: RunTurnArgs): Promise { + if (args.kind !== `claude`) { + throw new Error( + `StdioBridge MVP supports only 'claude', got '${args.kind}'` + ) + } + + const cliArgs: Array = [ + `--print`, + `--output-format=stream-json`, + `--verbose`, + `--dangerously-skip-permissions`, + ] + if (args.model) cliArgs.push(`--model`, args.model) + if (args.nativeSessionId) cliArgs.push(`--resume`, args.nativeSessionId) + + const handle = await args.sandbox.exec({ + cmd: [`claude`, ...cliArgs], + cwd: args.sandbox.workspaceMount, + stdin: `pipe`, + }) + + if (!handle.writeStdin || !handle.closeStdin) { + throw new Error( + `StdioBridge requires stdin pipe but ExecHandle lacks one` + ) + } + await handle.writeStdin(args.prompt) + await handle.closeStdin() + + const rawLines: Array = [] + const stderrLines: Array = [] + + const drainStderr = async () => { + for await (const line of handle.stderr) { + stderrLines.push(line) + } + } + const drainStdout = async () => { + for await (const line of handle.stdout) { + if (!line) continue + rawLines.push(line) + if (args.onNativeLine) args.onNativeLine(line) + } + } + + await Promise.all([drainStdout(), drainStderr()]) + const exitInfo = await handle.wait() + + if (exitInfo.exitCode !== 0) { + const stderrPreview = stderrLines.join(`\n`).slice(0, 800) || `` + throw new Error( + `claude CLI exited ${exitInfo.exitCode}. stderr=${stderrPreview}` + ) + } + + let events: Array = [] + try { + events = normalize(rawLines, `claude`) + } catch (err) { + log.error({ err, sample: rawLines.slice(0, 3) }, `normalize failed`) + throw err + } + + for (const e of events) args.onEvent(e) + + const sessionInit = events.find((e) => e.type === `session_init`) + const lastAssistant = [...events] + .reverse() + .find((e) => e.type === `assistant_message`) + + return { + nativeSessionId: + sessionInit && `sessionId` in sessionInit + ? (sessionInit as { sessionId?: string }).sessionId + : undefined, + exitCode: exitInfo.exitCode, + finalText: + lastAssistant && `text` in lastAssistant + ? (lastAssistant as { text?: string }).text + : undefined, + } + } +} +``` + +- [ ] **Step 2: Add unit test for `--resume` arg in `stdio-bridge-resume.test.ts`** + +Append this test to the existing `stdio-bridge-resume.test.ts`: + +```ts +describe(`StdioBridge — --resume`, () => { + it(`passes --resume to exec cmd when nativeSessionId is provided`, async () => { + const lines = [ + JSON.stringify({ + type: `result`, + subtype: `success`, + result: `ok`, + session_id: `s`, + is_error: false, + }), + ] + const sandbox = makeFakeSandbox(lines) + const bridge = new StdioBridge() + + await bridge.runTurn({ + sandbox, + kind: `claude`, + prompt: `hi`, + onEvent: () => undefined, + nativeSessionId: `native-sess-abc`, + } as RunTurnArgs) + + const execCall = (sandbox.exec as ReturnType).mock.calls[0][0] + expect(execCall.cmd).toContain(`--resume`) + expect(execCall.cmd).toContain(`native-sess-abc`) + }) + + it(`does not pass --resume when nativeSessionId is absent`, async () => { + const lines = [ + JSON.stringify({ + type: `result`, + subtype: `success`, + result: `ok`, + session_id: `s`, + is_error: false, + }), + ] + const sandbox = makeFakeSandbox(lines) + const bridge = new StdioBridge() + + await bridge.runTurn({ + sandbox, + kind: `claude`, + prompt: `hi`, + onEvent: () => undefined, + } as RunTurnArgs) + + const execCall = (sandbox.exec as ReturnType).mock.calls[0][0] + expect(execCall.cmd).not.toContain(`--resume`) + }) +}) +``` + +- [ ] **Step 3: Run all stdio-bridge tests** + +```bash +cd packages/coding-agents && npx vitest run test/unit/stdio-bridge-resume.test.ts +``` + +**Commit:** + +``` +git add packages/coding-agents/src/bridge/stdio-bridge.ts packages/coding-agents/test/unit/stdio-bridge-resume.test.ts +git commit -m "feat(coding-agents): wire --resume in StdioBridge" +``` + +--- + +### Task 1.3 — Handler: tee `onNativeLine` into `nativeJsonl` collection + capture `nativeSessionId` + +**Files:** + +- Modify: `packages/coding-agents/src/entity/handler.ts` + +The changes are in `processPrompt`. There are two distinct changes: + +**A) Tee raw lines into `nativeJsonl` inside the `runTurn` call.** + +Replace the `runTurn` call (currently lines 371-389 of the original) with a version that adds `onNativeLine`: + +```ts +// Inside processPrompt, in the try block after runs_insert: +let nativeLineSeq = 0 +const result = await raceTimeout( + lm.bridge.runTurn({ + sandbox, + kind: meta.kind, + prompt: promptText, + nativeSessionId: meta.nativeSessionId, // pass stored id (may be undefined on first run) + onNativeLine: (line: string) => { + ctx.db.actions.nativeJsonl_insert({ + row: { + key: eventKey(runId, nativeLineSeq), + runId, + seq: nativeLineSeq, + line, + } satisfies NativeJsonlRow, + }) + nativeLineSeq++ + }, + onEvent: (e: NormalizedEvent) => { + ctx.db.actions.events_insert({ + row: { + key: eventKey(runId, seq), + runId, + seq, + ts: Date.now(), + type: e.type, + payload: e as unknown as Record, + } satisfies EventRow, + }) + seq++ + }, + }), + options.defaults.runTimeoutMs +) +``` + +**B) Capture `nativeSessionId` from the result and persist it in `sessionMeta`.** + +After the `result = await raceTimeout(...)` resolves and before the `runs_update completed` block: + +```ts +// Persist nativeSessionId from this turn if we don't have one yet. +if (result.nativeSessionId && !meta.nativeSessionId) { + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.nativeSessionId = result.nativeSessionId + }, + }) +} +``` + +- [ ] **Step 1: Add `NativeJsonlRow` import at top of handler.ts** + +```ts +import type { + RunRow, + SessionMetaRow, + EventRow, + LifecycleRow, + NativeJsonlRow, // ← add +} from './collections' +``` + +- [ ] **Step 2: Apply changes A and B to `processPrompt`** + +The full updated `processPrompt` run block (replacing from `let seq = 0` to the `recordedRun.end({ status: 'completed' })` call): + +```ts +let seq = 0 +let nativeLineSeq = 0 +let finalText: string | undefined +try { + const result = await raceTimeout( + lm.bridge.runTurn({ + sandbox, + kind: meta.kind, + prompt: promptText, + nativeSessionId: meta.nativeSessionId, + onNativeLine: (line: string) => { + ctx.db.actions.nativeJsonl_insert({ + row: { + key: eventKey(runId, nativeLineSeq), + runId, + seq: nativeLineSeq, + line, + } satisfies NativeJsonlRow, + }) + nativeLineSeq++ + }, + onEvent: (e: NormalizedEvent) => { + ctx.db.actions.events_insert({ + row: { + key: eventKey(runId, seq), + runId, + seq, + ts: Date.now(), + type: e.type, + payload: e as unknown as Record, + } satisfies EventRow, + }) + seq++ + }, + }), + options.defaults.runTimeoutMs + ) + finalText = result.finalText + + // Persist nativeSessionId from this turn if we don't have one yet. + if (result.nativeSessionId && !meta.nativeSessionId) { + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.nativeSessionId = result.nativeSessionId + }, + }) + } + + ctx.db.actions.runs_update({ + key: runId, + updater: (d: RunRow) => { + d.status = `completed` + d.endedAt = Date.now() + d.responseText = finalText + }, + }) + if (finalText) recordedRun.attachResponse(finalText) + recordedRun.end({ status: `completed` }) +} catch (err) { + // ... (rest of catch block unchanged) +``` + +- [ ] **Step 3: TypeScript check** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/coding-agents/src/entity/handler.ts +git commit -m "feat(coding-agents): tee onNativeLine into nativeJsonl and capture nativeSessionId per turn" +``` + +--- + +### Task 1.4 — Handler: cold-boot materialise prior `nativeJsonl` for resume + +**Files:** + +- Modify: `packages/coding-agents/src/entity/handler.ts` + +On cold-boot, before calling `lm.bridge.runTurn`, if `meta.nativeSessionId` is set and `nativeJsonl` rows exist, write them to `/tmp/resume.jsonl` inside the sandbox and pass that path to `--resume` via the already-wired `nativeSessionId` field. + +**Note on path:** `claude --resume` expects the native session id (the UUID), not a file path. The CLI looks for the session's JSONL file in `~/.claude/projects//`. The sanitised path of `/workspace` is `-workspace` (replace `/` → `-`, strip leading `-` → net result: `workspace`, but the claude CLI converts `/workspace` to `-workspace` by replacing every `/` with `-` and prepending nothing; actually `~/.claude/projects/` + replace(`/workspace`, `/`, `-`) = `-workspace`). So we must write the materialized file to `~/.claude/projects/-workspace/.jsonl` inside the container. + +The exec command to materialise: + +``` +sandbox.exec({ cmd: ['sh', '-c', `mkdir -p ~/.claude/projects/-workspace && cat > ~/.claude/projects/-workspace/.jsonl <<'__JSONL__'\n\n__JSONL__`] }) +``` + +Because the lines may contain special characters, it is safer to write the file via a base64-encoded payload piped through `base64 -d`: + +```ts +const b64 = Buffer.from(lines.join('\n') + '\n').toString('base64') +await sandbox.exec({ + cmd: [ + 'sh', + '-c', + `mkdir -p ~/.claude/projects/-workspace && printf '%s' '${b64}' | base64 -d > ~/.claude/projects/-workspace/${nativeSessionId}.jsonl`, + ], + cwd: sandbox.workspaceMount, +}) +``` + +- [ ] **Step 1: Add materialise helper function at the top of `handler.ts` (after imports)** + +```ts +/** + * Sanitise an absolute path for use as the claude project directory name + * under ~/.claude/projects/. The CLI replaces every `/` with `-`, producing + * e.g. `/workspace` → `-workspace`. + */ +function sanitiseCwd(cwd: string): string { + return cwd.replace(/\//g, `-`) +} + +/** + * Materialise nativeJsonl rows into the container's ~/.claude/projects/ so + * that `claude --resume ` finds its session file. + */ +async function materialiseResume( + sandbox: SandboxInstance, + nativeSessionId: string, + lines: string[] +): Promise { + if (lines.length === 0) return + const projectDir = sanitiseCwd(sandbox.workspaceMount) + const jsonlContent = lines.join(`\n`) + `\n` + // Base64-encode to avoid quoting issues with special chars in JSONL lines. + const b64 = Buffer.from(jsonlContent).toString(`base64`) + await sandbox.exec({ + cmd: [ + `sh`, + `-c`, + `mkdir -p ~/.claude/projects/${projectDir} && printf '%s' '${b64}' | base64 -d > ~/.claude/projects/${projectDir}/${nativeSessionId}.jsonl`, + ], + cwd: sandbox.workspaceMount, + }) +} +``` + +- [ ] **Step 2: Add `SandboxInstance` import** + +The handler already imports from lifecycle-manager and workspace-registry. Add the `SandboxInstance` type import: + +```ts +import type { SandboxInstance } from '../types' +``` + +- [ ] **Step 3: Call `materialiseResume` inside `processPrompt`, after the sandbox is up** + +After the `ctx.db.actions.lifecycle_insert` for `sandbox.started` and before `wr.acquire`: + +```ts +// Resume materialisation: if we have a prior nativeSessionId and nativeJsonl +// rows, write them into the container so --resume finds the session file. +if (meta.nativeSessionId) { + const nativeJsonlCol = ctx.db.collections.nativeJsonl + const allLines: string[] = (nativeJsonlCol.toArray as Array) + .slice() + .sort((a, b) => (a.key < b.key ? -1 : a.key > b.key ? 1 : 0)) + .map((r) => r.line) + + if (allLines.length > 0) { + await materialiseResume(sandbox, meta.nativeSessionId, allLines) + ctx.db.actions.lifecycle_insert({ + row: { + key: lifecycleKey(`resume`), + ts: Date.now(), + event: `resume.restored`, + detail: `lines=${allLines.length}`, + } satisfies LifecycleRow, + }) + } +} +``` + +- [ ] **Step 4: TypeScript check** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +- [ ] **Step 5: Write unit test for materialise** + +Create `packages/coding-agents/test/unit/handler-resume.test.ts`: + +```ts +// packages/coding-agents/test/unit/handler-resume.test.ts +import { describe, it, expect, vi } from 'vitest' + +// Pull the helper via a small re-export shim if it's not exported, +// or test it indirectly via the handler. Here we test it indirectly +// by asserting that sandbox.exec receives the right cmd. + +// Since materialiseResume is not exported, we exercise it through +// processPrompt via makeFakeCtx (adapted from slice-a.test.ts). + +import { makeCodingAgentHandler } from '../../src/entity/handler' +import type { LifecycleManager } from '../../src/lifecycle-manager' +import type { SandboxInstance } from '../../src/types' +import type { + NativeJsonlRow, + SessionMetaRow, +} from '../../src/entity/collections' + +// ---------- minimal doubles -------------------------------------------------- + +function makeExecHandle(stdoutLines: string[]) { + return { + stdout: (async function* () { + for (const l of stdoutLines) yield l + })(), + stderr: (async function* () {})(), + writeStdin: vi.fn().mockResolvedValue(undefined), + closeStdin: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue({ exitCode: 0 }), + } +} + +function makeSandbox( + stdoutLines: string[] +): SandboxInstance & { execCalls: any[] } { + const execCalls: any[] = [] + return { + instanceId: `inst-1`, + workspaceMount: `/workspace`, + exec: vi.fn(async (req) => { + execCalls.push(req) + return makeExecHandle(stdoutLines) + }), + destroy: vi.fn(), + execCalls, + } as any +} + +function makeMinimalLm(sandbox: SandboxInstance) { + const lm = { + startedAtMs: Date.now(), + provider: { + status: vi.fn().mockResolvedValue(`stopped`), + destroy: vi.fn().mockResolvedValue(undefined), + }, + bridge: { + runTurn: vi.fn().mockResolvedValue({ + nativeSessionId: `native-1`, + finalText: `reply`, + exitCode: 0, + }), + }, + ensureRunning: vi.fn().mockResolvedValue(sandbox), + stop: vi.fn().mockResolvedValue(undefined), + destroy: vi.fn().mockResolvedValue(undefined), + pin: vi.fn().mockReturnValue({ count: 1 }), + release: vi.fn().mockReturnValue({ count: 0 }), + pinCount: vi.fn().mockReturnValue(0), + armIdleTimer: vi.fn(), + } + return lm as unknown as LifecycleManager +} + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k: string) { + return rows.get(k) + }, + get toArray(): Array { + return Array.from(rows.values()) + }, + } +} + +function makeFakeCtx(entityUrl: string, args: Record) { + const state = { + sessionMeta: makeCollection(), + runs: makeCollection(), + events: makeCollection(), + lifecycle: makeCollection(), + nativeJsonl: makeCollection(), + inbox: makeCollection(), + } + let runCounter = 0 + const ctx: any = { + entityUrl, + entityType: `coding-agent`, + args, + tags: {}, + firstWake: false, + db: { + collections: state, + actions: { + sessionMeta_insert: ({ row }: any) => + state.sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ key, updater }: any) => { + const r = state.sessionMeta.rows.get(key) + if (r) updater(r) + }, + runs_insert: ({ row }: any) => state.runs.rows.set(row.key, row), + runs_update: ({ key, updater }: any) => { + const r = state.runs.rows.get(key) + if (r) updater(r) + }, + events_insert: ({ row }: any) => state.events.rows.set(row.key, row), + lifecycle_insert: ({ row }: any) => + state.lifecycle.rows.set(row.key, row), + nativeJsonl_insert: ({ row }: any) => + state.nativeJsonl.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent: any = { key, status: undefined, response: `` } + state.runs.rows.set(key, ent) + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: () => undefined, + } + return { ctx, state } +} + +// ---------- tests ------------------------------------------------------------ + +describe(`handler resume materialisation`, () => { + it(`calls sandbox.exec to materialise nativeJsonl rows on cold-boot when nativeSessionId is set`, async () => { + const sandbox = makeSandbox([]) + const lm = makeMinimalLm(sandbox) + + // Pre-seed nativeJsonl rows and sessionMeta with a nativeSessionId. + const { ctx, state } = makeFakeCtx(`/test/ca/resume-1`, { + kind: `claude`, + workspaceType: `volume`, + workspaceName: `vol-1`, + }) + const { WorkspaceRegistry } = await import('../../src/workspace-registry') + const wr = new WorkspaceRegistry() + + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 500, + coldBootBudgetMs: 30_000, + runTimeoutMs: 60_000, + }, + env: () => ({}), + }) + + // First wake — initialises sessionMeta (status: cold) + await handler(ctx, { type: `message_received` }) + + // Manually inject nativeSessionId and nativeJsonl rows (simulating a prior run). + state.sessionMeta.rows.set(`current`, { + ...(state.sessionMeta.get(`current`) as SessionMetaRow), + nativeSessionId: `native-sess-xyz`, + }) + const fakeJsonlLine = JSON.stringify({ + type: `result`, + subtype: `success`, + result: `prior`, + session_id: `native-sess-xyz`, + is_error: false, + }) + state.nativeJsonl.rows.set(`run-1:000000000000000`, { + key: `run-1:000000000000000`, + runId: `run-1`, + seq: 0, + line: fakeJsonlLine, + } satisfies NativeJsonlRow) + + // Second wake with a prompt — should trigger materialise. + state.inbox.rows.set(`i1`, { + key: `i1`, + message_type: `prompt`, + payload: { text: `second prompt` }, + }) + await handler(ctx, { type: `message_received` }) + + // sandbox.exec should have been called at least twice: + // once for materialise, once for the claude CLI invocation. + // The materialise call has a shell command containing base64. + const shellCalls = ( + sandbox.exec as ReturnType + ).mock.calls.filter((c: any[]) => c[0]?.cmd?.[0] === `sh`) + expect(shellCalls.length).toBeGreaterThan(0) + const cmd = shellCalls[0][0].cmd.join(` `) + expect(cmd).toContain(`native-sess-xyz.jsonl`) + expect(cmd).toContain(`base64`) + }) + + it(`adds a resume.restored lifecycle row after materialisation`, async () => { + const sandbox = makeSandbox([]) + const lm = makeMinimalLm(sandbox) + const { ctx, state } = makeFakeCtx(`/test/ca/resume-2`, { + kind: `claude`, + workspaceType: `volume`, + workspaceName: `vol-2`, + }) + const { WorkspaceRegistry } = await import('../../src/workspace-registry') + const wr = new WorkspaceRegistry() + + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 500, + coldBootBudgetMs: 30_000, + runTimeoutMs: 60_000, + }, + env: () => ({}), + }) + + await handler(ctx, { type: `message_received` }) + + state.sessionMeta.rows.set(`current`, { + ...(state.sessionMeta.get(`current`) as SessionMetaRow), + nativeSessionId: `native-sess-abc`, + }) + state.nativeJsonl.rows.set(`run-1:0`, { + key: `run-1:0`, + runId: `run-1`, + seq: 0, + line: `{"type":"result","subtype":"success","result":"x","session_id":"native-sess-abc","is_error":false}`, + } satisfies NativeJsonlRow) + + state.inbox.rows.set(`i1`, { + key: `i1`, + message_type: `prompt`, + payload: { text: `hello again` }, + }) + await handler(ctx, { type: `message_received` }) + + const lifecycleRows = Array.from(state.lifecycle.rows.values()) as any[] + const resumeRow = lifecycleRows.find((r) => r.event === `resume.restored`) + expect(resumeRow).toBeDefined() + expect(resumeRow.detail).toMatch(/lines=1/) + }) +}) +``` + +- [ ] **Step 6: Run unit tests** + +```bash +cd packages/coding-agents && npx vitest run test/unit/handler-resume.test.ts +``` + +**Commit:** + +``` +git add packages/coding-agents/src/entity/handler.ts packages/coding-agents/test/unit/handler-resume.test.ts +git commit -m "feat(coding-agents): materialise nativeJsonl on cold-boot for --resume" +``` + +--- + +## Phase 2 — Add `nativeJsonl` to `register.ts` + update `FakeCtx` helper (sequential) + +### Task 2.1 — Register `nativeJsonl` collection in entity definition + +**Files:** + +- Modify: `packages/coding-agents/src/entity/register.ts` + +- [ ] **Step 1: Add `CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE` and `nativeJsonlRowSchema` imports** + +```ts +import { + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE, // ← add + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + eventRowSchema, + lifecycleRowSchema, + nativeJsonlRowSchema, // ← add + runRowSchema, + sessionMetaRowSchema, +} from './collections' +``` + +- [ ] **Step 2: Add `nativeJsonl` entry to the `state` object in `registry.define`** + +```ts +state: { + sessionMeta: { + schema: sessionMetaRowSchema, + type: CODING_AGENT_SESSION_META_COLLECTION_TYPE, + primaryKey: `key`, + }, + runs: { + schema: runRowSchema, + type: CODING_AGENT_RUNS_COLLECTION_TYPE, + primaryKey: `key`, + }, + events: { + schema: eventRowSchema, + type: CODING_AGENT_EVENTS_COLLECTION_TYPE, + primaryKey: `key`, + }, + lifecycle: { + schema: lifecycleRowSchema, + type: CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + primaryKey: `key`, + }, + nativeJsonl: { // ← NEW + schema: nativeJsonlRowSchema, + type: CODING_AGENT_NATIVE_JSONL_COLLECTION_TYPE, + primaryKey: `key`, + }, +}, +``` + +- [ ] **Step 3: TypeScript check** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/coding-agents/src/entity/register.ts +git commit -m "feat(coding-agents): register nativeJsonl collection in coding-agent entity definition" +``` + +--- + +### Task 2.2 — Integration test: lossless resume (Docker-gated) + +**Files:** + +- Create: `packages/coding-agents/test/integration/slice-b.test.ts` + +This test extends the FakeCtx pattern from `slice-a.test.ts` with `nativeJsonl` collection support. It is Docker-gated (`DOCKER=1`). + +The test verifies: after a first prompt completes and the sandbox goes idle, a second prompt on the same agent (which triggers a cold-boot) references the prior response — proving `--resume` is working. + +- [ ] **Step 1: Write the test** + +```ts +// packages/coding-agents/test/integration/slice-b.test.ts +import { describe, it, expect, beforeAll } from 'vitest' +import { + LocalDockerProvider, + StdioBridge, + WorkspaceRegistry, + LifecycleManager, +} from '../../src' +import { makeCodingAgentHandler } from '../../src/entity/handler' +import { buildTestImage, TEST_IMAGE_TAG } from '../support/build-image' +import { loadTestEnv } from '../support/env' + +const SHOULD_RUN = process.env.DOCKER === `1` +const describeMaybe = SHOULD_RUN ? describe : describe.skip + +interface CollectionStub { + rows: Map + get(k: string): any + toArray: Array +} + +function makeCollection(): CollectionStub { + const rows = new Map() + return { + rows, + get(k: string) { + return rows.get(k) + }, + get toArray(): Array { + return Array.from(rows.values()) + }, + } +} + +function makeFakeCtx(entityUrl: string, args: Record) { + const state = { + sessionMeta: makeCollection(), + runs: makeCollection(), + events: makeCollection(), + lifecycle: makeCollection(), + nativeJsonl: makeCollection(), + inbox: makeCollection(), + } + let runCounter = 0 + const ctx: any = { + entityUrl, + entityType: `coding-agent`, + args, + tags: {}, + firstWake: false, + db: { + collections: state, + actions: { + sessionMeta_insert: ({ row }: any) => + state.sessionMeta.rows.set(row.key, row), + sessionMeta_update: ({ key, updater }: any) => { + const r = state.sessionMeta.rows.get(key) + if (r) updater(r) + }, + runs_insert: ({ row }: any) => state.runs.rows.set(row.key, row), + runs_update: ({ key, updater }: any) => { + const r = state.runs.rows.get(key) + if (r) updater(r) + }, + events_insert: ({ row }: any) => state.events.rows.set(row.key, row), + lifecycle_insert: ({ row }: any) => + state.lifecycle.rows.set(row.key, row), + nativeJsonl_insert: ({ row }: any) => + state.nativeJsonl.rows.set(row.key, row), + }, + }, + recordRun() { + const key = `run-${++runCounter}` + const ent: any = { key, status: undefined, response: `` } + state.runs.rows.set(key, ent) + return { + key, + end({ status }: { status: string }) { + ent.status = status + }, + attachResponse(text: string) { + ent.response += text + }, + } + }, + setTag: () => Promise.resolve(), + send: () => undefined, + } + return { ctx, state } +} + +describeMaybe(`Slice B — resume integration`, () => { + beforeAll(async () => { + await buildTestImage() + }, 600_000) + + it(`second prompt references prior turn content (lossless resume)`, async () => { + const env = loadTestEnv() + const provider = new LocalDockerProvider({ image: TEST_IMAGE_TAG }) + const bridge = new StdioBridge() + const wr = new WorkspaceRegistry() + const lm = new LifecycleManager({ provider, bridge }) + const handler = makeCodingAgentHandler(lm, wr, { + defaults: { + idleTimeoutMs: 1500, + coldBootBudgetMs: 60_000, + runTimeoutMs: 120_000, + }, + env: () => ({ ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY }), + }) + + const agentId = `/test/coding-agent/resume-${Date.now().toString(36)}` + const args = { + kind: `claude`, + workspaceType: `volume`, + workspaceName: `slice-b-resume-${Date.now().toString(36)}`, + idleTimeoutMs: 1500, + } + const { ctx, state } = makeFakeCtx(agentId, args) + + // ── First wake: init ────────────────────────────────────────────────────── + await handler(ctx, { type: `message_received` }) + expect(state.sessionMeta.get(`current`).status).toBe(`cold`) + + // ── First prompt: establish a memorable fact ─────────────────────────────── + state.inbox.rows.set(`i1`, { + key: `i1`, + message_type: `prompt`, + payload: { + text: `Remember the secret code word: BANANA. Reply with "Acknowledged: BANANA" and nothing else.`, + }, + }) + await handler(ctx, { type: `message_received` }) + + const meta1 = state.sessionMeta.get(`current`) + expect(meta1.status).toBe(`idle`) + expect(meta1.nativeSessionId).toBeDefined() + + const runs1 = Array.from(state.runs.rows.values()) as any[] + expect(runs1).toHaveLength(1) + expect(runs1[0].status).toBe(`completed`) + + // Verify nativeJsonl rows were collected. + const nativeRows = Array.from(state.nativeJsonl.rows.values()) as any[] + expect(nativeRows.length).toBeGreaterThan(0) + + // ── Wait past idle timeout so sandbox stops ─────────────────────────────── + await new Promise((r) => setTimeout(r, 2500)) + expect([`stopped`, `unknown`]).toContain(await provider.status(agentId)) + + // ── Second prompt: ask about the fact from the first turn ───────────────── + state.inbox.rows.set(`i2`, { + key: `i2`, + message_type: `prompt`, + payload: { + text: `What was the secret code word I asked you to remember? Reply with just the word.`, + }, + }) + await handler(ctx, { type: `message_received` }) + + const runs2 = Array.from(state.runs.rows.values()) as any[] + expect(runs2.length).toBeGreaterThanOrEqual(2) + const lastRun = runs2[runs2.length - 1] + expect(lastRun.status).toBe(`completed`) + + // ── Assert lossless resume: response must contain BANANA ────────────────── + expect(lastRun.responseText?.toUpperCase()).toContain(`BANANA`) + + // ── Verify resume.restored lifecycle row was emitted ───────────────────── + const lifecycleRows = Array.from(state.lifecycle.rows.values()) as any[] + const resumeRow = lifecycleRows.find( + (r: any) => r.event === `resume.restored` + ) + expect(resumeRow).toBeDefined() + + // Cleanup + await provider.destroy(agentId).catch(() => undefined) + }, 360_000) +}) +``` + +- [ ] **Step 2: Run (skip if not in Docker environment)** + +```bash +# Without Docker (skips): +cd packages/coding-agents && npx vitest run test/integration/slice-b.test.ts + +# With Docker (real run): +DOCKER=1 cd packages/coding-agents && npx vitest run test/integration/slice-b.test.ts +``` + +**Commit:** + +``` +git add packages/coding-agents/test/integration/slice-b.test.ts +git commit -m "test(coding-agents): integration test for lossless resume (Slice B)" +``` + +--- + +### Task 2.3 — Full coding-agents test suite pass + +- [ ] **Step 1: Run all unit tests** + +```bash +cd packages/coding-agents && npx vitest run test/unit/ +``` + +- [ ] **Step 2: Verify no TypeScript errors across the package** + +```bash +cd packages/coding-agents && npx tsc --noEmit +``` + +**Commit:** (no new files; fix any failures discovered) + +--- + +## Phase 3 — Horton tool migration (parallel agents) + +### Task 3.1 — Create `spawn-coding-agent.ts` and `prompt-coding-agent.ts` + +**Files:** + +- Create: `packages/agents/src/tools/spawn-coding-agent.ts` +- Create: `packages/agents/src/tools/prompt-coding-agent.ts` + +- [ ] **Step 1: Write `spawn-coding-agent.ts`** + +```ts +// packages/agents/src/tools/spawn-coding-agent.ts +import { Type } from '@sinclair/typebox' +import { nanoid } from 'nanoid' +import { serverLog } from '../log' +import type { AgentTool } from '@mariozechner/pi-agent-core' +import type { HandlerContext } from '@electric-ax/agents-runtime' + +export function createSpawnCodingAgentTool(ctx: HandlerContext): AgentTool { + return { + name: `spawn_coding_agent`, + label: `Spawn Coding Agent`, + description: `Spawn a coding-agent subagent that drives a Claude Code CLI session inside a Docker sandbox with its own persistent workspace. Use when the user asks for code changes, file edits, debugging, or any task that benefits from a real coding agent with full tool access. The coding-agent is long-lived — its URL stays valid across many turns, so keep prompting it via prompt_coding_agent without re-spawning. End your turn after spawning; you'll be woken when the coding-agent finishes its first reply.`, + parameters: Type.Object({ + prompt: Type.String({ + description: `First user message sent to the coding agent. This kicks off the run — be concrete: describe the task, mention the files/paths involved, and what form of answer you want back.`, + }), + workspace_name: Type.Optional( + Type.String({ + description: `Optional stable name for the Docker volume workspace. If omitted, a name is derived from the agent id. Reuse the same name across sessions to persist state.`, + }) + ), + idle_timeout_ms: Type.Optional( + Type.Number({ + description: `Milliseconds of inactivity after which the sandbox is hibernated. Defaults to 300000 (5 min). The workspace persists; the next prompt cold-boots the container.`, + }) + ), + }), + execute: async (_toolCallId, params) => { + const { prompt, workspace_name, idle_timeout_ms } = params as { + prompt: string + workspace_name?: string + idle_timeout_ms?: number + } + if (typeof prompt !== `string` || prompt.length === 0) { + return { + content: [ + { + type: `text` as const, + text: `Error: prompt is required and must be a non-empty string.`, + }, + ], + details: { spawned: false }, + } + } + + const id = nanoid(10) + const spawnArgs: Record = { + kind: `claude`, + workspaceType: `volume`, + } + if (workspace_name) spawnArgs.workspaceName = workspace_name + if (idle_timeout_ms != null) spawnArgs.idleTimeoutMs = idle_timeout_ms + + try { + const handle = await ctx.spawn(`coding-agent`, id, spawnArgs, { + initialMessage: { text: prompt }, + wake: { on: `runFinished`, includeResponse: true }, + }) + const agentUrl = handle.entityUrl + + return { + content: [ + { + type: `text` as const, + text: `Coding agent dispatched at ${agentUrl}. End your turn — when the coding agent finishes its current reply you'll be woken with the response. To send follow-up prompts to the same agent, call prompt_coding_agent with this URL.`, + }, + ], + details: { spawned: true, agentUrl }, + } + } catch (err) { + serverLog.warn( + `[spawn_coding_agent tool] failed to spawn coding-agent ${id}: ${err instanceof Error ? err.message : String(err)}`, + err instanceof Error ? err : undefined + ) + return { + content: [ + { + type: `text` as const, + text: `Error spawning coding agent: ${err instanceof Error ? err.message : `Unknown error`}`, + }, + ], + details: { spawned: false }, + } + } + }, + } +} +``` + +- [ ] **Step 2: Write `prompt-coding-agent.ts`** + +```ts +// packages/agents/src/tools/prompt-coding-agent.ts +import { Type } from '@sinclair/typebox' +import { serverLog } from '../log' +import type { AgentTool } from '@mariozechner/pi-agent-core' +import type { HandlerContext } from '@electric-ax/agents-runtime' + +export function createPromptCodingAgentTool(ctx: HandlerContext): AgentTool { + return { + name: `prompt_coding_agent`, + label: `Prompt Coding Agent`, + description: `Send a follow-up prompt to a coding agent you previously spawned. The prompt is queued on the agent's inbox and runs as the next CLI turn (resuming from prior context). End your turn after calling — you'll be woken when the agent's reply lands.`, + parameters: Type.Object({ + coding_agent_url: Type.String({ + description: `Entity URL returned by spawn_coding_agent, e.g. "/coding-agent/abc123". Must be the URL of a coding agent you previously spawned in this conversation.`, + }), + prompt: Type.String({ + description: `Follow-up message to send to the coding agent. Reference earlier context the agent already saw rather than restating it from scratch.`, + }), + }), + execute: async (_toolCallId, params) => { + const { coding_agent_url, prompt } = params as { + coding_agent_url: string + prompt: string + } + if ( + typeof coding_agent_url !== `string` || + !coding_agent_url.startsWith(`/coding-agent/`) + ) { + return { + content: [ + { + type: `text` as const, + text: `Error: coding_agent_url must be a path like "/coding-agent/".`, + }, + ], + details: { sent: false }, + } + } + if (typeof prompt !== `string` || prompt.length === 0) { + return { + content: [ + { + type: `text` as const, + text: `Error: prompt is required and must be a non-empty string.`, + }, + ], + details: { sent: false }, + } + } + + try { + ctx.send(coding_agent_url, { text: prompt }) + return { + content: [ + { + type: `text` as const, + text: `Prompt queued for ${coding_agent_url}. End your turn — you'll be woken when the coding agent's reply lands.`, + }, + ], + details: { sent: true, agentUrl: coding_agent_url }, + } + } catch (err) { + serverLog.warn( + `[prompt_coding_agent tool] failed to send to ${coding_agent_url}: ${err instanceof Error ? err.message : String(err)}`, + err instanceof Error ? err : undefined + ) + return { + content: [ + { + type: `text` as const, + text: `Error sending prompt to coding agent: ${err instanceof Error ? err.message : `Unknown error`}`, + }, + ], + details: { sent: false }, + } + } + }, + } +} +``` + +**Commit:** + +``` +git add packages/agents/src/tools/spawn-coding-agent.ts packages/agents/src/tools/prompt-coding-agent.ts +git commit -m "feat(agents): add spawn_coding_agent and prompt_coding_agent tools" +``` + +--- + +### Task 3.2 — Update Horton: swap tool list + system prompt + imports + +**Files:** + +- Modify: `packages/agents/src/agents/horton.ts` + +- [ ] **Step 1: Replace legacy import** + +Old: + +```ts +import { + createPromptCoderTool, + createSpawnCoderTool, +} from '../tools/spawn-coder' +``` + +New: + +```ts +import { createSpawnCodingAgentTool } from '../tools/spawn-coding-agent' +import { createPromptCodingAgentTool } from '../tools/prompt-coding-agent' +``` + +- [ ] **Step 2: Update `createHortonTools` return array** + +Old: + +```ts +createSpawnCoderTool(ctx), +createPromptCoderTool(ctx), +``` + +New: + +```ts +createSpawnCodingAgentTool(ctx), +createPromptCodingAgentTool(ctx), +``` + +- [ ] **Step 3: Update system prompt tool list (lines ~218-219)** + +Old: + +``` +- spawn_coder: spawn a long-lived coding agent (Claude Code or Codex CLI) for code changes, file edits, debugging +- prompt_coder: send a follow-up prompt to a coder you previously spawned +``` + +New: + +``` +- spawn_coding_agent: spawn a long-lived coding agent (Claude Code CLI) in a Docker sandbox for code changes, file edits, debugging +- prompt_coding_agent: send a follow-up prompt to a coding agent you previously spawned +``` + +- [ ] **Step 4: Update "When to spawn a coder" section (~lines 247-252)** + +Old: + +``` +# When to spawn a coder +Spawn a coder when the user asks for code changes, file edits, debugging, or any task that benefits from a real coding agent with full tool access (bash, file edits, etc.). A coder runs Claude Code or Codex CLI under the hood. + +Unlike a worker, a coder is **long-lived**: its URL stays valid across many turns. Spawn once with spawn_coder, then keep prompting it via prompt_coder for follow-ups — don't spawn a new coder for each turn. Treat the coder URL like a chat handle. + +After calling spawn_coder or prompt_coder, end your turn. When the coder's reply lands, you'll be woken with the response in the wake message — relay it (or a summary) back to the user, and call prompt_coder again if there's a follow-up. +``` + +New: + +``` +# When to spawn a coding agent +Spawn a coding agent when the user asks for code changes, file edits, debugging, or any task that benefits from a real coding agent with full tool access (bash, file edits, etc.). A coding agent runs Claude Code CLI inside a Docker sandbox with a persistent workspace. + +Unlike a worker, a coding agent is **long-lived**: its URL stays valid across many turns and its session context carries over (via resume). Spawn once with spawn_coding_agent, then keep prompting it via prompt_coding_agent for follow-ups — don't spawn a new agent for each turn. Treat the coding agent URL like a chat handle. + +After calling spawn_coding_agent or prompt_coding_agent, end your turn. When the agent's reply lands, you'll be woken with the response in the wake message — relay it (or a summary) back to the user, and call prompt_coding_agent again if there's a follow-up. +``` + +- [ ] **Step 5: TypeScript check** + +```bash +cd packages/agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/agents/src/agents/horton.ts +git commit -m "feat(agents): migrate Horton from spawn_coder/prompt_coder to spawn_coding_agent/prompt_coding_agent" +``` + +--- + +## Phase 4 — Legacy deletion (parallel agents) + +### Task 4.1 — Delete `coding-session.ts` and `spawn-coder.ts` + +**Files:** + +- Delete: `packages/agents/src/agents/coding-session.ts` +- Delete: `packages/agents/src/tools/spawn-coder.ts` + +- [ ] **Step 1: Delete files** + +```bash +rm packages/agents/src/agents/coding-session.ts +rm packages/agents/src/tools/spawn-coder.ts +``` + +- [ ] **Step 2: Remove `registerCodingSession` from `bootstrap.ts`** + +In `packages/agents/src/bootstrap.ts`: + +Remove line 12: + +```ts +import { registerCodingSession } from './agents/coding-session' +``` + +Remove line 124: + +```ts +registerCodingSession(registry, { defaultWorkingDirectory: cwd }) +``` + +Remove line 125: + +```ts +typeNames.push('coder') +``` + +- [ ] **Step 3: TypeScript check** + +```bash +cd packages/agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/agents/src/bootstrap.ts +git rm packages/agents/src/agents/coding-session.ts packages/agents/src/tools/spawn-coder.ts +git commit -m "feat(agents): remove legacy coder entity (coding-session.ts, spawn-coder.ts) and unregister from bootstrap" +``` + +--- + +### Task 4.2 — Remove legacy runtime types from `agents-runtime` + +**Files:** + +- Modify: `packages/agents-runtime/src/types.ts` +- Modify: `packages/agents-runtime/src/context-factory.ts` +- Modify: `packages/agents-runtime/src/index.ts` + +The legacy types to remove from `types.ts` (lines 734-818 in the current file): + +- `CodingSessionStatus` +- `CodingSessionEventRow` +- `CodingSessionMeta` +- `CodingSessionMetaRow` +- `UseCodingAgentOptions` +- `CodingSessionHandle` + +The `HandlerContext` interface method to remove (`useCodingAgent` at line 1002). + +The `useCodingAgent` implementation in `context-factory.ts` (lines 566-634). + +- [ ] **Step 1: Delete legacy type blocks from `types.ts`** + +Remove the entire block from `export type CodingSessionStatus` through the closing `}` of `CodingSessionHandle`. Keep everything from `// ─── Coding Agent (Slice A) ───` onward. + +- [ ] **Step 2: Remove `useCodingAgent` from `HandlerContext` interface in `types.ts`** + +Find and remove the `useCodingAgent(id: string, opts: UseCodingAgentOptions): CodingSessionHandle` line (and any JSDoc above it) from the `HandlerContext` interface. + +- [ ] **Step 3: Remove `useCodingAgent` implementation from `context-factory.ts`** + +Remove the `useCodingAgent` function body (lines 566-634) and its surrounding infrastructure. Also remove the imports of `CodingSessionEventRow`, `CodingSessionHandle`, `CodingSessionMeta`, `CodingSessionStatus`, `UseCodingAgentOptions` from the types import at the top of `context-factory.ts`. + +Remove `CODING_SESSION_ENTITY_TYPE` and `codingSessionEntityUrl` imports from `context-factory.ts` if they are only used by `useCodingAgent`. + +- [ ] **Step 4: Remove legacy exports from `index.ts`** + +In `packages/agents-runtime/src/index.ts`: + +Remove from the type export block (lines 24-41 area): + +- `CodingSessionEventRow` +- `CodingSessionHandle` +- `CodingSessionMeta` +- `CodingSessionMetaRow` +- `CodingSessionStatus` +- `UseCodingAgentOptions` + +Remove from the observation-sources export block (lines 198-210 area): + +- `CODING_SESSION_ENTITY_TYPE` +- `CODING_SESSION_META_COLLECTION_TYPE` +- `CODING_SESSION_CURSOR_COLLECTION_TYPE` +- `CODING_SESSION_EVENT_COLLECTION_TYPE` +- `codingSession` +- `codingSessionEntityUrl` + +**Note:** Keep `CODING_SESSION_*` constants in `observation-sources.ts` itself for now (they may be referenced by existing entity streams in the database). Only remove them from the public re-export in `index.ts`. + +- [ ] **Step 5: TypeScript check across all affected packages** + +```bash +cd packages/agents-runtime && npx tsc --noEmit +cd packages/agents && npx tsc --noEmit +``` + +**Commit:** + +``` +git add packages/agents-runtime/src/types.ts packages/agents-runtime/src/context-factory.ts packages/agents-runtime/src/index.ts +git commit -m "feat(agents-runtime): remove legacy CodingSession types and useCodingAgent implementation" +``` + +--- + +### Task 4.3 — UI: extend `StatusDot` + `ToolCallView` + +**Files:** + +- Modify: `packages/agents-server-ui/src/components/StatusDot.tsx` +- Modify: `packages/agents-server-ui/src/components/ToolCallView.tsx` + +- [ ] **Step 1: Add coding-agent status colors to `StatusDot.tsx`** + +```ts +const STATUS_COLORS: Record = { + active: `#3b82f6`, + running: `#3b82f6`, + idle: `#22c55e`, + spawning: `#eab308`, + stopped: `#cbd5e1`, + // coding-agent statuses (Slice B) + cold: `#9ca3af`, + starting: `#eab308`, + stopping: `#eab308`, + error: `#ef4444`, + destroyed: `#6b7280`, +} +``` + +Also update `STATUS_COLOR` in `EntityHeader.tsx` to match: + +```ts +const STATUS_COLOR: Record< + string, + `blue` | `green` | `amber` | `gray` | `red` +> = { + active: `blue`, + running: `blue`, + idle: `green`, + spawning: `amber`, + stopped: `gray`, + cold: `gray`, + starting: `amber`, + stopping: `amber`, + error: `red`, + destroyed: `gray`, +} +``` + +- [ ] **Step 2: Add `spawn_coding_agent` and `prompt_coding_agent` cases to `ToolCallView.tsx`** + +In `getSummary`, after the `prompt_coder` case: + +```ts +case `spawn_coding_agent`: +case `prompt_coding_agent`: + return truncate((args.prompt as string) ?? ``, 60) +``` + +**Commit:** + +``` +git add packages/agents-server-ui/src/components/StatusDot.tsx packages/agents-server-ui/src/components/EntityHeader.tsx packages/agents-server-ui/src/components/ToolCallView.tsx +git commit -m "feat(agents-server-ui): extend status colors for coding-agent states and add new tool cases" +``` + +--- + +### Task 4.4 — UI: create `CodingAgentView`, `useCodingAgent`, `CodingAgentTimeline`, `CodingAgentSpawnDialog` + +**Files:** + +- Create: `packages/agents-server-ui/src/hooks/useCodingAgent.ts` +- Create: `packages/agents-server-ui/src/components/CodingAgentView.tsx` +- Create: `packages/agents-server-ui/src/components/CodingAgentTimeline.tsx` +- Create: `packages/agents-server-ui/src/components/CodingAgentSpawnDialog.tsx` + +- [ ] **Step 1: Write `useCodingAgent.ts`** + +```ts +// packages/agents-server-ui/src/hooks/useCodingAgent.ts +import { useEffect, useMemo, useRef, useState } from 'react' +import { useLiveQuery } from '@tanstack/react-db' +import { + CODING_AGENT_SESSION_META_COLLECTION_TYPE, + CODING_AGENT_RUNS_COLLECTION_TYPE, + CODING_AGENT_EVENTS_COLLECTION_TYPE, + CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, +} from '@electric-ax/coding-agents' +import { connectEntityStream } from '../lib/entity-connection' +import type { EntityStreamDBWithActions } from '@electric-ax/agents-runtime' + +export type CodingAgentSliceAStatus = + | `cold` + | `starting` + | `idle` + | `running` + | `stopping` + | `error` + | `destroyed` + +export interface SessionMetaRow { + key: string + status: CodingAgentSliceAStatus + kind: `claude` + pinned: boolean + workspaceIdentity: string + idleTimeoutMs: number + keepWarm: boolean + instanceId?: string + lastError?: string + nativeSessionId?: string +} + +export interface RunRow { + key: string + startedAt: number + endedAt?: number + status: `running` | `completed` | `failed` + finishReason?: string + promptInboxKey: string + responseText?: string +} + +export interface EventRow { + key: string + runId: string + seq: number + ts: number + type: string + payload: Record +} + +export interface LifecycleRow { + key: string + ts: number + event: string + detail?: string +} + +const CODING_AGENT_STATE = { + sessionMeta: { + type: CODING_AGENT_SESSION_META_COLLECTION_TYPE, + primaryKey: `key`, + }, + runs: { + type: CODING_AGENT_RUNS_COLLECTION_TYPE, + primaryKey: `key`, + }, + events: { + type: CODING_AGENT_EVENTS_COLLECTION_TYPE, + primaryKey: `key`, + }, + lifecycle: { + type: CODING_AGENT_LIFECYCLE_COLLECTION_TYPE, + primaryKey: `key`, + }, +} as const + +export interface UseCodingAgentResult { + db: EntityStreamDBWithActions | null + meta: SessionMetaRow | undefined + runs: Array + events: Array + lifecycle: Array + loading: boolean + error: string | null +} + +export function useCodingAgent( + baseUrl: string | null, + entityUrl: string | null +): UseCodingAgentResult { + const [db, setDb] = useState(null) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + const closeRef = useRef<(() => void) | null>(null) + + useEffect(() => { + setDb(null) + setError(null) + + if (!baseUrl || !entityUrl) { + setLoading(false) + return + } + + let cancelled = false + setLoading(true) + + connectEntityStream({ + baseUrl, + entityUrl, + customState: CODING_AGENT_STATE, + }) + .then((result) => { + if (cancelled) { + result.close() + return + } + closeRef.current = result.close + setDb(result.db) + setLoading(false) + }) + .catch((err) => { + if (!cancelled) { + console.error(`Failed to connect coding-agent stream`, { + baseUrl, + entityUrl, + error: err, + }) + setError(err instanceof Error ? err.message : String(err)) + setLoading(false) + } + }) + + return () => { + cancelled = true + closeRef.current?.() + closeRef.current = null + } + }, [baseUrl, entityUrl]) + + const metaCollection = db?.collections.sessionMeta + const runsCollection = db?.collections.runs + const eventsCollection = db?.collections.events + const lifecycleCollection = db?.collections.lifecycle + + const { data: metaRows = [] } = useLiveQuery( + (q) => (metaCollection ? q.from({ m: metaCollection }) : undefined), + [metaCollection] + ) + const { data: runRows = [] } = useLiveQuery( + (q) => + runsCollection + ? q.from({ r: runsCollection }).orderBy(({ r }) => r.$key, `asc`) + : undefined, + [runsCollection] + ) + const { data: eventRows = [] } = useLiveQuery( + (q) => + eventsCollection + ? q.from({ e: eventsCollection }).orderBy(({ e }) => e.$key, `asc`) + : undefined, + [eventsCollection] + ) + const { data: lifecycleRows = [] } = useLiveQuery( + (q) => + lifecycleCollection + ? q.from({ l: lifecycleCollection }).orderBy(({ l }) => l.$key, `asc`) + : undefined, + [lifecycleCollection] + ) + + const meta = useMemo( + () => (metaRows as unknown as Array)[0], + [metaRows] + ) + const runs = useMemo(() => runRows as unknown as Array, [runRows]) + const events = useMemo( + () => eventRows as unknown as Array, + [eventRows] + ) + const lifecycle = useMemo( + () => lifecycleRows as unknown as Array, + [lifecycleRows] + ) + + return { db, meta, runs, events, lifecycle, loading, error } +} +``` + +- [ ] **Step 2: Write `CodingAgentTimeline.tsx`** + +```tsx +// packages/agents-server-ui/src/components/CodingAgentTimeline.tsx +import { memo, useMemo, useState } from 'react' +import { Badge, Flex, ScrollArea, Text } from '@radix-ui/themes' +import { Streamdown } from 'streamdown' +import { createCodePlugin } from '../lib/codeHighlighter' +import type { + SessionMetaRow, + RunRow, + EventRow, + LifecycleRow, +} from '../hooks/useCodingAgent' + +const codePluginSingleton = createCodePlugin() +const streamdownPlugins = { code: codePluginSingleton } + +export function CodingAgentTimeline({ + meta, + runs, + events, + lifecycle, + loading, + error, +}: { + meta: SessionMetaRow | undefined + runs: Array + events: Array + lifecycle: Array + loading: boolean + error: string | null +}): React.ReactElement { + const items = useMemo( + () => renderItems(events, lifecycle), + [events, lifecycle] + ) + + return ( + + + {meta && } + {error && ( + + {error} + + )} + {!loading && + events.length === 0 && + lifecycle.length === 0 && + !error && ( + + No events yet. Send a prompt to start the agent. + + )} + {items} + + + ) +} + +function AgentMetaRow({ + meta, + runs, +}: { + meta: SessionMetaRow + runs: Array +}): React.ReactElement { + const completedRuns = runs.filter((r) => r.status === `completed`).length + const failedRuns = runs.filter((r) => r.status === `failed`).length + return ( + + + {meta.kind} + + + {meta.workspaceIdentity} + + {completedRuns > 0 && ( + + {completedRuns} run{completedRuns !== 1 ? `s` : ``} + + )} + {failedRuns > 0 && ( + + {failedRuns} failed + + )} + {meta.pinned && ( + + pinned + + )} + + ) +} + +function renderItems( + events: Array, + lifecycle: Array +): Array { + // Pair tool_call with tool_result by callId. + const resultsByCallId = new Map() + const callsByCallId = new Map() + for (const e of events) { + const callId = e.payload.callId as string | undefined + if (!callId) continue + if (e.type === `tool_result`) resultsByCallId.set(callId, e) + else if (e.type === `tool_call`) callsByCallId.set(callId, e) + } + + const rendered = new Set() + const items: Array = [] + + // Merge events + lifecycle, sorted by timestamp. + type MergedItem = + | { kind: `event`; ts: number; key: string; e: EventRow } + | { kind: `lifecycle`; ts: number; key: string; l: LifecycleRow } + + const merged: MergedItem[] = [ + ...events.map((e) => ({ + kind: `event` as const, + ts: e.ts, + key: `e:${e.key}`, + e, + })), + ...lifecycle.map((l) => ({ + kind: `lifecycle` as const, + ts: l.ts, + key: `l:${l.key}`, + l, + })), + ].sort((a, b) => a.ts - b.ts) + + for (const item of merged) { + if (item.kind === `lifecycle`) { + items.push() + continue + } + + const e = item.e + const key = e.key + if (rendered.has(key)) continue + + switch (e.type) { + case `session_init`: + items.push() + rendered.add(key) + break + case `user_message`: + items.push() + rendered.add(key) + break + case `assistant_message`: + items.push() + rendered.add(key) + break + case `tool_call`: { + const callId = e.payload.callId as string | undefined + const result = callId ? resultsByCallId.get(callId) : undefined + if (result) rendered.add(result.key) + items.push() + rendered.add(key) + break + } + case `tool_result`: { + const callId = e.payload.callId as string | undefined + if (callId && callsByCallId.has(callId)) { + // Will be rendered with its tool_call. + rendered.add(key) + break + } + // Orphan result (call is before tail cursor). + items.push() + rendered.add(key) + break + } + case `turn_complete`: + case `session_end`: + case `compaction`: + items.push() + rendered.add(key) + break + default: + rendered.add(key) + } + } + + return items +} + +function LifecycleEventRow({ row }: { row: LifecycleRow }): React.ReactElement { + const label: Record = { + 'sandbox.starting': `Sandbox starting`, + 'sandbox.started': `Sandbox started`, + 'sandbox.stopped': `Sandbox stopped`, + 'sandbox.failed': `Sandbox failed`, + pin: `Pinned`, + release: `Released`, + 'orphan.detected': `Orphan detected`, + 'resume.restored': `Session resumed`, + } + return ( + + + {new Date(row.ts).toLocaleTimeString()} + + + {label[row.event] ?? row.event} + {row.detail ? ` — ${row.detail}` : ``} + + + ) +} + +function SessionInitRow({ event }: { event: EventRow }): React.ReactElement { + const sessionId = event.payload.sessionId as string | undefined + return ( + + + Session started{sessionId ? ` (${sessionId.slice(0, 8)}…)` : ``} + + + ) +} + +const AssistantMessageRow = memo(function AssistantMessageRow({ + event, +}: { + event: EventRow +}): React.ReactElement { + const text = (event.payload.text as string | undefined) ?? `` + return ( + + + Assistant + +
+ +
+
+ ) +}) + +function UserMessageRow({ event }: { event: EventRow }): React.ReactElement { + const text = (event.payload.text as string | undefined) ?? `` + const pending = !!event.payload._pending + return ( + + + You{pending ? ` (queued)` : ``} + +
+ {text} +
+
+ ) +} + +function ToolCallRow({ + call, + result, +}: { + call: EventRow + result: EventRow | undefined +}): React.ReactElement { + const [open, setOpen] = useState(false) + const toolName = (call.payload.toolName as string | undefined) ?? `tool` + const args = call.payload.args as Record | undefined + return ( + setOpen((o) => !o)} + > + + + {toolName} + + {result && ( + + done + + )} + + {open && ( +
+          {JSON.stringify(args, null, 2)}
+        
+ )} +
+ ) +} + +function OrphanResultRow({ event }: { event: EventRow }): React.ReactElement { + return ( + + + Tool result (call before window) + + + ) +} + +function SystemEventRow({ event }: { event: EventRow }): React.ReactElement { + const label: Record = { + turn_complete: `Turn complete`, + session_end: `Session ended`, + compaction: `Context compacted`, + } + return ( + + + {label[event.type] ?? event.type} + + + ) +} +``` + +- [ ] **Step 3: Write `CodingAgentView.tsx`** + +```tsx +// packages/agents-server-ui/src/components/CodingAgentView.tsx +import { Flex } from '@radix-ui/themes' +import { useCodingAgent } from '../hooks/useCodingAgent' +import { CodingAgentTimeline } from './CodingAgentTimeline' +import { MessageInput } from './MessageInput' + +export function CodingAgentView({ + baseUrl, + entityUrl, + entityStopped, +}: { + baseUrl: string + entityUrl: string + entityStopped: boolean +}): React.ReactElement { + const { db, meta, runs, events, lifecycle, loading, error } = useCodingAgent( + baseUrl, + entityUrl + ) + + return ( + + + + + ) +} +``` + +- [ ] **Step 4: Write `CodingAgentSpawnDialog.tsx`** + +```tsx +// packages/agents-server-ui/src/components/CodingAgentSpawnDialog.tsx +import { useCallback, useMemo, useState } from 'react' +import { Button, Dialog, Flex, Text } from '@radix-ui/themes' + +type WorkspaceMode = `volume` | `bindMount` + +interface CodingAgentSpawnDialogProps { + open: boolean + onOpenChange: (open: boolean) => void + onSpawn: (args: Record) => void +} + +export function CodingAgentSpawnDialog({ + open, + onOpenChange, + onSpawn, +}: CodingAgentSpawnDialogProps): React.ReactElement { + const [workspaceMode, setWorkspaceMode] = useState(`volume`) + const [workspaceName, setWorkspaceName] = useState(``) + const [hostPath, setHostPath] = useState(``) + const [initialPrompt, setInitialPrompt] = useState(``) + + const canSubmit = useMemo(() => { + if (workspaceMode === `bindMount`) return hostPath.trim().length > 0 + return true + }, [workspaceMode, hostPath]) + + const handleSubmit = useCallback( + (e: React.FormEvent) => { + e.preventDefault() + if (!canSubmit) return + const args: Record = { + kind: `claude`, + workspaceType: workspaceMode, + } + if (workspaceMode === `volume` && workspaceName.trim()) { + args.workspaceName = workspaceName.trim() + } + if (workspaceMode === `bindMount`) { + args.workspaceHostPath = hostPath.trim() + } + if (initialPrompt.trim()) { + args._initialPrompt = initialPrompt.trim() + } + onSpawn(args) + }, + [canSubmit, workspaceMode, workspaceName, hostPath, initialPrompt, onSpawn] + ) + + const inputStyle: React.CSSProperties = { + width: `100%`, + padding: `6px 8px`, + borderRadius: `var(--radius-2)`, + border: `1px solid var(--gray-a7)`, + background: `var(--gray-a2)`, + fontSize: `var(--font-size-2)`, + fontFamily: `var(--default-font-family)`, + color: `var(--gray-12)`, + boxSizing: `border-box`, + } + + return ( + + + New coding agent + + Spawn a Claude Code CLI session inside a Docker sandbox with a + persistent workspace. + + +
+ + + + Workspace type + + + + + + + + {workspaceMode === `volume` && ( + + + Volume name{` `} + + (optional — leave blank to auto-generate) + + + setWorkspaceName(e.target.value)} + placeholder="my-project" + /> + + )} + + {workspaceMode === `bindMount` && ( + + + Host path{` `} + + * + + + setHostPath(e.target.value)} + placeholder="/Users/me/my-project" + /> + + )} + + + + Initial prompt{` `} + + (optional) + + +