diff --git a/.gitignore b/.gitignore index 62dacec..69810f5 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ coverage/ .turbo/ bundle/ .opencode/ +.worktrees/ +tmp/ diff --git a/AGENTS.md b/AGENTS.md index 157c7ef..237307e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,7 +8,7 @@ SafeClaw is an AI coding assistant with multi-provider LLM support (GitHub Copil The target user is an individual developer who wants AI-assisted coding with strong guarantees against prompt injection, malicious tool calls, and data exfiltration. -**Linux-only. Node.js >= 22. pnpm 9+.** +**Linux and macOS. Node.js >= 22. pnpm 9+.** ## Repository Structure @@ -83,10 +83,10 @@ Each tool declares `requiredCapabilities` and implements a `ToolHandler` interfa `packages/core/src/tools/process-manager.ts` -- Tracks spawned child processes by UUID. Features: ring buffer output capture (1MB max per process), automatic cleanup after 1 hour, maximum 8 concurrent processes. Used by the `process` builtin tool. ### Sandbox -- `packages/sandbox/src/sandbox.ts` -- Spawns child process with `unshare` + native helper -- `packages/sandbox/src/policy-builder.ts` -- `PolicyBuilder` class with fluent API; `PolicyBuilder.forDevelopment(cwd, options?)` creates a development-ready policy with allowlisted system paths, compiler toolchains (JVM, GCC), an expanded ~120 syscall allowlist, and support for `extraExecutePaths`/`extraReadWritePaths` via `DevelopmentPolicyOptions` +- `packages/sandbox/src/sandbox.ts` -- Wraps commands via `@anthropic-ai/sandbox-runtime` (`SandboxManager.wrapWithSandbox()`) as the outer layer; injects the C helper as the inner process via `--policy-file <path>` when found +- `packages/sandbox/src/policy-builder.ts` -- `PolicyBuilder` class with fluent API; `PolicyBuilder.forDevelopment(cwd, options?)` creates a development-ready policy with allowlisted system paths, compiler toolchains (JVM, GCC), an expanded ~120 syscall allowlist, and support for `extraExecutePaths`/`extraReadWritePaths` 
via `DevelopmentPolicyOptions`; `PolicyBuilder.toRuntimeConfig(policy)` translates `SandboxPolicy` to `SandboxRuntimeConfig` for sandbox-runtime (write allowlist + credential dir denylist) - `native/src/main.c` -- C helper binary that applies: Landlock filesystem rules, seccomp-BPF syscall filtering, capability dropping, `PR_SET_NO_NEW_PRIVS` -- Policy sent to helper via fd 3 as JSON +- Policy sent to helper via `--policy-file <path>` (JSON written to a temp file at mode 0o600; cleaned up after each execution) ### Vault `packages/vault/src/vault.ts` -- AES-256-GCM encrypted JSON file store. Keys derived via scrypt from passphrase or fetched from OS keyring (GNOME `secret-tool`). File permissions enforced at 0o600. @@ -114,7 +114,8 @@ Bootstrap flow (`packages/cli/src/commands/bootstrap.ts`): 4. Load builtin skill manifest 5. Read `brave_api_key` from vault; if present, include web_search tool in tool registry 6. Create ProcessManager for background process tracking -7. Create: CapabilityRegistry -> CapabilityEnforcer -> ToolRegistry -> Sandbox -> ToolOrchestrator -> ContextCompactor -> Agent +7. Initialize `SandboxManager` network proxy (via `PolicyBuilder.toRuntimeConfig()`) +8. Create: CapabilityRegistry -> CapabilityEnforcer -> ToolRegistry -> Sandbox -> ToolOrchestrator -> ContextCompactor -> Agent -8. Return `{ agent, sessionManager, capabilityRegistry, auditLog }` +9. Return `{ agent, sessionManager, capabilityRegistry, auditLog }` CLI commands: `chat` (default), `onboard`, `audit`, `serve`/`server`, `doctor`, `help`, `version` diff --git a/README.md b/README.md index 2d1c041..a5dd904 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ SafeClaw is a secure AI coding assistant with mandatory OS-level sandboxing, sig curl -fsSL https://raw.githubusercontent.com/linuxdevel/safeclaw/main/install.sh | bash ``` -Requires: Linux (x86\_64 or arm64), Node.js >= 22, bubblewrap (`apt install bubblewrap`). +Requires: Linux (x86\_64 or arm64) or macOS, Node.js >= 22, bubblewrap (`apt install bubblewrap`), socat (`apt install socat`). 
After install, run `safeclaw onboard` for first-time setup. @@ -20,7 +20,7 @@ After install, run `safeclaw onboard` for first-time setup. ### Security -- Zero-trust security model with mandatory OS-level sandboxing — bubblewrap `pivot_root` filesystem isolation (outer) + Landlock + seccomp-BPF + capability dropping (inner), Linux namespaces (PID, net, mount, IPC, UTS) +- Zero-trust security model with mandatory OS-level sandboxing — `@anthropic-ai/sandbox-runtime` outer layer (bubblewrap `pivot_root` + bind mounts on Linux; sandbox-exec on macOS) + C helper inner layer (Landlock + seccomp-BPF + capability dropping on Linux). Supports Linux and macOS. - Development-ready sandbox policy via `PolicyBuilder.forDevelopment()` — allows compilers (GCC, JVM), package managers, and standard dev tools while enforcing kernel-level access control. Selective home directory binding hides `~/.ssh`, `~/.aws`, `~/.gnupg` structurally. - AES-256-GCM encrypted secrets vault with OS keyring or passphrase-derived keys - Ed25519-signed skill manifests with capability declarations and runtime enforcement @@ -69,7 +69,7 @@ Planned features in implementation order: | 2 | Automatic context compaction | [plan](docs/plans/2026-03-05-context-compaction.md) | High | | 3 | Streaming UX (Phase 1 — readline) | [plan](docs/plans/2026-03-05-streaming-ux.md) | High | | 4 | Better CLI/TUI (Ink-based) | [plan](docs/plans/2026-03-05-tui.md) | High | -| 5 | Bubblewrap sandbox (`pivot_root` isolation) | [design](docs/plans/2026-03-07-bubblewrap-sandbox-design.md) · [plan](docs/plans/2026-03-07-bubblewrap-sandbox-implementation.md) | High | +| 5 | Sandbox-runtime integration (`pivot_root` + macOS support) | [design](docs/plans/2026-03-07-bubblewrap-sandbox-design.md) · [plan](docs/plans/2026-03-20-sandbox-runtime-integration.md) | **Done** | | 6 | Parallel agents | [plan](docs/plans/2026-03-05-parallel-agents.md) | Medium | | 7 | Long-running background agents | 
[plan](docs/plans/2026-03-05-background-agents.md) | Medium | | 8 | Superpowers skill integration | [plan](docs/plans/2026-03-05-superpowers-integration.md) | Medium | @@ -93,7 +93,7 @@ Planned features in implementation order: Monorepo structure: - `@safeclaw/vault` — Encrypted secrets storage -- `@safeclaw/sandbox` — OS-level process sandboxing with bubblewrap (`pivot_root`) + C helper (Landlock/seccomp) and `PolicyBuilder` for development-ready policies +- `@safeclaw/sandbox` — OS-level process sandboxing: outer layer via `@anthropic-ai/sandbox-runtime` (bwrap on Linux, sandbox-exec on macOS), inner layer via C helper (Landlock + seccomp-BPF + cap-drop). `PolicyBuilder` for development-ready policies; `PolicyBuilder.toRuntimeConfig()` translates policies for sandbox-runtime. - `@safeclaw/core` — Capabilities, agent runtime, sessions, tools, skills, model providers, copilot client - `@safeclaw/gateway` — HTTP server with auth and rate limiting - `@safeclaw/cli` — Command-line interface diff --git a/docs/architecture.md b/docs/architecture.md index da88dc7..b171bb9 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -74,9 +74,10 @@ OS-level process isolation using Linux kernel features. 
- `SandboxPolicy` / `DEFAULT_POLICY`: policy types with maximally restrictive defaults - `PolicyBuilder`: fluent API for constructing sandbox policies; `PolicyBuilder.forDevelopment(cwd)` creates a ready-made policy for software development with allowlisted system paths, compiler toolchains, and an expanded syscall set -- `detectKernelCapabilities`: probes `/proc` for Landlock, seccomp, namespace support +- `detectKernelCapabilities`: probes `/proc` for Landlock, seccomp, namespace support; also detects bubblewrap availability - `assertSandboxSupported`: throws if required kernel features are missing -- `Sandbox` class: executes commands under policy (stub in v1; types and policies are real) +- `Sandbox` class: executes commands by wrapping via `SandboxManager.wrapWithSandbox()` (outer layer) and injecting the C helper as the inner process via `--policy-file` when found +- `PolicyBuilder.toRuntimeConfig()`: translates `SandboxPolicy` to `SandboxRuntimeConfig` for sandbox-runtime (write allowlist, credential dir denylist, network domains) No dependencies on other SafeClaw packages. @@ -148,7 +149,7 @@ Command-line interface adapter. - `runOnboarding`: five-step onboarding wizard (kernel check, auth, vault, signing key, model selection) - `setupChat`: wires the CLI adapter to the agent - `runAudit`: generates security audit reports (text or JSON) -- `runDoctor`: runs 12 diagnostic checks across system, security, config, and connectivity categories +- `runDoctor`: runs diagnostic checks across system (platform, bwrap, socat, rg, helper binary), security (Landlock, seccomp, namespaces), config, and connectivity categories Dependencies: `@safeclaw/core`, `@safeclaw/gateway`, `@safeclaw/sandbox`, `@safeclaw/vault`. @@ -294,9 +295,9 @@ The audit log maintains the last N entries in memory (default: 100). The `safecl ## Key design decisions -### Linux-only (v1) +### Linux and macOS -SafeClaw v1 targets Linux exclusively. 
The sandboxing architecture depends on Landlock (kernel >= 5.13), seccomp-BPF, and Linux namespaces. These have no direct equivalents on macOS or Windows. Future versions may add platform-specific sandboxing. +SafeClaw supports Linux and macOS. On Linux, the full stack is available: bubblewrap `pivot_root` container (via `@anthropic-ai/sandbox-runtime`) + Landlock + seccomp-BPF + capability dropping (via C helper). On macOS, the outer `sandbox-exec` layer is used; the C helper and Linux-specific enforcement (Landlock, seccomp, namespaces) are not active. Windows is not supported. ### Multi-provider LLM support diff --git a/docs/getting-started.md b/docs/getting-started.md index 4ca8f65..60c10df 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -4,26 +4,34 @@ - **Node.js** >= 22.0.0 - **pnpm** >= 9.0.0 -- **Linux** with kernel >= 5.13 (required for Landlock, seccomp-BPF, and namespaces) +- **Linux** with kernel >= 5.13 (required for Landlock, seccomp-BPF, and namespaces), **or macOS** (uses sandbox-exec for outer isolation; C helper not available on macOS) +- **bubblewrap** (`apt install bubblewrap` on Debian/Ubuntu) — required on Linux for `pivot_root` filesystem isolation +- **socat** (`apt install socat` on Debian/Ubuntu) — required by `@anthropic-ai/sandbox-runtime` for network proxy - **GNOME Keyring** (`secret-tool`) if using OS keyring for vault encryption (optional; passphrase fallback available) - A **GitHub account** with Copilot access (for default Copilot provider), or an **OpenAI** or **Anthropic** API key -### Verifying kernel support +### Verifying sandbox support -SafeClaw requires mandatory sandboxing. Check that your kernel supports the necessary features: +Run `safeclaw doctor` after installation to verify all dependencies. 
To manually check: ```bash -# Kernel version (must be >= 5.13) +# bubblewrap (Linux) +which bwrap && bwrap --version + +# socat (network proxy) +which socat + +# Kernel version (Linux, must be >= 5.13 for Landlock) uname -r -# Seccomp support +# Seccomp support (Linux) grep Seccomp /proc/self/status -# Namespace support +# Namespace support (Linux) ls /proc/self/ns/{user,pid,net,mnt} ``` -All four namespaces (user, PID, net, mount) must be available. If any are missing, SafeClaw will warn during onboarding and sandbox isolation will be limited. +All four namespaces (user, PID, net, mount) must be available on Linux. If any are missing, SafeClaw will warn during onboarding and sandbox isolation will be limited. On macOS, namespace-based isolation is replaced by sandbox-exec. ## Installation @@ -209,8 +217,8 @@ Run diagnostic checks to verify your SafeClaw installation: safeclaw doctor ``` -The doctor command runs 12 checks across four categories: -- **System**: Node.js version, native helper binary, disk space +The doctor command runs checks across four categories: +- **System**: Node.js version, platform support, bubblewrap (`bwrap`), native helper binary, socat, ripgrep (`rg`), disk space - **Security**: Landlock, seccomp, namespace support - **Config**: vault accessibility, signing key, default model - **Connectivity**: GitHub Copilot API reachability diff --git a/docs/plans/2026-03-20-sandbox-runtime-integration.md b/docs/plans/2026-03-20-sandbox-runtime-integration.md new file mode 100644 index 0000000..7b14d77 --- /dev/null +++ b/docs/plans/2026-03-20-sandbox-runtime-integration.md @@ -0,0 +1,1677 @@ +# Sandbox Runtime Integration Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Replace SafeClaw's custom namespace/bwrap code with `@anthropic-ai/sandbox-runtime` as the outer isolation layer, gaining macOS support, network domain filtering, and socat-based proxy control, while retaining the C helper (Landlock + seccomp + cap-drop) as an inner layer inside bwrap. + +**Architecture:** Phase 1 wires `SandboxManager.wrapWithSandbox()` into the `Sandbox` class, which becomes an adapter translating `SandboxPolicy` → `SandboxRuntimeConfig`. Phase 2 injects the C helper as the inner process by writing a policy temp file and passing `--policy-file` (the helper already supports this flag — no C changes needed). The spawn chain becomes: `bwrap [sandbox-runtime] → helper [Landlock+seccomp+cap-drop] → command`. + +**Tech Stack:** `@anthropic-ai/sandbox-runtime` pinned to git SHA `20f5176a94314038695bee13779eb9eebbbaeb49`, existing C helper binary, TypeScript (ESM strict), vitest. + +**Supersedes:** `docs/plans/2026-03-07-bubblewrap-sandbox-design.md` — that plan assumed SafeClaw would implement bwrap directly; this delegates to sandbox-runtime instead and additionally gains macOS + network proxy support. 
+ +--- + +## File Map + +| File | Change | +|------|--------| +| `packages/sandbox/package.json` | Add `@anthropic-ai/sandbox-runtime` git dep | +| `packages/sandbox/src/types.ts` | Extend `NetworkPolicy`, `EnforcementLayers`, `KernelCapabilities` | +| `packages/sandbox/src/policy-builder.ts` | Add `toRuntimeConfig()` method; extend `DevelopmentPolicyOptions` for network | +| `packages/sandbox/src/policy-builder.test.ts` | Add `toRuntimeConfig()` tests | +| `packages/sandbox/src/detect.ts` | Replace `unshare` check with sandbox-runtime dep checks | +| `packages/sandbox/src/detect.test.ts` | Update detect tests | +| `packages/sandbox/src/sandbox.ts` | Rewrite `execute()` to use `SandboxManager.wrapWithSandbox()` | +| `packages/sandbox/src/sandbox.test.ts` | Update spawn chain tests | +| `packages/sandbox/src/index.ts` | Export new `NetworkPolicy` type | +| `packages/cli/src/commands/bootstrap.ts` | Call `SandboxManager.initialize()` before `new Sandbox()` | +| `packages/cli/src/commands/bootstrap.test.ts` | Add initialize call test | +| `packages/cli/src/commands/doctor-checks.ts` | Replace `unshareCheck`; add `bwrapCheck`, `socatCheck`, `ripgrepCheck` | +| `packages/cli/src/commands/doctor-checks.test.ts` | Update check tests | +| `packages/cli/src/commands/doctor.ts` | Update check list | + +--- + +## Phase 1 — sandbox-runtime as outer isolation layer + +### Task 1: Add @anthropic-ai/sandbox-runtime dependency + +**Files:** +- Modify: `packages/sandbox/package.json` + +- [ ] **Step 1: Add the git dependency** + +```json +{ + "name": "@safeclaw/sandbox", + "version": "0.0.1", + "private": true, + "type": "module", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "scripts": { + "build": "tsc -p tsconfig.json" + }, + "dependencies": { + "@anthropic-ai/sandbox-runtime": "git+ssh://git@github.com/anthropic-experimental/sandbox-runtime.git#20f5176a94314038695bee13779eb9eebbbaeb49" + }, + "files": ["dist"] +} +``` + +- [ ] **Step 2: Install** + +```bash +pnpm 
install +``` + +Expected: resolves without error. `node_modules/@anthropic-ai/sandbox-runtime` exists. + +- [ ] **Step 3: Verify types resolve** + +```bash +pnpm typecheck +``` + +Expected: PASS (no type errors yet — we haven't imported it). + +- [ ] **Step 4: Commit** + +```bash +git add packages/sandbox/package.json pnpm-lock.yaml +git commit -m "chore(sandbox): add @anthropic-ai/sandbox-runtime git dependency" +``` + +--- + +### Task 2: Extend types + +**Files:** +- Modify: `packages/sandbox/src/types.ts` +- Modify: `packages/sandbox/src/index.ts` + +Background: `SandboxPolicy.network` is currently `"none" | "localhost" | "filtered"`. We replace the `"filtered"` variant with a structured domain-allowlist object. `EnforcementLayers` gains `pivotRoot` and `bindMounts`. `KernelCapabilities` gains `bwrap`. + +- [ ] **Step 1: Write type tests first** (in `packages/sandbox/src/types.test.ts`, new file) + +```typescript +import { describe, it, expectTypeOf } from "vitest"; +import type { NetworkPolicy, SandboxPolicy, EnforcementLayers, KernelCapabilities } from "./types.js"; + +describe("NetworkPolicy", () => { + it("accepts 'none'", () => { + const n: NetworkPolicy = "none"; + expectTypeOf(n).toMatchTypeOf<NetworkPolicy>(); + }); + + it("accepts domain allowlist object", () => { + const n: NetworkPolicy = { allowedDomains: ["github.com", "*.npmjs.org"] }; + expectTypeOf(n).toMatchTypeOf<NetworkPolicy>(); + }); + + it("accepts domain allowlist with deniedDomains", () => { + const n: NetworkPolicy = { allowedDomains: [], deniedDomains: ["evil.com"] }; + expectTypeOf(n).toMatchTypeOf<NetworkPolicy>(); + }); +}); + +describe("EnforcementLayers", () => { + it("has pivotRoot and bindMounts fields", () => { + const e: EnforcementLayers = { + namespaces: true, pivotRoot: true, bindMounts: true, + landlock: false, seccomp: false, capDrop: false, + }; + expectTypeOf(e.pivotRoot).toBeBoolean(); + expectTypeOf(e.bindMounts).toBeBoolean(); + }); +}); + +describe("KernelCapabilities", () => { + it("has bwrap field", () => { + const k: 
KernelCapabilities = { + landlock: { supported: true, abiVersion: 3 }, + seccomp: { supported: true }, + namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, + }; + expectTypeOf(k.bwrap.available).toBeBoolean(); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- types.test +``` + +Expected: FAIL — `NetworkPolicy`, `pivotRoot`, `bindMounts`, `bwrap` not yet defined. + +- [ ] **Step 3: Update `packages/sandbox/src/types.ts`** + +```typescript +/** Filesystem access rule for Landlock */ +export interface PathRule { + path: string; + access: "read" | "write" | "readwrite" | "execute" | "readwriteexecute"; +} + +/** + * Network policy for a sandbox execution. + * - "none": block all outbound network (net namespace, no proxy) + * - "localhost": allow only loopback + * - object: route through sandbox-runtime proxy with domain allowlist/denylist + */ +export type NetworkPolicy = + | "none" + | "localhost" + | { allowedDomains: string[]; deniedDomains?: string[] }; + +/** Sandbox policy — defines isolation constraints for a single execution */ +export interface SandboxPolicy { + filesystem: { allow: PathRule[]; deny: PathRule[] }; + syscalls: { allow: string[]; defaultDeny: true }; + network: NetworkPolicy; + namespaces: { pid: boolean; net: boolean; mnt: boolean; user: boolean }; + timeoutMs?: number | undefined; +} + +/** Which enforcement layers were active during execution */ +export interface EnforcementLayers { + namespaces: boolean; + pivotRoot: boolean; // bwrap pivot_root was used + bindMounts: boolean; // bwrap bind-mount FS isolation was active + landlock: boolean; + seccomp: boolean; + capDrop: boolean; +} + +/** Result of a sandboxed execution */ +export interface SandboxResult { + exitCode: number; + stdout: string; + stderr: string; + durationMs: number; + killed: boolean; + killReason?: "timeout" | "oom" | "signal" | 
undefined; + enforcement?: EnforcementLayers | undefined; +} + +/** Kernel feature availability */ +export interface KernelCapabilities { + landlock: { supported: boolean; abiVersion: number }; + seccomp: { supported: boolean }; + namespaces: { user: boolean; pid: boolean; net: boolean; mnt: boolean }; + bwrap: { available: boolean; path: string | undefined; version: string | undefined }; +} + +/** Default sandbox policy — maximum restriction */ +export const DEFAULT_POLICY: SandboxPolicy = { + filesystem: { allow: [], deny: [] }, + syscalls: { + allow: [ + "read", "write", "exit", "exit_group", "brk", "mmap", "close", + "fstat", "mprotect", "munmap", "rt_sigaction", "rt_sigprocmask", + "ioctl", "access", "getpid", "clone", "execve", "wait4", "uname", + "fcntl", "getcwd", "arch_prctl", "set_tid_address", "set_robust_list", + "rseq", "prlimit64", "getrandom", + ], + defaultDeny: true, + }, + network: "none", + namespaces: { pid: true, net: true, mnt: true, user: true }, + timeoutMs: 30_000, +}; +``` + +- [ ] **Step 4: Export `NetworkPolicy` from `packages/sandbox/src/index.ts`** + +Add to existing exports: +```typescript +export type { NetworkPolicy } from "./types.js"; +``` + +- [ ] **Step 5: Fix compilation breakage in policy-builder.ts** + +In `policy-builder.ts`, `build()` returns `SandboxPolicy`. The `network: "none"` literal still works because `"none"` is part of `NetworkPolicy`. No change needed. + +Check existing tests still type-check: +```bash +pnpm typecheck +``` + +Expected: PASS (or only errors about code that explicitly checks `=== "filtered"` — fix those by checking `typeof policy.network === "object"`). + +- [ ] **Step 6: Run the new type tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- types.test +``` + +Expected: PASS. + +- [ ] **Step 7: Run all sandbox tests** + +```bash +pnpm test --filter @safeclaw/sandbox +``` + +Expected: PASS. (The existing tests reference `network: "none"` which is still valid.) 
+ +- [ ] **Step 8: Commit** + +```bash +git add packages/sandbox/src/types.ts packages/sandbox/src/types.test.ts packages/sandbox/src/index.ts +git commit -m "feat(sandbox): extend NetworkPolicy type and EnforcementLayers/KernelCapabilities" +``` + +--- + +### Task 3: PolicyBuilder.toRuntimeConfig() + +**Files:** +- Modify: `packages/sandbox/src/policy-builder.ts` +- Modify: `packages/sandbox/src/policy-builder.test.ts` + +Background: `SandboxRuntimeConfig` (from sandbox-runtime) uses a different model than SafeClaw's Landlock-style `SandboxPolicy`: +- **Reads**: sandbox-runtime is permissive-by-default (deny specific dirs) vs SafeClaw's allowlist-only. We translate by denying the sensitive home dirs and letting everything else be readable. +- **Writes**: both use allowlist-only. Map `readwrite`/`readwriteexecute` PathRules to `filesystem.allowWrite`. +- **Network**: `"none"` → `allowedDomains: []`; object → pass through. + +The sensitive dirs always denied for reads (credentials/config that must not leak): +``` +~/.ssh ~/.aws ~/.gnupg ~/.kube ~/.docker ~/.gcloud ~/.azure +``` + +- [ ] **Step 1: Write failing tests for `toRuntimeConfig()` in `policy-builder.test.ts`** + +Add to the existing test file: + +```typescript +import { homedir } from "node:os"; +import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; + +describe("PolicyBuilder.toRuntimeConfig()", () => { + it("maps readwrite PathRules to allowWrite", () => { + const policy = new PolicyBuilder() + .addReadWrite("/project") + .addReadWrite("/tmp") + .build(); + const rtConfig: SandboxRuntimeConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain("/project"); + expect(rtConfig.filesystem.allowWrite).toContain("/tmp"); + }); + + it("maps readwriteexecute PathRules to allowWrite", () => { + const policy = new PolicyBuilder().addReadWriteExecute("/workspace").build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + 
expect(rtConfig.filesystem.allowWrite).toContain("/workspace"); + }); + + it("does not add read-only or execute-only paths to allowWrite", () => { + const policy = new PolicyBuilder() + .addReadOnly("/etc") + .addReadExecute("/usr/bin") + .build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).not.toContain("/etc"); + expect(rtConfig.filesystem.allowWrite).not.toContain("/usr/bin"); + }); + + it("adds sensitive home dirs to denyRead", () => { + const policy = new PolicyBuilder().build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + const home = homedir(); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.ssh`); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.aws`); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.gnupg`); + }); + + it("maps network: 'none' to allowedDomains: []", () => { + const policy = { ...DEFAULT_POLICY, network: "none" as const }; + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.network.allowedDomains).toEqual([]); + }); + + it("maps network object to allowedDomains/deniedDomains", () => { + const policy: SandboxPolicy = { + ...DEFAULT_POLICY, + network: { allowedDomains: ["github.com", "*.npmjs.org"], deniedDomains: ["evil.com"] }, + }; + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.network.allowedDomains).toEqual(["github.com", "*.npmjs.org"]); + expect(rtConfig.network.deniedDomains).toEqual(["evil.com"]); + }); + + it("forDevelopment().toRuntimeConfig() includes cwd in allowWrite", () => { + const cwd = "/home/user/project"; + const policy = PolicyBuilder.forDevelopment(cwd); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain(cwd); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- policy-builder.test +``` + +Expected: FAIL — `toRuntimeConfig` not defined. 
+ +- [ ] **Step 3: Add `toRuntimeConfig()` to `PolicyBuilder` in `policy-builder.ts`** + +Add imports at the top: +```typescript +import { homedir } from "node:os"; +import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; +import type { SandboxPolicy, NetworkPolicy } from "./types.js"; +``` + +Add after the `build()` method and before `forDevelopment()`: + +```typescript +/** + * Translates a SafeClaw SandboxPolicy into a SandboxRuntimeConfig for + * @anthropic-ai/sandbox-runtime. + * + * Read model difference: SafeClaw uses an allowlist (Landlock); sandbox-runtime + * is permissive-by-default with an explicit denylist. We translate by denying + * the sensitive credential dirs that must never be readable. + * + * Write model: both use allowlists. PathRules with access "readwrite" or + * "readwriteexecute" map to filesystem.allowWrite. + */ +static toRuntimeConfig(policy: SandboxPolicy): SandboxRuntimeConfig { + // ── Filesystem ──────────────────────────────────────────────────── + const allowWrite = policy.filesystem.allow + .filter((r) => r.access === "readwrite" || r.access === "readwriteexecute") + .map((r) => r.path); + + // Always deny reads to credential/secret directories. + // sandbox-runtime also enforces mandatory deny on dangerous files (.bashrc, + // .git/hooks, etc.) regardless of this config — these are complementary. 
+ const home = homedir(); + const denyRead = [ + `${home}/.ssh`, + `${home}/.aws`, + `${home}/.gnupg`, + `${home}/.kube`, + `${home}/.docker`, + `${home}/.gcloud`, + `${home}/.azure`, + ]; + + // ── Network ─────────────────────────────────────────────────────── + const network = buildNetworkConfig(policy.network); + + return { + filesystem: { + allowWrite, + denyWrite: [], + denyRead, + }, + network, + }; +} +``` + +Add the private helper after the class: + +```typescript +function buildNetworkConfig( + network: NetworkPolicy, +): SandboxRuntimeConfig["network"] { + if (network === "none") { + return { allowedDomains: [], deniedDomains: [] }; + } + if (network === "localhost") { + return { allowedDomains: ["localhost"], deniedDomains: [] }; + } + return { + allowedDomains: network.allowedDomains, + deniedDomains: network.deniedDomains ?? [], + }; +} +``` + +Also add the `DevelopmentPolicyOptions` extension for network: + +```typescript +export interface DevelopmentPolicyOptions { + extraExecutePaths?: string[]; + extraReadWritePaths?: string[]; + extraReadOnlyPaths?: string[]; + /** + * Network domains the sandboxed process may connect to. + * Default: [] (block all network). Use this to allow e.g. npm registry. + * Example: ["registry.npmjs.org", "*.github.com"] + */ + allowedNetworkDomains?: string[]; +} +``` + +And update `forDevelopment()` to apply it (near the end of the method): + +```typescript +// ── Network ────────────────────────────────────────────────────────── +const networkPolicy: NetworkPolicy = + options?.allowedNetworkDomains !== undefined + ? { allowedDomains: options.allowedNetworkDomains } + : "none"; + +return { ...builder.build(), network: networkPolicy }; +``` + +> Note: `forDevelopment()` currently calls `builder.build()` which hard-codes `network: "none"`. Extract to a local variable so the override can be applied. 
+ +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- policy-builder.test +``` + +Expected: PASS. + +- [ ] **Step 5: Typecheck** + +```bash +pnpm typecheck +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add packages/sandbox/src/policy-builder.ts packages/sandbox/src/policy-builder.test.ts +git commit -m "feat(sandbox): add PolicyBuilder.toRuntimeConfig() translating to SandboxRuntimeConfig" +``` + +--- + +### Task 4: Update detect.ts + +**Files:** +- Modify: `packages/sandbox/src/detect.ts` +- Modify: `packages/sandbox/src/detect.test.ts` + +Background: `detect.ts` currently checks `unshare`, Landlock kernel version, and seccomp. We replace this with sandbox-runtime's `SandboxManager.checkDependencies()` for the platform-specific checks, and probe for `bwrap` directly for `KernelCapabilities`. The Linux-only restriction is removed — macOS is now supported via `sandbox-exec`. + +- [ ] **Step 1: Write failing detect tests** + +Replace the existing `detect.test.ts` with: + +```typescript +import { describe, it, expect, vi, beforeEach } from "vitest"; + +const mockIsSupportedPlatform = vi.fn<() => boolean>(); +const mockCheckDeps = vi.fn<() => { errors: string[]; warnings: string[] }>(); +const mockWhichBwrap = vi.fn<() => string | null>(); + +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSupportedPlatform: mockIsSupportedPlatform, + checkDependencies: mockCheckDeps, + }, +})); + +vi.mock("node:child_process", () => ({ + execFileSync: mockWhichBwrap, +})); + +const { detectKernelCapabilities, assertSandboxSupported } = await import("./detect.js"); + +describe("detectKernelCapabilities()", () => { + beforeEach(() => vi.clearAllMocks()); + + it("reports bwrap available when which bwrap succeeds", () => { + mockWhichBwrap.mockReturnValue("/usr/bin/bwrap"); + const caps = detectKernelCapabilities(); + expect(caps.bwrap.available).toBe(true); + expect(caps.bwrap.path).toBe("/usr/bin/bwrap"); + }); 
+ + it("reports bwrap unavailable when which bwrap fails", () => { + mockWhichBwrap.mockImplementation(() => { throw new Error("not found"); }); + const caps = detectKernelCapabilities(); + expect(caps.bwrap.available).toBe(false); + expect(caps.bwrap.path).toBeUndefined(); + }); +}); + +describe("assertSandboxSupported()", () => { + beforeEach(() => vi.clearAllMocks()); + + it("does not throw when platform is supported and deps are OK", () => { + mockIsSupportedPlatform.mockReturnValue(true); + mockCheckDeps.mockReturnValue({ errors: [], warnings: [] }); + expect(() => assertSandboxSupported()).not.toThrow(); + }); + + it("throws when platform is not supported", () => { + mockIsSupportedPlatform.mockReturnValue(false); + mockCheckDeps.mockReturnValue({ errors: [], warnings: [] }); + expect(() => assertSandboxSupported()).toThrow(/platform/i); + }); + + it("throws when sandbox-runtime deps are missing", () => { + mockIsSupportedPlatform.mockReturnValue(true); + mockCheckDeps.mockReturnValue({ errors: ["bubblewrap not found"], warnings: [] }); + expect(() => assertSandboxSupported()).toThrow(/bubblewrap not found/); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- detect.test +``` + +Expected: FAIL — detect.ts doesn't use SandboxManager yet. + +- [ ] **Step 3: Rewrite `packages/sandbox/src/detect.ts`** + +All imports must be at the top of the file — ESM hoists them automatically but oxlint enforces `import/first` (zero lint diagnostics required). + +```typescript +import { execFileSync } from "node:child_process"; +import { readFileSync, existsSync } from "node:fs"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; +import type { KernelCapabilities } from "./types.js"; + +/** + * Probes system capabilities relevant to sandboxing. 
+ * Returns KernelCapabilities with bwrap probe on Linux; on macOS the + * bwrap fields are always unavailable (macOS uses sandbox-exec instead). + */ +export function detectKernelCapabilities(): KernelCapabilities { + let bwrapPath: string | undefined; + let bwrapVersion: string | undefined; + + try { + bwrapPath = execFileSync("which", ["bwrap"], { encoding: "utf8" }).trim(); + try { + bwrapVersion = execFileSync("bwrap", ["--version"], { encoding: "utf8" }) + .trim() + .split("\n")[0]; + } catch { + // version flag not supported or bwrap not runnable — path is still valid + } + } catch { + // bwrap not on PATH + } + + // Landlock / seccomp / namespace detection is Linux-only; on macOS these + // are undefined/false since sandbox-runtime uses sandbox-exec there. + const isLinux = process.platform === "linux"; + + return { + landlock: { + supported: isLinux ? detectLandlock() : false, + abiVersion: isLinux ? detectLandlockAbi() : 0, + }, + seccomp: { supported: isLinux ? detectSeccomp() : false }, + namespaces: { + user: isLinux ? existsSync("/proc/self/ns/user") : false, + pid: isLinux ? existsSync("/proc/self/ns/pid") : false, + net: isLinux ? existsSync("/proc/self/ns/net") : false, + mnt: isLinux ? existsSync("/proc/self/ns/mnt") : false, + }, + bwrap: { + available: bwrapPath !== undefined, + path: bwrapPath, + version: bwrapVersion, + }, + }; +} + +/** + * Throws a descriptive error if the current platform and dependencies + * do not support sandbox-runtime isolation. + */ +export function assertSandboxSupported(): KernelCapabilities { + if (!SandboxManager.isSupportedPlatform()) { + throw new Error( + `SafeClaw sandbox is not supported on this platform (${process.platform}). ` + + `Supported: Linux (kernel ≥ 5.13, bubblewrap, socat, ripgrep) and macOS.`, + ); + } + + const deps = SandboxManager.checkDependencies(); + if (deps.errors.length > 0) { + throw new Error( + `Sandbox dependencies missing: ${deps.errors.join(", ")}. 
` + + `On Linux install: apt install bubblewrap socat ripgrep`, + ); + } + + return detectKernelCapabilities(); +} + +// ── Linux helpers ────────────────────────────────────────────────────── + +const LANDLOCK_MIN_KERNEL: [number, number] = [5, 13]; + +function parseKernelVersion(release: string): [number, number] { + const parts = release.trim().split("."); + return [parseInt(parts[0] ?? "0", 10), parseInt(parts[1] ?? "0", 10)]; +} + +function detectLandlock(): boolean { + try { + const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); + const [major, minor] = parseKernelVersion(release); + return ( + major > LANDLOCK_MIN_KERNEL[0] || + (major === LANDLOCK_MIN_KERNEL[0] && minor >= LANDLOCK_MIN_KERNEL[1]) + ); + } catch { + return false; + } +} + +function detectLandlockAbi(): number { + try { + const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); + const [major, minor] = parseKernelVersion(release); + if (major > 6 || (major === 6 && minor >= 2)) return 3; + if (major > 5 || (major === 5 && minor >= 19)) return 2; + if (major > 5 || (major === 5 && minor >= 13)) return 1; + return 0; + } catch { + return 0; + } +} + +function detectSeccomp(): boolean { + try { + const status = readFileSync("/proc/self/status", "utf8"); + return /Seccomp:\s*[12]/.test(status); + } catch { + return false; + } +} +``` + +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- detect.test +``` + +Expected: PASS. + +- [ ] **Step 5: Run all sandbox tests** + +```bash +pnpm test --filter @safeclaw/sandbox +``` + +Expected: PASS (previous tests still work). 
+ +- [ ] **Step 6: Commit** + +```bash +git add packages/sandbox/src/detect.ts packages/sandbox/src/detect.test.ts +git commit -m "feat(sandbox): replace unshare detection with sandbox-runtime dependency checks" +``` + +--- + +### Task 5: Rewrite Sandbox.execute() to use SandboxManager + +**Files:** +- Modify: `packages/sandbox/src/sandbox.ts` +- Modify: `packages/sandbox/src/sandbox.test.ts` + +Background: `Sandbox.execute()` currently spawns `unshare [flags] -- helper -- command`. We replace this with: `SandboxManager.wrapWithSandbox(shellCmd, undefined, rtConfig)` which resolves to a shell command string, then spawn via `/bin/sh -c`. The C helper integration (Landlock/cap-drop) is deferred to Task 8. + +`SandboxManager.initialize()` **must be called** in `bootstrapAgent()` (Task 6) before constructing `Sandbox`. The `Sandbox` class verifies this at construction time. + +Shell quoting: a POSIX single-quote escape avoids adding a new dependency. + +- [ ] **Step 1: Write failing tests in `sandbox.test.ts`** + +Replace the existing test file: + +```typescript +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { DEFAULT_POLICY } from "./types.js"; +import type { KernelCapabilities } from "./types.js"; + +// Mock sandbox-runtime and helper before dynamic import +const mockAssertSandboxSupported = vi.fn<() => KernelCapabilities>(); +const mockFindHelper = vi.fn<() => string | undefined>(); +const mockWrapWithSandbox = vi.fn<(cmd: string) => Promise<string>>(); +const mockIsSupportedPlatform = vi.fn<() => boolean>(); +const mockIsSandboxingEnabled = vi.fn<() => boolean>(); +const mockCleanupAfterCommand = vi.fn<() => void>(); + +vi.mock("./detect.js", () => ({ + assertSandboxSupported: mockAssertSandboxSupported, +})); + +vi.mock("./helper.js", () => ({ + findHelper: () => mockFindHelper(), +})); + +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSupportedPlatform: mockIsSupportedPlatform, + isSandboxingEnabled: 
mockIsSandboxingEnabled, + wrapWithSandbox: mockWrapWithSandbox, + cleanupAfterCommand: mockCleanupAfterCommand, + }, +})); + +const { Sandbox } = await import("./sandbox.js"); + +const FULL_CAPS: KernelCapabilities = { + landlock: { supported: true, abiVersion: 3 }, + seccomp: { supported: true }, + namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, +}; + +describe("Sandbox", () => { + beforeEach(() => { + vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); + mockFindHelper.mockReturnValue(undefined); + }); + + it("constructor calls assertSandboxSupported", () => { + new Sandbox(DEFAULT_POLICY); + expect(mockAssertSandboxSupported).toHaveBeenCalledOnce(); + }); + + it("constructor throws if not initialized (isSandboxingEnabled returns false)", () => { + mockIsSandboxingEnabled.mockReturnValue(false); + expect(() => new Sandbox(DEFAULT_POLICY)).toThrow(/initialize/i); + }); + + it("getPolicy returns a copy of the policy", () => { + const sandbox = new Sandbox(DEFAULT_POLICY); + const policy = sandbox.getPolicy(); + expect(policy).toEqual(DEFAULT_POLICY); + expect(policy).not.toBe(DEFAULT_POLICY); + }); +}); + +describe("Sandbox.execute()", () => { + beforeEach(() => { + vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); + mockFindHelper.mockReturnValue(undefined); + }); + + it("calls wrapWithSandbox with shell-quoted command", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/echo hello"); + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/echo", ["hello"]); + expect(mockWrapWithSandbox).toHaveBeenCalledOnce(); + const wrappedArg: string = mockWrapWithSandbox.mock.calls[0]![0]!; + expect(wrappedArg).toContain("echo"); + expect(wrappedArg).toContain("hello"); + }); + + it("calls cleanupAfterCommand after 
execution", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/true", []); + expect(mockCleanupAfterCommand).toHaveBeenCalledOnce(); + }); + + it("returns stdout and exitCode from the spawned command", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/echo hello"); + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/echo", ["hello"]); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("hello"); + }); + + it("kills process after timeout and returns killReason=timeout", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/sleep 60"); + const policy = { ...DEFAULT_POLICY, timeoutMs: 100 }; + const sandbox = new Sandbox(policy); + const result = await sandbox.execute("/bin/sleep", ["60"]); + expect(result.killed).toBe(true); + expect(result.killReason).toBe("timeout"); + }); + + it("reports pivotRoot=true and bindMounts=true on Linux", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + // These are set based on platform; in CI (Linux) both should be true + expect(typeof result.enforcement?.pivotRoot).toBe("boolean"); + expect(typeof result.enforcement?.bindMounts).toBe("boolean"); + }); + + it("calls cleanupAfterCommand even when command fails", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/false"); + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/false", []); + expect(mockCleanupAfterCommand).toHaveBeenCalledOnce(); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- sandbox.test +``` + +Expected: FAIL. 
+ +- [ ] **Step 3: Rewrite `packages/sandbox/src/sandbox.ts`** + +Note: remove the now-dead `buildUnshareFlags()` private method from the old code — it will trigger `no-unused-vars` under oxlint. + +```typescript +import { spawn } from "node:child_process"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; +import type { SandboxPolicy, SandboxResult, EnforcementLayers } from "./types.js"; +import { assertSandboxSupported } from "./detect.js"; +import { PolicyBuilder } from "./policy-builder.js"; + +/** POSIX single-quote shell escaping. Safe for all byte values. */ +function shEscape(arg: string): string { + return "'" + arg.replace(/'/g, "'\\''") + "'"; +} + +export class Sandbox { + private readonly policy: SandboxPolicy; + + constructor(policy: SandboxPolicy) { + assertSandboxSupported(); + if (!SandboxManager.isSandboxingEnabled()) { + throw new Error( + "SandboxManager is not initialized. Call SandboxManager.initialize() " + + "before constructing a Sandbox (see bootstrapAgent()).", + ); + } + this.policy = policy; + } + + async execute(command: string, args: string[]): Promise<SandboxResult> { + const start = performance.now(); + const timeout = this.policy.timeoutMs ?? 30_000; + + // Build the inner shell command. In Phase 1 the helper is not injected; + // Task 8 adds `--policy-file` injection for Landlock + cap-drop. 
+ const shellCmd = [command, ...args].map(shEscape).join(" "); + + // Translate SafeClaw policy to sandbox-runtime config + const rtConfig = PolicyBuilder.toRuntimeConfig(this.policy); + + // Wrap via sandbox-runtime (bwrap on Linux, sandbox-exec on macOS) + const wrappedCmd = await SandboxManager.wrapWithSandbox( + shellCmd, + undefined, + rtConfig, + ); + + const isLinux = process.platform === "linux"; + const enforcement: EnforcementLayers = { + namespaces: isLinux, + pivotRoot: isLinux, + bindMounts: true, + landlock: false, // Phase 2: re-enabled when helper is injected + seccomp: isLinux, // sandbox-runtime applies seccomp for unix socket blocking on Linux + capDrop: false, // Phase 2: re-enabled when helper is injected + }; + + return new Promise((resolve) => { + const stdoutChunks: Buffer[] = []; + const stderrChunks: Buffer[] = []; + let killed = false; + let killReason: "timeout" | "oom" | "signal" | undefined; + + const proc = spawn("/bin/sh", ["-c", wrappedCmd], { + stdio: ["ignore", "pipe", "pipe"], + detached: true, + }); + + const timer = setTimeout(() => { + killed = true; + killReason = "timeout"; + if (proc.pid !== undefined) { + try { + process.kill(-proc.pid, "SIGKILL"); + } catch { + proc.kill("SIGKILL"); + } + } else { + proc.kill("SIGKILL"); + } + }, timeout); + + proc.stdout!.on("data", (chunk: Buffer) => stdoutChunks.push(chunk)); + proc.stderr!.on("data", (chunk: Buffer) => stderrChunks.push(chunk)); + + proc.on("close", (code: number | null) => { + clearTimeout(timer); + // Clean up bwrap leftover mount points (no-op on macOS) + SandboxManager.cleanupAfterCommand(); + resolve({ + exitCode: code ?? 
1, + stdout: Buffer.concat(stdoutChunks).toString(), + stderr: Buffer.concat(stderrChunks).toString(), + durationMs: performance.now() - start, + killed, + killReason, + enforcement, + }); + }); + + proc.on("error", (err: Error) => { + clearTimeout(timer); + SandboxManager.cleanupAfterCommand(); + resolve({ + exitCode: 1, + stdout: "", + stderr: err.message, + durationMs: performance.now() - start, + killed: false, + enforcement, + }); + }); + }); + } + + getPolicy(): SandboxPolicy { + return structuredClone(this.policy); + } +} +``` + +Note: the `findHelper` import is unused in Phase 1 — remove it to avoid lint errors. It will be re-added in Task 8. + +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- sandbox.test +``` + +Expected: PASS. + +- [ ] **Step 5: Run all sandbox tests** + +```bash +pnpm test --filter @safeclaw/sandbox +``` + +Expected: PASS. + +- [ ] **Step 6: Typecheck** + +```bash +pnpm typecheck +``` + +Expected: PASS. + +- [ ] **Step 7: Commit** + +```bash +git add packages/sandbox/src/sandbox.ts packages/sandbox/src/sandbox.test.ts +git commit -m "feat(sandbox): rewrite Sandbox.execute() to use SandboxManager.wrapWithSandbox()" +``` + +--- + +### Task 6: Initialize SandboxManager in bootstrapAgent + +**Files:** +- Modify: `packages/cli/src/commands/bootstrap.ts` +- Modify: `packages/cli/src/commands/bootstrap.test.ts` + +Background: `SandboxManager.initialize(config)` is async and starts the network proxy infrastructure (HTTP proxy, SOCKS5 proxy, socat Unix socket bridges on Linux). It must be called once before any `Sandbox` is constructed or `wrapWithSandbox()` is called. It also registers a process `exit`/`SIGINT`/`SIGTERM` handler for cleanup automatically. + +The base config passed to `initialize()` sets up the proxy servers. Per-call `customConfig` in `wrapWithSandbox()` (passed from `PolicyBuilder.toRuntimeConfig()`) overrides filesystem and network restrictions per execution. 
+ +- [ ] **Step 1: Write failing test for SandboxManager initialization** + +In `bootstrap.test.ts`, add a test that verifies `SandboxManager.initialize` was called: + +```typescript +// At the top of the mock setup in bootstrap.test.ts, add: +const mockSandboxManagerInitialize = vi.fn<() => Promise<void>>().mockResolvedValue(undefined); +const mockSandboxManagerIsSupportedPlatform = vi.fn<() => boolean>().mockReturnValue(true); +const mockSandboxManagerIsSandboxingEnabled = vi.fn<() => boolean>().mockReturnValue(true); + +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + initialize: mockSandboxManagerInitialize, + isSupportedPlatform: mockSandboxManagerIsSupportedPlatform, + isSandboxingEnabled: mockSandboxManagerIsSandboxingEnabled, + wrapWithSandbox: vi.fn().mockResolvedValue("/bin/true"), + cleanupAfterCommand: vi.fn(), + reset: vi.fn().mockResolvedValue(undefined), + }, +})); + +// Add test: +it("calls SandboxManager.initialize before constructing Sandbox", async () => { + await bootstrapAgent(validDeps); + expect(mockSandboxManagerInitialize).toHaveBeenCalledOnce(); + // initialize must be called before Sandbox is constructed + // (verified by order — assertSandboxSupported mock checks isSandboxingEnabled) +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/cli -- bootstrap.test +``` + +Expected: FAIL — `SandboxManager.initialize` not called yet. + +- [ ] **Step 3: Add `SandboxManager.initialize()` call to `bootstrapAgent()`** + +In `bootstrap.ts`, add import: +```typescript +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; +``` + +In `bootstrapAgent()`, before the `let sandbox: Sandbox | undefined` block (around line 185), add: + +```typescript +// Initialize sandbox-runtime network proxy infrastructure. +// Uses a base "block all network" config; per-execution configs are passed +// as customConfig in Sandbox.execute() → SandboxManager.wrapWithSandbox(). 
+try { + await SandboxManager.initialize({ + filesystem: { allowWrite: [], denyWrite: [], denyRead: [] }, + network: { allowedDomains: [], deniedDomains: [] }, + }); +} catch (err: unknown) { + const detail = err instanceof Error ? err.message : String(err); + output.write( + `Warning: sandbox network proxy failed to initialize (${detail}). ` + + `Filesystem isolation will still be applied.\n`, + ); +} +``` + +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/cli -- bootstrap.test +``` + +Expected: PASS. + +- [ ] **Step 5: Run all CLI tests** + +```bash +pnpm test --filter @safeclaw/cli +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add packages/cli/src/commands/bootstrap.ts packages/cli/src/commands/bootstrap.test.ts +git commit -m "feat(cli): initialize SandboxManager network proxy before constructing Sandbox" +``` + +--- + +### Task 7: Update doctor checks + +**Files:** +- Modify: `packages/cli/src/commands/doctor-checks.ts` +- Modify: `packages/cli/src/commands/doctor-checks.test.ts` +- Modify: `packages/cli/src/commands/doctor.ts` + +Background: `unshareCheck` is replaced by `bwrapCheck`. Two new checks are added: `socatCheck` (Linux only; socat bridges proxy sockets into the bwrap network namespace) and `ripgrepCheck` (sandbox-runtime uses ripgrep to scan for dangerous files before each command). The `sandboxHelperCheck` remains but is downgraded to `warn` since the helper is now optional. 
+ +- [ ] **Step 1: Write failing tests for new checks** + +Add to `doctor-checks.test.ts`: + +```typescript +describe("bwrapCheck", () => { + it("passes when bwrap is available", async () => { + const check = bwrapCheck({ execFileSync: () => "/usr/bin/bwrap" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + expect(result.message).toMatch(/bwrap/); + }); + + it("fails when bwrap is not found", async () => { + const check = bwrapCheck({ execFileSync: () => { throw new Error("not found"); } }); + const result = await check.run(); + expect(result.status).toBe("fail"); + expect(result.detail).toMatch(/apt install bubblewrap/); + }); +}); + +describe("socatCheck", () => { + it("passes when socat is available on linux", async () => { + const check = socatCheck({ + execFileSync: () => "/usr/bin/socat", + platform: "linux", + }); + const result = await check.run(); + expect(result.status).toBe("pass"); + }); + + it("skips on macOS (socat not required)", async () => { + const check = socatCheck({ + execFileSync: () => { throw new Error(); }, + platform: "darwin", + }); + const result = await check.run(); + expect(result.status).toBe("pass"); + expect(result.message).toMatch(/not required/); + }); + + it("fails when socat is missing on linux", async () => { + const check = socatCheck({ + execFileSync: () => { throw new Error("not found"); }, + platform: "linux", + }); + const result = await check.run(); + expect(result.status).toBe("fail"); + expect(result.detail).toMatch(/apt install socat/); + }); +}); + +describe("ripgrepCheck", () => { + it("passes when rg is available", async () => { + const check = ripgrepCheck({ execFileSync: () => "/usr/bin/rg" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + }); + + it("fails when rg is not found", async () => { + const check = ripgrepCheck({ execFileSync: () => { throw new Error(); } }); + const result = await check.run(); + expect(result.status).toBe("fail"); + 
expect(result.detail).toMatch(/apt install ripgrep/); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/cli -- doctor-checks.test +``` + +Expected: FAIL — `bwrapCheck`, `socatCheck`, `ripgrepCheck` not defined. + +- [ ] **Step 3: Add checks to `doctor-checks.ts`** + +Replace `unshareCheck` with `bwrapCheck`, and add `socatCheck` and `ripgrepCheck`: + +```typescript +// Remove unshareCheck entirely, add: + +export interface BwrapDeps { + execFileSync: (cmd: string, args: string[]) => string; +} + +export function bwrapCheck( + deps: BwrapDeps = { + execFileSync: (cmd, args) => defaultExecFileSync(cmd, args, { encoding: "utf8" }), + }, +): DiagnosticCheck { + return { + name: "bwrap", + category: "security", + async run(): Promise { + try { + const path = deps.execFileSync("which", ["bwrap"]).trim(); + return { status: "pass", message: `bubblewrap: ${path}` }; + } catch { + return { + status: "fail", + message: "bubblewrap not found", + detail: + "bubblewrap is required for filesystem isolation on Linux. " + + "Install: apt install bubblewrap", + }; + } + }, + }; +} + +export interface SocatDeps { + execFileSync: (cmd: string, args: string[]) => string; + platform: string; +} + +export function socatCheck( + deps: SocatDeps = { + execFileSync: (cmd, args) => defaultExecFileSync(cmd, args, { encoding: "utf8" }), + platform: process.platform, + }, +): DiagnosticCheck { + return { + name: "socat", + category: "security", + async run(): Promise { + if (deps.platform !== "linux") { + return { status: "pass", message: "socat not required on this platform" }; + } + try { + const path = deps.execFileSync("which", ["socat"]).trim(); + return { status: "pass", message: `socat: ${path}` }; + } catch { + return { + status: "fail", + message: "socat not found", + detail: + "socat is required for network proxy bridging on Linux. 
" + + "Install: apt install socat", + }; + } + }, + }; +} + +export interface RipgrepDeps { + execFileSync: (cmd: string, args: string[]) => string; +} + +export function ripgrepCheck( + deps: RipgrepDeps = { + execFileSync: (cmd, args) => defaultExecFileSync(cmd, args, { encoding: "utf8" }), + }, +): DiagnosticCheck { + return { + name: "ripgrep", + category: "security", + async run(): Promise { + try { + const path = deps.execFileSync("which", ["rg"]).trim(); + return { status: "pass", message: `ripgrep: ${path}` }; + } catch { + return { + status: "fail", + message: "ripgrep (rg) not found", + detail: + "ripgrep is required by sandbox-runtime to scan for dangerous files " + + "before each sandboxed command. Install: apt install ripgrep", + }; + } + }, + }; +} +``` + +Also update `sandboxHelperCheck` message — change `status: "warn"` detail to note the helper is optional (provides Landlock + cap-drop) rather than required: + +```typescript +return { + status: "warn", + message: "Sandbox helper not found", + detail: + "The native sandbox helper binary is not installed. " + + "Filesystem and network isolation via bubblewrap will still apply, " + + "but Landlock and capability-dropping will be inactive. " + + "Run 'make -C native' to build it.", +}; +``` + +- [ ] **Step 4: Update `doctor.ts` check list** + +Find where `unshareCheck` is registered and replace it. Also update `linuxCheck` — sandbox-runtime now supports macOS so the hard Linux requirement is gone. 
Replace `linuxCheck` with a `platformCheck` that passes on both `linux` and `darwin`: + +```typescript +// In doctor-checks.ts, replace linuxCheck with: +export function platformCheck( + deps: PlatformDeps = { platform: process.platform }, +): DiagnosticCheck { + return { + name: "platform", + category: "system", + async run(): Promise { + if (deps.platform === "linux" || deps.platform === "darwin") { + return { status: "pass", message: `Platform: ${deps.platform} (supported)` }; + } + return { + status: "fail", + message: `Platform ${deps.platform} is not supported`, + detail: "SafeClaw requires Linux or macOS.", + }; + }, + }; +} +``` + +In `doctor.ts`, swap: +```typescript +// Remove: linuxCheck() +// Remove: unshareCheck() +// Add: +platformCheck(), +bwrapCheck(), +socatCheck(), +ripgrepCheck(), +``` + +Export `platformCheck`, `bwrapCheck`, `socatCheck`, `ripgrepCheck` from `doctor-checks.ts`. + +Add tests for `platformCheck`: +```typescript +describe("platformCheck", () => { + it("passes on linux", async () => { + const r = await platformCheck({ platform: "linux" }).run(); + expect(r.status).toBe("pass"); + }); + it("passes on darwin", async () => { + const r = await platformCheck({ platform: "darwin" }).run(); + expect(r.status).toBe("pass"); + }); + it("fails on win32", async () => { + const r = await platformCheck({ platform: "win32" }).run(); + expect(r.status).toBe("fail"); + }); +}); +``` + +- [ ] **Step 5: Run tests** + +```bash +pnpm test --filter @safeclaw/cli -- doctor-checks.test +pnpm test --filter @safeclaw/cli -- doctor.test +``` + +Expected: PASS on both. + +- [ ] **Step 6: Full build + test** + +```bash +pnpm build && pnpm test +``` + +Expected: all tests pass, zero lint errors. + +```bash +pnpm lint +``` + +Expected: zero diagnostics. 
+ +- [ ] **Step 7: Commit** + +```bash +git add packages/cli/src/commands/doctor-checks.ts \ + packages/cli/src/commands/doctor-checks.test.ts \ + packages/cli/src/commands/doctor.ts +git commit -m "feat(cli): replace unshareCheck with bwrap/socat/ripgrep checks for sandbox-runtime" +``` + +--- + +## Phase 2 — C helper as inner Landlock + cap-drop layer + +### Task 8: Inject C helper inside bwrap via --policy-file + +**Files:** +- Modify: `packages/sandbox/src/sandbox.ts` +- Modify: `packages/sandbox/src/sandbox.test.ts` + +Background: The C helper at `native/safeclaw-sandbox-helper` already supports `--policy-file <path>`, which reads a policy JSON file instead of fd 3. The file must have mode `0600` and be owned by the current user (enforced by `policy_read_file()` in `policy.c`). + +When the helper is present, the spawn chain becomes: +``` +/bin/sh -c "bwrap [sandbox-runtime args] /bin/sh -c '<helper> --policy-file <policy.json> -- <command> [args]'" +``` + +The helper binary and temp file directory (`/tmp`) must be accessible inside the bwrap container. `/tmp` is always bind-mounted by sandbox-runtime. The helper path (e.g. `/usr/local/bin/safeclaw-sandbox-helper` or `~/.safeclaw/bin/safeclaw-sandbox-helper`) must either be in a system path included by bwrap, or explicitly added to `allowWrite`. 
+ +- [ ] **Step 1: Write failing tests** + +Add to `sandbox.test.ts`: + +```typescript +describe("Sandbox.execute() with helper", () => { + beforeEach(() => { + vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); + }); + + it("includes --policy-file in the inner command when helper is found", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockImplementation(async (cmd: string) => cmd); + + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/echo", ["hello"]); + + const innerCmd: string = mockWrapWithSandbox.mock.calls[0]![0]!; + expect(innerCmd).toContain("safeclaw-sandbox-helper"); + expect(innerCmd).toContain("--policy-file"); + expect(innerCmd).toContain("--"); + expect(innerCmd).toContain("echo"); + }); + + it("sets enforcement.landlock=true and enforcement.capDrop=true when helper is found", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + + expect(result.enforcement?.landlock).toBe(true); + expect(result.enforcement?.capDrop).toBe(true); + }); + + it("does NOT set landlock/capDrop when helper is not found", async () => { + mockFindHelper.mockReturnValue(undefined); + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + + expect(result.enforcement?.landlock).toBe(false); + expect(result.enforcement?.capDrop).toBe(false); + }); + + it("cleans up policy temp file even if command fails", async () => { + // Mock node:fs so we can verify writeFileSync and rmSync are both called + const mockWriteFileSync = vi.fn(); + const mockRmSync = vi.fn(); + vi.mock("node:fs", () => ({ + writeFileSync: 
mockWriteFileSync, + rmSync: mockRmSync, + })); + + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockResolvedValue("/bin/false"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/false", []); + + expect(mockWriteFileSync).toHaveBeenCalledOnce(); + // rmSync must be called with force:true to clean up the temp file + expect(mockRmSync).toHaveBeenCalledWith( + expect.stringContaining("safeclaw-policy-"), + { force: true }, + ); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- sandbox.test +``` + +Expected: FAIL — helper injection not yet implemented. + +- [ ] **Step 3: Add policy-file injection to `Sandbox.execute()`** + +Update `packages/sandbox/src/sandbox.ts`. Add imports: + +```typescript +import { writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { findHelper } from "./helper.js"; +``` + +In `execute()`, replace the `shellCmd` / `wrappedCmd` section with: + +```typescript +const helperPath = findHelper(); +const useHelper = helperPath !== undefined; + +let policyTmpPath: string | undefined; +let innerCmd: string; + +if (useHelper) { + // Write policy JSON to a temp file (mode 0600, as required by policy_read_file). + // The helper enforces the permissions check itself — this is defense-in-depth. + policyTmpPath = join( + tmpdir(), + `safeclaw-policy-${process.pid}-${Date.now()}.json`, + ); + writeFileSync( + policyTmpPath, + JSON.stringify({ + filesystem: this.policy.filesystem, + syscalls: this.policy.syscalls, + }), + { mode: 0o600 }, + ); + innerCmd = [ + helperPath, + "--policy-file", policyTmpPath, + "--", + command, + ...args, + ] + .map(shEscape) + .join(" "); +} else { + innerCmd = [command, ...args].map(shEscape).join(" "); +} + +// Translate to sandbox-runtime config. 
When helper is present, add its +// directory to allowWrite so bwrap bind-mounts it into the container. +const rtConfig = PolicyBuilder.toRuntimeConfig(this.policy); +if (useHelper && helperPath !== undefined) { + const helperDir = helperPath.substring(0, helperPath.lastIndexOf("/")); + // Only add if not already a system path (system paths are included by bwrap automatically) + const systemPaths = ["/bin", "/usr/bin", "/usr/local/bin", "/sbin", "/usr/sbin"]; + if (!systemPaths.includes(helperDir)) { + rtConfig.filesystem.allowWrite = [ + ...rtConfig.filesystem.allowWrite, + helperDir, + ]; + } +} + +const wrappedCmd = await SandboxManager.wrapWithSandbox(innerCmd, undefined, rtConfig); +``` + +Update `enforcement` to reflect helper presence: + +```typescript +const enforcement: EnforcementLayers = { + namespaces: isLinux, + pivotRoot: isLinux, + bindMounts: true, + landlock: useHelper, + seccomp: isLinux, + capDrop: useHelper, +}; +``` + +Add cleanup of `policyTmpPath` in both `close` and `error` handlers: + +```typescript +proc.on("close", (code: number | null) => { + clearTimeout(timer); + if (policyTmpPath !== undefined) { + try { rmSync(policyTmpPath, { force: true }); } catch { /* ignore */ } + } + SandboxManager.cleanupAfterCommand(); + resolve({ ... }); +}); + +proc.on("error", (err: Error) => { + clearTimeout(timer); + if (policyTmpPath !== undefined) { + try { rmSync(policyTmpPath, { force: true }); } catch { /* ignore */ } + } + SandboxManager.cleanupAfterCommand(); + resolve({ ... }); +}); +``` + +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- sandbox.test +``` + +Expected: PASS. + +- [ ] **Step 5: Full build + test + lint** + +```bash +pnpm build && pnpm test && pnpm lint +``` + +Expected: all pass, zero lint diagnostics. 
+ +- [ ] **Step 6: Integration smoke test (requires bwrap + helper on PATH)** + +```bash +# Build helper first if needed +make -C native + +# Run a simple sandboxed command +node -e " +import('@safeclaw/sandbox').then(async ({ Sandbox, PolicyBuilder, SandboxManager }) => { + await SandboxManager.initialize({ filesystem: { allowWrite: [], denyWrite: [], denyRead: [] }, network: { allowedDomains: [], deniedDomains: [] }}); + const sandbox = new Sandbox(PolicyBuilder.forDevelopment(process.cwd())); + const result = await sandbox.execute('/bin/echo', ['hello from sandbox']); + console.log(result); + await SandboxManager.reset(); +}); +" +``` + +Expected: `{ exitCode: 0, stdout: 'hello from sandbox\n', enforcement: { landlock: true, capDrop: true, ... } }`. + +- [ ] **Step 7: Commit** + +```bash +git add packages/sandbox/src/sandbox.ts packages/sandbox/src/sandbox.test.ts +git commit -m "feat(sandbox): inject C helper via --policy-file for Landlock + cap-drop inside bwrap" +``` + +--- + +## Risks and Notes + +| Risk | Mitigation | +|------|-----------| +| `SandboxManager.initialize()` not called before `Sandbox` is constructed | Enforced: `Sandbox` constructor checks `isSandboxingEnabled()` and throws with a clear message | +| Policy temp file leaked if process crashes between `writeFileSync` and `rmSync` | Temp file lives in `/tmp` — OS cleans it on reboot. Filename includes PID so post-mortem identification is possible | +| Helper binary not accessible inside bwrap when in non-system path | Task 8 adds `helperDir` to `allowWrite`; the `systemPaths` exclusion list may need extension for unusual installs | +| sandbox-runtime v0.0.42 is pre-stable | Pinned to exact git SHA `20f5176`. Check for updates before shipping. 
| +| `wrapWithSandbox()` may fail if proxy not initialized | The `catch` block in `bootstrapAgent()` catches init failures; filesystem isolation still applies | +| macOS: `sandbox-exec` behavior differs from bwrap | Tested by sandbox-runtime; our `PolicyBuilder.toRuntimeConfig()` translation is platform-agnostic | +| Seccomp conflict: sandbox-runtime installs unix-socket BPF, helper installs its own syscall filter | Both apply inside the process. sandbox-runtime's filter applies first (outer bwrap layer); helper's filter applies inside. Filters stack (both must allow a syscall for it to proceed). Verify the helper's syscall allowlist includes all syscalls needed by sandbox-runtime's filter management | +| `process.kill(-proc.pid, 'SIGKILL')` on timeout may not reach bwrap's children | The outer `/bin/sh` is the process group leader (`detached: true`). bwrap inherits the group, and sandboxed children inherit from bwrap. Verify with `execute('/bin/sleep', ['60'])` + short timeout — if `result.killed === true` the group kill worked. Add an integration test for this. | +| Helper binary not bind-mounted into bwrap when in non-system path | Task 8 adds `helperDir` to `rtConfig.filesystem.allowWrite`. sandbox-runtime maps `allowWrite` paths to `--bind` (read+write), which also allows execute on Linux. If the helper is in a non-standard location (e.g. `~/.safeclaw/bin/`), verify it is executable inside the bwrap container by running the integration smoke test with `SAFECLAW_HELPER_PATH=~/.safeclaw/bin/safeclaw-sandbox-helper`. | diff --git a/docs/sandboxing.md b/docs/sandboxing.md index 534cf4f..2fce3aa 100644 --- a/docs/sandboxing.md +++ b/docs/sandboxing.md @@ -22,33 +22,43 @@ SafeClaw treats every tool execution as untrusted. The sandbox limits the blast ## Architecture +SafeClaw uses a two-layer sandbox. The outer layer (provided by `@anthropic-ai/sandbox-runtime`) creates a container using bubblewrap on Linux or sandbox-exec on macOS. 
The inner layer (the C helper binary) applies Landlock, seccomp-BPF, and capability dropping inside the container. + ``` ┌──────────────────────────────────────────────────────────────┐ │ Node.js: Sandbox.execute(command, args) │ │ │ │ 1. Resolve helper binary (discovery) │ -│ 2. Serialize policy JSON (filesystem + syscalls) │ -│ 3. Spawn: unshare [ns-flags] -- helper -- command [args] │ -│ 4. Write policy JSON to fd 3 │ -│ 5. Collect stdout/stderr, enforce timeout │ +│ 2. Write policy JSON to temp file (mode 0o600) │ +│ 3. Build inner command: helper --policy-file -- cmd │ +│ 4. Translate SandboxPolicy → SandboxRuntimeConfig │ +│ (PolicyBuilder.toRuntimeConfig) │ +│ 5. SandboxManager.wrapWithSandbox(innerCmd, rtConfig) │ +│ 6. Spawn wrapped command via /bin/sh -c │ +│ 7. Collect stdout/stderr, enforce timeout │ +│ 8. Cleanup: delete temp policy file │ └──────────────────────┬───────────────────────────────────────┘ │ fork+exec ▼ ┌──────────────────────────────────────────────────────────────┐ -│ unshare(1) │ -│ Creates Linux namespaces: │ -│ - PID namespace (--pid --fork) │ -│ - Network namespace (--net) │ -│ - Mount namespace (--mount) │ -│ - User namespace (--user --map-root-user) │ +│ @anthropic-ai/sandbox-runtime (outer layer) │ +│ │ +│ Linux (bubblewrap): │ +│ - pivot_root: new filesystem root with bind-mounted paths │ +│ - PID, network, mount, user namespaces │ +│ - Network proxy (socat) for controlled domain access │ +│ │ +│ macOS (sandbox-exec): │ +│ - sandbox-exec profile restricts filesystem + network │ └──────────────────────┬───────────────────────────────────────┘ │ exec ▼ ┌──────────────────────────────────────────────────────────────┐ │ safeclaw-sandbox-helper (static C binary, ~800 KB) │ +│ (Linux only; skipped on macOS) │ │ │ │ 1. Self-checks (refuse setuid, PR_SET_NO_NEW_PRIVS) │ -│ 2. Read policy JSON from fd 3 │ +│ 2. Read policy JSON from --policy-file path │ │ 3. Apply Landlock filesystem restrictions │ │ 4. 
Close all fds > 2 (fd hygiene) │ │ 5. Drop all Linux capabilities │ @@ -60,10 +70,10 @@ SafeClaw treats every tool execution as untrusted. The sandbox limits the blast ┌──────────────────────────────────────────────────────────────┐ │ Target command (e.g., /bin/bash -c "npm test") │ │ │ -│ Runs with ALL restrictions active: │ -│ - Filesystem: only declared paths accessible │ +│ Runs with ALL restrictions active (Linux): │ +│ - Filesystem: pivot_root container + Landlock path rules │ │ - Syscalls: only allow-listed syscalls permitted │ -│ - Network: isolated (no connectivity) │ +│ - Network: isolated via namespace + network proxy │ │ - Capabilities: all dropped │ │ - Privileges: cannot escalate (NO_NEW_PRIVS) │ └──────────────────────────────────────────────────────────────┘ @@ -73,18 +83,31 @@ SafeClaw treats every tool execution as untrusted. The sandbox limits the blast ## Enforcement layers -### Layer 1: Linux namespaces (via `unshare`) +### Layer 1: Container isolation (via `@anthropic-ai/sandbox-runtime`) + +The outer layer uses `@anthropic-ai/sandbox-runtime` to create an isolated process container. The specific mechanism varies by platform: + +**Linux (bubblewrap / bwrap):** -Namespaces provide coarse-grained isolation at the kernel level. 
+| Feature | Effect | +|---------|--------| +| `pivot_root` | New filesystem root; only bind-mounted paths are visible | +| PID namespace | Process sees only its own PID tree; cannot signal host processes | +| Network namespace | Fresh network stack; external connectivity controlled by network proxy | +| Mount namespace | Isolated mount table; filesystem changes don't affect host | +| User namespace | Unprivileged user mapping; enables other namespaces without root | +| Network proxy (socat) | Intercepts outbound connections; enforces `allowedDomains`/`deniedDomains` | -| Namespace | Flag | Effect | -|-----------|------|--------| -| PID | `--pid --fork` | Process sees only its own PID tree; cannot signal host processes | -| Network | `--net` | Fresh network stack with only loopback; no external connectivity | -| Mount | `--mount` | Isolated mount table; filesystem modifications don't affect host | -| User | `--user --map-root-user` | Unprivileged user mapping; enables other namespaces without root | +`PolicyBuilder.toRuntimeConfig()` translates a `SandboxPolicy` into a `SandboxRuntimeConfig`: +- `filesystem.allow` rules with `readwrite` or `readwriteexecute` access map to `allowWrite` (bind-mounted read-write) +- Sensitive credential directories (`~/.ssh`, `~/.aws`, `~/.gnupg`, etc.) that exist as real directories (not symlinks) are added to `denyRead`; symlinks are excluded to avoid bwrap failures on WSL2 +- Network policy maps to `allowedDomains`/`deniedDomains` -Namespace isolation is handled by the standard `unshare(1)` utility. This is the baseline -- it works even without the helper binary. +**macOS (sandbox-exec):** + +The macOS `sandbox-exec` utility applies a Seatbelt profile that restricts filesystem access and network connectivity. Linux-specific layers (Landlock, seccomp-BPF, namespaces) are not available on macOS. + +This is the baseline — it works even without the C helper binary. 
### Layer 2: Landlock filesystem restrictions @@ -159,7 +182,7 @@ The enforcement order within the helper is critical for correctness: ## Policy format -The policy JSON written to fd 3 contains only the fields relevant to the helper (namespace and network isolation are handled by `unshare`): +The policy JSON written to the temp file (and passed to the helper via `--policy-file`) contains only the fields relevant to the helper (namespace and network isolation are handled by the outer sandbox-runtime layer): ```json { @@ -178,7 +201,7 @@ The policy JSON written to fd 3 contains only the fields relevant to the helper } ``` -The full `SandboxPolicy` type (in TypeScript) also includes `network`, `namespaces`, and `timeoutMs` -- these are consumed by the Node.js layer, not the helper. +The full `SandboxPolicy` type (in TypeScript) also includes `network`, `namespaces`, and `timeoutMs` -- these are consumed by the Node.js layer and translated to `SandboxRuntimeConfig` for sandbox-runtime, not passed to the helper. --- @@ -186,32 +209,35 @@ The full `SandboxPolicy` type (in TypeScript) also includes `network`, `namespac ### Discovery order -`Sandbox.execute()` searches for the helper binary in this order: +`findHelper()` searches for the helper binary in this order: 1. **`SAFECLAW_HELPER_PATH`** environment variable -- for custom installations and testing 2. **Co-located path** -- `native/safeclaw-sandbox-helper` relative to the package 3. **User install path** -- `~/.safeclaw/bin/safeclaw-sandbox-helper` 4. **System PATH** -- resolved via `which` -If the helper is found and executable, all four enforcement layers (namespaces + Landlock + seccomp + capability drop) are active. If not found, SafeClaw falls back to namespace-only isolation. +If the helper is found and executable, the full enforcement stack (outer sandbox-runtime container + inner Landlock + seccomp + capability drop) is active. If not found, only the outer sandbox-runtime layer is applied. 
> **TODO:** Re-add SHA-256 integrity verification of the helper binary once builds are reproducible. Currently, the binary hash changes across compiler versions and build environments, making a hardcoded hash impractical without a release process that stamps it. ### Graceful degradation -| Helper status | Enforcement | -|---------------|-------------| -| Found | Namespaces + Landlock + seccomp + capability drop | -| Not found | Namespaces only | +| Platform | Helper status | Enforcement | +|----------|---------------|-------------| +| Linux | Found | bwrap container (pivot_root + namespaces) + Landlock + seccomp + capability drop | +| Linux | Not found | bwrap container only (pivot_root + namespaces) | +| macOS | N/A | sandbox-exec profile only | The `SandboxResult.enforcement` field reports which layers were active: ```typescript interface EnforcementLayers { - namespaces: boolean; // unshare was used - landlock: boolean; // Landlock filesystem restrictions active - seccomp: boolean; // seccomp-BPF syscall filter active - capDrop: boolean; // all capabilities dropped + namespaces: boolean; // Linux namespaces active (bwrap) + pivotRoot: boolean; // pivot_root filesystem isolation (bwrap on Linux) + bindMounts: boolean; // bind-mounted paths (always true when sandbox-runtime runs) + landlock: boolean; // Landlock filesystem restrictions active (C helper) + seccomp: boolean; // seccomp-BPF syscall filter active (Linux + helper) + capDrop: boolean; // all capabilities dropped (C helper) } ``` @@ -273,15 +299,22 @@ interface EnforcementLayers { --- -## Kernel requirements +## Platform requirements -SafeClaw v1 requires Linux with: +### Linux - **Kernel >= 5.13** -- for Landlock LSM support - **seccomp-BPF** -- enabled in kernel config (`CONFIG_SECCOMP_FILTER=y`) - **User namespaces** -- `sysctl kernel.unprivileged_userns_clone=1` (default on most distros) +- **bubblewrap** -- `bwrap` binary in PATH (`apt install bubblewrap`) +- **socat** -- required by sandbox-runtime 
network proxy (`apt install socat`) + +### macOS + +- macOS 10.14+ (Mojave) for sandbox-exec support +- socat (`brew install socat`) for network proxy -The `safeclaw onboard` command checks these requirements during setup. The `detectKernelCapabilities()` function provides programmatic detection. +The `safeclaw onboard` command checks these requirements during setup. The `detectKernelCapabilities()` function provides programmatic detection. `safeclaw doctor` runs `bwrapCheck`, `socatCheck`, and `platformCheck` to verify the environment. --- diff --git a/docs/security-model.md b/docs/security-model.md index 420b236..49908e2 100644 --- a/docs/security-model.md +++ b/docs/security-model.md @@ -11,7 +11,11 @@ Two principles drive every design decision: ## Sandboxing architecture -SafeClaw uses three Linux kernel mechanisms for mandatory process isolation. Sandboxing cannot be disabled. +SafeClaw uses a two-layer sandbox for mandatory process isolation. Sandboxing cannot be disabled. + +The **outer layer** is provided by `@anthropic-ai/sandbox-runtime`: bubblewrap (`bwrap`) on Linux with `pivot_root` filesystem isolation and Linux namespaces (PID, net, mount, user); `sandbox-exec` on macOS. + +The **inner layer** is the C helper binary (`safeclaw-sandbox-helper`), which applies Landlock, seccomp-BPF, and capability dropping inside the container. It is active on Linux when the helper binary is found. 
### Landlock @@ -45,15 +49,15 @@ set_robust_list, rseq, prlimit64, getrandom - **Networking**: socket, connect, bind, listen, accept, accept4, recvfrom, sendto, recvmsg, sendmsg, shutdown, setsockopt, getsockopt, getpeername, getsockname, socketpair - **Misc**: ioctl, prctl, arch_prctl, set_tid_address, set_robust_list, futex, sched_yield, sched_getaffinity, uname, prlimit64, getrandom, rseq, memfd_create, copy_file_range, fadvise64, fallocate, ftruncate, truncate, mlock, munlock, mincore -### Linux namespaces +### Linux namespaces (via bubblewrap) -Four namespace types isolate sandboxed processes: +On Linux, four namespace types isolate sandboxed processes inside the bwrap container: | Namespace | Purpose | |-----------|---------| | PID | Process sees only its own PID tree | -| Network | No network access (or localhost-only) | -| Mount | Isolated filesystem view | +| Network | Isolated network stack; external access controlled by sandbox-runtime network proxy | +| Mount | Isolated filesystem view with `pivot_root` | | User | Unprivileged user mapping | ### DEFAULT_POLICY @@ -84,7 +88,7 @@ const sandbox = new Sandbox(policy); The development policy grants: - **Execute access**: `/usr/bin`, `/usr/local/bin`, `/bin`, `/usr/sbin`, `/sbin`, `/usr/lib/jvm`, `/usr/lib/gcc`, `/usr/libexec`, plus the Node.js install prefix - **Read access**: `/usr/include`, `/usr/share`, shared library paths (`/lib`, `/usr/lib`, `/lib64`, `/usr/lib64`), `/etc`, `/proc`, `/dev/null`, `/dev/urandom`, `/dev/zero`, `/dev/random` -- **Read-write access**: CWD, `/tmp`, `~/.safeclaw` +- **Read-write access**: CWD, `/tmp`, `/dev/null` - **Extra paths**: `DevelopmentPolicyOptions` supports `extraExecutePaths` (e.g., `~/.cargo/bin`, `~/.rustup`) and `extraReadWritePaths` for user-local toolchains - **Expanded syscalls**: ~120 syscalls (see seccomp-BPF section above) - **Network**: `"none"` (unchanged from default) diff --git a/packages/cli/package.json b/packages/cli/package.json index 
c619e0b..6fa4d3b 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -12,6 +12,7 @@ "build": "tsc -p tsconfig.json" }, "dependencies": { + "@anthropic-ai/sandbox-runtime": "git+ssh://git@github.com/anthropic-experimental/sandbox-runtime.git#20f5176a94314038695bee13779eb9eebbbaeb49", "@safeclaw/core": "workspace:*", "@safeclaw/gateway": "workspace:*", "@safeclaw/sandbox": "workspace:*", diff --git a/packages/cli/src/commands/bootstrap.test.ts b/packages/cli/src/commands/bootstrap.test.ts index fa423cf..3c35f2d 100644 --- a/packages/cli/src/commands/bootstrap.test.ts +++ b/packages/cli/src/commands/bootstrap.test.ts @@ -6,6 +6,22 @@ import { CapabilityRegistry, SessionManager, } from "@safeclaw/core"; + +const mockSandboxManagerInitialize = vi.fn<() => Promise>().mockResolvedValue(undefined); +const mockSandboxManagerIsSupportedPlatform = vi.fn<() => boolean>().mockReturnValue(true); +const mockSandboxManagerIsSandboxingEnabled = vi.fn<() => boolean>().mockReturnValue(true); + +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + initialize: mockSandboxManagerInitialize, + isSupportedPlatform: mockSandboxManagerIsSupportedPlatform, + isSandboxingEnabled: mockSandboxManagerIsSandboxingEnabled, + wrapWithSandbox: vi.fn().mockResolvedValue("/bin/true"), + cleanupAfterCommand: vi.fn(), + reset: vi.fn().mockResolvedValue(undefined), + }, +})); + const MockSandbox = vi.fn(); const mockForDevelopment = vi.fn().mockReturnValue({ filesystem: { @@ -83,6 +99,7 @@ function createMockDeps( describe("bootstrapAgent", () => { beforeEach(() => { + mockSandboxManagerInitialize.mockClear(); MockSandbox.mockReset(); mockForDevelopment.mockClear(); mockForDevelopment.mockReturnValue({ @@ -290,6 +307,12 @@ describe("bootstrapAgent", () => { ); }); + it("calls SandboxManager.initialize before constructing Sandbox", async () => { + const deps = createMockDeps(); + await bootstrapAgent(deps); + 
expect(mockSandboxManagerInitialize).toHaveBeenCalledOnce(); + }); + it("falls back gracefully when Sandbox constructor throws", async () => { MockSandbox.mockImplementation(() => { throw new Error("sandbox not supported"); diff --git a/packages/cli/src/commands/bootstrap.ts b/packages/cli/src/commands/bootstrap.ts index 06d360a..1be1d19 100644 --- a/packages/cli/src/commands/bootstrap.ts +++ b/packages/cli/src/commands/bootstrap.ts @@ -33,6 +33,7 @@ import { deriveKeyFromPassphrase as defaultDeriveKey, } from "@safeclaw/vault"; import { Sandbox, PolicyBuilder } from "@safeclaw/sandbox"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; import { readPassphrase as defaultReadPassphrase } from "../readPassphrase.js"; export interface BootstrapDeps { @@ -182,6 +183,22 @@ export async function bootstrapAgent( toolRegistry.register(tool); } + // Initialize sandbox-runtime network proxy infrastructure. + // Uses a base "block all network" config; per-execution configs are passed + // as customConfig in Sandbox.execute() → SandboxManager.wrapWithSandbox(). + try { + await SandboxManager.initialize({ + filesystem: { allowWrite: [], denyWrite: [], denyRead: [] }, + network: { allowedDomains: [], deniedDomains: [] }, + }); + } catch (err: unknown) { + const detail = err instanceof Error ? err.message : String(err); + output.write( + `Warning: sandbox network proxy failed to initialize (${detail}). 
` + + `Filesystem isolation will still be applied.\n`, + ); + } + let sandbox: Sandbox | undefined; try { sandbox = new Sandbox(sandboxPolicy); diff --git a/packages/cli/src/commands/doctor-checks.test.ts b/packages/cli/src/commands/doctor-checks.test.ts index 908322e..85e9f31 100644 --- a/packages/cli/src/commands/doctor-checks.test.ts +++ b/packages/cli/src/commands/doctor-checks.test.ts @@ -1,11 +1,13 @@ import { describe, it, expect } from "vitest"; import { nodeVersionCheck, - linuxCheck, + platformCheck, architectureCheck, vaultExistsCheck, sandboxHelperCheck, - unshareCheck, + bwrapCheck, + socatCheck, + ripgrepCheck, landlockCheck, seccompCheck, userNamespaceCheck, @@ -32,18 +34,24 @@ describe("nodeVersionCheck", () => { }); }); -describe("linuxCheck", () => { +describe("platformCheck", () => { it("passes on linux", async () => { - const check = linuxCheck({ platform: "linux" }); + const check = platformCheck({ platform: "linux" }); const result = await check.run(); expect(result.status).toBe("pass"); }); - it("fails on non-linux", async () => { - const check = linuxCheck({ platform: "darwin" }); + it("passes on darwin", async () => { + const check = platformCheck({ platform: "darwin" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + }); + + it("fails on unsupported platform", async () => { + const check = platformCheck({ platform: "win32" }); const result = await check.run(); expect(result.status).toBe("fail"); - expect(result.message).toContain("darwin"); + expect(result.message).toContain("win32"); }); }); @@ -99,20 +107,57 @@ describe("sandboxHelperCheck", () => { }); }); -describe("unshareCheck", () => { - it("passes when unshare is available", async () => { - const check = unshareCheck({ execFileSync: () => "/usr/bin/unshare\n" }); +describe("bwrapCheck", () => { + it("passes when bwrap is available", async () => { + const check = bwrapCheck({ execFileSync: () => "/usr/bin/bwrap\n" }); const result = await check.run(); 
expect(result.status).toBe("pass"); + expect(result.message).toContain("bwrap"); }); - it("fails when unshare is not found", async () => { - const check = unshareCheck({ + it("fails when bwrap is not found", async () => { + const check = bwrapCheck({ execFileSync: () => { throw new Error("not found"); }, }); const result = await check.run(); expect(result.status).toBe("fail"); - expect(result.detail).toContain("unshare"); + expect(result.detail).toContain("bwrap"); + }); +}); + +describe("socatCheck", () => { + it("passes when socat is available", async () => { + const check = socatCheck({ execFileSync: () => "/usr/bin/socat\n" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + expect(result.message).toContain("socat"); + }); + + it("warns when socat is not found", async () => { + const check = socatCheck({ + execFileSync: () => { throw new Error("not found"); }, + }); + const result = await check.run(); + expect(result.status).toBe("warn"); + expect(result.detail).toContain("socat"); + }); +}); + +describe("ripgrepCheck", () => { + it("passes when rg is available", async () => { + const check = ripgrepCheck({ execFileSync: () => "/usr/bin/rg\n" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + expect(result.message).toContain("rg"); + }); + + it("warns when rg is not found", async () => { + const check = ripgrepCheck({ + execFileSync: () => { throw new Error("not found"); }, + }); + const result = await check.run(); + expect(result.status).toBe("warn"); + expect(result.detail).toContain("ripgrep"); }); }); @@ -121,6 +166,7 @@ function makeKernelCaps(overrides: Partial = {}): KernelCapa landlock: { supported: true, abiVersion: 3 }, seccomp: { supported: true }, namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, ...overrides, }; } diff --git a/packages/cli/src/commands/doctor-checks.ts 
b/packages/cli/src/commands/doctor-checks.ts index 4c656e3..c2c87f7 100644 --- a/packages/cli/src/commands/doctor-checks.ts +++ b/packages/cli/src/commands/doctor-checks.ts @@ -44,20 +44,21 @@ export interface PlatformDeps { platform: string; } -export function linuxCheck( +export function platformCheck( deps: PlatformDeps = { platform: process.platform }, ): DiagnosticCheck { + const supported = new Set(["linux", "darwin"]); return { - name: "linux", + name: "platform", category: "system", async run(): Promise { - if (deps.platform === "linux") { - return { status: "pass", message: "Running on Linux" }; + if (supported.has(deps.platform)) { + return { status: "pass", message: `Platform: ${deps.platform}` }; } return { status: "fail", - message: `Running on ${deps.platform}`, - detail: "SafeClaw requires Linux.", + message: `Unsupported platform: ${deps.platform}`, + detail: "SafeClaw supports Linux and macOS (darwin) only.", }; }, }; @@ -152,30 +153,91 @@ export function sandboxHelperCheck( }; } -export interface UnshareDeps { +export interface BwrapDeps { execFileSync: (cmd: string, args: string[]) => string; } -export function unshareCheck( - deps: UnshareDeps = { +export function bwrapCheck( + deps: BwrapDeps = { execFileSync: (cmd: string, args: string[]) => defaultExecFileSync(cmd, args, { encoding: "utf8" }), }, ): DiagnosticCheck { return { - name: "unshare", + name: "bwrap", category: "security", async run(): Promise { try { - deps.execFileSync("which", ["unshare"]); - return { status: "pass", message: "unshare command available" }; + const path = deps.execFileSync("which", ["bwrap"]).trim(); + return { status: "pass", message: `bwrap available: ${path}` }; } catch { return { status: "fail", - message: "unshare command not found", + message: "bwrap not found", detail: - "The 'unshare' command is required for namespace isolation. " + - "Install util-linux: apt install util-linux", + "bubblewrap (bwrap) is required for sandbox isolation. 
" + + "Install: apt install bubblewrap", + }; + } + }, + }; +} + +export interface SocatDeps { + execFileSync: (cmd: string, args: string[]) => string; +} + +export function socatCheck( + deps: SocatDeps = { + execFileSync: (cmd: string, args: string[]) => + defaultExecFileSync(cmd, args, { encoding: "utf8" }), + }, +): DiagnosticCheck { + return { + name: "socat", + category: "security", + async run(): Promise { + try { + const path = deps.execFileSync("which", ["socat"]).trim(); + return { status: "pass", message: `socat available: ${path}` }; + } catch { + return { + status: "warn", + message: "socat not found", + detail: + "socat is used by sandbox-runtime for network proxying. " + + "Without it, network domain filtering will not work. " + + "Install: apt install socat", + }; + } + }, + }; +} + +export interface RipgrepDeps { + execFileSync: (cmd: string, args: string[]) => string; +} + +export function ripgrepCheck( + deps: RipgrepDeps = { + execFileSync: (cmd: string, args: string[]) => + defaultExecFileSync(cmd, args, { encoding: "utf8" }), + }, +): DiagnosticCheck { + return { + name: "ripgrep", + category: "security", + async run(): Promise { + try { + const path = deps.execFileSync("which", ["rg"]).trim(); + return { status: "pass", message: `rg available: ${path}` }; + } catch { + return { + status: "warn", + message: "rg (ripgrep) not found", + detail: + "ripgrep is used by the search tool for fast code search. 
" + + "Install: apt install ripgrep", }; } }, diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts index 800eaaa..d2835ff 100644 --- a/packages/cli/src/commands/doctor.ts +++ b/packages/cli/src/commands/doctor.ts @@ -1,11 +1,13 @@ import type { DiagnosticCheck, DiagnosticResult } from "./doctor-types.js"; import { nodeVersionCheck, - linuxCheck, + platformCheck, architectureCheck, vaultExistsCheck, sandboxHelperCheck, - unshareCheck, + bwrapCheck, + socatCheck, + ripgrepCheck, landlockCheck, seccompCheck, userNamespaceCheck, @@ -159,10 +161,12 @@ export function createDefaultChecks(): DiagnosticCheck[] { return [ // System nodeVersionCheck(), - linuxCheck(), + platformCheck(), architectureCheck(), // Security - unshareCheck(), + bwrapCheck(), + socatCheck(), + ripgrepCheck(), landlockCheck(), seccompCheck(), userNamespaceCheck(), diff --git a/packages/core/src/copilot/auth.test.ts b/packages/core/src/copilot/auth.test.ts index 08aa5a5..d4f6722 100644 --- a/packages/core/src/copilot/auth.test.ts +++ b/packages/core/src/copilot/auth.test.ts @@ -56,6 +56,7 @@ describe("requestDeviceCode", () => { client_id: "test-client-id", scope: "copilot read:user", }), + signal: expect.any(AbortSignal), }); expect(result).toEqual(deviceResponse); }); @@ -92,6 +93,7 @@ describe("pollForToken", () => { device_code: "dc_123", grant_type: "urn:ietf:params:oauth:grant-type:device_code", }), + signal: expect.any(AbortSignal), }); expect(result).toEqual(tokenResponse); }); @@ -121,6 +123,31 @@ describe("pollForToken", () => { vi.useRealTimers(); }); + it("handles slow_down by increasing interval and retrying", async () => { + vi.useFakeTimers(); + + const slowDown = { error: "slow_down" }; + const tokenResponse = { + access_token: "gho_abc123", + token_type: "bearer", + scope: "copilot", + }; + fetchMock + .mockResolvedValueOnce(jsonResponse(slowDown)) + .mockResolvedValueOnce(jsonResponse(tokenResponse)); + + const promise = pollForToken(config, 
"dc_123", 5); + + // slow_down increases interval from 5 to 10; advance past the 10-second delay + await vi.advanceTimersByTimeAsync(10_000); + + const result = await promise; + expect(fetchMock).toHaveBeenCalledTimes(2); + expect(result).toEqual(tokenResponse); + + vi.useRealTimers(); + }); + it("throws on non-pending errors", async () => { fetchMock.mockResolvedValueOnce( jsonResponse({ error: "access_denied" }), @@ -149,6 +176,7 @@ describe("getCopilotToken", () => { Authorization: "token gho_abc123", Accept: "application/json", }, + signal: expect.any(AbortSignal), }); expect(result).toEqual({ token: "tid=copilot_token_xyz", diff --git a/packages/core/src/copilot/auth.ts b/packages/core/src/copilot/auth.ts index 55c7d6a..6d84e77 100644 --- a/packages/core/src/copilot/auth.ts +++ b/packages/core/src/copilot/auth.ts @@ -27,6 +27,7 @@ export async function requestDeviceCode( client_id: config.clientId, scope: config.scopes.join(" "), }), + signal: AbortSignal.timeout(AUTH_FETCH_TIMEOUT_MS), }); if (!response.ok) { @@ -38,15 +39,21 @@ export async function requestDeviceCode( return (await response.json()) as DeviceCodeResponse; } +/** Timeout for individual auth fetch requests (ms). */ +const AUTH_FETCH_TIMEOUT_MS = 30_000; + /** * Poll GitHub for the OAuth token after the user has authorized the device. - * Retries on "authorization_pending" errors, respecting the given interval. + * Retries on "authorization_pending" and "slow_down" errors, respecting the + * given interval. "slow_down" increases the interval by 5 s as required by + * the GitHub Device Flow spec. 
*/ export async function pollForToken( config: CopilotAuthConfig, deviceCode: string, interval: number, ): Promise { + let currentInterval = interval; for (;;) { const response = await fetch(GITHUB_TOKEN_URL, { method: "POST", @@ -59,6 +66,7 @@ export async function pollForToken( device_code: deviceCode, grant_type: "urn:ietf:params:oauth:grant-type:device_code", }), + signal: AbortSignal.timeout(AUTH_FETCH_TIMEOUT_MS), }); if (!response.ok) { @@ -73,7 +81,13 @@ export async function pollForToken( if ("error" in data) { if (data.error === "authorization_pending") { - await delay(interval * 1000); + await delay(currentInterval * 1000); + continue; + } + if (data.error === "slow_down") { + // GitHub requires increasing the interval by 5 s on slow_down + currentInterval += 5; + await delay(currentInterval * 1000); continue; } throw new Error(`Token poll error: ${data.error}`); @@ -95,6 +109,7 @@ export async function getCopilotToken( Authorization: `token ${githubToken}`, Accept: "application/json", }, + signal: AbortSignal.timeout(AUTH_FETCH_TIMEOUT_MS), }); if (!response.ok) { diff --git a/packages/sandbox/package.json b/packages/sandbox/package.json index 0de5762..18a5a5e 100644 --- a/packages/sandbox/package.json +++ b/packages/sandbox/package.json @@ -8,5 +8,8 @@ "scripts": { "build": "tsc -p tsconfig.json" }, + "dependencies": { + "@anthropic-ai/sandbox-runtime": "git+ssh://git@github.com/anthropic-experimental/sandbox-runtime.git#20f5176a94314038695bee13779eb9eebbbaeb49" + }, "files": ["dist"] } diff --git a/packages/sandbox/src/detect.test.ts b/packages/sandbox/src/detect.test.ts index 64b54c0..e2ef91a 100644 --- a/packages/sandbox/src/detect.test.ts +++ b/packages/sandbox/src/detect.test.ts @@ -1,150 +1,59 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; -import * as fs from "node:fs"; -vi.mock("node:fs"); +const mockIsSupportedPlatform = vi.fn<() => boolean>(); +const mockCheckDeps = vi.fn<() => { errors: string[]; warnings: string[] }>(); 
+const mockWhichBwrap = vi.fn<() => string>(); -const mockedFs = vi.mocked(fs); +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSupportedPlatform: mockIsSupportedPlatform, + checkDependencies: mockCheckDeps, + }, +})); -function mockKernel(release: string, status: string, nsFiles: string[]): void { - mockedFs.readFileSync.mockImplementation((path: fs.PathOrFileDescriptor) => { - if (path === "/proc/sys/kernel/osrelease") return release; - if (path === "/proc/self/status") return status; - throw new Error(`Unexpected readFileSync: ${String(path)}`); - }); - mockedFs.existsSync.mockImplementation((path: fs.PathLike) => - nsFiles.includes(String(path)), - ); -} - -// Import after mock setup -const { detectKernelCapabilities, assertSandboxSupported } = await import( - "./detect.js" -); - -describe("detectKernelCapabilities", () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - it("detects Landlock support on kernel 6.1.0 with ABI v2", () => { - mockKernel("6.1.0-generic\n", "Seccomp:\t2\n", [ - "/proc/self/ns/user", - "/proc/self/ns/pid", - "/proc/self/ns/net", - "/proc/self/ns/mnt", - ]); - - const caps = detectKernelCapabilities(); - expect(caps.landlock.supported).toBe(true); - expect(caps.landlock.abiVersion).toBe(2); - }); - - it("detects Landlock ABI v3 on kernel 6.2.0", () => { - mockKernel("6.2.0\n", "Seccomp:\t2\n", []); - - const caps = detectKernelCapabilities(); - expect(caps.landlock.supported).toBe(true); - expect(caps.landlock.abiVersion).toBe(3); - }); - - it("detects Landlock ABI v1 on kernel 5.13.0", () => { - mockKernel("5.13.0\n", "Seccomp:\t2\n", []); - - const caps = detectKernelCapabilities(); - expect(caps.landlock.supported).toBe(true); - expect(caps.landlock.abiVersion).toBe(1); - }); - - it("reports no Landlock support on kernel 5.12.0", () => { - mockKernel("5.12.0\n", "Seccomp:\t2\n", []); - - const caps = detectKernelCapabilities(); - expect(caps.landlock.supported).toBe(false); - 
expect(caps.landlock.abiVersion).toBe(0); - }); - - it("detects seccomp from /proc/self/status containing Seccomp: 2", () => { - mockKernel("6.1.0\n", "Name:\tnode\nSeccomp:\t2\nGroups:\t", []); - - const caps = detectKernelCapabilities(); - expect(caps.seccomp.supported).toBe(true); - }); - - it("detects seccomp mode 1 as supported", () => { - mockKernel("6.1.0\n", "Seccomp:\t1\n", []); - - const caps = detectKernelCapabilities(); - expect(caps.seccomp.supported).toBe(true); - }); +vi.mock("node:child_process", () => ({ + execFileSync: mockWhichBwrap, +})); - it("detects no seccomp when Seccomp: 0", () => { - mockKernel("6.1.0\n", "Seccomp:\t0\n", []); +const { detectKernelCapabilities, assertSandboxSupported } = await import("./detect.js"); - const caps = detectKernelCapabilities(); - expect(caps.seccomp.supported).toBe(false); - }); - - it("detects missing namespace support when /proc/self/ns/user does not exist", () => { - mockKernel("6.1.0\n", "Seccomp:\t2\n", [ - "/proc/self/ns/pid", - "/proc/self/ns/net", - "/proc/self/ns/mnt", - ]); +describe("detectKernelCapabilities()", () => { + beforeEach(() => vi.clearAllMocks()); + it("reports bwrap available when which bwrap succeeds", () => { + mockWhichBwrap.mockReturnValue("/usr/bin/bwrap"); const caps = detectKernelCapabilities(); - expect(caps.namespaces.user).toBe(false); - expect(caps.namespaces.pid).toBe(true); - expect(caps.namespaces.net).toBe(true); - expect(caps.namespaces.mnt).toBe(true); + expect(caps.bwrap.available).toBe(true); + expect(caps.bwrap.path).toBe("/usr/bin/bwrap"); }); - it("detects all namespaces present", () => { - mockKernel("6.1.0\n", "Seccomp:\t2\n", [ - "/proc/self/ns/user", - "/proc/self/ns/pid", - "/proc/self/ns/net", - "/proc/self/ns/mnt", - ]); - + it("reports bwrap unavailable when which bwrap fails", () => { + mockWhichBwrap.mockImplementation(() => { throw new Error("not found"); }); const caps = detectKernelCapabilities(); - expect(caps.namespaces.user).toBe(true); - 
expect(caps.namespaces.pid).toBe(true); - expect(caps.namespaces.net).toBe(true); - expect(caps.namespaces.mnt).toBe(true); + expect(caps.bwrap.available).toBe(false); + expect(caps.bwrap.path).toBeUndefined(); }); }); -describe("assertSandboxSupported", () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - it("throws if kernel is 4.0.0 (no Landlock)", () => { - mockKernel("4.0.0\n", "Seccomp:\t0\n", []); +describe("assertSandboxSupported()", () => { + beforeEach(() => vi.clearAllMocks()); - expect(() => assertSandboxSupported()).toThrow( - /Missing kernel features.*Landlock/, - ); + it("does not throw when platform is supported and deps are OK", () => { + mockIsSupportedPlatform.mockReturnValue(true); + mockCheckDeps.mockReturnValue({ errors: [], warnings: [] }); + mockWhichBwrap.mockReturnValue("/usr/bin/bwrap"); + expect(() => assertSandboxSupported()).not.toThrow(); }); - it("throws listing all missing features", () => { - mockKernel("4.0.0\n", "Seccomp:\t0\n", ["/proc/self/ns/mnt"]); - - expect(() => assertSandboxSupported()).toThrow( - /Landlock.*seccomp-BPF.*User namespaces.*PID namespaces/, - ); + it("throws when platform is not supported", () => { + mockIsSupportedPlatform.mockReturnValue(false); + mockCheckDeps.mockReturnValue({ errors: [], warnings: [] }); + expect(() => assertSandboxSupported()).toThrow(/platform/i); }); - it("returns capabilities when all features present", () => { - mockKernel("6.1.0\n", "Seccomp:\t2\n", [ - "/proc/self/ns/user", - "/proc/self/ns/pid", - "/proc/self/ns/net", - "/proc/self/ns/mnt", - ]); - - const caps = assertSandboxSupported(); - expect(caps.landlock.supported).toBe(true); - expect(caps.seccomp.supported).toBe(true); + it("throws when sandbox-runtime deps are missing", () => { + mockIsSupportedPlatform.mockReturnValue(true); + mockCheckDeps.mockReturnValue({ errors: ["bubblewrap not found"], warnings: [] }); + expect(() => assertSandboxSupported()).toThrow(/bubblewrap not found/); }); }); diff --git 
a/packages/sandbox/src/detect.ts b/packages/sandbox/src/detect.ts index 8c80bbe..bb2685b 100644 --- a/packages/sandbox/src/detect.ts +++ b/packages/sandbox/src/detect.ts @@ -1,59 +1,117 @@ +import { execFileSync } from "node:child_process"; import { readFileSync, existsSync } from "node:fs"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; import type { KernelCapabilities } from "./types.js"; -const LANDLOCK_MIN_KERNEL: [number, number] = [5, 13]; - -function parseKernelVersion(release: string): [number, number] { - const parts = release.trim().split("."); - return [parseInt(parts[0] ?? "0", 10), parseInt(parts[1] ?? "0", 10)]; -} - -function kernelAtLeast(release: string, min: [number, number]): boolean { - const [major, minor] = parseKernelVersion(release); - return major > min[0] || (major === min[0] && minor >= min[1]); -} - +/** + * Probes system capabilities relevant to sandboxing. + * Returns KernelCapabilities with bwrap probe on Linux; on macOS the + * bwrap fields are always unavailable (macOS uses sandbox-exec instead). 
+ */ export function detectKernelCapabilities(): KernelCapabilities { - const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); - const status = readFileSync("/proc/self/status", "utf8"); + let bwrapPath: string | undefined; + let bwrapVersion: string | undefined; - const landlockSupported = kernelAtLeast(release, LANDLOCK_MIN_KERNEL); - let landlockAbi = 0; - if (landlockSupported) { - const [major, minor] = parseKernelVersion(release); - if (major > 6 || (major === 6 && minor >= 2)) landlockAbi = 3; - else if (major > 5 || (major === 5 && minor >= 19)) landlockAbi = 2; - else landlockAbi = 1; + try { + bwrapPath = execFileSync("which", ["bwrap"], { encoding: "utf8" }).trim(); + try { + bwrapVersion = execFileSync("bwrap", ["--version"], { encoding: "utf8" }) + .trim() + .split("\n")[0]; + } catch { + // version flag not supported or bwrap not runnable — path is still valid + } + } catch { + // bwrap not on PATH } - const seccompSupported = /Seccomp:\s*[12]/.test(status); + // Landlock / seccomp / namespace detection is Linux-only; on macOS these + // are undefined/false since sandbox-runtime uses sandbox-exec there. + const isLinux = process.platform === "linux"; return { - landlock: { supported: landlockSupported, abiVersion: landlockAbi }, - seccomp: { supported: seccompSupported }, + landlock: { + supported: isLinux ? detectLandlock() : false, + abiVersion: isLinux ? detectLandlockAbi() : 0, + }, + seccomp: { supported: isLinux ? detectSeccomp() : false }, namespaces: { - user: existsSync("/proc/self/ns/user"), - pid: existsSync("/proc/self/ns/pid"), - net: existsSync("/proc/self/ns/net"), - mnt: existsSync("/proc/self/ns/mnt"), + user: isLinux ? existsSync("/proc/self/ns/user") : false, + pid: isLinux ? existsSync("/proc/self/ns/pid") : false, + net: isLinux ? existsSync("/proc/self/ns/net") : false, + mnt: isLinux ? 
existsSync("/proc/self/ns/mnt") : false, + }, + bwrap: { + available: bwrapPath !== undefined, + path: bwrapPath, + version: bwrapVersion, }, }; } +/** + * Throws a descriptive error if the current platform and dependencies + * do not support sandbox-runtime isolation. + */ export function assertSandboxSupported(): KernelCapabilities { - const caps = detectKernelCapabilities(); - const missing: string[] = []; - if (!caps.landlock.supported) - missing.push("Landlock (requires kernel >= 5.13)"); - if (!caps.seccomp.supported) missing.push("seccomp-BPF"); - if (!caps.namespaces.user) missing.push("User namespaces"); - if (!caps.namespaces.pid) missing.push("PID namespaces"); - - if (missing.length > 0) { + if (!SandboxManager.isSupportedPlatform()) { throw new Error( - `SafeClaw requires mandatory sandbox support. Missing kernel features: ${missing.join(", ")}. ` + - `SafeClaw v1 is Linux-only and requires a modern kernel (>= 5.13).`, + `SafeClaw sandbox is not supported on this platform (${process.platform}). ` + + `Supported: Linux (kernel ≥ 5.13, bubblewrap, socat, ripgrep) and macOS.`, ); } - return caps; + + const deps = SandboxManager.checkDependencies(); + if (deps.errors.length > 0) { + throw new Error( + `Sandbox dependencies missing: ${deps.errors.join(", ")}. ` + + `On Linux install: apt install bubblewrap socat ripgrep`, + ); + } + + return detectKernelCapabilities(); +} + +// ── Linux helpers ────────────────────────────────────────────────────── + +const LANDLOCK_MIN_KERNEL: [number, number] = [5, 13]; + +function parseKernelVersion(release: string): [number, number] { + const parts = release.trim().split("."); + return [parseInt(parts[0] ?? "0", 10), parseInt(parts[1] ?? 
"0", 10)]; +} + +function detectLandlock(): boolean { + try { + const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); + const [major, minor] = parseKernelVersion(release); + return ( + major > LANDLOCK_MIN_KERNEL[0] || + (major === LANDLOCK_MIN_KERNEL[0] && minor >= LANDLOCK_MIN_KERNEL[1]) + ); + } catch { + return false; + } +} + +function detectLandlockAbi(): number { + try { + const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); + const [major, minor] = parseKernelVersion(release); + if (major > 6 || (major === 6 && minor >= 2)) return 3; + if (major > 5 || (major === 5 && minor >= 19)) return 2; + if (major > 5 || (major === 5 && minor >= 13)) return 1; + return 0; + } catch { + return 0; + } +} + +function detectSeccomp(): boolean { + try { + const status = readFileSync("/proc/self/status", "utf8"); + return /Seccomp:\s*[12]/.test(status); + } catch { + return false; + } } diff --git a/packages/sandbox/src/index.ts b/packages/sandbox/src/index.ts index 26e1743..5d65375 100644 --- a/packages/sandbox/src/index.ts +++ b/packages/sandbox/src/index.ts @@ -5,6 +5,7 @@ export { } from "./detect.js"; export { DEFAULT_POLICY } from "./types.js"; export type { + NetworkPolicy, SandboxPolicy, SandboxResult, PathRule, diff --git a/packages/sandbox/src/integration.test.ts b/packages/sandbox/src/integration.test.ts index cc1f132..de12d4f 100644 --- a/packages/sandbox/src/integration.test.ts +++ b/packages/sandbox/src/integration.test.ts @@ -1,8 +1,9 @@ -import { describe, it, expect } from "vitest"; +import { describe, it, expect, beforeAll, afterAll } from "vitest"; import { execFileSync } from "node:child_process"; import { existsSync } from "node:fs"; import { join, dirname } from "node:path"; import { fileURLToPath } from "node:url"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; import { Sandbox } from "./sandbox.js"; import { DEFAULT_POLICY } from "./types.js"; import type { SandboxPolicy } from "./types.js"; @@ -68,6 
+69,27 @@ const ECHO_POLICY: SandboxPolicy = { }; describe("Sandbox integration (real binary)", () => { + beforeAll(async () => { + // Initialize sandbox-runtime proxy infrastructure before any Sandbox is constructed. + // This mirrors what bootstrapAgent() does in production. + try { + await SandboxManager.initialize({ + filesystem: { allowWrite: [], denyWrite: [], denyRead: [] }, + network: { allowedDomains: [], deniedDomains: [] }, + }); + } catch { + // Initialization failure is non-fatal — filesystem isolation still applies. + } + }); + + afterAll(async () => { + try { + await SandboxManager.reset(); + } catch { + // ignore cleanup errors + } + }); + it.skipIf(!helperExists || !canUnshareUser)( "seccomp enforcement blocks missing syscalls with DEFAULT_POLICY", async () => { diff --git a/packages/sandbox/src/policy-builder.test.ts b/packages/sandbox/src/policy-builder.test.ts index 7825dab..25bc0a2 100644 --- a/packages/sandbox/src/policy-builder.test.ts +++ b/packages/sandbox/src/policy-builder.test.ts @@ -1,7 +1,15 @@ -import { describe, it, expect, beforeEach } from "vitest"; +import { describe, it, expect, beforeEach, vi } from "vitest"; import { homedir } from "node:os"; import { PolicyBuilder } from "./policy-builder.js"; import type { SandboxPolicy, PathRule } from "./types.js"; +import { DEFAULT_POLICY } from "./types.js"; +import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; + +// Mock lstatSync so toRuntimeConfig() tests are deterministic regardless of +// which credential directories actually exist on the test machine. 
+vi.mock("node:fs", () => ({ + lstatSync: () => ({ isDirectory: () => true }), +})); describe("PolicyBuilder", () => { describe("addReadExecute()", () => { @@ -345,3 +353,63 @@ describe("PolicyBuilder", () => { }); }); }); + +describe("PolicyBuilder.toRuntimeConfig()", () => { + it("maps readwrite PathRules to allowWrite", () => { + const policy = new PolicyBuilder() + .addReadWrite("/project") + .addReadWrite("/tmp") + .build(); + const rtConfig: SandboxRuntimeConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain("/project"); + expect(rtConfig.filesystem.allowWrite).toContain("/tmp"); + }); + + it("maps readwriteexecute PathRules to allowWrite", () => { + const policy = new PolicyBuilder().addReadWriteExecute("/workspace").build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain("/workspace"); + }); + + it("does not add read-only or execute-only paths to allowWrite", () => { + const policy = new PolicyBuilder() + .addReadOnly("/etc") + .addReadExecute("/usr/bin") + .build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).not.toContain("/etc"); + expect(rtConfig.filesystem.allowWrite).not.toContain("/usr/bin"); + }); + + it("adds sensitive home dirs to denyRead", () => { + const policy = new PolicyBuilder().build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + const home = homedir(); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.ssh`); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.aws`); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.gnupg`); + }); + + it("maps network: 'none' to allowedDomains: []", () => { + const policy = { ...DEFAULT_POLICY, network: "none" as const }; + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.network.allowedDomains).toEqual([]); + }); + + it("maps network object to allowedDomains/deniedDomains", () => { 
+ const policy: SandboxPolicy = { + ...DEFAULT_POLICY, + network: { allowedDomains: ["github.com", "*.npmjs.org"], deniedDomains: ["evil.com"] }, + }; + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.network.allowedDomains).toEqual(["github.com", "*.npmjs.org"]); + expect(rtConfig.network.deniedDomains).toEqual(["evil.com"]); + }); + + it("forDevelopment().toRuntimeConfig() includes cwd in allowWrite", () => { + const cwd = "/home/user/project"; + const policy = PolicyBuilder.forDevelopment(cwd); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain(cwd); + }); +}); diff --git a/packages/sandbox/src/policy-builder.ts b/packages/sandbox/src/policy-builder.ts index c30c1a5..04eb886 100644 --- a/packages/sandbox/src/policy-builder.ts +++ b/packages/sandbox/src/policy-builder.ts @@ -1,6 +1,8 @@ +import { lstatSync } from "node:fs"; import { homedir } from "node:os"; import { dirname } from "node:path"; -import type { SandboxPolicy, PathRule } from "./types.js"; +import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; +import type { SandboxPolicy, PathRule, NetworkPolicy } from "./types.js"; /** Options for customizing the development policy */ export interface DevelopmentPolicyOptions { @@ -8,6 +10,14 @@ export interface DevelopmentPolicyOptions { extraExecutePaths?: string[]; /** Additional paths that need readwrite access (e.g. ~/.cache) */ extraReadWritePaths?: string[]; + /** Additional read-only paths */ + extraReadOnlyPaths?: string[]; + /** + * Network domains the sandboxed process may connect to. + * Default: [] (block all network). Use this to allow e.g. npm registry. + * Example: ["registry.npmjs.org", "*.github.com"] + */ + allowedNetworkDomains?: string[]; } /** @@ -64,6 +74,57 @@ export class PolicyBuilder { }; } + /** + * Translates a SafeClaw SandboxPolicy into a SandboxRuntimeConfig for + * @anthropic-ai/sandbox-runtime. 
+ * + * Read model difference: SafeClaw uses an allowlist (Landlock); sandbox-runtime + * is permissive-by-default with an explicit denylist. We translate by denying + * the sensitive credential dirs that must never be readable. + * + * Write model: both use allowlists. PathRules with access "readwrite" or + * "readwriteexecute" map to filesystem.allowWrite. + */ + static toRuntimeConfig(policy: SandboxPolicy): SandboxRuntimeConfig { + // ── Filesystem ──────────────────────────────────────────────────── + const allowWrite = policy.filesystem.allow + .filter((r) => r.access === "readwrite" || r.access === "readwriteexecute") + .map((r) => r.path); + + // Always deny reads to credential/secret directories that exist. + // Non-existent paths are skipped — bwrap cannot bind-mount over them. + // sandbox-runtime also enforces mandatory deny on dangerous files (.bashrc, + // .git/hooks, etc.) regardless of this config — these are complementary. + const home = homedir(); + const denyRead = [ + `${home}/.ssh`, + `${home}/.aws`, + `${home}/.gnupg`, + `${home}/.kube`, + `${home}/.docker`, + `${home}/.gcloud`, + `${home}/.azure`, + ].filter((p) => { + try { + return lstatSync(p).isDirectory(); + } catch { + return false; + } + }); + + // ── Network ─────────────────────────────────────────────────────── + const network = buildNetworkConfig(policy.network); + + return { + filesystem: { + allowWrite, + denyWrite: [], + denyRead, + }, + network, + }; + } + /** * Creates a policy suitable for software development. 
* @@ -149,6 +210,11 @@ export class PolicyBuilder { builder.addReadWrite(p); } } + if (options?.extraReadOnlyPaths) { + for (const p of options.extraReadOnlyPaths) { + builder.addReadOnly(p); + } + } // ── Expanded syscall allowlist ─────────────────────────────────── // Includes all DEFAULT_POLICY syscalls plus what common dev tools need @@ -156,8 +222,29 @@ export class PolicyBuilder { builder.syscalls.push(sc); } - return builder.build(); + // ── Network ────────────────────────────────────────────────────── + const networkPolicy: NetworkPolicy = + options?.allowedNetworkDomains !== undefined + ? { allowedDomains: options.allowedNetworkDomains } + : "none"; + + return { ...builder.build(), network: networkPolicy }; + } +} + +function buildNetworkConfig( + network: NetworkPolicy, +): SandboxRuntimeConfig["network"] { + if (network === "none") { + return { allowedDomains: [], deniedDomains: [] }; + } + if (network === "localhost") { + return { allowedDomains: ["localhost"], deniedDomains: [] }; } + return { + allowedDomains: network.allowedDomains, + deniedDomains: network.deniedDomains ?? [], + }; } /** diff --git a/packages/sandbox/src/sandbox.test.ts b/packages/sandbox/src/sandbox.test.ts index 4a4b563..137d147 100644 --- a/packages/sandbox/src/sandbox.test.ts +++ b/packages/sandbox/src/sandbox.test.ts @@ -1,25 +1,15 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; -import { execFileSync } from "node:child_process"; import { DEFAULT_POLICY } from "./types.js"; import type { KernelCapabilities } from "./types.js"; -/** - * Probe whether user namespaces work on this machine. - * GitHub Actions runners and some containers restrict unprivileged - * user namespaces, causing `unshare --user` to fail. 
- */ -let canUnshareUser = false; -try { - execFileSync("unshare", ["--user", "--map-root-user", "--", "/bin/true"], { - timeout: 3000, - }); - canUnshareUser = true; -} catch { - // user namespaces not available — skip dependent tests -} - +// Mock sandbox-runtime and helper before dynamic import const mockAssertSandboxSupported = vi.fn<() => KernelCapabilities>(); const mockFindHelper = vi.fn<() => string | undefined>(); +const mockWrapWithSandbox = vi.fn<(cmd: string) => Promise>(); +const mockIsSandboxingEnabled = vi.fn<() => boolean>(); +const mockCleanupAfterCommand = vi.fn<() => void>(); +const mockWriteFileSync = vi.fn<() => void>(); +const mockRmSync = vi.fn<() => void>(); vi.mock("./detect.js", () => ({ assertSandboxSupported: mockAssertSandboxSupported, @@ -29,179 +19,174 @@ vi.mock("./helper.js", () => ({ findHelper: () => mockFindHelper(), })); +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSandboxingEnabled: mockIsSandboxingEnabled, + wrapWithSandbox: mockWrapWithSandbox, + cleanupAfterCommand: mockCleanupAfterCommand, + }, +})); + +vi.mock("node:fs", () => ({ + writeFileSync: (...args: unknown[]) => mockWriteFileSync(...args), + rmSync: (...args: unknown[]) => mockRmSync(...args), + lstatSync: () => ({ isDirectory: () => false }), +})); + const { Sandbox } = await import("./sandbox.js"); const FULL_CAPS: KernelCapabilities = { landlock: { supported: true, abiVersion: 3 }, seccomp: { supported: true }, namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, }; describe("Sandbox", () => { beforeEach(() => { vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); mockFindHelper.mockReturnValue(undefined); + mockWriteFileSync.mockReturnValue(undefined); + mockRmSync.mockReturnValue(undefined); }); it("constructor calls assertSandboxSupported", () => { - 
mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - new Sandbox(DEFAULT_POLICY); - expect(mockAssertSandboxSupported).toHaveBeenCalledOnce(); }); - it("constructor throws if sandbox not supported", () => { - mockAssertSandboxSupported.mockImplementation(() => { - throw new Error("Missing kernel features: Landlock"); - }); - - expect(() => new Sandbox(DEFAULT_POLICY)).toThrow( - /Missing kernel features/, - ); + it("constructor throws if not initialized (isSandboxingEnabled returns false)", () => { + mockIsSandboxingEnabled.mockReturnValue(false); + expect(() => new Sandbox(DEFAULT_POLICY)).toThrow(/initialize/i); }); it("getPolicy returns a copy of the policy", () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const sandbox = new Sandbox(DEFAULT_POLICY); const policy = sandbox.getPolicy(); - expect(policy).toEqual(DEFAULT_POLICY); expect(policy).not.toBe(DEFAULT_POLICY); }); - }); describe("Sandbox.execute()", () => { beforeEach(() => { vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); mockFindHelper.mockReturnValue(undefined); + mockWriteFileSync.mockReturnValue(undefined); + mockRmSync.mockReturnValue(undefined); }); - it.skipIf(!canUnshareUser)( - "runs a command and returns stdout", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/echo", ["hello"]); - expect(result.stdout).toContain("hello"); - expect(result.exitCode).toBe(0); - expect(result.killed).toBe(false); - }, - ); - - it.skipIf(!canUnshareUser)( - "returns non-zero exit code on failure", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/false", []); - expect(result.exitCode).not.toBe(0); - expect(result.killed).toBe(false); - }, - ); - - it.skipIf(!canUnshareUser)( - "kills 
process after timeout", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const policy = { ...DEFAULT_POLICY, timeoutMs: 100 }; - const sandbox = new Sandbox(policy); - const result = await sandbox.execute("/bin/sleep", ["10"]); - expect(result.killed).toBe(true); - expect(result.killReason).toBe("timeout"); - }, - ); - - it.skipIf(!canUnshareUser)("captures stderr", async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + it("calls wrapWithSandbox with shell-quoted command", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/echo hello"); const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/sh", ["-c", "echo error >&2"]); - expect(result.stderr).toContain("error"); + await sandbox.execute("/bin/echo", ["hello"]); + expect(mockWrapWithSandbox).toHaveBeenCalledOnce(); + const wrappedArg: string = mockWrapWithSandbox.mock.calls[0]![0]!; + expect(wrappedArg).toContain("echo"); + expect(wrappedArg).toContain("hello"); }); - it.skipIf(!canUnshareUser)("reports durationMs", async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + it("calls cleanupAfterCommand after execution", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/true", []); + expect(mockCleanupAfterCommand).toHaveBeenCalledOnce(); + }); + + it("returns stdout and exitCode from the spawned command", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/echo hello"); + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/echo", ["hello"]); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("hello"); + }); + + it("kills process after timeout and returns killReason=timeout", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/sleep 60"); + const policy = { ...DEFAULT_POLICY, timeoutMs: 100 }; + const sandbox = new Sandbox(policy); + const result = 
await sandbox.execute("/bin/sleep", ["60"]); + expect(result.killed).toBe(true); + expect(result.killReason).toBe("timeout"); + }); + + it("reports pivotRoot=true and bindMounts=true on Linux", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/true"); const sandbox = new Sandbox(DEFAULT_POLICY); const result = await sandbox.execute("/bin/true", []); - expect(result.durationMs).toBeGreaterThanOrEqual(0); - }); - - it.skipIf(!canUnshareUser)( - "mount namespace isolates filesystem changes from host", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const policy = { - ...DEFAULT_POLICY, - namespaces: { pid: false, net: false, mnt: true, user: true }, - }; - const sandbox = new Sandbox(policy); - const result = await sandbox.execute("/bin/sh", [ - "-c", - "cat /proc/self/mounts | wc -l", - ]); - expect(result.exitCode).toBe(0); - expect(parseInt(result.stdout.trim(), 10)).toBeGreaterThan(0); - }, - ); - - it.skipIf(!canUnshareUser)( - "blocks network access in network namespace", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const policy = { - ...DEFAULT_POLICY, - namespaces: { pid: false, net: true, mnt: false, user: true }, - }; - const sandbox = new Sandbox(policy); - const result = await sandbox.execute("/bin/sh", [ - "-c", - "ip link show 2>/dev/null | grep -oP '(?<=: )\\w+(?=:)' | sort", - ]); - expect(result.stdout.trim()).toBe("lo"); - expect(result.exitCode).toBe(0); - }, - ); + // These are set based on platform; in CI (Linux) both should be true + expect(typeof result.enforcement?.pivotRoot).toBe("boolean"); + expect(typeof result.enforcement?.bindMounts).toBe("boolean"); + }); + + it("calls cleanupAfterCommand even when command fails", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/false"); + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/false", []); + expect(mockCleanupAfterCommand).toHaveBeenCalledOnce(); + }); }); -describe("Sandbox.execute() helper 
integration", () => { +describe("Sandbox.execute() with helper", () => { beforeEach(() => { vi.clearAllMocks(); mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); + mockWriteFileSync.mockReturnValue(undefined); + mockRmSync.mockReturnValue(undefined); + }); + + it("includes --policy-file in the inner command when helper is found", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockImplementation(async (cmd: string) => cmd); + + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/echo", ["hello"]); + + const innerCmd: string = mockWrapWithSandbox.mock.calls[0]![0]!; + expect(innerCmd).toContain("safeclaw-sandbox-helper"); + expect(innerCmd).toContain("--policy-file"); + expect(innerCmd).toContain("--"); + expect(innerCmd).toContain("echo"); + }); + + it("sets enforcement.landlock=true and enforcement.capDrop=true when helper is found", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + + expect(result.enforcement?.landlock).toBe(true); + expect(result.enforcement?.capDrop).toBe(true); + }); + + it("does NOT set landlock/capDrop when helper is not found", async () => { mockFindHelper.mockReturnValue(undefined); + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + + expect(result.enforcement?.landlock).toBe(false); + expect(result.enforcement?.capDrop).toBe(false); }); - it.skipIf(!canUnshareUser)( - "sets enforcement.namespaces=true even without helper", - async () => { - mockFindHelper.mockReturnValue(undefined); - - const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/true", []); - - 
expect(result.exitCode).toBe(0); - expect(result.enforcement).toBeDefined(); - expect(result.enforcement!.namespaces).toBe(true); - expect(result.enforcement!.landlock).toBe(false); - expect(result.enforcement!.seccomp).toBe(false); - expect(result.enforcement!.capDrop).toBe(false); - }, - ); - - it.skipIf(!canUnshareUser)( - "sets full enforcement when helper is found", - async () => { - mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); - - const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/true", []); - - expect(result.enforcement).toBeDefined(); - expect(result.enforcement!.namespaces).toBe(true); - expect(result.enforcement!.landlock).toBe(true); - expect(result.enforcement!.seccomp).toBe(true); - expect(result.enforcement!.capDrop).toBe(true); - }, - ); + it("cleans up policy temp file even if command fails", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockResolvedValue("/bin/false"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/false", []); + + expect(mockWriteFileSync).toHaveBeenCalledOnce(); + expect(mockRmSync).toHaveBeenCalledWith( + expect.stringContaining("safeclaw-policy-"), + { force: true }, + ); + }); }); diff --git a/packages/sandbox/src/sandbox.ts b/packages/sandbox/src/sandbox.ts index 0454d4d..927c7cc 100644 --- a/packages/sandbox/src/sandbox.ts +++ b/packages/sandbox/src/sandbox.ts @@ -1,14 +1,29 @@ import { spawn } from "node:child_process"; -import type { Writable } from "node:stream"; +import { writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; import type { SandboxPolicy, SandboxResult, EnforcementLayers } from "./types.js"; import { assertSandboxSupported } from "./detect.js"; import { findHelper } from "./helper.js"; +import { PolicyBuilder } from 
"./policy-builder.js"; + +/** POSIX single-quote shell escaping. Safe for all byte values. */ +function shEscape(arg: string): string { + return "'" + arg.replace(/'/g, "'\\''") + "'"; +} export class Sandbox { private readonly policy: SandboxPolicy; constructor(policy: SandboxPolicy) { assertSandboxSupported(); + if (!SandboxManager.isSandboxingEnabled()) { + throw new Error( + "SandboxManager is not initialized. Call SandboxManager.initialize() " + + "before constructing a Sandbox (see bootstrapAgent()).", + ); + } this.policy = policy; } @@ -16,77 +31,88 @@ export class Sandbox { const start = performance.now(); const timeout = this.policy.timeoutMs ?? 30_000; - // Build unshare flags from policy namespace settings - const unshareFlags = this.buildUnshareFlags(); - - // If we have unshare flags, wrap: unshare [flags] -- command [args] - // Otherwise run directly - const useUnshare = unshareFlags.length > 0; - - // Resolve helper binary - // TODO: Re-add SHA-256 integrity verification once builds are reproducible + // Inject C helper (Landlock + seccomp + cap-drop) as the inner process + // when the helper binary is available, using --policy-file for the policy. const helperPath = findHelper(); const useHelper = helperPath !== undefined; - // Build enforcement metadata + let policyTmpPath: string | undefined; + let innerCmd: string; + + if (useHelper) { + // Write policy JSON to a temp file (mode 0600, as required by policy_read_file). + policyTmpPath = join( + tmpdir(), + `safeclaw-policy-${process.pid.toString()}-${Date.now().toString()}.json`, + ); + writeFileSync( + policyTmpPath, + JSON.stringify({ + filesystem: this.policy.filesystem, + syscalls: this.policy.syscalls, + }), + { mode: 0o600 }, + ); + innerCmd = [ + helperPath, + "--policy-file", + policyTmpPath, + "--", + command, + ...args, + ] + .map(shEscape) + .join(" "); + } else { + innerCmd = [command, ...args].map(shEscape).join(" "); + } + + // Translate SafeClaw policy to sandbox-runtime config. 
When helper is + // present and in a non-system path, add its directory to allowWrite so + // bwrap bind-mounts it into the container. + const rtConfig = PolicyBuilder.toRuntimeConfig(this.policy); + if (useHelper && helperPath !== undefined) { + const helperDir = helperPath.substring(0, helperPath.lastIndexOf("/")); + const systemPaths = ["/bin", "/usr/bin", "/usr/local/bin", "/sbin", "/usr/sbin"]; + if (!systemPaths.includes(helperDir)) { + rtConfig.filesystem.allowWrite = [ + ...rtConfig.filesystem.allowWrite, + helperDir, + ]; + } + } + + // Wrap via sandbox-runtime (bwrap on Linux, sandbox-exec on macOS) + const wrappedCmd = await SandboxManager.wrapWithSandbox( + innerCmd, + undefined, + rtConfig, + ); + + const isLinux = process.platform === "linux"; const enforcement: EnforcementLayers = { - namespaces: useUnshare, + namespaces: isLinux, + pivotRoot: isLinux, + bindMounts: true, landlock: useHelper, - seccomp: useHelper, + seccomp: isLinux, // sandbox-runtime applies seccomp for unix socket blocking on Linux capDrop: useHelper, }; - // Build spawn command and args based on available isolation - let spawnCmd: string; - let spawnArgs: string[]; - let stdio: ("ignore" | "pipe")[]; - - if (useUnshare && helperPath !== undefined) { - spawnCmd = "unshare"; - spawnArgs = [...unshareFlags, "--", helperPath, "--", command, ...args]; - stdio = ["ignore", "pipe", "pipe", "pipe"]; - } else if (useUnshare) { - spawnCmd = "unshare"; - spawnArgs = [...unshareFlags, "--", command, ...args]; - stdio = ["ignore", "pipe", "pipe"]; - } else if (helperPath !== undefined) { - spawnCmd = helperPath; - spawnArgs = ["--", command, ...args]; - stdio = ["ignore", "pipe", "pipe", "pipe"]; - } else { - spawnCmd = command; - spawnArgs = args; - stdio = ["ignore", "pipe", "pipe"]; - } - return new Promise((resolve) => { const stdoutChunks: Buffer[] = []; const stderrChunks: Buffer[] = []; let killed = false; let killReason: "timeout" | "oom" | "signal" | undefined; - const proc = 
spawn(spawnCmd, spawnArgs, { - stdio, + const proc = spawn("/bin/sh", ["-c", wrappedCmd], { + stdio: ["ignore", "pipe", "pipe"], detached: true, }); - // Write policy JSON to fd 3 when using helper - if (useHelper) { - const fd3 = proc.stdio[3] as Writable; - fd3.on("error", () => { - // Ignored: the child may exit before reading fd 3 - }); - const policyJson = JSON.stringify({ - filesystem: this.policy.filesystem, - syscalls: this.policy.syscalls, - }); - fd3.end(policyJson); - } - const timer = setTimeout(() => { killed = true; killReason = "timeout"; - // Kill entire process group (unshare + forked children) if (proc.pid !== undefined) { try { process.kill(-proc.pid, "SIGKILL"); @@ -103,6 +129,11 @@ export class Sandbox { proc.on("close", (code: number | null) => { clearTimeout(timer); + if (policyTmpPath !== undefined) { + try { rmSync(policyTmpPath, { force: true }); } catch { /* ignore */ } + } + // Clean up bwrap leftover mount points (no-op on macOS) + SandboxManager.cleanupAfterCommand(); resolve({ exitCode: code ?? 
1, stdout: Buffer.concat(stdoutChunks).toString(), @@ -116,6 +147,10 @@ export class Sandbox { proc.on("error", (err: Error) => { clearTimeout(timer); + if (policyTmpPath !== undefined) { + try { rmSync(policyTmpPath, { force: true }); } catch { /* ignore */ } + } + SandboxManager.cleanupAfterCommand(); resolve({ exitCode: 1, stdout: "", @@ -131,16 +166,4 @@ export class Sandbox { getPolicy(): SandboxPolicy { return structuredClone(this.policy); } - - private buildUnshareFlags(): string[] { - const flags: string[] = []; - const ns = this.policy.namespaces; - - if (ns.pid) flags.push("--pid", "--fork"); - if (ns.net) flags.push("--net"); - if (ns.mnt) flags.push("--mount"); - if (ns.user) flags.push("--user", "--map-root-user"); - - return flags; - } } diff --git a/packages/sandbox/src/types.test.ts b/packages/sandbox/src/types.test.ts new file mode 100644 index 0000000..b796e07 --- /dev/null +++ b/packages/sandbox/src/types.test.ts @@ -0,0 +1,42 @@ +import { describe, it, expectTypeOf } from "vitest"; +import type { NetworkPolicy, EnforcementLayers, KernelCapabilities } from "./types.js"; + +describe("NetworkPolicy", () => { + it("accepts 'none'", () => { + const n: NetworkPolicy = "none"; + expectTypeOf(n).toMatchTypeOf(); + }); + + it("accepts domain allowlist object", () => { + const n: NetworkPolicy = { allowedDomains: ["github.com", "*.npmjs.org"] }; + expectTypeOf(n).toMatchTypeOf(); + }); + + it("accepts domain allowlist with deniedDomains", () => { + const n: NetworkPolicy = { allowedDomains: [], deniedDomains: ["evil.com"] }; + expectTypeOf(n).toMatchTypeOf(); + }); +}); + +describe("EnforcementLayers", () => { + it("has pivotRoot and bindMounts fields", () => { + const e: EnforcementLayers = { + namespaces: true, pivotRoot: true, bindMounts: true, + landlock: false, seccomp: false, capDrop: false, + }; + expectTypeOf(e.pivotRoot).toBeBoolean(); + expectTypeOf(e.bindMounts).toBeBoolean(); + }); +}); + +describe("KernelCapabilities", () => { + it("has bwrap 
field", () => { + const k: KernelCapabilities = { + landlock: { supported: true, abiVersion: 3 }, + seccomp: { supported: true }, + namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, + }; + expectTypeOf(k.bwrap.available).toBeBoolean(); + }); +}); diff --git a/packages/sandbox/src/types.ts b/packages/sandbox/src/types.ts index fe737f5..e4b4695 100644 --- a/packages/sandbox/src/types.ts +++ b/packages/sandbox/src/types.ts @@ -4,11 +4,22 @@ export interface PathRule { access: "read" | "write" | "readwrite" | "execute" | "readwriteexecute"; } +/** + * Network policy for a sandbox execution. + * - "none": block all outbound network (net namespace, no proxy) + * - "localhost": allow only loopback + * - object: route through sandbox-runtime proxy with domain allowlist/denylist + */ +export type NetworkPolicy = + | "none" + | "localhost" + | { allowedDomains: string[]; deniedDomains?: string[] }; + /** Sandbox policy — defines isolation constraints for a single execution */ export interface SandboxPolicy { filesystem: { allow: PathRule[]; deny: PathRule[] }; syscalls: { allow: string[]; defaultDeny: true }; - network: "none" | "localhost" | "filtered"; + network: NetworkPolicy; namespaces: { pid: boolean; net: boolean; mnt: boolean; user: boolean }; timeoutMs?: number | undefined; } @@ -16,6 +27,8 @@ export interface SandboxPolicy { /** Which enforcement layers were active during execution */ export interface EnforcementLayers { namespaces: boolean; + pivotRoot: boolean; // bwrap pivot_root was used + bindMounts: boolean; // bwrap bind-mount FS isolation was active landlock: boolean; seccomp: boolean; capDrop: boolean; @@ -37,6 +50,7 @@ export interface KernelCapabilities { landlock: { supported: boolean; abiVersion: number }; seccomp: { supported: boolean }; namespaces: { user: boolean; pid: boolean; net: boolean; mnt: boolean }; + bwrap: { available: boolean; path: string | undefined; 
version: string | undefined }; } /** Default sandbox policy — maximum restriction */ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1fac622..8699b08 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -64,7 +64,11 @@ importers: specifier: ^8.18.1 version: 8.18.1 - packages/sandbox: {} + packages/sandbox: + dependencies: + '@anthropic-ai/sandbox-runtime': + specifier: git+ssh://git@github.com/anthropic-experimental/sandbox-runtime.git#20f5176a94314038695bee13779eb9eebbbaeb49 + version: https://codeload.github.com/anthropic-experimental/sandbox-runtime/tar.gz/20f5176a94314038695bee13779eb9eebbbaeb49 packages/vault: {} @@ -76,6 +80,12 @@ importers: packages: + '@anthropic-ai/sandbox-runtime@https://codeload.github.com/anthropic-experimental/sandbox-runtime/tar.gz/20f5176a94314038695bee13779eb9eebbbaeb49': + resolution: {tarball: https://codeload.github.com/anthropic-experimental/sandbox-runtime/tar.gz/20f5176a94314038695bee13779eb9eebbbaeb49} + version: 0.0.42 + engines: {node: '>=18.0.0'} + hasBin: true + '@babel/helper-string-parser@7.27.1': resolution: {integrity: sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==} engines: {node: '>=6.9.0'} @@ -384,6 +394,9 @@ packages: cpu: [x64] os: [win32] + '@pondwader/socks5-server@1.0.10': + resolution: {integrity: sha512-bQY06wzzR8D2+vVCUoBsr5QS2U6UgPUQRmErNwtsuI6vLcyRKkafjkr3KxbtGFf9aBBIV2mcvlsKD1UYaIV+sg==} + '@rollup/rollup-android-arm-eabi@4.59.0': resolution: {integrity: sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==} cpu: [arm] @@ -534,6 +547,12 @@ packages: '@types/estree@1.0.8': resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==} + '@types/lodash-es@4.17.12': + resolution: {integrity: sha512-0NgftHUcV4v34VhXm8QBSftKVXtbkBG3ViCjs6+eJ5a6y6Mi/jiFGPc1sC7QK+9BFhWrURE3EOggmWaSxL9OzQ==} + + '@types/lodash@4.17.24': + resolution: {integrity: 
sha512-gIW7lQLZbue7lRSWEFql49QJJWThrTFFeIMJdp3eH4tKoxm1OvEPg02rm4wCCSHS0cL3/Fizimb35b7k8atwsQ==} + '@types/node@22.19.13': resolution: {integrity: sha512-akNQMv0wW5uyRpD2v2IEyRSZiR+BeGuoB6L310EgGObO44HSMNT8z1xzio28V8qOrgYaopIDNA18YgdXd+qTiw==} @@ -589,6 +608,10 @@ packages: resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==} engines: {node: '>=18'} + commander@12.1.0: + resolution: {integrity: sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==} + engines: {node: '>=18'} + es-module-lexer@1.7.0: resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==} @@ -640,6 +663,9 @@ packages: js-tokens@10.0.0: resolution: {integrity: sha512-lM/UBzQmfJRo9ABXbPWemivdCW8V2G8FHaHdypQaIy523snUjog0W71ayWXTjiR+ixeMyVHN2XcpnTd/liPg/Q==} + lodash-es@4.17.23: + resolution: {integrity: sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==} + magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} @@ -692,6 +718,10 @@ packages: engines: {node: '>=10'} hasBin: true + shell-quote@1.8.3: + resolution: {integrity: sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==} + engines: {node: '>= 0.4'} + siginfo@2.0.0: resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==} @@ -823,8 +853,20 @@ packages: utf-8-validate: optional: true + zod@3.25.76: + resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} + snapshots: + '@anthropic-ai/sandbox-runtime@https://codeload.github.com/anthropic-experimental/sandbox-runtime/tar.gz/20f5176a94314038695bee13779eb9eebbbaeb49': + dependencies: + '@pondwader/socks5-server': 1.0.10 + 
'@types/lodash-es': 4.17.12 + commander: 12.1.0 + lodash-es: 4.17.23 + shell-quote: 1.8.3 + zod: 3.25.76 + '@babel/helper-string-parser@7.27.1': {} '@babel/helper-validator-identifier@7.28.5': {} @@ -983,6 +1025,8 @@ snapshots: '@oxlint/binding-win32-x64-msvc@1.50.0': optional: true + '@pondwader/socks5-server@1.0.10': {} + '@rollup/rollup-android-arm-eabi@4.59.0': optional: true @@ -1069,6 +1113,12 @@ snapshots: '@types/estree@1.0.8': {} + '@types/lodash-es@4.17.12': + dependencies: + '@types/lodash': 4.17.24 + + '@types/lodash@4.17.24': {} + '@types/node@22.19.13': dependencies: undici-types: 6.21.0 @@ -1140,6 +1190,8 @@ snapshots: chai@6.2.2: {} + commander@12.1.0: {} + es-module-lexer@1.7.0: {} esbuild@0.27.3: @@ -1203,6 +1255,8 @@ snapshots: js-tokens@10.0.0: {} + lodash-es@4.17.23: {} + magic-string@0.30.21: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 @@ -1288,6 +1342,8 @@ snapshots: semver@7.7.4: {} + shell-quote@1.8.3: {} + siginfo@2.0.0: {} source-map-js@1.2.1: {} @@ -1370,3 +1426,5 @@ snapshots: stackback: 0.0.2 ws@8.19.0: {} + + zod@3.25.76: {} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 167898f..f452b23 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,4 +1,7 @@ packages: - packages/* -onlyBuiltDependencies: '["esbuild", "@oxlint/binding-linux-x64-gnu"]' +onlyBuiltDependencies: + - esbuild + - "@oxlint/binding-linux-x64-gnu" + - "@anthropic-ai/sandbox-runtime" diff --git a/test/security/sandbox-escape.test.ts b/test/security/sandbox-escape.test.ts index bc4608f..1876c42 100644 --- a/test/security/sandbox-escape.test.ts +++ b/test/security/sandbox-escape.test.ts @@ -22,12 +22,14 @@ const UNSUPPORTED_CAPS: KernelCapabilities = { landlock: { supported: false, abiVersion: 0 }, seccomp: { supported: false }, namespaces: { user: false, pid: false, net: false, mnt: false }, + bwrap: { available: false, path: undefined, version: undefined }, }; const FULL_CAPS: KernelCapabilities = { landlock: { supported: true, 
abiVersion: 3 }, seccomp: { supported: true }, namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, }; const mockAssert = vi.fn<() => KernelCapabilities>(); @@ -45,6 +47,14 @@ vi.mock("../../packages/sandbox/src/helper.js", () => ({ findHelper: () => undefined, })); +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSandboxingEnabled: () => true, + wrapWithSandbox: async (cmd: string) => cmd, + cleanupAfterCommand: () => undefined, + }, +})); + const { Sandbox } = await import("@safeclaw/sandbox"); describe("Sandbox escape prevention", () => { @@ -114,7 +124,7 @@ describe("Sandbox escape prevention", () => { const sandbox = new Sandbox(DEFAULT_POLICY); const policy = sandbox.getPolicy(); - (policy as SandboxPolicy).network = "filtered"; + (policy as SandboxPolicy).network = "localhost"; const policyAgain = sandbox.getPolicy(); expect(policyAgain.network).toBe("none"); diff --git a/vitest.config.ts b/vitest.config.ts index d949c14..3f330cf 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -9,6 +9,13 @@ export default defineConfig({ "@safeclaw/vault": resolve(__dirname, "packages/vault/src/index.ts"), "@safeclaw/gateway": resolve(__dirname, "packages/gateway/src/index.ts"), "@safeclaw/cli": resolve(__dirname, "packages/cli/src/index.ts"), + // sandbox-runtime lives in packages/sandbox/node_modules (not root), so we + // pin it to a single resolved path so vi.mock("@anthropic-ai/sandbox-runtime") + // works from any test file in the workspace. + "@anthropic-ai/sandbox-runtime": resolve( + __dirname, + "packages/sandbox/node_modules/@anthropic-ai/sandbox-runtime/dist/index.js", + ), }, }, test: {