From 9e92639f3eb3d2d29a6ece86a170b4fac1353b98 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 09:48:11 +0100 Subject: [PATCH 01/13] chore(sandbox): add @anthropic-ai/sandbox-runtime git dependency --- packages/sandbox/package.json | 3 ++ pnpm-lock.yaml | 60 ++++++++++++++++++++++++++++++++++- pnpm-workspace.yaml | 5 ++- 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/packages/sandbox/package.json b/packages/sandbox/package.json index 0de5762..18a5a5e 100644 --- a/packages/sandbox/package.json +++ b/packages/sandbox/package.json @@ -8,5 +8,8 @@ "scripts": { "build": "tsc -p tsconfig.json" }, + "dependencies": { + "@anthropic-ai/sandbox-runtime": "git+ssh://git@github.com/anthropic-experimental/sandbox-runtime.git#20f5176a94314038695bee13779eb9eebbbaeb49" + }, "files": ["dist"] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1fac622..8699b08 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -64,7 +64,11 @@ importers: specifier: ^8.18.1 version: 8.18.1 - packages/sandbox: {} + packages/sandbox: + dependencies: + '@anthropic-ai/sandbox-runtime': + specifier: git+ssh://git@github.com/anthropic-experimental/sandbox-runtime.git#20f5176a94314038695bee13779eb9eebbbaeb49 + version: https://codeload.github.com/anthropic-experimental/sandbox-runtime/tar.gz/20f5176a94314038695bee13779eb9eebbbaeb49 packages/vault: {} @@ -76,6 +80,12 @@ importers: packages: + '@anthropic-ai/sandbox-runtime@https://codeload.github.com/anthropic-experimental/sandbox-runtime/tar.gz/20f5176a94314038695bee13779eb9eebbbaeb49': + resolution: {tarball: https://codeload.github.com/anthropic-experimental/sandbox-runtime/tar.gz/20f5176a94314038695bee13779eb9eebbbaeb49} + version: 0.0.42 + engines: {node: '>=18.0.0'} + hasBin: true + '@babel/helper-string-parser@7.27.1': resolution: {integrity: sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==} engines: {node: '>=6.9.0'} @@ -384,6 +394,9 @@ packages: cpu: [x64] os: 
[win32] + '@pondwader/socks5-server@1.0.10': + resolution: {integrity: sha512-bQY06wzzR8D2+vVCUoBsr5QS2U6UgPUQRmErNwtsuI6vLcyRKkafjkr3KxbtGFf9aBBIV2mcvlsKD1UYaIV+sg==} + '@rollup/rollup-android-arm-eabi@4.59.0': resolution: {integrity: sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==} cpu: [arm] @@ -534,6 +547,12 @@ packages: '@types/estree@1.0.8': resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==} + '@types/lodash-es@4.17.12': + resolution: {integrity: sha512-0NgftHUcV4v34VhXm8QBSftKVXtbkBG3ViCjs6+eJ5a6y6Mi/jiFGPc1sC7QK+9BFhWrURE3EOggmWaSxL9OzQ==} + + '@types/lodash@4.17.24': + resolution: {integrity: sha512-gIW7lQLZbue7lRSWEFql49QJJWThrTFFeIMJdp3eH4tKoxm1OvEPg02rm4wCCSHS0cL3/Fizimb35b7k8atwsQ==} + '@types/node@22.19.13': resolution: {integrity: sha512-akNQMv0wW5uyRpD2v2IEyRSZiR+BeGuoB6L310EgGObO44HSMNT8z1xzio28V8qOrgYaopIDNA18YgdXd+qTiw==} @@ -589,6 +608,10 @@ packages: resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==} engines: {node: '>=18'} + commander@12.1.0: + resolution: {integrity: sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==} + engines: {node: '>=18'} + es-module-lexer@1.7.0: resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==} @@ -640,6 +663,9 @@ packages: js-tokens@10.0.0: resolution: {integrity: sha512-lM/UBzQmfJRo9ABXbPWemivdCW8V2G8FHaHdypQaIy523snUjog0W71ayWXTjiR+ixeMyVHN2XcpnTd/liPg/Q==} + lodash-es@4.17.23: + resolution: {integrity: sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==} + magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} @@ -692,6 +718,10 @@ packages: engines: {node: '>=10'} hasBin: true + 
shell-quote@1.8.3: + resolution: {integrity: sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==} + engines: {node: '>= 0.4'} + siginfo@2.0.0: resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==} @@ -823,8 +853,20 @@ packages: utf-8-validate: optional: true + zod@3.25.76: + resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} + snapshots: + '@anthropic-ai/sandbox-runtime@https://codeload.github.com/anthropic-experimental/sandbox-runtime/tar.gz/20f5176a94314038695bee13779eb9eebbbaeb49': + dependencies: + '@pondwader/socks5-server': 1.0.10 + '@types/lodash-es': 4.17.12 + commander: 12.1.0 + lodash-es: 4.17.23 + shell-quote: 1.8.3 + zod: 3.25.76 + '@babel/helper-string-parser@7.27.1': {} '@babel/helper-validator-identifier@7.28.5': {} @@ -983,6 +1025,8 @@ snapshots: '@oxlint/binding-win32-x64-msvc@1.50.0': optional: true + '@pondwader/socks5-server@1.0.10': {} + '@rollup/rollup-android-arm-eabi@4.59.0': optional: true @@ -1069,6 +1113,12 @@ snapshots: '@types/estree@1.0.8': {} + '@types/lodash-es@4.17.12': + dependencies: + '@types/lodash': 4.17.24 + + '@types/lodash@4.17.24': {} + '@types/node@22.19.13': dependencies: undici-types: 6.21.0 @@ -1140,6 +1190,8 @@ snapshots: chai@6.2.2: {} + commander@12.1.0: {} + es-module-lexer@1.7.0: {} esbuild@0.27.3: @@ -1203,6 +1255,8 @@ snapshots: js-tokens@10.0.0: {} + lodash-es@4.17.23: {} + magic-string@0.30.21: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 @@ -1288,6 +1342,8 @@ snapshots: semver@7.7.4: {} + shell-quote@1.8.3: {} + siginfo@2.0.0: {} source-map-js@1.2.1: {} @@ -1370,3 +1426,5 @@ snapshots: stackback: 0.0.2 ws@8.19.0: {} + + zod@3.25.76: {} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 167898f..f452b23 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,4 +1,7 @@ packages: - packages/* 
-onlyBuiltDependencies: '["esbuild", "@oxlint/binding-linux-x64-gnu"]' +onlyBuiltDependencies: + - esbuild + - "@oxlint/binding-linux-x64-gnu" + - "@anthropic-ai/sandbox-runtime" From eff4215a1554084930fbadb28f757601b9d73135 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 09:54:15 +0100 Subject: [PATCH 02/13] feat(sandbox): extend NetworkPolicy type and EnforcementLayers/KernelCapabilities --- packages/sandbox/src/index.ts | 1 + packages/sandbox/src/types.test.ts | 42 ++++++++++++++++++++++++++++++ packages/sandbox/src/types.ts | 16 +++++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 packages/sandbox/src/types.test.ts diff --git a/packages/sandbox/src/index.ts b/packages/sandbox/src/index.ts index 26e1743..5d65375 100644 --- a/packages/sandbox/src/index.ts +++ b/packages/sandbox/src/index.ts @@ -5,6 +5,7 @@ export { } from "./detect.js"; export { DEFAULT_POLICY } from "./types.js"; export type { + NetworkPolicy, SandboxPolicy, SandboxResult, PathRule, diff --git a/packages/sandbox/src/types.test.ts b/packages/sandbox/src/types.test.ts new file mode 100644 index 0000000..8ac37c9 --- /dev/null +++ b/packages/sandbox/src/types.test.ts @@ -0,0 +1,42 @@ +import { describe, it, expectTypeOf } from "vitest"; +import type { NetworkPolicy, SandboxPolicy, EnforcementLayers, KernelCapabilities } from "./types.js"; + +describe("NetworkPolicy", () => { + it("accepts 'none'", () => { + const n: NetworkPolicy = "none"; + expectTypeOf(n).toMatchTypeOf(); + }); + + it("accepts domain allowlist object", () => { + const n: NetworkPolicy = { allowedDomains: ["github.com", "*.npmjs.org"] }; + expectTypeOf(n).toMatchTypeOf(); + }); + + it("accepts domain allowlist with deniedDomains", () => { + const n: NetworkPolicy = { allowedDomains: [], deniedDomains: ["evil.com"] }; + expectTypeOf(n).toMatchTypeOf(); + }); +}); + +describe("EnforcementLayers", () => { + it("has pivotRoot and bindMounts fields", () => { + const e: EnforcementLayers 
= { + namespaces: true, pivotRoot: true, bindMounts: true, + landlock: false, seccomp: false, capDrop: false, + }; + expectTypeOf(e.pivotRoot).toBeBoolean(); + expectTypeOf(e.bindMounts).toBeBoolean(); + }); +}); + +describe("KernelCapabilities", () => { + it("has bwrap field", () => { + const k: KernelCapabilities = { + landlock: { supported: true, abiVersion: 3 }, + seccomp: { supported: true }, + namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, + }; + expectTypeOf(k.bwrap.available).toBeBoolean(); + }); +}); diff --git a/packages/sandbox/src/types.ts b/packages/sandbox/src/types.ts index fe737f5..e4b4695 100644 --- a/packages/sandbox/src/types.ts +++ b/packages/sandbox/src/types.ts @@ -4,11 +4,22 @@ export interface PathRule { access: "read" | "write" | "readwrite" | "execute" | "readwriteexecute"; } +/** + * Network policy for a sandbox execution. + * - "none": block all outbound network (net namespace, no proxy) + * - "localhost": allow only loopback + * - object: route through sandbox-runtime proxy with domain allowlist/denylist + */ +export type NetworkPolicy = + | "none" + | "localhost" + | { allowedDomains: string[]; deniedDomains?: string[] }; + /** Sandbox policy — defines isolation constraints for a single execution */ export interface SandboxPolicy { filesystem: { allow: PathRule[]; deny: PathRule[] }; syscalls: { allow: string[]; defaultDeny: true }; - network: "none" | "localhost" | "filtered"; + network: NetworkPolicy; namespaces: { pid: boolean; net: boolean; mnt: boolean; user: boolean }; timeoutMs?: number | undefined; } @@ -16,6 +27,8 @@ export interface SandboxPolicy { /** Which enforcement layers were active during execution */ export interface EnforcementLayers { namespaces: boolean; + pivotRoot: boolean; // bwrap pivot_root was used + bindMounts: boolean; // bwrap bind-mount FS isolation was active landlock: boolean; seccomp: boolean; capDrop: boolean; @@ 
-37,6 +50,7 @@ export interface KernelCapabilities { landlock: { supported: boolean; abiVersion: number }; seccomp: { supported: boolean }; namespaces: { user: boolean; pid: boolean; net: boolean; mnt: boolean }; + bwrap: { available: boolean; path: string | undefined; version: string | undefined }; } /** Default sandbox policy — maximum restriction */ From 00753c831cbc8730eb1992e1c0f643701d1ea158 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 09:59:05 +0100 Subject: [PATCH 03/13] feat(sandbox): add PolicyBuilder.toRuntimeConfig() translating to SandboxRuntimeConfig --- packages/sandbox/src/policy-builder.test.ts | 62 +++++++++++++++ packages/sandbox/src/policy-builder.ts | 83 ++++++++++++++++++++- packages/sandbox/src/types.test.ts | 2 +- 3 files changed, 144 insertions(+), 3 deletions(-) diff --git a/packages/sandbox/src/policy-builder.test.ts b/packages/sandbox/src/policy-builder.test.ts index 7825dab..d412a93 100644 --- a/packages/sandbox/src/policy-builder.test.ts +++ b/packages/sandbox/src/policy-builder.test.ts @@ -2,6 +2,8 @@ import { describe, it, expect, beforeEach } from "vitest"; import { homedir } from "node:os"; import { PolicyBuilder } from "./policy-builder.js"; import type { SandboxPolicy, PathRule } from "./types.js"; +import { DEFAULT_POLICY } from "./types.js"; +import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; describe("PolicyBuilder", () => { describe("addReadExecute()", () => { @@ -345,3 +347,63 @@ describe("PolicyBuilder", () => { }); }); }); + +describe("PolicyBuilder.toRuntimeConfig()", () => { + it("maps readwrite PathRules to allowWrite", () => { + const policy = new PolicyBuilder() + .addReadWrite("/project") + .addReadWrite("/tmp") + .build(); + const rtConfig: SandboxRuntimeConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain("/project"); + expect(rtConfig.filesystem.allowWrite).toContain("/tmp"); + }); + + it("maps readwriteexecute 
PathRules to allowWrite", () => { + const policy = new PolicyBuilder().addReadWriteExecute("/workspace").build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain("/workspace"); + }); + + it("does not add read-only or execute-only paths to allowWrite", () => { + const policy = new PolicyBuilder() + .addReadOnly("/etc") + .addReadExecute("/usr/bin") + .build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).not.toContain("/etc"); + expect(rtConfig.filesystem.allowWrite).not.toContain("/usr/bin"); + }); + + it("adds sensitive home dirs to denyRead", () => { + const policy = new PolicyBuilder().build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + const home = homedir(); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.ssh`); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.aws`); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.gnupg`); + }); + + it("maps network: 'none' to allowedDomains: []", () => { + const policy = { ...DEFAULT_POLICY, network: "none" as const }; + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.network.allowedDomains).toEqual([]); + }); + + it("maps network object to allowedDomains/deniedDomains", () => { + const policy: SandboxPolicy = { + ...DEFAULT_POLICY, + network: { allowedDomains: ["github.com", "*.npmjs.org"], deniedDomains: ["evil.com"] }, + }; + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.network.allowedDomains).toEqual(["github.com", "*.npmjs.org"]); + expect(rtConfig.network.deniedDomains).toEqual(["evil.com"]); + }); + + it("forDevelopment().toRuntimeConfig() includes cwd in allowWrite", () => { + const cwd = "/home/user/project"; + const policy = PolicyBuilder.forDevelopment(cwd); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain(cwd); + }); +}); diff --git 
a/packages/sandbox/src/policy-builder.ts b/packages/sandbox/src/policy-builder.ts index c30c1a5..634365a 100644 --- a/packages/sandbox/src/policy-builder.ts +++ b/packages/sandbox/src/policy-builder.ts @@ -1,6 +1,7 @@ import { homedir } from "node:os"; import { dirname } from "node:path"; -import type { SandboxPolicy, PathRule } from "./types.js"; +import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; +import type { SandboxPolicy, PathRule, NetworkPolicy } from "./types.js"; /** Options for customizing the development policy */ export interface DevelopmentPolicyOptions { @@ -8,6 +9,14 @@ export interface DevelopmentPolicyOptions { extraExecutePaths?: string[]; /** Additional paths that need readwrite access (e.g. ~/.cache) */ extraReadWritePaths?: string[]; + /** Additional read-only paths */ + extraReadOnlyPaths?: string[]; + /** + * Network domains the sandboxed process may connect to. + * Default: [] (block all network). Use this to allow e.g. npm registry. + * Example: ["registry.npmjs.org", "*.github.com"] + */ + allowedNetworkDomains?: string[]; } /** @@ -64,6 +73,50 @@ export class PolicyBuilder { }; } + /** + * Translates a SafeClaw SandboxPolicy into a SandboxRuntimeConfig for + * @anthropic-ai/sandbox-runtime. + * + * Read model difference: SafeClaw uses an allowlist (Landlock); sandbox-runtime + * is permissive-by-default with an explicit denylist. We translate by denying + * the sensitive credential dirs that must never be readable. + * + * Write model: both use allowlists. PathRules with access "readwrite" or + * "readwriteexecute" map to filesystem.allowWrite. + */ + static toRuntimeConfig(policy: SandboxPolicy): SandboxRuntimeConfig { + // ── Filesystem ──────────────────────────────────────────────────── + const allowWrite = policy.filesystem.allow + .filter((r) => r.access === "readwrite" || r.access === "readwriteexecute") + .map((r) => r.path); + + // Always deny reads to credential/secret directories. 
+ // sandbox-runtime also enforces mandatory deny on dangerous files (.bashrc, + // .git/hooks, etc.) regardless of this config — these are complementary. + const home = homedir(); + const denyRead = [ + `${home}/.ssh`, + `${home}/.aws`, + `${home}/.gnupg`, + `${home}/.kube`, + `${home}/.docker`, + `${home}/.gcloud`, + `${home}/.azure`, + ]; + + // ── Network ─────────────────────────────────────────────────────── + const network = buildNetworkConfig(policy.network); + + return { + filesystem: { + allowWrite, + denyWrite: [], + denyRead, + }, + network, + }; + } + /** * Creates a policy suitable for software development. * @@ -149,6 +202,11 @@ export class PolicyBuilder { builder.addReadWrite(p); } } + if (options?.extraReadOnlyPaths) { + for (const p of options.extraReadOnlyPaths) { + builder.addReadOnly(p); + } + } // ── Expanded syscall allowlist ─────────────────────────────────── // Includes all DEFAULT_POLICY syscalls plus what common dev tools need @@ -156,8 +214,29 @@ export class PolicyBuilder { builder.syscalls.push(sc); } - return builder.build(); + // ── Network ────────────────────────────────────────────────────── + const networkPolicy: NetworkPolicy = + options?.allowedNetworkDomains !== undefined + ? { allowedDomains: options.allowedNetworkDomains } + : "none"; + + return { ...builder.build(), network: networkPolicy }; + } +} + +function buildNetworkConfig( + network: NetworkPolicy, +): SandboxRuntimeConfig["network"] { + if (network === "none") { + return { allowedDomains: [], deniedDomains: [] }; + } + if (network === "localhost") { + return { allowedDomains: ["localhost"], deniedDomains: [] }; } + return { + allowedDomains: network.allowedDomains, + deniedDomains: network.deniedDomains ?? 
[], + }; } /** diff --git a/packages/sandbox/src/types.test.ts b/packages/sandbox/src/types.test.ts index 8ac37c9..b796e07 100644 --- a/packages/sandbox/src/types.test.ts +++ b/packages/sandbox/src/types.test.ts @@ -1,5 +1,5 @@ import { describe, it, expectTypeOf } from "vitest"; -import type { NetworkPolicy, SandboxPolicy, EnforcementLayers, KernelCapabilities } from "./types.js"; +import type { NetworkPolicy, EnforcementLayers, KernelCapabilities } from "./types.js"; describe("NetworkPolicy", () => { it("accepts 'none'", () => { From c25704107656955b0dcd4224d94a7477233fa03b Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 10:00:58 +0100 Subject: [PATCH 04/13] feat(sandbox): replace unshare detection with sandbox-runtime dependency checks --- packages/sandbox/src/detect.test.ts | 167 +++++++--------------------- packages/sandbox/src/detect.ts | 138 ++++++++++++++++------- 2 files changed, 136 insertions(+), 169 deletions(-) diff --git a/packages/sandbox/src/detect.test.ts b/packages/sandbox/src/detect.test.ts index 64b54c0..e2ef91a 100644 --- a/packages/sandbox/src/detect.test.ts +++ b/packages/sandbox/src/detect.test.ts @@ -1,150 +1,59 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; -import * as fs from "node:fs"; -vi.mock("node:fs"); +const mockIsSupportedPlatform = vi.fn<() => boolean>(); +const mockCheckDeps = vi.fn<() => { errors: string[]; warnings: string[] }>(); +const mockWhichBwrap = vi.fn<() => string>(); -const mockedFs = vi.mocked(fs); +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSupportedPlatform: mockIsSupportedPlatform, + checkDependencies: mockCheckDeps, + }, +})); -function mockKernel(release: string, status: string, nsFiles: string[]): void { - mockedFs.readFileSync.mockImplementation((path: fs.PathOrFileDescriptor) => { - if (path === "/proc/sys/kernel/osrelease") return release; - if (path === "/proc/self/status") return status; - throw new Error(`Unexpected readFileSync: 
${String(path)}`); - }); - mockedFs.existsSync.mockImplementation((path: fs.PathLike) => - nsFiles.includes(String(path)), - ); -} - -// Import after mock setup -const { detectKernelCapabilities, assertSandboxSupported } = await import( - "./detect.js" -); - -describe("detectKernelCapabilities", () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - it("detects Landlock support on kernel 6.1.0 with ABI v2", () => { - mockKernel("6.1.0-generic\n", "Seccomp:\t2\n", [ - "/proc/self/ns/user", - "/proc/self/ns/pid", - "/proc/self/ns/net", - "/proc/self/ns/mnt", - ]); - - const caps = detectKernelCapabilities(); - expect(caps.landlock.supported).toBe(true); - expect(caps.landlock.abiVersion).toBe(2); - }); - - it("detects Landlock ABI v3 on kernel 6.2.0", () => { - mockKernel("6.2.0\n", "Seccomp:\t2\n", []); - - const caps = detectKernelCapabilities(); - expect(caps.landlock.supported).toBe(true); - expect(caps.landlock.abiVersion).toBe(3); - }); - - it("detects Landlock ABI v1 on kernel 5.13.0", () => { - mockKernel("5.13.0\n", "Seccomp:\t2\n", []); - - const caps = detectKernelCapabilities(); - expect(caps.landlock.supported).toBe(true); - expect(caps.landlock.abiVersion).toBe(1); - }); - - it("reports no Landlock support on kernel 5.12.0", () => { - mockKernel("5.12.0\n", "Seccomp:\t2\n", []); - - const caps = detectKernelCapabilities(); - expect(caps.landlock.supported).toBe(false); - expect(caps.landlock.abiVersion).toBe(0); - }); - - it("detects seccomp from /proc/self/status containing Seccomp: 2", () => { - mockKernel("6.1.0\n", "Name:\tnode\nSeccomp:\t2\nGroups:\t", []); - - const caps = detectKernelCapabilities(); - expect(caps.seccomp.supported).toBe(true); - }); - - it("detects seccomp mode 1 as supported", () => { - mockKernel("6.1.0\n", "Seccomp:\t1\n", []); - - const caps = detectKernelCapabilities(); - expect(caps.seccomp.supported).toBe(true); - }); +vi.mock("node:child_process", () => ({ + execFileSync: mockWhichBwrap, +})); - it("detects no 
seccomp when Seccomp: 0", () => { - mockKernel("6.1.0\n", "Seccomp:\t0\n", []); +const { detectKernelCapabilities, assertSandboxSupported } = await import("./detect.js"); - const caps = detectKernelCapabilities(); - expect(caps.seccomp.supported).toBe(false); - }); - - it("detects missing namespace support when /proc/self/ns/user does not exist", () => { - mockKernel("6.1.0\n", "Seccomp:\t2\n", [ - "/proc/self/ns/pid", - "/proc/self/ns/net", - "/proc/self/ns/mnt", - ]); +describe("detectKernelCapabilities()", () => { + beforeEach(() => vi.clearAllMocks()); + it("reports bwrap available when which bwrap succeeds", () => { + mockWhichBwrap.mockReturnValue("/usr/bin/bwrap"); const caps = detectKernelCapabilities(); - expect(caps.namespaces.user).toBe(false); - expect(caps.namespaces.pid).toBe(true); - expect(caps.namespaces.net).toBe(true); - expect(caps.namespaces.mnt).toBe(true); + expect(caps.bwrap.available).toBe(true); + expect(caps.bwrap.path).toBe("/usr/bin/bwrap"); }); - it("detects all namespaces present", () => { - mockKernel("6.1.0\n", "Seccomp:\t2\n", [ - "/proc/self/ns/user", - "/proc/self/ns/pid", - "/proc/self/ns/net", - "/proc/self/ns/mnt", - ]); - + it("reports bwrap unavailable when which bwrap fails", () => { + mockWhichBwrap.mockImplementation(() => { throw new Error("not found"); }); const caps = detectKernelCapabilities(); - expect(caps.namespaces.user).toBe(true); - expect(caps.namespaces.pid).toBe(true); - expect(caps.namespaces.net).toBe(true); - expect(caps.namespaces.mnt).toBe(true); + expect(caps.bwrap.available).toBe(false); + expect(caps.bwrap.path).toBeUndefined(); }); }); -describe("assertSandboxSupported", () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - it("throws if kernel is 4.0.0 (no Landlock)", () => { - mockKernel("4.0.0\n", "Seccomp:\t0\n", []); +describe("assertSandboxSupported()", () => { + beforeEach(() => vi.clearAllMocks()); - expect(() => assertSandboxSupported()).toThrow( - /Missing kernel features.*Landlock/, 
- ); + it("does not throw when platform is supported and deps are OK", () => { + mockIsSupportedPlatform.mockReturnValue(true); + mockCheckDeps.mockReturnValue({ errors: [], warnings: [] }); + mockWhichBwrap.mockReturnValue("/usr/bin/bwrap"); + expect(() => assertSandboxSupported()).not.toThrow(); }); - it("throws listing all missing features", () => { - mockKernel("4.0.0\n", "Seccomp:\t0\n", ["/proc/self/ns/mnt"]); - - expect(() => assertSandboxSupported()).toThrow( - /Landlock.*seccomp-BPF.*User namespaces.*PID namespaces/, - ); + it("throws when platform is not supported", () => { + mockIsSupportedPlatform.mockReturnValue(false); + mockCheckDeps.mockReturnValue({ errors: [], warnings: [] }); + expect(() => assertSandboxSupported()).toThrow(/platform/i); }); - it("returns capabilities when all features present", () => { - mockKernel("6.1.0\n", "Seccomp:\t2\n", [ - "/proc/self/ns/user", - "/proc/self/ns/pid", - "/proc/self/ns/net", - "/proc/self/ns/mnt", - ]); - - const caps = assertSandboxSupported(); - expect(caps.landlock.supported).toBe(true); - expect(caps.seccomp.supported).toBe(true); + it("throws when sandbox-runtime deps are missing", () => { + mockIsSupportedPlatform.mockReturnValue(true); + mockCheckDeps.mockReturnValue({ errors: ["bubblewrap not found"], warnings: [] }); + expect(() => assertSandboxSupported()).toThrow(/bubblewrap not found/); }); }); diff --git a/packages/sandbox/src/detect.ts b/packages/sandbox/src/detect.ts index 8c80bbe..bb2685b 100644 --- a/packages/sandbox/src/detect.ts +++ b/packages/sandbox/src/detect.ts @@ -1,59 +1,117 @@ +import { execFileSync } from "node:child_process"; import { readFileSync, existsSync } from "node:fs"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; import type { KernelCapabilities } from "./types.js"; -const LANDLOCK_MIN_KERNEL: [number, number] = [5, 13]; - -function parseKernelVersion(release: string): [number, number] { - const parts = release.trim().split("."); - return 
[parseInt(parts[0] ?? "0", 10), parseInt(parts[1] ?? "0", 10)]; -} - -function kernelAtLeast(release: string, min: [number, number]): boolean { - const [major, minor] = parseKernelVersion(release); - return major > min[0] || (major === min[0] && minor >= min[1]); -} - +/** + * Probes system capabilities relevant to sandboxing. + * Returns KernelCapabilities with bwrap probe on Linux; on macOS the + * bwrap fields are always unavailable (macOS uses sandbox-exec instead). + */ export function detectKernelCapabilities(): KernelCapabilities { - const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); - const status = readFileSync("/proc/self/status", "utf8"); + let bwrapPath: string | undefined; + let bwrapVersion: string | undefined; - const landlockSupported = kernelAtLeast(release, LANDLOCK_MIN_KERNEL); - let landlockAbi = 0; - if (landlockSupported) { - const [major, minor] = parseKernelVersion(release); - if (major > 6 || (major === 6 && minor >= 2)) landlockAbi = 3; - else if (major > 5 || (major === 5 && minor >= 19)) landlockAbi = 2; - else landlockAbi = 1; + try { + bwrapPath = execFileSync("which", ["bwrap"], { encoding: "utf8" }).trim(); + try { + bwrapVersion = execFileSync("bwrap", ["--version"], { encoding: "utf8" }) + .trim() + .split("\n")[0]; + } catch { + // version flag not supported or bwrap not runnable — path is still valid + } + } catch { + // bwrap not on PATH } - const seccompSupported = /Seccomp:\s*[12]/.test(status); + // Landlock / seccomp / namespace detection is Linux-only; on macOS these + // are undefined/false since sandbox-runtime uses sandbox-exec there. + const isLinux = process.platform === "linux"; return { - landlock: { supported: landlockSupported, abiVersion: landlockAbi }, - seccomp: { supported: seccompSupported }, + landlock: { + supported: isLinux ? detectLandlock() : false, + abiVersion: isLinux ? detectLandlockAbi() : 0, + }, + seccomp: { supported: isLinux ? 
detectSeccomp() : false }, namespaces: { - user: existsSync("/proc/self/ns/user"), - pid: existsSync("/proc/self/ns/pid"), - net: existsSync("/proc/self/ns/net"), - mnt: existsSync("/proc/self/ns/mnt"), + user: isLinux ? existsSync("/proc/self/ns/user") : false, + pid: isLinux ? existsSync("/proc/self/ns/pid") : false, + net: isLinux ? existsSync("/proc/self/ns/net") : false, + mnt: isLinux ? existsSync("/proc/self/ns/mnt") : false, + }, + bwrap: { + available: bwrapPath !== undefined, + path: bwrapPath, + version: bwrapVersion, }, }; } +/** + * Throws a descriptive error if the current platform and dependencies + * do not support sandbox-runtime isolation. + */ export function assertSandboxSupported(): KernelCapabilities { - const caps = detectKernelCapabilities(); - const missing: string[] = []; - if (!caps.landlock.supported) - missing.push("Landlock (requires kernel >= 5.13)"); - if (!caps.seccomp.supported) missing.push("seccomp-BPF"); - if (!caps.namespaces.user) missing.push("User namespaces"); - if (!caps.namespaces.pid) missing.push("PID namespaces"); - - if (missing.length > 0) { + if (!SandboxManager.isSupportedPlatform()) { throw new Error( - `SafeClaw requires mandatory sandbox support. Missing kernel features: ${missing.join(", ")}. ` + - `SafeClaw v1 is Linux-only and requires a modern kernel (>= 5.13).`, + `SafeClaw sandbox is not supported on this platform (${process.platform}). ` + + `Supported: Linux (kernel ≥ 5.13, bubblewrap, socat, ripgrep) and macOS.`, ); } - return caps; + + const deps = SandboxManager.checkDependencies(); + if (deps.errors.length > 0) { + throw new Error( + `Sandbox dependencies missing: ${deps.errors.join(", ")}. 
` + + `On Linux install: apt install bubblewrap socat ripgrep`, + ); + } + + return detectKernelCapabilities(); +} + +// ── Linux helpers ────────────────────────────────────────────────────── + +const LANDLOCK_MIN_KERNEL: [number, number] = [5, 13]; + +function parseKernelVersion(release: string): [number, number] { + const parts = release.trim().split("."); + return [parseInt(parts[0] ?? "0", 10), parseInt(parts[1] ?? "0", 10)]; +} + +function detectLandlock(): boolean { + try { + const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); + const [major, minor] = parseKernelVersion(release); + return ( + major > LANDLOCK_MIN_KERNEL[0] || + (major === LANDLOCK_MIN_KERNEL[0] && minor >= LANDLOCK_MIN_KERNEL[1]) + ); + } catch { + return false; + } +} + +function detectLandlockAbi(): number { + try { + const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); + const [major, minor] = parseKernelVersion(release); + if (major > 6 || (major === 6 && minor >= 2)) return 3; + if (major > 5 || (major === 5 && minor >= 19)) return 2; + if (major > 5 || (major === 5 && minor >= 13)) return 1; + return 0; + } catch { + return 0; + } +} + +function detectSeccomp(): boolean { + try { + const status = readFileSync("/proc/self/status", "utf8"); + return /Seccomp:\s*[12]/.test(status); + } catch { + return false; + } } From 19d12a9d3348926072a4b1352f8c6eb7d3b15223 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 10:19:24 +0100 Subject: [PATCH 05/13] feat(sandbox): rewrite Sandbox.execute() to use SandboxManager.wrapWithSandbox() --- packages/sandbox/src/integration.test.ts | 24 ++- packages/sandbox/src/sandbox.test.ts | 207 +++++++---------------- packages/sandbox/src/sandbox.ts | 103 ++++------- test/security/sandbox-escape.test.ts | 12 +- vitest.config.ts | 7 + 5 files changed, 137 insertions(+), 216 deletions(-) diff --git a/packages/sandbox/src/integration.test.ts b/packages/sandbox/src/integration.test.ts index cc1f132..de12d4f 100644 
--- a/packages/sandbox/src/integration.test.ts +++ b/packages/sandbox/src/integration.test.ts @@ -1,8 +1,9 @@ -import { describe, it, expect } from "vitest"; +import { describe, it, expect, beforeAll, afterAll } from "vitest"; import { execFileSync } from "node:child_process"; import { existsSync } from "node:fs"; import { join, dirname } from "node:path"; import { fileURLToPath } from "node:url"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; import { Sandbox } from "./sandbox.js"; import { DEFAULT_POLICY } from "./types.js"; import type { SandboxPolicy } from "./types.js"; @@ -68,6 +69,27 @@ const ECHO_POLICY: SandboxPolicy = { }; describe("Sandbox integration (real binary)", () => { + beforeAll(async () => { + // Initialize sandbox-runtime proxy infrastructure before any Sandbox is constructed. + // This mirrors what bootstrapAgent() does in production. + try { + await SandboxManager.initialize({ + filesystem: { allowWrite: [], denyWrite: [], denyRead: [] }, + network: { allowedDomains: [], deniedDomains: [] }, + }); + } catch { + // Initialization failure is non-fatal — filesystem isolation still applies. + } + }); + + afterAll(async () => { + try { + await SandboxManager.reset(); + } catch { + // ignore cleanup errors + } + }); + it.skipIf(!helperExists || !canUnshareUser)( "seccomp enforcement blocks missing syscalls with DEFAULT_POLICY", async () => { diff --git a/packages/sandbox/src/sandbox.test.ts b/packages/sandbox/src/sandbox.test.ts index 4a4b563..c182e4d 100644 --- a/packages/sandbox/src/sandbox.test.ts +++ b/packages/sandbox/src/sandbox.test.ts @@ -1,25 +1,13 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; -import { execFileSync } from "node:child_process"; import { DEFAULT_POLICY } from "./types.js"; import type { KernelCapabilities } from "./types.js"; -/** - * Probe whether user namespaces work on this machine. 
- * GitHub Actions runners and some containers restrict unprivileged - * user namespaces, causing `unshare --user` to fail. - */ -let canUnshareUser = false; -try { - execFileSync("unshare", ["--user", "--map-root-user", "--", "/bin/true"], { - timeout: 3000, - }); - canUnshareUser = true; -} catch { - // user namespaces not available — skip dependent tests -} - +// Mock sandbox-runtime and helper before dynamic import const mockAssertSandboxSupported = vi.fn<() => KernelCapabilities>(); const mockFindHelper = vi.fn<() => string | undefined>(); +const mockWrapWithSandbox = vi.fn<(cmd: string) => Promise>(); +const mockIsSandboxingEnabled = vi.fn<() => boolean>(); +const mockCleanupAfterCommand = vi.fn<() => void>(); vi.mock("./detect.js", () => ({ assertSandboxSupported: mockAssertSandboxSupported, @@ -29,179 +17,104 @@ vi.mock("./helper.js", () => ({ findHelper: () => mockFindHelper(), })); +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSandboxingEnabled: mockIsSandboxingEnabled, + wrapWithSandbox: mockWrapWithSandbox, + cleanupAfterCommand: mockCleanupAfterCommand, + }, +})); + const { Sandbox } = await import("./sandbox.js"); const FULL_CAPS: KernelCapabilities = { landlock: { supported: true, abiVersion: 3 }, seccomp: { supported: true }, namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, }; describe("Sandbox", () => { beforeEach(() => { vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); mockFindHelper.mockReturnValue(undefined); }); it("constructor calls assertSandboxSupported", () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - new Sandbox(DEFAULT_POLICY); - expect(mockAssertSandboxSupported).toHaveBeenCalledOnce(); }); - it("constructor throws if sandbox not supported", () => { - mockAssertSandboxSupported.mockImplementation(() => { - throw new Error("Missing 
kernel features: Landlock"); - }); - - expect(() => new Sandbox(DEFAULT_POLICY)).toThrow( - /Missing kernel features/, - ); + it("constructor throws if not initialized (isSandboxingEnabled returns false)", () => { + mockIsSandboxingEnabled.mockReturnValue(false); + expect(() => new Sandbox(DEFAULT_POLICY)).toThrow(/initialize/i); }); it("getPolicy returns a copy of the policy", () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const sandbox = new Sandbox(DEFAULT_POLICY); const policy = sandbox.getPolicy(); - expect(policy).toEqual(DEFAULT_POLICY); expect(policy).not.toBe(DEFAULT_POLICY); }); - }); describe("Sandbox.execute()", () => { beforeEach(() => { vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); mockFindHelper.mockReturnValue(undefined); }); - it.skipIf(!canUnshareUser)( - "runs a command and returns stdout", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/echo", ["hello"]); - expect(result.stdout).toContain("hello"); - expect(result.exitCode).toBe(0); - expect(result.killed).toBe(false); - }, - ); - - it.skipIf(!canUnshareUser)( - "returns non-zero exit code on failure", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/false", []); - expect(result.exitCode).not.toBe(0); - expect(result.killed).toBe(false); - }, - ); - - it.skipIf(!canUnshareUser)( - "kills process after timeout", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const policy = { ...DEFAULT_POLICY, timeoutMs: 100 }; - const sandbox = new Sandbox(policy); - const result = await sandbox.execute("/bin/sleep", ["10"]); - expect(result.killed).toBe(true); - expect(result.killReason).toBe("timeout"); - }, - ); - - it.skipIf(!canUnshareUser)("captures 
stderr", async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + it("calls wrapWithSandbox with shell-quoted command", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/echo hello"); const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/sh", ["-c", "echo error >&2"]); - expect(result.stderr).toContain("error"); + await sandbox.execute("/bin/echo", ["hello"]); + expect(mockWrapWithSandbox).toHaveBeenCalledOnce(); + const wrappedArg: string = mockWrapWithSandbox.mock.calls[0]![0]!; + expect(wrappedArg).toContain("echo"); + expect(wrappedArg).toContain("hello"); }); - it.skipIf(!canUnshareUser)("reports durationMs", async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + it("calls cleanupAfterCommand after execution", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/true"); const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/true", []); - expect(result.durationMs).toBeGreaterThanOrEqual(0); + await sandbox.execute("/bin/true", []); + expect(mockCleanupAfterCommand).toHaveBeenCalledOnce(); }); - it.skipIf(!canUnshareUser)( - "mount namespace isolates filesystem changes from host", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const policy = { - ...DEFAULT_POLICY, - namespaces: { pid: false, net: false, mnt: true, user: true }, - }; - const sandbox = new Sandbox(policy); - const result = await sandbox.execute("/bin/sh", [ - "-c", - "cat /proc/self/mounts | wc -l", - ]); - expect(result.exitCode).toBe(0); - expect(parseInt(result.stdout.trim(), 10)).toBeGreaterThan(0); - }, - ); - - it.skipIf(!canUnshareUser)( - "blocks network access in network namespace", - async () => { - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - const policy = { - ...DEFAULT_POLICY, - namespaces: { pid: false, net: true, mnt: false, user: true }, - }; - const sandbox = new Sandbox(policy); - const result = await 
sandbox.execute("/bin/sh", [ - "-c", - "ip link show 2>/dev/null | grep -oP '(?<=: )\\w+(?=:)' | sort", - ]); - expect(result.stdout.trim()).toBe("lo"); - expect(result.exitCode).toBe(0); - }, - ); -}); + it("returns stdout and exitCode from the spawned command", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/echo hello"); + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/echo", ["hello"]); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("hello"); + }); -describe("Sandbox.execute() helper integration", () => { - beforeEach(() => { - vi.clearAllMocks(); - mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); - mockFindHelper.mockReturnValue(undefined); + it("kills process after timeout and returns killReason=timeout", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/sleep 60"); + const policy = { ...DEFAULT_POLICY, timeoutMs: 100 }; + const sandbox = new Sandbox(policy); + const result = await sandbox.execute("/bin/sleep", ["60"]); + expect(result.killed).toBe(true); + expect(result.killReason).toBe("timeout"); }); - it.skipIf(!canUnshareUser)( - "sets enforcement.namespaces=true even without helper", - async () => { - mockFindHelper.mockReturnValue(undefined); - - const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/true", []); - - expect(result.exitCode).toBe(0); - expect(result.enforcement).toBeDefined(); - expect(result.enforcement!.namespaces).toBe(true); - expect(result.enforcement!.landlock).toBe(false); - expect(result.enforcement!.seccomp).toBe(false); - expect(result.enforcement!.capDrop).toBe(false); - }, - ); - - it.skipIf(!canUnshareUser)( - "sets full enforcement when helper is found", - async () => { - mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); - - const sandbox = new Sandbox(DEFAULT_POLICY); - const result = await sandbox.execute("/bin/true", []); - - expect(result.enforcement).toBeDefined(); - 
expect(result.enforcement!.namespaces).toBe(true); - expect(result.enforcement!.landlock).toBe(true); - expect(result.enforcement!.seccomp).toBe(true); - expect(result.enforcement!.capDrop).toBe(true); - }, - ); + it("reports pivotRoot=true and bindMounts=true on Linux", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + // These are set based on platform; in CI (Linux) both should be true + expect(typeof result.enforcement?.pivotRoot).toBe("boolean"); + expect(typeof result.enforcement?.bindMounts).toBe("boolean"); + }); + + it("calls cleanupAfterCommand even when command fails", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/false"); + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/false", []); + expect(mockCleanupAfterCommand).toHaveBeenCalledOnce(); + }); }); diff --git a/packages/sandbox/src/sandbox.ts b/packages/sandbox/src/sandbox.ts index 0454d4d..b055d18 100644 --- a/packages/sandbox/src/sandbox.ts +++ b/packages/sandbox/src/sandbox.ts @@ -1,14 +1,25 @@ import { spawn } from "node:child_process"; -import type { Writable } from "node:stream"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; import type { SandboxPolicy, SandboxResult, EnforcementLayers } from "./types.js"; import { assertSandboxSupported } from "./detect.js"; -import { findHelper } from "./helper.js"; +import { PolicyBuilder } from "./policy-builder.js"; + +/** POSIX single-quote shell escaping. Safe for all byte values. */ +function shEscape(arg: string): string { + return "'" + arg.replace(/'/g, "'\\''") + "'"; +} export class Sandbox { private readonly policy: SandboxPolicy; constructor(policy: SandboxPolicy) { assertSandboxSupported(); + if (!SandboxManager.isSandboxingEnabled()) { + throw new Error( + "SandboxManager is not initialized. 
Call SandboxManager.initialize() " + + "before constructing a Sandbox (see bootstrapAgent()).", + ); + } this.policy = policy; } @@ -16,77 +27,44 @@ export class Sandbox { const start = performance.now(); const timeout = this.policy.timeoutMs ?? 30_000; - // Build unshare flags from policy namespace settings - const unshareFlags = this.buildUnshareFlags(); + // Build the inner shell command. In Phase 1 the helper is not injected; + // Task 8 adds `--policy-file` injection for Landlock + cap-drop. + const shellCmd = [command, ...args].map(shEscape).join(" "); - // If we have unshare flags, wrap: unshare [flags] -- command [args] - // Otherwise run directly - const useUnshare = unshareFlags.length > 0; + // Translate SafeClaw policy to sandbox-runtime config + const rtConfig = PolicyBuilder.toRuntimeConfig(this.policy); - // Resolve helper binary - // TODO: Re-add SHA-256 integrity verification once builds are reproducible - const helperPath = findHelper(); - const useHelper = helperPath !== undefined; + // Wrap via sandbox-runtime (bwrap on Linux, sandbox-exec on macOS) + const wrappedCmd = await SandboxManager.wrapWithSandbox( + shellCmd, + undefined, + rtConfig, + ); - // Build enforcement metadata + const isLinux = process.platform === "linux"; const enforcement: EnforcementLayers = { - namespaces: useUnshare, - landlock: useHelper, - seccomp: useHelper, - capDrop: useHelper, + namespaces: isLinux, + pivotRoot: isLinux, + bindMounts: true, + landlock: false, // Phase 2: re-enabled when helper is injected + seccomp: isLinux, // sandbox-runtime applies seccomp for unix socket blocking on Linux + capDrop: false, // Phase 2: re-enabled when helper is injected }; - // Build spawn command and args based on available isolation - let spawnCmd: string; - let spawnArgs: string[]; - let stdio: ("ignore" | "pipe")[]; - - if (useUnshare && helperPath !== undefined) { - spawnCmd = "unshare"; - spawnArgs = [...unshareFlags, "--", helperPath, "--", command, ...args]; - stdio = 
["ignore", "pipe", "pipe", "pipe"]; - } else if (useUnshare) { - spawnCmd = "unshare"; - spawnArgs = [...unshareFlags, "--", command, ...args]; - stdio = ["ignore", "pipe", "pipe"]; - } else if (helperPath !== undefined) { - spawnCmd = helperPath; - spawnArgs = ["--", command, ...args]; - stdio = ["ignore", "pipe", "pipe", "pipe"]; - } else { - spawnCmd = command; - spawnArgs = args; - stdio = ["ignore", "pipe", "pipe"]; - } - return new Promise((resolve) => { const stdoutChunks: Buffer[] = []; const stderrChunks: Buffer[] = []; let killed = false; let killReason: "timeout" | "oom" | "signal" | undefined; - const proc = spawn(spawnCmd, spawnArgs, { - stdio, + const proc = spawn("/bin/sh", ["-c", wrappedCmd], { + stdio: ["ignore", "pipe", "pipe"], detached: true, }); - // Write policy JSON to fd 3 when using helper - if (useHelper) { - const fd3 = proc.stdio[3] as Writable; - fd3.on("error", () => { - // Ignored: the child may exit before reading fd 3 - }); - const policyJson = JSON.stringify({ - filesystem: this.policy.filesystem, - syscalls: this.policy.syscalls, - }); - fd3.end(policyJson); - } - const timer = setTimeout(() => { killed = true; killReason = "timeout"; - // Kill entire process group (unshare + forked children) if (proc.pid !== undefined) { try { process.kill(-proc.pid, "SIGKILL"); @@ -103,6 +81,8 @@ export class Sandbox { proc.on("close", (code: number | null) => { clearTimeout(timer); + // Clean up bwrap leftover mount points (no-op on macOS) + SandboxManager.cleanupAfterCommand(); resolve({ exitCode: code ?? 
1, stdout: Buffer.concat(stdoutChunks).toString(), @@ -116,6 +96,7 @@ export class Sandbox { proc.on("error", (err: Error) => { clearTimeout(timer); + SandboxManager.cleanupAfterCommand(); resolve({ exitCode: 1, stdout: "", @@ -131,16 +112,4 @@ export class Sandbox { getPolicy(): SandboxPolicy { return structuredClone(this.policy); } - - private buildUnshareFlags(): string[] { - const flags: string[] = []; - const ns = this.policy.namespaces; - - if (ns.pid) flags.push("--pid", "--fork"); - if (ns.net) flags.push("--net"); - if (ns.mnt) flags.push("--mount"); - if (ns.user) flags.push("--user", "--map-root-user"); - - return flags; - } } diff --git a/test/security/sandbox-escape.test.ts b/test/security/sandbox-escape.test.ts index bc4608f..1876c42 100644 --- a/test/security/sandbox-escape.test.ts +++ b/test/security/sandbox-escape.test.ts @@ -22,12 +22,14 @@ const UNSUPPORTED_CAPS: KernelCapabilities = { landlock: { supported: false, abiVersion: 0 }, seccomp: { supported: false }, namespaces: { user: false, pid: false, net: false, mnt: false }, + bwrap: { available: false, path: undefined, version: undefined }, }; const FULL_CAPS: KernelCapabilities = { landlock: { supported: true, abiVersion: 3 }, seccomp: { supported: true }, namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, }; const mockAssert = vi.fn<() => KernelCapabilities>(); @@ -45,6 +47,14 @@ vi.mock("../../packages/sandbox/src/helper.js", () => ({ findHelper: () => undefined, })); +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSandboxingEnabled: () => true, + wrapWithSandbox: async (cmd: string) => cmd, + cleanupAfterCommand: () => undefined, + }, +})); + const { Sandbox } = await import("@safeclaw/sandbox"); describe("Sandbox escape prevention", () => { @@ -114,7 +124,7 @@ describe("Sandbox escape prevention", () => { const sandbox = new Sandbox(DEFAULT_POLICY); const policy = 
sandbox.getPolicy(); - (policy as SandboxPolicy).network = "filtered"; + (policy as SandboxPolicy).network = "localhost"; const policyAgain = sandbox.getPolicy(); expect(policyAgain.network).toBe("none"); diff --git a/vitest.config.ts b/vitest.config.ts index d949c14..3f330cf 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -9,6 +9,13 @@ export default defineConfig({ "@safeclaw/vault": resolve(__dirname, "packages/vault/src/index.ts"), "@safeclaw/gateway": resolve(__dirname, "packages/gateway/src/index.ts"), "@safeclaw/cli": resolve(__dirname, "packages/cli/src/index.ts"), + // sandbox-runtime lives in packages/sandbox/node_modules (not root), so we + // pin it to a single resolved path so vi.mock("@anthropic-ai/sandbox-runtime") + // works from any test file in the workspace. + "@anthropic-ai/sandbox-runtime": resolve( + __dirname, + "packages/sandbox/node_modules/@anthropic-ai/sandbox-runtime/dist/index.js", + ), }, }, test: { From 86766b23b356c1e2a50fbd0c6750387bfbe37e71 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 10:21:36 +0100 Subject: [PATCH 06/13] feat(cli): initialize SandboxManager network proxy before constructing Sandbox --- packages/cli/src/commands/bootstrap.test.ts | 23 +++++++++++++++++++++ packages/cli/src/commands/bootstrap.ts | 17 +++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/packages/cli/src/commands/bootstrap.test.ts b/packages/cli/src/commands/bootstrap.test.ts index fa423cf..3c35f2d 100644 --- a/packages/cli/src/commands/bootstrap.test.ts +++ b/packages/cli/src/commands/bootstrap.test.ts @@ -6,6 +6,22 @@ import { CapabilityRegistry, SessionManager, } from "@safeclaw/core"; + +const mockSandboxManagerInitialize = vi.fn<() => Promise>().mockResolvedValue(undefined); +const mockSandboxManagerIsSupportedPlatform = vi.fn<() => boolean>().mockReturnValue(true); +const mockSandboxManagerIsSandboxingEnabled = vi.fn<() => boolean>().mockReturnValue(true); + +vi.mock("@anthropic-ai/sandbox-runtime", () 
=> ({ + SandboxManager: { + initialize: mockSandboxManagerInitialize, + isSupportedPlatform: mockSandboxManagerIsSupportedPlatform, + isSandboxingEnabled: mockSandboxManagerIsSandboxingEnabled, + wrapWithSandbox: vi.fn().mockResolvedValue("/bin/true"), + cleanupAfterCommand: vi.fn(), + reset: vi.fn().mockResolvedValue(undefined), + }, +})); + const MockSandbox = vi.fn(); const mockForDevelopment = vi.fn().mockReturnValue({ filesystem: { @@ -83,6 +99,7 @@ function createMockDeps( describe("bootstrapAgent", () => { beforeEach(() => { + mockSandboxManagerInitialize.mockClear(); MockSandbox.mockReset(); mockForDevelopment.mockClear(); mockForDevelopment.mockReturnValue({ @@ -290,6 +307,12 @@ describe("bootstrapAgent", () => { ); }); + it("calls SandboxManager.initialize before constructing Sandbox", async () => { + const deps = createMockDeps(); + await bootstrapAgent(deps); + expect(mockSandboxManagerInitialize).toHaveBeenCalledOnce(); + }); + it("falls back gracefully when Sandbox constructor throws", async () => { MockSandbox.mockImplementation(() => { throw new Error("sandbox not supported"); diff --git a/packages/cli/src/commands/bootstrap.ts b/packages/cli/src/commands/bootstrap.ts index 06d360a..1be1d19 100644 --- a/packages/cli/src/commands/bootstrap.ts +++ b/packages/cli/src/commands/bootstrap.ts @@ -33,6 +33,7 @@ import { deriveKeyFromPassphrase as defaultDeriveKey, } from "@safeclaw/vault"; import { Sandbox, PolicyBuilder } from "@safeclaw/sandbox"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; import { readPassphrase as defaultReadPassphrase } from "../readPassphrase.js"; export interface BootstrapDeps { @@ -182,6 +183,22 @@ export async function bootstrapAgent( toolRegistry.register(tool); } + // Initialize sandbox-runtime network proxy infrastructure. + // Uses a base "block all network" config; per-execution configs are passed + // as customConfig in Sandbox.execute() → SandboxManager.wrapWithSandbox(). 
+ try { + await SandboxManager.initialize({ + filesystem: { allowWrite: [], denyWrite: [], denyRead: [] }, + network: { allowedDomains: [], deniedDomains: [] }, + }); + } catch (err: unknown) { + const detail = err instanceof Error ? err.message : String(err); + output.write( + `Warning: sandbox network proxy failed to initialize (${detail}). ` + + `Filesystem isolation will still be applied.\n`, + ); + } + let sandbox: Sandbox | undefined; try { sandbox = new Sandbox(sandboxPolicy); From 4bfacad3fdfe27d5d272267c24b874925f7d8e3c Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 10:25:37 +0100 Subject: [PATCH 07/13] feat(cli): update doctor checks for sandbox-runtime Replace linuxCheck with platformCheck (passes on linux and darwin). Replace unshareCheck with bwrapCheck (bubblewrap is now the outer isolation layer). Add socatCheck and ripgrepCheck for sandbox-runtime network proxy and search tool dependencies. Co-Authored-By: Claude Sonnet 4.6 --- .../cli/src/commands/doctor-checks.test.ts | 72 ++++++++++++--- packages/cli/src/commands/doctor-checks.ts | 92 ++++++++++++++++--- packages/cli/src/commands/doctor.ts | 12 ++- 3 files changed, 144 insertions(+), 32 deletions(-) diff --git a/packages/cli/src/commands/doctor-checks.test.ts b/packages/cli/src/commands/doctor-checks.test.ts index 908322e..85e9f31 100644 --- a/packages/cli/src/commands/doctor-checks.test.ts +++ b/packages/cli/src/commands/doctor-checks.test.ts @@ -1,11 +1,13 @@ import { describe, it, expect } from "vitest"; import { nodeVersionCheck, - linuxCheck, + platformCheck, architectureCheck, vaultExistsCheck, sandboxHelperCheck, - unshareCheck, + bwrapCheck, + socatCheck, + ripgrepCheck, landlockCheck, seccompCheck, userNamespaceCheck, @@ -32,18 +34,24 @@ describe("nodeVersionCheck", () => { }); }); -describe("linuxCheck", () => { +describe("platformCheck", () => { it("passes on linux", async () => { - const check = linuxCheck({ platform: "linux" }); + const check = platformCheck({ 
platform: "linux" }); const result = await check.run(); expect(result.status).toBe("pass"); }); - it("fails on non-linux", async () => { - const check = linuxCheck({ platform: "darwin" }); + it("passes on darwin", async () => { + const check = platformCheck({ platform: "darwin" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + }); + + it("fails on unsupported platform", async () => { + const check = platformCheck({ platform: "win32" }); const result = await check.run(); expect(result.status).toBe("fail"); - expect(result.message).toContain("darwin"); + expect(result.message).toContain("win32"); }); }); @@ -99,20 +107,57 @@ describe("sandboxHelperCheck", () => { }); }); -describe("unshareCheck", () => { - it("passes when unshare is available", async () => { - const check = unshareCheck({ execFileSync: () => "/usr/bin/unshare\n" }); +describe("bwrapCheck", () => { + it("passes when bwrap is available", async () => { + const check = bwrapCheck({ execFileSync: () => "/usr/bin/bwrap\n" }); const result = await check.run(); expect(result.status).toBe("pass"); + expect(result.message).toContain("bwrap"); }); - it("fails when unshare is not found", async () => { - const check = unshareCheck({ + it("fails when bwrap is not found", async () => { + const check = bwrapCheck({ execFileSync: () => { throw new Error("not found"); }, }); const result = await check.run(); expect(result.status).toBe("fail"); - expect(result.detail).toContain("unshare"); + expect(result.detail).toContain("bwrap"); + }); +}); + +describe("socatCheck", () => { + it("passes when socat is available", async () => { + const check = socatCheck({ execFileSync: () => "/usr/bin/socat\n" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + expect(result.message).toContain("socat"); + }); + + it("warns when socat is not found", async () => { + const check = socatCheck({ + execFileSync: () => { throw new Error("not found"); }, + }); + const result = await 
check.run(); + expect(result.status).toBe("warn"); + expect(result.detail).toContain("socat"); + }); +}); + +describe("ripgrepCheck", () => { + it("passes when rg is available", async () => { + const check = ripgrepCheck({ execFileSync: () => "/usr/bin/rg\n" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + expect(result.message).toContain("rg"); + }); + + it("warns when rg is not found", async () => { + const check = ripgrepCheck({ + execFileSync: () => { throw new Error("not found"); }, + }); + const result = await check.run(); + expect(result.status).toBe("warn"); + expect(result.detail).toContain("ripgrep"); }); }); @@ -121,6 +166,7 @@ function makeKernelCaps(overrides: Partial = {}): KernelCapa landlock: { supported: true, abiVersion: 3 }, seccomp: { supported: true }, namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, ...overrides, }; } diff --git a/packages/cli/src/commands/doctor-checks.ts b/packages/cli/src/commands/doctor-checks.ts index 4c656e3..c2c87f7 100644 --- a/packages/cli/src/commands/doctor-checks.ts +++ b/packages/cli/src/commands/doctor-checks.ts @@ -44,20 +44,21 @@ export interface PlatformDeps { platform: string; } -export function linuxCheck( +export function platformCheck( deps: PlatformDeps = { platform: process.platform }, ): DiagnosticCheck { + const supported = new Set(["linux", "darwin"]); return { - name: "linux", + name: "platform", category: "system", async run(): Promise { - if (deps.platform === "linux") { - return { status: "pass", message: "Running on Linux" }; + if (supported.has(deps.platform)) { + return { status: "pass", message: `Platform: ${deps.platform}` }; } return { status: "fail", - message: `Running on ${deps.platform}`, - detail: "SafeClaw requires Linux.", + message: `Unsupported platform: ${deps.platform}`, + detail: "SafeClaw supports Linux and macOS (darwin) only.", }; }, }; @@ -152,30 +153,91 @@ 
export function sandboxHelperCheck( }; } -export interface UnshareDeps { +export interface BwrapDeps { execFileSync: (cmd: string, args: string[]) => string; } -export function unshareCheck( - deps: UnshareDeps = { +export function bwrapCheck( + deps: BwrapDeps = { execFileSync: (cmd: string, args: string[]) => defaultExecFileSync(cmd, args, { encoding: "utf8" }), }, ): DiagnosticCheck { return { - name: "unshare", + name: "bwrap", category: "security", async run(): Promise { try { - deps.execFileSync("which", ["unshare"]); - return { status: "pass", message: "unshare command available" }; + const path = deps.execFileSync("which", ["bwrap"]).trim(); + return { status: "pass", message: `bwrap available: ${path}` }; } catch { return { status: "fail", - message: "unshare command not found", + message: "bwrap not found", detail: - "The 'unshare' command is required for namespace isolation. " + - "Install util-linux: apt install util-linux", + "bubblewrap (bwrap) is required for sandbox isolation. " + + "Install: apt install bubblewrap", + }; + } + }, + }; +} + +export interface SocatDeps { + execFileSync: (cmd: string, args: string[]) => string; +} + +export function socatCheck( + deps: SocatDeps = { + execFileSync: (cmd: string, args: string[]) => + defaultExecFileSync(cmd, args, { encoding: "utf8" }), + }, +): DiagnosticCheck { + return { + name: "socat", + category: "security", + async run(): Promise { + try { + const path = deps.execFileSync("which", ["socat"]).trim(); + return { status: "pass", message: `socat available: ${path}` }; + } catch { + return { + status: "warn", + message: "socat not found", + detail: + "socat is used by sandbox-runtime for network proxying. " + + "Without it, network domain filtering will not work. 
" + + "Install: apt install socat", + }; + } + }, + }; +} + +export interface RipgrepDeps { + execFileSync: (cmd: string, args: string[]) => string; +} + +export function ripgrepCheck( + deps: RipgrepDeps = { + execFileSync: (cmd: string, args: string[]) => + defaultExecFileSync(cmd, args, { encoding: "utf8" }), + }, +): DiagnosticCheck { + return { + name: "ripgrep", + category: "security", + async run(): Promise { + try { + const path = deps.execFileSync("which", ["rg"]).trim(); + return { status: "pass", message: `rg available: ${path}` }; + } catch { + return { + status: "warn", + message: "rg (ripgrep) not found", + detail: + "ripgrep is used by the search tool for fast code search. " + + "Install: apt install ripgrep", }; } }, diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts index 800eaaa..d2835ff 100644 --- a/packages/cli/src/commands/doctor.ts +++ b/packages/cli/src/commands/doctor.ts @@ -1,11 +1,13 @@ import type { DiagnosticCheck, DiagnosticResult } from "./doctor-types.js"; import { nodeVersionCheck, - linuxCheck, + platformCheck, architectureCheck, vaultExistsCheck, sandboxHelperCheck, - unshareCheck, + bwrapCheck, + socatCheck, + ripgrepCheck, landlockCheck, seccompCheck, userNamespaceCheck, @@ -159,10 +161,12 @@ export function createDefaultChecks(): DiagnosticCheck[] { return [ // System nodeVersionCheck(), - linuxCheck(), + platformCheck(), architectureCheck(), // Security - unshareCheck(), + bwrapCheck(), + socatCheck(), + ripgrepCheck(), landlockCheck(), seccompCheck(), userNamespaceCheck(), From 67aa8dc0efc4b1d8bd6aa14f005cbb9438f04fd0 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 10:28:08 +0100 Subject: [PATCH 08/13] feat(sandbox): inject C helper via --policy-file for Landlock + cap-drop inside bwrap When the native helper binary is found, Sandbox.execute() now writes the policy (filesystem + syscalls) to a mode-0600 temp file and runs: helper --policy-file -- command [args] as the 
inner command inside the bwrap container. This re-enables the Landlock filesystem restrictions and capability-drop layer (Phase 2). enforcement.landlock and enforcement.capDrop are now true when the helper is present. The temp file is cleaned up in both close and error handlers. If the helper is in a non-system directory, its parent is added to rtConfig.filesystem.allowWrite so bwrap bind-mounts it into the container. Co-Authored-By: Claude Sonnet 4.6 --- packages/sandbox/src/sandbox.test.ts | 71 ++++++++++++++++++++++++++++ packages/sandbox/src/sandbox.ts | 68 +++++++++++++++++++++++--- 2 files changed, 132 insertions(+), 7 deletions(-) diff --git a/packages/sandbox/src/sandbox.test.ts b/packages/sandbox/src/sandbox.test.ts index c182e4d..26aed68 100644 --- a/packages/sandbox/src/sandbox.test.ts +++ b/packages/sandbox/src/sandbox.test.ts @@ -8,6 +8,8 @@ const mockFindHelper = vi.fn<() => string | undefined>(); const mockWrapWithSandbox = vi.fn<(cmd: string) => Promise>(); const mockIsSandboxingEnabled = vi.fn<() => boolean>(); const mockCleanupAfterCommand = vi.fn<() => void>(); +const mockWriteFileSync = vi.fn<() => void>(); +const mockRmSync = vi.fn<() => void>(); vi.mock("./detect.js", () => ({ assertSandboxSupported: mockAssertSandboxSupported, @@ -25,6 +27,11 @@ vi.mock("@anthropic-ai/sandbox-runtime", () => ({ }, })); +vi.mock("node:fs", () => ({ + writeFileSync: (...args: unknown[]) => mockWriteFileSync(...args), + rmSync: (...args: unknown[]) => mockRmSync(...args), +})); + const { Sandbox } = await import("./sandbox.js"); const FULL_CAPS: KernelCapabilities = { @@ -40,6 +47,8 @@ describe("Sandbox", () => { mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); mockIsSandboxingEnabled.mockReturnValue(true); mockFindHelper.mockReturnValue(undefined); + mockWriteFileSync.mockReturnValue(undefined); + mockRmSync.mockReturnValue(undefined); }); it("constructor calls assertSandboxSupported", () => { @@ -66,6 +75,8 @@ describe("Sandbox.execute()", () => { 
mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); mockIsSandboxingEnabled.mockReturnValue(true); mockFindHelper.mockReturnValue(undefined); + mockWriteFileSync.mockReturnValue(undefined); + mockRmSync.mockReturnValue(undefined); }); it("calls wrapWithSandbox with shell-quoted command", async () => { @@ -118,3 +129,63 @@ describe("Sandbox.execute()", () => { expect(mockCleanupAfterCommand).toHaveBeenCalledOnce(); }); }); + +describe("Sandbox.execute() with helper", () => { + beforeEach(() => { + vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); + mockWriteFileSync.mockReturnValue(undefined); + mockRmSync.mockReturnValue(undefined); + }); + + it("includes --policy-file in the inner command when helper is found", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockImplementation(async (cmd: string) => cmd); + + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/echo", ["hello"]); + + const innerCmd: string = mockWrapWithSandbox.mock.calls[0]![0]!; + expect(innerCmd).toContain("safeclaw-sandbox-helper"); + expect(innerCmd).toContain("--policy-file"); + expect(innerCmd).toContain("--"); + expect(innerCmd).toContain("echo"); + }); + + it("sets enforcement.landlock=true and enforcement.capDrop=true when helper is found", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + + expect(result.enforcement?.landlock).toBe(true); + expect(result.enforcement?.capDrop).toBe(true); + }); + + it("does NOT set landlock/capDrop when helper is not found", async () => { + mockFindHelper.mockReturnValue(undefined); + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + const 
result = await sandbox.execute("/bin/true", []); + + expect(result.enforcement?.landlock).toBe(false); + expect(result.enforcement?.capDrop).toBe(false); + }); + + it("cleans up policy temp file even if command fails", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockResolvedValue("/bin/false"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/false", []); + + expect(mockWriteFileSync).toHaveBeenCalledOnce(); + expect(mockRmSync).toHaveBeenCalledWith( + expect.stringContaining("safeclaw-policy-"), + { force: true }, + ); + }); +}); diff --git a/packages/sandbox/src/sandbox.ts b/packages/sandbox/src/sandbox.ts index b055d18..927c7cc 100644 --- a/packages/sandbox/src/sandbox.ts +++ b/packages/sandbox/src/sandbox.ts @@ -1,7 +1,11 @@ import { spawn } from "node:child_process"; +import { writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; import type { SandboxPolicy, SandboxResult, EnforcementLayers } from "./types.js"; import { assertSandboxSupported } from "./detect.js"; +import { findHelper } from "./helper.js"; import { PolicyBuilder } from "./policy-builder.js"; /** POSIX single-quote shell escaping. Safe for all byte values. */ @@ -27,16 +31,60 @@ export class Sandbox { const start = performance.now(); const timeout = this.policy.timeoutMs ?? 30_000; - // Build the inner shell command. In Phase 1 the helper is not injected; - // Task 8 adds `--policy-file` injection for Landlock + cap-drop. - const shellCmd = [command, ...args].map(shEscape).join(" "); + // Inject C helper (Landlock + seccomp + cap-drop) as the inner process + // when the helper binary is available, using --policy-file for the policy. 
+ const helperPath = findHelper(); + const useHelper = helperPath !== undefined; - // Translate SafeClaw policy to sandbox-runtime config + let policyTmpPath: string | undefined; + let innerCmd: string; + + if (useHelper) { + // Write policy JSON to a temp file (mode 0600, as required by policy_read_file). + policyTmpPath = join( + tmpdir(), + `safeclaw-policy-${process.pid.toString()}-${Date.now().toString()}.json`, + ); + writeFileSync( + policyTmpPath, + JSON.stringify({ + filesystem: this.policy.filesystem, + syscalls: this.policy.syscalls, + }), + { mode: 0o600 }, + ); + innerCmd = [ + helperPath, + "--policy-file", + policyTmpPath, + "--", + command, + ...args, + ] + .map(shEscape) + .join(" "); + } else { + innerCmd = [command, ...args].map(shEscape).join(" "); + } + + // Translate SafeClaw policy to sandbox-runtime config. When helper is + // present and in a non-system path, add its directory to allowWrite so + // bwrap bind-mounts it into the container. const rtConfig = PolicyBuilder.toRuntimeConfig(this.policy); + if (useHelper && helperPath !== undefined) { + const helperDir = helperPath.substring(0, helperPath.lastIndexOf("/")); + const systemPaths = ["/bin", "/usr/bin", "/usr/local/bin", "/sbin", "/usr/sbin"]; + if (!systemPaths.includes(helperDir)) { + rtConfig.filesystem.allowWrite = [ + ...rtConfig.filesystem.allowWrite, + helperDir, + ]; + } + } // Wrap via sandbox-runtime (bwrap on Linux, sandbox-exec on macOS) const wrappedCmd = await SandboxManager.wrapWithSandbox( - shellCmd, + innerCmd, undefined, rtConfig, ); @@ -46,9 +94,9 @@ export class Sandbox { namespaces: isLinux, pivotRoot: isLinux, bindMounts: true, - landlock: false, // Phase 2: re-enabled when helper is injected + landlock: useHelper, seccomp: isLinux, // sandbox-runtime applies seccomp for unix socket blocking on Linux - capDrop: false, // Phase 2: re-enabled when helper is injected + capDrop: useHelper, }; return new Promise((resolve) => { @@ -81,6 +129,9 @@ export class Sandbox 
{ proc.on("close", (code: number | null) => { clearTimeout(timer); + if (policyTmpPath !== undefined) { + try { rmSync(policyTmpPath, { force: true }); } catch { /* ignore */ } + } // Clean up bwrap leftover mount points (no-op on macOS) SandboxManager.cleanupAfterCommand(); resolve({ @@ -96,6 +147,9 @@ export class Sandbox { proc.on("error", (err: Error) => { clearTimeout(timer); + if (policyTmpPath !== undefined) { + try { rmSync(policyTmpPath, { force: true }); } catch { /* ignore */ } + } SandboxManager.cleanupAfterCommand(); resolve({ exitCode: 1, From 2414d0f03e4c257c0374c9b6a5de5df6ee7cd2b0 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 10:47:34 +0100 Subject: [PATCH 09/13] fix(sandbox): filter denyRead paths by real directories to avoid bwrap failure PolicyBuilder.toRuntimeConfig() used to include all sensitive home dirs (~/.ssh, ~/.aws, etc.) in denyRead unconditionally. bwrap cannot mount tmpfs over non-existent paths or symlinks to external filesystems (e.g. WSL2 symlinks pointing to /mnt/c/...), causing the sandbox command to fail with "Can't mount tmpfs on /newroot/home/...". Fix: use lstatSync to filter denyRead to only include paths that are real directories (not symlinks, not missing). Symlinks to Windows paths on WSL2 are excluded. Update sandbox.test.ts and policy-builder.test.ts mocks to cover the new lstatSync import. 
Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 2 + .../2026-03-20-sandbox-runtime-integration.md | 1677 +++++++++++++++++ packages/sandbox/src/policy-builder.test.ts | 8 +- packages/sandbox/src/policy-builder.ts | 12 +- packages/sandbox/src/sandbox.test.ts | 1 + 5 files changed, 1697 insertions(+), 3 deletions(-) create mode 100644 docs/plans/2026-03-20-sandbox-runtime-integration.md diff --git a/.gitignore b/.gitignore index 62dacec..69810f5 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ coverage/ .turbo/ bundle/ .opencode/ +.worktrees/ +tmp/ diff --git a/docs/plans/2026-03-20-sandbox-runtime-integration.md b/docs/plans/2026-03-20-sandbox-runtime-integration.md new file mode 100644 index 0000000..7b14d77 --- /dev/null +++ b/docs/plans/2026-03-20-sandbox-runtime-integration.md @@ -0,0 +1,1677 @@ +# Sandbox Runtime Integration Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace SafeClaw's custom namespace/bwrap code with `@anthropic-ai/sandbox-runtime` as the outer isolation layer, gaining macOS support, network domain filtering, and socat-based proxy control, while retaining the C helper (Landlock + seccomp + cap-drop) as an inner layer inside bwrap. + +**Architecture:** Phase 1 wires `SandboxManager.wrapWithSandbox()` into the `Sandbox` class, which becomes an adapter translating `SandboxPolicy` → `SandboxRuntimeConfig`. Phase 2 injects the C helper as the inner process by writing a policy temp file and passing `--policy-file` (the helper already supports this flag — no C changes needed). The spawn chain becomes: `bwrap [sandbox-runtime] → helper [Landlock+seccomp+cap-drop] → command`. + +**Tech Stack:** `@anthropic-ai/sandbox-runtime` pinned to git SHA `20f5176a94314038695bee13779eb9eebbbaeb49`, existing C helper binary, TypeScript (ESM strict), vitest. 
+ +**Supersedes:** `docs/plans/2026-03-07-bubblewrap-sandbox-design.md` — that plan assumed SafeClaw would implement bwrap directly; this delegates to sandbox-runtime instead and additionally gains macOS + network proxy support. + +--- + +## File Map + +| File | Change | +|------|--------| +| `packages/sandbox/package.json` | Add `@anthropic-ai/sandbox-runtime` git dep | +| `packages/sandbox/src/types.ts` | Extend `NetworkPolicy`, `EnforcementLayers`, `KernelCapabilities` | +| `packages/sandbox/src/policy-builder.ts` | Add `toRuntimeConfig()` method; extend `DevelopmentPolicyOptions` for network | +| `packages/sandbox/src/policy-builder.test.ts` | Add `toRuntimeConfig()` tests | +| `packages/sandbox/src/detect.ts` | Replace `unshare` check with sandbox-runtime dep checks | +| `packages/sandbox/src/detect.test.ts` | Update detect tests | +| `packages/sandbox/src/sandbox.ts` | Rewrite `execute()` to use `SandboxManager.wrapWithSandbox()` | +| `packages/sandbox/src/sandbox.test.ts` | Update spawn chain tests | +| `packages/sandbox/src/index.ts` | Export new `NetworkPolicy` type | +| `packages/cli/src/commands/bootstrap.ts` | Call `SandboxManager.initialize()` before `new Sandbox()` | +| `packages/cli/src/commands/bootstrap.test.ts` | Add initialize call test | +| `packages/cli/src/commands/doctor-checks.ts` | Replace `unshareCheck`; add `bwrapCheck`, `socatCheck`, `ripgrepCheck` | +| `packages/cli/src/commands/doctor-checks.test.ts` | Update check tests | +| `packages/cli/src/commands/doctor.ts` | Update check list | + +--- + +## Phase 1 — sandbox-runtime as outer isolation layer + +### Task 1: Add @anthropic-ai/sandbox-runtime dependency + +**Files:** +- Modify: `packages/sandbox/package.json` + +- [ ] **Step 1: Add the git dependency** + +```json +{ + "name": "@safeclaw/sandbox", + "version": "0.0.1", + "private": true, + "type": "module", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "scripts": { + "build": "tsc -p tsconfig.json" + }, + "dependencies": 
{ + "@anthropic-ai/sandbox-runtime": "git+ssh://git@github.com/anthropic-experimental/sandbox-runtime.git#20f5176a94314038695bee13779eb9eebbbaeb49" + }, + "files": ["dist"] +} +``` + +- [ ] **Step 2: Install** + +```bash +pnpm install +``` + +Expected: resolves without error. `node_modules/@anthropic-ai/sandbox-runtime` exists. + +- [ ] **Step 3: Verify types resolve** + +```bash +pnpm typecheck +``` + +Expected: PASS (no type errors yet — we haven't imported it). + +- [ ] **Step 4: Commit** + +```bash +git add packages/sandbox/package.json pnpm-lock.yaml +git commit -m "chore(sandbox): add @anthropic-ai/sandbox-runtime git dependency" +``` + +--- + +### Task 2: Extend types + +**Files:** +- Modify: `packages/sandbox/src/types.ts` +- Modify: `packages/sandbox/src/index.ts` + +Background: `SandboxPolicy.network` is currently `"none" | "localhost" | "filtered"`. We extend it to support a structured domain-allowlist variant. `EnforcementLayers` gains `pivotRoot` and `bindMounts`. `KernelCapabilities` gains `bwrap`. 
+ +- [ ] **Step 1: Write type tests first** (in `packages/sandbox/src/types.test.ts`, new file) + +```typescript +import { describe, it, expectTypeOf } from "vitest"; +import type { NetworkPolicy, SandboxPolicy, EnforcementLayers, KernelCapabilities } from "./types.js"; + +describe("NetworkPolicy", () => { + it("accepts 'none'", () => { + const n: NetworkPolicy = "none"; + expectTypeOf(n).toMatchTypeOf<NetworkPolicy>(); + }); + + it("accepts domain allowlist object", () => { + const n: NetworkPolicy = { allowedDomains: ["github.com", "*.npmjs.org"] }; + expectTypeOf(n).toMatchTypeOf<NetworkPolicy>(); + }); + + it("accepts domain allowlist with deniedDomains", () => { + const n: NetworkPolicy = { allowedDomains: [], deniedDomains: ["evil.com"] }; + expectTypeOf(n).toMatchTypeOf<NetworkPolicy>(); + }); +}); + +describe("EnforcementLayers", () => { + it("has pivotRoot and bindMounts fields", () => { + const e: EnforcementLayers = { + namespaces: true, pivotRoot: true, bindMounts: true, + landlock: false, seccomp: false, capDrop: false, + }; + expectTypeOf(e.pivotRoot).toBeBoolean(); + expectTypeOf(e.bindMounts).toBeBoolean(); + }); +}); + +describe("KernelCapabilities", () => { + it("has bwrap field", () => { + const k: KernelCapabilities = { + landlock: { supported: true, abiVersion: 3 }, + seccomp: { supported: true }, + namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, + }; + expectTypeOf(k.bwrap.available).toBeBoolean(); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- types.test +``` + +Expected: FAIL — `NetworkPolicy`, `pivotRoot`, `bindMounts`, `bwrap` not yet defined. + +- [ ] **Step 3: Update `packages/sandbox/src/types.ts`** + +```typescript +/** Filesystem access rule for Landlock */ +export interface PathRule { + path: string; + access: "read" | "write" | "readwrite" | "execute" | "readwriteexecute"; +} + +/** + * Network policy for a sandbox execution. 
+ * - "none": block all outbound network (net namespace, no proxy) + * - "localhost": allow only loopback + * - object: route through sandbox-runtime proxy with domain allowlist/denylist + */ +export type NetworkPolicy = + | "none" + | "localhost" + | { allowedDomains: string[]; deniedDomains?: string[] }; + +/** Sandbox policy — defines isolation constraints for a single execution */ +export interface SandboxPolicy { + filesystem: { allow: PathRule[]; deny: PathRule[] }; + syscalls: { allow: string[]; defaultDeny: true }; + network: NetworkPolicy; + namespaces: { pid: boolean; net: boolean; mnt: boolean; user: boolean }; + timeoutMs?: number | undefined; +} + +/** Which enforcement layers were active during execution */ +export interface EnforcementLayers { + namespaces: boolean; + pivotRoot: boolean; // bwrap pivot_root was used + bindMounts: boolean; // bwrap bind-mount FS isolation was active + landlock: boolean; + seccomp: boolean; + capDrop: boolean; +} + +/** Result of a sandboxed execution */ +export interface SandboxResult { + exitCode: number; + stdout: string; + stderr: string; + durationMs: number; + killed: boolean; + killReason?: "timeout" | "oom" | "signal" | undefined; + enforcement?: EnforcementLayers | undefined; +} + +/** Kernel feature availability */ +export interface KernelCapabilities { + landlock: { supported: boolean; abiVersion: number }; + seccomp: { supported: boolean }; + namespaces: { user: boolean; pid: boolean; net: boolean; mnt: boolean }; + bwrap: { available: boolean; path: string | undefined; version: string | undefined }; +} + +/** Default sandbox policy — maximum restriction */ +export const DEFAULT_POLICY: SandboxPolicy = { + filesystem: { allow: [], deny: [] }, + syscalls: { + allow: [ + "read", "write", "exit", "exit_group", "brk", "mmap", "close", + "fstat", "mprotect", "munmap", "rt_sigaction", "rt_sigprocmask", + "ioctl", "access", "getpid", "clone", "execve", "wait4", "uname", + "fcntl", "getcwd", "arch_prctl", 
"set_tid_address", "set_robust_list", + "rseq", "prlimit64", "getrandom", + ], + defaultDeny: true, + }, + network: "none", + namespaces: { pid: true, net: true, mnt: true, user: true }, + timeoutMs: 30_000, +}; +``` + +- [ ] **Step 4: Export `NetworkPolicy` from `packages/sandbox/src/index.ts`** + +Add to existing exports: +```typescript +export type { NetworkPolicy } from "./types.js"; +``` + +- [ ] **Step 5: Fix compilation breakage in policy-builder.ts** + +In `policy-builder.ts`, `build()` returns `SandboxPolicy`. The `network: "none"` literal still works because `"none"` is part of `NetworkPolicy`. No change needed. + +Check existing tests still type-check: +```bash +pnpm typecheck +``` + +Expected: PASS (or only errors about code that explicitly checks `=== "filtered"` — fix those by checking `typeof policy.network === "object"`). + +- [ ] **Step 6: Run the new type tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- types.test +``` + +Expected: PASS. + +- [ ] **Step 7: Run all sandbox tests** + +```bash +pnpm test --filter @safeclaw/sandbox +``` + +Expected: PASS. (The existing tests reference `network: "none"` which is still valid.) + +- [ ] **Step 8: Commit** + +```bash +git add packages/sandbox/src/types.ts packages/sandbox/src/types.test.ts packages/sandbox/src/index.ts +git commit -m "feat(sandbox): extend NetworkPolicy type and EnforcementLayers/KernelCapabilities" +``` + +--- + +### Task 3: PolicyBuilder.toRuntimeConfig() + +**Files:** +- Modify: `packages/sandbox/src/policy-builder.ts` +- Modify: `packages/sandbox/src/policy-builder.test.ts` + +Background: `SandboxRuntimeConfig` (from sandbox-runtime) uses a different model than SafeClaw's Landlock-style `SandboxPolicy`: +- **Reads**: sandbox-runtime is permissive-by-default (deny specific dirs) vs SafeClaw's allowlist-only. We translate by denying the sensitive home dirs and letting everything else be readable. +- **Writes**: both use allowlist-only. 
Map `readwrite`/`readwriteexecute` PathRules to `filesystem.allowWrite`. +- **Network**: `"none"` → `allowedDomains: []`; object → pass through. + +The sensitive dirs always denied for reads (credentials/config that must not leak): +``` +~/.ssh ~/.aws ~/.gnupg ~/.kube ~/.docker ~/.gcloud ~/.azure +``` + +- [ ] **Step 1: Write failing tests for `toRuntimeConfig()` in `policy-builder.test.ts`** + +Add to the existing test file: + +```typescript +import { homedir } from "node:os"; +import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; + +describe("PolicyBuilder.toRuntimeConfig()", () => { + it("maps readwrite PathRules to allowWrite", () => { + const policy = new PolicyBuilder() + .addReadWrite("/project") + .addReadWrite("/tmp") + .build(); + const rtConfig: SandboxRuntimeConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain("/project"); + expect(rtConfig.filesystem.allowWrite).toContain("/tmp"); + }); + + it("maps readwriteexecute PathRules to allowWrite", () => { + const policy = new PolicyBuilder().addReadWriteExecute("/workspace").build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain("/workspace"); + }); + + it("does not add read-only or execute-only paths to allowWrite", () => { + const policy = new PolicyBuilder() + .addReadOnly("/etc") + .addReadExecute("/usr/bin") + .build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).not.toContain("/etc"); + expect(rtConfig.filesystem.allowWrite).not.toContain("/usr/bin"); + }); + + it("adds sensitive home dirs to denyRead", () => { + const policy = new PolicyBuilder().build(); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + const home = homedir(); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.ssh`); + expect(rtConfig.filesystem.denyRead).toContain(`${home}/.aws`); + 
expect(rtConfig.filesystem.denyRead).toContain(`${home}/.gnupg`); + }); + + it("maps network: 'none' to allowedDomains: []", () => { + const policy = { ...DEFAULT_POLICY, network: "none" as const }; + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.network.allowedDomains).toEqual([]); + }); + + it("maps network object to allowedDomains/deniedDomains", () => { + const policy: SandboxPolicy = { + ...DEFAULT_POLICY, + network: { allowedDomains: ["github.com", "*.npmjs.org"], deniedDomains: ["evil.com"] }, + }; + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.network.allowedDomains).toEqual(["github.com", "*.npmjs.org"]); + expect(rtConfig.network.deniedDomains).toEqual(["evil.com"]); + }); + + it("forDevelopment().toRuntimeConfig() includes cwd in allowWrite", () => { + const cwd = "/home/user/project"; + const policy = PolicyBuilder.forDevelopment(cwd); + const rtConfig = PolicyBuilder.toRuntimeConfig(policy); + expect(rtConfig.filesystem.allowWrite).toContain(cwd); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- policy-builder.test +``` + +Expected: FAIL — `toRuntimeConfig` not defined. + +- [ ] **Step 3: Add `toRuntimeConfig()` to `PolicyBuilder` in `policy-builder.ts`** + +Add imports at the top: +```typescript +import { homedir } from "node:os"; +import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; +import type { SandboxPolicy, NetworkPolicy } from "./types.js"; +``` + +Add after the `build()` method and before `forDevelopment()`: + +```typescript +/** + * Translates a SafeClaw SandboxPolicy into a SandboxRuntimeConfig for + * @anthropic-ai/sandbox-runtime. + * + * Read model difference: SafeClaw uses an allowlist (Landlock); sandbox-runtime + * is permissive-by-default with an explicit denylist. We translate by denying + * the sensitive credential dirs that must never be readable. + * + * Write model: both use allowlists. 
PathRules with access "readwrite" or + * "readwriteexecute" map to filesystem.allowWrite. + */ +static toRuntimeConfig(policy: SandboxPolicy): SandboxRuntimeConfig { + // ── Filesystem ──────────────────────────────────────────────────── + const allowWrite = policy.filesystem.allow + .filter((r) => r.access === "readwrite" || r.access === "readwriteexecute") + .map((r) => r.path); + + // Always deny reads to credential/secret directories. + // sandbox-runtime also enforces mandatory deny on dangerous files (.bashrc, + // .git/hooks, etc.) regardless of this config — these are complementary. + const home = homedir(); + const denyRead = [ + `${home}/.ssh`, + `${home}/.aws`, + `${home}/.gnupg`, + `${home}/.kube`, + `${home}/.docker`, + `${home}/.gcloud`, + `${home}/.azure`, + ]; + + // ── Network ─────────────────────────────────────────────────────── + const network = buildNetworkConfig(policy.network); + + return { + filesystem: { + allowWrite, + denyWrite: [], + denyRead, + }, + network, + }; +} +``` + +Add the private helper after the class: + +```typescript +function buildNetworkConfig( + network: NetworkPolicy, +): SandboxRuntimeConfig["network"] { + if (network === "none") { + return { allowedDomains: [], deniedDomains: [] }; + } + if (network === "localhost") { + return { allowedDomains: ["localhost"], deniedDomains: [] }; + } + return { + allowedDomains: network.allowedDomains, + deniedDomains: network.deniedDomains ?? [], + }; +} +``` + +Also add the `DevelopmentPolicyOptions` extension for network: + +```typescript +export interface DevelopmentPolicyOptions { + extraExecutePaths?: string[]; + extraReadWritePaths?: string[]; + extraReadOnlyPaths?: string[]; + /** + * Network domains the sandboxed process may connect to. + * Default: [] (block all network). Use this to allow e.g. npm registry. 
+ * Example: ["registry.npmjs.org", "*.github.com"] + */ + allowedNetworkDomains?: string[]; +} +``` + +And update `forDevelopment()` to apply it (near the end of the method): + +```typescript +// ── Network ────────────────────────────────────────────────────────── +const networkPolicy: NetworkPolicy = + options?.allowedNetworkDomains !== undefined + ? { allowedDomains: options.allowedNetworkDomains } + : "none"; + +return { ...builder.build(), network: networkPolicy }; +``` + +> Note: `forDevelopment()` currently calls `builder.build()` which hard-codes `network: "none"`. Extract to a local variable so the override can be applied. + +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- policy-builder.test +``` + +Expected: PASS. + +- [ ] **Step 5: Typecheck** + +```bash +pnpm typecheck +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add packages/sandbox/src/policy-builder.ts packages/sandbox/src/policy-builder.test.ts +git commit -m "feat(sandbox): add PolicyBuilder.toRuntimeConfig() translating to SandboxRuntimeConfig" +``` + +--- + +### Task 4: Update detect.ts + +**Files:** +- Modify: `packages/sandbox/src/detect.ts` +- Modify: `packages/sandbox/src/detect.test.ts` + +Background: `detect.ts` currently checks `unshare`, Landlock kernel version, and seccomp. We replace this with sandbox-runtime's `SandboxManager.checkDependencies()` for the platform-specific checks, and probe for `bwrap` directly for `KernelCapabilities`. The Linux-only restriction is removed — macOS is now supported via `sandbox-exec`. 
+ +- [ ] **Step 1: Write failing detect tests** + +Replace the existing `detect.test.ts` with: + +```typescript +import { describe, it, expect, vi, beforeEach } from "vitest"; + +const mockIsSupportedPlatform = vi.fn<() => boolean>(); +const mockCheckDeps = vi.fn<() => { errors: string[]; warnings: string[] }>(); +const mockWhichBwrap = vi.fn<() => string | null>(); + +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSupportedPlatform: mockIsSupportedPlatform, + checkDependencies: mockCheckDeps, + }, +})); + +vi.mock("node:child_process", () => ({ + execFileSync: mockWhichBwrap, +})); + +const { detectKernelCapabilities, assertSandboxSupported } = await import("./detect.js"); + +describe("detectKernelCapabilities()", () => { + beforeEach(() => vi.clearAllMocks()); + + it("reports bwrap available when which bwrap succeeds", () => { + mockWhichBwrap.mockReturnValue("/usr/bin/bwrap"); + const caps = detectKernelCapabilities(); + expect(caps.bwrap.available).toBe(true); + expect(caps.bwrap.path).toBe("/usr/bin/bwrap"); + }); + + it("reports bwrap unavailable when which bwrap fails", () => { + mockWhichBwrap.mockImplementation(() => { throw new Error("not found"); }); + const caps = detectKernelCapabilities(); + expect(caps.bwrap.available).toBe(false); + expect(caps.bwrap.path).toBeUndefined(); + }); +}); + +describe("assertSandboxSupported()", () => { + beforeEach(() => vi.clearAllMocks()); + + it("does not throw when platform is supported and deps are OK", () => { + mockIsSupportedPlatform.mockReturnValue(true); + mockCheckDeps.mockReturnValue({ errors: [], warnings: [] }); + expect(() => assertSandboxSupported()).not.toThrow(); + }); + + it("throws when platform is not supported", () => { + mockIsSupportedPlatform.mockReturnValue(false); + mockCheckDeps.mockReturnValue({ errors: [], warnings: [] }); + expect(() => assertSandboxSupported()).toThrow(/platform/i); + }); + + it("throws when sandbox-runtime deps are missing", () => { + 
mockIsSupportedPlatform.mockReturnValue(true); + mockCheckDeps.mockReturnValue({ errors: ["bubblewrap not found"], warnings: [] }); + expect(() => assertSandboxSupported()).toThrow(/bubblewrap not found/); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- detect.test +``` + +Expected: FAIL — detect.ts doesn't use SandboxManager yet. + +- [ ] **Step 3: Rewrite `packages/sandbox/src/detect.ts`** + +All imports must be at the top of the file — ESM hoists them automatically but oxlint enforces `import/first` (zero lint diagnostics required). + +```typescript +import { execFileSync } from "node:child_process"; +import { readFileSync, existsSync } from "node:fs"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; +import type { KernelCapabilities } from "./types.js"; + +/** + * Probes system capabilities relevant to sandboxing. + * Returns KernelCapabilities with bwrap probe on Linux; on macOS the + * bwrap fields are always unavailable (macOS uses sandbox-exec instead). + */ +export function detectKernelCapabilities(): KernelCapabilities { + let bwrapPath: string | undefined; + let bwrapVersion: string | undefined; + + try { + bwrapPath = execFileSync("which", ["bwrap"], { encoding: "utf8" }).trim(); + try { + bwrapVersion = execFileSync("bwrap", ["--version"], { encoding: "utf8" }) + .trim() + .split("\n")[0]; + } catch { + // version flag not supported or bwrap not runnable — path is still valid + } + } catch { + // bwrap not on PATH + } + + // Landlock / seccomp / namespace detection is Linux-only; on macOS these + // are reported as false (ABI version 0) since sandbox-runtime uses sandbox-exec there. + const isLinux = process.platform === "linux"; + + return { + landlock: { + supported: isLinux ? detectLandlock() : false, + abiVersion: isLinux ? detectLandlockAbi() : 0, + }, + seccomp: { supported: isLinux ? detectSeccomp() : false }, + namespaces: { + user: isLinux ? 
existsSync("/proc/self/ns/user") : false, + pid: isLinux ? existsSync("/proc/self/ns/pid") : false, + net: isLinux ? existsSync("/proc/self/ns/net") : false, + mnt: isLinux ? existsSync("/proc/self/ns/mnt") : false, + }, + bwrap: { + available: bwrapPath !== undefined, + path: bwrapPath, + version: bwrapVersion, + }, + }; +} + +/** + * Throws a descriptive error if the current platform and dependencies + * do not support sandbox-runtime isolation. + */ +export function assertSandboxSupported(): KernelCapabilities { + if (!SandboxManager.isSupportedPlatform()) { + throw new Error( + `SafeClaw sandbox is not supported on this platform (${process.platform}). ` + + `Supported: Linux (kernel ≥ 5.13, bubblewrap, socat, ripgrep) and macOS.`, + ); + } + + const deps = SandboxManager.checkDependencies(); + if (deps.errors.length > 0) { + throw new Error( + `Sandbox dependencies missing: ${deps.errors.join(", ")}. ` + + `On Linux install: apt install bubblewrap socat ripgrep`, + ); + } + + return detectKernelCapabilities(); +} + +// ── Linux helpers ────────────────────────────────────────────────────── + +const LANDLOCK_MIN_KERNEL: [number, number] = [5, 13]; + +function parseKernelVersion(release: string): [number, number] { + const parts = release.trim().split("."); + return [parseInt(parts[0] ?? "0", 10), parseInt(parts[1] ?? 
"0", 10)]; +} + +function detectLandlock(): boolean { + try { + const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); + const [major, minor] = parseKernelVersion(release); + return ( + major > LANDLOCK_MIN_KERNEL[0] || + (major === LANDLOCK_MIN_KERNEL[0] && minor >= LANDLOCK_MIN_KERNEL[1]) + ); + } catch { + return false; + } +} + +function detectLandlockAbi(): number { + try { + const release = readFileSync("/proc/sys/kernel/osrelease", "utf8"); + const [major, minor] = parseKernelVersion(release); + if (major > 6 || (major === 6 && minor >= 2)) return 3; + if (major > 5 || (major === 5 && minor >= 19)) return 2; + if (major > 5 || (major === 5 && minor >= 13)) return 1; + return 0; + } catch { + return 0; + } +} + +function detectSeccomp(): boolean { + try { + const status = readFileSync("/proc/self/status", "utf8"); + return /Seccomp:\s*[12]/.test(status); + } catch { + return false; + } +} +``` + +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- detect.test +``` + +Expected: PASS. + +- [ ] **Step 5: Run all sandbox tests** + +```bash +pnpm test --filter @safeclaw/sandbox +``` + +Expected: PASS (previous tests still work). + +- [ ] **Step 6: Commit** + +```bash +git add packages/sandbox/src/detect.ts packages/sandbox/src/detect.test.ts +git commit -m "feat(sandbox): replace unshare detection with sandbox-runtime dependency checks" +``` + +--- + +### Task 5: Rewrite Sandbox.execute() to use SandboxManager + +**Files:** +- Modify: `packages/sandbox/src/sandbox.ts` +- Modify: `packages/sandbox/src/sandbox.test.ts` + +Background: `Sandbox.execute()` currently spawns `unshare [flags] -- helper -- command`. We replace this with: `SandboxManager.wrapWithSandbox(shellCmd, undefined, rtConfig)` which returns a shell command string, then spawn via `/bin/sh -c`. The C helper integration (Landlock/cap-drop) is deferred to Task 8. 
+ +`SandboxManager.initialize()` **must be called** in `bootstrapAgent()` (Task 6) before constructing `Sandbox`. The `Sandbox` class verifies this at construction time. + +Shell quoting: a POSIX single-quote escape avoids adding a new dependency. + +- [ ] **Step 1: Write failing tests in `sandbox.test.ts`** + +Replace the existing test file: + +```typescript +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { DEFAULT_POLICY } from "./types.js"; +import type { KernelCapabilities } from "./types.js"; + +// Mock sandbox-runtime and helper before dynamic import +const mockAssertSandboxSupported = vi.fn<() => KernelCapabilities>(); +const mockFindHelper = vi.fn<() => string | undefined>(); +const mockWrapWithSandbox = vi.fn<(cmd: string) => Promise<string>>(); +const mockIsSupportedPlatform = vi.fn<() => boolean>(); +const mockIsSandboxingEnabled = vi.fn<() => boolean>(); +const mockCleanupAfterCommand = vi.fn<() => void>(); + +vi.mock("./detect.js", () => ({ + assertSandboxSupported: mockAssertSandboxSupported, +})); + +vi.mock("./helper.js", () => ({ + findHelper: () => mockFindHelper(), +})); + +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + isSupportedPlatform: mockIsSupportedPlatform, + isSandboxingEnabled: mockIsSandboxingEnabled, + wrapWithSandbox: mockWrapWithSandbox, + cleanupAfterCommand: mockCleanupAfterCommand, + }, +})); + +const { Sandbox } = await import("./sandbox.js"); + +const FULL_CAPS: KernelCapabilities = { + landlock: { supported: true, abiVersion: 3 }, + seccomp: { supported: true }, + namespaces: { user: true, pid: true, net: true, mnt: true }, + bwrap: { available: true, path: "/usr/bin/bwrap", version: "0.9.0" }, +}; + +describe("Sandbox", () => { + beforeEach(() => { + vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); + mockFindHelper.mockReturnValue(undefined); + }); + + it("constructor calls assertSandboxSupported", () => { + 
new Sandbox(DEFAULT_POLICY); + expect(mockAssertSandboxSupported).toHaveBeenCalledOnce(); + }); + + it("constructor throws if not initialized (isSandboxingEnabled returns false)", () => { + mockIsSandboxingEnabled.mockReturnValue(false); + expect(() => new Sandbox(DEFAULT_POLICY)).toThrow(/initialize/i); + }); + + it("getPolicy returns a copy of the policy", () => { + const sandbox = new Sandbox(DEFAULT_POLICY); + const policy = sandbox.getPolicy(); + expect(policy).toEqual(DEFAULT_POLICY); + expect(policy).not.toBe(DEFAULT_POLICY); + }); +}); + +describe("Sandbox.execute()", () => { + beforeEach(() => { + vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); + mockFindHelper.mockReturnValue(undefined); + }); + + it("calls wrapWithSandbox with shell-quoted command", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/echo hello"); + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/echo", ["hello"]); + expect(mockWrapWithSandbox).toHaveBeenCalledOnce(); + const wrappedArg: string = mockWrapWithSandbox.mock.calls[0]![0]!; + expect(wrappedArg).toContain("echo"); + expect(wrappedArg).toContain("hello"); + }); + + it("calls cleanupAfterCommand after execution", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/true", []); + expect(mockCleanupAfterCommand).toHaveBeenCalledOnce(); + }); + + it("returns stdout and exitCode from the spawned command", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/echo hello"); + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/echo", ["hello"]); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("hello"); + }); + + it("kills process after timeout and returns killReason=timeout", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/sleep 60"); + const policy = { 
...DEFAULT_POLICY, timeoutMs: 100 }; + const sandbox = new Sandbox(policy); + const result = await sandbox.execute("/bin/sleep", ["60"]); + expect(result.killed).toBe(true); + expect(result.killReason).toBe("timeout"); + }); + + it("reports pivotRoot=true and bindMounts=true on Linux", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + // These are set based on platform; in CI (Linux) both should be true + expect(typeof result.enforcement?.pivotRoot).toBe("boolean"); + expect(typeof result.enforcement?.bindMounts).toBe("boolean"); + }); + + it("calls cleanupAfterCommand even when command fails", async () => { + mockWrapWithSandbox.mockResolvedValue("/bin/false"); + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/false", []); + expect(mockCleanupAfterCommand).toHaveBeenCalledOnce(); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- sandbox.test +``` + +Expected: FAIL. + +- [ ] **Step 3: Rewrite `packages/sandbox/src/sandbox.ts`** + +Note: remove the now-dead `buildUnshareFlags()` private method from the old code — it will trigger `no-unused-vars` under oxlint. + +```typescript +import { spawn } from "node:child_process"; +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; +import type { SandboxPolicy, SandboxResult, EnforcementLayers } from "./types.js"; +import { assertSandboxSupported } from "./detect.js"; +import { PolicyBuilder } from "./policy-builder.js"; + +/** POSIX single-quote shell escaping. Safe for all byte values. 
*/ +function shEscape(arg: string): string { + return "'" + arg.replace(/'/g, "'\\''") + "'"; +} + +export class Sandbox { + private readonly policy: SandboxPolicy; + + constructor(policy: SandboxPolicy) { + assertSandboxSupported(); + if (!SandboxManager.isSandboxingEnabled()) { + throw new Error( + "SandboxManager is not initialized. Call SandboxManager.initialize() " + + "before constructing a Sandbox (see bootstrapAgent()).", + ); + } + this.policy = policy; + } + + async execute(command: string, args: string[]): Promise { + const start = performance.now(); + const timeout = this.policy.timeoutMs ?? 30_000; + + // Build the inner shell command. In Phase 1 the helper is not injected; + // Task 8 adds `--policy-file` injection for Landlock + cap-drop. + const shellCmd = [command, ...args].map(shEscape).join(" "); + + // Translate SafeClaw policy to sandbox-runtime config + const rtConfig = PolicyBuilder.toRuntimeConfig(this.policy); + + // Wrap via sandbox-runtime (bwrap on Linux, sandbox-exec on macOS) + const wrappedCmd = await SandboxManager.wrapWithSandbox( + shellCmd, + undefined, + rtConfig, + ); + + const isLinux = process.platform === "linux"; + const enforcement: EnforcementLayers = { + namespaces: isLinux, + pivotRoot: isLinux, + bindMounts: true, + landlock: false, // Phase 2: re-enabled when helper is injected + seccomp: isLinux, // sandbox-runtime applies seccomp for unix socket blocking on Linux + capDrop: false, // Phase 2: re-enabled when helper is injected + }; + + return new Promise((resolve) => { + const stdoutChunks: Buffer[] = []; + const stderrChunks: Buffer[] = []; + let killed = false; + let killReason: "timeout" | "oom" | "signal" | undefined; + + const proc = spawn("/bin/sh", ["-c", wrappedCmd], { + stdio: ["ignore", "pipe", "pipe"], + detached: true, + }); + + const timer = setTimeout(() => { + killed = true; + killReason = "timeout"; + if (proc.pid !== undefined) { + try { + process.kill(-proc.pid, "SIGKILL"); + } catch { + 
proc.kill("SIGKILL"); + } + } else { + proc.kill("SIGKILL"); + } + }, timeout); + + proc.stdout!.on("data", (chunk: Buffer) => stdoutChunks.push(chunk)); + proc.stderr!.on("data", (chunk: Buffer) => stderrChunks.push(chunk)); + + proc.on("close", (code: number | null) => { + clearTimeout(timer); + // Clean up bwrap leftover mount points (no-op on macOS) + SandboxManager.cleanupAfterCommand(); + resolve({ + exitCode: code ?? 1, + stdout: Buffer.concat(stdoutChunks).toString(), + stderr: Buffer.concat(stderrChunks).toString(), + durationMs: performance.now() - start, + killed, + killReason, + enforcement, + }); + }); + + proc.on("error", (err: Error) => { + clearTimeout(timer); + SandboxManager.cleanupAfterCommand(); + resolve({ + exitCode: 1, + stdout: "", + stderr: err.message, + durationMs: performance.now() - start, + killed: false, + enforcement, + }); + }); + }); + } + + getPolicy(): SandboxPolicy { + return structuredClone(this.policy); + } +} +``` + +Note: the `findHelper` import is unused in Phase 1 — remove it to avoid lint errors. It will be re-added in Task 8. + +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- sandbox.test +``` + +Expected: PASS. + +- [ ] **Step 5: Run all sandbox tests** + +```bash +pnpm test --filter @safeclaw/sandbox +``` + +Expected: PASS. + +- [ ] **Step 6: Typecheck** + +```bash +pnpm typecheck +``` + +Expected: PASS. + +- [ ] **Step 7: Commit** + +```bash +git add packages/sandbox/src/sandbox.ts packages/sandbox/src/sandbox.test.ts +git commit -m "feat(sandbox): rewrite Sandbox.execute() to use SandboxManager.wrapWithSandbox()" +``` + +--- + +### Task 6: Initialize SandboxManager in bootstrapAgent + +**Files:** +- Modify: `packages/cli/src/commands/bootstrap.ts` +- Modify: `packages/cli/src/commands/bootstrap.test.ts` + +Background: `SandboxManager.initialize(config)` is async and starts the network proxy infrastructure (HTTP proxy, SOCKS5 proxy, socat Unix socket bridges on Linux). 
It must be called once before any `Sandbox` is constructed or `wrapWithSandbox()` is called. It also registers a process `exit`/`SIGINT`/`SIGTERM` handler for cleanup automatically. + +The base config passed to `initialize()` sets up the proxy servers. Per-call `customConfig` in `wrapWithSandbox()` (passed from `PolicyBuilder.toRuntimeConfig()`) overrides filesystem and network restrictions per execution. + +- [ ] **Step 1: Write failing test for SandboxManager initialization** + +In `bootstrap.test.ts`, add a test that verifies `SandboxManager.initialize` was called: + +```typescript +// At the top of the mock setup in bootstrap.test.ts, add: +const mockSandboxManagerInitialize = vi.fn<() => Promise>().mockResolvedValue(undefined); +const mockSandboxManagerIsSupportedPlatform = vi.fn<() => boolean>().mockReturnValue(true); +const mockSandboxManagerIsSandboxingEnabled = vi.fn<() => boolean>().mockReturnValue(true); + +vi.mock("@anthropic-ai/sandbox-runtime", () => ({ + SandboxManager: { + initialize: mockSandboxManagerInitialize, + isSupportedPlatform: mockSandboxManagerIsSupportedPlatform, + isSandboxingEnabled: mockSandboxManagerIsSandboxingEnabled, + wrapWithSandbox: vi.fn().mockResolvedValue("/bin/true"), + cleanupAfterCommand: vi.fn(), + reset: vi.fn().mockResolvedValue(undefined), + }, +})); + +// Add test: +it("calls SandboxManager.initialize before constructing Sandbox", async () => { + await bootstrapAgent(validDeps); + expect(mockSandboxManagerInitialize).toHaveBeenCalledOnce(); + // initialize must be called before Sandbox is constructed + // (verified by order — assertSandboxSupported mock checks isSandboxingEnabled) +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/cli -- bootstrap.test +``` + +Expected: FAIL — `SandboxManager.initialize` not called yet. 
+ +- [ ] **Step 3: Add `SandboxManager.initialize()` call to `bootstrapAgent()`** + +In `bootstrap.ts`, add import: +```typescript +import { SandboxManager } from "@anthropic-ai/sandbox-runtime"; +``` + +In `bootstrapAgent()`, before the `let sandbox: Sandbox | undefined` block (around line 185), add: + +```typescript +// Initialize sandbox-runtime network proxy infrastructure. +// Uses a base "block all network" config; per-execution configs are passed +// as customConfig in Sandbox.execute() → SandboxManager.wrapWithSandbox(). +try { + await SandboxManager.initialize({ + filesystem: { allowWrite: [], denyWrite: [], denyRead: [] }, + network: { allowedDomains: [], deniedDomains: [] }, + }); +} catch (err: unknown) { + const detail = err instanceof Error ? err.message : String(err); + output.write( + `Warning: sandbox network proxy failed to initialize (${detail}). ` + + `Filesystem isolation will still be applied.\n`, + ); +} +``` + +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/cli -- bootstrap.test +``` + +Expected: PASS. + +- [ ] **Step 5: Run all CLI tests** + +```bash +pnpm test --filter @safeclaw/cli +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add packages/cli/src/commands/bootstrap.ts packages/cli/src/commands/bootstrap.test.ts +git commit -m "feat(cli): initialize SandboxManager network proxy before constructing Sandbox" +``` + +--- + +### Task 7: Update doctor checks + +**Files:** +- Modify: `packages/cli/src/commands/doctor-checks.ts` +- Modify: `packages/cli/src/commands/doctor-checks.test.ts` +- Modify: `packages/cli/src/commands/doctor.ts` + +Background: `unshareCheck` is replaced by `bwrapCheck`. Two new checks are added: `socatCheck` (Linux only; socat bridges proxy sockets into the bwrap network namespace) and `ripgrepCheck` (sandbox-runtime uses ripgrep to scan for dangerous files before each command). The `sandboxHelperCheck` remains but is downgraded to `warn` since the helper is now optional. 
+ +- [ ] **Step 1: Write failing tests for new checks** + +Add to `doctor-checks.test.ts`: + +```typescript +describe("bwrapCheck", () => { + it("passes when bwrap is available", async () => { + const check = bwrapCheck({ execFileSync: () => "/usr/bin/bwrap" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + expect(result.message).toMatch(/bwrap/); + }); + + it("fails when bwrap is not found", async () => { + const check = bwrapCheck({ execFileSync: () => { throw new Error("not found"); } }); + const result = await check.run(); + expect(result.status).toBe("fail"); + expect(result.detail).toMatch(/apt install bubblewrap/); + }); +}); + +describe("socatCheck", () => { + it("passes when socat is available on linux", async () => { + const check = socatCheck({ + execFileSync: () => "/usr/bin/socat", + platform: "linux", + }); + const result = await check.run(); + expect(result.status).toBe("pass"); + }); + + it("skips on macOS (socat not required)", async () => { + const check = socatCheck({ + execFileSync: () => { throw new Error(); }, + platform: "darwin", + }); + const result = await check.run(); + expect(result.status).toBe("pass"); + expect(result.message).toMatch(/not required/); + }); + + it("fails when socat is missing on linux", async () => { + const check = socatCheck({ + execFileSync: () => { throw new Error("not found"); }, + platform: "linux", + }); + const result = await check.run(); + expect(result.status).toBe("fail"); + expect(result.detail).toMatch(/apt install socat/); + }); +}); + +describe("ripgrepCheck", () => { + it("passes when rg is available", async () => { + const check = ripgrepCheck({ execFileSync: () => "/usr/bin/rg" }); + const result = await check.run(); + expect(result.status).toBe("pass"); + }); + + it("fails when rg is not found", async () => { + const check = ripgrepCheck({ execFileSync: () => { throw new Error(); } }); + const result = await check.run(); + expect(result.status).toBe("fail"); + 
expect(result.detail).toMatch(/apt install ripgrep/); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/cli -- doctor-checks.test +``` + +Expected: FAIL — `bwrapCheck`, `socatCheck`, `ripgrepCheck` not defined. + +- [ ] **Step 3: Add checks to `doctor-checks.ts`** + +Replace `unshareCheck` with `bwrapCheck`, and add `socatCheck` and `ripgrepCheck`: + +```typescript +// Remove unshareCheck entirely, add: + +export interface BwrapDeps { + execFileSync: (cmd: string, args: string[]) => string; +} + +export function bwrapCheck( + deps: BwrapDeps = { + execFileSync: (cmd, args) => defaultExecFileSync(cmd, args, { encoding: "utf8" }), + }, +): DiagnosticCheck { + return { + name: "bwrap", + category: "security", + async run(): Promise { + try { + const path = deps.execFileSync("which", ["bwrap"]).trim(); + return { status: "pass", message: `bubblewrap: ${path}` }; + } catch { + return { + status: "fail", + message: "bubblewrap not found", + detail: + "bubblewrap is required for filesystem isolation on Linux. " + + "Install: apt install bubblewrap", + }; + } + }, + }; +} + +export interface SocatDeps { + execFileSync: (cmd: string, args: string[]) => string; + platform: string; +} + +export function socatCheck( + deps: SocatDeps = { + execFileSync: (cmd, args) => defaultExecFileSync(cmd, args, { encoding: "utf8" }), + platform: process.platform, + }, +): DiagnosticCheck { + return { + name: "socat", + category: "security", + async run(): Promise { + if (deps.platform !== "linux") { + return { status: "pass", message: "socat not required on this platform" }; + } + try { + const path = deps.execFileSync("which", ["socat"]).trim(); + return { status: "pass", message: `socat: ${path}` }; + } catch { + return { + status: "fail", + message: "socat not found", + detail: + "socat is required for network proxy bridging on Linux. 
" + + "Install: apt install socat", + }; + } + }, + }; +} + +export interface RipgrepDeps { + execFileSync: (cmd: string, args: string[]) => string; +} + +export function ripgrepCheck( + deps: RipgrepDeps = { + execFileSync: (cmd, args) => defaultExecFileSync(cmd, args, { encoding: "utf8" }), + }, +): DiagnosticCheck { + return { + name: "ripgrep", + category: "security", + async run(): Promise { + try { + const path = deps.execFileSync("which", ["rg"]).trim(); + return { status: "pass", message: `ripgrep: ${path}` }; + } catch { + return { + status: "fail", + message: "ripgrep (rg) not found", + detail: + "ripgrep is required by sandbox-runtime to scan for dangerous files " + + "before each sandboxed command. Install: apt install ripgrep", + }; + } + }, + }; +} +``` + +Also update `sandboxHelperCheck` message — change `status: "warn"` detail to note the helper is optional (provides Landlock + cap-drop) rather than required: + +```typescript +return { + status: "warn", + message: "Sandbox helper not found", + detail: + "The native sandbox helper binary is not installed. " + + "Filesystem and network isolation via bubblewrap will still apply, " + + "but Landlock and capability-dropping will be inactive. " + + "Run 'make -C native' to build it.", +}; +``` + +- [ ] **Step 4: Update `doctor.ts` check list** + +Find where `unshareCheck` is registered and replace it. Also update `linuxCheck` — sandbox-runtime now supports macOS so the hard Linux requirement is gone. 
Replace `linuxCheck` with a `platformCheck` that passes on both `linux` and `darwin`: + +```typescript +// In doctor-checks.ts, replace linuxCheck with: +export function platformCheck( + deps: PlatformDeps = { platform: process.platform }, +): DiagnosticCheck { + return { + name: "platform", + category: "system", + async run(): Promise { + if (deps.platform === "linux" || deps.platform === "darwin") { + return { status: "pass", message: `Platform: ${deps.platform} (supported)` }; + } + return { + status: "fail", + message: `Platform ${deps.platform} is not supported`, + detail: "SafeClaw requires Linux or macOS.", + }; + }, + }; +} +``` + +In `doctor.ts`, swap: +```typescript +// Remove: linuxCheck() +// Remove: unshareCheck() +// Add: +platformCheck(), +bwrapCheck(), +socatCheck(), +ripgrepCheck(), +``` + +Export `platformCheck`, `bwrapCheck`, `socatCheck`, `ripgrepCheck` from `doctor-checks.ts`. + +Add tests for `platformCheck`: +```typescript +describe("platformCheck", () => { + it("passes on linux", async () => { + const r = await platformCheck({ platform: "linux" }).run(); + expect(r.status).toBe("pass"); + }); + it("passes on darwin", async () => { + const r = await platformCheck({ platform: "darwin" }).run(); + expect(r.status).toBe("pass"); + }); + it("fails on win32", async () => { + const r = await platformCheck({ platform: "win32" }).run(); + expect(r.status).toBe("fail"); + }); +}); +``` + +- [ ] **Step 5: Run tests** + +```bash +pnpm test --filter @safeclaw/cli -- doctor-checks.test +pnpm test --filter @safeclaw/cli -- doctor.test +``` + +Expected: PASS on both. + +- [ ] **Step 6: Full build + test** + +```bash +pnpm build && pnpm test +``` + +Expected: all tests pass, zero lint errors. + +```bash +pnpm lint +``` + +Expected: zero diagnostics. 
+ +- [ ] **Step 7: Commit** + +```bash +git add packages/cli/src/commands/doctor-checks.ts \ + packages/cli/src/commands/doctor-checks.test.ts \ + packages/cli/src/commands/doctor.ts +git commit -m "feat(cli): replace unshareCheck with bwrap/socat/ripgrep checks for sandbox-runtime" +``` + +--- + +## Phase 2 — C helper as inner Landlock + cap-drop layer + +### Task 8: Inject C helper inside bwrap via --policy-file + +**Files:** +- Modify: `packages/sandbox/src/sandbox.ts` +- Modify: `packages/sandbox/src/sandbox.test.ts` + +Background: The C helper at `native/safeclaw-sandbox-helper` already supports `--policy-file <path>`, which reads a policy JSON file instead of fd 3. The file must have mode `0600` and be owned by the current user (enforced by `policy_read_file()` in `policy.c`). + +When the helper is present, the spawn chain becomes: +``` +/bin/sh -c "bwrap [sandbox-runtime args] /bin/sh -c '<helper> --policy-file <path> -- <command> [args]'" +``` + +The helper binary and temp file directory (`/tmp`) must be accessible inside the bwrap container. `/tmp` is always bind-mounted by sandbox-runtime. The helper path (e.g. `/usr/local/bin/safeclaw-sandbox-helper` or `~/.safeclaw/bin/safeclaw-sandbox-helper`) must either be in a system path included by bwrap, or explicitly added to `allowWrite`. 
+ +- [ ] **Step 1: Write failing tests** + +Add to `sandbox.test.ts`: + +```typescript +describe("Sandbox.execute() with helper", () => { + beforeEach(() => { + vi.clearAllMocks(); + mockAssertSandboxSupported.mockReturnValue(FULL_CAPS); + mockIsSandboxingEnabled.mockReturnValue(true); + }); + + it("includes --policy-file in the inner command when helper is found", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockImplementation(async (cmd: string) => cmd); + + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/echo", ["hello"]); + + const innerCmd: string = mockWrapWithSandbox.mock.calls[0]![0]!; + expect(innerCmd).toContain("safeclaw-sandbox-helper"); + expect(innerCmd).toContain("--policy-file"); + expect(innerCmd).toContain("--"); + expect(innerCmd).toContain("echo"); + }); + + it("sets enforcement.landlock=true and enforcement.capDrop=true when helper is found", async () => { + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + + expect(result.enforcement?.landlock).toBe(true); + expect(result.enforcement?.capDrop).toBe(true); + }); + + it("does NOT set landlock/capDrop when helper is not found", async () => { + mockFindHelper.mockReturnValue(undefined); + mockWrapWithSandbox.mockResolvedValue("/bin/true"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + const result = await sandbox.execute("/bin/true", []); + + expect(result.enforcement?.landlock).toBe(false); + expect(result.enforcement?.capDrop).toBe(false); + }); + + it("cleans up policy temp file even if command fails", async () => { + // Mock node:fs so we can verify writeFileSync and rmSync are both called + const mockWriteFileSync = vi.fn(); + const mockRmSync = vi.fn(); + vi.mock("node:fs", () => ({ + writeFileSync: 
mockWriteFileSync, + rmSync: mockRmSync, + })); + + mockFindHelper.mockReturnValue("/usr/local/bin/safeclaw-sandbox-helper"); + mockWrapWithSandbox.mockResolvedValue("/bin/false"); + + const sandbox = new Sandbox(DEFAULT_POLICY); + await sandbox.execute("/bin/false", []); + + expect(mockWriteFileSync).toHaveBeenCalledOnce(); + // rmSync must be called with force:true to clean up the temp file + expect(mockRmSync).toHaveBeenCalledWith( + expect.stringContaining("safeclaw-policy-"), + { force: true }, + ); + }); +}); +``` + +- [ ] **Step 2: Run to verify it fails** + +```bash +pnpm test --filter @safeclaw/sandbox -- sandbox.test +``` + +Expected: FAIL — helper injection not yet implemented. + +- [ ] **Step 3: Add policy-file injection to `Sandbox.execute()`** + +Update `packages/sandbox/src/sandbox.ts`. Add imports: + +```typescript +import { writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { findHelper } from "./helper.js"; +``` + +In `execute()`, replace the `shellCmd` / `wrappedCmd` section with: + +```typescript +const helperPath = findHelper(); +const useHelper = helperPath !== undefined; + +let policyTmpPath: string | undefined; +let innerCmd: string; + +if (useHelper) { + // Write policy JSON to a temp file (mode 0600, as required by policy_read_file). + // The helper enforces the permissions check itself — this is defense-in-depth. + policyTmpPath = join( + tmpdir(), + `safeclaw-policy-${process.pid}-${Date.now()}.json`, + ); + writeFileSync( + policyTmpPath, + JSON.stringify({ + filesystem: this.policy.filesystem, + syscalls: this.policy.syscalls, + }), + { mode: 0o600 }, + ); + innerCmd = [ + helperPath, + "--policy-file", policyTmpPath, + "--", + command, + ...args, + ] + .map(shEscape) + .join(" "); +} else { + innerCmd = [command, ...args].map(shEscape).join(" "); +} + +// Translate to sandbox-runtime config. 
When helper is present, add its +// directory to allowWrite so bwrap bind-mounts it into the container. +const rtConfig = PolicyBuilder.toRuntimeConfig(this.policy); +if (useHelper && helperPath !== undefined) { + const helperDir = helperPath.substring(0, helperPath.lastIndexOf("/")); + // Only add if not already a system path (system paths are included by bwrap automatically) + const systemPaths = ["/bin", "/usr/bin", "/usr/local/bin", "/sbin", "/usr/sbin"]; + if (!systemPaths.includes(helperDir)) { + rtConfig.filesystem.allowWrite = [ + ...rtConfig.filesystem.allowWrite, + helperDir, + ]; + } +} + +const wrappedCmd = await SandboxManager.wrapWithSandbox(innerCmd, undefined, rtConfig); +``` + +Update `enforcement` to reflect helper presence: + +```typescript +const enforcement: EnforcementLayers = { + namespaces: isLinux, + pivotRoot: isLinux, + bindMounts: true, + landlock: useHelper, + seccomp: isLinux, + capDrop: useHelper, +}; +``` + +Add cleanup of `policyTmpPath` in both `close` and `error` handlers: + +```typescript +proc.on("close", (code: number | null) => { + clearTimeout(timer); + if (policyTmpPath !== undefined) { + try { rmSync(policyTmpPath, { force: true }); } catch { /* ignore */ } + } + SandboxManager.cleanupAfterCommand(); + resolve({ ... }); +}); + +proc.on("error", (err: Error) => { + clearTimeout(timer); + if (policyTmpPath !== undefined) { + try { rmSync(policyTmpPath, { force: true }); } catch { /* ignore */ } + } + SandboxManager.cleanupAfterCommand(); + resolve({ ... }); +}); +``` + +- [ ] **Step 4: Run tests** + +```bash +pnpm test --filter @safeclaw/sandbox -- sandbox.test +``` + +Expected: PASS. + +- [ ] **Step 5: Full build + test + lint** + +```bash +pnpm build && pnpm test && pnpm lint +``` + +Expected: all pass, zero lint diagnostics. 
+ +- [ ] **Step 6: Integration smoke test (requires bwrap + helper on PATH)** + +```bash +# Build helper first if needed +make -C native + +# Run a simple sandboxed command +node -e " +import('@safeclaw/sandbox').then(async ({ Sandbox, PolicyBuilder, SandboxManager }) => { + await SandboxManager.initialize({ filesystem: { allowWrite: [], denyWrite: [], denyRead: [] }, network: { allowedDomains: [], deniedDomains: [] }}); + const sandbox = new Sandbox(PolicyBuilder.forDevelopment(process.cwd())); + const result = await sandbox.execute('/bin/echo', ['hello from sandbox']); + console.log(result); + await SandboxManager.reset(); +}); +" +``` + +Expected: `{ exitCode: 0, stdout: 'hello from sandbox\n', enforcement: { landlock: true, capDrop: true, ... } }`. + +- [ ] **Step 7: Commit** + +```bash +git add packages/sandbox/src/sandbox.ts packages/sandbox/src/sandbox.test.ts +git commit -m "feat(sandbox): inject C helper via --policy-file for Landlock + cap-drop inside bwrap" +``` + +--- + +## Risks and Notes + +| Risk | Mitigation | +|------|-----------| +| `SandboxManager.initialize()` not called before `Sandbox` constructor | Enforced: `Sandbox` constructor checks `isSandboxingEnabled()` and throws with clear message | +| Policy temp file leaked if process crashes between `writeFileSync` and `rmSync` | Temp file lives in `/tmp` — OS cleans it on reboot. Filename includes PID so post-mortem identification is possible | +| Helper binary not accessible inside bwrap when in non-system path | Task 8 adds `helperDir` to `allowWrite`; the `systemPaths` exclusion list may need extension for unusual installs | +| sandbox-runtime v0.0.42 is pre-stable | Pinned to exact git SHA `20f5176`. Check for updates before shipping. 
| +| `wrapWithSandbox()` may fail if proxy not initialized | The `catch` block in `bootstrapAgent()` catches init failures; filesystem isolation still applies | +| macOS: `sandbox-exec` behavior differs from bwrap | Tested by sandbox-runtime; our `PolicyBuilder.toRuntimeConfig()` translation is platform-agnostic | +| Seccomp conflict: sandbox-runtime installs unix-socket BPF, helper installs its own syscall filter | Both apply inside the process. sandbox-runtime's filter applies first (outer bwrap layer); helper's filter applies inside. Filters stack (both must allow a syscall for it to proceed). Verify the helper's syscall allowlist includes all syscalls needed by sandbox-runtime's filter management | +| `process.kill(-proc.pid, 'SIGKILL')` on timeout may not reach bwrap's children | The outer `/bin/sh` is the process group leader (`detached: true`). bwrap inherits the group, and sandboxed children inherit from bwrap. Verify with `execute('/bin/sleep', ['60'])` + short timeout — if `result.killed === true` the group kill worked. Add an integration test for this. | +| Helper binary not bind-mounted into bwrap when in non-system path | Task 8 adds `helperDir` to `rtConfig.filesystem.allowWrite`. sandbox-runtime maps `allowWrite` paths to `--bind` (read+write), which also allows execute on Linux. If the helper is in a non-standard location (e.g. `~/.safeclaw/bin/`), verify it is executable inside the bwrap container by running the integration smoke test with `SAFECLAW_HELPER_PATH=~/.safeclaw/bin/safeclaw-sandbox-helper`. 
| diff --git a/packages/sandbox/src/policy-builder.test.ts b/packages/sandbox/src/policy-builder.test.ts index d412a93..25bc0a2 100644 --- a/packages/sandbox/src/policy-builder.test.ts +++ b/packages/sandbox/src/policy-builder.test.ts @@ -1,10 +1,16 @@ -import { describe, it, expect, beforeEach } from "vitest"; +import { describe, it, expect, beforeEach, vi } from "vitest"; import { homedir } from "node:os"; import { PolicyBuilder } from "./policy-builder.js"; import type { SandboxPolicy, PathRule } from "./types.js"; import { DEFAULT_POLICY } from "./types.js"; import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; +// Mock lstatSync so toRuntimeConfig() tests are deterministic regardless of +// which credential directories actually exist on the test machine. +vi.mock("node:fs", () => ({ + lstatSync: () => ({ isDirectory: () => true }), +})); + describe("PolicyBuilder", () => { describe("addReadExecute()", () => { it("adds a path with read access", () => { diff --git a/packages/sandbox/src/policy-builder.ts b/packages/sandbox/src/policy-builder.ts index 634365a..04eb886 100644 --- a/packages/sandbox/src/policy-builder.ts +++ b/packages/sandbox/src/policy-builder.ts @@ -1,3 +1,4 @@ +import { lstatSync } from "node:fs"; import { homedir } from "node:os"; import { dirname } from "node:path"; import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime"; @@ -90,7 +91,8 @@ export class PolicyBuilder { .filter((r) => r.access === "readwrite" || r.access === "readwriteexecute") .map((r) => r.path); - // Always deny reads to credential/secret directories. + // Always deny reads to credential/secret directories that exist. + // Non-existent paths are skipped — bwrap cannot bind-mount over them. // sandbox-runtime also enforces mandatory deny on dangerous files (.bashrc, // .git/hooks, etc.) regardless of this config — these are complementary. 
const home = homedir(); @@ -102,7 +104,13 @@ export class PolicyBuilder { `${home}/.docker`, `${home}/.gcloud`, `${home}/.azure`, - ]; + ].filter((p) => { + try { + return lstatSync(p).isDirectory(); + } catch { + return false; + } + }); // ── Network ─────────────────────────────────────────────────────── const network = buildNetworkConfig(policy.network); diff --git a/packages/sandbox/src/sandbox.test.ts b/packages/sandbox/src/sandbox.test.ts index 26aed68..137d147 100644 --- a/packages/sandbox/src/sandbox.test.ts +++ b/packages/sandbox/src/sandbox.test.ts @@ -30,6 +30,7 @@ vi.mock("@anthropic-ai/sandbox-runtime", () => ({ vi.mock("node:fs", () => ({ writeFileSync: (...args: unknown[]) => mockWriteFileSync(...args), rmSync: (...args: unknown[]) => mockRmSync(...args), + lstatSync: () => ({ isDirectory: () => false }), })); const { Sandbox } = await import("./sandbox.js"); From d8c22554a21327a84e259a7b358382268401521f Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 10:59:51 +0100 Subject: [PATCH 10/13] docs: update documentation for sandbox-runtime integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - README: add macOS to platform requirements; add bubblewrap/socat prerequisites; update sandbox description and @safeclaw/sandbox blurb; mark Feature 5 (sandbox-runtime integration) as Done - AGENTS.md: update platform note (Linux + macOS); rewrite Sandbox section to reflect SandboxManager/wrapWithSandbox outer layer and --policy-file inner protocol; add SandboxManager.initialize() to bootstrap flow - docs/getting-started.md: add bubblewrap/socat to prerequisites; replace kernel-only verification with bwrap/socat checks; update doctor command category description - docs/sandboxing.md: rewrite architecture diagram to show two-layer model (sandbox-runtime outer + C helper inner); update Layer 1 from unshare to sandbox-runtime/bwrap; document PolicyBuilder.toRuntimeConfig and WSL2 symlink handling; 
update policy delivery from fd 3 to --policy-file; add pivotRoot/bindMounts to EnforcementLayers; update graceful degradation table; update platform requirements section - docs/security-model.md: rewrite sandboxing architecture intro for two-layer model; update namespaces section to reference bwrap; fix development policy (remove ~/.safeclaw from readwrite — it was never included; home dir is read-only) - docs/architecture.md: update @safeclaw/sandbox description; update Linux-only design decision to Linux + macOS; update doctor description Co-Authored-By: Claude Sonnet 4.6 --- AGENTS.md | 11 ++-- README.md | 8 +-- docs/architecture.md | 11 ++-- docs/getting-started.md | 26 ++++++---- docs/sandboxing.md | 109 ++++++++++++++++++++++++++-------------- docs/security-model.md | 16 +++--- 6 files changed, 114 insertions(+), 67 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 157c7ef..237307e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,7 +8,7 @@ SafeClaw is an AI coding assistant with multi-provider LLM support (GitHub Copil The target user is an individual developer who wants AI-assisted coding with strong guarantees against prompt injection, malicious tool calls, and data exfiltration. -**Linux-only. Node.js >= 22. pnpm 9+.** +**Linux and macOS. Node.js >= 22. pnpm 9+.** ## Repository Structure @@ -83,10 +83,10 @@ Each tool declares `requiredCapabilities` and implements a `ToolHandler` interfa `packages/core/src/tools/process-manager.ts` -- Tracks spawned child processes by UUID. Features: ring buffer output capture (1MB max per process), automatic cleanup after 1 hour, maximum 8 concurrent processes. Used by the `process` builtin tool. 
### Sandbox -- `packages/sandbox/src/sandbox.ts` -- Spawns child process with `unshare` + native helper -- `packages/sandbox/src/policy-builder.ts` -- `PolicyBuilder` class with fluent API; `PolicyBuilder.forDevelopment(cwd, options?)` creates a development-ready policy with allowlisted system paths, compiler toolchains (JVM, GCC), an expanded ~120 syscall allowlist, and support for `extraExecutePaths`/`extraReadWritePaths` via `DevelopmentPolicyOptions` +- `packages/sandbox/src/sandbox.ts` -- Wraps commands via `@anthropic-ai/sandbox-runtime` (`SandboxManager.wrapWithSandbox()`) as the outer layer; injects the C helper as the inner process via `--policy-file <path>` when found +- `packages/sandbox/src/policy-builder.ts` -- `PolicyBuilder` class with fluent API; `PolicyBuilder.forDevelopment(cwd, options?)` creates a development-ready policy with allowlisted system paths, compiler toolchains (JVM, GCC), an expanded ~120 syscall allowlist, and support for `extraExecutePaths`/`extraReadWritePaths` via `DevelopmentPolicyOptions`; `PolicyBuilder.toRuntimeConfig(policy)` translates `SandboxPolicy` to `SandboxRuntimeConfig` for sandbox-runtime (write allowlist + credential dir denylist) - `native/src/main.c` -- C helper binary that applies: Landlock filesystem rules, seccomp-BPF syscall filtering, capability dropping, `PR_SET_NO_NEW_PRIVS` -- Policy sent to helper via fd 3 as JSON +- Policy sent to helper via `--policy-file <path>` (JSON written to a temp file at mode 0o600; cleaned up after each execution) ### Vault `packages/vault/src/vault.ts` -- AES-256-GCM encrypted JSON file store. Keys derived via scrypt from passphrase or fetched from OS keyring (GNOME `secret-tool`). File permissions enforced at 0o600. @@ -114,7 +114,8 @@ Bootstrap flow (`packages/cli/src/commands/bootstrap.ts`): 4. Load builtin skill manifest 5. Read `brave_api_key` from vault; if present, include web_search tool in tool registry 6. Create ProcessManager for background process tracking -7. 
Create: CapabilityRegistry -> CapabilityEnforcer -> ToolRegistry -> Sandbox -> ToolOrchestrator -> ContextCompactor -> Agent +7. Initialize `SandboxManager` network proxy (via `PolicyBuilder.toRuntimeConfig()`) +8. Create: CapabilityRegistry -> CapabilityEnforcer -> ToolRegistry -> Sandbox -> ToolOrchestrator -> ContextCompactor -> Agent 8. Return `{ agent, sessionManager, capabilityRegistry, auditLog }` CLI commands: `chat` (default), `onboard`, `audit`, `serve`/`server`, `doctor`, `help`, `version` diff --git a/README.md b/README.md index 2d1c041..a5dd904 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ SafeClaw is a secure AI coding assistant with mandatory OS-level sandboxing, sig curl -fsSL https://raw.githubusercontent.com/linuxdevel/safeclaw/main/install.sh | bash ``` -Requires: Linux (x86\_64 or arm64), Node.js >= 22, bubblewrap (`apt install bubblewrap`). +Requires: Linux (x86\_64 or arm64) or macOS, Node.js >= 22, bubblewrap on Linux (`apt install bubblewrap`), socat (`apt install socat`). After install, run `safeclaw onboard` for first-time setup. @@ -20,7 +20,7 @@ After install, run `safeclaw onboard` for first-time setup. ### Security -- Zero-trust security model with mandatory OS-level sandboxing — bubblewrap `pivot_root` filesystem isolation (outer) + Landlock + seccomp-BPF + capability dropping (inner), Linux namespaces (PID, net, mount, IPC, UTS) +- Zero-trust security model with mandatory OS-level sandboxing — `@anthropic-ai/sandbox-runtime` outer layer (bubblewrap `pivot_root` + bind mounts on Linux; sandbox-exec on macOS) + C helper inner layer (Landlock + seccomp-BPF + capability dropping on Linux). Supports Linux and macOS. - Development-ready sandbox policy via `PolicyBuilder.forDevelopment()` — allows compilers (GCC, JVM), package managers, and standard dev tools while enforcing kernel-level access control. Selective home directory binding hides `~/.ssh`, `~/.aws`, `~/.gnupg` structurally. 
- AES-256-GCM encrypted secrets vault with OS keyring or passphrase-derived keys - Ed25519-signed skill manifests with capability declarations and runtime enforcement @@ -69,7 +69,7 @@ Planned features in implementation order: | 2 | Automatic context compaction | [plan](docs/plans/2026-03-05-context-compaction.md) | High | | 3 | Streaming UX (Phase 1 — readline) | [plan](docs/plans/2026-03-05-streaming-ux.md) | High | | 4 | Better CLI/TUI (Ink-based) | [plan](docs/plans/2026-03-05-tui.md) | High | -| 5 | Bubblewrap sandbox (`pivot_root` isolation) | [design](docs/plans/2026-03-07-bubblewrap-sandbox-design.md) · [plan](docs/plans/2026-03-07-bubblewrap-sandbox-implementation.md) | High | +| 5 | Sandbox-runtime integration (`pivot_root` + macOS support) | [design](docs/plans/2026-03-07-bubblewrap-sandbox-design.md) · [plan](docs/plans/2026-03-20-sandbox-runtime-integration.md) | **Done** | | 6 | Parallel agents | [plan](docs/plans/2026-03-05-parallel-agents.md) | Medium | | 7 | Long-running background agents | [plan](docs/plans/2026-03-05-background-agents.md) | Medium | | 8 | Superpowers skill integration | [plan](docs/plans/2026-03-05-superpowers-integration.md) | Medium | @@ -93,7 +93,7 @@ Planned features in implementation order: Monorepo structure: - `@safeclaw/vault` — Encrypted secrets storage -- `@safeclaw/sandbox` — OS-level process sandboxing with bubblewrap (`pivot_root`) + C helper (Landlock/seccomp) and `PolicyBuilder` for development-ready policies +- `@safeclaw/sandbox` — OS-level process sandboxing: outer layer via `@anthropic-ai/sandbox-runtime` (bwrap on Linux, sandbox-exec on macOS), inner layer via C helper (Landlock + seccomp-BPF + cap-drop). `PolicyBuilder` for development-ready policies; `PolicyBuilder.toRuntimeConfig()` translates policies for sandbox-runtime. 
- `@safeclaw/core` — Capabilities, agent runtime, sessions, tools, skills, model providers, copilot client - `@safeclaw/gateway` — HTTP server with auth and rate limiting - `@safeclaw/cli` — Command-line interface diff --git a/docs/architecture.md b/docs/architecture.md index da88dc7..b171bb9 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -74,9 +74,10 @@ OS-level process isolation using Linux kernel features. - `SandboxPolicy` / `DEFAULT_POLICY`: policy types with maximally restrictive defaults - `PolicyBuilder`: fluent API for constructing sandbox policies; `PolicyBuilder.forDevelopment(cwd)` creates a ready-made policy for software development with allowlisted system paths, compiler toolchains, and an expanded syscall set -- `detectKernelCapabilities`: probes `/proc` for Landlock, seccomp, namespace support +- `detectKernelCapabilities`: probes `/proc` for Landlock, seccomp, namespace support; also detects bubblewrap availability - `assertSandboxSupported`: throws if required kernel features are missing -- `Sandbox` class: executes commands under policy (stub in v1; types and policies are real) +- `Sandbox` class: executes commands by wrapping via `SandboxManager.wrapWithSandbox()` (outer layer) and injecting the C helper as the inner process via `--policy-file` when found +- `PolicyBuilder.toRuntimeConfig()`: translates `SandboxPolicy` to `SandboxRuntimeConfig` for sandbox-runtime (write allowlist, credential dir denylist, network domains) No dependencies on other SafeClaw packages. @@ -148,7 +149,7 @@ Command-line interface adapter. 
- `runOnboarding`: five-step onboarding wizard (kernel check, auth, vault, signing key, model selection) - `setupChat`: wires the CLI adapter to the agent - `runAudit`: generates security audit reports (text or JSON) -- `runDoctor`: runs 12 diagnostic checks across system, security, config, and connectivity categories +- `runDoctor`: runs diagnostic checks across system (platform, bwrap, socat, rg, helper binary), security (Landlock, seccomp, namespaces), config, and connectivity categories Dependencies: `@safeclaw/core`, `@safeclaw/gateway`, `@safeclaw/sandbox`, `@safeclaw/vault`. @@ -294,9 +295,9 @@ The audit log maintains the last N entries in memory (default: 100). The `safecl ## Key design decisions -### Linux-only (v1) +### Linux and macOS -SafeClaw v1 targets Linux exclusively. The sandboxing architecture depends on Landlock (kernel >= 5.13), seccomp-BPF, and Linux namespaces. These have no direct equivalents on macOS or Windows. Future versions may add platform-specific sandboxing. +SafeClaw supports Linux and macOS. On Linux, the full stack is available: bubblewrap `pivot_root` container (via `@anthropic-ai/sandbox-runtime`) + Landlock + seccomp-BPF + capability dropping (via C helper). On macOS, the outer `sandbox-exec` layer is used; the C helper and Linux-specific enforcement (Landlock, seccomp, namespaces) are not active. Windows is not supported. 
### Multi-provider LLM support diff --git a/docs/getting-started.md b/docs/getting-started.md index 4ca8f65..60c10df 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -4,26 +4,34 @@ - **Node.js** >= 22.0.0 - **pnpm** >= 9.0.0 -- **Linux** with kernel >= 5.13 (required for Landlock, seccomp-BPF, and namespaces) +- **Linux** with kernel >= 5.13 (required for Landlock, seccomp-BPF, and namespaces), **or macOS** (uses sandbox-exec for outer isolation; C helper not available on macOS) +- **bubblewrap** (`apt install bubblewrap` on Debian/Ubuntu) — required on Linux for `pivot_root` filesystem isolation +- **socat** (`apt install socat` on Debian/Ubuntu) — required by `@anthropic-ai/sandbox-runtime` for network proxy - **GNOME Keyring** (`secret-tool`) if using OS keyring for vault encryption (optional; passphrase fallback available) - A **GitHub account** with Copilot access (for default Copilot provider), or an **OpenAI** or **Anthropic** API key -### Verifying kernel support +### Verifying sandbox support -SafeClaw requires mandatory sandboxing. Check that your kernel supports the necessary features: +Run `safeclaw doctor` after installation to verify all dependencies. To manually check: ```bash -# Kernel version (must be >= 5.13) +# bubblewrap (Linux) +which bwrap && bwrap --version + +# socat (network proxy) +which socat + +# Kernel version (Linux, must be >= 5.13 for Landlock) uname -r -# Seccomp support +# Seccomp support (Linux) grep Seccomp /proc/self/status -# Namespace support +# Namespace support (Linux) ls /proc/self/ns/{user,pid,net,mnt} ``` -All four namespaces (user, PID, net, mount) must be available. If any are missing, SafeClaw will warn during onboarding and sandbox isolation will be limited. +All four namespaces (user, PID, net, mount) must be available on Linux. If any are missing, SafeClaw will warn during onboarding and sandbox isolation will be limited. On macOS, namespace-based isolation is replaced by sandbox-exec. 
## Installation @@ -209,8 +217,8 @@ Run diagnostic checks to verify your SafeClaw installation: safeclaw doctor ``` -The doctor command runs 12 checks across four categories: -- **System**: Node.js version, native helper binary, disk space +The doctor command runs checks across four categories: +- **System**: Node.js version, platform support, bubblewrap (`bwrap`), native helper binary, socat, ripgrep (`rg`), disk space - **Security**: Landlock, seccomp, namespace support - **Config**: vault accessibility, signing key, default model - **Connectivity**: GitHub Copilot API reachability diff --git a/docs/sandboxing.md b/docs/sandboxing.md index 534cf4f..2fce3aa 100644 --- a/docs/sandboxing.md +++ b/docs/sandboxing.md @@ -22,33 +22,43 @@ SafeClaw treats every tool execution as untrusted. The sandbox limits the blast ## Architecture +SafeClaw uses a two-layer sandbox. The outer layer (provided by `@anthropic-ai/sandbox-runtime`) creates a container using bubblewrap on Linux or sandbox-exec on macOS. The inner layer (the C helper binary) applies Landlock, seccomp-BPF, and capability dropping inside the container. + ``` ┌──────────────────────────────────────────────────────────────┐ │ Node.js: Sandbox.execute(command, args) │ │ │ │ 1. Resolve helper binary (discovery) │ -│ 2. Serialize policy JSON (filesystem + syscalls) │ -│ 3. Spawn: unshare [ns-flags] -- helper -- command [args] │ -│ 4. Write policy JSON to fd 3 │ -│ 5. Collect stdout/stderr, enforce timeout │ +│ 2. Write policy JSON to temp file (mode 0o600) │ +│ 3. Build inner command: helper --policy-file -- cmd │ +│ 4. Translate SandboxPolicy → SandboxRuntimeConfig │ +│ (PolicyBuilder.toRuntimeConfig) │ +│ 5. SandboxManager.wrapWithSandbox(innerCmd, rtConfig) │ +│ 6. Spawn wrapped command via /bin/sh -c │ +│ 7. Collect stdout/stderr, enforce timeout │ +│ 8. 
Cleanup: delete temp policy file │ └──────────────────────┬───────────────────────────────────────┘ │ fork+exec ▼ ┌──────────────────────────────────────────────────────────────┐ -│ unshare(1) │ -│ Creates Linux namespaces: │ -│ - PID namespace (--pid --fork) │ -│ - Network namespace (--net) │ -│ - Mount namespace (--mount) │ -│ - User namespace (--user --map-root-user) │ +│ @anthropic-ai/sandbox-runtime (outer layer) │ +│ │ +│ Linux (bubblewrap): │ +│ - pivot_root: new filesystem root with bind-mounted paths │ +│ - PID, network, mount, user namespaces │ +│ - Network proxy (socat) for controlled domain access │ +│ │ +│ macOS (sandbox-exec): │ +│ - sandbox-exec profile restricts filesystem + network │ └──────────────────────┬───────────────────────────────────────┘ │ exec ▼ ┌──────────────────────────────────────────────────────────────┐ │ safeclaw-sandbox-helper (static C binary, ~800 KB) │ +│ (Linux only; skipped on macOS) │ │ │ │ 1. Self-checks (refuse setuid, PR_SET_NO_NEW_PRIVS) │ -│ 2. Read policy JSON from fd 3 │ +│ 2. Read policy JSON from --policy-file path │ │ 3. Apply Landlock filesystem restrictions │ │ 4. Close all fds > 2 (fd hygiene) │ │ 5. Drop all Linux capabilities │ @@ -60,10 +70,10 @@ SafeClaw treats every tool execution as untrusted. The sandbox limits the blast ┌──────────────────────────────────────────────────────────────┐ │ Target command (e.g., /bin/bash -c "npm test") │ │ │ -│ Runs with ALL restrictions active: │ -│ - Filesystem: only declared paths accessible │ +│ Runs with ALL restrictions active (Linux): │ +│ - Filesystem: pivot_root container + Landlock path rules │ │ - Syscalls: only allow-listed syscalls permitted │ -│ - Network: isolated (no connectivity) │ +│ - Network: isolated via namespace + network proxy │ │ - Capabilities: all dropped │ │ - Privileges: cannot escalate (NO_NEW_PRIVS) │ └──────────────────────────────────────────────────────────────┘ @@ -73,18 +83,31 @@ SafeClaw treats every tool execution as untrusted. 
The sandbox limits the blast ## Enforcement layers -### Layer 1: Linux namespaces (via `unshare`) +### Layer 1: Container isolation (via `@anthropic-ai/sandbox-runtime`) + +The outer layer uses `@anthropic-ai/sandbox-runtime` to create an isolated process container. The specific mechanism varies by platform: + +**Linux (bubblewrap / bwrap):** -Namespaces provide coarse-grained isolation at the kernel level. +| Feature | Effect | +|---------|--------| +| `pivot_root` | New filesystem root; only bind-mounted paths are visible | +| PID namespace | Process sees only its own PID tree; cannot signal host processes | +| Network namespace | Fresh network stack; external connectivity controlled by network proxy | +| Mount namespace | Isolated mount table; filesystem changes don't affect host | +| User namespace | Unprivileged user mapping; enables other namespaces without root | +| Network proxy (socat) | Intercepts outbound connections; enforces `allowedDomains`/`deniedDomains` | -| Namespace | Flag | Effect | -|-----------|------|--------| -| PID | `--pid --fork` | Process sees only its own PID tree; cannot signal host processes | -| Network | `--net` | Fresh network stack with only loopback; no external connectivity | -| Mount | `--mount` | Isolated mount table; filesystem modifications don't affect host | -| User | `--user --map-root-user` | Unprivileged user mapping; enables other namespaces without root | +`PolicyBuilder.toRuntimeConfig()` translates a `SandboxPolicy` into a `SandboxRuntimeConfig`: +- `filesystem.allow` rules with `readwrite` or `readwriteexecute` access map to `allowWrite` (bind-mounted read-write) +- Sensitive credential directories (`~/.ssh`, `~/.aws`, `~/.gnupg`, etc.) that exist as real directories (not symlinks) are added to `denyRead`; symlinks are excluded to avoid bwrap failures on WSL2 +- Network policy maps to `allowedDomains`/`deniedDomains` -Namespace isolation is handled by the standard `unshare(1)` utility. 
This is the baseline -- it works even without the helper binary. +**macOS (sandbox-exec):** + +The macOS `sandbox-exec` utility applies a Seatbelt profile that restricts filesystem access and network connectivity. Linux-specific layers (Landlock, seccomp-BPF, namespaces) are not available on macOS. + +This is the baseline — it works even without the C helper binary. ### Layer 2: Landlock filesystem restrictions @@ -159,7 +182,7 @@ The enforcement order within the helper is critical for correctness: ## Policy format -The policy JSON written to fd 3 contains only the fields relevant to the helper (namespace and network isolation are handled by `unshare`): +The policy JSON written to the temp file (and passed to the helper via `--policy-file`) contains only the fields relevant to the helper (namespace and network isolation are handled by the outer sandbox-runtime layer): ```json { @@ -178,7 +201,7 @@ The policy JSON written to fd 3 contains only the fields relevant to the helper } ``` -The full `SandboxPolicy` type (in TypeScript) also includes `network`, `namespaces`, and `timeoutMs` -- these are consumed by the Node.js layer, not the helper. +The full `SandboxPolicy` type (in TypeScript) also includes `network`, `namespaces`, and `timeoutMs` -- these are consumed by the Node.js layer and translated to `SandboxRuntimeConfig` for sandbox-runtime, not passed to the helper. --- @@ -186,32 +209,35 @@ The full `SandboxPolicy` type (in TypeScript) also includes `network`, `namespac ### Discovery order -`Sandbox.execute()` searches for the helper binary in this order: +`findHelper()` searches for the helper binary in this order: 1. **`SAFECLAW_HELPER_PATH`** environment variable -- for custom installations and testing 2. **Co-located path** -- `native/safeclaw-sandbox-helper` relative to the package 3. **User install path** -- `~/.safeclaw/bin/safeclaw-sandbox-helper` 4. 
**System PATH** -- resolved via `which` -If the helper is found and executable, all four enforcement layers (namespaces + Landlock + seccomp + capability drop) are active. If not found, SafeClaw falls back to namespace-only isolation. +If the helper is found and executable, the full enforcement stack (outer sandbox-runtime container + inner Landlock + seccomp + capability drop) is active. If not found, only the outer sandbox-runtime layer is applied. > **TODO:** Re-add SHA-256 integrity verification of the helper binary once builds are reproducible. Currently, the binary hash changes across compiler versions and build environments, making a hardcoded hash impractical without a release process that stamps it. ### Graceful degradation -| Helper status | Enforcement | -|---------------|-------------| -| Found | Namespaces + Landlock + seccomp + capability drop | -| Not found | Namespaces only | +| Platform | Helper status | Enforcement | +|----------|---------------|-------------| +| Linux | Found | bwrap container (pivot_root + namespaces) + Landlock + seccomp + capability drop | +| Linux | Not found | bwrap container only (pivot_root + namespaces) | +| macOS | N/A | sandbox-exec profile only | The `SandboxResult.enforcement` field reports which layers were active: ```typescript interface EnforcementLayers { - namespaces: boolean; // unshare was used - landlock: boolean; // Landlock filesystem restrictions active - seccomp: boolean; // seccomp-BPF syscall filter active - capDrop: boolean; // all capabilities dropped + namespaces: boolean; // Linux namespaces active (bwrap) + pivotRoot: boolean; // pivot_root filesystem isolation (bwrap on Linux) + bindMounts: boolean; // bind-mounted paths (always true when sandbox-runtime runs) + landlock: boolean; // Landlock filesystem restrictions active (C helper) + seccomp: boolean; // seccomp-BPF syscall filter active (Linux + helper) + capDrop: boolean; // all capabilities dropped (C helper) } ``` @@ -273,15 +299,22 @@ 
interface EnforcementLayers { --- -## Kernel requirements +## Platform requirements -SafeClaw v1 requires Linux with: +### Linux - **Kernel >= 5.13** -- for Landlock LSM support - **seccomp-BPF** -- enabled in kernel config (`CONFIG_SECCOMP_FILTER=y`) - **User namespaces** -- `sysctl kernel.unprivileged_userns_clone=1` (default on most distros) +- **bubblewrap** -- `bwrap` binary in PATH (`apt install bubblewrap`) +- **socat** -- required by sandbox-runtime network proxy (`apt install socat`) + +### macOS + +- macOS 10.14+ (Mojave) for sandbox-exec support +- socat (`brew install socat`) for network proxy -The `safeclaw onboard` command checks these requirements during setup. The `detectKernelCapabilities()` function provides programmatic detection. +The `safeclaw onboard` command checks these requirements during setup. The `detectKernelCapabilities()` function provides programmatic detection. `safeclaw doctor` runs `bwrapCheck`, `socatCheck`, and `platformCheck` to verify the environment. --- diff --git a/docs/security-model.md b/docs/security-model.md index 420b236..49908e2 100644 --- a/docs/security-model.md +++ b/docs/security-model.md @@ -11,7 +11,11 @@ Two principles drive every design decision: ## Sandboxing architecture -SafeClaw uses three Linux kernel mechanisms for mandatory process isolation. Sandboxing cannot be disabled. +SafeClaw uses a two-layer sandbox for mandatory process isolation. Sandboxing cannot be disabled. + +The **outer layer** is provided by `@anthropic-ai/sandbox-runtime`: bubblewrap (`bwrap`) on Linux with `pivot_root` filesystem isolation and Linux namespaces (PID, net, mount, user); `sandbox-exec` on macOS. + +The **inner layer** is the C helper binary (`safeclaw-sandbox-helper`), which applies Landlock, seccomp-BPF, and capability dropping inside the container. It is active on Linux when the helper binary is found. 
### Landlock @@ -45,15 +49,15 @@ set_robust_list, rseq, prlimit64, getrandom - **Networking**: socket, connect, bind, listen, accept, accept4, recvfrom, sendto, recvmsg, sendmsg, shutdown, setsockopt, getsockopt, getpeername, getsockname, socketpair - **Misc**: ioctl, prctl, arch_prctl, set_tid_address, set_robust_list, futex, sched_yield, sched_getaffinity, uname, prlimit64, getrandom, rseq, memfd_create, copy_file_range, fadvise64, fallocate, ftruncate, truncate, mlock, munlock, mincore -### Linux namespaces +### Linux namespaces (via bubblewrap) -Four namespace types isolate sandboxed processes: +On Linux, four namespace types isolate sandboxed processes inside the bwrap container: | Namespace | Purpose | |-----------|---------| | PID | Process sees only its own PID tree | -| Network | No network access (or localhost-only) | -| Mount | Isolated filesystem view | +| Network | Isolated network stack; external access controlled by sandbox-runtime network proxy | +| Mount | Isolated filesystem view with `pivot_root` | | User | Unprivileged user mapping | ### DEFAULT_POLICY @@ -84,7 +88,7 @@ const sandbox = new Sandbox(policy); The development policy grants: - **Execute access**: `/usr/bin`, `/usr/local/bin`, `/bin`, `/usr/sbin`, `/sbin`, `/usr/lib/jvm`, `/usr/lib/gcc`, `/usr/libexec`, plus the Node.js install prefix - **Read access**: `/usr/include`, `/usr/share`, shared library paths (`/lib`, `/usr/lib`, `/lib64`, `/usr/lib64`), `/etc`, `/proc`, `/dev/null`, `/dev/urandom`, `/dev/zero`, `/dev/random` -- **Read-write access**: CWD, `/tmp`, `~/.safeclaw` +- **Read-write access**: CWD, `/tmp`, `/dev/null` - **Extra paths**: `DevelopmentPolicyOptions` supports `extraExecutePaths` (e.g., `~/.cargo/bin`, `~/.rustup`) and `extraReadWritePaths` for user-local toolchains - **Expanded syscalls**: ~120 syscalls (see seccomp-BPF section above) - **Network**: `"none"` (unchanged from default) From af3cd7f39705bcebadf7dd6918b96ee837af0cc1 Mon Sep 17 00:00:00 2001 From: Arne 
Brune Olsen Date: Fri, 20 Mar 2026 11:57:08 +0100 Subject: [PATCH 11/13] fix(cli): add @anthropic-ai/sandbox-runtime as direct CLI dependency bootstrap.ts imports SandboxManager directly from @anthropic-ai/sandbox-runtime but the package was only declared as a dependency of @safeclaw/sandbox, not @safeclaw/cli, causing a TS2307 build error. Co-Authored-By: Claude Sonnet 4.6 --- packages/cli/package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/cli/package.json b/packages/cli/package.json index c619e0b..6fa4d3b 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -12,6 +12,7 @@ "build": "tsc -p tsconfig.json" }, "dependencies": { + "@anthropic-ai/sandbox-runtime": "git+ssh://git@github.com/anthropic-experimental/sandbox-runtime.git#20f5176a94314038695bee13779eb9eebbbaeb49", "@safeclaw/core": "workspace:*", "@safeclaw/gateway": "workspace:*", "@safeclaw/sandbox": "workspace:*", From cbde971533ba74e65184d539780e11abb0248291 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 12:02:25 +0100 Subject: [PATCH 12/13] fix(core): handle slow_down in device flow poll and add fetch timeouts GitHub's device flow returns 'slow_down' when the client polls too frequently. The previous code threw 'Token poll error: slow_down', which looked like an immediate failure during onboarding. 
- Handle slow_down by increasing the interval by 5 s (as required by the GitHub Device Flow spec) and continuing to poll - Add AbortSignal.timeout(30s) to requestDeviceCode and pollForToken fetch calls (consistent with CopilotClient.REQUEST_TIMEOUT_MS) - Update existing fetch call assertions to expect AbortSignal - Add test: slow_down increases interval and retries successfully Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/copilot/auth.test.ts | 27 ++++++++++++++++++++++++++ packages/core/src/copilot/auth.ts | 18 +++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/packages/core/src/copilot/auth.test.ts b/packages/core/src/copilot/auth.test.ts index 08aa5a5..41140e6 100644 --- a/packages/core/src/copilot/auth.test.ts +++ b/packages/core/src/copilot/auth.test.ts @@ -56,6 +56,7 @@ describe("requestDeviceCode", () => { client_id: "test-client-id", scope: "copilot read:user", }), + signal: expect.any(AbortSignal), }); expect(result).toEqual(deviceResponse); }); @@ -92,6 +93,7 @@ describe("pollForToken", () => { device_code: "dc_123", grant_type: "urn:ietf:params:oauth:grant-type:device_code", }), + signal: expect.any(AbortSignal), }); expect(result).toEqual(tokenResponse); }); @@ -121,6 +123,31 @@ describe("pollForToken", () => { vi.useRealTimers(); }); + it("handles slow_down by increasing interval and retrying", async () => { + vi.useFakeTimers(); + + const slowDown = { error: "slow_down" }; + const tokenResponse = { + access_token: "gho_abc123", + token_type: "bearer", + scope: "copilot", + }; + fetchMock + .mockResolvedValueOnce(jsonResponse(slowDown)) + .mockResolvedValueOnce(jsonResponse(tokenResponse)); + + const promise = pollForToken(config, "dc_123", 5); + + // slow_down increases interval from 5 to 10; advance past the 10-second delay + await vi.advanceTimersByTimeAsync(10_000); + + const result = await promise; + expect(fetchMock).toHaveBeenCalledTimes(2); + expect(result).toEqual(tokenResponse); + + vi.useRealTimers(); 
+ }); + it("throws on non-pending errors", async () => { fetchMock.mockResolvedValueOnce( jsonResponse({ error: "access_denied" }), diff --git a/packages/core/src/copilot/auth.ts b/packages/core/src/copilot/auth.ts index 55c7d6a..a57eede 100644 --- a/packages/core/src/copilot/auth.ts +++ b/packages/core/src/copilot/auth.ts @@ -27,6 +27,7 @@ export async function requestDeviceCode( client_id: config.clientId, scope: config.scopes.join(" "), }), + signal: AbortSignal.timeout(AUTH_FETCH_TIMEOUT_MS), }); if (!response.ok) { @@ -38,15 +39,21 @@ export async function requestDeviceCode( return (await response.json()) as DeviceCodeResponse; } +/** Timeout for individual auth fetch requests (ms). */ +const AUTH_FETCH_TIMEOUT_MS = 30_000; + /** * Poll GitHub for the OAuth token after the user has authorized the device. - * Retries on "authorization_pending" errors, respecting the given interval. + * Retries on "authorization_pending" and "slow_down" errors, respecting the + * given interval. "slow_down" increases the interval by 5 s as required by + * the GitHub Device Flow spec. 
*/ export async function pollForToken( config: CopilotAuthConfig, deviceCode: string, interval: number, ): Promise { + let currentInterval = interval; for (;;) { const response = await fetch(GITHUB_TOKEN_URL, { method: "POST", @@ -59,6 +66,7 @@ export async function pollForToken( device_code: deviceCode, grant_type: "urn:ietf:params:oauth:grant-type:device_code", }), + signal: AbortSignal.timeout(AUTH_FETCH_TIMEOUT_MS), }); if (!response.ok) { @@ -73,7 +81,13 @@ export async function pollForToken( if ("error" in data) { if (data.error === "authorization_pending") { - await delay(interval * 1000); + await delay(currentInterval * 1000); + continue; + } + if (data.error === "slow_down") { + // GitHub requires increasing the interval by 5 s on slow_down + currentInterval += 5; + await delay(currentInterval * 1000); continue; } throw new Error(`Token poll error: ${data.error}`); From 7310815f638d34d04a041d3bfc78b19b36f1dcb6 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Fri, 20 Mar 2026 12:06:56 +0100 Subject: [PATCH 13/13] fix(core): add fetch timeout to getCopilotToken getCopilotToken had no AbortSignal, causing an indefinite hang after the GitHub device flow poll succeeded but the Copilot API token exchange stalled. 
Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/copilot/auth.test.ts | 1 + packages/core/src/copilot/auth.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/packages/core/src/copilot/auth.test.ts b/packages/core/src/copilot/auth.test.ts index 41140e6..d4f6722 100644 --- a/packages/core/src/copilot/auth.test.ts +++ b/packages/core/src/copilot/auth.test.ts @@ -176,6 +176,7 @@ describe("getCopilotToken", () => { Authorization: "token gho_abc123", Accept: "application/json", }, + signal: expect.any(AbortSignal), }); expect(result).toEqual({ token: "tid=copilot_token_xyz", diff --git a/packages/core/src/copilot/auth.ts b/packages/core/src/copilot/auth.ts index a57eede..6d84e77 100644 --- a/packages/core/src/copilot/auth.ts +++ b/packages/core/src/copilot/auth.ts @@ -109,6 +109,7 @@ export async function getCopilotToken( Authorization: `token ${githubToken}`, Accept: "application/json", }, + signal: AbortSignal.timeout(AUTH_FETCH_TIMEOUT_MS), }); if (!response.ok) {