diff --git a/container/Dockerfile b/container/Dockerfile
new file mode 100644
index 0000000..e93c772
--- /dev/null
+++ b/container/Dockerfile
@@ -0,0 +1,84 @@
+# syntax=docker/dockerfile:1
+# Stage 0 spike draft — the OUTER "builder" container image.
+#
+# Validates: unprivileged docker container running rootless podman inside.
+# agent-server is deliberately NOT installed here (Stage 2 adds it); this image
+# exists purely to prove the nesting works on the target host.
+#
+# See docs/plans/stage0-spike-brief.md for the validation tasks and
+# container/SPIKE-FINDINGS.md for the recorded results.
+FROM ubuntu:24.04
+# Build-time only (ARG, not ENV): apt stays non-interactive during the build without leaking into the runtime env.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# podman + rootless plumbing (libcap2-bin supplies the setcap used further down):
+#   fuse-overlayfs → overlay storage in nested user namespaces (T4 tests native)
+#   slirp4netns    → rootless network namespace + port forwarding
+#   uidmap         → newuidmap/newgidmap setuid helpers (subordinate id mapping)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      ca-certificates curl fuse-overlayfs git iproute2 libcap2-bin \
+      podman slirp4netns uidmap \
+    && rm -rf /var/lib/apt/lists/*
+
+# CRITICAL FIX (see SPIKE-FINDINGS.md "newuidmap"): Ubuntu packages newuidmap /
+# newgidmap as setuid-root, so inside an unprivileged docker container they run
+# with euid=0. That misses the kernel's uid_map ownership shortcut and falls
+# through to a CAP_SYS_ADMIN-in-init-userns check, which docker's bounding set
+# denies -> 'newuidmap: write to uid_map failed: Operation not permitted'.
+# Fedora and quay.io/podman/stable ship the helpers with file capabilities
+# instead: euid stays 1000 (== owner of the nested userns) and the ownership
+# shortcut applies. Do the same here by dropping setuid and granting file caps.
+# WARNING: file capabilities are INCOMPATIBLE with running the outer container
+# under `--security-opt no-new-privileges`. With no_new_privs set, execve()
+# cannot add file capabilities to the permitted set, so the helpers silently
+# lose CAP_SETUID / CAP_SETGID and rootless podman fails at namespace setup with
+# the same 'Operation not permitted' as above. Do NOT add no-new-privileges as
+# "hardening" unless these helpers are first moved to a different id-mapping
+# mechanism. (https://www.kernel.org/doc/html/latest/userspace-api/no_new_privs.html)
+RUN chmod u-s /usr/bin/newuidmap /usr/bin/newgidmap \
+ && setcap cap_setuid+ep /usr/bin/newuidmap \
+           cap_setgid+ep /usr/bin/newgidmap
+
+# The stock ubuntu:24.04 image already has an 'ubuntu' account on uid 1000.
+# Drop it (best-effort) and claim uid 1000 for our own predictable 'builder' user.
+RUN { userdel --remove ubuntu 2>/dev/null || true; } \
+ && useradd --create-home --shell /bin/bash --uid 1000 builder
+
+# Subordinate uid/gid ranges: rootless podman maps inner-container users into
+# these; without them every nested `podman run` fails at user-namespace setup.
+RUN printf '%s\n' 'builder:100000:65536' > /etc/subuid \
+ && printf '%s\n' 'builder:100000:65536' > /etc/subgid
+
+# Podman settings for a nested environment that has no systemd:
+#   default_sysctls=[]       → inject no sysctls (NOTE(review): rationale not recorded)
+#   cgroup_manager=cgroupfs  → no systemd inside this container
+#   events_logger=file       → no journald inside this container
+#   storage driver=overlay   → NATIVE rootless overlay (T4: ~2.2x faster than
+#   fuse-overlayfs on the kernel 7.0 host, no /dev/fuse); fuse-overlayfs = fallback.
+RUN cfg=/home/builder/.config/containers && mkdir -p "$cfg" \
+ && printf '[containers]\ndefault_sysctls = []\n\n[engine]\ncgroup_manager = "cgroupfs"\nevents_logger = "file"\n' \
+      > "$cfg/containers.conf" \
+ && printf '[storage]\ndriver = "overlay"\n' \
+      > "$cfg/storage.conf" \
+ && chown -R builder:builder /home/builder/.config
+
+# Create the volume mountpoints up front, OWNED BY builder. Docker seeds a named
+# volume's ownership from the image path it mounts over; otherwise both volumes
+# come up root-owned and rootless podman dies with:
+#   mkdir /home/builder/.local/share/containers/storage: permission denied
+RUN mkdir --parents /home/builder/.local/share/containers /workspace \
+ && chown --recursive builder:builder /home/builder/.local /workspace
+
+# --chmod sets the mode during the copy, so no extra RUN layer is needed just to chmod the script.
+COPY --chmod=0755 entrypoint.sh /usr/local/bin/entrypoint.sh
+
+# No systemd-logind here to provision /run/user/1000; the entrypoint creates this dir.
+ENV XDG_RUNTIME_DIR=/tmp/runtime-builder
+
+# Drop to the unprivileged 'builder' account for everything at runtime.
+WORKDIR /workspace
+USER builder
+
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
+# Default CMD just keeps the spike container alive for `docker exec`; Stage 2 runs agent-server instead.
+CMD ["sleep", "infinity"]
diff --git a/container/INNER-APP-SPIKE.md b/container/INNER-APP-SPIKE.md
new file mode 100644
index 0000000..96f5f8b
--- /dev/null
+++ b/container/INNER-APP-SPIKE.md
@@ -0,0 +1,217 @@
+# Inner-App Spike — Realistic Builds Under Nested Rootless Podman
+
+**Date:** 2026-06-12
+**Runs on:** the same Stage 0 spikebox (the `builder-outer` container is already
+built and `./container/smoke.sh` passes). This is a follow-on, not a redo.
+**Parent plan:** `docs/plans/builder-containers-plan.md` (D5, D6, Stage 2)
+**Findings:** record everything in the "Findings" section at the bottom of this
+file as you go.
+
+## Why
+
+Stage 0 only proved trivial images (nginx, a one-line Alpine build) run nested.
+Before we commit to the template + two-container (DEV/PROD) deploy model, prove
+that a **realistic multi-stage app build** works inside the outer container under
+rootless podman + native overlay — including the things the real product depends
+on: two published ports, a redeploy cycle, optional hot-reload, and
+stack-agnosticism.
+
+## Hard rules
+
+- **Do not change the outer container's flags or image hardening.** Use the
+  `builder-outer` container exactly as Stage 0 left it (no `--privileged`, no new
+  caps). If something only works by weakening the outer container, that's a
+  finding, not a fix — stop and record it.
+- Everything happens **inside** the outer container via `docker exec` / `podman`,
+  in `/workspace`. Apps publish in the already-forwarded **10000–10009** range.
+- You write the sample apps yourself (small, known) — this is deterministic infra
+  validation, **not** an LLM app-building exercise.
+
+## Tasks
+
+### T1 — Multi-stage JS app, DEV + PROD, two ports
+- [x] In `/workspace/vite-sample`, scaffold a **minimal Vite app** (plain `npm create vite` SPA, no extras) with a multi-stage `Dockerfile`:
+  - `dev` target: installs deps, runs the Vite dev server bound to `0.0.0.0`
+  - `build` target: `vite build`
+  - `prod` target: a lean final stage serving the built `dist/` (e.g. `nginx:alpine` or a tiny static server) — **only the build output**, no `node_modules`/source
+- [x] `podman build --target dev  -t vite-sample:dev  .` and `--target prod -t vite-sample:prod .` both succeed inside the outer container
+- [x] Run both at once: dev on `:10000`, prod on `:10001`
+- [x] From the **host**: `curl 127.0.0.1:10000` and `curl 127.0.0.1:10001` both return the app
+- [x] Record: build times (cold), prod image size vs dev image size, any overlay/permission errors
+
+### T2 — Redeploy cycle + layer cache
+- [x] Edit a source file, then redeploy DEV: `podman build --target dev ... && podman rm -f <name> && podman run ...`
+- [x] Confirm the dependency-install layer is **cached** (rebuild only re-runs from the source-copy layer); record the warm rebuild time vs cold
+- [x] Confirm the change is visible via host curl
+
+### T3 — DEV hot-reload via bind mount (the D6 open question)
+- [x] Run the DEV container with the project dir bind-mounted: `podman run -d -v /workspace/vite-sample:/app -p 10000:<devport> vite-sample:dev` (adjust workdir so the mount lands where the dev server watches)
+- [x] Edit a source file on the host side of the mount; confirm whether the Vite dev server inside the container **hot-reloads without a rebuild** (check via curl / the HMR endpoint)
+- [x] Record: does a workspace bind mount work at all under nested rootless podman? Any uid/permission issues on the mounted files? Does file-watching/HMR fire across the mount? **This decides whether D6's hot-reload option is viable or we fall back to rebuild-redeploy.**
+
+### T4 — Stack-agnosticism smoke (non-JS)
+- [x] In `/workspace/py-sample`, a trivial **Python** app (e.g. Flask/`http.server`) with its own single-stage `Dockerfile`, published on `:10002`
+- [x] Build + run + host curl succeed
+- [x] Purpose: confirm the *mechanics* (build any Dockerfile, publish a port) are framework-neutral — so the deploy skill/metadata don't need JS assumptions
+
+### T5 — Resource sanity on a small box
+- [x] After T1–T4, record `podman images` total size, `df -h` on the podman storage volume, and peak memory during the heaviest build (`free -m` while building)
+- [x] Note whether anything thrashed/OOM'd; this informs the outer container's `--memory`/`--cpus` limits (Stage 4) and the box sizing recommendation
+
+## Acceptance
+
+- T1, T2, T4 green (multi-stage dev/prod build + run + two/three ports + redeploy, JS and Python).
+- T3 answered definitively (hot-reload works, or doesn't, with the reason).
+- Findings section filled in. No outer-container weakening introduced.
+
+Timebox: a couple of hours. If a multi-stage build fundamentally fails under
+nesting (e.g. native overlay chokes on many layers / large `node_modules`), stop
+and capture the exact error + `podman info` storage section — that's a Stage 2
+blocker we need to know about now, not later.
+
+---
+
+## Findings
+
+**Status:** COMPLETE — T1, T2, T4 green; T3 answered definitively (hot-reload
+**viable**); T5 recorded. **No outer-container weakening introduced** —
+`docker inspect` confirms `Privileged=false`, `CapAdd=[]`; the `builder-outer`
+image and `run-outer.sh` flags are byte-for-byte the Stage 0 set. Re-ran
+`./container/smoke.sh` first: 11/11 PASS.
+
+Method note: the outer container has **no node/npm** (correct — app builds happen
+*inside* podman using base images). Sample apps were hand-written (deterministic,
+per the brief) and `docker cp`'d into the `/workspace` volume as uid 1000
+(`builder`). All `curl`s below are from the **host** through both forwarding
+layers (host → docker publish `127.0.0.1:1000x` → outer → rootless podman →
+inner app).
+
+### Host / container
+- Outer image base `ubuntu:24.04`; inner podman **4.9.3**; native rootless
+  **overlay** (`Store.GraphDriverName=overlay`, no `mount_program`). Unchanged
+  from Stage 0. Outer main process uid 1000, `Privileged=false`, no added caps.
+- Box: 4 vCPU / 7.6 GiB RAM / 75 GiB disk (same Hetzner VM as Stage 0).
+- `docker.io/library/node:20-alpine` pulled in ~3 s; native overlay healthy.
+
+### T1 — multi-stage dev/prod + two ports
+- **dev build OK, prod build OK.** Cold times inside the outer container:
+  **dev ~6.8 s** (npm install dominates; base image pre-pulled), **prod ~2.2 s**
+  (its `build` stage reuses the `deps`/`npm install` layer already built for dev,
+  then `vite build` + nginx copy). A fully cold `--no-cache` dev build is ~6.5 s.
+- **prod 63.7 MB vs dev 239 MB** (~3.7× smaller). Prod = `nginx:alpine` + the
+  built `dist/` only; dev = `node:20-alpine` + `node_modules`.
+- **Both reachable from host:** `:10000` (dev) served the Vite HTML *with*
+  `/@vite/client` injected (dev server live) and served transformed source at
+  `/src/main.js`; `:10001` (prod) served the hashed/bundled assets, and the
+  bundle contained `BUILD_MARKER_V1` (proves it's the real build output).
+- **prod is non-root:** container runs as `uid=101(nginx)`; image has **no
+  `/app`, no `node_modules`, no source** — only `index.html` + `assets/`.
+  Achieved with a custom `nginx.conf` (pid + temp paths under `/tmp`, `listen
+  8080`) + `USER nginx`.
+- **No overlay/permission errors.** Multi-stage build over native overlay was
+  clean; no layer-count or `node_modules`-size problems at this (minimal-SPA) scale.
+
+### T2 — redeploy + cache
+- **Dep layer cached.** After editing `src/main.js`, the warm `--target dev`
+  rebuild printed `Using cache` for `COPY package.json` **and** `RUN npm install`;
+  only `COPY . .` (source) re-ran. **Warm rebuild ~0.67 s vs ~6.8 s cold** (~10×).
+- **Change visible:** `rm -f && run` redeploy on `:10000`, host curl of
+  `/src/main.js` returned the edited `BUILD_MARKER_V2`.
+
+### T3 — bind-mount hot-reload (D6 decision)
+- **Bind mount works** under nested rootless podman:
+  `podman run -v /workspace/vite-sample:/app -v /app/node_modules ...`. The Vite
+  dev server started normally (`ready in 183 ms`).
+- **uid/permissions:** files owned by uid 1000 (`builder`) on the outer appear as
+  `uid=0(root)` *inside* the inner container — the expected rootless-podman
+  container-root ↔ outer-user mapping. **No permission issues**; files read/served
+  fine. **Gotcha (not a blocker):** a bare `-v <proj>:/app` makes the host dir
+  **shadow the image's `/app/node_modules`** and the dev server can't find its
+  deps. Fix used: add an **anonymous volume `-v /app/node_modules`** to keep the
+  image's installed deps under the mount. The deploy skill MUST do this for the
+  hot-reload path.
+- **HMR fires on host-side edit without rebuild:** edited `src/main.js`
+  V2→V3 on the workspace side; with **no rebuild and no restart** the dev server
+  served `BUILD_MARKER_V3` and logged `[vite] page reload src/main.js`. inotify
+  propagates across the bind mount on native overlay — **no `usePolling` needed**.
+- **Verdict: hot-reload is VIABLE.** D6's optional bind-mount hot-reload works
+  under nested rootless podman. Recommend the DEV container use it (bind mount +
+  anon `node_modules` volume); rebuild-redeploy remains the fallback and is what
+  PROD/promote uses anyway.
+
+### T4 — non-JS stack
+- **Python/Flask: build + run + host curl all OK** on `:10002`. Single-stage
+  `python:3.12-alpine` Dockerfile; **`pip install` works under nesting** (cold
+  build ~6.2 s). Host curl returned `PY_MARKER_V1 flask-sample ok`.
+- **Nothing stack-specific leaked.** Identical mechanics as JS: write a
+  Dockerfile, `podman build -t`, `podman run -p hostPort:containerPort`. The
+  only contract is "a Dockerfile that publishes on the given port" — no JS
+  assumptions. Confirms D5's stack-agnosticism premise.
+
+### T5 — resources
+- **Images:** nominal sizes — node:20-alpine 138 MB, nginx:alpine 63.7 MB,
+  python:3.12-alpine 57.6 MB, vite dev 239 MB, vite prod 63.7 MB, py 71.2 MB.
+  **Actual on-disk podman storage volume: ~404 MB** (overlay layer dedup; the
+  alpine/node/nginx bases are shared across images).
+- **Peak build memory: ~1469 MB used** during a `--no-cache` dev build
+  (baseline ~1128 MB → build adds **~340 MB**). **No thrash, no OOM** (`dmesg`
+  clean); the 7.6 GiB box was never near pressure. Builds are largely
+  single-core and short.
+- **Implication for outer `--memory`/`--cpus` + box sizing:** build peaks are
+  modest (~340 MB delta) and brief; for the outer container, `--cpus 2` is
+  enough for snappy builds and a generous `--memory` (e.g. 2–4 GiB) leaves wide
+  headroom. At scale the **steady-state footprint of many idle inner containers**
+  (50 projects × 2) will dominate, not build spikes — size the box for resident
+  containers + image storage, not for build bursts. A 2 vCPU / 4 GiB box handles
+  the build/deploy loop comfortably; storage grows ~tens of MB per project after
+  base-image dedup.
+
+### Recommendations for the template (D5) and Stage 2
+- **Lean prod stage that worked (use as the template's shape):**
+  `build` stage runs `vite build`; `prod` = `nginx:alpine` + custom `nginx.conf`
+  (pid & temp paths under `/tmp`, `listen 8080`) + `COPY --from=build dist/` +
+  `USER nginx`. Result: 63.7 MB, non-root, no source/deps shipped. Avoids
+  orchestrator's "ship the whole build tree as root" anti-pattern (D5).
+- **`deps` layer as a cache anchor:** `COPY package.json* ./` then `RUN npm install`,
+  both *before* `COPY . .`, with `dev`/`build` both `FROM deps`. Gives sub-second warm
+  redeploys (T2) and lets prod's build reuse dev's install.
+- **Deploy skill MUST do / avoid:**
+  - **Hot-reload DEV:** `-v <projectDir>:/app` **plus** `-v /app/node_modules`
+    (anon volume) — without the second, the mount shadows deps and the dev server
+    breaks. (JS-specific; the skill should apply it only when a deps dir would be
+    shadowed.)
+  - **Don't assume container port 80.** The non-root prod stage listens on **8080**
+    and dev on **5173**; the skill maps `-p <reservedPort>:<containerPort>`, so the
+    container port is a template detail, not the reserved appx port. Keep them distinct.
+  - **Use fully-qualified image refs** (`docker.io/library/...`) in template
+    Dockerfiles — podman 4.9.3 has no implicit Docker Hub default; short names can
+    prompt/fail non-interactively. All builds here used FQ refs and were clean.
+  - Bind 0.0.0.0 in the dev server (`vite.config.js server.host`); prod nginx is fine.
+- **Open questions / blockers:** none blocking Stage 2.
+  - **Vite `allowedHosts` (verify + likely fix at Stage 1/3):** we only curled
+    via `127.0.0.1`, which Vite always allows, so the spike never exercised this.
+    Vite 5.4.x ships a `Host`-header allow-list (anti DNS-rebinding). Served
+    through appx's public **`*-dev.<domain>`** subdomain, the dev server sees a
+    domain that isn't on its list and answers *"Blocked request. This host is not
+    allowed."* — the user gets an error instead of their app. **PROD is
+    unaffected** (plain nginx, no host check). Fix: set
+    `server.allowedHosts` in the template's `vite.config.js`. Since the template
+    is baked once but the domain is per-project, make it env-driven and let the
+    deploy skill pass the value from `.pi/deployment.json`:
+    ```js
+    const devHost = process.env.VITE_DEV_ALLOWED_HOST;
+    server: { host: "0.0.0.0", port: 5173,
+              allowedHosts: devHost ? [devHost] : [] }
+    ```
+    then `podman run -e VITE_DEV_ALLOWED_HOST=eventx-dev.example.com ...`.
+    Alternatives: a leading-dot wildcard `[".example.com"]` (all dev subdomains
+    under one owned domain) or `true` (disable the check) — looser. Decide when
+    the template + skill are written; one-line change, not an infra blocker.
+  - **HMR websocket through the proxy (sibling of the above):** HMR uses a
+    `ws://`/`wss://` upgrade on the same published dev port and the same domain;
+    it worked on loopback here. appx's subdomain proxy must forward the WS
+    upgrade or the app loads but hot-reload silently dies —
+    track in the appx plan.
+  - The `container-smoke.sh` (Stage 2) should build **this Vite template's**
+    `dev`+`prod` targets and the bind-mount hot-reload run, not just nginx, to
+    keep these guarantees from regressing.
diff --git a/container/SPIKE-FINDINGS.md b/container/SPIKE-FINDINGS.md
new file mode 100644
index 0000000..146cf24
--- /dev/null
+++ b/container/SPIKE-FINDINGS.md
@@ -0,0 +1,259 @@
+# Stage 0 Spike Findings
+
+**Status:** COMPLETE — `./container/smoke.sh` exits 0 (11/11) under all hard constraints.
+**Brief:** `docs/plans/stage0-spike-brief.md`
+
+## Host
+
+- Provider / instance type: Hetzner KVM VM ("appx"), 4 vCPU, 7.6 GiB RAM, 75 GiB disk, 4 GiB swap
+- Distro + kernel (`lsb_release -ds`, `uname -rm`): **Ubuntu 26.04 LTS** (brief assumed 24.04 — see note), kernel `7.0.0-15-generic x86_64`
+- Arch: x86_64
+- Docker version (`docker --version`): Docker version 29.5.3, build d1c06ef (security options: apparmor, seccomp profile=builtin, cgroupns)
+- Outer image base: `ubuntu:24.04` (matches the brief's production target even though the host is 26.04)
+- Podman version inside outer (`podman --version`): **4.9.3** (Ubuntu 24.04 repo)
+
+**Note on distro:** the box is Ubuntu 26.04, not the 24.04 the brief targets. The
+relevant hardening is the same or stricter: `kernel.apparmor_restrict_unprivileged_userns = 1`
+(the 24.04 default that blocks nested userns) is active here too, and AppArmor is enabled
+(`/sys/module/apparmor/parameters/enabled = Y`). The OUTER IMAGE is `ubuntu:24.04`, so the
+in-image findings (podman 4.9.3, packaging, configs) are exactly the production target. The
+operator should still re-verify the host-side flags on a real 24.04 host before production.
+
+## Result summary
+
+**Yes — the unprivileged nested chain works on this host**, and `./container/smoke.sh` exits 0
+(11/11) with no `--privileged`, no added capabilities (no `SYS_ADMIN`), and the outer main
+process running as non-root uid 1000 (`builder`). The full path is proven: host → docker
+publish (`127.0.0.1:10000`) → outer container → rootless podman + slirp4netns → inner nginx,
+plus a working `podman build`, persistence across `docker restart`, and clean recovery via
+`podman start --all`. The single decisive fix was repackaging `newuidmap`/`newgidmap` with
+file capabilities (see headline finding); after that only four `docker run` knobs are needed,
+and `seccomp=unconfined` was further replaced by a strictly-tighter tailored profile.
+Remarkably, **no host-level sysctl/apparmor change was required** — the hardened Ubuntu
+defaults (`apparmor_restrict_unprivileged_userns=1`) are left untouched.
+
+## Headline finding: setuid-root `newuidmap` breaks rootless podman in an unprivileged container
+
+The single biggest blocker. Symptom on first run:
+
+```
+running `/usr/bin/newuidmap <pid> 0 1000 1 1 100000 65536`: newuidmap: write to uid_map failed: Operation not permitted
+Error: cannot set up namespace using "/usr/bin/newuidmap": exit status 1
+```
+
+**Root cause (traced with bpftrace, not guessed):** Ubuntu ships `newuidmap`/`newgidmap`
+as **setuid-root** (`-rwsr-xr-x`). Inside an unprivileged docker container they therefore
+run with `euid=0`. The kernel's `/proc/<pid>/uid_map` write path (`new_idmap_permitted`)
+has a shortcut: if the writer's euid equals the uid that *created* the target user
+namespace (here uid 1000 = `builder`), a single-extent self-map is allowed without any
+capability. With `euid=0` that shortcut does **not** apply, so the kernel instead requires
+`CAP_SYS_ADMIN` **in the initial user namespace**. docker's default capability bounding set
+(`0x00000000a80425fb`) excludes `CAP_SYS_ADMIN`, so the check fails. bpftrace on
+`cap_capable` confirmed the final failing check is `cap=21` (CAP_SYS_ADMIN), returning -1.
+
+This is **not** AppArmor and **not** seccomp — it fails identically with every AppArmor /
+seccomp sysctl set to 0. It is purely the setuid-vs-filecap packaging difference.
+
+**Fix (matches Fedora / `quay.io/podman/stable` / Dan Walsh's "Podman inside a container"
+blog):** ship the helpers with **file capabilities** instead of setuid-root, so euid stays
+1000 and the ownership shortcut applies:
+
+```dockerfile
+RUN chmod u-s /usr/bin/newuidmap /usr/bin/newgidmap \
+ && setcap cap_setuid+ep /usr/bin/newuidmap \
+ && setcap cap_setgid+ep /usr/bin/newgidmap
+```
+
+Verified: after this change `newuidmap <pid> 0 1000 1 1 100000 65536` returns OK with **no**
+added capabilities and `apparmor_restrict_unprivileged_userns=1` left at its hardened default.
+This is why `quay.io/podman/stable` "just works" as a nested image — it already does this.
+
+## Final `docker run` flag set
+
+From `container/run-outer.sh` (deletion-tested in T2 — each flag removed individually and the
+exact resulting error recorded):
+
+```
+docker run -d --name builder-outer \
+  --device /dev/net/tun \
+  --security-opt seccomp=$(pwd)/seccomp-builder.json \
+  --security-opt apparmor=unconfined \
+  --security-opt systempaths=unconfined \
+  -v builder-workspace:/workspace \
+  -v builder-podman-storage:/home/builder/.local/share/containers \
+  -p 127.0.0.1:10000-10009:10000-10009 \
+  builder-outer
+```
+
+| Flag | Needed? | Exact error when removed |
+| --- | --- | --- |
+| `--device /dev/net/tun` | **Yes** | `FAIL@run: /usr/bin/slirp4netns failed: "open(\"/dev/net/tun\"): No such file or directory"` — rootless slirp4netns networking is dead without it |
+| `--security-opt seccomp=seccomp-builder.json` | **Yes** | With docker's DEFAULT profile: `FAIL@info: Error: cannot re-exec process` (default profile blocks `mount(2)` and friends). Tailored profile is strictly tighter than `unconfined` — see T2 below |
+| `--security-opt apparmor=unconfined` | **Yes** | `FAIL@info: mount /home/builder/.local/share/containers/storage/overlay...: permission denied` — docker-default AppArmor profile (`docker-default`) blocks the rootless overlay `mount(2)`. **NB this is NOT the host `apparmor_restrict_unprivileged_userns` problem** — that one is solved entirely by the file-cap `newuidmap` fix. TODO: replace with a tailored AppArmor profile (deferred; containment loss is bounded — seccomp + userns + caps still apply) |
+| `--security-opt systempaths=unconfined` | **Yes** | `FAIL@run: crun: mount \`proc\` to \`proc\`: Operation not permitted` — docker masks `/proc` submounts (`/proc/sys`, `/proc/kcore`, ...); the kernel `mount_too_revealing()` check then refuses the inner container's fresh `proc` mount. `systempaths=unconfined` clears docker's `MaskedPaths`/`ReadonlyPaths`. **Adds no capabilities and no privilege**; the inner containers still get their own `/proc` masks from crun |
+| `--device /dev/fuse` | **No (removed)** | Was in the draft for fuse-overlayfs. Native overlay (T4) needs no FUSE device, so this flag was deleted |
+| `-v builder-workspace` | **Yes** | persistence: project files must survive container recreate (T3 verified) |
+| `-v builder-podman-storage` | **Yes** | persistence: inner images/containers metadata must survive recreate (T3 verified) |
+| `-p 127.0.0.1:10000-10009` | **Yes** | the host→inner port chain; loopback-only so appx proxies in. Without it the host curl check cannot reach the inner nginx |
+
+No `--cap-add` of any kind is used. `docker inspect` confirms `Privileged=false`.
+
+## T2 — tailored seccomp profile (replaces `seccomp=unconfined`)
+
+The brief asks to prefer Podman's `seccomp.json` over `unconfined` if it works. Result:
+
+- Podman's **stock** `seccomp.json` (from `containers-common`, present in the image at
+  `/usr/share/containers/seccomp.json`) gets further than docker's default (it allows
+  `mount`, so `podman info` succeeds) but the inner `podman run` dies at
+  `crun: sethostname: Operation not permitted`. Reason: the stock profile *allow-lists*
+  `sethostname` (and `setdomainname`, `setns`, plus `bpf`, `perf_event_open`, `quotactl`,
+  `fanotify_init`, `lookup_dcookie`) only under `includes.caps = [CAP_SYS_ADMIN]`. Our unprivileged
+  outer has no `CAP_SYS_ADMIN`, so the runtime drops those allow-rules and the syscalls fall
+  through to `ERRNO`.
+- **Fix adopted:** `container/seccomp-builder.json` = stock profile with the `CAP_SYS_ADMIN`
+  gate removed from **only** `sethostname`, `setdomainname`, `setns` (the namespace-setup
+  syscalls the nested runtime needs). The genuinely dangerous gated syscalls
+  (`bpf`, `perf_event_open`, `quotactl`, `fanotify_init`, `lookup_dcookie`) stay **denied**.
+  This is **strictly tighter than `unconfined`**. `container/gen-seccomp.sh` regenerates it
+  from the image's stock profile and documents the provenance. Smoke stays 11/11 with it.
+
+## T2 sub-question — outer runtime: docker vs podman (informs `system-setup.sh`)
+
+Host change: installed `podman` 5.7.0 on the host to test it as the *outer* runtime.
+
+- **Rootless podman as outer: DOES NOT WORK.** Fails at `newuidmap` before anything else:
+  rootless podman runs the outer container inside *spike's* user namespace, whose `uid_map`
+  is `0 1000 1 / 1 100000 65536` — i.e. only 65536 subuids exist *inside* the outer userns.
+  The nested `builder` then asks to map its own `builder:100000:65536` range, which does not
+  fit → `newuidmap: write to uid_map failed: Operation not permitted`. This is the classic
+  rootless-in-rootless subuid-exhaustion problem; it would need a vastly larger host subuid
+  allocation **and** nested-range planning. Not viable as-is. (The seccomp advantage is moot
+  because the chain breaks earlier.)
+- **Rootful podman as outer (`sudo podman run`): WORKS, with a SMALLER security-flag set.**
+  Real-root model (like docker) so `newuidmap` is fine. `podman info` + inner `podman run`
+  creation succeed with **only** `--device /dev/net/tun --security-opt apparmor=unconfined`:
+  - **No `seccomp=` override needed** — podman's *default* seccomp profile allows `mount(2)`
+    (confirms the brief's premise). This is podman-outer's real advantage.
+  - **No `systempaths=unconfined` needed** — podman does not mask `/proc` the way docker
+    does, so the inner `proc` mount is not blocked.
+  - Still needs `apparmor=unconfined` (podman's default container AppArmor profile also
+    blocks the overlay `mount`) and `--device /dev/net/tun`.
+  - **Caveat (new delta):** podman's default network gave the outer container **no working
+    DNS** (even the outer could not resolve `registry-1.docker.io`), so image *pulls* fail
+    until DNS is configured (`--dns`, or host `aardvark-dns`/`netavark` setup). Docker's
+    default bridge ships an embedded resolver (`127.0.0.11`), so docker-outer has DNS for
+    free.
+- **Recommendation:** docker-outer is the proven, complete, lowest-friction path and is what
+  `run-outer.sh` uses. Rootful-podman-outer is a viable alternative that trades two security
+  flags (`seccomp`, `systempaths`) for (a) running the supervisor as root and (b) a DNS-config
+  requirement. If `system-setup.sh` later prefers podman-on-host for a smaller flag surface,
+  it must run podman **rootful** and configure container DNS. Rootless-podman-outer is a dead
+  end without large nested-subuid provisioning.
+
+## Host prerequisites
+
+**None required for the docker-outer path.** This is the headline operational result:
+
+- `kernel.apparmor_restrict_unprivileged_userns` was left at its hardened default **`1`**.
+  (It was toggled to 0 *during diagnosis only* and restored; the final green smoke runs with
+  it `=1`.) The file-cap `newuidmap` fix is what makes nested userns work, not a host sysctl.
+- No host AppArmor profile added.
+- No host sysctl changes persisted.
+- Only host package needed: **docker** (already required). The image installs its own
+  `podman`, `uidmap`, `slirp4netns`, `fuse-overlayfs`, `libcap2-bin`.
+- `podman` 5.7.0 was installed on the host **only to answer the T2 outer-runtime
+  sub-question**; it is NOT needed for the docker-outer path and can be removed.
+
+So `system-setup.sh` needs nothing beyond a docker install for the docker-outer design.
+
+### Host changes log (everything touched on the box)
+
+| Change | Persisted? | Purpose | Needed for the solution? |
+| --- | --- | --- | --- |
+| `sysctl kernel.apparmor_restrict_unprivileged_userns` toggled 1↔0 | **No** — restored to `1` | Diagnosis only (proved the blocker was NOT this sysctl) | No |
+| `sysctl kernel.apparmor_restrict_unprivileged_unconfined` toggled 1↔0 | **No** — restored to `1` | Diagnosis only | No |
+| `sysctl kernel.unprivileged_userns_apparmor_policy` toggled 1↔0 | **No** — restored to `1` | Diagnosis only | No |
+| `apt-get install podman` (5.7.0) | Yes (removable) | Answer the T2 outer-runtime sub-question | No (docker-outer path) |
+| `apt-get install bpftrace` | Yes (removable) | Trace the `newuidmap` EPERM to `cap_capable cap=21` | No |
+| `apt-get install strace gcc libc6-dev` **inside the outer container** | container-only | Diagnosis of the setuid/cap behaviour | No |
+
+Final host sysctl state verified: all three `= 1` (hardened defaults). The green smoke run
+uses **zero** persisted host changes beyond the pre-existing docker install.
+
+## Storage driver (T4)
+
+- **fuse-overlayfs:** works. Needs `--device /dev/fuse` and the `fuse-overlayfs` binary +
+  `mount_program` in `storage.conf`. Build benchmark (300-file image): **~1281 ms**.
+- **native rootless overlayfs (no `mount_program`):** works on this kernel 7.0 host (kernel
+  ≥ 5.13 supports rootless native overlay). Needs **no** `/dev/fuse` device. Build benchmark:
+  **~582 ms** — **~2.2× faster** than fuse-overlayfs.
+- **vfs (last-resort fallback):** not needed and not pinned — native overlay works, so the
+  slow full-copy VFS driver was not required.
+- **Pinned choice:** **native rootless overlay** (`storage.conf` = `[storage] driver =
+  "overlay"` with no `mount_program`). Faster *and* lets us drop `--device /dev/fuse`.
+  `fuse-overlayfs` is left installed as a documented fallback only.
+
+## Warmup timing (T3)
+
+- Cold first `podman info` (fresh storage volume): **~0.25 s** (`time` logged by entrypoint).
+- Warmed (entrypoint already ran / after restart): **~0.16–0.23 s**.
+- Negligible either way with native overlay; no warmup optimisation needed for Stage 2.
+
+## Restart behaviour (T3)
+
+- **Workspace volume:** survives `docker restart` (marker file intact). ✓
+- **Podman image store:** survives `docker restart` (built image still present). ✓
+- **Running inner containers after `docker restart`:** do not survive — afterwards they
+  are in state `created` (not `running`). Expected: `docker restart` kills all inner processes.
+- **`podman start --all` viable as the Stage 4 recovery mechanism? YES**, but only after the
+  entrypoint wipes the **stale transient runtime state** on each boot. `XDG_RUNTIME_DIR`
+  (`/tmp/runtime-builder`) lives in the container FS and *survives* `docker restart`, but the
+  rootless-podman **pause process** and **crun** state it references do not. Left stale,
+  podman fails with `invalid internal status, try resetting the pause process with "podman
+  system migrate"` and `podman start` fails with `crun: container already exists`. The
+  entrypoint now `rm -rf`s `$XDG_RUNTIME_DIR/{libpod,containers,netns,crun}` on every boot;
+  after that `podman start --all` cleanly resurrects the inner container **with its port
+  forwarding** (host curl succeeds again). The smoke test exercises this path (via `podman start spike-web`).
+
+## Port chain notes
+
+- Full chain works: host `127.0.0.1:10000` → docker publish → outer netns → rootless
+  **slirp4netns** → inner nginx `:80`. No latency surprises on loopback.
+- **slirp4netns requires `/dev/net/tun`** in the outer container (`--device /dev/net/tun`);
+  this is the only device the docker-outer path needs.
+- IPv6: harmless warnings only (`failed to set net.ipv6.conf.default.accept_dad ...`); IPv4
+  forwarding unaffected.
+- The read-only `net.ipv4.ping_group_range` sysctl that crun tries to set on container
+  create is suppressed by `default_sysctls = []` in `containers.conf` (baked into the image).
+- DNS *inside inner containers* under the docker-outer path works (docker bridge resolver).
+  Under podman-outer it does not (see T2 sub-question).
+
+## Recommendations for Stage 2
+
+The Stage 2 image and appx's Stage 3 supervisor should transcribe this verbatim:
+
+1. **Keep the four `docker run` knobs** exactly: `--device /dev/net/tun`,
+   `--security-opt seccomp=<seccomp-builder.json>`, `--security-opt apparmor=unconfined`,
+   `--security-opt systempaths=unconfined`. No `--privileged`, no `--cap-add`.
+2. **Keep the `newuidmap`/`newgidmap` file-cap fix** in the Dockerfile — it is the linchpin.
+   If Stage 2 switches the base to `quay.io/podman/stable`, that image already does this.
+3. **Pin native overlay** storage (no `mount_program`); do not re-add `--device /dev/fuse`.
+4. **Keep the entrypoint runtime-state wipe** — it is what makes `docker restart` +
+   `podman start --all` a reliable Stage 4 recovery mechanism.
+5. Ship `seccomp-builder.json` alongside the deploy scripts and reference it by absolute path;
+   `gen-seccomp.sh` regenerates it if the base podman version changes.
+6. Replace the spike `CMD ["sleep","infinity"]` with the agent-server process; publish 4001
+   and the app port range; add `AGENT_SERVER_*` env. The security flags are unaffected.
+7. **Deferred TODO:** replace `apparmor=unconfined` with a tailored AppArmor profile that
+   permits the overlay `mount` (mirrors what we did for seccomp). Bounded containment loss
+   for now (seccomp + userns + cap-bounding still apply).
+
+## Open questions / blockers
+
+- **None blocking.** The chain works unprivileged with hardened host defaults.
+- Re-verify on a genuine **Ubuntu 24.04** host (this box is 26.04, though the image is 24.04)
+  before production — expected to pass, but the host kernel/apparmor build differs.
+- Tailored AppArmor profile (item 7 above) is the one remaining hardening refinement.
+- If appx ever wants podman-on-host, settle the rootful-podman DNS configuration
+  (`aardvark-dns`/`netavark` or `--dns`) noted in the T2 sub-question.
+
diff --git a/container/entrypoint.sh b/container/entrypoint.sh
new file mode 100755
index 0000000..aed9caf
--- /dev/null
+++ b/container/entrypoint.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Outer-container entrypoint (Stage 0 spike).
+#
+# 1. Provision the runtime dir rootless podman expects (no systemd-logind here).
+# 2. Warm up podman storage so the first real build/run isn't slow and so a
+#    broken nested environment is visible in `docker logs` immediately.
+# 3. Exec the CMD (spike: sleep infinity; Stage 2: agent-server).
+set -euo pipefail
+
+export XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/tmp/runtime-$(id -un)}"
+mkdir -p "$XDG_RUNTIME_DIR"
+
+# Resolved once + exported above so podman and the `:?` guard below agree.
+# XDG_RUNTIME_DIR is meant to be ephemeral (tmpfs, wiped on boot); here it lives
+# in the container filesystem, so it SURVIVES `docker restart` — but the
+# rootless-podman pause process it points at does NOT. The stale pause-pid then
+# makes every podman call fail with: "invalid internal status, try resetting the
+# pause process with 'podman system migrate': could not find any running process"
+# Wiping the transient runtime state on each boot restores clean-start
+# semantics; persistent state (images/containers metadata) lives in the
+# ~/.local/share/containers named volume and is untouched.
+rm -rf "${XDG_RUNTIME_DIR:?}"/{libpod,containers,netns,crun} 2>/dev/null || true
+
+echo "[entrypoint] podman warmup starting ($(date -Is))"
+if time podman info > /tmp/podman-info.log 2>&1; then
+	echo "[entrypoint] podman warmup OK"
+else
+	# Don't die: keep the container alive so the spike agent can exec in and debug.
+	echo "[entrypoint] WARNING: podman info FAILED — see /tmp/podman-info.log:"
+	tail -n 20 /tmp/podman-info.log || true
+fi
+
+exec "$@"
diff --git a/container/gen-seccomp.sh b/container/gen-seccomp.sh
new file mode 100755
index 0000000..baaadea
--- /dev/null
+++ b/container/gen-seccomp.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# Regenerate container/seccomp-builder.json from podman's stock seccomp profile.
+#
+# Why this profile exists (Stage 0 spike, task T2): docker's DEFAULT seccomp
+# profile blocks mount(2), which rootless podman needs even for unprivileged
+# overlay/bind mounts, so the naive fix is seccomp=unconfined. Podman ships a
+# profile that allows mount, but it gates a handful of syscalls behind
+# "CAP_SYS_ADMIN" via the runtime's `includes.caps` mechanism. Our OUTER
+# container is unprivileged (no CAP_SYS_ADMIN), so those rules are dropped and
+# the gated syscalls fall through to ERRNO. Inner-container setup then dies at
+# `sethostname: Operation not permitted`.
+#
+# This profile = podman's stock profile with the CAP_SYS_ADMIN gate removed
+# from ONLY the namespace-setup syscalls the nested runtime needs
+# (sethostname, setdomainname, setns). The genuinely dangerous gated syscalls
+# (bpf, perf_event_open, quotactl, fanotify_init, lookup_dcookie) stay denied.
+# Net result: a tailored profile that is strictly tighter than `unconfined`.
+set -euo pipefail
+cd "$(dirname "$0")"
+docker build -t builder-outer . >/dev/null
+cid="" stock=$(mktemp) # unique scratch file; the EXIT trap removes it and the container
+trap '[ -z "$cid" ] || docker rm "$cid" >/dev/null 2>&1 || true; rm -f "$stock"' EXIT
+cid=$(docker create builder-outer) # created only as a `docker cp` source, never started
+docker cp "$cid:/usr/share/containers/seccomp.json" "$stock"
+STOCK_JSON="$stock" python3 - <<'PY'
+import json, os
+with open(os.environ['STOCK_JSON']) as src: d=json.load(src)
+NEED={'sethostname','setdomainname','setns'}
+for s in d['syscalls']:
+    if s['action']=='SCMP_ACT_ALLOW' and s.get('includes',{}).get('caps')==['CAP_SYS_ADMIN']:
+        s['names']=[n for n in s['names'] if n in NEED]
+        s.pop('includes',None)
+    # Podman's stock profile also ships a complementary SCMP_ACT_ERRNO (deny)
+    # rule gated on excludes.caps=[CAP_SYS_ADMIN] that ALSO names these three
+    # syscalls. If left in place the generated profile both ALLOWs and ERRNOs
+    # the same syscalls; which rule wins is libseccomp/runtime-version-defined.
+    # Strip the names from the deny rule so the ALLOW above is unambiguous.
+    if s['action']=='SCMP_ACT_ERRNO' and s.get('excludes',{}).get('caps')==['CAP_SYS_ADMIN']:
+        s['names']=[n for n in s['names'] if n not in NEED]
+with open('seccomp-builder.json','w') as out: json.dump(d,out,indent=1)
+print("wrote seccomp-builder.json")
+PY
diff --git a/container/run-outer.sh b/container/run-outer.sh
new file mode 100755
index 0000000..ddb4531
--- /dev/null
+++ b/container/run-outer.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Stage 0 spike: build the outer builder image and (re)create its container.
+#
+# The flags passed to `docker run` are the FINAL PROVEN minimal set (task T2
+# complete): each was deletion-tested and is justified in one line further
+# down and in SPIKE-FINDINGS.md. `./smoke.sh` exits 0 with exactly these flags.
+#
+# Hard constraints honoured: no --privileged, no --cap-add SYS_ADMIN, non-root
+# user (outer main process is uid 1000 'builder').
+set -euo pipefail
+cd "$(dirname "$0")"
+
+readonly IMAGE="builder-outer" NAME="builder-outer"
+readonly SECCOMP="${PWD}/seccomp-builder.json"
+
+docker build -t "$IMAGE" .
+docker rm -f "$NAME" 2>/dev/null || :
+
+# Flags are gathered in an array; the table after the run explains each one.
+run_flags=(-d --name "$NAME"
+	--device /dev/net/tun
+	--security-opt "seccomp=$SECCOMP"
+	--security-opt apparmor=unconfined
+	--security-opt systempaths=unconfined
+	-v builder-workspace:/workspace
+	-v builder-podman-storage:/home/builder/.local/share/containers
+	-p 127.0.0.1:10000-10009:10000-10009)
+docker run "${run_flags[@]}" "$IMAGE"
+
+# Final proven flag set (deletion-tested in T2; see SPIKE-FINDINGS.md):
+#   --device /dev/net/tun        rootless slirp4netns networking opens /dev/net/tun;
+#                                without it: 'open("/dev/net/tun"): No such file'
+#   seccomp=seccomp-builder.json tailored profile (podman's stock + ungated
+#                                sethostname/setdomainname/setns). Docker's
+#                                DEFAULT seccomp blocks mount(2) -> 'cannot
+#                                re-exec process'. Strictly tighter than
+#                                unconfined; see gen-seccomp.sh for provenance
+#   apparmor=unconfined          docker-default apparmor blocks the overlay
+#                                mount(2): 'mount ...overlay...: permission
+#                                denied'. (Host apparmor_restrict_unprivileged_
+#                                userns is handled by the file-cap newuidmap fix,
+#                                NOT by this flag.) TODO: tailored apparmor profile
+#   systempaths=unconfined       docker masks /proc submounts; kernel
+#                                mount_too_revealing() then blocks the inner
+#                                container's fresh proc mount: 'mount proc to
+#                                proc: Operation not permitted'. No caps/privilege
+#   builder-workspace volume     project files must survive container recreate
+#   builder-podman-storage vol   inner images/containers must survive recreate
+#   -p 127.0.0.1:10000-10009     app port range, loopback-only (appx proxies in)
+
+# Brief pause so the entrypoint's output is (presumably) in the logs by now.
+sleep 2
+docker logs "$NAME"
+printf '\n%s\n' "outer container '$NAME' is up. Try: docker exec -it $NAME podman info"
diff --git a/container/seccomp-builder.json b/container/seccomp-builder.json
new file mode 100644
index 0000000..c5a34e8
--- /dev/null
+++ b/container/seccomp-builder.json
@@ -0,0 +1,1038 @@
+{
+ "defaultAction": "SCMP_ACT_ERRNO",
+ "defaultErrnoRet": 38,
+ "defaultErrno": "ENOSYS",
+ "archMap": [
+  {
+   "architecture": "SCMP_ARCH_X86_64",
+   "subArchitectures": [
+    "SCMP_ARCH_X86",
+    "SCMP_ARCH_X32"
+   ]
+  },
+  {
+   "architecture": "SCMP_ARCH_AARCH64",
+   "subArchitectures": [
+    "SCMP_ARCH_ARM"
+   ]
+  },
+  {
+   "architecture": "SCMP_ARCH_MIPS64",
+   "subArchitectures": [
+    "SCMP_ARCH_MIPS",
+    "SCMP_ARCH_MIPS64N32"
+   ]
+  },
+  {
+   "architecture": "SCMP_ARCH_MIPS64N32",
+   "subArchitectures": [
+    "SCMP_ARCH_MIPS",
+    "SCMP_ARCH_MIPS64"
+   ]
+  },
+  {
+   "architecture": "SCMP_ARCH_MIPSEL64",
+   "subArchitectures": [
+    "SCMP_ARCH_MIPSEL",
+    "SCMP_ARCH_MIPSEL64N32"
+   ]
+  },
+  {
+   "architecture": "SCMP_ARCH_MIPSEL64N32",
+   "subArchitectures": [
+    "SCMP_ARCH_MIPSEL",
+    "SCMP_ARCH_MIPSEL64"
+   ]
+  },
+  {
+   "architecture": "SCMP_ARCH_S390X",
+   "subArchitectures": [
+    "SCMP_ARCH_S390"
+   ]
+  }
+ ],
+ "syscalls": [
+  {
+   "names": [
+    "bdflush",
+    "io_pgetevents",
+    "kexec_file_load",
+    "kexec_load",
+    "migrate_pages",
+    "move_pages",
+    "nfsservctl",
+    "nice",
+    "oldfstat",
+    "oldlstat",
+    "oldolduname",
+    "oldstat",
+    "olduname",
+    "pciconfig_iobase",
+    "pciconfig_read",
+    "pciconfig_write",
+    "sgetmask",
+    "ssetmask",
+    "swapcontext",
+    "swapoff",
+    "swapon",
+    "sysfs",
+    "uselib",
+    "userfaultfd",
+    "ustat",
+    "vm86",
+    "vm86old",
+    "vmsplice"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {},
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "_llseek",
+    "_newselect",
+    "accept",
+    "accept4",
+    "access",
+    "adjtimex",
+    "alarm",
+    "bind",
+    "brk",
+    "capget",
+    "capset",
+    "chdir",
+    "chmod",
+    "chown",
+    "chown32",
+    "clock_adjtime",
+    "clock_adjtime64",
+    "clock_getres",
+    "clock_getres_time64",
+    "clock_gettime",
+    "clock_gettime64",
+    "clock_nanosleep",
+    "clock_nanosleep_time64",
+    "clone",
+    "clone3",
+    "close",
+    "close_range",
+    "connect",
+    "copy_file_range",
+    "creat",
+    "dup",
+    "dup2",
+    "dup3",
+    "epoll_create",
+    "epoll_create1",
+    "epoll_ctl",
+    "epoll_ctl_old",
+    "epoll_pwait",
+    "epoll_pwait2",
+    "epoll_wait",
+    "epoll_wait_old",
+    "eventfd",
+    "eventfd2",
+    "execve",
+    "execveat",
+    "exit",
+    "exit_group",
+    "faccessat",
+    "faccessat2",
+    "fadvise64",
+    "fadvise64_64",
+    "fallocate",
+    "fanotify_mark",
+    "fchdir",
+    "fchmod",
+    "fchmodat",
+    "fchown",
+    "fchown32",
+    "fchownat",
+    "fcntl",
+    "fcntl64",
+    "fdatasync",
+    "fgetxattr",
+    "flistxattr",
+    "flock",
+    "fork",
+    "fremovexattr",
+    "fsconfig",
+    "fsetxattr",
+    "fsmount",
+    "fsopen",
+    "fspick",
+    "fstat",
+    "fstat64",
+    "fstatat64",
+    "fstatfs",
+    "fstatfs64",
+    "fsync",
+    "ftruncate",
+    "ftruncate64",
+    "futex",
+    "futex_time64",
+    "futimesat",
+    "get_mempolicy",
+    "get_robust_list",
+    "get_thread_area",
+    "getcpu",
+    "getcwd",
+    "getdents",
+    "getdents64",
+    "getegid",
+    "getegid32",
+    "geteuid",
+    "geteuid32",
+    "getgid",
+    "getgid32",
+    "getgroups",
+    "getgroups32",
+    "getitimer",
+    "getpeername",
+    "getpgid",
+    "getpgrp",
+    "getpid",
+    "getppid",
+    "getpriority",
+    "getrandom",
+    "getresgid",
+    "getresgid32",
+    "getresuid",
+    "getresuid32",
+    "getrlimit",
+    "getrusage",
+    "getsid",
+    "getsockname",
+    "getsockopt",
+    "gettid",
+    "gettimeofday",
+    "getuid",
+    "getuid32",
+    "getxattr",
+    "inotify_add_watch",
+    "inotify_init",
+    "inotify_init1",
+    "inotify_rm_watch",
+    "io_cancel",
+    "io_destroy",
+    "io_getevents",
+    "io_setup",
+    "io_submit",
+    "ioctl",
+    "ioprio_get",
+    "ioprio_set",
+    "ipc",
+    "keyctl",
+    "kill",
+    "landlock_add_rule",
+    "landlock_create_ruleset",
+    "landlock_restrict_self",
+    "lchown",
+    "lchown32",
+    "lgetxattr",
+    "link",
+    "linkat",
+    "listen",
+    "listxattr",
+    "llistxattr",
+    "lremovexattr",
+    "lseek",
+    "lsetxattr",
+    "lstat",
+    "lstat64",
+    "madvise",
+    "mbind",
+    "membarrier",
+    "memfd_create",
+    "memfd_secret",
+    "mincore",
+    "mkdir",
+    "mkdirat",
+    "mknod",
+    "mknodat",
+    "mlock",
+    "mlock2",
+    "mlockall",
+    "mmap",
+    "mmap2",
+    "mount",
+    "mount_setattr",
+    "move_mount",
+    "mprotect",
+    "mq_getsetattr",
+    "mq_notify",
+    "mq_open",
+    "mq_timedreceive",
+    "mq_timedreceive_time64",
+    "mq_timedsend",
+    "mq_timedsend_time64",
+    "mq_unlink",
+    "mremap",
+    "msgctl",
+    "msgget",
+    "msgrcv",
+    "msgsnd",
+    "msync",
+    "munlock",
+    "munlockall",
+    "munmap",
+    "name_to_handle_at",
+    "nanosleep",
+    "newfstatat",
+    "open",
+    "open_tree",
+    "openat",
+    "openat2",
+    "pause",
+    "pidfd_getfd",
+    "pidfd_open",
+    "pidfd_send_signal",
+    "pipe",
+    "pipe2",
+    "pivot_root",
+    "pkey_alloc",
+    "pkey_free",
+    "pkey_mprotect",
+    "poll",
+    "ppoll",
+    "ppoll_time64",
+    "prctl",
+    "pread64",
+    "preadv",
+    "preadv2",
+    "prlimit64",
+    "process_mrelease",
+    "process_vm_readv",
+    "process_vm_writev",
+    "pselect6",
+    "pselect6_time64",
+    "ptrace",
+    "pwrite64",
+    "pwritev",
+    "pwritev2",
+    "read",
+    "readahead",
+    "readdir",
+    "readlink",
+    "readlinkat",
+    "readv",
+    "reboot",
+    "recv",
+    "recvfrom",
+    "recvmmsg",
+    "recvmmsg_time64",
+    "recvmsg",
+    "remap_file_pages",
+    "removexattr",
+    "rename",
+    "renameat",
+    "renameat2",
+    "restart_syscall",
+    "rmdir",
+    "rseq",
+    "rt_sigaction",
+    "rt_sigpending",
+    "rt_sigprocmask",
+    "rt_sigqueueinfo",
+    "rt_sigreturn",
+    "rt_sigsuspend",
+    "rt_sigtimedwait",
+    "rt_sigtimedwait_time64",
+    "rt_tgsigqueueinfo",
+    "sched_get_priority_max",
+    "sched_get_priority_min",
+    "sched_getaffinity",
+    "sched_getattr",
+    "sched_getparam",
+    "sched_getscheduler",
+    "sched_rr_get_interval",
+    "sched_rr_get_interval_time64",
+    "sched_setaffinity",
+    "sched_setattr",
+    "sched_setparam",
+    "sched_setscheduler",
+    "sched_yield",
+    "seccomp",
+    "select",
+    "semctl",
+    "semget",
+    "semop",
+    "semtimedop",
+    "semtimedop_time64",
+    "send",
+    "sendfile",
+    "sendfile64",
+    "sendmmsg",
+    "sendmsg",
+    "sendto",
+    "set_mempolicy",
+    "set_robust_list",
+    "set_thread_area",
+    "set_tid_address",
+    "setfsgid",
+    "setfsgid32",
+    "setfsuid",
+    "setfsuid32",
+    "setgid",
+    "setgid32",
+    "setgroups",
+    "setgroups32",
+    "setitimer",
+    "setns",
+    "setpgid",
+    "setpriority",
+    "setregid",
+    "setregid32",
+    "setresgid",
+    "setresgid32",
+    "setresuid",
+    "setresuid32",
+    "setreuid",
+    "setreuid32",
+    "setrlimit",
+    "setsid",
+    "setsockopt",
+    "setuid",
+    "setuid32",
+    "setxattr",
+    "shmat",
+    "shmctl",
+    "shmdt",
+    "shmget",
+    "shutdown",
+    "sigaction",
+    "sigaltstack",
+    "signal",
+    "signalfd",
+    "signalfd4",
+    "sigpending",
+    "sigprocmask",
+    "sigreturn",
+    "sigsuspend",
+    "socketcall",
+    "socketpair",
+    "splice",
+    "stat",
+    "stat64",
+    "statfs",
+    "statfs64",
+    "statx",
+    "symlink",
+    "symlinkat",
+    "sync",
+    "sync_file_range",
+    "syncfs",
+    "syscall",
+    "sysinfo",
+    "syslog",
+    "tee",
+    "tgkill",
+    "time",
+    "timer_create",
+    "timer_delete",
+    "timer_getoverrun",
+    "timer_gettime",
+    "timer_gettime64",
+    "timer_settime",
+    "timer_settime64",
+    "timerfd",
+    "timerfd_create",
+    "timerfd_gettime",
+    "timerfd_gettime64",
+    "timerfd_settime",
+    "timerfd_settime64",
+    "times",
+    "tkill",
+    "truncate",
+    "truncate64",
+    "ugetrlimit",
+    "umask",
+    "umount",
+    "umount2",
+    "uname",
+    "unlink",
+    "unlinkat",
+    "unshare",
+    "utime",
+    "utimensat",
+    "utimensat_time64",
+    "utimes",
+    "vfork",
+    "wait4",
+    "waitid",
+    "waitpid",
+    "write",
+    "writev"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {}
+  },
+  {
+   "names": [
+    "personality"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [
+    {
+     "index": 0,
+     "value": 0,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_EQ"
+    }
+   ],
+   "comment": "",
+   "includes": {},
+   "excludes": {}
+  },
+  {
+   "names": [
+    "personality"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [
+    {
+     "index": 0,
+     "value": 8,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_EQ"
+    }
+   ],
+   "comment": "",
+   "includes": {},
+   "excludes": {}
+  },
+  {
+   "names": [
+    "personality"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [
+    {
+     "index": 0,
+     "value": 131072,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_EQ"
+    }
+   ],
+   "comment": "",
+   "includes": {},
+   "excludes": {}
+  },
+  {
+   "names": [
+    "personality"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [
+    {
+     "index": 0,
+     "value": 131080,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_EQ"
+    }
+   ],
+   "comment": "",
+   "includes": {},
+   "excludes": {}
+  },
+  {
+   "names": [
+    "personality"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [
+    {
+     "index": 0,
+     "value": 4294967295,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_EQ"
+    }
+   ],
+   "comment": "",
+   "includes": {},
+   "excludes": {}
+  },
+  {
+   "names": [
+    "sync_file_range2"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "arches": [
+     "ppc64le"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "arm_fadvise64_64",
+    "arm_sync_file_range",
+    "breakpoint",
+    "cacheflush",
+    "set_tls",
+    "sync_file_range2"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "arches": [
+     "arm",
+     "arm64"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "arch_prctl"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "arches": [
+     "amd64",
+     "x32"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "modify_ldt"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "arches": [
+     "amd64",
+     "x32",
+     "x86"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "s390_pci_mmio_read",
+    "s390_pci_mmio_write",
+    "s390_runtime_instr"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "arches": [
+     "s390",
+     "s390x"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "open_by_handle_at"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "caps": [
+     "CAP_DAC_READ_SEARCH"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "open_by_handle_at"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_DAC_READ_SEARCH"
+    ]
+   },
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "setdomainname",
+    "sethostname",
+    "setns"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "excludes": {}
+  },
+  {
+   "names": [
+    "bpf",
+    "fanotify_init",
+    "lookup_dcookie",
+    "perf_event_open",
+    "quotactl"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_SYS_ADMIN"
+    ]
+   },
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "chroot"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "caps": [
+     "CAP_SYS_CHROOT"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "chroot"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_SYS_CHROOT"
+    ]
+   },
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "delete_module",
+    "finit_module",
+    "init_module",
+    "query_module"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "caps": [
+     "CAP_SYS_MODULE"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "delete_module",
+    "finit_module",
+    "init_module",
+    "query_module"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_SYS_MODULE"
+    ]
+   },
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "acct"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "caps": [
+     "CAP_SYS_PACCT"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "acct"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_SYS_PACCT"
+    ]
+   },
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "kcmp",
+    "process_madvise"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "caps": [
+     "CAP_SYS_PTRACE"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "kcmp",
+    "process_madvise"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_SYS_PTRACE"
+    ]
+   },
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "ioperm",
+    "iopl"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "caps": [
+     "CAP_SYS_RAWIO"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "ioperm",
+    "iopl"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_SYS_RAWIO"
+    ]
+   },
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "clock_settime",
+    "clock_settime64",
+    "settimeofday",
+    "stime"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "caps": [
+     "CAP_SYS_TIME"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "clock_settime",
+    "clock_settime64",
+    "settimeofday",
+    "stime"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_SYS_TIME"
+    ]
+   },
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "vhangup"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [],
+   "comment": "",
+   "includes": {
+    "caps": [
+     "CAP_SYS_TTY_CONFIG"
+    ]
+   },
+   "excludes": {}
+  },
+  {
+   "names": [
+    "vhangup"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_SYS_TTY_CONFIG"
+    ]
+   },
+   "errnoRet": 1,
+   "errno": "EPERM"
+  },
+  {
+   "names": [
+    "socket"
+   ],
+   "action": "SCMP_ACT_ERRNO",
+   "args": [
+    {
+     "index": 0,
+     "value": 16,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_EQ"
+    },
+    {
+     "index": 2,
+     "value": 9,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_EQ"
+    }
+   ],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_AUDIT_WRITE"
+    ]
+   },
+   "errnoRet": 22,
+   "errno": "EINVAL"
+  },
+  {
+   "names": [
+    "socket"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [
+    {
+     "index": 2,
+     "value": 9,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_NE"
+    }
+   ],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_AUDIT_WRITE"
+    ]
+   }
+  },
+  {
+   "names": [
+    "socket"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [
+    {
+     "index": 0,
+     "value": 16,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_NE"
+    }
+   ],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_AUDIT_WRITE"
+    ]
+   }
+  },
+  {
+   "names": [
+    "socket"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": [
+    {
+     "index": 2,
+     "value": 9,
+     "valueTwo": 0,
+     "op": "SCMP_CMP_NE"
+    }
+   ],
+   "comment": "",
+   "includes": {},
+   "excludes": {
+    "caps": [
+     "CAP_AUDIT_WRITE"
+    ]
+   }
+  },
+  {
+   "names": [
+    "socket"
+   ],
+   "action": "SCMP_ACT_ALLOW",
+   "args": null,
+   "comment": "",
+   "includes": {
+    "caps": [
+     "CAP_AUDIT_WRITE"
+    ]
+   },
+   "excludes": {}
+  }
+ ]
+}
\ No newline at end of file
diff --git a/container/smoke.sh b/container/smoke.sh
new file mode 100755
index 0000000..e5d47a0
--- /dev/null
+++ b/container/smoke.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+# Stage 0 acceptance test — exits 0 iff every REQUIRED check passes.
+#
+# This is the spike's definition of done (docs/plans/stage0-spike-brief.md).
+# It validates the full nested chain on a fresh box:
+#   host → docker port publish → outer container → rootless podman → inner app
+#
+# Checks marked [observe] never fail the run; their outcome is recorded for
+# SPIKE-FINDINGS.md (e.g. whether inner containers survive an outer restart).
+set -uo pipefail
+cd "$(dirname "$0")"
+
+readonly NAME="builder-outer"
+readonly APP_PORT=10000
+PASS_COUNT=0
+FAIL_COUNT=0
+
+# ── helpers ──────────────────────────────────────────────────────────────────
+
+pass() { PASS_COUNT=$((PASS_COUNT + 1)); printf '  PASS: %s\n' "$1"; }
+fail() { FAIL_COUNT=$((FAIL_COUNT + 1)); printf '  FAIL: %s\n' "$1"; }
+
+check() { # check <description> <command...>: run it, tally the result, show log tail on failure
+	local label="$1" log=/tmp/smoke-last.log
+	shift
+	if ! "$@" > "$log" 2>&1; then
+		fail "$label"
+		sed 's/^/    | /' "$log" | tail -n 15
+		return
+	fi
+	pass "$label"
+}
+
+outer_exec() { docker exec "${NAME}" "$@"; } # outer_exec <command...>: run it inside the outer container
+
+curl_app() {
+	curl -fsS --retry 10 --retry-delay 1 --retry-connrefused --retry-all-errors \
+		"http://127.0.0.1:${APP_PORT}" > /dev/null
+}
+
+# ── 1. fresh outer container ─────────────────────────────────────────────────
+
+echo "[1] build + start outer container"
+check "run-outer.sh brings up the outer container" ./run-outer.sh
+
+echo "[2] outer container is unprivileged"
+check "main process uid is 1000 (builder)" \
+	bash -c '[ "$(docker exec "$1" id -u)" = 1000 ]' _ "$NAME"
+check "container is not privileged" bash -c \
+	'[ "$(docker inspect -f "{{.HostConfig.Privileged}}" "$1")" = false ]' _ "$NAME"
+
+echo "[3] podman works inside (warmup ran in entrypoint; see 'docker logs $NAME' for timing)"
+check "podman info succeeds" outer_exec podman info
+
+# ── 2. inner run: pull + serve + full port chain ─────────────────────────────
+
+echo "[4] inner container serves through both forwarding layers"
+outer_exec podman rm -f spike-web > /dev/null 2>&1
+check "podman run nginx publishing :${APP_PORT}" \
+	outer_exec podman run -d --name spike-web \
+	-p "${APP_PORT}:80" docker.io/library/nginx:alpine
+check "host curl 127.0.0.1:${APP_PORT} reaches the inner nginx" curl_app
+
+# ── 3. inner build: storage driver + build path ──────────────────────────────
+
+echo "[5] podman build works inside"
+check "podman build of a trivial image" outer_exec bash -ec '
+	ctx=$(mktemp -d)
+	printf "%s\n" "FROM docker.io/library/alpine:3.20" "RUN echo built-ok > /built" \
+		> "$ctx/Dockerfile"
+	podman build -q -t spike-build-test "$ctx"
+'
+check "built image runs and contains its layer" outer_exec bash -c \
+	'test "$(podman run --rm spike-build-test cat /built)" = built-ok'
+
+# ── 4. restart semantics ─────────────────────────────────────────────────────
+
+echo "[6] outer restart: persistence + recovery"
+outer_exec bash -c "echo persists > /workspace/spike-marker" > /dev/null 2>&1
+docker restart "$NAME" > /dev/null
+sleep 3 # presumably enough for the entrypoint to finish before we exec in
+
+check "workspace volume survived restart" outer_exec bash -c \
+	'[ "$(cat /workspace/spike-marker)" = "persists" ]'
+check "podman image store survived restart" outer_exec bash -c \
+	'podman images --format "{{.Repository}}" | grep -q spike-build-test'
+
+inner_state=$(outer_exec podman inspect -f '{{.State.Status}}' spike-web 2>/dev/null || echo "gone")
+echo "  [observe] inner container state after outer restart: ${inner_state}"
+if outer_exec podman start spike-web > /dev/null 2>&1 && curl_app; then
+	echo "  [observe] 'podman start' resurrected the inner app (good for Stage 4: podman start --all)"
+	pass "app reachable again after restart (via podman start)"
+else # fallback: the re-create and the reachability probe are reported as separate checks
+	echo "  [observe] 'podman start' did NOT resurrect it — record in findings; trying re-create"
+	outer_exec podman rm -f spike-web > /dev/null 2>&1
+	check "podman run re-creates the inner container after restart" outer_exec \
+		podman run -d --name spike-web -p "${APP_PORT}:80" docker.io/library/nginx:alpine
+	check "app reachable again after restart (via re-create)" curl_app
+fi
+
+# ── summary ──────────────────────────────────────────────────────────────────
+
+echo
+echo "──────────────────────────────────────────"
+echo "smoke result: ${PASS_COUNT} passed, ${FAIL_COUNT} failed"
+if [ "$FAIL_COUNT" -eq 0 ]; then
+	echo "STAGE 0 SMOKE: PASS"
+	exit 0
+fi
+echo "STAGE 0 SMOKE: FAIL"
+exit 1
diff --git a/docs/architecture/important/builder-container-architecture.md b/docs/architecture/important/builder-container-architecture.md
index f48edc6..346c9cb 100644
--- a/docs/architecture/important/builder-container-architecture.md
+++ b/docs/architecture/important/builder-container-architecture.md
@@ -75,6 +75,26 @@ Build a system where:
   • Inner containers: untrusted, run LLM-generated code, no creds
 ```
 
+## Decision: appx terminates and proxies app traffic
+
+The two early container rigs published ports differently — the Stage 0 spike
+(`container/`) bound `-p 127.0.0.1:10000-10009` (loopback only), while the
+production draft (`docker/builder/`) published ports directly to the host
+(`-p 4001`, `-p 3000-3010`). This is a real design decision, not an
+implementation detail, so it is recorded here as the single source of truth:
+
+**appx terminates app traffic and proxies it into the outer container.** The
+outer container publishes its ports on loopback only (`127.0.0.1:...`); appx is
+the edge that accepts external traffic and forwards it to those loopback ports.
+App ports are therefore **not** published directly to the host's external
+interfaces.
+
+Rationale: keeping the outer container loopback-only means a single trusted
+edge (appx) owns TLS termination, routing, and access control, and no
+agent-spawned inner app is ever directly reachable from outside the host. Run
+scripts and compose files should follow this contract; direct host publishing
+is only for local manual testing.
+
 ## Component Mapping
 
 | Concept                                     | What it maps to in code                                                                                                                                                  |
@@ -204,7 +224,7 @@ These aren't blockers for the stated case, just worth knowing:
 2. **All projects share the outer container's filesystem quota.** One project filling `/workspace` affects everyone. Disk quota or per-project mount points if it matters.
 3. **No process-level isolation between projects.** A bug in agent-server affects all projects. For single-admin, fine.
 4. **First-time podman storage init is slow.** Add `podman info` to the entrypoint to warm up.
-5. **Inner container ports must be allocated.** Either expose a port range (`-p 3000-3010:3000-3010`) and let the agent pick, or have a registry that hands out ports. The latter scales better.
+5. **Inner container ports must be allocated.** Either expose a port range (`-p 127.0.0.1:3000-3010:3000-3010`) and let the agent pick, or have a registry that hands out ports. The latter scales better. Per the decision above, the outer container binds these on loopback only and appx proxies external traffic in.
 6. **Outer container restart kills inner containers.** Inner Podman state lives in the outer container's filesystem. If you `docker restart builder`, all running apps die. Mount Podman storage as a volume if you want persistence: `-v podman-storage:/home/builder/.local/share/containers`.
 
 None of these are dealbreakers; just trade-offs to be aware of.
diff --git a/docs/architecture/other/containerisation-plan.md b/docs/architecture/other/containerisation-plan.md
new file mode 100644
index 0000000..3a3c7c1
--- /dev/null
+++ b/docs/architecture/other/containerisation-plan.md
@@ -0,0 +1,111 @@
+This plan is based on a read-through of both codebases (appx and agent-server). Before the plan itself, here is the grounding — what already exists and what the real gaps are.
+
+## Current state (gap analysis)
+
+**Already built (more than the doc's "What Needs to Be Built" assumes):**
+
+| Piece | Status |
+|---|---|
+| agent-server multi-project registry, `/v1/projects` lifecycle, per-project sessions, shared AuthStorage | ✅ done |
+| appx → agent-server registration (`agentserver.Client`, `EnsureProject`/`DeleteProject`, startup reconcile) | ✅ done |
+| appx port allocation (10000–10999 in SQLite, atomic) | ✅ done |
+| appx subdomain proxy `<name>.<domain>` → `127.0.0.1:<assignedPort>` | ✅ done |
+| Chat proxy/mirror (`/api/pi/...`) with allow-list | ✅ done |
+| Bearer token seam (`AGENT_SERVER_TOKEN`) | ✅ done |
+
+**Actual gaps:**
+
+1. **Port → agent handshake.** appx allocates the port but agent-server never learns it. The builder agent has no way to know it must run `podman run -p 10007:3000`. There's no metadata field on `POST /v1/projects`.
+2. **Builder deploy conventions** — system prompt / skill telling the agent: podman exists, here's your port, here's the redeploy procedure, name containers `<project>-app`.
+3. **Outer container image** — Dockerfile with nested rootless podman (the draft in `rootless-podman-isolation.md` is untested).
+4. **appx managing the outer container** — today the deploy is two systemd units on the host; nothing in Go touches docker.
+5. **Ops glue** — egress proxy reachability from inside the container, restart semantics (inner containers die with outer), volumes, deploy script rewrite.
+
+## Key design decisions to lock first
+
+These determine how cleanly the stages compose, so I'd decide them up front:
+
+**D1 — Port strategy: publish the appx port range on the outer container at create time.**
+`docker run -p 127.0.0.1:10000-10099:10000-10099`. The decisive argument: **appx's subdomain proxy already targets `127.0.0.1:<port>` and needs zero changes** whether agent-server runs on host (early stages) or in the container (later stages). Caveats:
+- Docker spawns a `docker-proxy` process per published port — shrink the range to ~100 ports (single admin doesn't need 1000 projects; the DB range constant can stay, just cap allocation).
+- Escalation path if it ever hurts: a single in-container reverse proxy (agent-server itself or caddy) on one published port, with appx sending `X-Appx-Port`. Don't build it now; it's a clean swap later because appx's routing is already centralized in one handler.
+- Reject `--network=host` — it throws away the network isolation boundary the whole architecture exists for.
+
+**D2 — Deployment metadata handshake: extend `POST /v1/projects` with a `deployment` object.**
+appx sends `{name, deployment: {port: 10007, url: "https://eventx.example.com"}}`; agent-server persists it in `projects.json` **and materializes it as `<project>/.pi/deployment.json`** so the agent can `cat` it, plus injects a short deployment section into the system prompt context. File + prompt beats prompt-only: survives context compaction, agent can re-read it.
+
+**D3 — Outer container management in Go: shell out to the `docker` CLI behind an interface, not the Docker SDK.**
+A `containerruntime` package with an interface (matching the existing `AgentRegistrar` fake-based testing pattern), implemented by exec-ing `docker` with `--format json`. Rationale: one container's lifecycle (ensure image, create, start, inspect, health-wait) doesn't justify the Docker SDK's dependency tree, and CLI-compat means the host runtime can be docker *or* podman for free. Industry practice is split here; for this scope CLI wins on simplicity and testability.
+
+**D4 — Builder agent's container runtime is config, not hardcoded.**
+In dev on macOS you'll run agent-server on the host where the "podman" might be podman-machine or Docker Desktop. Make the deploy skill reference `$APP_CONTAINER_RUNTIME` (default `podman`) so stage-1 dev work transfers untouched to the nested setup.
+
+## Staging: yes — podman-first, outer container later, plus one early de-risking spike
+
+Your instinct is right, with one amendment. The user-visible flow (steps 2–6 of your list) is ~80% of the work and is **completely independent of the outer container** — prompt engineering, the port handshake, redeploy UX all behave identically whether agent-server is containerized. The outer container is packaging/ops. *But* there's one real risk in deferring it: "works on host, breaks nested" — nested rootless podman has fragile flags (`/dev/fuse`, fuse-overlayfs, uidmap, seccomp). So:
+
+### Stage 0 — Nested-podman spike (timeboxed, ~1 day, parallel to everything)
+Pure validation, no feature code. On a Linux box (Hetzner/Lima/OrbStack VM — **not** macOS-native):
+- Build the draft Dockerfile from `rootless-podman-isolation.md`, fix it until `podman run -d -p 10000:80 nginx` works inside, and `curl 127.0.0.1:10000` works from the host through both forwarding layers.
+- Verify: volume-mounted `/workspace` and podman storage survive container restart; `podman info` warmup time; behavior after `docker restart`.
+- **Deliverable:** a known-good `Dockerfile` + `run-outer.sh` checked into `agent-server/` (or `appx/deploy/`), and a short findings note. This de-risks stages 3–4 and informs the stage-1 skill conventions.
+
+### Stage 1 — Full user flow, agent-server on host ("podman without outer container")
+The whole loop from your list (create project → agent builds app → container → subdomain → refine → redeploy), minimal moving parts:
+
+- **agent-server:** `deployment` metadata on project create (contract + `ProjectStore` record + `deployment.json` materialization + system-prompt injection). All unit-testable with the existing `node:test` suites — extend `projectLifecycle.test.ts`.
+- **appx:** `agentserver.Client.EnsureProject` gains the deployment payload (port from the store record, URL from `BaseDomain`); `Manager.Create` threads it through. Unit-test with the existing fake-registrar pattern.
+- **Builder deploy skill/prompt:** conventions — read `.pi/deployment.json`, `$APP_CONTAINER_RUNTIME build -t <project>-app .`, run with `-p <port>:<appPort>`, named containers, redeploy = `stop && rm && run`, health-check with curl before declaring success. This is where iteration time goes.
+- **Dev environment:** `task local` on macOS + Docker Desktop/podman-machine as the agent's runtime. The appx health checker (`AppRunning` TCP dial) already gives the UI deploy status for free.
+
+**Acceptance:** your steps 2–6 work end-to-end on `*.127.0.0.1.sslip.io` locally, including the refinement/redeploy cycle.
+
+### Stage 2 — agent-server inside the outer container, started manually
+Take the Stage 0 image, make it real: install agent-server in it, entrypoint (podman warmup, env-key injection — already supported via `ANTHROPIC_API_KEY` config), volumes for `/workspace` + podman storage, publish `127.0.0.1:4001` + the app port range. Run it via script/compose; point host-run appx at it via `APPX_AGENT_SERVER_URL` — **no appx code changes**, because of D1.
+
+**Acceptance:** the exact Stage 1 e2e passes with agent-server containerized. This isolates "does the nested environment break the flow" from "does appx manage containers correctly".
+
+### Stage 3 — appx creates/supervises the outer container at startup
+- `internal/containerruntime`: interface + docker-CLI implementation + fake; ensure-image / create-if-missing / start / health-wait (poll agent-server `/`), reconcile on appx boot.
+- Config: `APPX_AGENT_CONTAINER=true`, image ref, port range, key passthrough, generated `AGENT_SERVER_TOKEN` (now **mandatory** — the port is published, and OWASP A01 says don't rely on loopback alone once another party could reach it).
+- Egress: outer container needs `--add-host=host.docker.internal:host-gateway` and `HTTPS_PROXY` pointed at the host egress proxy; the proxy must listen on the docker bridge, not just loopback — this is a real change to check in `egress.ProxyAddr`.
+- Rewrite `deploy/`: drop `agent-server.service`, system-setup installs docker, appx.service gains docker group access.
+
+**Acceptance:** fresh VM → bootstrap → appx up → container exists → full e2e.
+
+### Stage 4 — Hardening
+- **Restart semantics:** outer restart kills inner apps (doc limitation #6). Cheapest fix: entrypoint runs `podman start --all` after warmup; appx `AppRunning` already reflects reality in the UI either way.
+- bash-tool `spawnHook` stripping `*_API_KEY` from child env (defense in depth per the architecture doc).
+- `--memory`/`--cpus` on the outer container; idle-runtime eviction only if memory proves a problem.
+- Security review pass (you have a precedent format in `docs/security/`).
+
+## Testing strategy (the cross-service/networking question)
+
+Apply the test pyramid, with an explicit rule for what each layer is allowed to touch:
+
+**1. Unit tests (every PR, no docker, both repos).** Everything behind seams: deployment-metadata plumbing (node:test), port-capped allocation, `containerruntime` logic against a fake CLI runner, client payloads against `httptest` fake agent-server. The repos already do exactly this style — extend, don't invent.
+
+**2. Deterministic infra smoke test — the key idea: bypass the LLM.** The expensive, flaky part of e2e is the agent; but the agent only ever runs bash commands. So script the *exact commands the deploy skill prescribes*:
+- `scripts/smoke-deploy.sh`: start agent-server (host or container per stage) + `appx --http`, create a project via `POST /api/projects`, assert agent-server has it with the right port in `deployment.json`, then deploy a canned hello-world app by running the skill's literal command sequence (via `docker exec` into the outer container in stages 2+), and `curl http://<name>.127.0.0.1.sslip.io:8080` expecting 200. Then redeploy a modified version and assert the change is visible.
+- This validates ports, volumes, proxy chain, registration — everything except LLM judgment — deterministically and in seconds-to-minutes.
+
+**3. Nested-container integration (Linux only, CI nightly or on-demand).** Stages 0/2/3 artifacts get a GitHub Actions job (ubuntu runners are full VMs; `--device /dev/fuse` works there — the podman project itself tests podman-in-docker this way): build outer image, run, podman hello-world inside, full port-chain curl. Don't attempt this on macOS at all; accept that macOS dev covers stage-1-shaped flows and Linux covers nesting.
+
+**4. LLM e2e — manual checklist + one optional scripted "golden prompt".** ("Build a single-page todo app and deploy it.") Asserts HTTP 200 on the subdomain within N minutes. Run before releases, not in CI — it's for catching prompt/skill regressions, and it's the only layer where prompt quality is visible.
+
+The principle threading through: **every networking boundary gets tested by a real connection at exactly one layer, and faked everywhere else.** No mocked-docker unit tests pretending to verify port forwarding, and no LLM in the loop for infrastructure verification.
+
+## Risk register (worth keeping visible)
+
+1. **Nested podman flags on target OS** — mitigated by Stage 0 spike before feature investment.
+2. **Port-range publish overhead** — mitigated by capping at ~100; escalation to in-container proxy is pre-designed (D1).
+3. **macOS/Linux divergence** — accepted and bounded: macOS = flow/prompt dev, Linux = nesting truth.
+4. **Outer restart kills apps** — Stage 4 `podman start --all`; UI already shows honest status.
+5. **Egress proxy reachability from container** — explicitly scoped into Stage 3; easy to forget and "works in dev" (where agent-server is on host).
+6. **Token becomes security-critical at Stage 3** — published port means bearer auth is no longer optional.
+
+---
+
+Rough effort: Stage 0 ~1 day, Stage 1 ~2–3 days (mostly skill iteration), Stage 2 ~1 day, Stage 3 ~2–3 days, Stage 4 ~1–2 days.
+
+Suggested follow-up: write this up as `docs/plans/` entries in the respective repos (the agent-server contract change and the appx container-supervisor design each deserve a short ADR), and/or create todos for the stages so work can be claimed per-stage.
\ No newline at end of file
diff --git a/docs/architecture/other/orchestrator-comparison.md b/docs/architecture/other/orchestrator-comparison.md
new file mode 100644
index 0000000..2e02d0c
--- /dev/null
+++ b/docs/architecture/other/orchestrator-comparison.md
@@ -0,0 +1,108 @@
+# appx-orchestrator vs. agent-server + appx
+
+**Date:** 2026-06-12
+**Context:** `appx-orchestrator` is a parallel implementation of the same
+`docs/architecture/important/builder-container-architecture.md`. It is further
+along on product surface (template, preview, theme) but stops at the network
+edge. This doc is the concise "what to borrow, what to avoid, how appx closes
+the gap" reference.
+
+Both share the same core: one agent-server process, per-project builder agents,
+one outer container, rootless podman, inner app-only containers. They diverge on
+**how the app reaches the user**, **who owns the app's port**, and **how
+hardened the outer container is**.
+
+---
+
+## 1. Good things worth adopting into agent-server
+
+| Feature | What it is | Adoption note |
+| --- | --- | --- |
+| **App template seeding** | A runnable Next.js app baked into the outer image; copied into `/workspace/<project>` on first `POST /v1/projects`. Projects start as a working app, not an empty dir. | Highest-value borrow. agent-server needs the `templateDir` + `cpSync` seam (orchestrator already has it; ours doesn't yet). |
+| **Iframe preview UX** | Live app embedded next to the chat panel. | The product surface users love. appx makes it work remotely (§3). |
+| **DEV/PROD container split** | Multi-stage template Dockerfile: `dev` (hot-reload, preview) vs `prod` (built, served). | Good lifecycle model; adopt the multi-stage template. |
+| **Theme bridge** | Shell theme propagates into the app via `--app-*` CSS tokens + origin-checked `postMessage`; app can push `appx:start-prompt`. | Nice-to-have polish; adopt once the core loop works. |
+| **Global builder `AGENTS.md`** | Entrypoint installs a shared builder system prompt into `.pi-global/AGENTS.md`. | Complements our planned `deploy-app` skill — use both (prompt for "you're a builder", skill for deploy mechanics). |
+| **Headless backend / client-only UI** | `agent-client` is imported only by the frontend; agent-server has zero UI deps. | We already do this (appx embeds the React UI). Confirms the boundary is right. |
+
+---
+
+## 2. Issues with the orchestrator approach
+
+| # | Issue | Why it bites |
+| --- | --- | --- |
+| 1 | **Dangerously weakened outer container (the security boundary).** `run.sh` uses `--cap-add SYS_ADMIN` + `seccomp=unconfined` + `apparmor=unconfined` (+ `--device /dev/fuse`). | `--cap-add SYS_ADMIN` gives the outer container nearly-root powers over the host kernel (mounting, namespaces — the classic container-escape toolkit). Combined with their disabled seccomp+AppArmor, it badly weakens the very sandbox the whole design depends on — and our spike showed it's unnecessary: a 3-line `newuidmap` fix removes the need for it entirely (§3), with ~2× faster builds too. |
+| 2 | **Localhost-only preview — unusable on a deployed server.** The iframe `src` is `http://127.0.0.1:<port>`, and an iframe URL is resolved by the *viewer's browser*, not the server. | `127.0.0.1` always means "the machine asking" — i.e. the viewer's own laptop. It only works when the browser and the outer container are the **same machine** (a dev laptop). Host orchestrator on a server (e.g. Hetzner) and open it from your laptop: the shell + chat still work (chat is proxied server-side), but the preview iframe asks *your laptop* for `127.0.0.1:<port>`, where nothing is running → blank. Pointing it at the server's IP instead doesn't save it: that's an unauthenticated, non-TLS, port-collision-prone exposure, and an `http://` iframe inside an HTTPS shell is blocked as mixed content. **Net: the orchestrator preview is a single-machine dev tool; it cannot show a hosted app to a remote user.** |
+| 3 | **Agent-authoritative ports + `app.json` discovery.** The agent picks a port from a fixed 3000–3010 range and writes `.pi/app.json`; the shell reads (and must sanitize) that agent-supplied URL. | See §2.3 — fragile, insecure, and inverts the authority we want. ~10 ports also caps concurrent apps and invites collisions. |
+| 4 | **No external exposure.** No reverse proxy, no public URL, no per-user routing. | You can't hand anyone a link to a built app. The "PROD container" in the docs has no mechanism behind it — aspirational. |
+| 5 | **No TLS, no auth in front of apps.** | Any attempt to expose the raw port range is unauthenticated + unencrypted. An HTTPS shell also can't embed an `http://` iframe (mixed-content block). |
+| 6 | **Thin control plane.** The Next.js `app-shell` has no egress control, no durable per-project/user records, no health model beyond a preview ping. | No allowlist on outbound agent traffic; no multi-tenant accounting. |
+
+**Root cause of 2–5:** the preview is built on the assumption that the viewer
+sits at the machine running the app. The moment it's hosted, that assumption is
+false.
+
+### 2.3 Why `app.json` is an anti-pattern for us (the port-authority point)
+
+orchestrator is **agent-authoritative** on the port: the agent chooses it, so
+the shell has no other way to learn it — hence the agent writes `.pi/app.json`
+and the shell reads it back. That forces:
+
+- **Fragility** — routing depends on an LLM correctly writing a file: it can be
+  missing, malformed, or stale after a redeploy.
+- **A security smell** — the control plane consumes **agent-authored data for
+  routing**, so it must sanitize an agent-supplied URL (orchestrator literally
+  has `sanitizePreviewUrl`). If you have to sanitize it, you shouldn't be
+  trusting it as a source of truth.
+- **Inverted authority** — it reintroduces, through the back door, the agent
+  control over ports that a deterministic design removes.
+
+We want the opposite direction of authorship:
+
+> **`deployment.json` (ours): written by the control plane, read by the agent — an instruction.**
+> **`app.json` (theirs): written by the agent, read by the control plane — a discovery.**
+
+In our model **appx allocates the port** and already knows the URL at allocation
+time. There is nothing to discover. The agent's only obligation is to **publish
+the app on the port appx assigned** (`podman run -p <assigned>:<internal>`).
+Readiness — the one useful thing `app.json` gave orchestrator — is covered
+better by appx's own **TCP health check on the assigned port** (the existing
+`AppRunning`): deterministic, ground-truth, agent-independent. So `app.json` is
+dropped entirely.
+
+---
+
+## 3. How appx unblocks each issue
+
+appx is the **public edge + control plane** that orchestrator lacks.
+
+| Issue (§2) | appx capability that resolves it |
+| --- | --- |
+| 1 — weakened outer container | Our **Stage 0 spike** outer image: file-cap `newuidmap` (drops `SYS_ADMIN`), **tailored seccomp** (replaces `unconfined`), **native overlay** (drops `/dev/fuse`, ~2× faster). Restores all three sandbox barriers — strictly more secure + faster. Adopt into the outer image; orchestrator's builder should too. |
+| 2 — localhost-only preview | **Subdomain reverse proxy.** Each project gets `https://<project>.<domain>`, resolved server-side to the inner container's loopback port → the iframe loads from **any** browser, anywhere. |
+| 3 — agent ports + app.json | appx **allocates a dedicated port per project** (durable store) and **pushes it down** as `deployment.json` + prompt context. The agent *must* publish on that port; it never chooses one. No agent-written routing file; readiness via appx's port health check. Stable, collision-free, deterministic URLs. |
+| 4 — no external exposure | appx **is** the public origin. It routes `Host: <project>.<domain>` → `127.0.0.1:<assignedPort>` inside the host. "PROD" stops being aspirational: the subdomain *is* the production surface. |
+| 5 — no TLS/auth | appx **terminates TLS** (Let's Encrypt via CertMagic) and runs **auth middleware in front of every app and the chat**. HTTPS end-to-end means the iframe embeds cleanly; auth means apps aren't open to the world. |
+| 6 — thin control plane | appx adds the **egress allowlist proxy**, durable per-project records, password auth, and a real health model. |
+
+**Iframe caveat appx turns into a non-issue:** because appx is the doorman in
+front of every app, it can set/strip `X-Frame-Options` / CSP `frame-ancestors`
+at the proxy, so framing works over HTTPS regardless of what the generated app
+emits.
+
+---
+
+## 4. Target shape (the synthesis)
+
+> Take orchestrator's **product surface** — template seeding, iframe preview,
+> DEV/PROD split, theme bridge — and run it on **our agent-server**, with **appx
+> providing the edge** (subdomain + TLS + auth + **deterministic port
+> allocation**) and the **hardened outer container** from the Stage 0 spike.
+
+Result: *build on a server, view the live app from anywhere — embedded in an
+iframe or at its own URL — securely.* That is precisely what orchestrator cannot
+do today and what the appx control plane exists to enable.
+
+**Port authority — decided, not open:** appx allocates → the agent must bind the
+app to that port via podman → appx routes the subdomain to it and health-checks
+it. No agent-authored `app.json`; the control plane owns the address end to end.
diff --git a/docs/architecture/other/sidecar-vs-orchestrator.md b/docs/architecture/other/sidecar-vs-orchestrator.md
new file mode 100644
index 0000000..83e0f87
--- /dev/null
+++ b/docs/architecture/other/sidecar-vs-orchestrator.md
@@ -0,0 +1,90 @@
+This document compares the two approaches. Note: the sidecar app (`create-appx-app`) actually _consumes_ `@appx/agent-server` — but it wires it in a fundamentally different **topology** than what agent-server was designed for. Here's the comparison.
+
+## First, what the two approaches actually are
+
+**Approach A — Sidecar (create-appx-app):** The builder is _embedded inside the app it builds_. One app = one agent-server pointed at `PROJECT_DIR = its own repo`. The preview pane and the chat dock are sibling React components in the _same_ running app, and the agent edits the _same source files_ that are being served. 1:1:1 — one app, one agent, one project dir.
+
+**Approach B — Orchestrator (agent-server's intended design):** One long-lived agent-server process is a _central control plane_ that manages **many** isolated projects. `ProjectRegistry` → lazily-built `ProjectRuntime` per project → `Map<sessionId, ProjectSession>`. The builder is **separate** from the apps it builds; agents modify code in `WORKSPACE_DIR/<id>/` and spin up the actual apps as **inner containers** (rootless podman) inside an unprivileged outer container. The builder never _is_ the app.
+
+The key structural difference: in A the agent edits the source code of the very process that hosts the agent UI; in B the agent and the app it produces are different processes (indeed different containers).
+
+---
+
+## Dimension-by-dimension
+
+### 1. Isolation & security
+
+- **Sidecar:** Weak. Agent runs with `PROJECT_DIR` = the live app repo, same filesystem, same trust zone as the served UI. LLM-generated code and the control plane share one process boundary. Fine for a trusted single developer locally; not safe for running untrusted generated code or hostile prompts.
+- **Orchestrator:** Strong, by design. Three trust zones (host / trusted outer container with creds / untrusted inner containers running LLM code with **no creds**). Generated apps run nested in podman, can't touch the host, never see LLM keys. This is the whole point of `builder-container-architecture.md`.
+
+**Winner: Orchestrator** (clear, for anything beyond local single-user dev).
+
+### 2. Preview / feedback loop
+
+- **Sidecar:** Tightest possible loop. Agent writes a file → Next.js Fast Refresh → preview updates in-place in milliseconds, _in the same window as the chat_. No build, no container, no port allocation. This is the "build and preview simultaneously" magic.
+- **Orchestrator:** Looser. Agent runs `podman build` + `podman run`, allocates a port, you hit the app on a separate URL. Seconds-to-minutes per iteration, and preview is a separate surface from the builder UI.
+
+**Winner: Sidecar** (for iteration speed and the unified build-while-previewing UX).
+
+### 3. Multi-tenancy & scale
+
+- **Sidecar:** None. One app, one agent. To build N apps you run N independent app+agent pairs, each with its own credentials wiring. No shared model registry, no central registry of projects.
+- **Orchestrator:** Built for this. Durable `projects.json` registry, lazy per-project runtimes, **one shared `AuthStorage`/`ModelRegistry`** across all projects (set keys once at boot), idempotent project creation, restart-safe via the mounted volume.
+
+**Winner: Orchestrator.**
+
+### 4. Blast radius / self-modification risk
+
+- **Sidecar:** The dangerous one. The agent edits the _same code that renders the agent's own chat UI_. An edit to `app-shell.tsx`, `layout.tsx`, or `pi-runtime-provider.tsx` can break the refresh boundary or introduce a syntax error that takes down the builder UI itself — a chicken-and-egg failure. The `AGENTS.md` prompt explicitly begs the agent to "keep the chat shell working," which is a soft guardrail around a hard structural hazard.
+- **Orchestrator:** The builder control plane is physically separate from the built app. A broken generated app crashes its inner container; the agent-server, its UI, and other projects are untouched. (Caveat: all projects share _one_ agent-server process, so an agent-server bug — not an app bug — affects everyone. Documented limitation #3.)
+
+**Winner: Orchestrator** for control-plane safety.
+
+### 5. Credentials handling
+
+- **Sidecar:** Token injected server-side by the Next proxy (`AGENT_SERVER_TOKEN`), keys via env / LiteLLM extension. Decent, but the agent and the LLM keys live in the same trust zone as the served app.
+- **Orchestrator:** Keys live only in agent-server's heap in the trusted outer container; inner app containers never receive them unless explicitly passed; defense-in-depth via bash `spawnHook`. Cleaner separation.
+
+**Winner: Orchestrator.**
+
+### 6. Simplicity & developer experience
+
+- **Sidecar:** Dead simple to grasp and run: `pnpm agent:dev` + `pnpm dev`, or one Docker container with a bind mount. No podman, no nested containers, no port broker, no project registry. The repo _is_ the template — clone, point the agent at it, go. Great for "scaffold me an app and let me watch it form."
+- **Orchestrator:** More moving parts: outer container, rootless podman, FUSE device, port allocation, project provisioning, registry persistence. More to operate and debug.
+
+**Winner: Sidecar** for low-friction local/single-app use.
+
+### 7. Resource management & failure modes
+
+- **Sidecar:** No shared-resource contention because there's nothing shared — but also no limits and no orchestration; you manage each pair by hand.
+- **Orchestrator:** Central place to apply `--memory`/`--cpus`, idle-eviction sweeps, disk quotas. But projects share the outer container's CPU/mem/disk, so a runaway build in one project can starve another (documented limitations #1, #2).
+
+**Roughly even** — different trade-offs.
+
+### 8. Deployment lifecycle
+
+- **Sidecar:** There isn't really one — the "app" is the dev server. Great for prototyping, awkward as a path to a deployable artifact (you'd graduate it out of the sidecar shell).
+- **Orchestrator:** Produces real container images via podman — a natural path to deploy/scale (escalates cleanly to per-user containers, k8s pods, Sysbox/microVMs per the escalation table).
+
+**Winner: Orchestrator** for productionization.
+
+---
+
+## How to think about it
+
+They're not really competitors — they're optimized for different points in the lifecycle, and the sidecar is essentially **one constrained configuration** of the orchestrator (single project, `projectDir` = the app, no inner containers, hot-reload instead of podman):
+
+|                      | Sidecar (create-appx-app)                       | Orchestrator (agent-server)                 |
+| -------------------- | ----------------------------------------------- | ------------------------------------------- |
+| Best for             | Single dev, single app, fast in-place iteration | Many projects, isolation, deployable apps   |
+| Preview              | Same window, instant Fast Refresh               | Separate URL, container build               |
+| Isolation            | Same process/trust zone                         | Outer + nested inner containers             |
+| Multi-project        | No                                              | Yes (registry, shared auth)                 |
+| Control-plane safety | Agent can break its own UI                      | Builder separate from built app             |
+| Credentials          | Same zone as served app                         | Isolated in outer container heap            |
+| Ops complexity       | Minimal                                         | Higher (podman, ports, registry)            |
+| Scale path           | Re-architect                                    | Layered escalation (per-user, k8s, microVM) |
+
+**Bottom line:** The sidecar buys an unbeatable build-while-previewing feedback loop and trivial setup, at the cost of isolation, multi-tenancy, and the structural hazard of an agent that can crash its own UI. The orchestrator buys security boundaries, multi-project scale, shared credentials, deployable artifacts, and a clean separation between builder and built — at the cost of a slower loop and more operational machinery.
+
+A pragmatic product often wants **both**: the orchestrator as the backbone (isolation, multi-project, deploy), with a sidecar-style hot-reload preview surface bolted on for the inner-loop UX during active editing.
diff --git a/docs/superpowers/plans/2026-05-27-credentials-extraction.md b/docs/plans/2026-05-27-credentials-extraction.md
similarity index 100%
rename from docs/superpowers/plans/2026-05-27-credentials-extraction.md
rename to docs/plans/2026-05-27-credentials-extraction.md
diff --git a/docs/superpowers/plans/2026-06-02-pi-conventions-alignment.md b/docs/plans/2026-06-02-pi-conventions-alignment.md
similarity index 100%
rename from docs/superpowers/plans/2026-06-02-pi-conventions-alignment.md
rename to docs/plans/2026-06-02-pi-conventions-alignment.md
diff --git a/docs/plans/builder-containers-plan.md b/docs/plans/builder-containers-plan.md
new file mode 100644
index 0000000..ff5bc1d
--- /dev/null
+++ b/docs/plans/builder-containers-plan.md
@@ -0,0 +1,306 @@
+# Plan: Containerised Apps — agent-server Side
+
+**Date:** 2026-06-11
+**Status:** Draft
+**Scope:** Deployment metadata contract (dev + prod), app template seeding, two-container (dev/prod) deploy model, builder deploy skill/prompt, outer container image (nested rootless podman), smoke tests
+**Canonical architecture:** `docs/architecture/important/builder-container-architecture.md`
+**Sibling plan:** appx repo, `docs/plans/phase_9_plan.md` (control plane: port allocation, container supervision, subdomain routing)
+
+---
+
+## Goal
+
+Implement agent-server's half of the containerised apps architecture:
+
+1. appx starts ONE outer container at boot (agent-server + rootless podman inside).
+2. User creates a project in the appx UI; appx allocates **two ports** (a DEV and a PROD port) and registers the project here **with deployment metadata** (both ports + their public URLs).
+3. New projects are **seeded from a baked-in app template**, so they start as a runnable, containerised app (a lean multi-stage Dockerfile — no framework dev-server).
+4. The builder agent builds **one image** and runs it as **two inner podman containers** — DEV (iterate) and PROD (stable/shared) — each publishing its reserved port. DEV and PROD are the **same build** ("what you see is what ships").
+5. The user iterates against the DEV URL; refinements rebuild + redeploy DEV. When happy, the agent **promotes** (rebuilds PROD from current source). Both are real `https://…<domain>` URLs exposed by appx.
+
+## Division of labour
+
+| Concern | Owner |
+|---|---|
+| Port allocation (**two ports/project**), subdomain proxy (**dev + prod**), outer container lifecycle, host deploy scripts | appx |
+| Project registry + deployment metadata persistence, surfacing metadata to the agent, deploy skill, outer container **image**, podman-in-container validation | agent-server (this plan) |
+
+agent-server stays appx-agnostic: it receives a generic `deployment` object (dev + prod `{port, url}` pairs) on project create and makes it available to the agent. It never knows how appx mints ports or subdomains — only that two pairs were handed to it.
+
+> **appx-side implication (track in `phase_9_plan.md`):** appx must allocate a **pair** of ports per project and route **two** subdomains (prod `…<domain>`, dev e.g. `…-dev.<domain>`). The 100-port publish cap therefore means ~50 projects, not 100 — revisit the cap there.
+
+---
+
+## Design decisions
+
+### D1 — Deployment metadata rides on `POST /v1/projects` (dev + prod)
+
+Extend the create-project contract with an optional `deployment` object carrying
+**two environments**, each a `{port, url}` pair:
+
+```jsonc
+POST /v1/projects
+{
+  "name": "eventx",
+  "deployment": {
+    "dev":  { "port": 10006, "url": "https://eventx-dev.example.com" },
+    "prod": { "port": 10007, "url": "https://eventx.example.com" }
+  }
+}
+```
+
+- Both `dev` and `prod` (and their fields) are optional, so a single-port local dev setup or a partial registration is still valid.
+- Persisted on the `ProjectRecord` in `projects.json`.
+- **Idempotent re-POST with the same name updates `deployment`** (today it returns the existing record untouched). This lets appx's startup reconcile heal drift — e.g. a project that predates this feature gets its ports on the next boot.
+- Returned by `GET /v1/projects` / `GET /v1/projects/:id` so the control plane can verify registration.
+- agent-server never *reads a port back* from the agent — the pairs flow one way (appx → metadata → agent). Readiness is appx's concern (a health check on each port), never an agent-authored file. (Contrast: orchestrator's `.pi/app.json` — see `docs/architecture/other/orchestrator-comparison.md` §2.3.)
+
+### D2 — Metadata is surfaced to the agent as file + prompt section
+
+Two mechanisms, both generated from the same record:
+
+1. **`<projectDir>/.pi/deployment.json`** — materialised on create/update. The agent can `cat` it any time; it survives context compaction and session restarts.
+2. **System prompt injection** — `ProjectRuntime` appends a short generated "Deployment" section to the resolved system prompt when metadata exists (after the `.pi/AGENTS.md` content, never replacing it):
+
+```
+## Deployment
+This project runs as TWO containers from the SAME build (two instances, not two builds):
+- DEV  (iterate here):   host port 10006 → https://eventx-dev.example.com   (container <project>-app-dev)
+- PROD (stable, shared): host port 10007 → https://eventx.example.com       (container <project>-app-prod)
+Refinements rebuild + redeploy DEV; PROD changes only when you "promote".
+The app listens on its container port; map it with -p <reserved host port>:<container port>.
+Container runtime: podman. See the deploy-app skill for build/run/redeploy/promote conventions.
+Machine-readable copy: .pi/deployment.json
+```
+
+File-only would risk the agent never reading it; prompt-only would risk loss on compaction. Both is cheap.
+
+### D3 — Container runtime for the agent is env config, not hardcoded
+
+`APP_CONTAINER_RUNTIME` env var (default `podman`), threaded into the prompt section and the skill. In the outer container it is always `podman`; in local macOS dev (Stage 1, agent-server on host) it may be `docker` (Docker Desktop) or `podman` (via podman-machine). The skill references `$APP_CONTAINER_RUNTIME` so Stage 1 prompt iteration transfers untouched to the nested setup.
+
+### D4 — Deploy conventions live in a skill, not only in AGENTS.md
+
+Ship a `deploy-app` skill in this repo (`skills/deploy-app/SKILL.md`), loaded via `PI_SKILL_PATHS` in the outer image. Skills are versioned with agent-server, independent of any one project's `.pi/`, and the prompt section stays short (conventions load only when the agent deploys).
+
+### D5 — New projects are seeded from a baked-in app template
+
+`POST /v1/projects` copies a configured template dir into a freshly-created
+`WORKSPACE_DIR/{id}/` (only when the dir did not already exist; existing projects
+are untouched). A seeded, runnable starting point means the deploy skill's
+`build` + `run` commands work from the very first prompt instead of
+relying on the LLM to scaffold a correct app + Dockerfile from scratch. The
+**seeding mechanism** (a `templateDir` + recursive copy with a skip filter for
+`node_modules`/build output) is lifted from appx-orchestrator (comparison §1).
+
+- Config: `APPX_TEMPLATE_DIR` (absent ⇒ projects start empty, as today).
+- The outer image bakes the template at a fixed path and points `APPX_TEMPLATE_DIR` at it.
+
+**The template *content* is deliberately undecided.** We do not yet know the
+target stack — it may not even be a JS frontend (a Python backend, a full-stack
+framework, or several selectable templates are all on the table). Consequences:
+
+- **Start minimal:** a small **Vite SPA** is the provisional default, not Next.js
+  — fewer moving parts, faster builds under nested podman, easier to reason about
+  while the core loop is proven. Treat it as a throwaway placeholder, not a commitment.
+- **No framework assumptions leak outward.** agent-server, the deployment metadata,
+  and the deploy skill stay stack-agnostic — the only contract is "a Dockerfile
+  that builds a lean image serving on a port." Swapping the
+  template, or supporting several, must require zero changes outside `APPX_TEMPLATE_DIR`.
+- **We author the template's Dockerfile; we don't copy orchestrator's.**
+  Orchestrator's is a useful *structural* reference for the multi-stage shape
+  (deps → build → lean runtime), but its final stage ships the whole build tree
+  (dev deps + source) and runs as root — not optimal. Ours has a lean runtime
+  stage and a non-root user. The inner-app spike (`container/INNER-APP-SPIKE.md`)
+  **proved this builds and serves under nested rootless podman + native overlay**
+  (a 64 MB non-root Vite image; a Python app worked identically).
+
+### D6 — Two containers per app: DEV and PROD (same build, two instances)
+
+Each project deploys as two inner containers built from the **same Dockerfile**
+(one build target — no dev/prod divergence), on the two ports appx allocated (D1):
+
+| Env | Container | Image | Host port | Purpose |
+|---|---|---|---|---|
+| DEV  | `<project>-app-dev`  | `<project>-app:dev`  | dev port  | What the user iterates against; rebuilt + redeployed on each refinement. |
+| PROD | `<project>-app-prod` | `<project>-app:prod` | prod port | The stable/shared URL; rebuilt only on **promote**. |
+
+- **DEV is built exactly like PROD** (real built image, real server) — no
+  framework dev-server, no hot-reload. "What you see in DEV is what ships," and
+  every stack is treated uniformly (build an image, run it). This deliberately
+  keeps app-specific dev-server quirks (Vite `allowedHosts`, HMR WebSockets) out
+  of the template, the skill, **and** appx. See *Potential improvements* for the
+  hot-reload escape hatch if rebuild latency ever bites.
+- **Refinement loop (goal step 5):** rebuild the image and redeploy **DEV only**
+  (~seconds; the spike measured ~0.7 s warm rebuild + a fast `rm`/`run`). PROD's
+  URL stays stable while the user iterates.
+- **Promote** = rebuild PROD from current source and restart its container, so
+  PROD matches what the user approved in DEV.
+- Two image tags (`:dev`/`:prod`) keep the instances independent snapshots even
+  though they come from one Dockerfile.
+
+---
+
+## Staging (shared with appx plan)
+
+| Stage | What | Repo focus |
+|---|---|---|
+| 0 | Nested rootless podman spike (timeboxed ~1 day) | agent-server |
+| 1 | Full user flow with agent-server **on host** ("podman without outer container") | both |
+| 2 | agent-server inside the outer container, started manually | agent-server |
+| 3 | appx creates/supervises the outer container at startup | appx |
+| 4 | Hardening (restarts, key stripping, resource limits) | both |
+
+Rationale: the user-visible flow (Stage 1) is ~80% of the value and is independent of the outer container; the outer container is packaging. The Stage 0 spike de-risks the one thing that could invalidate Stage 1 decisions — nested podman flag fragility ("works on host, breaks nested").
+
+---
+
+## Stage 0 — Nested-podman spike ✅ DONE
+
+Validated on a Linux host. Artifacts committed under `container/`; full writeup in
+`container/SPIKE-FINDINGS.md`. Nested rootless podman works **unprivileged** with
+hardened host defaults intact.
+
+**Proven outer-container recipe (supersedes the old draft flags):**
+- **No `--privileged`, no `--cap-add SYS_ADMIN`**; the outer process runs as uid 1000.
+- The linchpin fix: repackage `newuidmap`/`newgidmap` with **file capabilities** (not setuid-root) — this, not any host sysctl, is what makes nested userns work.
+- Required `docker run` flags (each deletion-tested): `--device /dev/net/tun`, `--security-opt seccomp=container/seccomp-builder.json` (tailored, **stricter than `unconfined`**), `--security-opt apparmor=unconfined` (TODO: tailor), `--security-opt systempaths=unconfined`.
+- **Native rootless overlay** storage — `/dev/fuse` dropped, ~2× faster builds.
+- Restart recovery: entrypoint wipes stale `XDG_RUNTIME_DIR` runtime state, then `podman start --all` cleanly resurrects inner containers (informs Stage 4).
+- No host changes needed beyond installing docker.
+
+**Committed:** `container/{Dockerfile, run-outer.sh, entrypoint.sh, smoke.sh, seccomp-builder.json, gen-seccomp.sh, SPIKE-FINDINGS.md}`. Stages 2+ transcribe this recipe verbatim. One caveat: re-verify on a genuine Ubuntu 24.04 host (the spike box was 26.04 / kernel 7.0; the in-image podman target is 24.04).
+
+---
+
+## Stage 1 — Deployment metadata + deploy skill (agent-server on host)
+
+### Contract & registry
+
+- [ ] `src/contract`: add `deployment` (optional `{ dev?: {port?, url?}; prod?: {port?, url?} }`) to the create-project request and the `ProjectInfo` response schemas; regenerate `openapi.json`
+- [ ] `src/runtime/projectStore.ts`: `ProjectRecord` gains optional `deployment`; loader tolerates records without it (backward compatible)
+- [ ] `src/runtime/projectRegistry.ts`:
+  - `createProject({ name, deployment })` persists metadata; **same-name re-POST updates `deployment`** and rewrites the materialised file
+  - materialise `<projectDir>/.pi/deployment.json` (pretty-printed, stable key order) on create/update
+  - **template seeding (D5):** when the project dir is created fresh and `APPX_TEMPLATE_DIR` is set, recursively copy it in (skip `node_modules`/`.next`/`dist`/caches); leave existing dirs untouched. Lift orchestrator's `cpSync` + filter implementation
+- [ ] `src/http/projectsRoutes.ts`: accept/return the new field; validation: each present port must be an integer in 1024–65535 (reject privileged/garbage values at the boundary — fail fast)
+- [ ] `src/config.ts`: add `APPX_TEMPLATE_DIR` (optional; absent ⇒ no seeding)
+
+### Runtime / prompt
+
+- [ ] `src/config.ts`: add `APP_CONTAINER_RUNTIME` (default `"podman"`), validated non-empty string
+- [ ] `src/runtime/projectRuntime.ts`: extend `resolveSystemPrompt` (or a sibling helper) to append the generated Deployment section when the project has metadata. Keep generation in one pure function (`buildDeploymentPromptSection(deployment, containerRuntime)`) so it is unit-testable without a runtime
+
+### Deploy skill
+
+- [ ] `skills/deploy-app/SKILL.md` with the conventions (DEV + PROD, per D6 — same build, two instances):
+  - read `.pi/deployment.json` for the dev/prod ports and URLs
+  - DEV (refine): `$APP_CONTAINER_RUNTIME build -t <project>-app:dev .` → `run -d --name <project>-app-dev -p <devPort>:<containerPort> <project>-app:dev`
+  - PROD (promote): `$APP_CONTAINER_RUNTIME build -t <project>-app:prod .` → `run -d --name <project>-app-prod -p <prodPort>:<containerPort> <project>-app:prod`
+  - no `--target`: the template's Dockerfile has one final (lean, non-root) image; DEV and PROD differ only by tag/instance/port
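+  - redeploy: `stop && rm && build && run` under the same `--name` (idempotent; never accumulate containers — on the very first deploy no container exists yet, so `stop`/`rm` must tolerate "no such container" instead of aborting the chain); refinements rebuild **DEV only**, promote rebuilds PROD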
+  - `<containerPort>` is a template detail (e.g. 8080); always map `-p <reserved host port>:<containerPort>`, never assume they're equal
+  - multi-container apps (db etc.): suffix names `<project>-db`, only the app publishes the reserved port(s); inter-container traffic via a `<project>` podman network
+  - health check before declaring success: `curl -fsS 127.0.0.1:<port>` with retries; report the relevant public URL to the user
+  - **never** pass `*_API_KEY` env vars into app containers
+- [ ] Wire the skill into local dev runs via `PI_SKILL_PATHS` (document in README); the outer image bakes it in at Stage 2
+
+### Tests (Stage 1)
+
+- [ ] `test/projectLifecycle.test.ts`: deployment metadata (dev+prod) round-trips create → get → list; re-POST same name updates it; `.pi/deployment.json` written and rewritten; absent metadata ⇒ no file, no prompt section; **template seeding** copies into a fresh dir and skips an existing one
+- [ ] New `test/deploymentPrompt.test.ts`: `buildDeploymentPromptSection` output for dev-only / prod-only / both / absent metadata
+- [ ] Manual e2e (with appx running locally — see appx plan): create project in UI (seeded template runs immediately) → prompt a small change → DEV URL updates → promote → PROD URL reflects it. This is where skill iteration happens.
+
+**Acceptance:** the full create → deploy → view → refine → redeploy loop works locally with agent-server run via `npm run dev` and Docker Desktop/podman as `APP_CONTAINER_RUNTIME`.
+
+---
+
+## Stage 2 — Outer container image
+
+Promote the **committed Stage 0 artifacts** (`container/Dockerfile`,
+`run-outer.sh`, `entrypoint.sh`, `seccomp-builder.json`) from "keeps the container
+alive for exec" to "runs agent-server". Keep the proven flag set and the
+`newuidmap` file-cap + native-overlay fixes **verbatim** — do not reintroduce
+`/dev/fuse`, `SYS_ADMIN`, or `seccomp=unconfined`.
+
+- [ ] `container/Dockerfile` — extend the spike image:
+  - **multi-stage build** (lift orchestrator's pattern): a Node build stage that compiles agent-server, then copy the pruned runtime into the spike's ubuntu:24.04 stage (e.g. `npm ci && build` then copy `dist/` + production deps; orchestrator uses `pnpm deploy --prod /app`)
+  - keep the spike's rootless-podman setup (file-cap helpers, native-overlay `storage.conf`, `containers.conf`, subuid/subgid) unchanged
+  - bake `skills/deploy-app` at a fixed path; set `PI_SKILL_PATHS`
+  - bake the **app template** (provisional: a minimal Vite SPA, see D5 — lean multi-stage, single runtime target, non-root) at a fixed path; set `APPX_TEMPLATE_DIR`. `container-smoke.sh` builds it under nested rootless podman (proven in the inner-app spike; the smoke guards against regression)
+- [ ] `container/entrypoint.sh` — extend the spike entrypoint:
+  - keep the stale-runtime-state wipe + `podman info` warmup
+  - replace `sleep infinity` with agent-server (env: `WORKSPACE_DIR=/workspace`, `ANTHROPIC_API_KEY`, `AGENT_SERVER_TOKEN`, `APP_CONTAINER_RUNTIME=podman`, `APPX_TEMPLATE_DIR=...`, `AGENT_SERVER_HOST=0.0.0.0` — the container boundary takes over loopback's role; the **publish** stays loopback-only on the host side)
+- [ ] `container/run-outer.sh` — extend the spike script:
+  - add `-p 127.0.0.1:4001:4001` (API) alongside the existing app-port range publish (now a **pair-aware** range; see appx plan for the revised cap given two ports/project)
+  - keep volumes (workspace + named podman-storage volume) and the proven security flags
+- [ ] Run the **same Stage 1 manual e2e** with host-run appx pointed at the container via `APPX_AGENT_SERVER_URL=http://127.0.0.1:4001` — zero appx code changes expected
+
+### Tests (Stage 2)
+
+- [ ] `scripts/container-smoke.sh` (Linux): build image → run → poll `GET /` until healthy → `POST /v1/projects` with deployment metadata → assert `deployment.json` inside the container → `docker exec` the skill's literal command sequence to build **the seeded template** once and run it as DEV + PROD instances on the two ports (a realistic multi-stage build under nested rootless podman — not just nginx) → `curl 127.0.0.1:<devPort>` and `<prodPort>` from the host → restart outer container → assert registry + workspace survived.
+  This deliberately **bypasses the LLM**: the agent only ever runs bash commands, so executing the skill's exact commands validates all infrastructure (ports, volumes, nesting) deterministically.
+- [ ] CI: nightly/on-demand GitHub Actions job (ubuntu runners are full VMs; `--device /dev/net/tun` works there, and `/dev/fuse` is no longer needed with native overlay) running `container-smoke.sh`
+
+**Acceptance:** Stage 1 e2e passes with agent-server containerised; `container-smoke.sh` green on Linux.
+
+---
+
+## Stage 4 — Hardening (agent-server items)
+
+(Stage 3 is appx-side; see sibling plan.)
+
+- [ ] Entrypoint resurrects inner apps after an outer restart: **wipe stale `XDG_RUNTIME_DIR` runtime state first**, then `podman start --all` (the spike proved bare `podman start --all` fails without the wipe; `entrypoint.sh` already does this — confirm it covers both DEV and PROD containers). Architecture doc limitation #6
+- [ ] Bash tool `spawnHook` strips `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` / `*_API_KEY` from child process env — defence in depth so keys can't leak into `podman run -e`-style invocations even by accident (OWASP secrets-management alignment; keys live in the process heap, not child envs)
+- [ ] Optional golden-prompt LLM smoke (manual, pre-release): "build a single-page todo app and deploy it" → assert HTTP 200 on the reserved port within N minutes. Catches prompt/skill regressions; not CI
+
+---
+
+## Testing strategy summary
+
+Every networking boundary is tested by a real connection at exactly one layer and faked everywhere else:
+
+| Layer | What | Where |
+|---|---|---|
+| Unit (`node:test`, every PR) | metadata round-trip, prompt section builder, config validation | this repo |
+| Infra smoke (deterministic, no LLM) | full port/volume/nesting chain via skill's literal commands | `scripts/container-smoke.sh`, Linux CI |
+| Cross-service smoke | appx ↔ agent-server ↔ subdomain chain | appx repo, `scripts/smoke-deploy.sh` |
+| LLM e2e | prompt/skill quality | manual golden-prompt checklist |
+
+## Potential improvements (deferred — not v1)
+
+Validated or low-risk upgrades we defer to keep v1 simple and uniform. None
+require app-specific logic in appx.
+
+### Hot-reload DEV (instant refinements)
+
+The inner-app spike (`container/INNER-APP-SPIKE.md`, T3) **proved** a faster
+refinement loop is feasible: bind-mount the project dir into the DEV container
+(`-v <projectDir>:/app` plus an anonymous `-v /app/node_modules` so the mount
+doesn't shadow installed deps) and run the framework's dev server. The agent then
+edits files in `/workspace` and the running DEV app **hot-reloads with no rebuild
+or redeploy** — HMR fired across the mount on native overlay, no polling needed.
+
+Deferred because it breaks v1's uniformity:
+- It's **framework-specific** (needs a dev server with HMR; a built static app or
+  a Python service has no equivalent), so it can't be the universal model.
+- It reintroduces dev-server quirks the template + skill must handle — Vite's
+  `server.allowedHosts` must include the project's dev subdomain (fed via env from
+  `.pi/deployment.json`), and the dev server's HMR WebSocket must traverse appx's
+  subdomain proxy.
+
+Safe to add later because it needs **no appx change specific to it**:
+`allowedHosts` lives in the template + skill; WebSocket forwarding is a generic
+proxy capability appx needs for user apps regardless. Adopt per-template if the
+rebuild-redeploy latency (a few seconds) proves to be real friction.
+
+## Risks
+
+1. **Nested podman flags on target OS** — retired by Stage 0 (proven recipe committed); only residual is re-verifying on a genuine Ubuntu 24.04 host.
+2. **"Works on host, breaks nested"** — mitigated by D3 (`APP_CONTAINER_RUNTIME`) + skill conventions written against `deployment.json`, not host assumptions.
+3. **Skill quality** — the only part needing real-LLM iteration; isolated in Stage 1 where the feedback loop is fastest (no containers in the way).
+4. **Outer restart kills inner apps** — addressed in Stage 4 (stale-state wipe + `podman start --all`); appx UI already shows honest per-port health.
+5. **Two ports/project halves project density** under appx's published-port cap and doubles subdomains — tracked in `phase_9_plan.md`; revisit the cap.
+6. **Refinement latency** — dev=prod means every refinement is a rebuild + redeploy (~seconds, not instant). Accepted for v1; hot-reload (see *Potential improvements*) is the escape hatch and needs no appx change.
+
+(Realistic multi-stage builds under nesting — once a risk — are now **validated** by `container/INNER-APP-SPIKE.md`: dev+prod instances on two ports, redeploy with layer cache, and a Python app all worked unprivileged; Stage 2 smoke guards against regression.)
diff --git a/docs/superpowers/plans/project-runtime-and-session-split.md b/docs/plans/project-runtime-and-session-split.md
similarity index 100%
rename from docs/superpowers/plans/project-runtime-and-session-split.md
rename to docs/plans/project-runtime-and-session-split.md
diff --git a/docs/plans/stage0-spike-brief.md b/docs/plans/stage0-spike-brief.md
new file mode 100644
index 0000000..a0ddd0a
--- /dev/null
+++ b/docs/plans/stage0-spike-brief.md
@@ -0,0 +1,181 @@
+# Stage 0 Spike Brief — Nested Rootless Podman ("Outer Builder Container")
+
+**Date:** 2026-06-11
+**Parent plan:** `docs/plans/builder-containers-plan.md` (Stage 0)
+**Architecture reference:** `docs/architecture/important/builder-container-architecture.md`
+**Background reading:** `docs/misc/other/rootless-podman-isolation.md` (the untested draft this spike validates)
+
+This document has two audiences:
+
+- **Section 0** is the runbook for the human operator preparing the box.
+- **Sections 1+** are the brief for the coding agent executing the spike.
+
+---
+
+## 0. Operator runbook (human — do this before handing off)
+
+Target: a throwaway Linux cloud VM. **Ubuntu 24.04** (it is the assumed production host OS and ships the strictest user-namespace defaults — if the spike passes here, easier distros are free). Minimum 2 vCPU / 4 GB RAM / 40 GB disk; see the hardware discussion in the parent plan thread.
+
+```bash
+# ── as root on the fresh server ──────────────────────────────────────────────
+apt-get update && apt-get install -y git curl rsync tmux jq
+curl -fsSL https://get.docker.com | sh          # Docker CE from the official repo
+curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && apt-get install -y nodejs
+
+# Work user for the coding agent. docker group is root-equivalent, and the
+# agent additionally gets passwordless sudo because T2 requires testing
+# host-level mitigations (sysctls, AppArmor profiles). Acceptable ONLY because
+# this box is throwaway and holds nothing but the spike + a disposable API key.
+adduser --disabled-password --gecos "" spike
+usermod -aG docker spike
+echo 'spike ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/spike && chmod 0440 /etc/sudoers.d/spike
+
+# SSH access for spike: the account has NO password (--disabled-password locks
+# it), so reuse the key your cloud provider (e.g. Hetzner) provisioned for root.
+mkdir -p /home/spike/.ssh
+cp /root/.ssh/authorized_keys /home/spike/.ssh/authorized_keys
+chown -R spike:spike /home/spike/.ssh
+chmod 700 /home/spike/.ssh && chmod 600 /home/spike/.ssh/authorized_keys
+
+# Swap (mandatory on a 4 GB box; harmless on bigger ones)
+fallocate -l 4G /swapfile && chmod 600 /swapfile && mkswap /swapfile && swapon /swapfile
+echo '/swapfile none swap sw 0 0' >> /etc/fstab
+
+# ── ship the repo: public repo → plain HTTPS clone, no credentials needed ────
+# First, from your laptop: commit + push the spike files (container/, docs/plans/)
+# so the clone includes them. Then log in as spike (`ssh spike@<SERVER_IP>`, NOT root — `~` must be /home/spike) and run:
+git clone https://github.com/appx-org/agent-server.git ~/agent-server
+cd ~/agent-server && git switch -c stage0-spike
+git config user.name "stage0 spike agent" && git config user.email spike@localhost
+# Deliberately NO push credentials on the box (the agent has sudo); results
+# come back via git-over-SSH fetch from the laptop — see acceptance below.
+
+# ── as spike user: install the coding agent + a DISPOSABLE API key ──────────
+ssh spike@<SERVER_IP>
+npm config set prefix ~/.npm-global && echo 'export PATH=$HOME/.npm-global/bin:$PATH' >> ~/.bashrc
+npm install -g @earendil-works/pi-coding-agent     # or however you install pi
+echo 'export ANTHROPIC_API_KEY=sk-ant-...' >> ~/.bashrc   # fresh key, revoke after spike
+source ~/.bashrc
+
+# ── launch (inside tmux so it survives SSH drops) ───────────────────────────
+tmux new -s spike
+cd ~/agent-server && pi
+```
+
+Kickoff prompt to paste into the agent:
+
+> Read `docs/plans/stage0-spike-brief.md` in this repo — section 0 is already
+> done; execute sections 1–7. You have passwordless sudo for host-level
+> changes; record every host change and every finding in
+> `container/SPIKE-FINDINGS.md` as you go. Commit your work to the current
+> `stage0-spike` branch in small, described steps (you cannot push — that's
+> expected; the operator fetches from this box). The definition of done is
+> `./container/smoke.sh` exiting 0 under the brief's hard constraints (no
+> `--privileged`, no `SYS_ADMIN`, non-root outer user).
+
+**Acceptance (operator, when the agent reports done):**
+
+```bash
+# Re-verify from a clean slate — proves the findings, not the accumulated state:
+cd ~/agent-server/container
+docker rm -f builder-outer; docker volume rm -f builder-workspace builder-podman-storage
+docker system prune -af
+./smoke.sh    # must exit 0
+```
+
+Then: check `SPIKE-FINDINGS.md` is fully filled (every flag justified, host
+prereqs listed), and pull the agent's branch straight off the box — commit
+history included, still no credentials on the server:
+
+```bash
+# from laptop, inside the agent-server repo
+git remote add spikebox spike@<SERVER_IP>:agent-server
+git fetch spikebox stage0-spike
+git switch stage0-spike   # review, then merge/PR and push from the laptop
+```
+
+Finally: revoke the spike API key; destroy the server or keep it for Stage 1/2
+iteration (resizing up is easier than re-provisioning if you keep it).
+
+---
+
+## 1. Mission (coding agent starts here)
+
+Prove that an **unprivileged** Docker container can run **rootless Podman** well enough to build and serve real apps, on this exact host. Produce a known-good, *minimal* configuration that later stages will copy verbatim.
+
+Success is binary: `./container/smoke.sh` exits 0 on this box, with a flag set you can justify line by line.
+
+You are NOT building agent-server integration, prompts, or anything product-shaped. Infrastructure validation only.
+
+## 2. What is on disk
+
+| Path | What it is |
+|---|---|
+| `container/Dockerfile` | Draft outer image (Ubuntu 24.04 + podman stack). Starting point — expect to fix it. |
+| `container/entrypoint.sh` | Runtime-dir setup + podman warmup, then execs CMD. |
+| `container/run-outer.sh` | Builds the image and (re)starts the outer container with the **candidate** flag set. |
+| `container/smoke.sh` | The acceptance test. Your iteration loop is: edit → `./smoke.sh` → read failures → repeat. |
+| `container/SPIKE-FINDINGS.md` | Findings template. Fill it in **as you go**, not at the end. |
+
+## 3. Hard constraints
+
+1. **No `--privileged`. Ever.** The outer container being unprivileged *is the security boundary of the whole architecture* — a privileged "pass" is worthless.
+2. **No `--cap-add SYS_ADMIN`** unless you have exhausted alternatives; if you genuinely cannot avoid it, that is a major finding — document the exact error and stop to flag it.
+3. The outer container's main process must run as a **non-root user** (uid 1000 `builder`). `--user 0` workarounds are failures.
+4. Host-level changes (sysctls, apparmor profiles, packages) are **allowed but must be recorded** in findings — they become requirements for appx's deploy scripts (`system-setup.sh`).
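+5. Inside the repo, work only in `~/agent-server/container/`; outside it, touch only docker state and the host-level changes allowed (and recorded) under constraint 4. Don't touch the rest of the repo.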
+
+## 4. Tasks and acceptance criteria
+
+### T1 — Make the nested chain work
+- [ ] `./container/run-outer.sh` brings up the outer container; `docker exec builder-outer id -u` → `1000`
+- [ ] Inside: `podman run -d -p 10000:80 docker.io/library/nginx:alpine` succeeds
+- [ ] From the **host**: `curl -fsS http://127.0.0.1:10000` returns the nginx page (host → docker publish → outer netns → podman forward → inner container)
+- [ ] Inside: `podman build` of a trivial image succeeds and the built image runs
+
+### T2 — Minimise and justify the flag set
+- [ ] Deletion-test every `docker run` security flag and every host-level change: remove one, re-run `smoke.sh`, record the exact error it causes (or remove it permanently if nothing breaks)
+- [ ] Try replacing `seccomp=unconfined` with a tailored profile (Podman ships one that allows `mount`; see §5 "Known pitfalls", item 2). If it works, prefer it; if not, record why — `unconfined` is acceptable for now with a documented TODO
+- [ ] **Outer-runtime sub-question (informs appx Stage 3):** the host runtime can be docker *or* podman. Podman's default seccomp profile allows `mount(2)` where docker's blocks it, so a podman *outer* may not need `seccomp=unconfined` at all. If podman is available on the box, run the same nested test with `podman run` as the outer command and record which flags become unnecessary. This decides whether `system-setup.sh` should prefer podman-on-host for a smaller attack surface
+- [ ] Outcome: `run-outer.sh` contains only flags that each carry a one-line justification in findings
+
+### T3 — Persistence and restart semantics
+- [ ] `docker restart builder-outer`: workspace volume content and podman images (named volume) survive
+- [ ] Record what happens to *running* inner containers across the restart (expected: stopped). Test whether `podman start --all` resurrects them cleanly — this decides Stage 4's recovery mechanism
+- [ ] Record the cold (first-run) `podman info` time vs. the warmed-up time (entrypoint logs it)
+
+### T4 — Storage driver determination
+- [ ] The draft pins `fuse-overlayfs`. Test native rootless overlayfs (kernel ≥ 5.13 supports it; this host is 6.8+): remove `mount_program` from `storage.conf`, reset podman storage, re-run smoke. Record which works and which is faster; pin the winner
+- [ ] Last-resort fallback if both overlay variants fail: `driver = "vfs"` — needs no FUSE device and no overlay nesting at all, at the cost of full-copy layers (slow, disk-hungry). If only VFS works, that's a major finding: record it and flag before Stage 2 builds on it
+
+### T5 — Findings
+- [ ] `container/SPIKE-FINDINGS.md` fully filled in (template provided). The Stage 2 image and appx's Stage 3 container-supervisor transcribe your flag set verbatim — incomplete findings = repeated debugging later
+
+## 5. Known pitfalls (read before debugging blind)
+
+These are researched, not guessed — check them in this order when something EPERMs:
+
+1. **Ubuntu 24.04 blocks unprivileged user namespaces via AppArmor.** `kernel.apparmor_restrict_unprivileged_userns=1` is default; nested podman fails with `apparmor="DENIED" operation="userns_create"` (visible in host `dmesg`/`journalctl -k`). Candidate fixes, in preference order — test which is actually sufficient:
+   a. `--security-opt apparmor=unconfined` on the outer container (containment loss is acceptable: userns remains, and seccomp too if the tailored profile from pitfall 2 replaces `seccomp=unconfined`);
+   b. a host AppArmor profile granting `userns` to the container runtime;
+   c. host sysctl `kernel.apparmor_restrict_unprivileged_userns=0` (bluntest; if this is the only thing that works, record it as a deploy-script requirement).
+2. **Docker's default seccomp profile blocks `mount(2)`**, which rootless podman needs even for unprivileged FUSE/bind mounts. Hence `seccomp=unconfined` in the draft. The tailored alternative: Podman's own `seccomp.json` (in the `containers-common` package, `/usr/share/containers/seccomp.json`) allows `mount` — try `--security-opt seccomp=/path/on/host/seccomp.json`.
+3. **`/etc/subuid` / `/etc/subgid`** entries for `builder` must exist *inside the image* (draft has them). Errors like `cannot find UID/GID for user builder` point here; run `podman system migrate` after changing them.
+4. **No systemd inside the container** → `cgroup_manager = "cgroupfs"` and `events_logger = "file"` (draft sets both in `containers.conf`). Resource limits inside the nest may be unavailable; that's fine, record it.
+5. **`XDG_RUNTIME_DIR`** must exist and be writable (no systemd-logind to create `/run/user/1000`). Draft uses `/tmp/runtime-builder` via entrypoint.
+6. **Use fully-qualified image names** (`docker.io/library/nginx:alpine`) — Ubuntu's podman has no unqualified-search registries configured and will error or prompt.
+7. **`--userns=keep-id` is a podman flag, not a docker flag.** The reference doc's draft run command mixes them up; ignore it. With docker, "unprivileged" = `USER builder` in the image + no added caps.
+8. **Sanity-check trick:** `quay.io/podman/stable` is the upstream podman-in-container reference image. If our image fails mysteriously, run the same nested command in `podman/stable` with the same docker flags — if that also fails, the problem is host/flags; if it passes, the problem is our Dockerfile. Note the image only solves the *in-image* half (packages, subuid, conf); the docker-run flags and host prereqs are required with it too.
+9. **Sanctioned fallback:** if our Ubuntu-based Dockerfile fights you past ~2 hours of in-image issues, switching the base to `quay.io/podman/stable` (adding the `builder` uid-1000 user on top) is an acceptable T1 outcome — record the trade-off (Fedora base, unpinned podman version) in findings and keep the rest of the constraints unchanged. Host-side flag minimisation (T2) is unaffected by the base choice. There is field evidence this matters: in-image config differences alone have made the difference between needing `--privileged` and not (stackoverflow.com/q/75244579).
+10. **Canonical reference:** Dan Walsh's "How to use Podman inside of a container" (redhat.com/en/blog/podman-inside-container) is the authoritative walkthrough of every rootful/rootless nesting combination; our candidate flag set matches its non-privileged rootless-in-docker recipe. Consult it before inventing anything novel.
+
+## 6. Method
+
+- Iterate exclusively through `./container/smoke.sh` — it is the definition of done. Improve it if it misses something real (e.g. if you discover that DNS inside inner containers is broken, add a check), but never weaken a check to pass.
+- One change at a time; record each finding immediately in `SPIKE-FINDINGS.md`.
+- Host kernel logs are your AppArmor/seccomp oracle: `sudo journalctl -k --since -5min | grep -i -E 'apparmor|audit'`.
+- Disk hygiene on a small box: `docker system prune -f` and `podman system prune -f` (inside) between heavy iterations.
+
+## 7. Timebox & escalation
+
+This spike is timeboxed to ~1 day of focused work. If the chain fundamentally cannot work unprivileged on Ubuntu 24.04 (constraint 1–2 violations are the only outs), stop and write up: the exact failure, kernel/audit evidence, and which of the architecture's escalation paths (Sysbox runtime, different host distro, host-level podman) looks cheapest. Do not silently downgrade the constraints to get a green smoke run.
diff --git a/docs/superpowers/plans/use-agent-session-services.md b/docs/plans/use-agent-session-services.md
similarity index 100%
rename from docs/superpowers/plans/use-agent-session-services.md
rename to docs/plans/use-agent-session-services.md