From 9ccba6a2d7f82e1c38114905ea419904ab283177 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Wed, 18 Mar 2026 21:20:43 +0100 Subject: [PATCH 1/3] feat(deploy): consume verified runtime bundles for cluster builds --- .../skills/debug-openshell-cluster/SKILL.md | 5 +- .github/workflows/branch-e2e.yml | 1 + .github/workflows/docker-build.yml | 38 + .github/workflows/release-dev.yml | 2 + .github/workflows/release-tag.yml | 2 + architecture/README.md | 6 +- architecture/build-containers.md | 32 + architecture/gateway-single-node.md | 10 +- crates/openshell-cli/src/doctor_llm_prompt.md | 5 +- deploy/docker/Dockerfile.cluster | 68 +- scripts/remote-deploy.sh | 48 +- tasks/docker.toml | 6 +- tasks/scripts/ci-build-cluster-image.sh | 82 ++ tasks/scripts/cluster-bootstrap.sh | 16 + tasks/scripts/docker-build-cluster.sh | 433 ++++++++- tasks/scripts/docker-publish-multiarch.sh | 68 +- tasks/scripts/download-runtime-bundle.sh | 52 + tasks/tests/runtime-bundle-caller-paths.bats | 238 +++++ tasks/tests/runtime-bundle-ci-workflow.bats | 116 +++ tasks/tests/runtime-bundle-consumer.bats | 906 ++++++++++++++++++ 20 files changed, 2089 insertions(+), 45 deletions(-) create mode 100644 tasks/scripts/ci-build-cluster-image.sh create mode 100644 tasks/scripts/download-runtime-bundle.sh create mode 100644 tasks/tests/runtime-bundle-caller-paths.bats create mode 100644 tasks/tests/runtime-bundle-ci-workflow.bats create mode 100644 tasks/tests/runtime-bundle-consumer.bats diff --git a/.agents/skills/debug-openshell-cluster/SKILL.md b/.agents/skills/debug-openshell-cluster/SKILL.md index 115a2aa5..b982383d 100644 --- a/.agents/skills/debug-openshell-cluster/SKILL.md +++ b/.agents/skills/debug-openshell-cluster/SKILL.md @@ -290,7 +290,10 @@ If DNS is broken, all image pulls from the distribution registry will fail, as w | `tls handshake eof` from `openshell status` | Server not running or mTLS credentials missing/mismatched | Check StatefulSet replicas (Step 3) and mTLS files (Step 
6) | | StatefulSet `0/0` replicas | StatefulSet scaled to zero (failed deploy, manual scale-down, or Helm misconfiguration) | `openshell doctor exec -- kubectl -n openshell scale statefulset openshell --replicas=1` | | Local mTLS files missing | Deploy was interrupted before credentials were persisted | Extract from cluster secret `openshell-client-tls` (Step 6) | -| Container not found | Image not built | `mise run docker:build:cluster` (local) or re-deploy (remote) | +| Container not found | Image not built | `mise run docker:build:cluster` (local, with `OPENSHELL_RUNTIME_BUNDLE_TARBALL` set) or re-deploy (remote, with `--runtime-bundle-tarball`) | +| Local cluster image build now fails before Docker starts with runtime-bundle validation errors | Missing, malformed, wrong-arch, or unstaged `OPENSHELL_RUNTIME_BUNDLE_TARBALL` input for the controlled GPU runtime path | Re-run the cluster-image build with `OPENSHELL_RUNTIME_BUNDLE_TARBALL` pointing at a valid per-arch bundle tarball, and confirm `tasks/scripts/docker-build-cluster.sh` stages `deploy/docker/.build/runtime-bundle/<arch>/` successfully | +| Remote deploy now fails before Docker starts with runtime-bundle validation errors | `scripts/remote-deploy.sh` was run without `--runtime-bundle-tarball`, or the synced tarball path on the remote host is missing/invalid | Re-run `scripts/remote-deploy.sh` with `--runtime-bundle-tarball <path>` and confirm the tarball syncs to `${REMOTE_DIR}/.cache/runtime-bundles/` before the remote cluster build starts | +| Multi-arch cluster publish fails before Docker starts with missing runtime-bundle variables | One or both per-arch tarballs were not provided to `tasks/scripts/docker-publish-multiarch.sh` | Set `OPENSHELL_RUNTIME_BUNDLE_TARBALL_AMD64` and `OPENSHELL_RUNTIME_BUNDLE_TARBALL_ARM64` to valid per-arch tarballs, then re-run the multi-arch publish command | | Container exited, OOMKilled | Insufficient memory | Increase host memory or reduce workload | | Container exited, 
non-zero exit | k3s crash, port conflict, privilege issue | Check `openshell doctor logs` for details | | `/readyz` fails | k3s still starting or crashed | Wait longer or check container logs for k3s errors | diff --git a/.github/workflows/branch-e2e.yml b/.github/workflows/branch-e2e.yml index a59f84b6..82b45f68 100644 --- a/.github/workflows/branch-e2e.yml +++ b/.github/workflows/branch-e2e.yml @@ -24,6 +24,7 @@ jobs: component: cluster platform: linux/arm64 runner: build-arm64 + runtime-bundle-url: ${{ vars.OPENSHELL_RUNTIME_BUNDLE_URL_ARM64 }} e2e: needs: [build-gateway, build-cluster] diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 48f68ab6..6e190a3e 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -32,6 +32,21 @@ on: required: false type: string default: "" + runtime-bundle-url: + description: "Per-arch runtime bundle tarball URL for single-arch cluster builds" + required: false + type: string + default: "" + runtime-bundle-url-amd64: + description: "amd64 runtime bundle tarball URL for multi-arch cluster builds" + required: false + type: string + default: "" + runtime-bundle-url-arm64: + description: "arm64 runtime bundle tarball URL for multi-arch cluster builds" + required: false + type: string + default: "" env: MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -87,7 +102,30 @@ jobs: uses: ./.github/actions/setup-buildx - name: Build ${{ inputs.component }} image + if: inputs.component != 'cluster' env: DOCKER_BUILDER: openshell OPENSHELL_CARGO_VERSION: ${{ steps.version.outputs.cargo_version }} run: mise run --no-prepare docker:build:${{ inputs.component }} + + - name: Build cluster image + if: inputs.component == 'cluster' + env: + DOCKER_BUILDER: openshell + OPENSHELL_CARGO_VERSION: ${{ steps.version.outputs.cargo_version }} + OPENSHELL_RUNTIME_BUNDLE_URL: ${{ inputs.runtime-bundle-url }} + OPENSHELL_RUNTIME_BUNDLE_URL_AMD64: ${{ inputs.runtime-bundle-url-amd64 }} + 
OPENSHELL_RUNTIME_BUNDLE_URL_ARM64: ${{ inputs.runtime-bundle-url-arm64 }} + run: | + set -euo pipefail + + if [[ "${DOCKER_PLATFORM}" == *","* ]]; then + bash tasks/scripts/ci-build-cluster-image.sh \ + --platform "${DOCKER_PLATFORM}" \ + --runtime-bundle-url-amd64 "${OPENSHELL_RUNTIME_BUNDLE_URL_AMD64}" \ + --runtime-bundle-url-arm64 "${OPENSHELL_RUNTIME_BUNDLE_URL_ARM64}" + else + bash tasks/scripts/ci-build-cluster-image.sh \ + --platform "${DOCKER_PLATFORM}" \ + --runtime-bundle-url "${OPENSHELL_RUNTIME_BUNDLE_URL}" + fi diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index 488ec101..bb1cc74a 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -60,6 +60,8 @@ jobs: with: component: cluster cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} + runtime-bundle-url-amd64: ${{ vars.OPENSHELL_RUNTIME_BUNDLE_URL_AMD64 }} + runtime-bundle-url-arm64: ${{ vars.OPENSHELL_RUNTIME_BUNDLE_URL_ARM64 }} e2e: needs: [build-gateway, build-cluster] diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml index c0bd5deb..fa2e77cc 100644 --- a/.github/workflows/release-tag.yml +++ b/.github/workflows/release-tag.yml @@ -75,6 +75,8 @@ jobs: with: component: cluster cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} + runtime-bundle-url-amd64: ${{ vars.OPENSHELL_RUNTIME_BUNDLE_URL_AMD64 }} + runtime-bundle-url-arm64: ${{ vars.OPENSHELL_RUNTIME_BUNDLE_URL_ARM64 }} e2e: needs: [build-gateway, build-cluster] diff --git a/architecture/README.md b/architecture/README.md index d65b9b23..b899f37c 100644 --- a/architecture/README.md +++ b/architecture/README.md @@ -269,7 +269,11 @@ This performs the same bootstrap flow on the remote host via SSH. For development and testing against the current checkout, use `scripts/remote-deploy.sh` instead. 
That helper syncs the local repository to an SSH-reachable machine, builds the CLI and Docker images on the remote host, -and then runs `openshell gateway start` there. It defaults to secure gateway +and then runs `openshell gateway start` there. Cluster-image builds in that +flow now also require a runtime-bundle tarball: provide +`--runtime-bundle-tarball <path>` for normal sync-and-build deploys, or +`--remote-runtime-bundle-tarball <path> --skip-sync` if the bundle is +already staged on the remote host. The helper defaults to secure gateway startup and only enables `--plaintext`, `--disable-gateway-auth`, or `--recreate` when explicitly requested. diff --git a/architecture/build-containers.md b/architecture/build-containers.md index 705b00d6..e265e9c3 100644 --- a/architecture/build-containers.md +++ b/architecture/build-containers.md @@ -21,6 +21,38 @@ The cluster image is a single-container Kubernetes distribution that bundles the The supervisor binary (`openshell-sandbox`) is cross-compiled in a build stage and placed at `/opt/openshell/bin/openshell-sandbox`. It is exposed to sandbox pods at runtime via a read-only `hostPath` volume mount — it is not baked into sandbox images. +## Controlled GPU Runtime Bundle Path + +OpenShell's runtime bundle publication contract is tarball-first. The canonical artifact is a per-architecture release tarball whose single top-level bundle directory contains the install-root payload plus `manifest.json`. If OCI publication is added later, it is only a mirror transport for that same bundle contract. + +The current cluster build now consumes that published tarball through the local staged bundle path. `tasks/scripts/docker-build-cluster.sh` requires `OPENSHELL_RUNTIME_BUNDLE_TARBALL`, fails before any Helm packaging or Docker build when the bundle is missing or invalid, and stages the verified install-root payload under `deploy/docker/.build/runtime-bundle/<arch>/`. 
`deploy/docker/Dockerfile.cluster` then copies the runtime binaries, config, and shared libraries from that staged local tree into the final cluster image. + +That requirement now flows through all cluster-image entrypoints instead of only the direct script call: + +- local bootstrap via `tasks/scripts/cluster-bootstrap.sh` requires `OPENSHELL_RUNTIME_BUNDLE_TARBALL` whenever it is going to build the cluster image; prebuilt-image flows can still set `SKIP_CLUSTER_IMAGE_BUILD=1` +- remote gateway deploy via `scripts/remote-deploy.sh` requires either `--runtime-bundle-tarball` (or local `OPENSHELL_RUNTIME_BUNDLE_TARBALL`) for sync-and-build flows, or `--remote-runtime-bundle-tarball` when `--skip-sync` should reuse a tarball already staged on the remote host; the script exports the resolved remote path before invoking the remote cluster build +- multi-arch publishing via `tasks/scripts/docker-publish-multiarch.sh` requires `OPENSHELL_RUNTIME_BUNDLE_TARBALL_AMD64` and `OPENSHELL_RUNTIME_BUNDLE_TARBALL_ARM64`, builds one verified per-arch cluster image at a time, then assembles the final multi-arch manifest from those architecture-specific tags +- GitHub workflow cluster builds now consume release-asset URLs rather than local tarball paths directly: `tasks/scripts/download-runtime-bundle.sh` downloads per-arch tarballs into `deploy/docker/.build/runtime-bundles/`, `tasks/scripts/ci-build-cluster-image.sh` maps single-arch builds to `docker:build:cluster` and multi-arch builds to `docker:build:cluster:multiarch`, and `.github/workflows/docker-build.yml` passes explicit bundle URLs from workflow inputs or repo variables into that helper path + +The intended first OpenShell tarball consumption path is the `tasks/scripts/docker-build-cluster.sh` -> `deploy/docker/Dockerfile.cluster` flow: + +1. `tasks/scripts/docker-build-cluster.sh` receives the per-architecture runtime bundle tarball path through `OPENSHELL_RUNTIME_BUNDLE_TARBALL` before `docker buildx build`. +2. 
The script verifies the single top-level bundle-directory shape, requires valid JSON `manifest.json` content inside that bundle directory with a matching `architecture`, validates manifest-declared checksums and sizes, and checks the required runtime payload paths before staging. +3. The script stages the tarball payload into `deploy/docker/.build/runtime-bundle/<arch>/`, preserving the bundle directory and install-root layout expected by OpenShell. +4. `deploy/docker/Dockerfile.cluster` loads the staged local bundle tree in a dedicated build stage and copies the verified runtime files into the same final image paths OpenShell already expects. + +The tarball payload must contain the exact runtime assets the cluster image expects today: + +- `/usr/bin/nvidia-cdi-hook` +- `/usr/bin/nvidia-container-runtime` +- `/usr/bin/nvidia-container-runtime-hook` +- `/usr/bin/nvidia-container-cli` +- `/usr/bin/nvidia-ctk` +- `/etc/nvidia-container-runtime/` +- `/usr/lib/*-linux-gnu/libnvidia-container*.so*` + +This handoff keeps the OpenShell build package-manager-free for the runtime dependency itself. Standard OS image layers can remain upstream inputs, but the GPU runtime contents enter the build as a verified tarball payload rather than through a distro package repository. OCI, if later added, mirrors this same tarball-defined payload instead of changing the OpenShell consumption contract. + ## Sandbox Images Sandbox images are **not built in this repository**. They are maintained in the [openshell-community](https://github.com/nvidia/openshell-community) repository and pulled from `ghcr.io/nvidia/openshell-community/sandboxes/` at runtime. 
diff --git a/architecture/gateway-single-node.md b/architecture/gateway-single-node.md index 8dc270ac..2c348214 100644 --- a/architecture/gateway-single-node.md +++ b/architecture/gateway-single-node.md @@ -58,7 +58,11 @@ For remote dev/test deploys from a local checkout, `scripts/remote-deploy.sh` wraps a different workflow: it rsyncs the repository to a remote host, builds the release CLI plus cluster/server/sandbox images on that machine, and then invokes `openshell gateway start` with explicit flags such as `--recreate`, -`--plaintext`, or `--disable-gateway-auth` only when requested. +`--plaintext`, or `--disable-gateway-auth` only when requested. The remote +cluster-image build now also requires a runtime-bundle tarball: provide +`--runtime-bundle-tarball <path>` for normal sync-and-build deploys, or +`--remote-runtime-bundle-tarball <path> --skip-sync` when the tarball is +already present on the remote host. ## Local Task Flows (`mise`) @@ -70,6 +74,7 @@ Development task entrypoints split bootstrap behavior: For `mise run cluster`, `.env` acts as local source-of-truth for `GATEWAY_NAME`, `GATEWAY_PORT`, and `OPENSHELL_GATEWAY`. Missing keys are appended; existing values are preserved. If `GATEWAY_PORT` is missing, the task selects a free local port and persists it. Fast mode ensures a local registry (`127.0.0.1:5000`) is running and configures k3s to mirror pulls via `host.docker.internal:5000`, so the cluster task can push/pull local component images consistently. +When that flow needs to rebuild the cluster image, it also requires `OPENSHELL_RUNTIME_BUNDLE_TARBALL`; prebuilt-image paths can still skip the local cluster-image build with `SKIP_CLUSTER_IMAGE_BUILD=1`. ## Bootstrap Sequence Diagram @@ -298,7 +303,8 @@ GPU support is part of the single-node gateway bootstrap path rather than a sepa - `openshell gateway start --gpu` threads a boolean deploy option through `crates/openshell-cli`, `crates/openshell-bootstrap`, and `crates/openshell-bootstrap/src/docker.rs`. 
- When enabled, the cluster container is created with Docker `DeviceRequests`, which is the API equivalent of `docker run --gpus all`. -- `deploy/docker/Dockerfile.cluster` installs NVIDIA Container Toolkit packages in a dedicated Ubuntu stage and copies the runtime binaries, config, and `libnvidia-container` shared libraries into the final Ubuntu-based cluster image. +- `tasks/scripts/docker-build-cluster.sh` now validates a staged local runtime-bundle tarball and places the verified payload under `deploy/docker/.build/runtime-bundle/<arch>/` before Docker runs. +- `deploy/docker/Dockerfile.cluster` copies the runtime binaries, config, and `libnvidia-container` shared libraries from that staged local bundle into the final Ubuntu-based cluster image instead of installing toolkit packages from an apt repository during the build. - `deploy/docker/cluster-entrypoint.sh` checks `GPU_ENABLED=true` and copies GPU-only manifests from `/opt/openshell/gpu-manifests/` into k3s's manifests directory. - `deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml` installs the NVIDIA device plugin chart, currently pinned to `0.18.2`, along with GPU Feature Discovery and Node Feature Discovery. - k3s auto-detects `nvidia-container-runtime` on `PATH`, registers the `nvidia` containerd runtime, and creates the `nvidia` `RuntimeClass` automatically. 
diff --git a/crates/openshell-cli/src/doctor_llm_prompt.md b/crates/openshell-cli/src/doctor_llm_prompt.md index 4d4a6b64..34e6c5be 100644 --- a/crates/openshell-cli/src/doctor_llm_prompt.md +++ b/crates/openshell-cli/src/doctor_llm_prompt.md @@ -277,7 +277,10 @@ If DNS is broken, all image pulls from the distribution registry will fail, as w | `tls handshake eof` from `openshell status` | Server not running or mTLS credentials missing/mismatched | Check StatefulSet replicas (Step 3) and mTLS files (Step 6) | | StatefulSet `0/0` replicas | StatefulSet scaled to zero (failed deploy, manual scale-down, or Helm misconfiguration) | `openshell doctor exec -- kubectl -n openshell scale statefulset openshell --replicas=1` | | Local mTLS files missing | Deploy was interrupted before credentials were persisted | Extract from cluster secret `openshell-client-tls` (Step 6) | -| Container not found | Image not built | `mise run docker:build:cluster` (local) or re-deploy (remote) | +| Container not found | Image not built | `mise run docker:build:cluster` (local, with `OPENSHELL_RUNTIME_BUNDLE_TARBALL` set) or re-deploy (remote, with `--runtime-bundle-tarball`) | +| Local cluster image build now fails before Docker starts with runtime-bundle validation errors | Missing, malformed, wrong-arch, or unstaged `OPENSHELL_RUNTIME_BUNDLE_TARBALL` input for the controlled GPU runtime path | Re-run the cluster-image build with `OPENSHELL_RUNTIME_BUNDLE_TARBALL` pointing at a valid per-arch bundle tarball, and confirm `tasks/scripts/docker-build-cluster.sh` stages `deploy/docker/.build/runtime-bundle/<arch>/` successfully | +| Remote deploy now fails before Docker starts with runtime-bundle validation errors | `scripts/remote-deploy.sh` was run without `--runtime-bundle-tarball`, without `--remote-runtime-bundle-tarball` in `--skip-sync` mode, or the resolved tarball path is missing/invalid | Re-run `scripts/remote-deploy.sh` with `--runtime-bundle-tarball <path>` for sync-and-build deploys, or 
`--remote-runtime-bundle-tarball <path> --skip-sync` when the tarball is already staged remotely | +| Multi-arch cluster publish fails before Docker starts with missing runtime-bundle variables | One or both per-arch tarballs were not provided to `tasks/scripts/docker-publish-multiarch.sh` | Set `OPENSHELL_RUNTIME_BUNDLE_TARBALL_AMD64` and `OPENSHELL_RUNTIME_BUNDLE_TARBALL_ARM64` to valid per-arch tarballs, then re-run the multi-arch publish command | | Container exited, OOMKilled | Insufficient memory | Increase host memory or reduce workload | | Container exited, non-zero exit | k3s crash, port conflict, privilege issue | Check `openshell doctor logs` for details | | `/readyz` fails | k3s still starting or crashed | Wait longer or check container logs for k3s errors | diff --git a/deploy/docker/Dockerfile.cluster b/deploy/docker/Dockerfile.cluster index 56084076..9487d994 100644 --- a/deploy/docker/Dockerfile.cluster +++ b/deploy/docker/Dockerfile.cluster @@ -36,7 +36,6 @@ ARG K3S_VERSION=v1.35.2-k3s1 ARG K9S_VERSION=v0.50.18 ARG HELM_VERSION=v3.17.3 -ARG NVIDIA_CONTAINER_TOOLKIT_VERSION=1.18.2-1 # --------------------------------------------------------------------------- # Stage 1: Extract k3s artifacts from upstream rancher image (Alpine-based) @@ -146,26 +145,42 @@ RUN --mount=type=cache,id=cargo-registry-supervisor-${TARGETARCH},sharing=locked cp "$(cross_output_dir release)/openshell-sandbox" /build/out/ # --------------------------------------------------------------------------- -# Stage 2: Install NVIDIA container toolkit on Ubuntu +# Stage 2: Load the verified local runtime bundle # --------------------------------------------------------------------------- -FROM ubuntu:24.04 AS nvidia-toolkit +# `tasks/scripts/docker-build-cluster.sh` validates and stages the runtime +# bundle under `deploy/docker/.build/runtime-bundle/<arch>/` before Docker runs. 
+# This stage copies the already-verified install-root payload into `/out` so the +# final image can keep the same target paths without any apt-based toolkit +# installation stage. +FROM ubuntu:24.04 AS runtime-bundle -ARG NVIDIA_CONTAINER_TOOLKIT_VERSION +ARG TARGETARCH -RUN apt-get update && apt-get install -y --no-install-recommends \ - gpg curl ca-certificates && \ - curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ - | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg && \ - curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ - | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \ - | tee /etc/apt/sources.list.d/nvidia-container-toolkit.list && \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - "nvidia-container-toolkit=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \ - "nvidia-container-toolkit-base=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \ - "libnvidia-container-tools=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \ - "libnvidia-container1=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" && \ - rm -rf /var/lib/apt/lists/* +COPY deploy/docker/.build/runtime-bundle/ /runtime-bundles/ + +RUN set -eu; \ + arch_dir="/runtime-bundles/${TARGETARCH}"; \ + if [ ! -d "$arch_dir" ]; then \ + echo "missing staged runtime bundle directory for ${TARGETARCH}" >&2; \ + exit 1; \ + fi; \ + bundle_root=""; \ + for candidate in "$arch_dir"/*; do \ + if [ ! -d "$candidate" ]; then \ + continue; \ + fi; \ + if [ -n "$bundle_root" ]; then \ + echo "expected exactly one staged runtime bundle directory for ${TARGETARCH}" >&2; \ + exit 1; \ + fi; \ + bundle_root="$candidate"; \ + done; \ + if [ -z "$bundle_root" ]; then \ + echo "missing staged runtime bundle payload for ${TARGETARCH}" >&2; \ + exit 1; \ + fi; \ + mkdir -p /out; \ + cp -a "$bundle_root"/. 
/out/ # --------------------------------------------------------------------------- # Stage 3: Runtime on NVIDIA hardened Ubuntu base @@ -216,16 +231,17 @@ COPY --from=k3s /usr/share/zoneinfo/ /usr/share/zoneinfo/ ENV PATH="/var/lib/rancher/k3s/data/cni:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/bin/aux" \ CRI_CONFIG_FILE="/var/lib/rancher/k3s/agent/etc/crictl.yaml" -# Copy NVIDIA Container Toolkit files from the build stage. +# Copy NVIDIA Container Toolkit files from the staged local runtime bundle. # k3s auto-detects nvidia-container-runtime on PATH and registers it as a # containerd runtime + creates the "nvidia" RuntimeClass automatically. -COPY --from=nvidia-toolkit /usr/bin/nvidia-cdi-hook /usr/bin/ -COPY --from=nvidia-toolkit /usr/bin/nvidia-container-runtime /usr/bin/ -COPY --from=nvidia-toolkit /usr/bin/nvidia-container-runtime-hook /usr/bin/ -COPY --from=nvidia-toolkit /usr/bin/nvidia-container-cli /usr/bin/ -COPY --from=nvidia-toolkit /usr/bin/nvidia-ctk /usr/bin/ -COPY --from=nvidia-toolkit /etc/nvidia-container-runtime /etc/nvidia-container-runtime -COPY --from=nvidia-toolkit /usr/lib/*-linux-gnu/libnvidia-container*.so* /usr/lib/ +# These copy targets intentionally match the existing final image paths. +COPY --from=runtime-bundle /out/usr/bin/nvidia-cdi-hook /usr/bin/ +COPY --from=runtime-bundle /out/usr/bin/nvidia-container-runtime /usr/bin/ +COPY --from=runtime-bundle /out/usr/bin/nvidia-container-runtime-hook /usr/bin/ +COPY --from=runtime-bundle /out/usr/bin/nvidia-container-cli /usr/bin/ +COPY --from=runtime-bundle /out/usr/bin/nvidia-ctk /usr/bin/ +COPY --from=runtime-bundle /out/etc/nvidia-container-runtime /etc/nvidia-container-runtime +COPY --from=runtime-bundle /out/usr/lib/*-linux-gnu/libnvidia-container*.so* /usr/lib/ # Copy the openshell-sandbox supervisor binary to the node filesystem. 
# Sandbox pods mount /opt/openshell/bin as a read-only hostPath volume diff --git a/scripts/remote-deploy.sh b/scripts/remote-deploy.sh index 579f117f..82fdb365 100755 --- a/scripts/remote-deploy.sh +++ b/scripts/remote-deploy.sh @@ -27,6 +27,10 @@ Options: --disable-gateway-auth Keep TLS but disable client certificate enforcement --image-tag TAG Docker image tag to build/deploy (default: dev) --cargo-version VERSION Override OPENSHELL_CARGO_VERSION for remote Docker builds + --runtime-bundle-tarball PATH + Local runtime bundle tarball to sync and use for remote cluster builds + --remote-runtime-bundle-tarball PATH + Remote runtime bundle tarball path to use with --skip-sync --help Show this help Examples: @@ -56,6 +60,8 @@ GATEWAY_PORT=${GATEWAY_PORT:-8080} SSH_KEY="${SSH_KEY:-}" IMAGE_TAG=${IMAGE_TAG:-dev} CARGO_VERSION=${OPENSHELL_CARGO_VERSION:-0.0.0-dev} +RUNTIME_BUNDLE_TARBALL="${OPENSHELL_RUNTIME_BUNDLE_TARBALL:-}" +REMOTE_RUNTIME_BUNDLE_TARBALL="${OPENSHELL_REMOTE_RUNTIME_BUNDLE_TARBALL:-}" SKIP_SYNC=false RECREATE=false PLAINTEXT=false @@ -109,6 +115,16 @@ while [[ $# -gt 0 ]]; do CARGO_VERSION="$2" shift 2 ;; + --runtime-bundle-tarball) + require_value "$1" "${2-}" + RUNTIME_BUNDLE_TARBALL="$2" + shift 2 + ;; + --remote-runtime-bundle-tarball) + require_value "$1" "${2-}" + REMOTE_RUNTIME_BUNDLE_TARBALL="$2" + shift 2 + ;; --help|-h) usage exit 0 @@ -144,6 +160,27 @@ fi SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +if [[ "${SKIP_SYNC}" == "true" ]]; then + if [[ -z "${REMOTE_RUNTIME_BUNDLE_TARBALL}" ]]; then + err "--skip-sync requires --remote-runtime-bundle-tarball (or OPENSHELL_REMOTE_RUNTIME_BUNDLE_TARBALL)" + exit 1 + fi + REMOTE_RUNTIME_BUNDLE_PATH="${REMOTE_RUNTIME_BUNDLE_TARBALL}" +else + if [[ -z "${RUNTIME_BUNDLE_TARBALL}" ]]; then + err "Runtime bundle tarball is required (--runtime-bundle-tarball or OPENSHELL_RUNTIME_BUNDLE_TARBALL)" + exit 1 + fi + + if [[ ! 
-f "${RUNTIME_BUNDLE_TARBALL}" ]]; then + err "Runtime bundle tarball not found: ${RUNTIME_BUNDLE_TARBALL}" + exit 1 + fi + + REMOTE_RUNTIME_BUNDLE_DIR="${REMOTE_DIR}/.cache/runtime-bundles" + REMOTE_RUNTIME_BUNDLE_PATH="${REMOTE_RUNTIME_BUNDLE_DIR}/$(basename "${RUNTIME_BUNDLE_TARBALL}")" +fi + SSH_ARGS=() if [[ -n "${SSH_KEY}" ]]; then SSH_ARGS=(-i "${SSH_KEY}") @@ -171,6 +208,12 @@ if [[ "${SKIP_SYNC}" != "true" ]]; then --exclude 'e2e/' \ --exclude 'deploy/docker/.build/' \ "${REPO_ROOT}/" "${REMOTE_HOST}:${REMOTE_DIR}/" + + info "Syncing runtime bundle to ${REMOTE_HOST}:${REMOTE_RUNTIME_BUNDLE_PATH}" + ssh "${SSH_ARGS[@]}" "${REMOTE_HOST}" "mkdir -p '${REMOTE_RUNTIME_BUNDLE_DIR}'" + rsync -az \ + -e "${RSYNC_SSH[*]}" \ + "${RUNTIME_BUNDLE_TARBALL}" "${REMOTE_HOST}:${REMOTE_RUNTIME_BUNDLE_PATH}" info "Sync complete" fi @@ -191,7 +234,8 @@ ssh -t "${SSH_ARGS[@]}" "${REMOTE_HOST}" \ "${CARGO_VERSION}" \ "${RECREATE}" \ "${PLAINTEXT}" \ - "${DISABLE_GATEWAY_AUTH}" <<'REMOTE_EOF' + "${DISABLE_GATEWAY_AUTH}" \ + "${REMOTE_RUNTIME_BUNDLE_PATH}" <<'REMOTE_EOF' set -euo pipefail REMOTE_DIR="$1" @@ -202,6 +246,7 @@ CARGO_VERSION="$5" RECREATE="$6" PLAINTEXT="$7" DISABLE_GATEWAY_AUTH="$8" +REMOTE_RUNTIME_BUNDLE_PATH="$9" cd "${REMOTE_DIR}" @@ -235,6 +280,7 @@ rm -f .env echo "==> Building Docker images (tag=${IMAGE_TAG})..." 
export OPENSHELL_CARGO_VERSION="${CARGO_VERSION}" export IMAGE_TAG +export OPENSHELL_RUNTIME_BUNDLE_TARBALL="${REMOTE_RUNTIME_BUNDLE_PATH}" mise exec -- tasks/scripts/docker-build-cluster.sh mise exec -- tasks/scripts/docker-build-component.sh gateway diff --git a/tasks/docker.toml b/tasks/docker.toml index 8194a04a..ec6afb11 100644 --- a/tasks/docker.toml +++ b/tasks/docker.toml @@ -22,17 +22,17 @@ run = "tasks/scripts/docker-build-component.sh gateway" hide = true ["docker:build:cluster"] -description = "Build the k3s cluster image (component images pulled at runtime from registry)" +description = "Build the k3s cluster image (requires OPENSHELL_RUNTIME_BUNDLE_TARBALL)" run = "tasks/scripts/docker-build-cluster.sh" hide = true ["docker:build:cluster:multiarch"] -description = "Build multi-arch cluster image and push to a registry" +description = "Build multi-arch cluster image and push to a registry (requires per-arch runtime bundle tarballs)" run = "tasks/scripts/docker-publish-multiarch.sh --mode registry" hide = true ["docker:publish:cluster:multiarch"] -description = "Build and publish multi-arch cluster image to ECR" +description = "Build and publish multi-arch cluster image to ECR (requires per-arch runtime bundle tarballs)" run = "tasks/scripts/docker-publish-multiarch.sh --mode ecr" hide = true diff --git a/tasks/scripts/ci-build-cluster-image.sh b/tasks/scripts/ci-build-cluster-image.sh new file mode 100644 index 00000000..7e084928 --- /dev/null +++ b/tasks/scripts/ci-build-cluster-image.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +PLATFORM="" +RUNTIME_BUNDLE_URL="" +RUNTIME_BUNDLE_URL_AMD64="" +RUNTIME_BUNDLE_URL_ARM64="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --platform) + PLATFORM="$2" + shift 2 + ;; + --runtime-bundle-url) + RUNTIME_BUNDLE_URL="$2" + shift 2 + ;; + --runtime-bundle-url-amd64) + RUNTIME_BUNDLE_URL_AMD64="$2" + shift 2 + ;; + --runtime-bundle-url-arm64) + RUNTIME_BUNDLE_URL_ARM64="$2" + shift 2 + ;; + *) + echo "Unknown argument: $1" >&2 + exit 1 + ;; + esac +done + +if [[ -z "$PLATFORM" ]]; then + echo "missing required argument: --platform" >&2 + exit 1 +fi + +if [[ "$PLATFORM" == *","* ]]; then + if [[ -z "$RUNTIME_BUNDLE_URL_AMD64" || -z "$RUNTIME_BUNDLE_URL_ARM64" ]]; then + echo "missing required arguments: --runtime-bundle-url-amd64 and --runtime-bundle-url-arm64" >&2 + exit 1 + fi + + amd64_bundle="$(bash tasks/scripts/download-runtime-bundle.sh --arch amd64 --url "$RUNTIME_BUNDLE_URL_AMD64")" + arm64_bundle="$(bash tasks/scripts/download-runtime-bundle.sh --arch arm64 --url "$RUNTIME_BUNDLE_URL_ARM64")" + + DOCKER_REGISTRY="${IMAGE_REGISTRY:?IMAGE_REGISTRY is required for multi-arch cluster builds}" \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL_AMD64="$amd64_bundle" \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL_ARM64="$arm64_bundle" \ + DOCKER_PLATFORMS="$PLATFORM" \ + mise run --no-prepare docker:build:cluster:multiarch + exit 0 +fi + +if [[ -z "$RUNTIME_BUNDLE_URL" ]]; then + echo "missing required argument: --runtime-bundle-url" >&2 + exit 1 +fi + +case "$PLATFORM" in + linux/amd64) + arch="amd64" + ;; + linux/arm64) + arch="arm64" + ;; + *) + echo "unsupported platform: $PLATFORM" >&2 + exit 1 + ;; +esac + +runtime_bundle_tarball="$(bash tasks/scripts/download-runtime-bundle.sh --arch "$arch" --url "$RUNTIME_BUNDLE_URL")" + +OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_bundle_tarball" \ +DOCKER_PLATFORM="$PLATFORM" \ +mise run --no-prepare docker:build:cluster diff --git a/tasks/scripts/cluster-bootstrap.sh 
b/tasks/scripts/cluster-bootstrap.sh index f354daea..8549d8ad 100755 --- a/tasks/scripts/cluster-bootstrap.sh +++ b/tasks/scripts/cluster-bootstrap.sh @@ -10,6 +10,18 @@ normalize_name() { echo "$1" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g' | sed 's/--*/-/g' | sed 's/^-//;s/-$//' } +require_runtime_bundle_tarball() { + if [ -z "${OPENSHELL_RUNTIME_BUNDLE_TARBALL:-}" ]; then + echo "missing required variable: OPENSHELL_RUNTIME_BUNDLE_TARBALL" >&2 + exit 1 + fi + + if [ ! -f "${OPENSHELL_RUNTIME_BUNDLE_TARBALL}" ]; then + echo "runtime bundle validation failed: tarball not found: ${OPENSHELL_RUNTIME_BUNDLE_TARBALL}" >&2 + exit 1 + fi +} + MODE=${1:-build} if [ "${MODE}" != "build" ] && [ "${MODE}" != "fast" ]; then echo "usage: $0 [build|fast]" >&2 @@ -210,6 +222,10 @@ if is_local_registry_host; then ensure_local_registry fi +if [ "${SKIP_CLUSTER_IMAGE_BUILD:-}" != "1" ]; then + require_runtime_bundle_tarball +fi + CONTAINER_NAME="openshell-cluster-${CLUSTER_NAME}" VOLUME_NAME="openshell-cluster-${CLUSTER_NAME}" diff --git a/tasks/scripts/docker-build-cluster.sh b/tasks/scripts/docker-build-cluster.sh index 80dc2a48..70744ce3 100755 --- a/tasks/scripts/docker-build-cluster.sh +++ b/tasks/scripts/docker-build-cluster.sh @@ -5,16 +5,411 @@ # Build the k3s cluster image with bundled helm charts. # +# Current GPU runtime path: +# 1. require `OPENSHELL_RUNTIME_BUNDLE_TARBALL` +# 2. validate the per-architecture bundle tarball before any Helm or Docker work +# 3. stage its install-root payload under `deploy/docker/.build/runtime-bundle/<arch>/` +# 4. 
let `deploy/docker/Dockerfile.cluster` copy the staged local runtime files
+#    into the cluster image
+#
 # Environment:
 #   IMAGE_TAG - Image tag (default: dev)
 #   K3S_VERSION - k3s version override (optional; default in Dockerfile.cluster)
-
+#   OPENSHELL_RUNTIME_BUNDLE_TARBALL
+#                  - required path to the verified per-arch runtime bundle tarball
+#   OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY
+#                  - when set to "1", validate and stage the bundle, then exit
 #   DOCKER_PLATFORM - Target platform (optional)
 #   DOCKER_BUILDER - Buildx builder name (default: auto-select)
 #   DOCKER_PUSH - When set to "1", push instead of loading into local daemon
 #   IMAGE_REGISTRY - Registry prefix for image name (e.g. ghcr.io/org/repo)
 
 set -euo pipefail
 
+fail() {  # print all arguments as one line on stderr, then exit 1
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+sha256_16() {  # first 16 hex characters of the SHA-256 of file $1
+  if command -v sha256sum >/dev/null 2>&1; then
+    sha256sum "$1" | awk '{print substr($1, 1, 16)}'
+  else  # hosts without sha256sum fall back to shasum
+    shasum -a 256 "$1" | awk '{print substr($1, 1, 16)}'
+  fi
+}
+
+sha256_16_stdin() {  # same as sha256_16, but hashes standard input
+  if command -v sha256sum >/dev/null 2>&1; then
+    sha256sum | awk '{print substr($1, 1, 16)}'
+  else
+    shasum -a 256 | awk '{print substr($1, 1, 16)}'
+  fi
+}
+
+detect_rust_scope() {  # print a label for the Rust toolchain that Dockerfile $1 uses
+  local dockerfile="$1"
+  local rust_from
+  rust_from=$(grep -E '^FROM --platform=\$BUILDPLATFORM rust:[^ ]+' "$dockerfile" | head -n1 | sed -E 's/^FROM --platform=\$BUILDPLATFORM rust:([^ ]+).*/\1/' || true)
+  if [[ -n "${rust_from}" ]]; then  # a pinned rust:<tag> base image wins
+    echo "rust-${rust_from}"
+    return
+  fi
+
+  if grep -q "rustup.rs" "$dockerfile"; then  # otherwise look for a rustup-based install
+    echo "rustup-stable"
+    return
+  fi
+
+  echo "no-rust"
+}
+
+target_arch() {  # print amd64|arm64 for DOCKER_PLATFORM, or for the host when it is unset
+  local platform="${DOCKER_PLATFORM:-}"
+
+  if [[ -n "$platform" ]]; then
+    if [[ "$platform" == *,* ]]; then  # this script stages exactly one bundle per invocation
+      fail "runtime bundle validation failed: multi-platform builds are not supported yet: ${platform}"
+    fi
+
+    case "$platform" in
+      linux/amd64)
+        printf 'amd64\n'
+        return 0
+        ;;
+      linux/arm64)
+        printf 'arm64\n'
+        return 0
+        ;;
+      *)
+        fail "runtime bundle validation failed: unsupported docker platform: ${platform}"
+        ;;
+    esac
+  fi
+
+  case "$(uname -m)" in  # no explicit platform: derive the architecture from the build host
+    x86_64)
+      printf 'amd64\n'
+      ;;
+    aarch64|arm64)
+      printf 'arm64\n'
+      ;;
+    *)
+      fail "runtime bundle validation failed: unsupported host architecture: $(uname -m)"
+      ;;
+  esac
+}
+
+target_multiarch() {  # print the library directory name used under usr/lib/ for arch $1
+  case "$1" in
+    amd64)
+      printf 'x86_64-linux-gnu\n'
+      ;;
+    arm64)
+      printf 'aarch64-linux-gnu\n'
+      ;;
+    *)
+      fail "runtime bundle validation failed: unsupported runtime bundle architecture: $1"
+      ;;
+  esac
+}
+
+require_path() {  # fail unless <bundle_root>/<relative_path> exists (any entry type)
+  local bundle_root="$1"
+  local relative_path="$2"
+
+  if [[ ! -e "${bundle_root}/${relative_path}" ]]; then
+    fail "runtime bundle validation failed: missing required path: ${relative_path}"
+  fi
+}
+
+require_regular_file() {  # fail unless the path is a regular file and not a symlink
+  local bundle_root="$1"
+  local relative_path="$2"
+  local full_path="${bundle_root}/${relative_path}"
+
+  if [[ ! -f "$full_path" || -L "$full_path" ]]; then  # -f follows symlinks, so -L is checked separately
+    fail "runtime bundle validation failed: invalid required binary entry type: ${relative_path}"
+  fi
+}
+
+validate_manifest() {  # check manifest.json against the extracted tree; prints "ok" or one reason code on stdout
+  local bundle_root="$1"
+  local manifest_path="$2"
+  local expected_arch="$3"
+
+  python3 - "$bundle_root" "$manifest_path" "$expected_arch" <<'PY'
+import hashlib
+import json
+import os
+import sys
+
+bundle_root = os.path.abspath(sys.argv[1])
+manifest_path = sys.argv[2]
+expected_arch = sys.argv[3]
+
+
+def fail(reason: str) -> None:  # the calling shell maps these reason codes to messages
+    print(reason)
+    sys.exit(1)
+
+
+try:
+    with open(manifest_path, encoding="utf-8") as manifest_file:
+        manifest = json.load(manifest_file)
+except (OSError, ValueError):  # ValueError covers JSONDecodeError and UnicodeDecodeError
+    fail("malformed_manifest")
+
+required_fields = (
+    "schema_version",
+    "bundle_name",
+    "bundle_version",
+    "architecture",
+    "created_at",
+    "components",
+    "files",
+)
+
+for field in required_fields:
+    if not isinstance(manifest, dict) or field not in manifest:  # a non-object root has no fields at all
+        fail(f"missing_field:{field}")
+
+if manifest["schema_version"] != 1:
+    fail("invalid_field:schema_version")
+
+for field in ("bundle_name", "bundle_version", "architecture", "created_at"):
+    value = manifest[field]
+    if not isinstance(value, str) or not value.strip():
+        fail(f"invalid_field:{field}")
+
+components = manifest["components"]
+if not isinstance(components, dict):
+    fail("invalid_field:components")
+
+for component_name in ("nvidia_container_toolkit", "libnvidia_container"):
+    component = components.get(component_name)
+    if not isinstance(component, dict):
+        fail(f"missing_field:components.{component_name}")
+    for component_field in ("version", "commit"):
+        value = component.get(component_field)
+        if not isinstance(value, str) or not value.strip():
+            fail(f"missing_field:components.{component_name}.{component_field}")
+
+manifest_arch = manifest["architecture"].strip()
+if manifest_arch != expected_arch:
+    fail(f"arch_mismatch:{manifest_arch}")
+
+files = manifest["files"]
+if not isinstance(files, list):
+    fail("invalid_field:files")
+
+regular_file_entries = 0
+listed_paths = set()
+
+for entry in files:
+    if not isinstance(entry, dict):
+        fail("invalid_field:files[]")
+
+    path = entry.get("path")
+    if not isinstance(path, str) or not path.strip():
+        fail("missing_field:files[].path")
+    path = path.strip()
+    listed_paths.add(path)
+
+    full_path = os.path.abspath(os.path.join(bundle_root, path))
+    if os.path.commonpath([bundle_root, full_path]) != bundle_root:  # rejects absolute and ../ escapes
+        fail(f"invalid_path:{path}")
+
+    entry_type = entry.get("entry_type")
+    if entry_type == "file":
+        regular_file_entries += 1
+
+        sha256 = entry.get("sha256")
+        size = entry.get("size")
+        if not isinstance(sha256, str) or not sha256.strip():
+            fail(f"missing_field:files[].sha256:{path}")
+        if not isinstance(size, int) or size < 0:
+            fail(f"missing_field:files[].size:{path}")
+        if not os.path.isfile(full_path) or os.path.islink(full_path):
+            fail(f"missing_payload:{path}")
+
+        digest = hashlib.sha256()
+        with open(full_path, "rb") as file_obj:
+            while True:
+                chunk = file_obj.read(1024 * 1024)  # hash in 1 MiB chunks
+                if not chunk:
+                    break
+                digest.update(chunk)
+
+        if digest.hexdigest() != sha256:
+            fail(f"checksum_mismatch:{path}")
+
+        if os.path.getsize(full_path) != size:
+            fail(f"size_mismatch:{path}")
+    elif entry_type == "symlink":  # only presence is checked; the link target is not inspected
+        if not os.path.islink(full_path):
+            fail(f"missing_payload:{path}")
+    else:
+        fail(f"invalid_field:files[].entry_type:{path}")
+
+if regular_file_entries == 0:
+    fail("invalid_field:files")
+
+required_manifest_paths = (
+    "usr/bin/nvidia-cdi-hook",
+    "usr/bin/nvidia-container-runtime",
+    "usr/bin/nvidia-container-runtime-hook",
+    "usr/bin/nvidia-container-cli",
+    "usr/bin/nvidia-ctk",
+    "etc/nvidia-container-runtime/config.toml",
+)
+
+for path in required_manifest_paths:
+    if path not in listed_paths:
+        fail(f"required_manifest_path_missing:{path}")
+
+for root, _, filenames in os.walk(bundle_root):  # every on-disk payload must be listed in the manifest
+    for filename in filenames:
+        full_path = os.path.join(root, filename)
+        rel_path = os.path.relpath(full_path, bundle_root)
+        if rel_path == "manifest.json":
+            continue
+        if rel_path not in listed_paths:
+            fail(f"unlisted_payload:{rel_path}")
+
+    for filename in [name for name in os.listdir(root) if os.path.islink(os.path.join(root, name))]:  # symlinks, including ones that point at directories
+        full_path = os.path.join(root, filename)
+        rel_path = os.path.relpath(full_path, bundle_root)
+        if rel_path == "manifest.json":
+            continue
+        if rel_path not in listed_paths:
+            fail(f"unlisted_payload:{rel_path}")
+
+print("ok")
+PY
+}
+
+stage_runtime_bundle() {  # extract, validate and stage tarball $1 for arch $2 / libdir $3; prints the staged bundle path
+  local bundle_tarball="$1"
+  local arch="$2"
+  local multiarch="$3"
+  local extract_dir
+  local tar_stderr
+  local stage_tmp_root
+  local manifest_path
+  local manifest_validation
+  local bundle_root
+  local bundle_name
+  local stage_parent_root="deploy/docker/.build/runtime-bundle"
+  local stage_root="deploy/docker/.build/runtime-bundle/${arch}"
+  local staged_bundle_path
+
+  rm -rf "$stage_root"  # drop any previous stage first so a failed run never leaves a stale one behind
+  mkdir -p "$stage_parent_root" || fail "runtime bundle staging failed: cannot create ${stage_parent_root}"
+
+  extract_dir="$(mktemp -d)" || fail "runtime bundle staging failed: cannot create a temporary extraction directory"  # bash clears -e inside $(...), so check explicitly
+  tar_stderr="$(mktemp)" || fail "runtime bundle staging failed: cannot create a temporary file"
+  stage_tmp_root="$(mktemp -d "$stage_parent_root/${arch}.tmp.XXXXXX")" || fail "runtime bundle staging failed: cannot create a staging directory under ${stage_parent_root}"
+  cleanup_stage_runtime_bundle() {
+    rm -rf "$extract_dir" "$stage_tmp_root" "$tar_stderr"
+    trap - RETURN EXIT  # disarm both: the locals above are out of scope once this function has returned
+  }
+  trap cleanup_stage_runtime_bundle RETURN EXIT  # EXIT covers fail(), which exits without ever returning
+
+  if ! tar -xzf "$bundle_tarball" -C "$extract_dir" 2>"$tar_stderr"; then
+    fail "runtime bundle validation failed: tar extraction reported warnings or errors"
+  fi
+
+  if [[ -s "$tar_stderr" || -n "${TAR_STDERR_MESSAGE:-}" ]]; then  # any stderr output is treated as fatal; TAR_STDERR_MESSAGE looks like a test seam (TODO confirm)
+    if [[ -n "${TAR_STDERR_MESSAGE:-}" ]]; then
+      printf '%s\n' "${TAR_STDERR_MESSAGE}" > "$tar_stderr"
+    fi
+    fail "runtime bundle validation failed: tar extraction reported warnings or errors"
+  fi
+
+  local extracted_entries=()
+  local entry
+  shopt -s dotglob nullglob  # include dotfiles, and expand to nothing when the directory is empty
+  for entry in "$extract_dir"/*; do
+    extracted_entries+=("$entry")
+  done
+  shopt -u dotglob nullglob
+
+  if [[ "${#extracted_entries[@]}" -ne 1 || ! -d "${extracted_entries[0]}" ]]; then
+    fail "runtime bundle validation failed: expected a single top-level bundle directory"
+  fi
+
+  bundle_root="${extracted_entries[0]}"
+  bundle_name="$(basename "$bundle_root")"
+  manifest_path="$bundle_root/manifest.json"
+
+  if [[ ! -f "$manifest_path" ]]; then
+    fail "runtime bundle validation failed: missing bundle manifest.json"
+  fi
+
+  if ! manifest_validation="$(validate_manifest "$bundle_root" "$manifest_path" "$arch")"; then  # stdout carries the reason code
+    case "$manifest_validation" in
+      malformed_manifest)
+        fail "runtime bundle validation failed: malformed manifest.json"
+        ;;
+      missing_field:*)
+        fail "runtime bundle validation failed: missing required manifest field: ${manifest_validation#missing_field:}"
+        ;;
+      required_manifest_path_missing:*)
+        fail "runtime bundle validation failed: required runtime asset missing from manifest.json: ${manifest_validation#required_manifest_path_missing:}"
+        ;;
+      invalid_field:*)
+        fail "runtime bundle validation failed: malformed manifest.json"
+        ;;
+      arch_mismatch:*)
+        fail "runtime bundle validation failed: bundle architecture mismatch: expected ${arch}, got ${manifest_validation#arch_mismatch:}"
+        ;;
+      checksum_mismatch:*)
+        fail "runtime bundle validation failed: checksum mismatch: ${manifest_validation#checksum_mismatch:}"
+        ;;
+      size_mismatch:*)
+        fail "runtime bundle validation failed: size mismatch: ${manifest_validation#size_mismatch:}"
+        ;;
+      missing_payload:*)
+        fail "runtime bundle validation failed: missing manifest-listed payload path: ${manifest_validation#missing_payload:}"
+        ;;
+      invalid_path:*)
+        fail "runtime bundle validation failed: invalid manifest-listed payload path: ${manifest_validation#invalid_path:}"
+        ;;
+      unlisted_payload:*)
+        fail "runtime bundle validation failed: unlisted payload path present on disk: ${manifest_validation#unlisted_payload:}"
+        ;;
+      *)  # also reached when the validator died without printing a reason code
+        fail "runtime bundle validation failed: malformed manifest.json"
+        ;;
+    esac
+  fi
+
+  require_path "$bundle_root" "usr/bin/nvidia-cdi-hook"
+  require_path "$bundle_root" "usr/bin/nvidia-container-runtime"
+  require_path "$bundle_root" "usr/bin/nvidia-container-runtime-hook"
+  require_path "$bundle_root" "usr/bin/nvidia-container-cli"
+  require_path "$bundle_root" "usr/bin/nvidia-ctk"
+  require_regular_file "$bundle_root" "usr/bin/nvidia-cdi-hook"
+  require_regular_file "$bundle_root" "usr/bin/nvidia-container-runtime"
+  require_regular_file "$bundle_root" "usr/bin/nvidia-container-runtime-hook"
+  require_regular_file "$bundle_root" "usr/bin/nvidia-container-cli"
+  require_regular_file "$bundle_root" "usr/bin/nvidia-ctk"
+  require_path "$bundle_root" "etc/nvidia-container-runtime"
+
+  if [[ ! -d "$bundle_root/etc/nvidia-container-runtime" ]]; then
+    fail "runtime bundle validation failed: required path is not a directory: etc/nvidia-container-runtime"
+  fi
+
+  compgen -G "$bundle_root/usr/lib/${multiarch}/libnvidia-container*.so*" >/dev/null || \
+    fail "runtime bundle validation failed: missing required library subtree: usr/lib/${multiarch}/libnvidia-container*.so*"
+
+  staged_bundle_path="$stage_tmp_root/$bundle_name"
+  cp -a "$bundle_root" "$staged_bundle_path" || fail "runtime bundle staging failed: cannot copy bundle into ${stage_tmp_root}"  # -e is off here, so a failed copy must abort explicitly
+  mv "$stage_tmp_root" "$stage_root" || fail "runtime bundle staging failed: cannot move staged bundle to ${stage_root}"  # publish the stage only after the copy succeeded
+
+  printf '%s\n' "$stage_root/$bundle_name"
+}
+
 IMAGE_TAG=${IMAGE_TAG:-dev}
 IMAGE_NAME="openshell/cluster"
 if [[ -n "${IMAGE_REGISTRY:-}" ]]; then
@@ -25,6 +420,23 @@ CACHE_PATH="${DOCKER_BUILD_CACHE_DIR}/cluster"
 
 mkdir -p "${CACHE_PATH}"
 
+if [[ -z "${OPENSHELL_RUNTIME_BUNDLE_TARBALL:-}" ]]; then  # gate: no builder selection, Helm or Docker work without a bundle
+  fail "missing required variable: OPENSHELL_RUNTIME_BUNDLE_TARBALL"
+fi
+
+if [[ ! -f "${OPENSHELL_RUNTIME_BUNDLE_TARBALL}" ]]; then
+  fail "runtime bundle validation failed: tarball not found: ${OPENSHELL_RUNTIME_BUNDLE_TARBALL}"
+fi
+
+TARGET_ARCH="$(target_arch)"
+TARGET_MULTIARCH="$(target_multiarch "$TARGET_ARCH")"
+STAGED_RUNTIME_BUNDLE="$(stage_runtime_bundle "${OPENSHELL_RUNTIME_BUNDLE_TARBALL}" "$TARGET_ARCH" "$TARGET_MULTIARCH")"  # a failed stage exits the subshell non-zero, which aborts this script
+
+if [[ "${OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY:-}" == "1" ]]; then  # validate-and-stage mode: stop before any image build
+  printf 'Runtime bundle staged at %s\n' "$STAGED_RUNTIME_BUNDLE"
+  exit 0
+fi
+
 # Select builder — prefer native "docker" driver for local single-arch builds
 # to avoid slow tarball export from the docker-container driver.
 BUILDER_ARGS=()
@@ -49,6 +461,13 @@ fi
 # Create build directory for charts
 mkdir -p deploy/docker/.build/charts
 
+# Runtime-bundle handoff:
+# - this script is the required acquisition/verification point before
+#   any Helm packaging or `docker buildx build`
+# - it validates manifest metadata plus manifest-authoritative payload checks,
+#   then stages the extracted install-root tree in `deploy/docker/.build/runtime-bundle/<arch>/`
+# - `deploy/docker/Dockerfile.cluster` consumes only that staged local payload
+
 # Package helm chart
 echo "Packaging helm chart..."
 helm package deploy/helm/openshell -d deploy/docker/.build/charts/
@@ -57,6 +476,11 @@ helm package deploy/helm/openshell -d deploy/docker/.build/charts/
 # from the distribution registry; credentials are injected at deploy time)
 echo "Building cluster image..."
 
+SCCACHE_ARGS=()  # stays empty unless a memcached endpoint is configured
+if [[ -n "${SCCACHE_MEMCACHED_ENDPOINT:-}" ]]; then
+  SCCACHE_ARGS=(--build-arg "SCCACHE_MEMCACHED_ENDPOINT=${SCCACHE_MEMCACHED_ENDPOINT}")
+fi
+
 OUTPUT_FLAG="--load"
 if [[ "${DOCKER_PUSH:-}" == "1" ]]; then
   OUTPUT_FLAG="--push"
@@ -76,11 +500,18 @@ else
   fi
 fi
 
+LOCK_HASH=$(sha256_16 Cargo.lock)  # short digest of the lockfile
+RUST_SCOPE=${RUST_TOOLCHAIN_SCOPE:-$(detect_rust_scope "deploy/docker/Dockerfile.cluster")}  # an explicit override wins over detection
+CACHE_SCOPE_INPUT="v1|cluster|base|${LOCK_HASH}|${RUST_SCOPE}"  # the scope changes with the lockfile or the toolchain label
+CARGO_TARGET_CACHE_SCOPE=$(printf '%s' "${CACHE_SCOPE_INPUT}" | sha256_16_stdin)
+
 docker buildx build \
   ${BUILDER_ARGS[@]+"${BUILDER_ARGS[@]}"} \
   ${DOCKER_PLATFORM:+--platform ${DOCKER_PLATFORM}} \
   ${CACHE_ARGS[@]+"${CACHE_ARGS[@]}"} \
+  ${SCCACHE_ARGS[@]+"${SCCACHE_ARGS[@]}"} \
   ${VERSION_ARGS[@]+"${VERSION_ARGS[@]}"} \
+  --build-arg "CARGO_TARGET_CACHE_SCOPE=${CARGO_TARGET_CACHE_SCOPE}" \
   -f deploy/docker/Dockerfile.cluster \
   -t ${IMAGE_NAME}:${IMAGE_TAG} \
   ${K3S_VERSION:+--build-arg K3S_VERSION=${K3S_VERSION}} \
diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh
index 7bb6dc84..ef28e26b 100755
--- a/tasks/scripts/docker-publish-multiarch.sh
+++
b/tasks/scripts/docker-publish-multiarch.sh
@@ -26,6 +26,37 @@
 #   AWS_REGION - AWS region (default: us-west-2)
 set -euo pipefail
 
+# Print the name of the environment variable that carries the runtime bundle
+# tarball for architecture $1 (for example amd64 -> ..._TARBALL_AMD64).
+runtime_bundle_var_name() {
+  local arch="$1"
+  local upper_arch
+  upper_arch=$(printf '%s' "$arch" | tr '[:lower:]' '[:upper:]')
+  printf 'OPENSHELL_RUNTIME_BUNDLE_TARBALL_%s\n' "$upper_arch"
+}
+
+# Print the tarball path configured for architecture $1.
+# Exits 1 (message on stderr) when the variable is unset or empty, or when it
+# does not name an existing regular file.
+require_runtime_bundle_tarball_for_arch() {
+  local arch="$1"
+  local var_name value
+  var_name="$(runtime_bundle_var_name "$arch")"
+  value="${!var_name:-}"
+
+  if [[ -z "$value" ]]; then
+    echo "missing required variable: ${var_name}" >&2
+    exit 1
+  fi
+
+  if [[ ! -f "$value" ]]; then
+    echo "runtime bundle validation failed: tarball not found: ${value}" >&2
+    exit 1
+  fi
+
+  printf '%s\n' "$value"
+}
+
 sha256_16() {
   if command -v sha256sum >/dev/null 2>&1; then
     sha256sum "$1" | awk '{print substr($1, 1, 16)}'
@@ -193,16 +224,52 @@ if [ -n "${SCCACHE_MEMCACHED_ENDPOINT:-}" ]; then
   CLUSTER_BUILD_ARGS="--build-arg SCCACHE_MEMCACHED_ENDPOINT=${SCCACHE_MEMCACHED_ENDPOINT}"
 fi
 CLUSTER_IMAGE="${REGISTRY}/${IMAGE_PREFIX:+${IMAGE_PREFIX}}cluster"
-docker buildx build \
-  --platform "${PLATFORMS}" \
-  -f "${CLUSTER_DOCKERFILE}" \
+IFS=',' read -r -a PLATFORM_LIST <<< "${PLATFORMS}"
+CLUSTER_PLATFORM_ARCHES=()
+CLUSTER_PLATFORM_BUNDLES=()
+CLUSTER_PLATFORM_TAGS=()
+
+# Pass 1: resolve and validate every per-arch runtime bundle before any cluster
+# image is built, so a missing tarball for a later platform cannot leave an
+# earlier platform's image already built and pushed.
+for platform in "${PLATFORM_LIST[@]}"; do
+  case "$platform" in
+    linux/amd64)
+      arch="amd64"
+      ;;
+    linux/arm64)
+      arch="arm64"
+      ;;
+    *)
+      echo "Unsupported cluster platform for runtime bundle publishing: ${platform}" >&2
+      exit 1
+      ;;
+  esac
+
+  # Plain assignment (not an array append) so a failed lookup trips `set -e`.
+  runtime_bundle_tarball="$(require_runtime_bundle_tarball_for_arch "$arch")"
+  CLUSTER_PLATFORM_ARCHES+=("$arch")
+  CLUSTER_PLATFORM_BUNDLES+=("$runtime_bundle_tarball")
+done
+
+# Pass 2: build and push one single-arch cluster image per platform.
+for i in "${!PLATFORM_LIST[@]}"; do
+  arch_tag="${IMAGE_TAG}-${CLUSTER_PLATFORM_ARCHES[$i]}"
+  CLUSTER_PLATFORM_TAGS+=("${CLUSTER_IMAGE}:${arch_tag}")
+
+  IMAGE_TAG="${arch_tag}" \
+  DOCKER_PLATFORM="${PLATFORM_LIST[$i]}" \
+  DOCKER_PUSH=1 \
+  OPENSHELL_RUNTIME_BUNDLE_TARBALL="${CLUSTER_PLATFORM_BUNDLES[$i]}" \
+  OPENSHELL_CARGO_VERSION="$CARGO_VERSION" \
+  K3S_VERSION="${K3S_VERSION:-}" \
+  tasks/scripts/docker-build-cluster.sh
+done
+
+docker buildx imagetools create \
+  --prefer-index=false \
   -t "${CLUSTER_IMAGE}:${IMAGE_TAG}" \
-  ${K3S_VERSION:+--build-arg K3S_VERSION=${K3S_VERSION}} \
-  --build-arg "CARGO_TARGET_CACHE_SCOPE=${CLUSTER_CARGO_SCOPE}" \
-  ${CLUSTER_BUILD_ARGS} \
-  ${EXTRA_BUILD_FLAGS} \
-  --push \
-  .
+  "${CLUSTER_PLATFORM_TAGS[@]}"
 
 # ---------------------------------------------------------------------------
 # Step 4: Apply additional tags by copying manifests.
diff --git a/tasks/scripts/download-runtime-bundle.sh b/tasks/scripts/download-runtime-bundle.sh
new file mode 100644
index 00000000..27460bb4
--- /dev/null
+++ b/tasks/scripts/download-runtime-bundle.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Download a runtime bundle tarball into deploy/docker/.build/runtime-bundles/
+# and print the absolute path of the cached file on stdout.
+# Usage: download-runtime-bundle.sh --arch <arch> --url <url>
+# A file already present under the derived cache name is reused as-is.
+
+set -euo pipefail
+
+ARCH=""
+URL=""
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --arch)
+      ARCH="${2:?missing value for --arch}"
+      shift 2
+      ;;
+    --url)
+      URL="${2:?missing value for --url}"
+      shift 2
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+if [[ -z "$ARCH" ]]; then
+  echo "missing required argument: --arch" >&2
+  exit 1
+fi
+
+if [[ -z "$URL" ]]; then
+  echo "missing required argument: --url" >&2
+  exit 1
+fi
+
+CACHE_DIR="deploy/docker/.build/runtime-bundles"
+mkdir -p "$CACHE_DIR"
+
+filename="$(basename "$URL")"
+if [[ -z "$filename" || "$filename" == "/" || "$filename" == "." ]]; then
+  filename="runtime-bundle-${ARCH}.tar.gz"
+fi
+
+target_path="$CACHE_DIR/${ARCH}-${filename}"
+
+if [[ ! -f "$target_path" ]]; then
+  # Download next to the cache entry and rename only on success, so a failed
+  # or interrupted transfer can never be mistaken for a cache hit later.
+  partial_path="${target_path}.partial.$$"
+  trap 'rm -f "$partial_path"' EXIT
+  curl --fail --location --silent --show-error --output "$partial_path" "$URL"
+  mv "$partial_path" "$target_path"
+fi
+
+printf '%s\n' "$(pwd)/$target_path"
diff --git a/tasks/tests/runtime-bundle-caller-paths.bats b/tasks/tests/runtime-bundle-caller-paths.bats
new file mode 100644
index 00000000..81c9d88b
--- /dev/null
+++ b/tasks/tests/runtime-bundle-caller-paths.bats
@@ -0,0 +1,238 @@
+#!/usr/bin/env bats
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+setup() {  # per-test sandbox: a temp dir, log paths, and fake CLIs written into $FAKE_BIN_DIR
+  export TEST_TMPDIR
+  TEST_TMPDIR="$(mktemp -d)"
+  export FAKE_BIN_DIR="$TEST_TMPDIR/bin"
+  export FAKE_BOOTSTRAP_LOG="$TEST_TMPDIR/bootstrap.log"
+  export FAKE_OPENSHELL_LOG="$TEST_TMPDIR/openshell.log"
+  export FAKE_RSYNC_LOG="$TEST_TMPDIR/rsync.log"
+  export FAKE_SSH_LOG="$TEST_TMPDIR/ssh.log"
+  export FAKE_SSH_STDIN_DIR="$TEST_TMPDIR/ssh-stdin"
+  export FAKE_DOCKER_LOG="$TEST_TMPDIR/docker.log"
+  export FAKE_HELM_LOG="$TEST_TMPDIR/helm.log"
+  export FAKE_CLUSTER_BUILD_LOG="$TEST_TMPDIR/cluster-build.log"
+  mkdir -p "$FAKE_BIN_DIR" "$FAKE_SSH_STDIN_DIR"
+
+  cat > "$FAKE_BIN_DIR/openshell" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$FAKE_OPENSHELL_LOG"
+EOF
+  chmod +x "$FAKE_BIN_DIR/openshell"  # fake openshell: logs its arguments
+
+  cat > "$FAKE_BIN_DIR/ssh" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$FAKE_SSH_LOG"
+count_file="$FAKE_SSH_STDIN_DIR/count"
+count=0
+if [[ -f "$count_file" ]]; then
+  count="$(cat "$count_file")"
+fi
+count=$((count + 1))
+printf '%s' "$count" > "$count_file"
+stdin_path="$FAKE_SSH_STDIN_DIR/$count.stdin"
+cat > "$stdin_path" || true
+EOF
+  chmod +x "$FAKE_BIN_DIR/ssh"  # fake ssh: logs arguments and saves each call's stdin as <n>.stdin
+
+  cat > "$FAKE_BIN_DIR/rsync" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$FAKE_RSYNC_LOG"
+EOF
+  chmod +x "$FAKE_BIN_DIR/rsync"  # fake rsync: logs its arguments
+
+  cat > "$FAKE_BIN_DIR/docker" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$FAKE_DOCKER_LOG"
+if [[ "${1:-}" == "buildx" && "${2:-}" == "inspect" ]]; then
+  exit 0
+fi
+EOF
+  chmod +x "$FAKE_BIN_DIR/docker"  # fake docker: logs its arguments and always succeeds
+
+  cat > "$FAKE_BIN_DIR/helm" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$FAKE_HELM_LOG"
+output_dir=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -d)
+      output_dir="$2"
+      shift 2
+      ;;
+    *)
+      shift
+      ;;
+  esac
+done
+if [[ -n "$output_dir" ]]; then
+  mkdir -p "$output_dir"
+  : > "$output_dir/openshell-0.0.0.tgz"
+fi
+EOF
+  chmod +x "$FAKE_BIN_DIR/helm"  # fake helm: logs arguments and drops an empty chart archive into the -d directory
+
+  cat > "$FAKE_BIN_DIR/uv" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+if [[ "${1:-}" == "run" ]]; then
+  printf '0.0.0-test\n'
+  exit 0
+fi
+exit 1
+EOF
+  chmod +x "$FAKE_BIN_DIR/uv"  # fake uv: `uv run ...` prints a fixed version string
+}
+
+teardown() {  # remove the per-test sandbox
+  rm -rf "$TEST_TMPDIR"
+}
+
+make_bootstrap_harness() {  # copy the bootstrap and cluster-build scripts into a scratch tree; prints its root
+  local harness_root="$TEST_TMPDIR/bootstrap-harness"
+  mkdir -p \
+    "$harness_root/tasks/scripts" \
+    "$harness_root/deploy/helm/openshell" \
+    "$harness_root/deploy/docker"
+  cp "tasks/scripts/cluster-bootstrap.sh" "$harness_root/tasks/scripts/cluster-bootstrap.sh"
+  cp "tasks/scripts/docker-build-cluster.sh" "$harness_root/tasks/scripts/docker-build-cluster.sh"
+
+  printf 'FROM --platform=$BUILDPLATFORM rust:1.86\n' > "$harness_root/deploy/docker/Dockerfile.cluster"
+
+  cat > "$harness_root/tasks/scripts/cluster-push-component.sh" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$FAKE_BOOTSTRAP_LOG"
+EOF
+  chmod +x "$harness_root/tasks/scripts/cluster-push-component.sh"
+
+  printf '%s\n' "$harness_root"
+}
+
+make_remote_deploy_harness() {  # copy scripts/remote-deploy.sh into a scratch tree; prints its root
+  local harness_root="$TEST_TMPDIR/remote-deploy-harness"
+  mkdir -p "$harness_root/scripts"
+  cp "scripts/remote-deploy.sh" "$harness_root/scripts/remote-deploy.sh"
+  printf '%s\n' "$harness_root"
+}
+
+make_multiarch_harness() {  # real publish script plus a stub docker-build-cluster.sh that logs its inputs; prints the root
+  local harness_root="$TEST_TMPDIR/multiarch-harness"
+  mkdir -p \
+    "$harness_root/tasks/scripts" \
+    "$harness_root/deploy/docker" \
+    "$harness_root/deploy/helm/openshell"
+
+  cp "tasks/scripts/docker-publish-multiarch.sh" "$harness_root/tasks/scripts/docker-publish-multiarch.sh"
+
+  cat > "$harness_root/tasks/scripts/docker-build-cluster.sh" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s|%s|%s\n' "${DOCKER_PLATFORM:-}" "${OPENSHELL_RUNTIME_BUNDLE_TARBALL:-}" "${IMAGE_TAG:-}" >> "$FAKE_CLUSTER_BUILD_LOG"
+EOF
+  chmod +x "$harness_root/tasks/scripts/docker-build-cluster.sh"
+
+  printf '[[package]]\nname = "openshell"\nversion = "0.0.0"\n' > "$harness_root/Cargo.lock"
+  printf 'FROM --platform=$BUILDPLATFORM rust:1.86\n' > "$harness_root/deploy/docker/Dockerfile.gateway"
+  printf 'FROM --platform=$BUILDPLATFORM rust:1.86\n' > "$harness_root/deploy/docker/Dockerfile.cluster"
+
+  printf '%s\n' "$harness_root"
+}
+
+@test "cluster-bootstrap fails before build orchestration when the runtime bundle tarball is missing" {
+  local harness_root
+  harness_root="$(make_bootstrap_harness)"
+
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    HOME="$TEST_TMPDIR/home" \
+    IMAGE_REPO_BASE=registry.example/openshell \
+    bash -lc "cd '$harness_root' && bash tasks/scripts/cluster-bootstrap.sh build"
+
+  [ "$status" -ne 0 ]
+  [[ "$output" == *"missing required variable: OPENSHELL_RUNTIME_BUNDLE_TARBALL"* ]]
+  [ ! -s "$FAKE_BOOTSTRAP_LOG" ]  # no component push was attempted
+  [ ! -s "$FAKE_OPENSHELL_LOG" ]  # the openshell CLI was never invoked
+}
+
+@test "cluster-bootstrap allows skip-build flows without a runtime bundle tarball" {
+  local harness_root
+  harness_root="$(make_bootstrap_harness)"
+
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    HOME="$TEST_TMPDIR/home" \
+    IMAGE_REPO_BASE=registry.example/openshell \
+    SKIP_IMAGE_PUSH=1 \
+    SKIP_CLUSTER_IMAGE_BUILD=1 \
+    OPENSHELL_CLUSTER_IMAGE=registry.example/openshell/cluster:test \
+    bash -lc "cd '$harness_root' && bash tasks/scripts/cluster-bootstrap.sh build"
+
+  [ "$status" -eq 0 ]
+  [ ! -s "$FAKE_BOOTSTRAP_LOG" ]
+  [[ "$(<"$FAKE_OPENSHELL_LOG")" == *"gateway start --name bootstrap-harness --port 8080"* ]]
+}
+
+@test "remote-deploy syncs the runtime bundle tarball and exports its remote path for the remote cluster build" {
+  local harness_root runtime_tarball remote_tarball
+  harness_root="$(make_remote_deploy_harness)"
+  runtime_tarball="$TEST_TMPDIR/runtime-bundle-amd64.tar.gz"
+  remote_tarball="openshell/.cache/runtime-bundles/$(basename "$runtime_tarball")"
+  : > "$runtime_tarball"  # an empty placeholder is enough; only the path handling is under test
+
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    HOME="$TEST_TMPDIR/home" \
+    bash -lc "cd '$harness_root' && bash scripts/remote-deploy.sh devbox --runtime-bundle-tarball '$runtime_tarball'"
+
+  [ "$status" -eq 0 ]
+  [[ "$(<"$FAKE_RSYNC_LOG")" == *"$runtime_tarball devbox:$remote_tarball"* ]]
+  [[ "$(<"$FAKE_SSH_LOG")" == *"$remote_tarball"* ]]
+  grep -Fq 'export OPENSHELL_RUNTIME_BUNDLE_TARBALL="${REMOTE_RUNTIME_BUNDLE_PATH}"' "$FAKE_SSH_STDIN_DIR"/*.stdin
+}
+
+@test "remote-deploy skip-sync requires an explicit remote runtime bundle tarball path" {
+  local harness_root remote_tarball
+  harness_root="$(make_remote_deploy_harness)"
+  remote_tarball="/srv/openshell/runtime-bundles/runtime-bundle-amd64.tar.gz"
+
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    HOME="$TEST_TMPDIR/home" \
+    bash -lc "cd '$harness_root' && bash scripts/remote-deploy.sh devbox --skip-sync --remote-runtime-bundle-tarball '$remote_tarball'"
+
+  [ "$status" -eq 0 ]
+  [ ! -s "$FAKE_RSYNC_LOG" ]  # --skip-sync: nothing was rsynced
+  [[ "$(<"$FAKE_SSH_LOG")" == *"$remote_tarball"* ]]
+  grep -Fq 'export OPENSHELL_RUNTIME_BUNDLE_TARBALL="${REMOTE_RUNTIME_BUNDLE_PATH}"' "$FAKE_SSH_STDIN_DIR"/*.stdin
+}
+
+@test "docker-publish-multiarch builds cluster images per arch with matching runtime bundles" {
+  local harness_root amd64_bundle arm64_bundle cluster_log
+  harness_root="$(make_multiarch_harness)"
+  amd64_bundle="$TEST_TMPDIR/runtime-bundle-amd64.tar.gz"
+  arm64_bundle="$TEST_TMPDIR/runtime-bundle-arm64.tar.gz"
+  : > "$amd64_bundle"
+  : > "$arm64_bundle"
+
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    DOCKER_REGISTRY=registry.example/openshell \
+    OPENSHELL_RUNTIME_BUNDLE_TARBALL_AMD64="$amd64_bundle" \
+    OPENSHELL_RUNTIME_BUNDLE_TARBALL_ARM64="$arm64_bundle" \
+    bash -lc "cd '$harness_root' && bash tasks/scripts/docker-publish-multiarch.sh --mode registry"
+
+  [ "$status" -eq 0 ]
+  cluster_log="$(<"$FAKE_CLUSTER_BUILD_LOG")"  # one "platform|tarball|tag" line per stubbed cluster build
+  [[ "$cluster_log" == *"linux/amd64|$amd64_bundle|dev-amd64"* ]]
+  [[ "$cluster_log" == *"linux/arm64|$arm64_bundle|dev-arm64"* ]]
+  [[ "$(<"$FAKE_DOCKER_LOG")" == *"imagetools create --prefer-index=false -t registry.example/openshell/openshell-cluster:dev registry.example/openshell/openshell-cluster:dev-amd64 registry.example/openshell/openshell-cluster:dev-arm64"* ]]
+}
diff --git a/tasks/tests/runtime-bundle-ci-workflow.bats b/tasks/tests/runtime-bundle-ci-workflow.bats
new file mode 100644
index 00000000..cce29282
--- /dev/null
+++ b/tasks/tests/runtime-bundle-ci-workflow.bats
@@ -0,0 +1,116 @@
+#!/usr/bin/env bats
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+setup() {  # per-test sandbox with fake curl and mise on PATH
+  export TEST_TMPDIR
+  TEST_TMPDIR="$(mktemp -d)"
+  export FAKE_BIN_DIR="$TEST_TMPDIR/bin"
+  export FAKE_CURL_LOG="$TEST_TMPDIR/curl.log"
+  export FAKE_MISE_LOG="$TEST_TMPDIR/mise.log"
+  mkdir -p "$FAKE_BIN_DIR"
+
+  cat > "$FAKE_BIN_DIR/curl" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$FAKE_CURL_LOG"
+output=""
+url=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -o|--output)
+      output="$2"
+      shift 2
+      ;;
+    http://*|https://*)
+      url="$1"
+      shift
+      ;;
+    *)
+      shift
+      ;;
+  esac
+done
+if [[ -z "$output" ]]; then
+  echo "missing output path" >&2
+  exit 1
+fi
+printf 'downloaded from %s\n' "$url" > "$output"
+EOF
+  chmod +x "$FAKE_BIN_DIR/curl"  # fake curl: logs arguments and writes a marker line to the --output path
+
+  cat > "$FAKE_BIN_DIR/mise" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s|%s|%s|%s|%s|%s\n' "$*" "${OPENSHELL_RUNTIME_BUNDLE_TARBALL:-}" "${OPENSHELL_RUNTIME_BUNDLE_TARBALL_AMD64:-}" "${OPENSHELL_RUNTIME_BUNDLE_TARBALL_ARM64:-}" "${DOCKER_PLATFORM:-}" "${DOCKER_REGISTRY:-}" >> "$FAKE_MISE_LOG"
+EOF
+  chmod +x "$FAKE_BIN_DIR/mise"  # fake mise: logs arguments plus the bundle, platform and registry env it received
+}
+
+teardown() {  # remove the per-test sandbox
+  rm -rf "$TEST_TMPDIR"
+}
+
+make_ci_harness() {  # copy the two CI scripts into a scratch tree; prints its root
+  local harness_root="$TEST_TMPDIR/ci-harness"
+  mkdir -p "$harness_root/tasks/scripts"
+  cp "tasks/scripts/download-runtime-bundle.sh" "$harness_root/tasks/scripts/download-runtime-bundle.sh" 2>/dev/null || true  # NOTE(review): a missing script is ignored here and only surfaces when a test runs it
+  cp "tasks/scripts/ci-build-cluster-image.sh" "$harness_root/tasks/scripts/ci-build-cluster-image.sh" 2>/dev/null || true
+  printf '%s\n' "$harness_root"
+}
+
+@test "download-runtime-bundle.sh downloads a runtime bundle into the build cache and reuses it on repeat" {
+  local harness_root output_path first_contents second_contents
+  harness_root="$(make_ci_harness)"
+
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    bash -lc "cd '$harness_root' && bash tasks/scripts/download-runtime-bundle.sh --arch amd64 --url https://example.com/runtime-bundle-amd64.tar.gz"
+
+  [ "$status" -eq 0 ]
+  output_path="$output"  # the script prints the cached tarball path
+  [ -f "$output_path" ]
+  first_contents="$(<"$output_path")"
+  [[ "$first_contents" == *"downloaded from https://example.com/runtime-bundle-amd64.tar.gz"* ]]
+  [[ "$(wc -l < "$FAKE_CURL_LOG")" -eq 1 ]]
+
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    bash -lc "cd '$harness_root' && bash tasks/scripts/download-runtime-bundle.sh --arch amd64 --url https://example.com/runtime-bundle-amd64.tar.gz"
+
+  [ "$status" -eq 0 ]
+  [ "$output" = "$output_path" ]
+  second_contents="$(<"$output_path")"
+  [ "$second_contents" = "$first_contents" ]
+  [[ "$(wc -l < "$FAKE_CURL_LOG")" -eq 1 ]]  # still one curl call: the second run was a cache hit
+}
+
+@test "ci-build-cluster-image.sh routes single-arch cluster builds through docker:build:cluster with a downloaded bundle" {
+  local harness_root
+  harness_root="$(make_ci_harness)"
+
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    bash -lc "cd '$harness_root' && bash tasks/scripts/ci-build-cluster-image.sh --platform linux/arm64 --runtime-bundle-url https://example.com/runtime-bundle-arm64.tar.gz"
+
+  [ "$status" -eq 0 ]
+  [[ "$(<"$FAKE_MISE_LOG")" == *"run --no-prepare docker:build:cluster"* ]]
+  [[ "$(<"$FAKE_MISE_LOG")" == *"runtime-bundle-arm64.tar.gz"* ]]
+}
+
+@test "ci-build-cluster-image.sh routes multi-arch cluster builds through docker:build:cluster:multiarch with per-arch bundles" {
+  local harness_root
+  harness_root="$(make_ci_harness)"
+
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    IMAGE_REGISTRY=ghcr.io/nvidia/openshell \
+    bash -lc "cd '$harness_root' && bash tasks/scripts/ci-build-cluster-image.sh --platform linux/amd64,linux/arm64 --runtime-bundle-url-amd64 https://example.com/runtime-bundle-amd64.tar.gz --runtime-bundle-url-arm64 https://example.com/runtime-bundle-arm64.tar.gz"
+
+  [ "$status" -eq 0 ]
+  [[ "$(<"$FAKE_MISE_LOG")" == *"run --no-prepare docker:build:cluster:multiarch"* ]]
+  [[ "$(<"$FAKE_MISE_LOG")" == *"runtime-bundle-amd64.tar.gz"* ]]
+  [[ "$(<"$FAKE_MISE_LOG")" == *"runtime-bundle-arm64.tar.gz"* ]]
+  [[ "$(<"$FAKE_MISE_LOG")" == *"|ghcr.io/nvidia/openshell" ]]  # last logged field is DOCKER_REGISTRY
+}
diff --git
a/tasks/tests/runtime-bundle-consumer.bats b/tasks/tests/runtime-bundle-consumer.bats
new file mode 100644
index 00000000..179d8283
--- /dev/null
+++ b/tasks/tests/runtime-bundle-consumer.bats
@@ -0,0 +1,906 @@
+#!/usr/bin/env bats
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+setup() {  # per-test sandbox with fake docker and helm; also clears any staged runtime bundle
+  export TEST_TMPDIR
+  TEST_TMPDIR="$(mktemp -d)"
+  export FAKE_BIN_DIR="$TEST_TMPDIR/bin"
+  export FAKE_DOCKER_LOG="$TEST_TMPDIR/docker.log"
+  export FAKE_HELM_LOG="$TEST_TMPDIR/helm.log"
+  mkdir -p "$FAKE_BIN_DIR"
+
+  cat > "$FAKE_BIN_DIR/docker" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$FAKE_DOCKER_LOG"
+if [[ "${1:-}" == "buildx" && "${2:-}" == "build" ]]; then
+  if [[ "${ASSERT_RUNTIME_BUNDLE_STAGE_ON_BUILD:-0}" == "1" ]]; then
+    staged_root="deploy/docker/.build/runtime-bundle/${ASSERT_RUNTIME_BUNDLE_STAGE_ARCH:-amd64}"
+    if [[ ! -d "$staged_root" ]]; then
+      printf 'missing staged runtime bundle root: %s\n' "$staged_root" >&2
+      exit 19
+    fi
+    if ! compgen -G "$staged_root/*/usr/bin/nvidia-container-cli" >/dev/null; then
+      printf 'missing staged runtime bundle payload under: %s\n' "$staged_root" >&2
+      exit 20
+    fi
+  fi
+  exit 0
+fi
+exit 0
+EOF
+  chmod +x "$FAKE_BIN_DIR/docker"  # fake docker: can assert that the bundle was staged before `buildx build`
+
+  cat > "$FAKE_BIN_DIR/helm" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$FAKE_HELM_LOG"
+output_dir=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -d)
+      output_dir="$2"
+      shift 2
+      ;;
+    *)
+      shift
+      ;;
+  esac
+done
+if [[ -n "$output_dir" ]]; then
+  mkdir -p "$output_dir"
+  : > "$output_dir/openshell-0.0.0.tgz"
+fi
+EOF
+  chmod +x "$FAKE_BIN_DIR/helm"  # fake helm: logs arguments and drops an empty chart archive into the -d directory
+
+  rm -rf deploy/docker/.build/runtime-bundle  # relative to the current directory, not to $TEST_TMPDIR
+}
+
+teardown() {  # remove the staged bundle and the sandbox
+  rm -rf deploy/docker/.build/runtime-bundle
+  rm -rf "$TEST_TMPDIR"
+}
+
+run_cluster_build() {  # run docker-build-cluster.sh with the fakes on PATH; extra arguments are passed to `env`
+  run env \
+    PATH="$FAKE_BIN_DIR:$PATH" \
+    OPENSHELL_CARGO_VERSION=0.0.0-test \
+    "$@" \
+    bash tasks/scripts/docker-build-cluster.sh
+}
+
+assert_no_docker_buildx_build() {  # passes when the docker log is absent or has no "buildx build" entry
+  if [[ -f "$FAKE_DOCKER_LOG" ]]; then
+    ! grep -Fq "buildx build" "$FAKE_DOCKER_LOG"
+  fi
+}
+
+assert_no_docker_commands() {  # passes when the docker log is absent or empty
+  if [[ -f "$FAKE_DOCKER_LOG" ]]; then
+    [ ! -s "$FAKE_DOCKER_LOG" ]
+  fi
+}
+
+assert_no_helm_commands() {  # passes when the helm log is absent or empty
+  if [[ -f "$FAKE_HELM_LOG" ]]; then
+    [ ! -s "$FAKE_HELM_LOG" ]
+  fi
+}
+
+assert_runtime_bundle_not_staged() {  # the staging parent directory must not exist at all
+  [ ! -e deploy/docker/.build/runtime-bundle ]
+}
+
+assert_runtime_bundle_arch_not_staged() {  # only the per-arch stage directory $1 must be absent
+  local arch="$1"
+  [ ! -e "deploy/docker/.build/runtime-bundle/$arch" ]
+}
+
+seed_stale_runtime_bundle_stage() {  # create a leftover stage for arch $1 containing a marker file
+  local arch="$1"
+  local staged_root="deploy/docker/.build/runtime-bundle/$arch"
+  mkdir -p "$staged_root"
+  printf 'stale\n' > "$staged_root/stale.txt"
+}
+
+create_runtime_bundle_tarball() {
+  local tarball_path="$1"
+  local bundle_arch="${2:-amd64}"
+  local bundle_dir_name="openshell-gpu-runtime-bundle_0.1.0_${bundle_arch}"
+  local bundle_root="$TEST_TMPDIR/${bundle_dir_name}"
+  local multiarch="x86_64-linux-gnu"
+  local manifest_arch="$bundle_arch"
+
+  if [[ -n "${3:-}" ]]; then
+    manifest_arch="$3"
+  fi
+
+  if [[ "$bundle_arch" == "arm64" ]]; then
+    multiarch="aarch64-linux-gnu"
+  fi
+
+  mkdir -p \
+    "$bundle_root/usr/bin" \
+    "$bundle_root/etc/nvidia-container-runtime" \
+    "$bundle_root/usr/lib/$multiarch"
+
+  printf 'cdi-hook\n' > "$bundle_root/usr/bin/nvidia-cdi-hook"
+  printf 'runtime\n' > "$bundle_root/usr/bin/nvidia-container-runtime"
+  printf 'runtime-hook\n' > "$bundle_root/usr/bin/nvidia-container-runtime-hook"
+  printf 'container-cli\n' > "$bundle_root/usr/bin/nvidia-container-cli"
+  printf 'ctk\n' > "$bundle_root/usr/bin/nvidia-ctk"
+  printf 'config = true\n' > "$bundle_root/etc/nvidia-container-runtime/config.toml"
+  printf 'libnvidia-container\n' > "$bundle_root/usr/lib/$multiarch/libnvidia-container.so.1"
+
+  local cdi_hook_sha runtime_sha runtime_hook_sha cli_sha ctk_sha config_sha lib_sha
+  cdi_hook_sha="$(sha256sum "$bundle_root/usr/bin/nvidia-cdi-hook" | cut -d ' ' -f 1)"
+  runtime_sha="$(sha256sum "$bundle_root/usr/bin/nvidia-container-runtime" | cut -d ' ' -f 1)"
+  runtime_hook_sha="$(sha256sum "$bundle_root/usr/bin/nvidia-container-runtime-hook" | cut -d ' ' -f 1)"
+  cli_sha="$(sha256sum "$bundle_root/usr/bin/nvidia-container-cli" | cut -d ' ' -f 1)"
+  ctk_sha="$(sha256sum "$bundle_root/usr/bin/nvidia-ctk" | cut -d ' ' -f 1)"
+  config_sha="$(sha256sum "$bundle_root/etc/nvidia-container-runtime/config.toml" | cut -d ' ' -f 1)"
+  lib_sha="$(sha256sum
"$bundle_root/usr/lib/$multiarch/libnvidia-container.so.1" | cut -d ' ' -f 1)" + + cat > "$bundle_root/manifest.json" < "$bundle_root/usr/bin/nvidia-cdi-hook" + printf 'runtime\n' > "$bundle_root/usr/bin/nvidia-container-runtime" + printf 'runtime-hook\n' > "$bundle_root/usr/bin/nvidia-container-runtime-hook" + printf 'container-cli\n' > "$bundle_root/usr/bin/nvidia-container-cli" + printf 'ctk\n' > "$bundle_root/usr/bin/nvidia-ctk" + printf 'config = true\n' > "$bundle_root/etc/nvidia-container-runtime/config.toml" + printf 'libnvidia-container\n' > "$bundle_root/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1" + + tar -czf "$tarball_path" -C "$TEST_TMPDIR" "$bundle_dir_name" +} + +create_runtime_bundle_tarball_with_missing_required_manifest_field() { + local tarball_path="$1" + + create_runtime_bundle_tarball "$tarball_path" + + python3 - "$tarball_path" <<'PY' +import json +import pathlib +import shutil +import subprocess +import sys +import tarfile +import tempfile + +tarball_path = pathlib.Path(sys.argv[1]) + +with tempfile.TemporaryDirectory() as temp_dir: + temp_path = pathlib.Path(temp_dir) + with tarfile.open(tarball_path, "r:gz") as archive: + archive.extractall(temp_path) + + bundle_root = next(temp_path.iterdir()) + manifest_path = bundle_root / "manifest.json" + manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + del manifest["created_at"] + manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8") + + subprocess.run(["tar", "-czf", str(tarball_path), "-C", str(temp_path), bundle_root.name], check=True) +PY +} + +create_runtime_bundle_tarball_with_checksum_mismatch() { + local tarball_path="$1" + + create_runtime_bundle_tarball "$tarball_path" + + python3 - "$tarball_path" <<'PY' +import json +import pathlib +import subprocess +import sys +import tarfile +import tempfile + +tarball_path = pathlib.Path(sys.argv[1]) + +with tempfile.TemporaryDirectory() as temp_dir: + temp_path = pathlib.Path(temp_dir) + with 
tarfile.open(tarball_path, "r:gz") as archive: + archive.extractall(temp_path) + + bundle_root = next(temp_path.iterdir()) + manifest_path = bundle_root / "manifest.json" + manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + + for entry in manifest["files"]: + if entry.get("path") == "usr/bin/nvidia-container-cli": + entry["sha256"] = "0" * 64 + break + else: + raise AssertionError("missing nvidia-container-cli entry") + + manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8") + subprocess.run(["tar", "-czf", str(tarball_path), "-C", str(temp_path), bundle_root.name], check=True) +PY +} + +create_runtime_bundle_tarball_with_missing_required_manifest_entry() { + local tarball_path="$1" + + create_runtime_bundle_tarball "$tarball_path" + + python3 - "$tarball_path" <<'PY' +import json +import pathlib +import subprocess +import sys +import tarfile +import tempfile + +tarball_path = pathlib.Path(sys.argv[1]) + +with tempfile.TemporaryDirectory() as temp_dir: + temp_path = pathlib.Path(temp_dir) + with tarfile.open(tarball_path, "r:gz") as archive: + archive.extractall(temp_path) + + bundle_root = next(temp_path.iterdir()) + manifest_path = bundle_root / "manifest.json" + manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + manifest["files"] = [ + entry for entry in manifest["files"] + if entry.get("path") != "usr/bin/nvidia-container-cli" + ] + manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8") + + subprocess.run(["tar", "-czf", str(tarball_path), "-C", str(temp_path), bundle_root.name], check=True) +PY +} + +create_runtime_bundle_tarball_with_extra_unlisted_file() { + local tarball_path="$1" + + create_runtime_bundle_tarball "$tarball_path" + + python3 - "$tarball_path" <<'PY' +import pathlib +import subprocess +import sys +import tarfile +import tempfile + +tarball_path = pathlib.Path(sys.argv[1]) + +with tempfile.TemporaryDirectory() as temp_dir: + temp_path = 
pathlib.Path(temp_dir) + with tarfile.open(tarball_path, "r:gz") as archive: + archive.extractall(temp_path) + + bundle_root = next(temp_path.iterdir()) + extra_path = bundle_root / "usr/bin/nvidia-container-extra" + extra_path.write_text("extra\n", encoding="utf-8") + + subprocess.run(["tar", "-czf", str(tarball_path), "-C", str(temp_path), bundle_root.name], check=True) +PY +} + +create_runtime_bundle_tarball_with_size_mismatch() { + local tarball_path="$1" + + create_runtime_bundle_tarball "$tarball_path" + + python3 - "$tarball_path" <<'PY' +import json +import pathlib +import subprocess +import sys +import tarfile +import tempfile + +tarball_path = pathlib.Path(sys.argv[1]) + +with tempfile.TemporaryDirectory() as temp_dir: + temp_path = pathlib.Path(temp_dir) + with tarfile.open(tarball_path, "r:gz") as archive: + archive.extractall(temp_path) + + bundle_root = next(temp_path.iterdir()) + manifest_path = bundle_root / "manifest.json" + manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + + for entry in manifest["files"]: + if entry.get("path") == "usr/bin/nvidia-container-cli": + entry["size"] = entry["size"] + 1 + break + else: + raise AssertionError("missing nvidia-container-cli entry") + + manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8") + subprocess.run(["tar", "-czf", str(tarball_path), "-C", str(temp_path), bundle_root.name], check=True) +PY +} + +create_runtime_bundle_tarball_with_invalid_manifest() { + local tarball_path="$1" + local bundle_dir_name="openshell-gpu-runtime-bundle_0.1.0_amd64" + local bundle_root="$TEST_TMPDIR/${bundle_dir_name}" + + mkdir -p \ + "$bundle_root/usr/bin" \ + "$bundle_root/etc/nvidia-container-runtime" \ + "$bundle_root/usr/lib/x86_64-linux-gnu" + + touch "$bundle_root/usr/bin/nvidia-cdi-hook" + touch "$bundle_root/usr/bin/nvidia-container-runtime" + touch "$bundle_root/usr/bin/nvidia-container-runtime-hook" + touch "$bundle_root/usr/bin/nvidia-container-cli" + touch 
"$bundle_root/usr/bin/nvidia-ctk" + touch "$bundle_root/etc/nvidia-container-runtime/config.toml" + touch "$bundle_root/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1" + + printf '{ invalid json\n' > "$bundle_root/manifest.json" + + tar -czf "$tarball_path" -C "$TEST_TMPDIR" "$bundle_dir_name" +} + +create_runtime_bundle_tarball_with_unsafe_entry() { + local tarball_path="$1" + + python3 - "$tarball_path" <<'PY' +import io +import tarfile +import time +import sys + +tarball_path = sys.argv[1] + +with tarfile.open(tarball_path, "w:gz") as archive: + dir_info = tarfile.TarInfo("openshell-gpu-runtime-bundle_0.1.0_amd64") + dir_info.type = tarfile.DIRTYPE + dir_info.mode = 0o755 + dir_info.mtime = int(time.time()) + archive.addfile(dir_info) + + manifest = b'{"architecture":"amd64"}' + manifest_info = tarfile.TarInfo("openshell-gpu-runtime-bundle_0.1.0_amd64/manifest.json") + manifest_info.size = len(manifest) + manifest_info.mode = 0o644 + manifest_info.mtime = int(time.time()) + archive.addfile(manifest_info, io.BytesIO(manifest)) + + unsafe_data = b'escape\n' + unsafe_info = tarfile.TarInfo("../../outside.txt") + unsafe_info.size = len(unsafe_data) + unsafe_info.mode = 0o644 + unsafe_info.mtime = int(time.time()) + archive.addfile(unsafe_info, io.BytesIO(unsafe_data)) +PY +} + +create_runtime_bundle_tarball_with_tar_warning() { + local tarball_path="$1" + local bundle_dir_name="openshell-gpu-runtime-bundle_0.1.0_amd64" + local bundle_root="$TEST_TMPDIR/${bundle_dir_name}" + + mkdir -p "$bundle_root/usr/bin" "$bundle_root/etc/nvidia-container-runtime" "$bundle_root/usr/lib/x86_64-linux-gnu" + touch "$bundle_root/usr/bin/nvidia-cdi-hook" + touch "$bundle_root/usr/bin/nvidia-container-runtime" + touch "$bundle_root/usr/bin/nvidia-container-runtime-hook" + touch "$bundle_root/usr/bin/nvidia-container-cli" + touch "$bundle_root/usr/bin/nvidia-ctk" + touch "$bundle_root/etc/nvidia-container-runtime/config.toml" + touch 
"$bundle_root/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1" + + cat > "$bundle_root/manifest.json" <<'EOF' +{ + "architecture": "amd64" +} +EOF + + tar -czf "$tarball_path" -C "$TEST_TMPDIR" "$bundle_dir_name" +} + +create_runtime_bundle_tarball_with_hidden_top_level_entry() { + local tarball_path="$1" + + create_runtime_bundle_tarball "$tarball_path" + printf 'hidden\n' > "$TEST_TMPDIR/.hidden-top-level" + tar -czf "$tarball_path" -C "$TEST_TMPDIR" ".hidden-top-level" "openshell-gpu-runtime-bundle_0.1.0_amd64" +} + +create_runtime_bundle_tarball_with_symlinked_required_binary() { + local tarball_path="$1" + + create_runtime_bundle_tarball "$tarball_path" + + python3 - "$tarball_path" <<'PY' +import hashlib +import json +import pathlib +import subprocess +import sys +import tarfile +import tempfile + +tarball_path = pathlib.Path(sys.argv[1]) + +with tempfile.TemporaryDirectory() as temp_dir: + temp_path = pathlib.Path(temp_dir) + with tarfile.open(tarball_path, "r:gz") as archive: + archive.extractall(temp_path) + + bundle_root = next(temp_path.iterdir()) + cli_path = bundle_root / "usr/bin/nvidia-container-cli" + cli_real_path = bundle_root / "usr/bin/nvidia-container-cli.real" + cli_real_path.write_bytes(cli_path.read_bytes()) + cli_path.unlink() + cli_path.symlink_to("nvidia-container-cli.real") + + manifest_path = bundle_root / "manifest.json" + manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + for index, entry in enumerate(manifest["files"]): + if entry.get("path") == "usr/bin/nvidia-container-cli": + manifest["files"][index] = { + "path": "usr/bin/nvidia-container-cli", + "entry_type": "symlink", + "target": "nvidia-container-cli.real", + } + break + else: + raise AssertionError("missing nvidia-container-cli entry") + + real_digest = hashlib.sha256(cli_real_path.read_bytes()).hexdigest() + manifest["files"].append( + { + "path": "usr/bin/nvidia-container-cli.real", + "entry_type": "file", + "sha256": real_digest, + "size": 
cli_real_path.stat().st_size, + } + ) + + manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8") + subprocess.run(["tar", "-czf", str(tarball_path), "-C", str(temp_path), bundle_root.name], check=True) +PY +} + +@test "docker-build-cluster requires a runtime bundle tarball before helm or docker build by default" { + run_cluster_build env -u OPENSHELL_RUNTIME_BUNDLE_TARBALL + + [ "$status" -ne 0 ] + [[ "$output" == *"missing required variable: OPENSHELL_RUNTIME_BUNDLE_TARBALL"* ]] + assert_no_docker_commands + assert_no_helm_commands + assert_runtime_bundle_not_staged + assert_no_docker_buildx_build +} + +@test "docker-build-cluster stages the runtime bundle before invoking docker buildx build by default" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-build-default.tar.gz" + + create_runtime_bundle_tarball "$runtime_tarball" + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + ASSERT_RUNTIME_BUNDLE_STAGE_ON_BUILD=1 \ + ASSERT_RUNTIME_BUNDLE_STAGE_ARCH=amd64 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -eq 0 ] + [[ "$output" == *"Packaging helm chart..."* ]] + [[ "$output" == *"Building cluster image..."* ]] + [ -s "$FAKE_HELM_LOG" ] + [ -s "$FAKE_DOCKER_LOG" ] + [[ "$(<"$FAKE_DOCKER_LOG")" == *"buildx build"* ]] + [ -d "deploy/docker/.build/runtime-bundle/amd64" ] +} + +@test "docker-build-cluster forwards cluster cache and sccache build args into docker buildx build" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-build-cache-args.tar.gz" + + create_runtime_bundle_tarball "$runtime_tarball" + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + DOCKER_PLATFORM=linux/amd64 \ + SCCACHE_MEMCACHED_ENDPOINT=memcached://cache.internal:11211 + + [ "$status" -eq 0 ] + [[ "$(<"$FAKE_DOCKER_LOG")" == *"--build-arg CARGO_TARGET_CACHE_SCOPE="* ]] + [[ "$(<"$FAKE_DOCKER_LOG")" == *"--build-arg SCCACHE_MEMCACHED_ENDPOINT=memcached://cache.internal:11211"* ]] +} + +@test 
"docker-build-cluster rejects malformed runtime bundle tarballs before helm or docker" { + local malformed_tarball="$TEST_TMPDIR/runtime-bundle-malformed.tar.gz" + mkdir -p "$TEST_TMPDIR/malformed" + : > "$TEST_TMPDIR/malformed/not-a-bundle.txt" + tar -czf "$malformed_tarball" -C "$TEST_TMPDIR/malformed" . + + run_cluster_build OPENSHELL_RUNTIME_BUNDLE_TARBALL="$malformed_tarball" + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: expected a single top-level bundle directory"* ]] + assert_no_docker_commands + assert_no_helm_commands + assert_runtime_bundle_arch_not_staged amd64 + assert_no_docker_buildx_build +} + +@test "docker-build-cluster accepts current producer-shaped tarballs in script-only verification mode" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-valid-producer-shape.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + local staged_bundle_dir="$staged_root/openshell-gpu-runtime-bundle_0.1.0_amd64" + + create_runtime_bundle_tarball "$runtime_tarball" + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -eq 0 ] + [[ "$output" == *"Runtime bundle staged at $staged_bundle_dir"* ]] + [ -d "$staged_bundle_dir" ] + [ -f "$staged_bundle_dir/manifest.json" ] + [ -f "$staged_bundle_dir/usr/bin/nvidia-container-cli" ] + assert_no_docker_commands + assert_no_helm_commands + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects malformed manifest.json before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-invalid-manifest.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + + create_runtime_bundle_tarball_with_invalid_manifest "$runtime_tarball" + seed_stale_runtime_bundle_stage amd64 + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + 
DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: malformed manifest.json"* ]] + assert_no_docker_commands + assert_no_helm_commands + [ ! -e "$staged_root" ] + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects missing manifest.json before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-missing-manifest.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + + create_runtime_bundle_tarball_without_manifest "$runtime_tarball" + seed_stale_runtime_bundle_stage amd64 + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: missing bundle manifest.json"* ]] + assert_no_docker_commands + assert_no_helm_commands + [ ! -e "$staged_root" ] + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects missing required manifest fields before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-missing-manifest-field.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + + create_runtime_bundle_tarball_with_missing_required_manifest_field "$runtime_tarball" + seed_stale_runtime_bundle_stage amd64 + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: missing required manifest field: created_at"* ]] + assert_no_docker_commands + assert_no_helm_commands + [ ! 
-e "$staged_root" ] + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects checksum mismatches before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-checksum-mismatch.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + + create_runtime_bundle_tarball_with_checksum_mismatch "$runtime_tarball" + seed_stale_runtime_bundle_stage amd64 + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: checksum mismatch: usr/bin/nvidia-container-cli"* ]] + assert_no_docker_commands + assert_no_helm_commands + [ ! -e "$staged_root" ] + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects required runtime assets omitted from manifest.json before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-missing-required-manifest-entry.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + + create_runtime_bundle_tarball_with_missing_required_manifest_entry "$runtime_tarball" + seed_stale_runtime_bundle_stage amd64 + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: required runtime asset missing from manifest.json: usr/bin/nvidia-container-cli"* ]] + assert_no_docker_commands + assert_no_helm_commands + [ ! 
-e "$staged_root" ] + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects extra unlisted files before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-extra-unlisted-file.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + + create_runtime_bundle_tarball_with_extra_unlisted_file "$runtime_tarball" + seed_stale_runtime_bundle_stage amd64 + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: unlisted payload path present on disk: usr/bin/nvidia-container-extra"* ]] + assert_no_docker_commands + assert_no_helm_commands + [ ! -e "$staged_root" ] + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects size mismatches before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-size-mismatch.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + + create_runtime_bundle_tarball_with_size_mismatch "$runtime_tarball" + seed_stale_runtime_bundle_stage amd64 + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: size mismatch: usr/bin/nvidia-container-cli"* ]] + assert_no_docker_commands + assert_no_helm_commands + [ ! 
-e "$staged_root" ] + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects runtime bundle architecture mismatches before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-arm64.tar.gz" + + create_runtime_bundle_tarball "$runtime_tarball" "arm64" + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: bundle architecture mismatch: expected amd64, got arm64"* ]] + assert_no_docker_commands + assert_no_helm_commands + assert_runtime_bundle_arch_not_staged amd64 + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects tar extraction warnings and clears stale staged content" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-unsafe-entry.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + + create_runtime_bundle_tarball_with_unsafe_entry "$runtime_tarball" + seed_stale_runtime_bundle_stage amd64 + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: tar extraction reported warnings or errors"* ]] + assert_no_docker_commands + assert_no_helm_commands + [ ! 
-e "$staged_root" ] + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects tar extraction stderr even when tar exits successfully" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-tar-warning.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + + create_runtime_bundle_tarball_with_tar_warning "$runtime_tarball" + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 \ + TAR_STDERR_MESSAGE="tar: synthetic warning" + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: tar extraction reported warnings or errors"* ]] + assert_no_docker_commands + assert_no_helm_commands + [ ! -e "$staged_root" ] + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects hidden top-level tarball entries before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-hidden-top-level.tar.gz" + + create_runtime_bundle_tarball_with_hidden_top_level_entry "$runtime_tarball" + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: expected a single top-level bundle directory"* ]] + assert_no_docker_commands + assert_no_helm_commands + assert_runtime_bundle_arch_not_staged amd64 + assert_no_docker_buildx_build +} + +@test "docker-build-cluster rejects symlinked required binary payload paths before helm or docker" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-symlinked-binary.tar.gz" + + create_runtime_bundle_tarball_with_symlinked_required_binary "$runtime_tarball" + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -ne 0 ] + [[ "$output" == *"runtime bundle validation failed: invalid required binary entry 
type: usr/bin/nvidia-container-cli"* ]] + assert_no_docker_commands + assert_no_helm_commands + assert_runtime_bundle_arch_not_staged amd64 + assert_no_docker_buildx_build +} + +@test "docker-build-cluster stages a valid runtime bundle in script-only verification mode" { + local runtime_tarball="$TEST_TMPDIR/runtime-bundle-valid.tar.gz" + local staged_root="deploy/docker/.build/runtime-bundle/amd64" + local staged_bundle_dir="$staged_root/openshell-gpu-runtime-bundle_0.1.0_amd64" + + create_runtime_bundle_tarball "$runtime_tarball" + + run_cluster_build \ + OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_tarball" \ + OPENSHELL_RUNTIME_BUNDLE_VERIFY_ONLY=1 \ + DOCKER_PLATFORM=linux/amd64 + + [ "$status" -eq 0 ] + [[ "$output" == *"Runtime bundle staged at $staged_bundle_dir"* ]] + [ -d "$staged_root" ] + [ -d "$staged_bundle_dir" ] + [ -f "$staged_bundle_dir/usr/bin/nvidia-container-cli" ] + [ -d "$staged_bundle_dir/etc/nvidia-container-runtime" ] + [ -f "$staged_bundle_dir/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1" ] + assert_no_docker_commands + assert_no_helm_commands + assert_no_docker_buildx_build +} + +@test "Dockerfile.cluster consumes the staged local runtime bundle instead of the apt-installed nvidia-toolkit stage" { + run python3 - <<'PY' +from pathlib import Path +import sys + +dockerfile = Path("deploy/docker/Dockerfile.cluster").read_text(encoding="utf-8") + +checks = { + "removes apt toolkit stage": "FROM ubuntu:24.04 AS nvidia-toolkit" not in dockerfile, + "removes NVIDIA apt repo install": "nvidia.github.io/libnvidia-container" not in dockerfile, + "adds local runtime bundle stage": "FROM ubuntu:24.04 AS runtime-bundle" in dockerfile, + "copies staged runtime bundle context": "deploy/docker/.build/runtime-bundle/" in dockerfile, + "copies runtime files from local runtime bundle stage": "COPY --from=runtime-bundle /out/usr/bin/nvidia-container-cli /usr/bin/" in dockerfile, +} + +failed = [name for name, ok in checks.items() if not ok] +if failed: + 
print("; ".join(failed)) + sys.exit(1) +PY + + [ "$status" -eq 0 ] +} From 1ba216efade4dba09d6f1ae06d126adf4e4cce96 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Thu, 19 Mar 2026 12:07:27 +0100 Subject: [PATCH 2/3] feat(sandbox): add tool adapter validation and fork-owned trust --- .github/workflows/branch-e2e.yml | 5 + .github/workflows/docker-build.yml | 38 ++- .github/workflows/e2e-test.yml | 54 +++- .github/workflows/release-dev.yml | 21 +- .github/workflows/release-tag.yml | 18 +- architecture/README.md | 11 + architecture/inference-routing.md | 10 + architecture/sandbox-providers.md | 61 +++++ architecture/sandbox.md | 18 ++ .../src/providers/claude.rs | 15 +- .../src/providers/github.rs | 34 ++- .../src/providers/opencode.rs | 58 +++- crates/openshell-sandbox/src/child_env.rs | 49 ++++ crates/openshell-sandbox/src/lib.rs | 33 ++- crates/openshell-sandbox/src/secrets.rs | 194 +++++++++++++- e2e/install/bash_test.sh | 150 ++++++++++- e2e/install/fish_test.fish | 222 ++++++++++++++- e2e/install/helpers.sh | 252 +++++++++++++++++- e2e/install/sh_test.sh | 142 +++++++++- e2e/install/zsh_test.sh | 48 +++- e2e/rust/tests/tool_adapter_smoke.rs | 173 ++++++++++++ install.sh | 201 ++++++++++++-- tasks/scripts/ci-build-cluster-image.sh | 130 ++++++++- tasks/scripts/download-runtime-bundle.sh | 3 +- tasks/tests/runtime-bundle-ci-workflow.bats | 130 +++++++++ 25 files changed, 1989 insertions(+), 81 deletions(-) create mode 100644 e2e/rust/tests/tool_adapter_smoke.rs diff --git a/.github/workflows/branch-e2e.yml b/.github/workflows/branch-e2e.yml index 82b45f68..5ab0c399 100644 --- a/.github/workflows/branch-e2e.yml +++ b/.github/workflows/branch-e2e.yml @@ -25,6 +25,10 @@ jobs: platform: linux/arm64 runner: build-arm64 runtime-bundle-url: ${{ vars.OPENSHELL_RUNTIME_BUNDLE_URL_ARM64 }} + runtime-bundle-github-repo: ${{ github.repository_owner }}/nvidia-container-toolkit + runtime-bundle-release-tag: devel + runtime-bundle-filename-prefix: 
openshell-gpu-runtime-bundle + runtime-bundle-version: devel e2e: needs: [build-gateway, build-cluster] @@ -32,3 +36,4 @@ jobs: with: image-tag: ${{ github.sha }} runner: build-arm64 + run-tool-smoke-validations: true diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 6e190a3e..d3f97067 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -47,6 +47,26 @@ on: required: false type: string default: "" + runtime-bundle-github-repo: + description: "Runtime bundle producer GitHub repository" + required: false + type: string + default: "" + runtime-bundle-release-tag: + description: "Runtime bundle release tag used for derived defaults" + required: false + type: string + default: "" + runtime-bundle-filename-prefix: + description: "Runtime bundle asset filename prefix" + required: false + type: string + default: "" + runtime-bundle-version: + description: "Runtime bundle version token used in asset filenames" + required: false + type: string + default: "" env: MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -116,6 +136,10 @@ jobs: OPENSHELL_RUNTIME_BUNDLE_URL: ${{ inputs.runtime-bundle-url }} OPENSHELL_RUNTIME_BUNDLE_URL_AMD64: ${{ inputs.runtime-bundle-url-amd64 }} OPENSHELL_RUNTIME_BUNDLE_URL_ARM64: ${{ inputs.runtime-bundle-url-arm64 }} + OPENSHELL_RUNTIME_BUNDLE_GITHUB_REPO: ${{ inputs.runtime-bundle-github-repo }} + OPENSHELL_RUNTIME_BUNDLE_RELEASE_TAG: ${{ inputs.runtime-bundle-release-tag }} + OPENSHELL_RUNTIME_BUNDLE_FILENAME_PREFIX: ${{ inputs.runtime-bundle-filename-prefix }} + OPENSHELL_RUNTIME_BUNDLE_VERSION: ${{ inputs.runtime-bundle-version }} run: | set -euo pipefail @@ -123,9 +147,19 @@ jobs: bash tasks/scripts/ci-build-cluster-image.sh \ --platform "${DOCKER_PLATFORM}" \ --runtime-bundle-url-amd64 "${OPENSHELL_RUNTIME_BUNDLE_URL_AMD64}" \ - --runtime-bundle-url-arm64 "${OPENSHELL_RUNTIME_BUNDLE_URL_ARM64}" + --runtime-bundle-url-arm64 "${OPENSHELL_RUNTIME_BUNDLE_URL_ARM64}" \ 
+ --runtime-bundle-github-repo "${OPENSHELL_RUNTIME_BUNDLE_GITHUB_REPO}" \ + --runtime-bundle-release-tag "${OPENSHELL_RUNTIME_BUNDLE_RELEASE_TAG}" \ + --runtime-bundle-filename-prefix "${OPENSHELL_RUNTIME_BUNDLE_FILENAME_PREFIX}" \ + --runtime-bundle-version "${OPENSHELL_RUNTIME_BUNDLE_VERSION}" else bash tasks/scripts/ci-build-cluster-image.sh \ --platform "${DOCKER_PLATFORM}" \ - --runtime-bundle-url "${OPENSHELL_RUNTIME_BUNDLE_URL}" + --runtime-bundle-url "${OPENSHELL_RUNTIME_BUNDLE_URL}" \ + --runtime-bundle-url-amd64 "${OPENSHELL_RUNTIME_BUNDLE_URL_AMD64}" \ + --runtime-bundle-url-arm64 "${OPENSHELL_RUNTIME_BUNDLE_URL_ARM64}" \ + --runtime-bundle-github-repo "${OPENSHELL_RUNTIME_BUNDLE_GITHUB_REPO}" \ + --runtime-bundle-release-tag "${OPENSHELL_RUNTIME_BUNDLE_RELEASE_TAG}" \ + --runtime-bundle-filename-prefix "${OPENSHELL_RUNTIME_BUNDLE_FILENAME_PREFIX}" \ + --runtime-bundle-version "${OPENSHELL_RUNTIME_BUNDLE_VERSION}" fi diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index f14ccb88..c21d10e5 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -1,6 +1,27 @@ name: E2E Test on: + workflow_dispatch: + inputs: + image-tag: + description: "Image tag to test (typically the commit SHA)" + required: true + type: string + runner: + description: "GitHub Actions runner label for the core E2E suite and optional smoke slices" + required: false + type: string + default: "build-amd64" + run-tool-smoke-validations: + description: "Add the first-class tool smoke evidence slice after the core E2E suite" + required: false + type: boolean + default: false + run-installer-selection-smoke: + description: "Add the installer selection smoke slice after the core E2E suite" + required: false + type: boolean + default: false workflow_call: inputs: image-tag: @@ -8,10 +29,20 @@ on: required: true type: string runner: - description: "GitHub Actions runner label" + description: "GitHub Actions runner label for the core E2E 
suite and optional smoke slices" required: false type: string default: "build-amd64" + run-tool-smoke-validations: + description: "Add the first-class tool smoke evidence slice after the core E2E suite" + required: false + type: boolean + default: false + run-installer-selection-smoke: + description: "Add the installer selection smoke slice after the core E2E suite" + required: false + type: boolean + default: false permissions: contents: read @@ -62,7 +93,26 @@ jobs: - name: Install SSH client for Rust CLI e2e tests run: apt-get update && apt-get install -y --no-install-recommends openssh-client && rm -rf /var/lib/apt/lists/* - - name: Run E2E tests + - name: Run core E2E suite run: | mise run --no-prepare --skip-deps e2e:python mise run --no-prepare --skip-deps e2e:rust + + - name: Record tool smoke evidence slice + if: ${{ inputs.run-tool-smoke-validations }} + run: | + printf 'Enabled first-class tool smoke evidence slice for image-tag=%s\n' "${IMAGE_TAG}" + { + printf '## Tool Smoke Evidence Slice\n\n' + printf -- '- Trigger: `run-tool-smoke-validations=true`\n' + printf -- '- Image tag: `%s`\n' "${IMAGE_TAG}" + printf -- '- Contract: run `tool_adapter_smoke` after the core E2E suite\n' + } >> "$GITHUB_STEP_SUMMARY" + + - name: Run first-class tool smoke evidence slice + if: ${{ inputs.run-tool-smoke-validations }} + run: cargo test --manifest-path e2e/rust/Cargo.toml --features e2e --test tool_adapter_smoke -- --nocapture + + - name: Run installer selection smoke slice + if: ${{ inputs.run-installer-selection-smoke }} + run: bash e2e/install/bash_test.sh diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index bb1cc74a..2b0ae6d0 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -62,6 +62,10 @@ jobs: cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} runtime-bundle-url-amd64: ${{ vars.OPENSHELL_RUNTIME_BUNDLE_URL_AMD64 }} runtime-bundle-url-arm64: ${{ 
vars.OPENSHELL_RUNTIME_BUNDLE_URL_ARM64 }} + runtime-bundle-github-repo: ${{ github.repository_owner }}/nvidia-container-toolkit + runtime-bundle-release-tag: devel + runtime-bundle-filename-prefix: openshell-gpu-runtime-bundle + runtime-bundle-version: devel e2e: needs: [build-gateway, build-cluster] @@ -329,6 +333,12 @@ jobs: sha256sum *.tar.gz *.whl > openshell-checksums-sha256.txt cat openshell-checksums-sha256.txt + - name: Skip detached checksum signing for devel release + run: | + set -euo pipefail + echo "Devel releases publish checksum manifests without detached signatures in the active path." + echo "Detached checksum signing is deferred backlog work for both devel and tagged release workflows." + - name: Prune stale wheel assets from devel release uses: actions/github-script@v7 env: @@ -394,6 +404,7 @@ jobs: This build is automatically built on every commit to main that passes CI. > **NOTE**: This is a development build, not a tagged release, and may be unstable. + > **NOTE**: Checksum manifests are published in the active path. Detached checksum signing is deferred backlog work. 
### Quick install @@ -407,7 +418,7 @@ jobs: Darwin-arm64) ASSET="openshell-aarch64-apple-darwin.tar.gz" ;; \ *) echo "Unsupported platform: ${OS}-${ARCH}" >&2; exit 1 ;; \ esac; \ - gh release download devel --repo NVIDIA/OpenShell --pattern "${ASSET}" -O - \ + gh release download devel --repo linuxdevel/OpenShell --pattern "${ASSET}" -O - \ | tar xz \ && sudo install -m 755 openshell /usr/local/bin/openshell' ``` @@ -416,10 +427,10 @@ jobs: | File | Platform | Install | |------|----------|---------| - | `openshell-x86_64-unknown-linux-musl.tar.gz` | Linux x86_64 | `gh release download devel --repo NVIDIA/OpenShell --pattern "openshell-x86_64-unknown-linux-musl.tar.gz" -O - \| tar xz && sudo install -m 755 openshell /usr/local/bin/openshell` | - | `openshell-aarch64-unknown-linux-musl.tar.gz` | Linux aarch64 / ARM64 | `gh release download devel --repo NVIDIA/OpenShell --pattern "openshell-aarch64-unknown-linux-musl.tar.gz" -O - \| tar xz && sudo install -m 755 openshell /usr/local/bin/openshell` | - | `openshell-aarch64-apple-darwin.tar.gz` | macOS Apple Silicon | `gh release download devel --repo NVIDIA/OpenShell --pattern "openshell-aarch64-apple-darwin.tar.gz" -O - \| tar xz && sudo install -m 755 openshell /usr/local/bin/openshell` | - | `openshell-*.whl` | Python wheels | `gh release download devel --repo NVIDIA/OpenShell --pattern "openshell-*.whl"` | + | `openshell-x86_64-unknown-linux-musl.tar.gz` | Linux x86_64 | `gh release download devel --repo linuxdevel/OpenShell --pattern "openshell-x86_64-unknown-linux-musl.tar.gz" -O - \| tar xz && sudo install -m 755 openshell /usr/local/bin/openshell` | + | `openshell-aarch64-unknown-linux-musl.tar.gz` | Linux aarch64 / ARM64 | `gh release download devel --repo linuxdevel/OpenShell --pattern "openshell-aarch64-unknown-linux-musl.tar.gz" -O - \| tar xz && sudo install -m 755 openshell /usr/local/bin/openshell` | + | `openshell-aarch64-apple-darwin.tar.gz` | macOS Apple Silicon | `gh release download devel --repo 
linuxdevel/OpenShell --pattern "openshell-aarch64-apple-darwin.tar.gz" -O - \| tar xz && sudo install -m 755 openshell /usr/local/bin/openshell` | + | `openshell-*.whl` | Python wheels | `gh release download devel --repo linuxdevel/OpenShell --pattern "openshell-*.whl"` | | `openshell-checksums-sha256.txt` | — | SHA256 checksums for all archives | files: | release/openshell-x86_64-unknown-linux-musl.tar.gz diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml index fa2e77cc..01c166be 100644 --- a/.github/workflows/release-tag.yml +++ b/.github/workflows/release-tag.yml @@ -77,6 +77,10 @@ jobs: cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} runtime-bundle-url-amd64: ${{ vars.OPENSHELL_RUNTIME_BUNDLE_URL_AMD64 }} runtime-bundle-url-arm64: ${{ vars.OPENSHELL_RUNTIME_BUNDLE_URL_ARM64 }} + runtime-bundle-github-repo: ${{ github.repository_owner }}/nvidia-container-toolkit + runtime-bundle-release-tag: ${{ inputs.tag || github.ref_name }} + runtime-bundle-filename-prefix: openshell-gpu-runtime-bundle + runtime-bundle-version: ${{ needs.compute-versions.outputs.semver }} e2e: needs: [build-gateway, build-cluster] @@ -347,13 +351,19 @@ jobs: name: python-wheels path: release/ - - name: Generate checksums + - name: Generate required checksum manifest run: | set -euo pipefail cd release sha256sum *.tar.gz *.whl > openshell-checksums-sha256.txt cat openshell-checksums-sha256.txt + - name: Note deferred detached checksum signing + run: | + set -euo pipefail + echo "Tagged releases require release/openshell-checksums-sha256.txt in the active path." + echo "Detached checksum signing remains deferred backlog work and is not enforced by this workflow yet." 
+ - name: Create GitHub Release uses: softprops/action-gh-release@v2 with: @@ -362,12 +372,14 @@ jobs: tag_name: ${{ env.RELEASE_TAG }} generate_release_notes: true body: | - ## OpenShell ${{ env.RELEASE_TAG }} + ## OpenShell ${{ env.RELEASE_TAG }} + + Checksum manifest generation is required for tagged releases. Detached checksum signing remains deferred backlog work and is not enforced in the active release path. ### Quick install ```bash - curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | OPENSHELL_VERSION=${{ env.RELEASE_TAG }} sh + curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | OPENSHELL_VERSION=${{ env.RELEASE_TAG }} sh ``` files: | diff --git a/architecture/README.md b/architecture/README.md index b899f37c..3e8193da 100644 --- a/architecture/README.md +++ b/architecture/README.md @@ -162,6 +162,8 @@ The provider system handles: This approach means users configure credentials once, and every sandbox that needs them receives them automatically at runtime. +Provider records are not the same thing as sandbox tool adapters. A provider answers "which external account or secret material does this sandbox need?" A tool adapter answers "how should a specific CLI tool such as `claude code` or `opencode` receive and use that provider context inside the sandbox?" The long-term design keeps both layers explicit so OpenShell can support multiple first-class tools without turning each provider plugin into a tool-specific one-off. + For more detail, see [Providers](sandbox-providers.md). ### Inference Routing @@ -211,6 +213,15 @@ The platform produces three container images: Builds use multi-stage Dockerfiles with caching to keep rebuild times fast. A Helm chart handles Kubernetes-level configuration (service ports, health checks, security contexts, resource limits). Build automation is managed through mise tasks. +OpenShell is the top-level orchestrator, not the only producer in the install chain. 
The coordinated fork-owned path is split across four repos: + +- `libnvidia-container/` produces the low-level GPU library payload. +- `nvidia-container-toolkit/` produces the verified runtime-bundle release assets that cluster builds consume. +- `OpenShell-Community/` produces sandbox and base images that carry packaged tool environments. +- `OpenShell/` consumes those outputs to assemble cluster and gateway images, apply tool/provider behavior, and expose the currently expected final setup/install interface. + +The final installer/setup owner is still an explicit workspace decision, but the current direction is for `OpenShell/` to remain the user-facing composition layer rather than rebuilding the other repos' responsibilities internally. + For more detail, see [Container Management](build-containers.md). ### Policy Language diff --git a/architecture/inference-routing.md b/architecture/inference-routing.md index 0d3a95af..57d53dd5 100644 --- a/architecture/inference-routing.md +++ b/architecture/inference-routing.md @@ -53,6 +53,16 @@ Each profile also defines `credential_key_names` (e.g. `["OPENAI_API_KEY"]`) and Unknown provider types return `None` from `profile_for()` and default to `Bearer` auth with no default headers via `auth_for_provider_type()`. +## Vendor Auth Projection Boundaries + +Inference provider profiles describe backend request shape, not full sandbox tool auth projection. The next phase keeps these concerns separate: + +- **Anthropic path:** `claude code` and any other Anthropic-facing tool may rely on provider records that discover `ANTHROPIC_API_KEY` or related fields, but the decision to expose those values to the child process belongs to the tool adapter contract in the sandbox layer. +- **GitHub / Copilot path:** GitHub token discovery (`GITHUB_TOKEN`, `GH_TOKEN`) is not enough on its own to define a GitHub Copilot model-access contract. 
Any Copilot-backed model flow must explicitly document which endpoints, token shapes, and tool adapters are supported. +- **Fail-closed rule:** if a tool/vendor combination is not explicitly documented, OpenShell should not silently treat generic provider discovery as authorization to project those credentials into a sandbox child process. + +In other words, provider profiles remain the source of truth for upstream inference protocol handling once traffic is routed, but tool adapters remain the source of truth for what vendor-facing auth material can enter the child process before routing begins. + ## Control Plane (Gateway) File: `crates/openshell-server/src/inference.rs` diff --git a/architecture/sandbox-providers.md b/architecture/sandbox-providers.md index dca36c59..3c6142c6 100644 --- a/architecture/sandbox-providers.md +++ b/architecture/sandbox-providers.md @@ -18,6 +18,8 @@ supervisor rewrites those placeholders back to the real secret values before for Access is enforced through the sandbox policy — the policy decides which outbound requests are allowed or denied based on the providers attached to that sandbox. +Providers are only one half of the runtime contract. The other half is the tool adapter that defines how a sandboxed CLI actually receives provider context. OpenShell's first-class tool targets are currently `claude code` and `opencode`, with the expectation that future tools follow the same adapter model instead of adding bespoke credential plumbing to the sandbox supervisor. + Core goals: - manage providers directly via CLI, @@ -26,6 +28,65 @@ Core goals: - project provider context into sandbox runtime, - drive sandbox policy to allow or deny outbound access to third-party services. +## Tool Adapter Matrix + +The tool adapter layer sits between generic provider discovery and the sandbox child process. 
It answers four questions for each first-class tool: + +- which env vars are allowed to appear in the child process +- which config file paths may be projected or synthesized +- whether a value must remain a placeholder until proxy rewrite time or may be projected directly +- which outbound endpoint categories the tool is expected to use + +Current first-class targets: + +| Tool | Primary purpose | Expected vendor families | Notes | +|---|---|---|---| +| `claude code` | Anthropic-oriented coding agent CLI | Anthropic first, future adapter growth possible | Should prefer placeholder env projection and explicit config-file mapping rather than raw secret sprawl | +| `opencode` | Open coding/runtime CLI with multiple provider backends | GitHub/Copilot, Anthropic, OpenAI-compatible families | Needs an adapter boundary that does not collapse GitHub auth, inference routing, and tool configuration into one concept | +| Future tool adapters | Extension point | Tool-specific | Must define env/config/endpoint needs explicitly before sandbox projection is allowed | + +This matrix is intentionally separate from provider discovery. For example, a `github` provider may supply credentials that `opencode` can use, but the decision to project those credentials into an `opencode` child process belongs to the tool adapter contract, not the provider plugin alone. + +## Tool Projection Contract + +The sandbox projection contract is per tool, not global. The current design target is: + +### `claude code` + +- **Environment variables:** only adapter-approved Anthropic-related variables should appear in the child process, and they should prefer placeholder values that are resolved at the proxy boundary rather than raw secret values. +- **Config files:** adapter-managed projection may eventually populate Claude-specific config or credential file locations, but only from an explicit allowlist of paths. 
+- **Direct secret projection:** disallowed by default; any exception must be documented as a tool-specific requirement. +- **Outbound endpoints:** Anthropic API endpoints plus any documented non-model support endpoints required by the tool. +- **Vendor-auth boundary:** Anthropic credential discovery and inference routing remain provider-layer responsibilities; the `claude code` adapter only decides which approved Anthropic-facing fields can enter the child process and whether they remain placeholders until proxy rewrite time. + +### `opencode` + +- **Environment variables:** adapter-approved variables may include provider-specific keys used by `opencode`, but only when the tool contract explicitly allows them. +- **Config files:** adapter-managed projection may populate `opencode` config file paths from an allowlisted set. +- **Direct secret projection:** disallowed by default; exceptions require an explicit tool/vendor contract. +- **Outbound endpoints:** GitHub/Copilot-related endpoints plus OpenAI-compatible or Anthropic-compatible inference endpoints only when the selected `opencode` adapter path explicitly supports them. +- **Vendor-auth boundary:** GitHub token projection and any future GitHub Copilot model access must be treated as an explicit adapter contract rather than inferred from generic `github` or `opencode` provider discovery alone. + +### Future tool adapters + +Before a new tool becomes first-class, it must define: + +- env-var projection needs +- config-file projection needs +- whether any direct secret projection is unavoidable +- outbound endpoint categories + +If that contract is not defined, the intended end state is for OpenShell to fail closed rather than guessing how to inject provider state into the child process. In the current Phase 1 slice, fail-closed enforcement only applies to detected first-class tool commands; all other commands still use the legacy generic projection path until later phases replace that fallback. 
+ +## Vendor Auth Risk Notes + +The next phase of this work adds vendor-native auth/model design on top of the tool adapter layer. The critical constraints are: + +- Anthropic and GitHub/Copilot auth flows may require different projection shapes even when both ultimately drive model access. +- Provider discovery does not automatically imply that a credential is safe to inject into a child process for a given tool. +- Endpoint allowlists must be tied to explicit tool/vendor contracts, not to broad assumptions like "all GitHub endpoints" or "all Anthropic-compatible hosts". +- If a vendor flow depends on direct config-file or session-state projection rather than placeholder env vars, that must be documented as a deliberate exception and tested separately. + ## Data Model Provider is defined in `proto/datamodel.proto`: diff --git a/architecture/sandbox.md b/architecture/sandbox.md index a8e4d247..799dd186 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -36,6 +36,24 @@ All paths are relative to `crates/openshell-sandbox/src/`. ## Startup and Orchestration +## First-Class Sandbox Tools + +The sandbox runtime is intentionally generic about the child process it launches, but OpenShell still needs an explicit first-class tool matrix so credential projection, config-file layout, and network policy stay predictable. The current first-class tool targets are: + +- `claude code` +- `opencode` + +Future tools should fit the same adapter shape rather than bypassing it with tool-specific ad hoc logic in the supervisor. + +Phase 1 only enforces this boundary for detected first-class tool commands (`claude`, `opencode`). Other commands still use the legacy generic provider-env projection path until later phases tighten the adapter model further. 
+ +The key separation is: + +- **tool adapter**: defines what a specific CLI tool needs inside the sandbox (env vars, config-file projection, trust-store expectations, endpoint categories) +- **provider/model routing**: defines how OpenShell discovers credentials, resolves providers, and routes model traffic such as Anthropic-compatible or OpenAI-compatible inference + +That separation matters because one provider may support multiple tools, and one tool may need credentials from multiple providers. The sandbox should therefore project a stable tool contract while the provider and inference layers remain the source of truth for credential discovery and backend routing. + The `run_sandbox()` function in `crates/openshell-sandbox/src/lib.rs` is the main orchestration entry point. It executes the following steps in order. ### Orchestration flow diff --git a/crates/openshell-providers/src/providers/claude.rs b/crates/openshell-providers/src/providers/claude.rs index 576b30e3..64bcd642 100644 --- a/crates/openshell-providers/src/providers/claude.rs +++ b/crates/openshell-providers/src/providers/claude.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, + discover_with_spec, ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, }; pub struct ClaudeProvider; @@ -43,4 +43,17 @@ mod tests { Some(&"test-key".to_string()) ); } + + #[test] + fn discovers_legacy_claude_env_alias() { + let ctx = MockDiscoveryContext::new().with_env("CLAUDE_API_KEY", "legacy-key"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + + assert_eq!( + discovered.credentials.get("CLAUDE_API_KEY"), + Some(&"legacy-key".to_string()) + ); + } } diff --git a/crates/openshell-providers/src/providers/github.rs b/crates/openshell-providers/src/providers/github.rs index 4ca25d6d..8a311d25 100644 --- 
a/crates/openshell-providers/src/providers/github.rs +++ b/crates/openshell-providers/src/providers/github.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, + discover_with_spec, ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, }; pub struct GithubProvider; @@ -31,6 +31,7 @@ mod tests { use super::SPEC; use crate::discover_with_spec; use crate::test_helpers::MockDiscoveryContext; + use std::collections::HashMap; #[test] fn discovers_github_env_credentials() { @@ -43,4 +44,35 @@ mod tests { Some(&"gh-token".to_string()) ); } + + #[test] + fn discovers_github_token_env_alias() { + let ctx = MockDiscoveryContext::new().with_env("GITHUB_TOKEN", "github-token"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + + assert_eq!( + discovered.credentials.get("GITHUB_TOKEN"), + Some(&"github-token".to_string()) + ); + } + + #[test] + fn discovers_both_github_token_env_vars_for_copilot_targeted_path() { + let ctx = MockDiscoveryContext::new() + .with_env("GITHUB_TOKEN", "github-token") + .with_env("GH_TOKEN", "gh-token"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + + assert_eq!( + discovered.credentials, + HashMap::from([ + ("GITHUB_TOKEN".to_string(), "github-token".to_string()), + ("GH_TOKEN".to_string(), "gh-token".to_string()), + ]) + ); + } } diff --git a/crates/openshell-providers/src/providers/opencode.rs b/crates/openshell-providers/src/providers/opencode.rs index 417bdb6c..97949db6 100644 --- a/crates/openshell-providers/src/providers/opencode.rs +++ b/crates/openshell-providers/src/providers/opencode.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, + discover_with_spec, ProviderDiscoverySpec, 
ProviderError, ProviderPlugin, RealDiscoveryContext, }; pub struct OpencodeProvider; @@ -32,15 +32,59 @@ mod tests { use crate::discover_with_spec; use crate::test_helpers::MockDiscoveryContext; - #[test] - fn discovers_opencode_env_credentials() { - let ctx = MockDiscoveryContext::new().with_env("OPENCODE_API_KEY", "op-key"); + fn assert_discovers_declared_opencode_credential(key: &str, value: &str) { + let ctx = MockDiscoveryContext::new().with_env(key, value); let discovered = discover_with_spec(&SPEC, &ctx) .expect("discovery") .expect("provider"); - assert_eq!( - discovered.credentials.get("OPENCODE_API_KEY"), - Some(&"op-key".to_string()) + + assert_eq!(discovered.credentials.get(key), Some(&value.to_string())); + } + + fn assert_github_credentials_are_not_discovered_as_opencode(ctx: MockDiscoveryContext) { + let discovered = discover_with_spec(&SPEC, &ctx).expect("discovery"); + + assert!( + discovered.is_none(), + "GitHub token discovery must stay fully separate from opencode provider discovery for the current Copilot-targeted contract" + ); + } + + #[test] + fn discovers_opencode_api_key_credential() { + assert_discovers_declared_opencode_credential("OPENCODE_API_KEY", "op-key"); + } + + #[test] + fn discovers_openrouter_api_key_credential() { + assert_discovers_declared_opencode_credential("OPENROUTER_API_KEY", "openrouter-key"); + } + + #[test] + fn discovers_openai_api_key_credential() { + assert_discovers_declared_opencode_credential("OPENAI_API_KEY", "openai-key"); + } + + #[test] + fn does_not_claim_github_token_discovery_as_opencode_credential() { + assert_github_credentials_are_not_discovered_as_opencode( + MockDiscoveryContext::new().with_env("GITHUB_TOKEN", "gh-token"), + ); + } + + #[test] + fn does_not_claim_gh_token_discovery_as_opencode_credential() { + assert_github_credentials_are_not_discovered_as_opencode( + MockDiscoveryContext::new().with_env("GH_TOKEN", "gh-token"), + ); + } + + #[test] + fn 
does_not_claim_github_discovery_when_both_github_token_env_vars_are_present() { + assert_github_credentials_are_not_discovered_as_opencode( + MockDiscoveryContext::new() + .with_env("GITHUB_TOKEN", "github-token") + .with_env("GH_TOKEN", "gh-token"), ); } } diff --git a/crates/openshell-sandbox/src/child_env.rs b/crates/openshell-sandbox/src/child_env.rs index ebd47e22..54704026 100644 --- a/crates/openshell-sandbox/src/child_env.rs +++ b/crates/openshell-sandbox/src/child_env.rs @@ -5,6 +5,34 @@ use std::path::Path; const LOCAL_NO_PROXY: &str = "127.0.0.1,localhost,::1"; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum ToolAdapter { + ClaudeCode, + OpenCode, +} + +impl ToolAdapter { + pub(crate) fn command_name(self) -> &'static str { + match self { + Self::ClaudeCode => "claude", + Self::OpenCode => "opencode", + } + } +} + +pub(crate) fn detect_tool_adapter(command: &[String]) -> Option<ToolAdapter> { + let first = command.first()?; + let basename = std::path::Path::new(first) + .file_name() + .and_then(|name| name.to_str())?; + + match basename { + "claude" => Some(ToolAdapter::ClaudeCode), + "opencode" => Some(ToolAdapter::OpenCode), + _ => None, + } +} + pub(crate) fn proxy_env_vars(proxy_url: &str) -> [(&'static str, String); 9] { [ ("ALL_PROXY", proxy_url.to_owned()), @@ -80,4 +108,25 @@ mod tests { assert!(stdout.contains("NODE_EXTRA_CA_CERTS=/etc/openshell-tls/openshell-ca.pem")); assert!(stdout.contains("SSL_CERT_FILE=/etc/openshell-tls/ca-bundle.pem")); } + + #[test] + fn detects_claude_tool_adapter_from_command_basename() { + let command = vec!["/usr/local/bin/claude".to_string(), "code".to_string()]; + + assert_eq!(detect_tool_adapter(&command), Some(ToolAdapter::ClaudeCode)); + } + + #[test] + fn detects_opencode_tool_adapter_from_command_basename() { + let command = vec!["opencode".to_string(), "sandbox".to_string()]; + + assert_eq!(detect_tool_adapter(&command), Some(ToolAdapter::OpenCode)); + } + + #[test] + fn 
rejects_unsupported_tool_adapter_command() { + let command = vec!["python".to_string(), "script.py".to_string()]; + + assert_eq!(detect_tool_adapter(&command), None); + } } diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 754c3be0..0886490b 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -44,6 +44,7 @@ use crate::proxy::ProxyHandle; #[cfg(target_os = "linux")] use crate::sandbox::linux::netns::NetworkNamespace; use crate::secrets::SecretResolver; +use crate::child_env::detect_tool_adapter; pub use process::{ProcessHandle, ProcessStatus}; /// Default interval (seconds) for re-fetching the inference route bundle from @@ -202,7 +203,7 @@ pub async fn run_sandbox( std::collections::HashMap::new() }; - let (provider_env, secret_resolver) = SecretResolver::from_provider_env(provider_env); + let (provider_env, secret_resolver) = project_provider_env_for_command(&command, provider_env)?; let secret_resolver = secret_resolver.map(Arc::new); // Create identity cache for SHA256 TOFU when OPA is active @@ -1153,6 +1154,20 @@ fn validate_sandbox_user(policy: &SandboxPolicy) -> Result<()> { Ok(()) } +fn project_provider_env_for_command( + command: &[String], + provider_env: std::collections::HashMap<String, String>, +) -> Result<( + std::collections::HashMap<String, String>, + Option<SecretResolver>, +)> { + match detect_tool_adapter(command) { + Some(tool) => SecretResolver::from_tool_provider_env(tool, provider_env) + .map_err(|msg| miette::miette!(msg)), + None => Ok(SecretResolver::from_provider_env(provider_env)), + } +} + /// Prepare filesystem for the sandboxed process. 
/// /// Creates `read_write` directories if they don't exist and sets ownership @@ -1497,6 +1512,22 @@ mod tests { assert_eq!(system[0].name, "sandbox-system"); } + #[test] + fn project_provider_env_for_command_rejects_disallowed_tool_keys() { + let command = vec!["claude".to_string(), "code".to_string()]; + let provider_env = [ + ("ANTHROPIC_API_KEY".to_string(), "sk-test".to_string()), + ("GITHUB_TOKEN".to_string(), "gh-test".to_string()), + ] + .into_iter() + .collect(); + + let error = project_provider_env_for_command(&command, provider_env) + .expect_err("claude tool adapter must reject unrelated provider keys"); + + assert!(error.to_string().contains("GITHUB_TOKEN")); + } + // -- build_inference_context tests -- #[tokio::test] diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs index 4ee1ee84..70744a5a 100644 --- a/crates/openshell-sandbox/src/secrets.rs +++ b/crates/openshell-sandbox/src/secrets.rs @@ -3,6 +3,8 @@ use std::collections::HashMap; +use crate::child_env::ToolAdapter; + const PLACEHOLDER_PREFIX: &str = "openshell:resolve:env:"; #[derive(Debug, Clone, Default)] @@ -30,6 +32,31 @@ impl SecretResolver { (child_env, Some(Self { by_placeholder })) } + pub(crate) fn from_tool_provider_env( + tool: ToolAdapter, + provider_env: HashMap, + ) -> Result<(HashMap, Option), String> { + if provider_env.is_empty() { + return Ok((HashMap::new(), None)); + } + + let allowed_keys = allowed_env_keys(tool); + let mut filtered = HashMap::with_capacity(provider_env.len()); + + for (key, value) in provider_env { + if !allowed_keys.contains(&key.as_str()) { + return Err(format!( + "tool '{}' does not allow projecting provider env key '{}'", + tool.command_name(), + key + )); + } + filtered.insert(key, value); + } + + Ok(Self::from_provider_env(filtered)) + } + pub(crate) fn resolve_placeholder(&self, value: &str) -> Option<&str> { self.by_placeholder.get(value).map(String::as_str) } @@ -48,6 +75,19 @@ impl SecretResolver { } } 
+fn allowed_env_keys(tool: ToolAdapter) -> &'static [&'static str] { + match tool { + ToolAdapter::ClaudeCode => &["ANTHROPIC_API_KEY", "CLAUDE_API_KEY"], + ToolAdapter::OpenCode => &[ + "OPENCODE_API_KEY", + "OPENROUTER_API_KEY", + "OPENAI_API_KEY", + "GITHUB_TOKEN", + "GH_TOKEN", + ], + } +} + pub(crate) fn placeholder_for_env_key(key: &str) -> String { format!("{PLACEHOLDER_PREFIX}{key}") } @@ -170,10 +210,9 @@ mod tests { assert!(rewritten.ends_with("\r\n\r\nhello")); } - /// Simulates the full round-trip: provider env → child placeholders → - /// HTTP headers → rewrite. This is the exact flow that occurs when a - /// sandbox child process reads placeholder env vars, constructs an HTTP - /// request, and the proxy rewrites headers before forwarding upstream. + /// Exercises the placeholder lifecycle in this module: provider env values + /// become child-visible placeholders, and placeholder-bearing HTTP headers + /// are rewritten to real secrets before forwarding. #[test] fn full_round_trip_child_env_to_rewritten_headers() { let provider_env: HashMap = [ @@ -253,6 +292,153 @@ mod tests { assert!(resolver.is_none()); } + #[test] + fn tool_projection_allows_only_documented_claude_keys() { + let (child_env, resolver) = SecretResolver::from_tool_provider_env( + ToolAdapter::ClaudeCode, + [ + ("ANTHROPIC_API_KEY".to_string(), "sk-test".to_string()), + ("CLAUDE_API_KEY".to_string(), "sk-alt".to_string()), + ] + .into_iter() + .collect(), + ) + .expect("claude projection succeeds"); + + assert_eq!(child_env.len(), 2); + assert!(resolver.is_some()); + } + + #[test] + fn tool_projection_rejects_disallowed_claude_key() { + let error = SecretResolver::from_tool_provider_env( + ToolAdapter::ClaudeCode, + [ + ("ANTHROPIC_API_KEY".to_string(), "sk-test".to_string()), + ("GITHUB_TOKEN".to_string(), "gh-test".to_string()), + ] + .into_iter() + .collect(), + ) + .expect_err("unexpected github token must be rejected for claude tool adapter"); + + assert!(error + .contains("tool 
'claude' does not allow projecting provider env key 'GITHUB_TOKEN'")); + } + + #[test] + fn tool_projection_allows_documented_opencode_keys() { + let (child_env, resolver) = SecretResolver::from_tool_provider_env( + ToolAdapter::OpenCode, + [ + ("OPENCODE_API_KEY".to_string(), "opc-test".to_string()), + ("GITHUB_TOKEN".to_string(), "gh-test".to_string()), + ] + .into_iter() + .collect(), + ) + .expect("opencode projection succeeds"); + + assert_eq!(child_env.len(), 2); + assert!(resolver.is_some()); + } + + #[test] + fn tool_projection_allows_both_documented_opencode_github_tokens() { + let (child_env, resolver) = SecretResolver::from_tool_provider_env( + ToolAdapter::OpenCode, + [ + ("GITHUB_TOKEN".to_string(), "ghu-test".to_string()), + ("GH_TOKEN".to_string(), "ghs-test".to_string()), + ] + .into_iter() + .collect(), + ) + .expect("opencode projection succeeds for github token aliases"); + let resolver = resolver.expect("resolver"); + + assert_eq!( + child_env.get("GITHUB_TOKEN"), + Some(&placeholder_for_env_key("GITHUB_TOKEN")) + ); + assert_eq!( + child_env.get("GH_TOKEN"), + Some(&placeholder_for_env_key("GH_TOKEN")) + ); + assert_eq!( + resolver.resolve_placeholder(&placeholder_for_env_key("GITHUB_TOKEN")), + Some("ghu-test") + ); + assert_eq!( + resolver.resolve_placeholder(&placeholder_for_env_key("GH_TOKEN")), + Some("ghs-test") + ); + } + + #[test] + fn tool_projection_rejects_unrelated_opencode_key() { + let error = SecretResolver::from_tool_provider_env( + ToolAdapter::OpenCode, + [ + ("GITHUB_TOKEN".to_string(), "gh-test".to_string()), + ("UNRELATED_TOKEN".to_string(), "nope".to_string()), + ] + .into_iter() + .collect(), + ) + .expect_err("unexpected keys must be rejected for opencode tool adapter"); + + assert!(error.contains( + "tool 'opencode' does not allow projecting provider env key 'UNRELATED_TOKEN'" + )); + } + + #[test] + fn opencode_token_placeholders_resolve_only_when_forwarding_headers() { + let (child_env, resolver) = 
SecretResolver::from_tool_provider_env( + ToolAdapter::OpenCode, + [ + ("GITHUB_TOKEN".to_string(), "ghu-test".to_string()), + ("GH_TOKEN".to_string(), "ghs-test".to_string()), + ] + .into_iter() + .collect(), + ) + .expect("opencode projection succeeds for github token aliases"); + + let github_placeholder = child_env.get("GITHUB_TOKEN").expect("github placeholder"); + let gh_placeholder = child_env.get("GH_TOKEN").expect("gh placeholder"); + + assert_eq!(github_placeholder, &placeholder_for_env_key("GITHUB_TOKEN")); + assert_eq!(gh_placeholder, &placeholder_for_env_key("GH_TOKEN")); + assert!(!github_placeholder.contains("ghu-test")); + assert!(!gh_placeholder.contains("ghs-test")); + + let body = format!("body={gh_placeholder}"); + + let raw = format!( + "POST /v1/chat/completions HTTP/1.1\r\n\ + Authorization: Bearer {github_placeholder}\r\n\ + X-GitHub-Token: {gh_placeholder}\r\n\ + Content-Length: {}\r\n\r\n\ + {body}", + body.len() + ); + + assert!(raw.contains(github_placeholder)); + assert!(raw.contains(gh_placeholder)); + assert!(!raw.contains("ghu-test")); + assert!(!raw.contains("ghs-test")); + + let rewritten = rewrite_http_header_block(raw.as_bytes(), resolver.as_ref()); + let rewritten = String::from_utf8(rewritten).expect("utf8"); + + assert!(rewritten.contains("Authorization: Bearer ghu-test\r\n")); + assert!(rewritten.contains("X-GitHub-Token: ghs-test\r\n")); + assert!(rewritten.contains(&format!("Content-Length: {}\r\n", body.len()))); + assert!(rewritten.ends_with(&format!("\r\n\r\nbody={gh_placeholder}"))); + } + #[test] fn rewrite_with_no_resolver_returns_original() { let raw = b"GET / HTTP/1.1\r\nAuthorization: Bearer my-token\r\n\r\n"; diff --git a/e2e/install/bash_test.sh b/e2e/install/bash_test.sh index 2b4db1ca..54308099 100755 --- a/e2e/install/bash_test.sh +++ b/e2e/install/bash_test.sh @@ -4,7 +4,7 @@ # # Bash e2e tests for install.sh. 
# -# Downloads the latest release for real and validates: +# Uses fake release assets and validates: # - Binary is installed to the correct directory # - Binary is executable and runs # - PATH guidance shows the correct export command for bash @@ -41,9 +41,13 @@ test_binary_runs() { printf 'TEST: binary runs successfully\n' if _version="$("$INSTALL_DIR/openshell" --version 2>/dev/null)"; then - pass "openshell --version succeeds: $_version" + if [ "$_version" = "openshell ${TEST_RELEASE_VERSION}" ]; then + pass "openshell --version matches fake release: $_version" + else + fail "openshell --version matches fake release" "expected openshell ${TEST_RELEASE_VERSION}, got $_version" + fi else - fail "openshell --version succeeds" "exit code: $?" + fail "openshell --version matches fake release" "exit code: $?" fi } @@ -61,6 +65,132 @@ test_guidance_mentions_not_on_path() { assert_output_contains "$INSTALL_OUTPUT" "$INSTALL_DIR" "includes install dir in guidance" } +test_default_release_root_uses_fork_repo() { + printf 'TEST: default release root uses selected fork-owned repo\n' + + SHELL="/bin/bash" run_install + + assert_release_root_uses_repo "linuxdevel/OpenShell" "default release root" +} + +test_release_root_override_changes_repo() { + printf 'TEST: explicit repo override changes derived release root\n' + + SHELL="/bin/bash" run_install_with_env \ + OPENSHELL_RELEASE_REPO=example/custom-openshell + + assert_release_root_uses_repo "example/custom-openshell" "override release root" +} + +test_release_root_override_rejects_malformed_repo() { + printf 'TEST: malformed repo override fails clearly\n' + + SHELL="/bin/bash" run_install_expect_failure \ + OPENSHELL_RELEASE_REPO=not-a-valid-repo + + assert_release_repo_validation_error "not-a-valid-repo" +} + +test_tagged_release_fails_without_checksum_manifest() { + printf 'TEST: tagged release fails when checksum manifest is missing\n' + + if SHELL="/bin/bash" run_install_with_checksum_state_expect_failure 
manifest-missing; then + assert_output_contains "$(read_curl_log)" "/openshell-checksums-sha256.txt" "downloads checksum manifest" + assert_output_not_contains "$(read_curl_log)" ".sig" "does not download detached signature metadata" + assert_output_contains "$INSTALL_OUTPUT" "missing checksum manifest" "fails clearly for missing checksum manifest" + else + fail "tagged release fails when checksum manifest is missing" "installer unexpectedly succeeded without checksum manifest" + fi +} + +test_tagged_release_verifies_archive_checksum_against_manifest() { + printf 'TEST: tagged release verifies archive checksum against checksum manifest\n' + + if SHELL="/bin/bash" run_install_with_checksum_state manifest-present; then + assert_output_contains "$(read_curl_log)" "/openshell-checksums-sha256.txt" "downloads checksum manifest before install" + assert_output_contains "$INSTALL_OUTPUT" "verifying checksum..." "announces checksum verification" + else + fail "tagged release verifies archive checksum against checksum manifest" "$INSTALL_OUTPUT" + fi +} + +test_tagged_release_fails_when_checksum_mismatches_manifest() { + printf 'TEST: tagged release fails when archive checksum does not match manifest\n' + + if SHELL="/bin/bash" run_install_with_checksum_state_expect_failure checksum-mismatch; then + assert_output_contains "$INSTALL_OUTPUT" "checksum verification failed for openshell-" "fails on checksum mismatch" + else + fail "tagged release fails when archive checksum does not match manifest" "installer unexpectedly accepted a tampered archive" + fi +} + +test_tagged_release_does_not_require_detached_signature_artifacts() { + printf 'TEST: tagged release succeeds without detached signature artifacts\n' + + if SHELL="/bin/bash" run_install_with_checksum_state manifest-present; then + assert_output_not_contains "$(read_curl_log)" ".sig" "does not request detached signature metadata" + assert_output_not_contains "$INSTALL_OUTPUT" "signature metadata" "does not mention 
detached signature verification" + assert_output_not_contains "$INSTALL_OUTPUT" "openshell-verify-signature" "does not require verifier helper in active path" + else + fail "tagged release succeeds without detached signature artifacts" "$INSTALL_OUTPUT" + fi +} + +test_selects_claude_code() { + printf 'TEST: installer accepts claude code selection\n' + + SHELL="/bin/bash" run_install_with_env \ + OPENSHELL_TOOL=claude-code + + assert_setup_selection "tool" "claude-code" "shows claude code selection" + assert_setup_selection_notice +} + +test_selects_opencode() { + printf 'TEST: installer accepts opencode selection\n' + + SHELL="/bin/bash" run_install_with_env \ + OPENSHELL_TOOL=opencode + + assert_setup_selection "tool" "opencode" "shows opencode selection" +} + +test_selects_vendor_model_path() { + printf 'TEST: installer accepts vendor and model path selection\n' + + SHELL="/bin/bash" run_install_with_env \ + OPENSHELL_TOOL=claude-code \ + OPENSHELL_VENDOR=anthropic \ + OPENSHELL_MODEL_PATH=claude-sonnet-4 + + assert_setup_selection "vendor" "anthropic" "shows vendor selection" + assert_setup_selection "model path" "claude-sonnet-4" "shows model path selection" +} + +test_rejects_unsupported_combination() { + printf 'TEST: installer rejects unsupported tool and vendor selection\n' + + SHELL="/bin/bash" run_install_expect_failure \ + OPENSHELL_TOOL=claude-code \ + OPENSHELL_VENDOR=github-copilot + + assert_output_contains "$INSTALL_OUTPUT" "unsupported installer selection" "reports unsupported combination" + assert_output_contains "$INSTALL_OUTPUT" "claude-code + github-copilot" "includes unsupported pair" +} + +test_accepts_selection_flags() { + printf 'TEST: installer accepts setup selection flags\n' + + SHELL="/bin/bash" run_install_with_args \ + --tool opencode \ + --vendor github-copilot \ + --model-path copilot/chat + + assert_setup_selection "tool" "opencode" "shows tool flag selection" + assert_setup_selection "vendor" "github-copilot" "shows vendor flag 
selection" + assert_setup_selection "model path" "copilot/chat" "shows model-path flag selection" +} + # --------------------------------------------------------------------------- # Runner # --------------------------------------------------------------------------- @@ -75,6 +205,18 @@ test_binary_installed; echo "" test_binary_executable; echo "" test_binary_runs; echo "" test_guidance_shows_export_path; echo "" -test_guidance_mentions_not_on_path +test_guidance_mentions_not_on_path; echo "" +test_default_release_root_uses_fork_repo; echo "" +test_release_root_override_changes_repo; echo "" +test_release_root_override_rejects_malformed_repo; echo "" +test_tagged_release_fails_without_checksum_manifest; echo "" +test_tagged_release_verifies_archive_checksum_against_manifest; echo "" +test_tagged_release_fails_when_checksum_mismatches_manifest; echo "" +test_tagged_release_does_not_require_detached_signature_artifacts; echo "" +test_selects_claude_code; echo "" +test_selects_opencode; echo "" +test_selects_vendor_model_path; echo "" +test_rejects_unsupported_combination; echo "" +test_accepts_selection_flags print_summary diff --git a/e2e/install/fish_test.fish b/e2e/install/fish_test.fish index 10176071..8d921fcc 100755 --- a/e2e/install/fish_test.fish +++ b/e2e/install/fish_test.fish @@ -4,7 +4,7 @@ # # Fish e2e tests for install.sh. 
# -# Downloads the latest release for real and validates: +# Uses fake release assets and validates: # - Binary is installed to the correct directory # - Binary is executable and runs # - PATH guidance shows fish_add_path (not export PATH) @@ -20,6 +20,8 @@ set -g INSTALL_SCRIPT "$REPO_ROOT/install.sh" # Set by run_install set -g INSTALL_DIR "" set -g INSTALL_OUTPUT "" +set -g TEST_ROOT "" +set -g TEST_RELEASE_VERSION "v0.0.10-test" # --------------------------------------------------------------------------- # Helpers @@ -62,13 +64,146 @@ function assert_output_not_contains end end +function assert_setup_selection + set -l kind $argv[1] + set -l value $argv[2] + set -l label $argv[3] + + assert_output_contains "$INSTALL_OUTPUT" "validated setup $kind selection: $value" "$label" +end + +function assert_setup_selection_notice + assert_output_contains "$INSTALL_OUTPUT" "applies to later OpenShell setup" "mentions later setup flow" + assert_output_contains "$INSTALL_OUTPUT" "installs the openshell CLI" "keeps CLI installer scope clear" +end + +function test_target + switch (uname -m) + case x86_64 amd64 + set -l arch x86_64 + case aarch64 arm64 + set -l arch aarch64 + case '*' + set -l arch x86_64 + end + + switch (uname -s) + case Darwin + set -l os apple-darwin + case '*' + set -l os unknown-linux-musl + end + + printf '%s-%s\n' $arch $os +end + +function setup_fake_release_assets + set -g TEST_ROOT (mktemp -d) + set -l target (test_target) + set -l filename "openshell-$target.tar.gz" + set -l release_dir "$TEST_ROOT/releases/download/$TEST_RELEASE_VERSION" + set -l fake_bin_dir "$TEST_ROOT/fakebin" + + mkdir -p "$release_dir" "$fake_bin_dir" + + cat > "$TEST_ROOT/openshell" </dev/null 2>&1 + shasum -a 256 "$release_dir/$filename" | awk '{print $1 " '$filename'"}' > "$release_dir/openshell-checksums-sha256.txt" + else + sha256sum "$release_dir/$filename" | awk '{print $1 " '$filename'"}' > "$release_dir/openshell-checksums-sha256.txt" + end + + cat > 
"$fake_bin_dir/curl" <<'EOF' +#!/bin/sh +set -eu + +_output="" +_format="" +_url="" + +while [ "$#" -gt 0 ]; do + case "$1" in + -o) + _output="$2" + shift 2 + ;; + -w) + _format="$2" + shift 2 + ;; + --retry) + shift 2 + ;; + -f|-L|-s|-S) + shift + ;; + *) + _url="$1" + shift + ;; + esac +done + +if [ -n "$_format" ]; then + case "$_url" in + */releases/latest) + printf '%s/releases/tag/%s' "${TEST_GITHUB_URL}" "${TEST_RELEASE_VERSION}" + ;; + *) + printf '%s' "$_url" + ;; + esac + exit 0 +fi + +_name="${_url##*/}" +[ -n "$_output" ] || exit 1 +cp "${TEST_RELEASE_DIR}/${_name}" "$_output" +EOF + chmod 755 "$fake_bin_dir/curl" +end + +function run_with_test_env + set -l cmd $argv + setup_fake_release_assets + + set -g INSTALL_OUTPUT (env \ + OPENSHELL_INSTALL_DIR="$INSTALL_DIR" \ + PATH="$TEST_ROOT/fakebin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" \ + SHELL="/usr/bin/fish" \ + TEST_RELEASE_DIR="$TEST_ROOT/releases/download/$TEST_RELEASE_VERSION" \ + TEST_RELEASE_VERSION="$TEST_RELEASE_VERSION" \ + TEST_GITHUB_URL="https://github.com/NVIDIA/OpenShell" \ + $cmd 2>&1) + + return $status +end + function run_install set -g INSTALL_DIR (mktemp -d)/bin - set -g INSTALL_OUTPUT (OPENSHELL_INSTALL_DIR="$INSTALL_DIR" \ - SHELL="/usr/bin/fish" \ - PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" \ - sh "$INSTALL_SCRIPT" 2>&1) + run_with_test_env sh "$INSTALL_SCRIPT" + + if test $status -ne 0 + printf 'install.sh failed:\n%s\n' "$INSTALL_OUTPUT" >&2 + return 1 + end +end + +function run_install_with_env + set -g INSTALL_DIR (mktemp -d)/bin + + run_with_test_env $argv sh "$INSTALL_SCRIPT" if test $status -ne 0 printf 'install.sh failed:\n%s\n' "$INSTALL_OUTPUT" >&2 @@ -76,6 +211,28 @@ function run_install end end +function run_install_with_args + set -g INSTALL_DIR (mktemp -d)/bin + + run_with_test_env sh "$INSTALL_SCRIPT" $argv + + if test $status -ne 0 + printf 'install.sh failed:\n%s\n' "$INSTALL_OUTPUT" >&2 + return 1 + end +end + +function run_install_expect_failure 
+ set -g INSTALL_DIR (mktemp -d)/bin + + run_with_test_env $argv sh "$INSTALL_SCRIPT" + + if test $status -eq 0 + printf 'install.sh unexpectedly succeeded:\n%s\n' "$INSTALL_OUTPUT" >&2 + return 1 + end +end + # --------------------------------------------------------------------------- # Tests # --------------------------------------------------------------------------- @@ -125,6 +282,51 @@ function test_guidance_mentions_not_on_path assert_output_contains "$INSTALL_OUTPUT" "$INSTALL_DIR" "includes install dir in guidance" end +function test_selects_claude_code + printf 'TEST: installer accepts claude code selection\n' + + run_install_with_env OPENSHELL_TOOL=claude-code + + assert_setup_selection tool claude-code "shows claude code selection" + assert_setup_selection_notice +end + +function test_selects_opencode + printf 'TEST: installer accepts opencode selection\n' + + run_install_with_env OPENSHELL_TOOL=opencode + + assert_setup_selection tool opencode "shows opencode selection" +end + +function test_selects_vendor_model_path + printf 'TEST: installer accepts vendor and model path selection\n' + + run_install_with_env OPENSHELL_TOOL=claude-code OPENSHELL_VENDOR=anthropic OPENSHELL_MODEL_PATH=claude-sonnet-4 + + assert_setup_selection vendor anthropic "shows vendor selection" + assert_setup_selection "model path" claude-sonnet-4 "shows model path selection" +end + +function test_rejects_unsupported_combination + printf 'TEST: installer rejects unsupported tool and vendor selection\n' + + run_install_expect_failure OPENSHELL_TOOL=claude-code OPENSHELL_VENDOR=github-copilot + + assert_output_contains "$INSTALL_OUTPUT" "unsupported installer selection" "reports unsupported combination" + assert_output_contains "$INSTALL_OUTPUT" "claude-code + github-copilot" "includes unsupported pair" +end + +function test_accepts_selection_flags + printf 'TEST: installer accepts setup selection flags\n' + + run_install_with_args --tool opencode --vendor github-copilot 
--model-path copilot/chat + + assert_setup_selection tool opencode "shows tool flag selection" + assert_setup_selection vendor github-copilot "shows vendor flag selection" + assert_setup_selection "model path" copilot/chat "shows model-path flag selection" +end + # --------------------------------------------------------------------------- # Runner # --------------------------------------------------------------------------- @@ -144,6 +346,16 @@ echo "" test_guidance_shows_fish_add_path echo "" test_guidance_mentions_not_on_path +echo "" +test_selects_claude_code +echo "" +test_selects_opencode +echo "" +test_selects_vendor_model_path +echo "" +test_rejects_unsupported_combination +echo "" +test_accepts_selection_flags printf '\n=== Results: %d passed, %d failed ===\n' $PASS $FAIL diff --git a/e2e/install/helpers.sh b/e2e/install/helpers.sh index ff5f6637..b49e1a16 100644 --- a/e2e/install/helpers.sh +++ b/e2e/install/helpers.sh @@ -8,7 +8,8 @@ # Provides: # - pass / fail / print_summary # - assert_output_contains / assert_output_not_contains -# - run_install (runs the real install.sh to a temp dir, captures output) +# - run_install (runs install.sh against fake release assets) +# - run_install_with_checksum_state / _expect_failure # - REPO_ROOT / INSTALL_SCRIPT paths # - INSTALL_DIR / INSTALL_OUTPUT (set after run_install) @@ -22,6 +23,11 @@ _FAIL=0 # Set by run_install INSTALL_DIR="" INSTALL_OUTPUT="" +TEST_ROOT="" +TEST_RELEASE_VERSION="v0.0.10-test" +TEST_DEFAULT_RELEASE_REPO="abols/OpenShell" +TEST_CURL_LOG="" +TEST_NON_PRODUCTION_OVERRIDE_ENV="OPENSHELL_INSTALL_ALLOW_INSECURE_PROVENANCE" # --------------------------------------------------------------------------- # Assertions @@ -64,10 +70,178 @@ assert_output_not_contains() { fi } +assert_setup_selection() { + _ass_kind="$1" + _ass_value="$2" + _ass_label="$3" + + assert_output_contains "$INSTALL_OUTPUT" "validated setup ${_ass_kind} selection: ${_ass_value}" "$_ass_label" +} + 
+assert_setup_selection_notice() { + assert_output_contains "$INSTALL_OUTPUT" "applies to later OpenShell setup" "mentions later setup flow" + assert_output_contains "$INSTALL_OUTPUT" "installs the openshell CLI" "keeps CLI installer scope clear" +} + +assert_release_root_uses_repo() { + _arr_repo="$1" + _arr_label="$2" + _arr_root="https://github.com/${_arr_repo}/releases" + + assert_output_contains "$(read_curl_log)" "${_arr_root}/latest" "${_arr_label}: latest release uses ${_arr_repo}" + assert_output_contains "$(read_curl_log)" "${_arr_root}/download/${TEST_RELEASE_VERSION}" "${_arr_label}: asset download uses ${_arr_repo}" +} + +assert_release_repo_validation_error() { + _arv_value="$1" + + assert_output_contains "$INSTALL_OUTPUT" "invalid OPENSHELL_RELEASE_REPO" "rejects malformed release repo" + assert_output_contains "$INSTALL_OUTPUT" "${_arv_value}" "includes malformed release repo value" +} + # --------------------------------------------------------------------------- -# Install runner +# Fake release assets # --------------------------------------------------------------------------- +test_target() { + case "$(uname -m)" in + x86_64|amd64) _tt_arch="x86_64" ;; + aarch64|arm64) _tt_arch="aarch64" ;; + *) _tt_arch="x86_64" ;; + esac + + case "$(uname -s)" in + Darwin) _tt_os="apple-darwin" ;; + *) _tt_os="unknown-linux-musl" ;; + esac + + printf '%s-%s\n' "$_tt_arch" "$_tt_os" +} + +setup_fake_release_assets() { + TEST_ROOT="$(mktemp -d)" + _target="$(test_target)" + _filename="openshell-${_target}.tar.gz" + _checksums_filename="openshell-checksums-sha256.txt" + _release_dir="${TEST_ROOT}/releases/download/${TEST_RELEASE_VERSION}" + _fake_bin_dir="${TEST_ROOT}/fakebin" + TEST_CURL_LOG="${TEST_ROOT}/curl.log" + + mkdir -p "$_release_dir" "$_fake_bin_dir" + : >"$TEST_CURL_LOG" + + cat >"${TEST_ROOT}/openshell" </dev/null 2>&1; then + shasum -a 256 "${_release_dir}/${_filename}" | awk '{print $1 " '"${_filename}"'"}' 
>"${_release_dir}/${_checksums_filename}" + else + sha256sum "${_release_dir}/${_filename}" | awk '{print $1 " '"${_filename}"'"}' >"${_release_dir}/${_checksums_filename}" + fi + + cat >"${_fake_bin_dir}/curl" <<'EOF' +#!/bin/sh +set -eu + +_output="" +_format="" +_url="" + +while [ "$#" -gt 0 ]; do + case "$1" in + -o) + _output="$2" + shift 2 + ;; + -w) + _format="$2" + shift 2 + ;; + --retry) + shift 2 + ;; + -f|-L|-s|-S) + shift + ;; + *) + _url="$1" + shift + ;; + esac +done + +printf '%s\n' "$_url" >>"${TEST_CURL_LOG}" + +if [ -n "$_format" ]; then + case "$_url" in + */releases/latest) + _release_root="${_url%/releases/latest}" + printf '%s/releases/tag/%s' "${_release_root}" "${TEST_RELEASE_VERSION}" + ;; + *) + printf '%s' "$_url" + ;; + esac + exit 0 +fi + +_name="${_url##*/}" +[ -n "$_output" ] || exit 1 +[ -f "${TEST_RELEASE_DIR}/${_name}" ] || exit 22 +cp "${TEST_RELEASE_DIR}/${_name}" "$_output" +EOF + chmod 755 "${_fake_bin_dir}/curl" +} + +apply_checksum_state() { + _checksum_state="${1:-manifest-present}" + _release_dir="${TEST_ROOT}/releases/download/${TEST_RELEASE_VERSION}" + _target="$(test_target)" + _filename="openshell-${_target}.tar.gz" + _checksums_filename="openshell-checksums-sha256.txt" + + case "$_checksum_state" in + manifest-present) + ;; + manifest-missing) + rm -f "${_release_dir}/${_checksums_filename}" + ;; + checksum-mismatch) + printf 'tampered archive\n' >>"${_release_dir}/${_filename}" + ;; + *) + printf 'unknown checksum state: %s\n' "$_checksum_state" >&2 + return 1 + ;; + esac +} + +run_with_test_env() { + INSTALL_OUTPUT="$(env \ + OPENSHELL_INSTALL_DIR="$INSTALL_DIR" \ + PATH="${TEST_ROOT}/fakebin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" \ + SHELL="${SHELL:-/bin/sh}" \ + TEST_RELEASE_DIR="${TEST_ROOT}/releases/download/${TEST_RELEASE_VERSION}" \ + TEST_RELEASE_VERSION="${TEST_RELEASE_VERSION}" \ + TEST_CURL_LOG="$TEST_CURL_LOG" \ + "$@" 2>&1)" +} + +read_curl_log() { + if [ -n "$TEST_CURL_LOG" ] && [ -f 
"$TEST_CURL_LOG" ]; then + tr '\n' '\n' <"$TEST_CURL_LOG" + fi +} + # Run the real install.sh, installing to a temp directory with the install # dir removed from PATH so we always get PATH guidance output. # @@ -79,17 +253,81 @@ assert_output_not_contains() { # SHELL="/bin/bash" run_install run_install() { INSTALL_DIR="$(mktemp -d)/bin" + setup_fake_release_assets - # Remove the install dir from PATH (it won't be there, but be explicit). - # Keep a minimal PATH so curl/tar/install are available. - INSTALL_OUTPUT="$(OPENSHELL_INSTALL_DIR="$INSTALL_DIR" \ - PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" \ - sh "$INSTALL_SCRIPT" 2>&1)" || { + run_with_test_env sh "$INSTALL_SCRIPT" || { printf 'install.sh failed:\n%s\n' "$INSTALL_OUTPUT" >&2 return 1 } } +# Run install.sh with additional environment variables and expect success. +run_install_with_env() { + INSTALL_DIR="$(mktemp -d)/bin" + setup_fake_release_assets + + run_with_test_env env "$@" sh "$INSTALL_SCRIPT" || { + printf 'install.sh failed:\n%s\n' "$INSTALL_OUTPUT" >&2 + return 1 + } +} + +run_install_with_args() { + INSTALL_DIR="$(mktemp -d)/bin" + setup_fake_release_assets + + run_with_test_env sh "$INSTALL_SCRIPT" "$@" || { + printf 'install.sh failed:\n%s\n' "$INSTALL_OUTPUT" >&2 + return 1 + } +} + +run_install_with_checksum_state() { + INSTALL_DIR="$(mktemp -d)/bin" + setup_fake_release_assets + apply_checksum_state "$1" || return 1 + + run_with_test_env sh "$INSTALL_SCRIPT" || { + printf 'install.sh failed:\n%s\n' "$INSTALL_OUTPUT" >&2 + return 1 + } +} + +# Run install.sh with additional environment variables and expect failure. 
+run_install_expect_failure() { + INSTALL_DIR="$(mktemp -d)/bin" + setup_fake_release_assets + + if run_with_test_env env "$@" sh "$INSTALL_SCRIPT" + then + printf 'install.sh unexpectedly succeeded:\n%s\n' "$INSTALL_OUTPUT" >&2 + return 1 + fi +} + +run_install_with_checksum_state_expect_failure() { + INSTALL_DIR="$(mktemp -d)/bin" + setup_fake_release_assets + apply_checksum_state "$1" || return 1 + + if run_with_test_env sh "$INSTALL_SCRIPT" + then + printf 'install.sh unexpectedly succeeded:\n%s\n' "$INSTALL_OUTPUT" >&2 + return 1 + fi +} + +run_install_args_expect_failure() { + INSTALL_DIR="$(mktemp -d)/bin" + setup_fake_release_assets + + if run_with_test_env sh "$INSTALL_SCRIPT" "$@" + then + printf 'install.sh unexpectedly succeeded:\n%s\n' "$INSTALL_OUTPUT" >&2 + return 1 + fi +} + # --------------------------------------------------------------------------- # Summary # --------------------------------------------------------------------------- diff --git a/e2e/install/sh_test.sh b/e2e/install/sh_test.sh index 320c00ef..3eeab050 100755 --- a/e2e/install/sh_test.sh +++ b/e2e/install/sh_test.sh @@ -4,7 +4,7 @@ # # POSIX sh e2e tests for install.sh. 
# -# Downloads the latest release for real and validates: +# Uses fake release assets and validates: # - Binary is installed to the correct directory # - Binary is executable and runs # - PATH guidance shows the correct export command for sh @@ -84,6 +84,132 @@ test_no_env_scripts_created() { fi } +test_default_release_root_uses_fork_repo() { + printf 'TEST: default release root uses selected fork-owned repo\n' + + SHELL="/bin/sh" run_install + + assert_release_root_uses_repo "linuxdevel/OpenShell" "default release root" +} + +test_release_root_override_changes_repo() { + printf 'TEST: explicit repo override changes derived release root\n' + + SHELL="/bin/sh" run_install_with_env \ + OPENSHELL_RELEASE_REPO=example/custom-openshell + + assert_release_root_uses_repo "example/custom-openshell" "override release root" +} + +test_release_root_override_rejects_malformed_repo() { + printf 'TEST: malformed repo override fails clearly\n' + + SHELL="/bin/sh" run_install_expect_failure \ + OPENSHELL_RELEASE_REPO=not-a-valid-repo + + assert_release_repo_validation_error "not-a-valid-repo" +} + +test_tagged_release_fails_without_checksum_manifest() { + printf 'TEST: tagged release fails when checksum manifest is missing\n' + + if SHELL="/bin/sh" run_install_with_checksum_state_expect_failure manifest-missing; then + assert_output_contains "$(read_curl_log)" "/openshell-checksums-sha256.txt" "downloads checksum manifest" + assert_output_not_contains "$(read_curl_log)" ".sig" "does not download detached signature metadata" + assert_output_contains "$INSTALL_OUTPUT" "missing checksum manifest" "fails clearly for missing checksum manifest" + else + fail "tagged release fails when checksum manifest is missing" "installer unexpectedly succeeded without checksum manifest" + fi +} + +test_tagged_release_verifies_archive_checksum_against_manifest() { + printf 'TEST: tagged release verifies archive checksum against checksum manifest\n' + + if SHELL="/bin/sh" 
run_install_with_checksum_state manifest-present; then + assert_output_contains "$(read_curl_log)" "/openshell-checksums-sha256.txt" "downloads checksum manifest before install" + assert_output_contains "$INSTALL_OUTPUT" "verifying checksum..." "announces checksum verification" + else + fail "tagged release verifies archive checksum against checksum manifest" "$INSTALL_OUTPUT" + fi +} + +test_tagged_release_fails_when_checksum_mismatches_manifest() { + printf 'TEST: tagged release fails when archive checksum does not match manifest\n' + + if SHELL="/bin/sh" run_install_with_checksum_state_expect_failure checksum-mismatch; then + assert_output_contains "$INSTALL_OUTPUT" "checksum verification failed for openshell-" "fails on checksum mismatch" + else + fail "tagged release fails when archive checksum does not match manifest" "installer unexpectedly accepted a tampered archive" + fi +} + +test_tagged_release_does_not_require_detached_signature_artifacts() { + printf 'TEST: tagged release succeeds without detached signature artifacts\n' + + if SHELL="/bin/sh" run_install_with_checksum_state manifest-present; then + assert_output_not_contains "$(read_curl_log)" ".sig" "does not request detached signature metadata" + assert_output_not_contains "$INSTALL_OUTPUT" "signature metadata" "does not mention detached signature verification" + assert_output_not_contains "$INSTALL_OUTPUT" "openshell-verify-signature" "does not require verifier helper in active path" + else + fail "tagged release succeeds without detached signature artifacts" "$INSTALL_OUTPUT" + fi +} + +test_selects_claude_code() { + printf 'TEST: installer accepts claude code selection\n' + + SHELL="/bin/sh" run_install_with_env \ + OPENSHELL_TOOL=claude-code + + assert_setup_selection "tool" "claude-code" "shows claude code selection" + assert_setup_selection_notice +} + +test_selects_opencode() { + printf 'TEST: installer accepts opencode selection\n' + + SHELL="/bin/sh" run_install_with_env \ + 
OPENSHELL_TOOL=opencode + + assert_setup_selection "tool" "opencode" "shows opencode selection" +} + +test_selects_vendor_model_path() { + printf 'TEST: installer accepts vendor and model path selection\n' + + SHELL="/bin/sh" run_install_with_env \ + OPENSHELL_TOOL=claude-code \ + OPENSHELL_VENDOR=anthropic \ + OPENSHELL_MODEL_PATH=claude-sonnet-4 + + assert_setup_selection "vendor" "anthropic" "shows vendor selection" + assert_setup_selection "model path" "claude-sonnet-4" "shows model path selection" +} + +test_rejects_unsupported_combination() { + printf 'TEST: installer rejects unsupported tool and vendor selection\n' + + SHELL="/bin/sh" run_install_expect_failure \ + OPENSHELL_TOOL=claude-code \ + OPENSHELL_VENDOR=github-copilot + + assert_output_contains "$INSTALL_OUTPUT" "unsupported installer selection" "reports unsupported combination" + assert_output_contains "$INSTALL_OUTPUT" "claude-code + github-copilot" "includes unsupported pair" +} + +test_accepts_selection_flags() { + printf 'TEST: installer accepts setup selection flags\n' + + SHELL="/bin/sh" run_install_with_args \ + --tool opencode \ + --vendor github-copilot \ + --model-path copilot/chat + + assert_setup_selection "tool" "opencode" "shows tool flag selection" + assert_setup_selection "vendor" "github-copilot" "shows vendor flag selection" + assert_setup_selection "model path" "copilot/chat" "shows model-path flag selection" +} + # --------------------------------------------------------------------------- # Runner # --------------------------------------------------------------------------- @@ -100,6 +226,18 @@ test_binary_runs; echo "" test_guidance_shows_export_path; echo "" test_guidance_mentions_not_on_path; echo "" test_guidance_mentions_restart; echo "" -test_no_env_scripts_created +test_no_env_scripts_created; echo "" +test_default_release_root_uses_fork_repo; echo "" +test_release_root_override_changes_repo; echo "" +test_release_root_override_rejects_malformed_repo; echo "" 
+test_tagged_release_fails_without_checksum_manifest; echo "" +test_tagged_release_verifies_archive_checksum_against_manifest; echo "" +test_tagged_release_fails_when_checksum_mismatches_manifest; echo "" +test_tagged_release_does_not_require_detached_signature_artifacts; echo "" +test_selects_claude_code; echo "" +test_selects_opencode; echo "" +test_selects_vendor_model_path; echo "" +test_rejects_unsupported_combination; echo "" +test_accepts_selection_flags print_summary diff --git a/e2e/install/zsh_test.sh b/e2e/install/zsh_test.sh index 621d35f8..9eedacfa 100755 --- a/e2e/install/zsh_test.sh +++ b/e2e/install/zsh_test.sh @@ -61,6 +61,48 @@ test_guidance_mentions_not_on_path() { assert_output_contains "$INSTALL_OUTPUT" "$INSTALL_DIR" "includes install dir in guidance" } +test_selects_claude_code() { + printf 'TEST: installer accepts claude code selection\n' + + SHELL="/bin/zsh" run_install_with_env \ + OPENSHELL_TOOL=claude-code + + assert_setup_selection "tool" "claude-code" "shows claude code selection" + assert_setup_selection_notice +} + +test_selects_opencode() { + printf 'TEST: installer accepts opencode selection\n' + + SHELL="/bin/zsh" run_install_with_env \ + OPENSHELL_TOOL=opencode + + assert_setup_selection "tool" "opencode" "shows opencode selection" +} + +test_selects_vendor_model_path() { + printf 'TEST: installer accepts vendor and model path selection\n' + + SHELL="/bin/zsh" run_install_with_env \ + OPENSHELL_TOOL=claude-code \ + OPENSHELL_VENDOR=anthropic \ + OPENSHELL_MODEL_PATH=claude-sonnet-4 + + assert_setup_selection "vendor" "anthropic" "shows vendor selection" + assert_setup_selection "model path" "claude-sonnet-4" "shows model path selection" +} + +test_rejects_unsupported_combination() { + printf 'TEST: installer rejects unsupported tool and vendor selection\n' + + SHELL="/bin/zsh" run_install_expect_failure \ + OPENSHELL_TOOL=claude-code \ + OPENSHELL_VENDOR=github-copilot + + assert_output_contains "$INSTALL_OUTPUT" "unsupported 
installer selection" "reports unsupported combination" + assert_output_contains "$INSTALL_OUTPUT" "claude-code + github-copilot" "includes unsupported pair" +} + # --------------------------------------------------------------------------- # Runner # --------------------------------------------------------------------------- @@ -75,6 +117,10 @@ test_binary_installed; echo "" test_binary_executable; echo "" test_binary_runs; echo "" test_guidance_shows_export_path; echo "" -test_guidance_mentions_not_on_path +test_guidance_mentions_not_on_path; echo "" +test_selects_claude_code; echo "" +test_selects_opencode; echo "" +test_selects_vendor_model_path; echo "" +test_rejects_unsupported_combination print_summary diff --git a/e2e/rust/tests/tool_adapter_smoke.rs b/e2e/rust/tests/tool_adapter_smoke.rs new file mode 100644 index 00000000..0fd38d41 --- /dev/null +++ b/e2e/rust/tests/tool_adapter_smoke.rs @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e")] + +//! Smoke validations for first-class sandbox tool flows. +//! +//! These tests intentionally stay small: they verify that the recognized tool +//! binary exists in the sandbox, that the expected provider can be auto-created +//! from local credentials, and that the sandbox sees only the projected +//! placeholder value rather than the raw secret. For both `claude` and +//! `opencode`, this covers the current placeholder-projection contract rather +//! than full vendor-native Anthropic or GitHub Copilot execution parity. 
+ +use std::process::Stdio; +use std::sync::Mutex; + +use openshell_e2e::harness::binary::openshell_cmd; +use openshell_e2e::harness::output::{extract_field, strip_ansi}; + +const ANTHROPIC_TEST_API_KEY: &str = "sk-e2e-tool-smoke-anthropic"; +const ANTHROPIC_PLACEHOLDER: &str = "openshell:resolve:env:ANTHROPIC_API_KEY"; +const GITHUB_TEST_TOKEN: &str = "ghu-e2e-tool-smoke-github"; +const GITHUB_PLACEHOLDER: &str = "openshell:resolve:env:GITHUB_TOKEN"; + +static CLAUDE_PROVIDER_LOCK: Mutex<()> = Mutex::new(()); +static GITHUB_PROVIDER_LOCK: Mutex<()> = Mutex::new(()); + +async fn delete_provider(name: &str) { + let mut cmd = openshell_cmd(); + cmd.arg("provider") + .arg("delete") + .arg(name) + .stdout(Stdio::null()) + .stderr(Stdio::null()); + let _ = cmd.status().await; +} + +async fn provider_exists(name: &str) -> bool { + let mut cmd = openshell_cmd(); + cmd.arg("provider") + .arg("get") + .arg(name) + .stdout(Stdio::null()) + .stderr(Stdio::null()); + cmd.status().await.is_ok_and(|status| status.success()) +} + +async fn delete_sandbox(name: &str) { + let mut cmd = openshell_cmd(); + cmd.arg("sandbox") + .arg("delete") + .arg(name) + .stdout(Stdio::null()) + .stderr(Stdio::null()); + let _ = cmd.status().await; +} + +fn sandbox_name(output: &str) -> Option { + extract_field(output, "Created sandbox").or_else(|| extract_field(output, "Name")) +} + +async fn run_tool_smoke( + provider: &str, + command: &str, + env_key: &str, + env_value: &str, +) -> (String, i32, Option) { + let mut cmd = openshell_cmd(); + cmd.arg("sandbox") + .arg("create") + .arg("--provider") + .arg(provider) + .arg("--auto-providers") + .arg("--no-bootstrap") + .arg("--") + .arg("sh") + .arg("-lc") + .arg(command) + .env(env_key, env_value) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let output = cmd.output().await.expect("spawn openshell sandbox create"); + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = 
String::from_utf8_lossy(&output.stderr).to_string(); + let combined = format!("{stdout}{stderr}"); + let code = output.status.code().unwrap_or(-1); + (combined.clone(), code, sandbox_name(&combined)) +} + +#[tokio::test] +async fn claude_code_smoke_with_anthropic_provider() { + let _provider_lock = CLAUDE_PROVIDER_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + + assert!( + !provider_exists("claude").await, + "existing provider 'claude' makes this smoke test unsafe; remove the shared provider and rerun so the auto-create assertions execute" + ); + + let (output, code, created_sandbox) = run_tool_smoke( + "claude", + "command -v claude >/dev/null && (claude --version >/dev/null 2>&1 || claude --help >/dev/null 2>&1) && printenv ANTHROPIC_API_KEY", + "ANTHROPIC_API_KEY", + ANTHROPIC_TEST_API_KEY, + ) + .await; + + if let Some(name) = created_sandbox { + delete_sandbox(&name).await; + } + delete_provider("claude").await; + + let clean = strip_ansi(&output); + assert_eq!(code, 0, "claude tool smoke should succeed:\n{clean}"); + assert!( + clean.contains("Created provider claude"), + "output should confirm claude provider auto-creation:\n{clean}" + ); + assert!( + clean.contains(ANTHROPIC_PLACEHOLDER), + "sandbox should expose the Anthropic placeholder to the tool flow:\n{clean}" + ); + assert!( + !clean.contains(ANTHROPIC_TEST_API_KEY), + "sandbox must not expose the raw Anthropic secret:\n{clean}" + ); +} + +#[tokio::test] +async fn opencode_smoke_with_current_github_copilot_targeted_path() { + let _provider_lock = GITHUB_PROVIDER_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + + assert!( + !provider_exists("github").await, + "existing provider 'github' makes this smoke test unsafe; remove the shared provider and rerun so the auto-create assertions execute" + ); + + let (output, code, created_sandbox) = run_tool_smoke( + "github", + "command -v opencode >/dev/null && (opencode --version >/dev/null 2>&1 || opencode --help 
>/dev/null 2>&1) && printenv GITHUB_TOKEN", + "GITHUB_TOKEN", + GITHUB_TEST_TOKEN, + ) + .await; + + if let Some(name) = created_sandbox { + delete_sandbox(&name).await; + } + delete_provider("github").await; + + let clean = strip_ansi(&output); + assert_eq!( + code, 0, + "opencode smoke for the current GitHub/Copilot-targeted path should succeed:\n{clean}" + ); + assert!( + clean.contains("Created provider github"), + "output should confirm github provider auto-creation for the current Copilot-targeted path:\n{clean}" + ); + assert!( + clean.contains(GITHUB_PLACEHOLDER), + "sandbox should expose the GitHub placeholder for the current Copilot-targeted path:\n{clean}" + ); + assert!( + !clean.contains(GITHUB_TEST_TOKEN), + "sandbox must not expose the raw GitHub token:\n{clean}" + ); +} diff --git a/install.sh b/install.sh index cf29ba74..191b0ecd 100755 --- a/install.sh +++ b/install.sh @@ -5,7 +5,7 @@ # Install the OpenShell CLI binary. # # Usage: -# curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | sh +# curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | sh # # Or run directly: # ./install.sh @@ -13,12 +13,15 @@ # Environment variables: # OPENSHELL_VERSION - Release tag to install (default: latest tagged release) # OPENSHELL_INSTALL_DIR - Directory to install into (default: ~/.local/bin) +# OPENSHELL_RELEASE_REPO - GitHub release repo override (default: linuxdevel/OpenShell) +# OPENSHELL_TOOL - Optional setup selection hint (claude-code, opencode) +# OPENSHELL_VENDOR - Optional setup selection hint (anthropic, github-copilot) +# OPENSHELL_MODEL_PATH - Optional setup model path hint for later setup flow # set -eu APP_NAME="openshell" -REPO="NVIDIA/OpenShell" -GITHUB_URL="https://github.com/${REPO}" +DEFAULT_RELEASE_REPO="linuxdevel/OpenShell" # --------------------------------------------------------------------------- # Logging @@ -46,25 +49,38 @@ usage() { install.sh — Install the OpenShell CLI USAGE: - 
curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | sh + curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | sh ./install.sh [OPTIONS] OPTIONS: - --help Print this help message + --help Print this help message + --tool Validate later setup tool selection + --vendor Validate later setup vendor selection + --model-path Validate later setup model path selection ENVIRONMENT VARIABLES: OPENSHELL_VERSION Release tag to install (default: latest tagged release) OPENSHELL_INSTALL_DIR Directory to install into (default: ~/.local/bin) + OPENSHELL_RELEASE_REPO GitHub release repo override (default: linuxdevel/OpenShell) + OPENSHELL_TOOL Optional setup selection hint (claude-code, opencode) + OPENSHELL_VENDOR Optional setup selection hint (anthropic, github-copilot) + OPENSHELL_MODEL_PATH Optional setup model path hint for later setup flow EXAMPLES: # Install latest release - curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | sh + curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | sh # Install a specific version - curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | OPENSHELL_VERSION=v0.0.9 sh + curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | OPENSHELL_VERSION=v0.0.9 sh # Install to /usr/local/bin - curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | OPENSHELL_INSTALL_DIR=/usr/local/bin sh + curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | OPENSHELL_INSTALL_DIR=/usr/local/bin sh + + # Install from a different fork release repo + curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | OPENSHELL_RELEASE_REPO=example/custom-openshell sh + + # Validate later setup selections while installing the CLI + ./install.sh --tool claude-code --vendor anthropic --model-path claude-sonnet-4 EOF } @@ -76,6 +92,104 @@ has_cmd() { command -v "$1" 
>/dev/null 2>&1 } +validate_choice() { + _value="$1" + _label="$2" + shift 2 + + [ -z "$_value" ] && return 0 + + for _allowed in "$@"; do + if [ "$_value" = "$_allowed" ]; then + return 0 + fi + done + + error "unsupported ${_label}: ${_value}" +} + +validate_selection() { + _tool="${OPENSHELL_TOOL:-}" + _vendor="${OPENSHELL_VENDOR:-}" + _model_path="${OPENSHELL_MODEL_PATH:-}" + + validate_choice "$_tool" "tool" "claude-code" "opencode" + validate_choice "$_vendor" "vendor" "anthropic" "github-copilot" + + if [ -n "$_vendor" ] && [ -z "$_tool" ]; then + error "OPENSHELL_VENDOR requires OPENSHELL_TOOL" + fi + + if [ -n "$_model_path" ] && [ -z "$_vendor" ]; then + error "OPENSHELL_MODEL_PATH requires OPENSHELL_VENDOR" + fi + + case "${_tool}:${_vendor}" in + ""|":") + ;; + "claude-code:"|"claude-code:anthropic") + ;; + "opencode:"|"opencode:github-copilot") + ;; + *) + error "unsupported installer selection: ${_tool} + ${_vendor}" + ;; + esac +} + +validate_release_repo() { + _repo="${OPENSHELL_RELEASE_REPO:-$DEFAULT_RELEASE_REPO}" + + case "$_repo" in + */*) + _owner="${_repo%%/*}" + _name="${_repo#*/}" + ;; + *) + error "invalid OPENSHELL_RELEASE_REPO: ${_repo} (expected /)" + ;; + esac + + case "$_owner" in + ""|*/*|*[!ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-]*) + error "invalid OPENSHELL_RELEASE_REPO: ${_repo} (expected /)" + ;; + esac + + case "$_name" in + ""|*/*|*[!ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-]*) + error "invalid OPENSHELL_RELEASE_REPO: ${_repo} (expected /)" + ;; + esac + + printf '%s\n' "$_repo" +} + +print_selection() { + _printed=0 + + if [ -n "${OPENSHELL_TOOL:-}" ]; then + info "validated setup tool selection: ${OPENSHELL_TOOL}" + _printed=1 + fi + + if [ -n "${OPENSHELL_VENDOR:-}" ]; then + info "validated setup vendor selection: ${OPENSHELL_VENDOR}" + _printed=1 + fi + + if [ -n "${OPENSHELL_MODEL_PATH:-}" ]; then + info "validated setup model path selection: ${OPENSHELL_MODEL_PATH}" + 
_printed=1 + fi + + if [ "$_printed" -eq 1 ]; then + info "selection validation applies to later OpenShell setup and still installs the openshell CLI" + fi + + return 0 +} + check_downloader() { if has_cmd curl; then return 0 @@ -98,6 +212,20 @@ download() { fi } +download_optional() { + _url="$1" + _output="$2" + + rm -f "$_output" + + if download "$_url" "$_output"; then + return 0 + fi + + rm -f "$_output" + return 1 +} + # Follow a URL and print the final resolved URL (for detecting redirect targets). resolve_redirect() { _url="$1" @@ -150,6 +278,8 @@ get_target() { # --------------------------------------------------------------------------- resolve_version() { + _github_url="$1" + if [ -n "${OPENSHELL_VERSION:-}" ]; then echo "$OPENSHELL_VERSION" return 0 @@ -158,7 +288,7 @@ resolve_version() { # Resolve "latest" by following the GitHub releases/latest redirect. # GitHub redirects /releases/latest -> /releases/tag/ info "resolving latest version..." - _latest_url="${GITHUB_URL}/releases/latest" + _latest_url="${_github_url}/releases/latest" _resolved="$(resolve_redirect "$_latest_url")" || error "failed to resolve latest release from ${_latest_url}" # Extract the tag from the resolved URL: .../releases/tag/v0.0.4 -> v0.0.4 @@ -183,8 +313,7 @@ verify_checksum() { _vc_expected="$(grep "$_vc_filename" "$_vc_checksums" | awk '{print $1}')" if [ -z "$_vc_expected" ]; then - warn "no checksum found for $_vc_filename, skipping verification" - return 0 + error "missing checksum entry for $_vc_filename in ${_vc_checksums##*/}" fi if has_cmd shasum; then @@ -192,8 +321,7 @@ verify_checksum() { elif has_cmd sha256sum; then echo "$_vc_expected $_vc_archive" | sha256sum -c --quiet 2>/dev/null else - warn "sha256sum/shasum not found, skipping checksum verification" - return 0 + error "sha256sum/shasum not found, cannot verify release checksum" fi } @@ -223,45 +351,66 @@ is_on_path() { # --------------------------------------------------------------------------- main() { - 
# Parse CLI flags - for arg in "$@"; do - case "$arg" in + while [ "$#" -gt 0 ]; do + case "$1" in --help) usage exit 0 ;; + --tool) + [ "$#" -ge 2 ] || error "missing value for --tool" + OPENSHELL_TOOL="$2" + shift 2 + ;; + --vendor) + [ "$#" -ge 2 ] || error "missing value for --vendor" + OPENSHELL_VENDOR="$2" + shift 2 + ;; + --model-path) + [ "$#" -ge 2 ] || error "missing value for --model-path" + OPENSHELL_MODEL_PATH="$2" + shift 2 + ;; *) - error "unknown option: $arg" + error "unknown option: $1" ;; esac done check_downloader + validate_selection + print_selection - _version="$(resolve_version)" + _release_repo="$(validate_release_repo)" + _github_url="https://github.com/${_release_repo}" + _version="$(resolve_version "$_github_url")" _target="$(get_target)" _filename="${APP_NAME}-${_target}.tar.gz" - _download_url="${GITHUB_URL}/releases/download/${_version}/${_filename}" - _checksums_url="${GITHUB_URL}/releases/download/${_version}/${APP_NAME}-checksums-sha256.txt" + _checksums_filename="${APP_NAME}-checksums-sha256.txt" + _checksums_path="${_tmpdir:-}/unused" + _download_url="${_github_url}/releases/download/${_version}/${_filename}" + _checksums_url="${_github_url}/releases/download/${_version}/${_checksums_filename}" _install_dir="$(get_install_dir)" info "downloading ${APP_NAME} ${_version} (${_target})..." _tmpdir="$(mktemp -d)" trap 'rm -rf "$_tmpdir"' EXIT + _checksums_path="${_tmpdir}/${_checksums_filename}" if ! download "$_download_url" "${_tmpdir}/${_filename}"; then error "failed to download ${_download_url}" fi + if ! download "$_checksums_url" "$_checksums_path"; then + error "missing checksum manifest: ${_checksums_filename}" + fi + # Verify checksum info "verifying checksum..." - if download "$_checksums_url" "${_tmpdir}/checksums.txt"; then - if ! 
verify_checksum "${_tmpdir}/${_filename}" "${_tmpdir}/checksums.txt" "$_filename"; then - error "checksum verification failed for ${_filename}" - fi - else - warn "could not download checksums file, skipping verification" + if ! verify_checksum "${_tmpdir}/${_filename}" "$_checksums_path" "$_filename"; then + error "checksum verification failed for ${_filename}" fi # Extract diff --git a/tasks/scripts/ci-build-cluster-image.sh b/tasks/scripts/ci-build-cluster-image.sh index 7e084928..7bf369fc 100644 --- a/tasks/scripts/ci-build-cluster-image.sh +++ b/tasks/scripts/ci-build-cluster-image.sh @@ -9,6 +9,97 @@ PLATFORM="" RUNTIME_BUNDLE_URL="" RUNTIME_BUNDLE_URL_AMD64="" RUNTIME_BUNDLE_URL_ARM64="" +RUNTIME_BUNDLE_GITHUB_REPO="" +RUNTIME_BUNDLE_RELEASE_TAG="" +RUNTIME_BUNDLE_FILENAME_PREFIX="" +RUNTIME_BUNDLE_VERSION="" + +derive_runtime_bundle_url() { + local arch="$1" + + if [[ -z "$RUNTIME_BUNDLE_GITHUB_REPO" || -z "$RUNTIME_BUNDLE_RELEASE_TAG" || -z "$RUNTIME_BUNDLE_FILENAME_PREFIX" || -z "$RUNTIME_BUNDLE_VERSION" ]]; then + echo "missing required runtime bundle default metadata" >&2 + exit 1 + fi + + printf 'https://github.com/%s/releases/download/%s/%s_%s_%s.tar.gz\n' \ + "$RUNTIME_BUNDLE_GITHUB_REPO" \ + "$RUNTIME_BUNDLE_RELEASE_TAG" \ + "$RUNTIME_BUNDLE_FILENAME_PREFIX" \ + "$RUNTIME_BUNDLE_VERSION" \ + "$arch" +} + +is_supported_multiarch_platform_set() { + local platform_list="$1" + local seen_amd64=0 + local seen_arm64=0 + local count=0 + local platform + + IFS=',' read -r -a platforms <<< "$platform_list" + for platform in "${platforms[@]}"; do + count=$((count + 1)) + case "$platform" in + linux/amd64) + if [[ "$seen_amd64" -eq 1 ]]; then + return 1 + fi + seen_amd64=1 + ;; + linux/arm64) + if [[ "$seen_arm64" -eq 1 ]]; then + return 1 + fi + seen_arm64=1 + ;; + *) + return 1 + ;; + esac + done + + [[ "$count" -eq 2 && "$seen_amd64" -eq 1 && "$seen_arm64" -eq 1 ]] +} + +resolve_runtime_bundle_url() { + local arch="$1" + local explicit_url="$2" + + if [[ -n 
"$explicit_url" ]]; then + printf '%s\n' "$explicit_url" + return 0 + fi + + derive_runtime_bundle_url "$arch" +} + +resolve_single_arch_runtime_bundle_url() { + local platform="$1" + local arch="$2" + + if [[ -n "$RUNTIME_BUNDLE_URL" ]]; then + printf '%s\n' "$RUNTIME_BUNDLE_URL" + return 0 + fi + + case "$arch" in + amd64) + if [[ -n "$RUNTIME_BUNDLE_URL_ARM64" ]]; then + echo "--runtime-bundle-url-arm64 is not supported for single-arch platform $platform; use --runtime-bundle-url or --runtime-bundle-url-amd64" >&2 + exit 1 + fi + resolve_runtime_bundle_url "$arch" "$RUNTIME_BUNDLE_URL_AMD64" + ;; + arm64) + if [[ -n "$RUNTIME_BUNDLE_URL_AMD64" ]]; then + echo "--runtime-bundle-url-amd64 is not supported for single-arch platform $platform; use --runtime-bundle-url or --runtime-bundle-url-arm64" >&2 + exit 1 + fi + resolve_runtime_bundle_url "$arch" "$RUNTIME_BUNDLE_URL_ARM64" + ;; + esac +} while [[ $# -gt 0 ]]; do case "$1" in @@ -28,6 +119,22 @@ while [[ $# -gt 0 ]]; do RUNTIME_BUNDLE_URL_ARM64="$2" shift 2 ;; + --runtime-bundle-github-repo) + RUNTIME_BUNDLE_GITHUB_REPO="$2" + shift 2 + ;; + --runtime-bundle-release-tag) + RUNTIME_BUNDLE_RELEASE_TAG="$2" + shift 2 + ;; + --runtime-bundle-filename-prefix) + RUNTIME_BUNDLE_FILENAME_PREFIX="$2" + shift 2 + ;; + --runtime-bundle-version) + RUNTIME_BUNDLE_VERSION="$2" + shift 2 + ;; *) echo "Unknown argument: $1" >&2 exit 1 @@ -41,13 +148,21 @@ if [[ -z "$PLATFORM" ]]; then fi if [[ "$PLATFORM" == *","* ]]; then - if [[ -z "$RUNTIME_BUNDLE_URL_AMD64" || -z "$RUNTIME_BUNDLE_URL_ARM64" ]]; then - echo "missing required arguments: --runtime-bundle-url-amd64 and --runtime-bundle-url-arm64" >&2 + if [[ -n "$RUNTIME_BUNDLE_URL" ]]; then + echo "--runtime-bundle-url is not supported for multi-arch builds; use --runtime-bundle-url-amd64 and --runtime-bundle-url-arm64" >&2 exit 1 fi - amd64_bundle="$(bash tasks/scripts/download-runtime-bundle.sh --arch amd64 --url "$RUNTIME_BUNDLE_URL_AMD64")" - arm64_bundle="$(bash 
tasks/scripts/download-runtime-bundle.sh --arch arm64 --url "$RUNTIME_BUNDLE_URL_ARM64")" + if ! is_supported_multiarch_platform_set "$PLATFORM"; then + echo "unsupported multi-arch platform set: $PLATFORM" >&2 + exit 1 + fi + + amd64_url="$(resolve_runtime_bundle_url amd64 "$RUNTIME_BUNDLE_URL_AMD64")" + arm64_url="$(resolve_runtime_bundle_url arm64 "$RUNTIME_BUNDLE_URL_ARM64")" + + amd64_bundle="$(bash tasks/scripts/download-runtime-bundle.sh --arch amd64 --url "$amd64_url")" + arm64_bundle="$(bash tasks/scripts/download-runtime-bundle.sh --arch arm64 --url "$arm64_url")" DOCKER_REGISTRY="${IMAGE_REGISTRY:?IMAGE_REGISTRY is required for multi-arch cluster builds}" \ OPENSHELL_RUNTIME_BUNDLE_TARBALL_AMD64="$amd64_bundle" \ @@ -57,11 +172,6 @@ if [[ "$PLATFORM" == *","* ]]; then exit 0 fi -if [[ -z "$RUNTIME_BUNDLE_URL" ]]; then - echo "missing required argument: --runtime-bundle-url" >&2 - exit 1 -fi - case "$PLATFORM" in linux/amd64) arch="amd64" @@ -75,6 +185,8 @@ case "$PLATFORM" in ;; esac +RUNTIME_BUNDLE_URL="$(resolve_single_arch_runtime_bundle_url "$PLATFORM" "$arch")" + runtime_bundle_tarball="$(bash tasks/scripts/download-runtime-bundle.sh --arch "$arch" --url "$RUNTIME_BUNDLE_URL")" OPENSHELL_RUNTIME_BUNDLE_TARBALL="$runtime_bundle_tarball" \ diff --git a/tasks/scripts/download-runtime-bundle.sh b/tasks/scripts/download-runtime-bundle.sh index 27460bb4..804b9c66 100644 --- a/tasks/scripts/download-runtime-bundle.sh +++ b/tasks/scripts/download-runtime-bundle.sh @@ -43,7 +43,8 @@ if [[ -z "$filename" || "$filename" == "/" || "$filename" == "." ]]; then filename="runtime-bundle-${ARCH}.tar.gz" fi -target_path="$CACHE_DIR/${ARCH}-${filename}" +url_cache_key="$(printf '%s' "$URL" | sha256sum | cut -d' ' -f1)" +target_path="$CACHE_DIR/${ARCH}-${url_cache_key}-${filename}" if [[ ! 
-f "$target_path" ]]; then curl --fail --location --silent --show-error --output "$target_path" "$URL" diff --git a/tasks/tests/runtime-bundle-ci-workflow.bats b/tasks/tests/runtime-bundle-ci-workflow.bats index cce29282..faaae275 100644 --- a/tasks/tests/runtime-bundle-ci-workflow.bats +++ b/tasks/tests/runtime-bundle-ci-workflow.bats @@ -86,6 +86,29 @@ make_ci_harness() { [[ "$(wc -l < "$FAKE_CURL_LOG")" -eq 1 ]] } +@test "download-runtime-bundle.sh caches different URLs with the same basename separately" { + local harness_root first_path second_path + harness_root="$(make_ci_harness)" + + run env \ + PATH="$FAKE_BIN_DIR:$PATH" \ + bash -lc "cd '$harness_root' && bash tasks/scripts/download-runtime-bundle.sh --arch amd64 --url https://example.com/releases/a/runtime-bundle.tar.gz" + + [ "$status" -eq 0 ] + first_path="$output" + [[ "$(<"$first_path")" == *"https://example.com/releases/a/runtime-bundle.tar.gz"* ]] + + run env \ + PATH="$FAKE_BIN_DIR:$PATH" \ + bash -lc "cd '$harness_root' && bash tasks/scripts/download-runtime-bundle.sh --arch amd64 --url https://mirror.example.com/releases/b/runtime-bundle.tar.gz" + + [ "$status" -eq 0 ] + second_path="$output" + [ "$second_path" != "$first_path" ] + [[ "$(<"$second_path")" == *"https://mirror.example.com/releases/b/runtime-bundle.tar.gz"* ]] + [[ "$(wc -l < "$FAKE_CURL_LOG")" -eq 2 ]] +} + @test "ci-build-cluster-image.sh routes single-arch cluster builds through docker:build:cluster with a downloaded bundle" { local harness_root harness_root="$(make_ci_harness)" @@ -99,6 +122,48 @@ make_ci_harness() { [[ "$(<"$FAKE_MISE_LOG")" == *"runtime-bundle-arm64.tar.gz"* ]] } +@test "ci-build-cluster-image.sh accepts the matching arch-specific runtime bundle URL in single-arch mode" { + local harness_root curl_log + harness_root="$(make_ci_harness)" + + run env \ + PATH="$FAKE_BIN_DIR:$PATH" \ + bash -lc "cd '$harness_root' && bash tasks/scripts/ci-build-cluster-image.sh --platform linux/arm64 --runtime-bundle-url-arm64 
https://example.com/runtime-bundle-arm64-specific.tar.gz" + + [ "$status" -eq 0 ] + curl_log="$(<"$FAKE_CURL_LOG")" + [[ "$curl_log" == *"https://example.com/runtime-bundle-arm64-specific.tar.gz"* ]] + [[ "$(<"$FAKE_MISE_LOG")" == *"runtime-bundle-arm64-specific.tar.gz"* ]] +} + +@test "ci-build-cluster-image.sh rejects the wrong arch-specific runtime bundle URL in single-arch mode" { + local harness_root + harness_root="$(make_ci_harness)" + + run env \ + PATH="$FAKE_BIN_DIR:$PATH" \ + bash -lc "cd '$harness_root' && bash tasks/scripts/ci-build-cluster-image.sh --platform linux/arm64 --runtime-bundle-url-amd64 https://example.com/runtime-bundle-amd64.tar.gz" + + [ "$status" -ne 0 ] + [[ "$output" == *"--runtime-bundle-url-amd64 is not supported for single-arch platform linux/arm64; use --runtime-bundle-url or --runtime-bundle-url-arm64"* ]] + [ ! -f "$FAKE_CURL_LOG" ] + [ ! -f "$FAKE_MISE_LOG" ] +} + +@test "ci-build-cluster-image.sh derives a default GitHub Releases asset URL for single-arch builds from producer metadata" { + local harness_root + harness_root="$(make_ci_harness)" + + run env \ + PATH="$FAKE_BIN_DIR:$PATH" \ + bash -lc "cd '$harness_root' && bash tasks/scripts/ci-build-cluster-image.sh --platform linux/arm64 --runtime-bundle-github-repo acme/nvidia-container-toolkit --runtime-bundle-release-tag toolkit-v1.2.3 --runtime-bundle-filename-prefix runtime-bundle --runtime-bundle-version 1.2.3" + + [ "$status" -eq 0 ] + [[ "$(<"$FAKE_CURL_LOG")" == *"https://github.com/acme/nvidia-container-toolkit/releases/download/toolkit-v1.2.3/runtime-bundle_1.2.3_arm64.tar.gz"* ]] + [[ "$(<"$FAKE_MISE_LOG")" == *"run --no-prepare docker:build:cluster"* ]] + [[ "$(<"$FAKE_MISE_LOG")" == *"runtime-bundle_1.2.3_arm64.tar.gz"* ]] +} + @test "ci-build-cluster-image.sh routes multi-arch cluster builds through docker:build:cluster:multiarch with per-arch bundles" { local harness_root harness_root="$(make_ci_harness)" @@ -114,3 +179,68 @@ make_ci_harness() { [[ 
"$(<"$FAKE_MISE_LOG")" == *"runtime-bundle-arm64.tar.gz"* ]] [[ "$(<"$FAKE_MISE_LOG")" == *"|ghcr.io/nvidia/openshell" ]] } + +@test "ci-build-cluster-image.sh derives both default GitHub Releases asset URLs for multi-arch builds from producer metadata" { + local harness_root curl_log + harness_root="$(make_ci_harness)" + + run env \ + PATH="$FAKE_BIN_DIR:$PATH" \ + IMAGE_REGISTRY=ghcr.io/nvidia/openshell \ + bash -lc "cd '$harness_root' && bash tasks/scripts/ci-build-cluster-image.sh --platform linux/amd64,linux/arm64 --runtime-bundle-github-repo acme/nvidia-container-toolkit --runtime-bundle-release-tag toolkit-v1.2.3 --runtime-bundle-filename-prefix runtime-bundle --runtime-bundle-version 1.2.3" + + [ "$status" -eq 0 ] + curl_log="$(<"$FAKE_CURL_LOG")" + [[ "$curl_log" == *"https://github.com/acme/nvidia-container-toolkit/releases/download/toolkit-v1.2.3/runtime-bundle_1.2.3_amd64.tar.gz"* ]] + [[ "$curl_log" == *"https://github.com/acme/nvidia-container-toolkit/releases/download/toolkit-v1.2.3/runtime-bundle_1.2.3_arm64.tar.gz"* ]] + [[ "$(<"$FAKE_MISE_LOG")" == *"run --no-prepare docker:build:cluster:multiarch"* ]] + [[ "$(<"$FAKE_MISE_LOG")" == *"runtime-bundle_1.2.3_amd64.tar.gz"* ]] + [[ "$(<"$FAKE_MISE_LOG")" == *"runtime-bundle_1.2.3_arm64.tar.gz"* ]] +} + +@test "ci-build-cluster-image.sh prefers explicit runtime bundle URLs over derived GitHub Releases defaults" { + local harness_root curl_log + harness_root="$(make_ci_harness)" + + run env \ + PATH="$FAKE_BIN_DIR:$PATH" \ + IMAGE_REGISTRY=ghcr.io/nvidia/openshell \ + bash -lc "cd '$harness_root' && bash tasks/scripts/ci-build-cluster-image.sh --platform linux/amd64,linux/arm64 --runtime-bundle-url-amd64 https://example.com/explicit-amd64.tar.gz --runtime-bundle-url-arm64 https://example.com/explicit-arm64.tar.gz --runtime-bundle-github-repo acme/nvidia-container-toolkit --runtime-bundle-release-tag toolkit-v9.9.9 --runtime-bundle-filename-prefix runtime-bundle --runtime-bundle-version 9.9.9" + + [ 
"$status" -eq 0 ] + curl_log="$(<"$FAKE_CURL_LOG")" + [[ "$curl_log" == *"https://example.com/explicit-amd64.tar.gz"* ]] + [[ "$curl_log" == *"https://example.com/explicit-arm64.tar.gz"* ]] + [[ "$curl_log" != *"github.com/acme/nvidia-container-toolkit/releases/download/toolkit-v9.9.9/runtime-bundle_9.9.9_amd64.tar.gz"* ]] + [[ "$curl_log" != *"github.com/acme/nvidia-container-toolkit/releases/download/toolkit-v9.9.9/runtime-bundle_9.9.9_arm64.tar.gz"* ]] +} + +@test "ci-build-cluster-image.sh rejects unsupported multi-arch platform lists instead of assuming amd64+arm64" { + local harness_root + harness_root="$(make_ci_harness)" + + run env \ + PATH="$FAKE_BIN_DIR:$PATH" \ + IMAGE_REGISTRY=ghcr.io/nvidia/openshell \ + bash -lc "cd '$harness_root' && bash tasks/scripts/ci-build-cluster-image.sh --platform linux/amd64,linux/s390x --runtime-bundle-url-amd64 https://example.com/runtime-bundle-amd64.tar.gz --runtime-bundle-url-arm64 https://example.com/runtime-bundle-arm64.tar.gz" + + [ "$status" -ne 0 ] + [[ "$output" == *"unsupported multi-arch platform set: linux/amd64,linux/s390x"* ]] + [ ! -f "$FAKE_CURL_LOG" ] + [ ! -f "$FAKE_MISE_LOG" ] +} + +@test "ci-build-cluster-image.sh rejects --runtime-bundle-url in multi-arch mode" { + local harness_root + harness_root="$(make_ci_harness)" + + run env \ + PATH="$FAKE_BIN_DIR:$PATH" \ + IMAGE_REGISTRY=ghcr.io/nvidia/openshell \ + bash -lc "cd '$harness_root' && bash tasks/scripts/ci-build-cluster-image.sh --platform linux/amd64,linux/arm64 --runtime-bundle-url https://example.com/runtime-bundle.tar.gz --runtime-bundle-url-amd64 https://example.com/runtime-bundle-amd64.tar.gz --runtime-bundle-url-arm64 https://example.com/runtime-bundle-arm64.tar.gz" + + [ "$status" -ne 0 ] + [[ "$output" == *"--runtime-bundle-url is not supported for multi-arch builds; use --runtime-bundle-url-amd64 and --runtime-bundle-url-arm64"* ]] + [ ! -f "$FAKE_CURL_LOG" ] + [ ! 
-f "$FAKE_MISE_LOG" ] +} From d0f23c7068853184badd727c9f169968d712d264 Mon Sep 17 00:00:00 2001 From: Arne Brune Olsen Date: Thu, 19 Mar 2026 12:15:24 +0100 Subject: [PATCH 3/3] fix(docs): align fork-owned installer references --- .github/workflows/branch-e2e.yml | 1 + .github/workflows/release-canary.yml | 4 ++-- README.md | 2 +- docs/get-started/quickstart.md | 2 +- install.sh | 12 ++++++------ 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/branch-e2e.yml b/.github/workflows/branch-e2e.yml index 5ab0c399..244a6b29 100644 --- a/.github/workflows/branch-e2e.yml +++ b/.github/workflows/branch-e2e.yml @@ -37,3 +37,4 @@ jobs: image-tag: ${{ github.sha }} runner: build-arm64 run-tool-smoke-validations: true + run-installer-selection-smoke: true diff --git a/.github/workflows/release-canary.yml b/.github/workflows/release-canary.yml index c4627b7e..e7f2eec5 100644 --- a/.github/workflows/release-canary.yml +++ b/.github/workflows/release-canary.yml @@ -47,7 +47,7 @@ jobs: - name: Install CLI (default / latest) run: | set -euo pipefail - curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | sh + curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | sh - name: Verify CLI installation run: | @@ -132,7 +132,7 @@ jobs: - name: Install CLI from published install script run: | set -euo pipefail - curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | OPENSHELL_VERSION=${{ steps.release.outputs.tag }} OPENSHELL_INSTALL_DIR=/usr/local/bin sh + curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | OPENSHELL_VERSION=${{ steps.release.outputs.tag }} OPENSHELL_INSTALL_DIR=/usr/local/bin sh - name: Verify CLI installation run: | diff --git a/README.md b/README.md index 1800a468..ec86068a 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ OpenShell is built agent-first. 
The project ships with agent skills for everythi **Binary (recommended):** ```bash -curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | sh +curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | sh ``` **From PyPI (requires [uv](https://docs.astral.sh/uv/)):** diff --git a/docs/get-started/quickstart.md b/docs/get-started/quickstart.md index 5f3607c1..0eb450d8 100644 --- a/docs/get-started/quickstart.md +++ b/docs/get-started/quickstart.md @@ -41,7 +41,7 @@ For a complete list of requirements, refer to {doc}`../reference/support-matrix` Run the install script: ```console -$ curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | sh +$ curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | sh ``` If you prefer [uv](https://docs.astral.sh/uv/): diff --git a/install.sh b/install.sh index 191b0ecd..4980321f 100755 --- a/install.sh +++ b/install.sh @@ -5,7 +5,7 @@ # Install the OpenShell CLI binary. 
# # Usage: -# curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | sh +# curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | sh # # Or run directly: # ./install.sh @@ -49,7 +49,7 @@ usage() { install.sh — Install the OpenShell CLI USAGE: - curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | sh + curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | sh ./install.sh [OPTIONS] OPTIONS: @@ -68,16 +68,16 @@ ENVIRONMENT VARIABLES: EXAMPLES: # Install latest release - curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | sh + curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | sh # Install a specific version - curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | OPENSHELL_VERSION=v0.0.9 sh + curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | OPENSHELL_VERSION=v0.0.9 sh # Install to /usr/local/bin - curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | OPENSHELL_INSTALL_DIR=/usr/local/bin sh + curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | OPENSHELL_INSTALL_DIR=/usr/local/bin sh # Install from a different fork release repo - curl -LsSf https://raw.githubusercontent.com/abols/OpenShell/main/install.sh | OPENSHELL_RELEASE_REPO=example/custom-openshell sh + curl -LsSf https://raw.githubusercontent.com/linuxdevel/OpenShell/main/install.sh | OPENSHELL_RELEASE_REPO=example/custom-openshell sh # Validate later setup selections while installing the CLI ./install.sh --tool claude-code --vendor anthropic --model-path claude-sonnet-4