From f9ade1940c7fd32084d1b48b42db4d07efed5a9d Mon Sep 17 00:00:00 2001 From: tyeth Date: Tue, 17 Mar 2026 20:09:27 +0000 Subject: [PATCH 1/3] fix(gpu): add WSL2 GPU support via CDI mode and bundle device plugin chart WSL2 virtualises GPU access through /dev/dxg instead of native /dev/nvidia* device nodes, which breaks the entire NVIDIA k8s device plugin detection chain. Three changes fix this: 1. Detect WSL2 in cluster-entrypoint.sh and configure CDI mode: - Generate CDI spec with nvidia-ctk (auto-detects WSL mode) - Patch the spec to include libdxcore.so (nvidia-ctk bug omits it) - Switch nvidia-container-runtime from auto to cdi mode - Deploy a job to label the node with pci-10de.present=true (NFD can't see NVIDIA PCI on WSL2's virtualised bus) 2. Bundle the nvidia-device-plugin Helm chart in the cluster image instead of fetching from the upstream GitHub Pages repo at startup. The repo URL (nvidia.github.io/k8s-device-plugin/index.yaml) currently returns 404. 3. Update the HelmChart CR to reference the bundled local chart tarball via the k3s static charts API endpoint. Closes NVIDIA/OpenShell#404 --- deploy/docker/cluster-entrypoint.sh | 96 +++++++++++++++++++ .../nvidia-device-plugin-helmchart.yaml | 4 +- tasks/scripts/docker-build-cluster.sh | 12 +++ tasks/scripts/docker-publish-multiarch.sh | 12 +++ 4 files changed, 121 insertions(+), 3 deletions(-) diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 19fae35d..221eebb7 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -328,6 +328,102 @@ if [ "${GPU_ENABLED:-}" = "true" ]; then cp "$manifest" "$K3S_MANIFESTS/" done fi + + # ------------------------------------------------------------------- + # WSL2 GPU support: CDI mode + libdxcore.so injection + node labeling + # ------------------------------------------------------------------- + # WSL2 virtualises GPU access through /dev/dxg instead of native + # /dev/nvidia* device nodes. 
The legacy nvidia-container-runtime + # injection path fails because: + # 1. NVML can't initialise without libdxcore.so (the bridge between + # Linux NVML and the Windows DirectX GPU Kernel via /dev/dxg) + # 2. NFD can't detect NVIDIA PCI vendor (WSL2 hides PCI topology) + # + # Fix: switch to CDI mode, patch the CDI spec with libdxcore.so, and + # add a k3s manifest that labels the node for the device plugin + # DaemonSet affinity. + if [ -c /dev/dxg ]; then + echo "WSL2 detected (/dev/dxg present) — configuring CDI mode for GPU" + + # 1. Generate CDI spec (nvidia-ctk auto-detects WSL mode) + if command -v nvidia-ctk >/dev/null 2>&1; then + mkdir -p /var/run/cdi + nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml 2>&1 || true + + # 2. Patch CDI spec: add libdxcore.so mount (nvidia-ctk misses it) + DXCORE_PATH=$(find /usr/lib -name "libdxcore.so" 2>/dev/null | head -1) + if [ -n "$DXCORE_PATH" ] && [ -f /var/run/cdi/nvidia.yaml ]; then + DXCORE_DIR=$(dirname "$DXCORE_PATH") + # Insert libdxcore mount after the mounts: key + sed -i "/^ mounts:/a\\ + - hostPath: $DXCORE_PATH\\ + containerPath: $DXCORE_PATH\\ + options:\\ + - ro\\ + - nosuid\\ + - nodev\\ + - rbind\\ + - rprivate" /var/run/cdi/nvidia.yaml + # Add ldcache folder for libdxcore directory + sed -i "s|update-ldcache|update-ldcache\n - --folder\n - $DXCORE_DIR|" /var/run/cdi/nvidia.yaml + echo "CDI spec patched with libdxcore.so from $DXCORE_PATH" + else + echo "Warning: libdxcore.so not found — NVML may fail inside pods" + fi + fi + + # 3. Switch nvidia container runtime to CDI mode + NVIDIA_RUNTIME_CONFIG="/etc/nvidia-container-runtime/config.toml" + if [ -f "$NVIDIA_RUNTIME_CONFIG" ]; then + sed -i 's/mode = "auto"/mode = "cdi"/' "$NVIDIA_RUNTIME_CONFIG" + echo "nvidia-container-runtime switched to CDI mode" + fi + + # 4. 
Create a k3s manifest to label the node with NVIDIA PCI vendor + # (NFD can't detect it on WSL2 since PCI topology is virtualised) + cat > "$K3S_MANIFESTS/wsl2-gpu-node-label.yaml" <<'WSLEOF' +apiVersion: batch/v1 +kind: Job +metadata: + name: wsl2-gpu-node-label + namespace: kube-system +spec: + template: + spec: + serviceAccountName: default + hostNetwork: true + tolerations: + - operator: Exists + containers: + - name: label + image: rancher/mirrored-library-busybox:1.37.0 + command: + - /bin/sh + - -c + - | + # Wait for the API server, then label the node + until wget -qO- --no-check-certificate https://kubernetes.default.svc/api/v1/nodes 2>/dev/null | grep -q '"items"'; do + sleep 2 + done + NODE=$(wget -qO- --no-check-certificate \ + -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ + https://kubernetes.default.svc/api/v1/nodes 2>/dev/null \ + | sed -n 's/.*"name":"\([^"]*\)".*/\1/p' | head -1) + if [ -n "$NODE" ]; then + wget -qO- --no-check-certificate \ + -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ + -H "Content-Type: application/strategic-merge-patch+json" \ + --method=PATCH \ + --body-data='{"metadata":{"labels":{"feature.node.kubernetes.io/pci-10de.present":"true"}}}' \ + "https://kubernetes.default.svc/api/v1/nodes/$NODE" >/dev/null 2>&1 \ + && echo "Labeled node $NODE with pci-10de.present=true" \ + || echo "Warning: failed to label node $NODE" + fi + restartPolicy: OnFailure + backoffLimit: 10 +WSLEOF + echo "WSL2 GPU node-label job manifest installed" + fi fi # --------------------------------------------------------------------------- diff --git a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml index 57503d31..4aa6743d 100644 --- a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml +++ b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml @@ -19,9 +19,7 @@ metadata: 
name: nvidia-device-plugin namespace: kube-system spec: - repo: https://nvidia.github.io/k8s-device-plugin - chart: nvidia-device-plugin - version: "0.18.2" + chart: https://%{KUBERNETES_API}%/static/charts/nvidia-device-plugin-0.18.2.tgz targetNamespace: nvidia-device-plugin createNamespace: true valuesContent: |- diff --git a/tasks/scripts/docker-build-cluster.sh b/tasks/scripts/docker-build-cluster.sh index 80dc2a48..ef9f9393 100755 --- a/tasks/scripts/docker-build-cluster.sh +++ b/tasks/scripts/docker-build-cluster.sh @@ -53,6 +53,18 @@ mkdir -p deploy/docker/.build/charts echo "Packaging helm chart..." helm package deploy/helm/openshell -d deploy/docker/.build/charts/ +# Download nvidia-device-plugin chart for GPU support (bundled to avoid +# dependency on the upstream GitHub Pages Helm repo at cluster start time) +NVIDIA_DP_VERSION="0.18.2" +NVIDIA_DP_CHART="deploy/docker/.build/charts/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" +if [ ! -f "$NVIDIA_DP_CHART" ]; then + echo "Downloading nvidia-device-plugin chart v${NVIDIA_DP_VERSION}..." + curl -fsSL "https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/gh-pages/stable/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" \ + -o "$NVIDIA_DP_CHART" || { + echo "Warning: failed to download nvidia-device-plugin chart; GPU support may not work" + } +fi + # Build cluster image (no bundled component images — they are pulled at runtime # from the distribution registry; credentials are injected at deploy time) echo "Building cluster image..." diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh index 7bb6dc84..7395ed0c 100755 --- a/tasks/scripts/docker-publish-multiarch.sh +++ b/tasks/scripts/docker-publish-multiarch.sh @@ -176,6 +176,18 @@ mkdir -p deploy/docker/.build/charts echo "Packaging helm chart..." 
helm package deploy/helm/openshell -d deploy/docker/.build/charts/ +# Download nvidia-device-plugin chart for GPU support (bundled to avoid +# dependency on the upstream GitHub Pages Helm repo at cluster start time) +NVIDIA_DP_VERSION="0.18.2" +NVIDIA_DP_CHART="deploy/docker/.build/charts/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" +if [ ! -f "$NVIDIA_DP_CHART" ]; then + echo "Downloading nvidia-device-plugin chart v${NVIDIA_DP_VERSION}..." + curl -fsSL "https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/gh-pages/stable/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" \ + -o "$NVIDIA_DP_CHART" || { + echo "Warning: failed to download nvidia-device-plugin chart; GPU support may not work" + } +fi + # --------------------------------------------------------------------------- # Step 3: Build and push multi-arch cluster image. # The cluster image includes the supervisor binary (built from Rust source) From af1ae24e82c1707a5ce42deaa2babad1fc1ace94 Mon Sep 17 00:00:00 2001 From: tyeth Date: Tue, 17 Mar 2026 20:20:45 +0000 Subject: [PATCH 2/3] fix(gpu): revert helm chart bundling, keep only WSL2 CDI fix The upstream Helm repo URL works fine; remove the unnecessary chart bundling and local reference changes. 
--- .../nvidia-device-plugin-helmchart.yaml | 4 +++- tasks/scripts/docker-build-cluster.sh | 12 ------------ tasks/scripts/docker-publish-multiarch.sh | 12 ------------ 3 files changed, 3 insertions(+), 25 deletions(-) diff --git a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml index 4aa6743d..57503d31 100644 --- a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml +++ b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml @@ -19,7 +19,9 @@ metadata: name: nvidia-device-plugin namespace: kube-system spec: - chart: https://%{KUBERNETES_API}%/static/charts/nvidia-device-plugin-0.18.2.tgz + repo: https://nvidia.github.io/k8s-device-plugin + chart: nvidia-device-plugin + version: "0.18.2" targetNamespace: nvidia-device-plugin createNamespace: true valuesContent: |- diff --git a/tasks/scripts/docker-build-cluster.sh b/tasks/scripts/docker-build-cluster.sh index ef9f9393..80dc2a48 100755 --- a/tasks/scripts/docker-build-cluster.sh +++ b/tasks/scripts/docker-build-cluster.sh @@ -53,18 +53,6 @@ mkdir -p deploy/docker/.build/charts echo "Packaging helm chart..." helm package deploy/helm/openshell -d deploy/docker/.build/charts/ -# Download nvidia-device-plugin chart for GPU support (bundled to avoid -# dependency on the upstream GitHub Pages Helm repo at cluster start time) -NVIDIA_DP_VERSION="0.18.2" -NVIDIA_DP_CHART="deploy/docker/.build/charts/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" -if [ ! -f "$NVIDIA_DP_CHART" ]; then - echo "Downloading nvidia-device-plugin chart v${NVIDIA_DP_VERSION}..." 
- curl -fsSL "https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/gh-pages/stable/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" \ - -o "$NVIDIA_DP_CHART" || { - echo "Warning: failed to download nvidia-device-plugin chart; GPU support may not work" - } -fi - # Build cluster image (no bundled component images — they are pulled at runtime # from the distribution registry; credentials are injected at deploy time) echo "Building cluster image..." diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh index 7395ed0c..7bb6dc84 100755 --- a/tasks/scripts/docker-publish-multiarch.sh +++ b/tasks/scripts/docker-publish-multiarch.sh @@ -176,18 +176,6 @@ mkdir -p deploy/docker/.build/charts echo "Packaging helm chart..." helm package deploy/helm/openshell -d deploy/docker/.build/charts/ -# Download nvidia-device-plugin chart for GPU support (bundled to avoid -# dependency on the upstream GitHub Pages Helm repo at cluster start time) -NVIDIA_DP_VERSION="0.18.2" -NVIDIA_DP_CHART="deploy/docker/.build/charts/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" -if [ ! -f "$NVIDIA_DP_CHART" ]; then - echo "Downloading nvidia-device-plugin chart v${NVIDIA_DP_VERSION}..." - curl -fsSL "https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/gh-pages/stable/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" \ - -o "$NVIDIA_DP_CHART" || { - echo "Warning: failed to download nvidia-device-plugin chart; GPU support may not work" - } -fi - # --------------------------------------------------------------------------- # Step 3: Build and push multi-arch cluster image. 
# The cluster image includes the supervisor binary (built from Rust source) From dae043b80371d9a395ab4ef927565fb9c7ccf481 Mon Sep 17 00:00:00 2001 From: tyeth Date: Tue, 17 Mar 2026 20:51:42 +0000 Subject: [PATCH 3/3] fix(gpu): add WSL2 GPU support via CDI mode WSL2 virtualises GPU access through /dev/dxg instead of native /dev/nvidia* device nodes, which breaks the entire NVIDIA k8s device plugin detection chain. This patch detects WSL2 at container startup and applies fixes: 1. Generate CDI spec with nvidia-ctk (auto-detects WSL mode) 2. Add per-GPU UUID and index device entries to CDI spec (nvidia-ctk only generates name=all but the device plugin assigns GPUs by UUID) 3. Bump CDI spec version from 0.3.0 to 0.5.0 (library minimum) 4. Patch the spec to include libdxcore.so (nvidia-ctk bug omits it; this library bridges Linux NVML to the Windows DirectX GPU Kernel) 5. Switch nvidia-container-runtime from auto to cdi mode 6. Deploy a job to label the node with pci-10de.present=true (NFD can't see NVIDIA PCI on WSL2's virtualised bus) Closes NVIDIA/OpenShell#404 --- deploy/docker/cluster-entrypoint.sh | 31 ++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 221eebb7..acf58aab 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -350,7 +350,32 @@ if [ "${GPU_ENABLED:-}" = "true" ]; then mkdir -p /var/run/cdi nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml 2>&1 || true - # 2. Patch CDI spec: add libdxcore.so mount (nvidia-ctk misses it) + # 2. Add per-GPU device entries (UUID and index) to CDI spec. + # nvidia-ctk only generates name=all, but the device plugin + # assigns GPUs by UUID which must resolve as a CDI device. 
+ if [ -f /var/run/cdi/nvidia.yaml ] && command -v nvidia-smi >/dev/null 2>&1; then + idx=0 + nvidia-smi --query-gpu=gpu_uuid --format=csv,noheader 2>/dev/null | while read -r uuid; do + uuid=$(echo "$uuid" | tr -d ' ') + [ -z "$uuid" ] && continue + sed -i "/- name: all/a\\ + - name: $uuid\\ + containerEdits:\\ + deviceNodes:\\ + - path: /dev/dxg\\ + - name: \"$idx\"\\ + containerEdits:\\ + deviceNodes:\\ + - path: /dev/dxg" /var/run/cdi/nvidia.yaml + idx=$((idx + 1)) + done + # 3. Bump cdiVersion: nvidia-ctk cdi generate uses cdiVersion 0.3.0 but the + # installed CDI library requires >= 0.5.0 + sed -i 's/cdiVersion: 0\.3\.0/cdiVersion: 0.5.0/' /var/run/cdi/nvidia.yaml + echo "CDI spec: added per-GPU UUID and index device entries" + fi + + # 4. Patch CDI spec: add libdxcore.so mount (nvidia-ctk misses it) DXCORE_PATH=$(find /usr/lib -name "libdxcore.so" 2>/dev/null | head -1) if [ -n "$DXCORE_PATH" ] && [ -f /var/run/cdi/nvidia.yaml ]; then DXCORE_DIR=$(dirname "$DXCORE_PATH") @@ -372,14 +397,14 @@ if [ "${GPU_ENABLED:-}" = "true" ]; then fi fi - # 3. Switch nvidia container runtime to CDI mode + # 5. Switch nvidia container runtime to CDI mode NVIDIA_RUNTIME_CONFIG="/etc/nvidia-container-runtime/config.toml" if [ -f "$NVIDIA_RUNTIME_CONFIG" ]; then sed -i 's/mode = "auto"/mode = "cdi"/' "$NVIDIA_RUNTIME_CONFIG" echo "nvidia-container-runtime switched to CDI mode" fi - # 4. Create a k3s manifest to label the node with NVIDIA PCI vendor + # 6. Create a k3s manifest to label the node with NVIDIA PCI vendor # (NFD can't detect it on WSL2 since PCI topology is virtualised) cat > "$K3S_MANIFESTS/wsl2-gpu-node-label.yaml" <<'WSLEOF' apiVersion: batch/v1