From f9ade1940c7fd32084d1b48b42db4d07efed5a9d Mon Sep 17 00:00:00 2001
From: tyeth <tyethgundry@googlemail.com>
Date: Tue, 17 Mar 2026 20:09:27 +0000
Subject: [PATCH 1/3] fix(gpu): add WSL2 GPU support via CDI mode and bundle
 device plugin chart

WSL2 virtualises GPU access through /dev/dxg instead of native /dev/nvidia*
device nodes, which breaks the entire NVIDIA k8s device plugin detection
chain. Three changes fix this:

1. Detect WSL2 in cluster-entrypoint.sh and configure CDI mode:
   - Generate CDI spec with nvidia-ctk (auto-detects WSL mode)
   - Patch the spec to include libdxcore.so (nvidia-ctk bug omits it)
   - Switch nvidia-container-runtime from auto to cdi mode
   - Deploy a job to label the node with
     feature.node.kubernetes.io/pci-10de.present=true
     (NFD can't see NVIDIA PCI on WSL2's virtualised bus)

2. Bundle the nvidia-device-plugin Helm chart in the cluster image
   instead of fetching from the upstream GitHub Pages repo at startup.
   The repo URL (nvidia.github.io/k8s-device-plugin/index.yaml)
   currently returns 404.

3. Update the HelmChart CR to reference the bundled local chart
   tarball via the k3s static charts API endpoint.

Closes NVIDIA/OpenShell#404
---
 deploy/docker/cluster-entrypoint.sh           | 96 +++++++++++++++++++
 .../nvidia-device-plugin-helmchart.yaml       |  4 +-
 tasks/scripts/docker-build-cluster.sh         | 12 +++
 tasks/scripts/docker-publish-multiarch.sh     | 12 +++
 4 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh
index 19fae35d..221eebb7 100644
--- a/deploy/docker/cluster-entrypoint.sh
+++ b/deploy/docker/cluster-entrypoint.sh
@@ -328,6 +328,102 @@ if [ "${GPU_ENABLED:-}" = "true" ]; then
             cp "$manifest" "$K3S_MANIFESTS/"
         done
     fi
+
+    # -------------------------------------------------------------------
+    # WSL2 GPU support: CDI mode + libdxcore.so injection + node labeling
+    # -------------------------------------------------------------------
+    # WSL2 virtualises GPU access through /dev/dxg instead of native
+    # /dev/nvidia* device nodes, which breaks GPU setup in two places:
+    #   1. Legacy nvidia-container-runtime injection: NVML can't initialise
+    #      without libdxcore.so (the bridge between Linux NVML and the
+    #      Windows DirectX GPU Kernel via /dev/dxg)
+    #   2. Node labelling: NFD can't detect the NVIDIA PCI vendor because
+    #      WSL2 hides the PCI topology
+    #
+    # Fix: switch to CDI mode, patch the CDI spec with libdxcore.so, and add a
+    # k3s manifest that labels the node for the device plugin DaemonSet affinity.
+    if [ -c /dev/dxg ]; then
+        echo "WSL2 detected (/dev/dxg present) — configuring CDI mode for GPU"
+
+        # 1. Generate CDI spec (nvidia-ctk auto-detects WSL mode)
+        if command -v nvidia-ctk >/dev/null 2>&1; then
+            mkdir -p /var/run/cdi
+            nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml 2>&1 || true
+
+            # 2. Patch CDI spec: add libdxcore.so mount (nvidia-ctk misses it)
+            DXCORE_PATH=$(find /usr/lib -name "libdxcore.so" 2>/dev/null | head -1)
+            if [ -n "$DXCORE_PATH" ] && [ -f /var/run/cdi/nvidia.yaml ]; then
+                DXCORE_DIR=$(dirname "$DXCORE_PATH")
+                # Insert libdxcore mount after the mounts: key
+                sed -i "/^    mounts:/a\\
+        - hostPath: $DXCORE_PATH\\
+          containerPath: $DXCORE_PATH\\
+          options:\\
+            - ro\\
+            - nosuid\\
+            - nodev\\
+            - rbind\\
+            - rprivate" /var/run/cdi/nvidia.yaml
+                # Add ldcache folder for libdxcore directory
+                sed -i "s|update-ldcache|update-ldcache\n            - --folder\n            - $DXCORE_DIR|" /var/run/cdi/nvidia.yaml
+                echo "CDI spec patched with libdxcore.so from $DXCORE_PATH"
+            else
+                echo "Warning: libdxcore.so not found — NVML may fail inside pods"
+            fi
+        fi
+
+        # 3. Switch nvidia container runtime to CDI mode
+        NVIDIA_RUNTIME_CONFIG="/etc/nvidia-container-runtime/config.toml"
+        if [ -f "$NVIDIA_RUNTIME_CONFIG" ]; then
+            sed -i 's/mode = "auto"/mode = "cdi"/' "$NVIDIA_RUNTIME_CONFIG"
+            echo "nvidia-container-runtime switched to CDI mode"
+        fi
+
+        # 4. Create a k3s manifest to label the node with NVIDIA PCI vendor
+        #    (NFD can't detect it on WSL2 since PCI topology is virtualised)
+        cat > "$K3S_MANIFESTS/wsl2-gpu-node-label.yaml" <<'WSLEOF'
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: wsl2-gpu-node-label
+  namespace: kube-system
+spec:
+  template:
+    spec:
+      serviceAccountName: default
+      hostNetwork: true
+      tolerations:
+        - operator: Exists
+      containers:
+        - name: label
+          image: rancher/mirrored-library-busybox:1.37.0
+          command:
+            - /bin/sh
+            - -c
+            - |
+              # Wait for the API server, then label the node
+              until wget -qO- --no-check-certificate https://kubernetes.default.svc/api/v1/nodes 2>/dev/null | grep -q '"items"'; do
+                sleep 2
+              done
+              NODE=$(wget -qO- --no-check-certificate \
+                -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \
+                https://kubernetes.default.svc/api/v1/nodes 2>/dev/null \
+                | sed -n 's/.*"name":"\([^"]*\)".*/\1/p' | head -1)
+              if [ -n "$NODE" ]; then
+                wget -qO- --no-check-certificate \
+                  -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \
+                  -H "Content-Type: application/strategic-merge-patch+json" \
+                  --method=PATCH \
+                  --body-data='{"metadata":{"labels":{"feature.node.kubernetes.io/pci-10de.present":"true"}}}' \
+                  "https://kubernetes.default.svc/api/v1/nodes/$NODE" >/dev/null 2>&1 \
+                  && echo "Labeled node $NODE with pci-10de.present=true" \
+                  || echo "Warning: failed to label node $NODE"
+              fi
+      restartPolicy: OnFailure
+  backoffLimit: 10
+WSLEOF
+        echo "WSL2 GPU node-label job manifest installed"
+    fi
 fi
 
 # ---------------------------------------------------------------------------
diff --git a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml
index 57503d31..4aa6743d 100644
--- a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml
+++ b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml
@@ -19,9 +19,7 @@ metadata:
   name: nvidia-device-plugin
   namespace: kube-system
 spec:
-  repo: https://nvidia.github.io/k8s-device-plugin
-  chart: nvidia-device-plugin
-  version: "0.18.2"
+  chart: https://%{KUBERNETES_API}%/static/charts/nvidia-device-plugin-0.18.2.tgz
   targetNamespace: nvidia-device-plugin
   createNamespace: true
   valuesContent: |-
diff --git a/tasks/scripts/docker-build-cluster.sh b/tasks/scripts/docker-build-cluster.sh
index 80dc2a48..ef9f9393 100755
--- a/tasks/scripts/docker-build-cluster.sh
+++ b/tasks/scripts/docker-build-cluster.sh
@@ -53,6 +53,18 @@ mkdir -p deploy/docker/.build/charts
 echo "Packaging helm chart..."
 helm package deploy/helm/openshell -d deploy/docker/.build/charts/
 
+# Download nvidia-device-plugin chart for GPU support (bundled to avoid
+# dependency on the upstream GitHub Pages Helm repo at cluster start time)
+NVIDIA_DP_VERSION="0.18.2"
+NVIDIA_DP_CHART="deploy/docker/.build/charts/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz"
+if [ ! -f "$NVIDIA_DP_CHART" ]; then
+    echo "Downloading nvidia-device-plugin chart v${NVIDIA_DP_VERSION}..."
+    curl -fsSL "https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/gh-pages/stable/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" \
+        -o "$NVIDIA_DP_CHART" || {
+        echo "Warning: failed to download nvidia-device-plugin chart; GPU support may not work"
+    }
+fi
+
 # Build cluster image (no bundled component images — they are pulled at runtime
 # from the distribution registry; credentials are injected at deploy time)
 echo "Building cluster image..."
diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh
index 7bb6dc84..7395ed0c 100755
--- a/tasks/scripts/docker-publish-multiarch.sh
+++ b/tasks/scripts/docker-publish-multiarch.sh
@@ -176,6 +176,18 @@ mkdir -p deploy/docker/.build/charts
 echo "Packaging helm chart..."
 helm package deploy/helm/openshell -d deploy/docker/.build/charts/
 
+# Download nvidia-device-plugin chart for GPU support (bundled to avoid
+# dependency on the upstream GitHub Pages Helm repo at cluster start time)
+NVIDIA_DP_VERSION="0.18.2"
+NVIDIA_DP_CHART="deploy/docker/.build/charts/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz"
+if [ ! -f "$NVIDIA_DP_CHART" ]; then
+    echo "Downloading nvidia-device-plugin chart v${NVIDIA_DP_VERSION}..."
+    curl -fsSL "https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/gh-pages/stable/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" \
+        -o "$NVIDIA_DP_CHART" || {
+        echo "Warning: failed to download nvidia-device-plugin chart; GPU support may not work"
+    }
+fi
+
 # ---------------------------------------------------------------------------
 # Step 3: Build and push multi-arch cluster image.
 # The cluster image includes the supervisor binary (built from Rust source)

From af1ae24e82c1707a5ce42deaa2babad1fc1ace94 Mon Sep 17 00:00:00 2001
From: tyeth <tyethgundry@googlemail.com>
Date: Tue, 17 Mar 2026 20:20:45 +0000
Subject: [PATCH 2/3] fix(gpu): revert helm chart bundling, keep only WSL2 CDI
 fix

The upstream Helm repo URL works fine; remove the unnecessary chart
bundling and local reference changes.
---
 .../nvidia-device-plugin-helmchart.yaml              |  4 +++-
 tasks/scripts/docker-build-cluster.sh                | 12 ------------
 tasks/scripts/docker-publish-multiarch.sh            | 12 ------------
 3 files changed, 3 insertions(+), 25 deletions(-)

diff --git a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml
index 4aa6743d..57503d31 100644
--- a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml
+++ b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml
@@ -19,7 +19,9 @@ metadata:
   name: nvidia-device-plugin
   namespace: kube-system
 spec:
-  chart: https://%{KUBERNETES_API}%/static/charts/nvidia-device-plugin-0.18.2.tgz
+  repo: https://nvidia.github.io/k8s-device-plugin
+  chart: nvidia-device-plugin
+  version: "0.18.2"
   targetNamespace: nvidia-device-plugin
   createNamespace: true
   valuesContent: |-
diff --git a/tasks/scripts/docker-build-cluster.sh b/tasks/scripts/docker-build-cluster.sh
index ef9f9393..80dc2a48 100755
--- a/tasks/scripts/docker-build-cluster.sh
+++ b/tasks/scripts/docker-build-cluster.sh
@@ -53,18 +53,6 @@ mkdir -p deploy/docker/.build/charts
 echo "Packaging helm chart..."
 helm package deploy/helm/openshell -d deploy/docker/.build/charts/
 
-# Download nvidia-device-plugin chart for GPU support (bundled to avoid
-# dependency on the upstream GitHub Pages Helm repo at cluster start time)
-NVIDIA_DP_VERSION="0.18.2"
-NVIDIA_DP_CHART="deploy/docker/.build/charts/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz"
-if [ ! -f "$NVIDIA_DP_CHART" ]; then
-    echo "Downloading nvidia-device-plugin chart v${NVIDIA_DP_VERSION}..."
-    curl -fsSL "https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/gh-pages/stable/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" \
-        -o "$NVIDIA_DP_CHART" || {
-        echo "Warning: failed to download nvidia-device-plugin chart; GPU support may not work"
-    }
-fi
-
 # Build cluster image (no bundled component images — they are pulled at runtime
 # from the distribution registry; credentials are injected at deploy time)
 echo "Building cluster image..."
diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh
index 7395ed0c..7bb6dc84 100755
--- a/tasks/scripts/docker-publish-multiarch.sh
+++ b/tasks/scripts/docker-publish-multiarch.sh
@@ -176,18 +176,6 @@ mkdir -p deploy/docker/.build/charts
 echo "Packaging helm chart..."
 helm package deploy/helm/openshell -d deploy/docker/.build/charts/
 
-# Download nvidia-device-plugin chart for GPU support (bundled to avoid
-# dependency on the upstream GitHub Pages Helm repo at cluster start time)
-NVIDIA_DP_VERSION="0.18.2"
-NVIDIA_DP_CHART="deploy/docker/.build/charts/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz"
-if [ ! -f "$NVIDIA_DP_CHART" ]; then
-    echo "Downloading nvidia-device-plugin chart v${NVIDIA_DP_VERSION}..."
-    curl -fsSL "https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/gh-pages/stable/nvidia-device-plugin-${NVIDIA_DP_VERSION}.tgz" \
-        -o "$NVIDIA_DP_CHART" || {
-        echo "Warning: failed to download nvidia-device-plugin chart; GPU support may not work"
-    }
-fi
-
 # ---------------------------------------------------------------------------
 # Step 3: Build and push multi-arch cluster image.
 # The cluster image includes the supervisor binary (built from Rust source)

From dae043b80371d9a395ab4ef927565fb9c7ccf481 Mon Sep 17 00:00:00 2001
From: tyeth <tyethgundry@googlemail.com>
Date: Tue, 17 Mar 2026 20:51:42 +0000
Subject: [PATCH 3/3] fix(gpu): add WSL2 GPU support via CDI mode

WSL2 virtualises GPU access through /dev/dxg instead of native /dev/nvidia*
device nodes, which breaks the entire NVIDIA k8s device plugin detection
chain. This patch detects WSL2 at container startup and applies fixes:

1. Generate CDI spec with nvidia-ctk (auto-detects WSL mode)
2. Add per-GPU UUID and index device entries to CDI spec (nvidia-ctk
   only generates name=all but the device plugin assigns GPUs by UUID)
3. Bump CDI spec version from 0.3.0 to 0.5.0 (library minimum)
4. Patch the spec to include libdxcore.so (nvidia-ctk bug omits it;
   this library bridges Linux NVML to the Windows DirectX GPU Kernel)
5. Switch nvidia-container-runtime from auto to cdi mode
6. Deploy a job to label the node with
   feature.node.kubernetes.io/pci-10de.present=true
   (NFD can't see NVIDIA PCI on WSL2's virtualised bus)

Closes NVIDIA/OpenShell#404
---
 deploy/docker/cluster-entrypoint.sh | 31 ++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh
index 221eebb7..acf58aab 100644
--- a/deploy/docker/cluster-entrypoint.sh
+++ b/deploy/docker/cluster-entrypoint.sh
@@ -350,7 +350,32 @@ if [ "${GPU_ENABLED:-}" = "true" ]; then
             mkdir -p /var/run/cdi
             nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml 2>&1 || true
 
-            # 2. Patch CDI spec: add libdxcore.so mount (nvidia-ctk misses it)
+            # 2. Add per-GPU device entries (UUID and index) to CDI spec.
+            #    nvidia-ctk only generates name=all, but the device plugin
+            #    assigns GPUs by UUID which must resolve as a CDI device.
+            if [ -f /var/run/cdi/nvidia.yaml ] && command -v nvidia-smi >/dev/null 2>&1; then
+                idx=0
+                nvidia-smi --query-gpu=gpu_uuid --format=csv,noheader 2>/dev/null | while read -r uuid; do
+                    uuid=$(echo "$uuid" | tr -d ' ')
+                    [ -z "$uuid" ] && continue
+                    sed -i "/- name: all/a\\
+    - name: $uuid\\
+      containerEdits:\\
+        deviceNodes:\\
+            - path: /dev/dxg\\
+    - name: \"$idx\"\\
+      containerEdits:\\
+        deviceNodes:\\
+            - path: /dev/dxg" /var/run/cdi/nvidia.yaml
+                    idx=$((idx + 1))
+                done
+                # 3. Bump cdiVersion: nvidia-ctk cdi generate emits 0.3.0 but
+                #    the installed CDI library requires >= 0.5.0
+                sed -i 's/cdiVersion: 0\.3\.0/cdiVersion: 0.5.0/' /var/run/cdi/nvidia.yaml
+                echo "CDI spec: added per-GPU UUID and index device entries"
+            fi
+
+            # 4. Patch CDI spec: add libdxcore.so mount (nvidia-ctk misses it)
             DXCORE_PATH=$(find /usr/lib -name "libdxcore.so" 2>/dev/null | head -1)
             if [ -n "$DXCORE_PATH" ] && [ -f /var/run/cdi/nvidia.yaml ]; then
                 DXCORE_DIR=$(dirname "$DXCORE_PATH")
@@ -372,14 +397,14 @@ if [ "${GPU_ENABLED:-}" = "true" ]; then
             fi
         fi
 
-        # 3. Switch nvidia container runtime to CDI mode
+        # 5. Switch nvidia container runtime to CDI mode
         NVIDIA_RUNTIME_CONFIG="/etc/nvidia-container-runtime/config.toml"
         if [ -f "$NVIDIA_RUNTIME_CONFIG" ]; then
             sed -i 's/mode = "auto"/mode = "cdi"/' "$NVIDIA_RUNTIME_CONFIG"
             echo "nvidia-container-runtime switched to CDI mode"
         fi
 
-        # 4. Create a k3s manifest to label the node with NVIDIA PCI vendor
+        # 6. Create a k3s manifest to label the node with NVIDIA PCI vendor
         #    (NFD can't detect it on WSL2 since PCI topology is virtualised)
         cat > "$K3S_MANIFESTS/wsl2-gpu-node-label.yaml" <<'WSLEOF'
 apiVersion: batch/v1