From bc1d2f470054eb320295bcbf10df2630631171d3 Mon Sep 17 00:00:00 2001 From: Arjun Date: Fri, 7 Nov 2025 18:23:22 +0000 Subject: [PATCH 1/2] update vfio validation Signed-off-by: Arjun --- cmd/nvidia-validator/main.go | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/cmd/nvidia-validator/main.go b/cmd/nvidia-validator/main.go index 3dd1f5224..82df7a554 100644 --- a/cmd/nvidia-validator/main.go +++ b/cmd/nvidia-validator/main.go @@ -1734,6 +1734,31 @@ func (v *VGPUDevices) validate() error { } func (v *VGPUDevices) runValidation() error { + nvpci := nvpci.New() + GPUDevices, err := nvpci.GetGPUs() + if err != nil { + return fmt.Errorf("error checking for GPU devices on the host: %w", err) + } + + mdevBusPath := "/sys/class/mdev_bus" + entries, err := os.ReadDir(mdevBusPath) + if err != nil { + return fmt.Errorf("unable to read mdev_bus directory: %v", err) + } + + if len(entries) == 0 { + for _, device := range GPUDevices { + if device.SriovInfo.PhysicalFunction == nil { + continue + } + totalVF := int(device.SriovInfo.PhysicalFunction.TotalVFs) + if totalVF > 0 { + log.Infof("Found GPU device with SR-IOV VFs: %s (TotalVFs: %d)", device.Address, totalVF) + return nil + } + } + } + nvmdev := nvmdev.New() vGPUDevices, err := nvmdev.GetAllDevices() if err != nil { @@ -1746,14 +1771,14 @@ func (v *VGPUDevices) runValidation() error { return fmt.Errorf("no vGPU devices found") } - log.Infof("Found %d vGPU devices", numDevices) + log.Infof("Found %d MDEV vGPU devices", numDevices) return nil } for { numDevices := len(vGPUDevices) if numDevices > 0 { - log.Infof("Found %d vGPU devices", numDevices) + log.Infof("Found %d MDEV vGPU devices", numDevices) return nil } log.Infof("No vGPU devices found, retrying after %d seconds", sleepIntervalSecondsFlag) From 6a0e8b8af4aa9b6d04808f74bc3998d033f270e3 Mon Sep 17 00:00:00 2001 From: Arjun Date: Tue, 20 Jan 2026 19:28:45 +0000 Subject: [PATCH 2/2] switched to vfio check --- cmd/nvidia-validator/main.go | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/cmd/nvidia-validator/main.go b/cmd/nvidia-validator/main.go index 82df7a554..e154679e7 100644 --- a/cmd/nvidia-validator/main.go +++ b/cmd/nvidia-validator/main.go @@ -1740,23 +1740,14 @@ func (v *VGPUDevices) runValidation() error { return fmt.Errorf("error checking for GPU devices on the host: %w", err) } - mdevBusPath := "/sys/class/mdev_bus" - entries, err := os.ReadDir(mdevBusPath) - if err != nil { - return fmt.Errorf("unable to read mdev_bus directory: %v", err) - } - - if len(entries) == 0 { - for _, device := range GPUDevices { - if device.SriovInfo.PhysicalFunction == nil { - continue - } - totalVF := int(device.SriovInfo.PhysicalFunction.TotalVFs) - if totalVF > 0 { - log.Infof("Found GPU device with SR-IOV VFs: %s (TotalVFs: %d)", device.Address, totalVF) - return nil - } - } + for _, device := range GPUDevices { + creatableTypesFile := filepath.Join(device.Path, "virtfn0", "nvidia", "creatable_vgpu_types") + + _, statErr := os.Stat(creatableTypesFile) + if statErr == nil { + log.Infof("Found creatable_vgpu_types file for device: %s", device.Address) + return nil + } } nvmdev := nvmdev.New()