12 changes: 6 additions & 6 deletions dask-metrics/README.md

````diff
@@ -177,21 +177,21 @@ Additionally, the box plot has the `freq` parameter (default 15), which controls

 In the case that you might want to track some metric that is not provided by default, you can use custom metric functions to get the job done.
 
-Each custom metric function must take 2 arguments, even if they are unused: a `worker` object and a pynvml device `handle` (or handles).
+Each custom metric function must take 2 arguments, even if they are unused: a `worker` object and a `cuda.core.system.Device` `device` (or a list of devices).
 
-An example custom metric function that tracks power usage across GPUs in a cluster might look like this:
+An example custom metric function that tracks fan speed across GPUs in a cluster might look like this:
 
 ```python
 from dask_metrics import custom_metric
 
-@custom_metric('power')
-def power_usage(worker, handle):
-    return pynvml.nvmlDeviceGetPowerUsage(handle)
+@custom_metric('fan')
+def fan_speed(worker, device):
+    return device.get_fan(0).speed
 ```
 
 You use the `custom_metric` decorator to indicate the name of the metric (what will show up as the column name) and whether the metric is run for each device on a worker separately or for all devices at once (by setting the `per_device` kwarg in the decorator to `False`).
 
-The previous example is a case of the former and `handle` represents the single device handle the function is run for. In the case of the latter, `handle` is passed a list of the pynvml device handles instead, allowing you to compare the values for each GPU at a point in time.
+The previous example is a case of the former, and `device` represents the single device the function is run for. In the latter case, `device` is passed a list of `cuda.core.system.Device` objects instead, allowing you to compare the values for each GPU at a point in time.
 
 Note that custom metric functions are also passed a reference to the `Worker` object representing each worker, giving you access to task states and anything else Dask is up to.
````
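The aggregate case is not illustrated in the README diff itself; a minimal sketch of a `per_device=False` metric under the new API might look like the following. The `mem-imbalance` name is hypothetical, and `memory_info.used` follows the attribute the monitor itself reads in `monitor.py` below:

```python
from dask_metrics import custom_metric

# Hypothetical aggregate metric: with per_device=False the function is called
# once per poll, and `devices` is the full list of cuda.core.system.Device
# objects on the worker rather than a single device.
@custom_metric('mem-imbalance', per_device=False)
def mem_imbalance(worker, devices):
    used = [d.memory_info.used for d in devices]
    return max(used) - min(used)  # spread between busiest and idlest GPU, in bytes
```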
27 changes: 13 additions & 14 deletions dask-metrics/dask_metrics/monitor.py

````diff
@@ -11,10 +11,12 @@
 import cudf
 import asyncio
 import time
-import pynvml
 import re
 
 
+from cuda.core import system
+
+
 def custom_metric(name, per_device=True):
     def metric(func):
         return {"op": func, "name": name, "per_device": per_device}
@@ -595,7 +597,6 @@ def setup(self, worker):
     def start_recording(self):
         self.start = time.time()
         self.stop = 0
-        pynvml.nvmlInit()
 
         # start loop
         self.loop = asyncio.ensure_future(self.log_metrics())
@@ -604,7 +605,6 @@ def stop_recording(self):
         self.stop = time.time()
         if not self.loop.cancelled():
             self.loop.cancel()  # shut down the loop
-        pynvml.nvmlShutdown()
 
     async def shutdown(self):
         self.stop_recording()
@@ -666,16 +666,16 @@ def op(func):
             return op
 
         @operation("total-mem")
-        def total_mem(worker, handle):
-            return pynvml.nvmlDeviceGetMemoryInfo(handle).used
+        def total_mem(worker, device):
+            return device.memory_info.used
 
         @operation("mem-util")
-        def mem_util(worker, handle):
-            return pynvml.nvmlDeviceGetUtilizationRates(handle).memory
+        def mem_util(worker, device):
+            return device.utilization.memory
 
         @operation("compute-util")
-        def compute_util(worker, handle):
-            return pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
+        def compute_util(worker, device):
+            return device.utilization.gpu
 
         self.tracking_list = [o for o in operations if o["name"] in tracking]
         self.tracking_list += custom
@@ -698,7 +698,7 @@ def dump_partial(self):
         self.metrics_on_disk = True
 
     async def log_metrics(self):
-        ## the loop that polls the gpu for metrics using pynvml
+        ## the loop that polls the gpu for metrics using cuda.core.system
         while self.stop == 0:
             # universal metrics always tracked
             self.metrics["job"].append(self.job_number)
@@ -716,13 +716,12 @@ async def log_metrics(self):

     def device_info(self, operation):
         ## applies operation over all device handles and returns the results
-        device_count = pynvml.nvmlDeviceGetCount()
-        handles = [pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(device_count)]
+        devices = list(system.Device.get_all_devices())
         if operation["per_device"]:
-            results = [str(operation["op"](self.worker, handle)) for handle in handles]
+            results = [str(operation["op"](self.worker, device)) for device in devices]
             return ", ".join(results)  # join together with commas
         else:
-            return operation["op"](self.worker, handles)
+            return operation["op"](self.worker, devices)
 
 
 class MetricState(Enum):
````
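To make the per-device vs. aggregate dispatch in `device_info` concrete, here is a minimal standalone sketch. It substitutes `SimpleNamespace` stubs for `cuda.core.system.Device` objects (so it runs without a GPU), and the metric dicts mirror the shape the `custom_metric`/`operation` decorators produce; the `max-mem` metric and the stub values are made up for illustration:

```python
from types import SimpleNamespace

# Stub devices standing in for cuda.core.system.Device (values are invented).
devices = [SimpleNamespace(memory_info=SimpleNamespace(used=u)) for u in (512, 2048)]

# Metric dicts in the shape {"op": ..., "name": ..., "per_device": ...}.
per_device = {"op": lambda worker, d: d.memory_info.used,
              "name": "total-mem", "per_device": True}
aggregate = {"op": lambda worker, ds: max(d.memory_info.used for d in ds),
             "name": "max-mem", "per_device": False}

def device_info(operation, worker=None):
    # Mirrors Monitor.device_info: per-device metrics are evaluated once per
    # device and comma-joined into a single column value; aggregate metrics
    # receive the whole device list and return one value for the worker.
    if operation["per_device"]:
        return ", ".join(str(operation["op"](worker, d)) for d in devices)
    return operation["op"](worker, devices)

print(device_info(per_device))  # -> "512, 2048"
print(device_info(aggregate))   # -> 2048
```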