Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 26 additions & 20 deletions nvml-wrapper-sys/nvml.h
Original file line number Diff line number Diff line change
Expand Up @@ -2358,27 +2358,33 @@ typedef enum nvmlDeviceGpuRecoveryAction_s {
#define NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_13 248 //!< Count of symbol errors that are corrected - bin 13
#define NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_14 249 //!< Count of symbol errors that are corrected - bin 14
#define NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_15 250 //!< Count of symbol errors that are corrected - bin 15
/* Clock Event Reason and Sync Power Balancing */
#define NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN 251 //!< Throttling to ensure ((GPU temp < GPU Max Operating Temp) && (Memory Temp < Memory Max Operating Temp)) in ns
#define NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN 252 //!< Throttling by HW thermal slowdown in ns
#define NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN 253 //!< Throttling by HW power brake slowdown in ns
#define NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ 254 //!< Power Sync balancing frequency
#define NVML_FI_DEV_POWER_SYNC_BALANCING_AF 255 //!< Power Sync balancing AF
/* Power Smoothing */
#define NVML_FI_PWR_SMOOTHING_ENABLED 251 //!< Enablement (0/DISABLED or 1/ENABLED)
#define NVML_FI_PWR_SMOOTHING_PRIV_LVL 252 //!< Current privilege level
#define NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED 253 //!< Immediate ramp down enablement (0/DISABLED or 1/ENABLED)
#define NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL 254 //!< Applied TMP ceiling value in Watts
#define NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR 255 //!< Applied TMP floor value in Watts
#define NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING 256 //!< Max % TMP Floor value
#define NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING 257 //!< Min % TMP Floor value
#define NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING 258 //!< HW Circuitry % lifetime remaining
#define NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES 259 //!< Max number of preset profiles
#define NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR 260 //!< % TMP floor for a given profile
#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE 261 //!< Ramp up rate in mW/s for a given profile
#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE 262 //!< Ramp down rate in mW/s for a given profile
#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL 263 //!< Ramp down hysteresis value in ms for a given profile
#define NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE 264 //!< Active preset profile number
#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR 265 //!< % TMP floor for a given profile
#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE 266 //!< Ramp up rate in mW/s for a given profile
#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE 267 //!< Ramp down rate in mW/s for a given profile
#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL 268 //!< Ramp down hysteresis value in ms for a given profile

#define NVML_FI_MAX 269 //!< One greater than the largest field ID defined above
#define NVML_FI_PWR_SMOOTHING_ENABLED 256 //!< Enablement (0/DISABLED or 1/ENABLED)
#define NVML_FI_PWR_SMOOTHING_PRIV_LVL 257 //!< Current privilege level
#define NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED 258 //!< Immediate ramp down enablement (0/DISABLED or 1/ENABLED)
#define NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL 259 //!< Applied TMP ceiling value in Watts
#define NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR 260 //!< Applied TMP floor value in Watts
#define NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING 261 //!< Max % TMP Floor value
#define NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING 262 //!< Min % TMP Floor value
#define NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING 263 //!< HW Circuitry % lifetime remaining
#define NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES 264 //!< Max number of preset profiles
#define NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR 265 //!< % TMP floor for a given profile
#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE 266 //!< Ramp up rate in mW/s for a given profile
#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE 267 //!< Ramp down rate in mW/s for a given profile
#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL 268 //!< Ramp down hysteresis value in ms for a given profile
#define NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE 269 //!< Active preset profile number
#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR 270 //!< % TMP floor for a given profile
#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE 271 //!< Ramp up rate in mW/s for a given profile
#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE 272 //!< Ramp down rate in mW/s for a given profile
#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL 273 //!< Ramp down hysteresis value in ms for a given profile

#define NVML_FI_MAX 274 //!< One greater than the largest field ID defined above

/**
* NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD_UNITS
Expand Down
43 changes: 24 additions & 19 deletions nvml-wrapper-sys/src/bindings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,25 +343,30 @@ pub mod field_id {
pub const NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_13: u32 = 248;
pub const NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_14: u32 = 249;
pub const NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_15: u32 = 250;
pub const NVML_FI_PWR_SMOOTHING_ENABLED: u32 = 251;
pub const NVML_FI_PWR_SMOOTHING_PRIV_LVL: u32 = 252;
pub const NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED: u32 = 253;
pub const NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL: u32 = 254;
pub const NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR: u32 = 255;
pub const NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING: u32 = 256;
pub const NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING: u32 = 257;
pub const NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING: u32 = 258;
pub const NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES: u32 = 259;
pub const NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR: u32 = 260;
pub const NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE: u32 = 261;
pub const NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE: u32 = 262;
pub const NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL: u32 = 263;
pub const NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE: u32 = 264;
pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR: u32 = 265;
pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE: u32 = 266;
pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE: u32 = 267;
pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL: u32 = 268;
pub const NVML_FI_MAX: u32 = 269;
pub const NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN: u32 = 251;
pub const NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN: u32 = 252;
pub const NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN: u32 = 253;
pub const NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ: u32 = 254;
pub const NVML_FI_DEV_POWER_SYNC_BALANCING_AF: u32 = 255;
pub const NVML_FI_PWR_SMOOTHING_ENABLED: u32 = 256;
pub const NVML_FI_PWR_SMOOTHING_PRIV_LVL: u32 = 257;
pub const NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED: u32 = 258;
pub const NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL: u32 = 259;
pub const NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR: u32 = 260;
pub const NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING: u32 = 261;
pub const NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING: u32 = 262;
pub const NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING: u32 = 263;
pub const NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES: u32 = 264;
pub const NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR: u32 = 265;
pub const NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE: u32 = 266;
pub const NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE: u32 = 267;
pub const NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL: u32 = 268;
pub const NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE: u32 = 269;
pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR: u32 = 270;
pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE: u32 = 271;
pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE: u32 = 272;
pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL: u32 = 273;
pub const NVML_FI_MAX: u32 = 274;
}
pub const NVML_NVLINK_LOW_POWER_THRESHOLD_UNIT_100US: u32 = 0;
pub const NVML_NVLINK_LOW_POWER_THRESHOLD_UNIT_50US: u32 = 1;
Expand Down
141 changes: 140 additions & 1 deletion nvml-wrapper/src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3653,7 +3653,7 @@ impl<'nvml> Device<'nvml> {

for id in id_slice.iter() {
let mut raw: nvmlFieldValue_t = mem::zeroed();
raw.fieldId = id.0;
raw.fieldId = crate::translate_field_id(self.nvml.field_id_scheme, id.0);

field_values.push(raw);
}
Expand Down Expand Up @@ -7443,6 +7443,145 @@ mod test {
})
}

/// Exercise the v12↔v13U1 field ID remapping against a real driver.
///
/// On a v13U1+ driver (>= 580.82), CLOCKS_EVENT_REASON fields must be
/// remapped from their canonical v12 IDs (251-253) to the driver's v13U1
/// IDs (269-271). If the remapping is broken, the driver would interpret
/// these as PWR_SMOOTHING fields instead, returning either NotSupported
/// or silently wrong data.
///
/// The CLOCKS_EVENT_REASON fields return throttle-reason nanosecond
/// counters and should work on most GPUs (including consumer cards like
/// the RTX 4090). PWR_SMOOTHING fields are Blackwell-only and should
/// return NotSupported on older architectures — so if we get a successful
/// result, we know the remapping sent the right ID to the driver.
///
/// This test is diagnostic: per-field outcomes depend on the GPU and driver,
/// so they are printed as a table (run with `--nocapture` to see it) rather
/// than asserted. The only hard failures are the bulk `field_values_for` call
/// itself erroring, or it returning a different number of results than the
/// number of IDs requested.
#[test]
fn field_values_for_v12_v13u1_remapping() {
    let nvml = nvml();

    // The driver version is informational only; fall back to a placeholder
    // instead of failing the test when it cannot be queried.
    let driver = nvml
        .sys_driver_version()
        .unwrap_or_else(|_| "unknown".into());
    let scheme = nvml.field_id_scheme();
    println!("Driver: {driver}, scheme: {scheme:?}");

    // (display name, canonical field ID as defined by the bindings)
    let fields: &[(&str, u32)] = &[
        (
            "CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN",
            NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN,
        ),
        (
            "CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN",
            NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN,
        ),
        (
            "CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN",
            NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN,
        ),
        (
            "POWER_SYNC_BALANCING_FREQ",
            NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ,
        ),
        (
            "POWER_SYNC_BALANCING_AF",
            NVML_FI_DEV_POWER_SYNC_BALANCING_AF,
        ),
        ("PWR_SMOOTHING_ENABLED", NVML_FI_PWR_SMOOTHING_ENABLED),
        ("PWR_SMOOTHING_PRIV_LVL", NVML_FI_PWR_SMOOTHING_PRIV_LVL),
        (
            "PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED",
            NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED,
        ),
        (
            "PWR_SMOOTHING_APPLIED_TMP_CEIL",
            NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL,
        ),
        (
            "PWR_SMOOTHING_APPLIED_TMP_FLOOR",
            NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR,
        ),
        (
            "PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING",
            NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING,
        ),
        (
            "PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING",
            NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING,
        ),
        (
            "PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING",
            NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING,
        ),
        (
            "PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES",
            NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES,
        ),
        (
            "PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR",
            NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR,
        ),
        (
            "PWR_SMOOTHING_PROFILE_RAMP_UP_RATE",
            NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE,
        ),
        (
            "PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE",
            NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE,
        ),
        (
            "PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL",
            NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL,
        ),
        (
            "PWR_SMOOTHING_ACTIVE_PRESET_PROFILE",
            NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE,
        ),
        (
            "PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR",
            NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR,
        ),
        (
            "PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE",
            NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE,
        ),
        (
            "PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE",
            NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE,
        ),
        (
            "PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL",
            NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL,
        ),
    ];

    let field_ids: Vec<FieldId> = fields.iter().map(|&(_, id)| FieldId(id)).collect();

    let device = device(&nvml);
    let results = device
        .field_values_for(&field_ids)
        .expect("field_values_for call succeeded");

    // `zip` stops at the shorter side, so a short result list would silently
    // drop rows from the table below; fail loudly instead.
    assert_eq!(
        results.len(),
        fields.len(),
        "field_values_for must return exactly one result per requested field ID"
    );

    println!(
        "{:<52} {:>6} {:>10} {}",
        "NAME", "V12_ID", "DRIVER_ID", "RESULT"
    );
    println!("{}", "-".repeat(90));

    for ((name, v12_id), sample) in fields.iter().zip(results.iter()) {
        // Recompute the ID that was actually sent to the driver for this row.
        let driver_id = crate::translate_field_id(scheme, *v12_id);
        // Outer `Err`: the sample itself could not be produced.
        // Inner `Err`: a sample was produced but carries a per-field error.
        let result_str = match sample {
            Ok(s) => match &s.value {
                Ok(v) => format!("Ok({v:?})"),
                Err(e) => format!("{e:?}"),
            },
            Err(e) => format!("ERR: {e:?}"),
        };
        println!("{name:<52} {v12_id:>6} {driver_id:>10} {result_str}");
    }
}

// Passing an empty slice should return an `InvalidArg` error
#[should_panic(expected = "InvalidArg")]
#[test]
Expand Down
Loading
Loading