diff --git a/Cargo.lock b/Cargo.lock index 48160e1..65b4756 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,24 +15,24 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.4.2" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" dependencies = [ - "serde", + "serde_core", ] [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "darling" -version = "0.20.5" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc5d6b04b3fd0ba9926f945895de7d806260a2d7431ba82e7edaecb043c4c6b8" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ "darling_core", "darling_macro", @@ -40,9 +40,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.5" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e48a959bcd5c761246f5d090ebc2fbf7b9cd527a492b07a67510c108f1e7e3" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" dependencies = [ "fnv", "ident_case", @@ -54,9 +54,9 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.5" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1545d67a2149e1d93b7e5c7752dce5a7426eb5d1357ddcfd89336b94444f77" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core", "quote", @@ -71,9 +71,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "getopts" -version = "0.2.21" 
+version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" dependencies = [ "unicode-width", ] @@ -95,23 +95,23 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libloading" -version = "0.8.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" dependencies = [ "cfg-if", - "windows-sys", + "windows-link", ] [[package]] name = "nvml-wrapper" -version = "0.12.0" +version = "0.13.0-utilidata.0" dependencies = [ "bitflags", "libloading", @@ -126,7 +126,7 @@ dependencies = [ [[package]] name = "nvml-wrapper-sys" -version = "0.9.0" +version = "0.10.0-utilidata.0" dependencies = [ "libloading", ] @@ -143,36 +143,45 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.35" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ 
"proc-macro2", ] [[package]] name = "serde" -version = "1.0.196" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.196" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -187,15 +196,15 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.48" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -204,18 +213,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.56" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ 
"thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.56" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", @@ -224,15 +233,15 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-width" -version = "0.1.11" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "winapi" @@ -257,70 +266,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" +name = "windows-link" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "wrapcenum-derive" diff --git a/README.md b/README.md index 6edf557..2cb48d0 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ each time it gets called. Instead, call `Nvml::init` once and store the resultin ## NVML Support This wrapper is being developed against and currently supports NVML version -12. Each new version of NVML is guaranteed to be backwards-compatible according +13. 
Each new version of NVML is guaranteed to be backwards-compatible according to NVIDIA, so this wrapper should continue to work without issue regardless of NVML version bumps. diff --git a/nvml-wrapper-sys/CHANGELOG.md b/nvml-wrapper-sys/CHANGELOG.md index f8c6c06..00febc5 100644 --- a/nvml-wrapper-sys/CHANGELOG.md +++ b/nvml-wrapper-sys/CHANGELOG.md @@ -4,6 +4,10 @@ This file describes the changes / additions / fixes between bindings releases. ## Unreleased +## 0.10.0 (released 2026-03-23) + +Bindings have been regenerated using the NVML 13.2.51 header and bindgen 0.72.1. + ## 0.9.0 (released 2025-03-28) Bindings have been regenerated using the NVML 12.8.90 header and bindgen 0.68.1. diff --git a/nvml-wrapper-sys/Cargo.toml b/nvml-wrapper-sys/Cargo.toml index 950f800..cdfb38b 100644 --- a/nvml-wrapper-sys/Cargo.toml +++ b/nvml-wrapper-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "nvml-wrapper-sys" -version = "0.9.0" +version = "0.10.0-utilidata.0" authors = ["Cldfire"] description = "Generated bindings to the NVIDIA Management Library." readme = "README.md" @@ -16,7 +16,7 @@ categories = ["external-ffi-bindings", "hardware-support"] exclude = ["nvml.h"] [dependencies] -libloading = "0.8.1" +libloading = "0.9.0" [features] default = [] diff --git a/nvml-wrapper-sys/README.md b/nvml-wrapper-sys/README.md index a3b76d9..85a1e0f 100644 --- a/nvml-wrapper-sys/README.md +++ b/nvml-wrapper-sys/README.md @@ -35,10 +35,15 @@ there's a convincing reason to do so; please file an issue. ## NVML Support -These bindings were generated for NVML version 11. Each new version of NVML is +These bindings were generated for NVML version 13. Each new version of NVML is guaranteed to be backwards-compatible according to NVIDIA, so these bindings should be useful regardless of NVML version bumps. 
+Note that NVML version 13.0 update 1 (and/or driver 580TRD2) [breaks backwards compatibility](https://docs.nvidia.com/deploy/nvml-api/known-issues.html#known-issues): + +> NVML Field Values from #251 - #273 (Power Smoothing, Clock Event Reason, and Sync Power Balancing related field values) have changed between 13.0 and 13.0U1/v580TRD2. +> Any application that is using these field IDs must be recompiled using the NVML header file from CUDA 13.0 Update 1 in order to continue working correctly with NVIDIA drivers v580 TRD2 and beyond. + ### Legacy Functions Sometimes there will be function-level API version bumps in new NVML releases. diff --git a/nvml-wrapper-sys/nvml.h b/nvml-wrapper-sys/nvml.h index 9f96cb1..84a85fa 100644 --- a/nvml-wrapper-sys/nvml.h +++ b/nvml-wrapper-sys/nvml.h @@ -78,7 +78,7 @@ extern "C" { * On Windows, set up methods for DLL export * define NVML_STATIC_IMPORT when using nvml_loader library */ -#if defined _WINDOWS +#if defined(_WINDOWS) || defined(_WIN32) #if !defined NVML_STATIC_IMPORT #if defined NVML_LIB_EXPORT #define DECLDIR __declspec(dllexport) @@ -92,13 +92,22 @@ extern "C" { #define DECLDIR #endif - #define NVML_MCDM_SUPPORT +/* + * Deprecation definition. + */ +#if defined(_WINDOWS) || defined(_WIN32) + #define DEPRECATED(ver) __declspec(deprecated) +#else + #define DEPRECATED(ver) __attribute__((deprecated)) +#endif + + #define NVML_MCDM_SUPPORT //!< Definition to enable MCDM support. /** * NVML API versioning support */ -#define NVML_API_VERSION 12 -#define NVML_API_VERSION_STR "12" +#define NVML_API_VERSION 13 //!< NVML API version identifier. +#define NVML_API_VERSION_STR "13" //!< NVML API version identifier as a string. /** * Defining NVML_NO_UNVERSIONED_FUNC_DEFS will disable "auto upgrading" of APIs. * e.g. the user will have to call nvmlInit_v2 instead of nvmlInit. Enable this @@ -142,19 +151,21 @@ extern "C" { * * Each structure explicitly states when to check for this value. 
*/ -#define NVML_VALUE_NOT_AVAILABLE (-1) +#define NVML_VALUE_NOT_AVAILABLE (-1) //!< Macro for unavailable values. typedef struct nvmlDevice_st* nvmlDevice_t; +typedef struct nvmlGpuInstance_st* nvmlGpuInstance_t; + /** * Buffer size guaranteed to be large enough for pci bus id */ -#define NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE 32 +#define NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE 32 //!< Buffer size for PCI bus ID. /** - * Buffer size guaranteed to be large enough for pci bus id for ::busIdLegacy + * Buffer size guaranteed to be large enough for pci bus id for \p busIdLegacy */ -#define NVML_DEVICE_PCI_BUS_ID_BUFFER_V2_SIZE 16 +#define NVML_DEVICE_PCI_BUS_ID_BUFFER_V2_SIZE 16 //!< Buffer size for legacy PCI bus ID. /** * PCI information about a GPU device. @@ -175,7 +186,7 @@ typedef struct char busId[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE]; //!< The tuple domain:bus:device.function PCI identifier (& NULL terminator) } nvmlPciInfoExt_v1_t; typedef nvmlPciInfoExt_v1_t nvmlPciInfoExt_t; -#define nvmlPciInfoExt_v1 NVML_STRUCT_VERSION(PciInfoExt, 1) +#define nvmlPciInfoExt_v1 NVML_STRUCT_VERSION(PciInfoExt, 1) //!< Version macro for \a nvmlPciInfoExt_v1_t /** * PCI information about a GPU device. @@ -195,21 +206,21 @@ typedef struct nvmlPciInfo_st } nvmlPciInfo_t; /** - * PCI format string for ::busIdLegacy + * PCI format string for \p busIdLegacy */ -#define NVML_DEVICE_PCI_BUS_ID_LEGACY_FMT "%04X:%02X:%02X.0" +#define NVML_DEVICE_PCI_BUS_ID_LEGACY_FMT "%04X:%02X:%02X.0" //!< Legacy PCI bus ID format. /** - * PCI format string for ::busId + * PCI format string for \p busId */ -#define NVML_DEVICE_PCI_BUS_ID_FMT "%08X:%02X:%02X.0" +#define NVML_DEVICE_PCI_BUS_ID_FMT "%08X:%02X:%02X.0" //!< PCI bus ID format. /** * Utility macro for filling the pci bus id format from a nvmlPciInfo_t */ #define NVML_DEVICE_PCI_BUS_ID_FMT_ARGS(pciInfo) (pciInfo)->domain, \ (pciInfo)->bus, \ - (pciInfo)->device + (pciInfo)->device //!< Macro for formatting PCI bus ID arguments. 
/** * Detailed ECC error counts for a device. @@ -261,7 +272,7 @@ typedef struct nvmlMemory_v2_st unsigned long long used; //!< Allocated device memory (in bytes). } nvmlMemory_v2_t; -#define nvmlMemory_v2 NVML_STRUCT_VERSION(Memory, 2) +#define nvmlMemory_v2 NVML_STRUCT_VERSION(Memory, 2) //!< Version macro for \a nvmlMemory_v2_t /** * BAR1 Memory allocation Information for a device @@ -332,7 +343,7 @@ typedef nvmlProcessDetailList_v1_t nvmlProcessDetailList_t; /** * nvmlProcessDetailList version */ -#define nvmlProcessDetailList_v1 NVML_STRUCT_VERSION(ProcessDetailList, 1) +#define nvmlProcessDetailList_v1 NVML_STRUCT_VERSION(ProcessDetailList, 1) //!< Version macro for \a nvmlProcessDetailList_v1_t typedef struct nvmlDeviceAttributes_st { @@ -355,7 +366,50 @@ typedef struct unsigned int isC2cEnabled; } nvmlC2cModeInfo_v1_t; -#define nvmlC2cModeInfo_v1 NVML_STRUCT_VERSION(C2cModeInfo, 1) +#define nvmlC2cModeInfo_v1 NVML_STRUCT_VERSION(C2cModeInfo, 1) //!< Version macro for \a nvmlC2cModeInfo_v1_t + +/** + * Enum to represent device addressing mode values + */ +typedef enum +{ + NVML_DEVICE_ADDRESSING_MODE_NONE = 0, //!< No active mode + NVML_DEVICE_ADDRESSING_MODE_HMM = 1, //!< Heterogeneous Memory Management mode + NVML_DEVICE_ADDRESSING_MODE_ATS = 2, //!< Address Translation Services mode +} nvmlDeviceAddressingModeType_t; + +/** + * Struct to represent device addressing mode information + */ +typedef struct +{ + unsigned int version; //!< API version + unsigned int value; //!< One of \ref nvmlDeviceAddressingModeType_t +} nvmlDeviceAddressingMode_v1_t; +typedef nvmlDeviceAddressingMode_v1_t nvmlDeviceAddressingMode_t; + +#define nvmlDeviceAddressingMode_v1 NVML_STRUCT_VERSION(DeviceAddressingMode, 1) //!< Version macro for \a nvmlDeviceAddressingMode_v1_t + +/** + * Struct to represent the NVML repair status + */ +typedef struct +{ + unsigned int version; //!< API version number + unsigned int bChannelRepairPending; //!< Reference to \a unsigned int + unsigned 
int bTpcRepairPending; //!< Reference to \a unsigned int +} nvmlRepairStatus_v1_t; +typedef nvmlRepairStatus_v1_t nvmlRepairStatus_t; + +#define nvmlRepairStatus_v1 NVML_STRUCT_VERSION(RepairStatus, 1) //!< Version macro for \a nvmlRepairStatus_v1_t + +/** + * Struct to represent the NVML unrepairable memory status + */ +typedef struct +{ + unsigned int bUnrepairableMemory; //!< Reference to \a unsigned int +} nvmlUnrepairableMemoryStatus_v1_t; /** * Possible values that classify the remap availability for each bank. The max @@ -384,7 +438,7 @@ typedef enum nvmlBridgeChipType_enum /** * Maximum number of NvLink links supported */ -#define NVML_NVLINK_MAX_LINKS 18 +#define NVML_NVLINK_MAX_LINKS 18 //!< Maximum number of NVLink links supported. /** * Enum to represent the NvLink utilization counter packet units @@ -486,7 +540,7 @@ typedef enum nvmlGpuLevel_enum } nvmlGpuTopologyLevel_t; /* Compatibility for CPU->NODE renaming */ -#define NVML_TOPOLOGY_CPU NVML_TOPOLOGY_NODE +#define NVML_TOPOLOGY_CPU NVML_TOPOLOGY_NODE //!< Topology level for node. /* P2P Capability Index Status*/ typedef enum nvmlGpuP2PStatus_enum @@ -521,7 +575,7 @@ typedef enum nvmlGpuP2PCapsIndex_enum /** * Maximum limit on Physical Bridges per Board */ -#define NVML_MAX_PHYSICAL_BRIDGE (128) +#define NVML_MAX_PHYSICAL_BRIDGE (128) //!< Maximum number of physical bridges. /** * Information about the Bridge Chip Firmware @@ -642,7 +696,7 @@ typedef struct nvmlViolationTime_st unsigned long long violationTime; //!< violationTime in Nanoseconds }nvmlViolationTime_t; -#define NVML_MAX_THERMAL_SENSORS_PER_GPU 3 +#define NVML_MAX_THERMAL_SENSORS_PER_GPU 3 //!< Maximum number of thermal sensors per GPU. 
/** * Represents the thermal sensor targets @@ -739,7 +793,61 @@ typedef struct } nvmlCoolerInfo_v1_t; typedef nvmlCoolerInfo_v1_t nvmlCoolerInfo_t; -#define nvmlCoolerInfo_v1 NVML_STRUCT_VERSION(CoolerInfo, 1) +#define nvmlCoolerInfo_v1 NVML_STRUCT_VERSION(CoolerInfo, 1) //!< Version macro for \a nvmlCoolerInfo_v1_t + +/** + * UUID length in ASCII format + */ +#define NVML_DEVICE_UUID_ASCII_LEN 41 //!< Length of UUID in ASCII format. + +/** + * UUID length in binary format + */ +#define NVML_DEVICE_UUID_BINARY_LEN 16 //!< Length of UUID in binary format. + +/** + * Enum to represent different UUID types + */ +typedef enum +{ + NVML_UUID_TYPE_NONE = 0, //!< Undefined type + NVML_UUID_TYPE_ASCII = 1, //!< ASCII format type + NVML_UUID_TYPE_BINARY = 2, //!< Binary format type +} nvmlUUIDType_t; + +/** + * Union to represent different UUID values + */ +typedef union +{ + char str[NVML_DEVICE_UUID_ASCII_LEN]; //!< ASCII format value + unsigned char bytes[NVML_DEVICE_UUID_BINARY_LEN]; //!< Binary format value +} nvmlUUIDValue_t; + +/** + * Struct to represent NVML UUID information + */ +typedef struct +{ + unsigned int version; //!< API version number + unsigned int type; //!< One of \p nvmlUUIDType_t + nvmlUUIDValue_t value; //!< One of \p nvmlUUIDValue_t, to be set based on the UUID format +} nvmlUUID_v1_t; +typedef nvmlUUID_v1_t nvmlUUID_t; + +#define nvmlUUID_v1 NVML_STRUCT_VERSION(UUID, 1) //!< Version macro for \a nvmlUUID_v1_t + +/** + * Struct to represent the NVML PDI information + */ +typedef struct +{ + unsigned int version; //!< API version number + unsigned long long value; //!< 64-bit PDI value +} nvmlPdi_v1_t; +typedef nvmlPdi_v1_t nvmlPdi_t; + +#define nvmlPdi_v1 NVML_STRUCT_VERSION(Pdi, 1) //!< Version macro for \a nvmlPdi_v1_t /** @} */ @@ -759,9 +867,9 @@ typedef enum nvmlEnableState_enum } nvmlEnableState_t; //! Generic flag used to specify the default behavior of some functions. See description of particular functions for details. 
-#define nvmlFlagDefault 0x00 +#define nvmlFlagDefault 0x00 //!< Default flag. //! Generic flag used to force some behavior. See description of particular functions for details. -#define nvmlFlagForce 0x01 +#define nvmlFlagForce 0x01 //!< Force flag. /** * DRAM Encryption Info @@ -773,7 +881,7 @@ typedef struct } nvmlDramEncryptionInfo_v1_t; typedef nvmlDramEncryptionInfo_v1_t nvmlDramEncryptionInfo_t; -#define nvmlDramEncryptionInfo_v1 NVML_STRUCT_VERSION(DramEncryptionInfo, 1) +#define nvmlDramEncryptionInfo_v1 NVML_STRUCT_VERSION(DramEncryptionInfo, 1) //!< Version macro for \a nvmlDramEncryptionInfo_v1_t /** * * The Brand of the GPU @@ -789,7 +897,7 @@ typedef enum nvmlBrandType_enum NVML_BRAND_TITAN = 6, NVML_BRAND_NVIDIA_VAPPS = 7, // NVIDIA Virtual Applications NVML_BRAND_NVIDIA_VPC = 8, // NVIDIA Virtual PC - NVML_BRAND_NVIDIA_VCS = 9, // NVIDIA Virtual Compute Server + NVML_BRAND_NVIDIA_VCS = 9, // NVIDIA vGPU for Compute NVML_BRAND_NVIDIA_VWS = 10, // NVIDIA RTX Virtual Workstation NVML_BRAND_NVIDIA_CLOUD_GAMING = 11, // NVIDIA Cloud Gaming NVML_BRAND_NVIDIA_VGAMING = NVML_BRAND_NVIDIA_CLOUD_GAMING, // Deprecated from API reporting. Keeping definition for backward compatibility. @@ -798,9 +906,8 @@ typedef enum nvmlBrandType_enum NVML_BRAND_NVIDIA = 14, NVML_BRAND_GEFORCE_RTX = 15, // Unused NVML_BRAND_TITAN_RTX = 16, // Unused - // Keep this last - NVML_BRAND_COUNT + NVML_BRAND_COUNT = 18, } nvmlBrandType_t; /** @@ -850,7 +957,7 @@ typedef struct typedef nvmlMarginTemperature_v1_t nvmlMarginTemperature_t; -#define nvmlMarginTemperature_v1 NVML_STRUCT_VERSION(MarginTemperature, 1) +#define nvmlMarginTemperature_v1 NVML_STRUCT_VERSION(MarginTemperature, 1) //!< Version macro for \a nvmlMarginTemperature_v1_t /** * Compute mode. @@ -873,7 +980,7 @@ typedef enum nvmlComputeMode_enum /** * Max Clock Monitors available */ -#define MAX_CLK_DOMAINS 32 +#define MAX_CLK_DOMAINS 32 //!< Maximum number of clock domains. 
/** * Clock Monitor error types @@ -915,21 +1022,21 @@ typedef struct nvmlClkMonStatus_status { * * @deprecated See \ref nvmlMemoryErrorType_t for a more flexible type */ -#define nvmlEccBitType_t nvmlMemoryErrorType_t +#define nvmlEccBitType_t nvmlMemoryErrorType_t //!< Deprecated ECC bit type. See \ref nvmlMemoryErrorType_t. /** * Single bit ECC errors * * @deprecated Mapped to \ref NVML_MEMORY_ERROR_TYPE_CORRECTED */ -#define NVML_SINGLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_CORRECTED +#define NVML_SINGLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_CORRECTED //!< Deprecated single bit ECC error. See \ref NVML_MEMORY_ERROR_TYPE_CORRECTED. /** * Double bit ECC errors * * @deprecated Mapped to \ref NVML_MEMORY_ERROR_TYPE_UNCORRECTED */ -#define NVML_DOUBLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_UNCORRECTED +#define NVML_DOUBLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_UNCORRECTED //!< Deprecated double bit ECC error. See \ref NVML_MEMORY_ERROR_TYPE_UNCORRECTED. /** * Memory error types @@ -956,21 +1063,6 @@ typedef enum nvmlMemoryErrorType_enum } nvmlMemoryErrorType_t; -/** - * Represents Nvlink Version - */ -typedef enum nvmlNvlinkVersion_enum -{ - NVML_NVLINK_VERSION_INVALID = 0, - NVML_NVLINK_VERSION_1_0 = 1, - NVML_NVLINK_VERSION_2_0 = 2, - NVML_NVLINK_VERSION_2_2 = 3, - NVML_NVLINK_VERSION_3_0 = 4, - NVML_NVLINK_VERSION_3_1 = 5, - NVML_NVLINK_VERSION_4_0 = 6, - NVML_NVLINK_VERSION_5_0 = 7, -}nvmlNvlinkVersion_t; - /** * ECC counter types. * @@ -1011,8 +1103,10 @@ typedef enum nvmlClockType_enum typedef enum nvmlClockId_enum { NVML_CLOCK_ID_CURRENT = 0, //!< Current actual clock value - NVML_CLOCK_ID_APP_CLOCK_TARGET = 1, //!< Target application clock + NVML_CLOCK_ID_APP_CLOCK_TARGET = 1, //!< Target application clock. + //!< Deprecated, do not use. NVML_CLOCK_ID_APP_CLOCK_DEFAULT = 2, //!< Default application clock target + //!< Deprecated, do not use. 
NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3, //!< OEM-defined maximum clock rate //Keep this last @@ -1032,7 +1126,7 @@ typedef enum nvmlDriverModel_enum NVML_DRIVER_MCDM = 2 //!< MCDM driver model -- GPU treated as a Microsoft compute device } nvmlDriverModel_t; -#define NVML_MAX_GPU_PERF_PSTATES 16 +#define NVML_MAX_GPU_PERF_PSTATES 16 //!< Maximum number of GPU performance states. /** * Allowed PStates. @@ -1073,7 +1167,7 @@ typedef struct typedef nvmlClockOffset_v1_t nvmlClockOffset_t; -#define nvmlClockOffset_v1 NVML_STRUCT_VERSION(ClockOffset, 1) +#define nvmlClockOffset_v1 NVML_STRUCT_VERSION(ClockOffset, 1) //!< Version macro for \a nvmlClockOffset_v1_t /** * Fan speed info. @@ -1086,9 +1180,9 @@ typedef struct } nvmlFanSpeedInfo_v1_t; typedef nvmlFanSpeedInfo_v1_t nvmlFanSpeedInfo_t; -#define nvmlFanSpeedInfo_v1 NVML_STRUCT_VERSION(FanSpeedInfo, 1) +#define nvmlFanSpeedInfo_v1 NVML_STRUCT_VERSION(FanSpeedInfo, 1) //!< Version macro for \a nvmlFanSpeedInfo_v1_t -#define NVML_PERF_MODES_BUFFER_SIZE 2048 +#define NVML_PERF_MODES_BUFFER_SIZE 2048 //!< Buffer size for performance modes strings. 
/** * Device performance modes string @@ -1100,7 +1194,7 @@ typedef struct } nvmlDevicePerfModes_v1_t; typedef nvmlDevicePerfModes_v1_t nvmlDevicePerfModes_t; -#define nvmlDevicePerfModes_v1 NVML_STRUCT_VERSION(DevicePerfModes, 1) +#define nvmlDevicePerfModes_v1 NVML_STRUCT_VERSION(DevicePerfModes, 1) //!< Version macro for \a nvmlDevicePerfModes_v1_t /** * Device current clocks string @@ -1112,7 +1206,36 @@ typedef struct } nvmlDeviceCurrentClockFreqs_v1_t; typedef nvmlDeviceCurrentClockFreqs_v1_t nvmlDeviceCurrentClockFreqs_t; -#define nvmlDeviceCurrentClockFreqs_v1 NVML_STRUCT_VERSION(DeviceCurrentClockFreqs, 1) +#define nvmlDeviceCurrentClockFreqs_v1 NVML_STRUCT_VERSION(DeviceCurrentClockFreqs, 1) //!< Version macro for \a nvmlDeviceCurrentClockFreqs_v1_t + +/** + * Device powerMizer modes + */ +#define NVML_POWER_MIZER_MODE_ADAPTIVE 0 //!< adjust GPU clocks based on GPU utilization +#define NVML_POWER_MIZER_MODE_PREFER_MAXIMUM_PERFORMANCE 1 //!< raise GPU clocks to favor maximum performance, + //!< to the extent that thermal and other constraints allow +#define NVML_POWER_MIZER_MODE_AUTO 2 //!< PowerMizer mode is driver controlled. +#define NVML_POWER_MIZER_MODE_PREFER_CONSISTENT_PERFORMANCE 3 //!< lock to GPU base clocks + +typedef struct +{ + unsigned int currentMode; //!< OUT: the current powermizer mode + unsigned int mode; //!< IN: the powermizer mode to set + + /** + * The bitmask of supported power mizer modes on this device. + * The supported modes can be combined using the bitwise OR operator '|'. 
+ * For example, if a device supports all PowerMizer modes, the bitmask would be: + * supportedPowerMizerModes = ((1 << NVML_POWER_MIZER_MODE_ADAPTIVE) | + * (1 << NVML_POWER_MIZER_MODE_PREFER_MAXIMUM_PERFORMANCE) | + * (1 << NVML_POWER_MIZER_MODE_AUTO) | + * (1 << NVML_POWER_MIZER_MODE_PREFER_CONSISTENT_PERFORMANCE)); + * + * This bitmask can be used to check which power mizer modes are available on the device by performing + * a bitwise AND operation with the specific mode you want to check. + */ + unsigned int supportedPowerMizerModes; //!< OUT: Bitmask of supported powermizer modes +} nvmlDevicePowerMizerModes_v1_t; /** * GPU Operation Mode @@ -1181,6 +1304,7 @@ typedef enum nvmlReturn_enum NVML_ERROR_NOT_READY = 27, //!< The system is not ready for the request NVML_ERROR_GPU_NOT_FOUND = 28, //!< No GPUs were found NVML_ERROR_INVALID_STATE = 29, //!< Resource not in correct state to perform requested operation + NVML_ERROR_RESET_TYPE_NOT_SUPPORTED = 30, //!< Reset not supported for given device/parameters NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred } nvmlReturn_t; @@ -1220,7 +1344,8 @@ typedef enum nvmlPageRetirementCause_enum typedef enum nvmlRestrictedAPI_enum { NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0, //!< APIs that change application clocks, see nvmlDeviceSetApplicationsClocks - //!< and see nvmlDeviceResetApplicationsClocks + //!< and see nvmlDeviceResetApplicationsClocks. + //!< Deprecated, keeping definition for backward compatibility. 
NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1, //!< APIs that enable/disable Auto Boosted clocks //!< see nvmlDeviceSetAutoBoostedClocksEnabled // Keep this last @@ -1289,7 +1414,7 @@ typedef struct } nvmlEccSramErrorStatus_v1_t; typedef nvmlEccSramErrorStatus_v1_t nvmlEccSramErrorStatus_t; -#define nvmlEccSramErrorStatus_v1 NVML_STRUCT_VERSION(EccSramErrorStatus, 1) +#define nvmlEccSramErrorStatus_v1 NVML_STRUCT_VERSION(EccSramErrorStatus, 1) //!< Version macro for \a nvmlEccSramErrorStatus_v1_t /** * Structure to store platform information @@ -1308,8 +1433,11 @@ typedef struct unsigned char peerType; //!< Platform indicated NVLink-peer type (e.g. switch present or not) unsigned char moduleId; //!< ID of this GPU within the node } nvmlPlatformInfo_v1_t; -#define nvmlPlatformInfo_v1 NVML_STRUCT_VERSION(PlatformInfo, 1) +#define nvmlPlatformInfo_v1 NVML_STRUCT_VERSION(PlatformInfo, 1) //!< Version macro for \a nvmlPlatformInfo_v1_t +/** + * Structure to store platform information (v2) + */ typedef struct { unsigned int version; //!< the API version number @@ -1323,41 +1451,88 @@ typedef struct } nvmlPlatformInfo_v2_t; typedef nvmlPlatformInfo_v2_t nvmlPlatformInfo_t; -#define nvmlPlatformInfo_v2 NVML_STRUCT_VERSION(PlatformInfo, 2) +#define nvmlPlatformInfo_v2 NVML_STRUCT_VERSION(PlatformInfo, 2) //!< Version macro for \a nvmlPlatformInfo_v2_t + +/** + * Structure to store hostname information + */ +#define NVML_DEVICE_HOSTNAME_BUFFER_SIZE 64 //!< Buffer size for hostname string. 
+ +typedef struct +{ + char value[NVML_DEVICE_HOSTNAME_BUFFER_SIZE]; //!< null-terminated hostname string +} nvmlHostname_v1_t; + +typedef struct +{ + unsigned int unit; //!< the SRAM unit index + unsigned int location; //!< the error location within the SRAM unit + unsigned int sublocation; //!< the error sublocation within the SRAM unit + unsigned int extlocation; //!< the error extlocation within the SRAM unit + unsigned int address; //!< the error address within the SRAM unit + unsigned int isParity; //!< if the SRAM error is parity or not + unsigned int count; //!< the error count at the same SRAM address +} nvmlEccSramUniqueUncorrectedErrorEntry_v1_t; + +typedef struct +{ + unsigned int version; //!< the API version number + unsigned int entryCount; //!< the number of error count entries + nvmlEccSramUniqueUncorrectedErrorEntry_v1_t *entries; //!< pointer to caller-supplied buffer to return the SRAM unique uncorrected ECC error count entries +} nvmlEccSramUniqueUncorrectedErrorCounts_v1_t; + +typedef nvmlEccSramUniqueUncorrectedErrorCounts_v1_t nvmlEccSramUniqueUncorrectedErrorCounts_t; +#define nvmlEccSramUniqueUncorrectedErrorCounts_v1 NVML_STRUCT_VERSION(EccSramUniqueUncorrectedErrorCounts, 1) //!< Version macro for \a nvmlEccSramUniqueUncorrectedErrorCounts_v1_t + +#define NVML_RUSD_POLL_NONE 0x0 //!< Disable RUSD polling on all metric groups +#define NVML_RUSD_POLL_CLOCK 0x1 //!< Enable RUSD polling on clock group +#define NVML_RUSD_POLL_PERF 0x2 //!< Enable RUSD polling on performance group +#define NVML_RUSD_POLL_MEMORY 0x4 //!< Enable RUSD polling on memory group +#define NVML_RUSD_POLL_POWER 0x8 //!< Enable RUSD polling on power group +#define NVML_RUSD_POLL_THERMAL 0x10 //!< Enable RUSD polling on thermal group +#define NVML_RUSD_POLL_PCI 0x20 //!< Enable RUSD polling on pci group +#define NVML_RUSD_POLL_FAN 0x40 //!< Enable RUSD polling on fan group +#define NVML_RUSD_POLL_PROC_UTIL 0x80 //!< Enable RUSD polling on process utilization group 
+#define NVML_RUSD_POLL_ALL 0xFFFFFFFFFFFFFFFF //!< Enable RUSD polling on all groups + +typedef struct +{ + unsigned int version; + unsigned long long pollMask; //!< Bitmask of polling data. 0 value means the GPU's RUSD polling mask is cleared. +} nvmlRusdSettings_v1_t; +#define nvmlRusdSettings_v1 NVML_STRUCT_VERSION(RusdSettings, 1) //!< Version macro for \a nvmlRusdSettings_v1_t /** * GSP firmware */ -#define NVML_GSP_FIRMWARE_VERSION_BUF_SIZE 0x40 +#define NVML_GSP_FIRMWARE_VERSION_BUF_SIZE 0x40 //!< Buffer size for GSP firmware version string. /** * Simplified chip architecture */ -#define NVML_DEVICE_ARCH_KEPLER 2 // Devices based on the NVIDIA Kepler architecture -#define NVML_DEVICE_ARCH_MAXWELL 3 // Devices based on the NVIDIA Maxwell architecture -#define NVML_DEVICE_ARCH_PASCAL 4 // Devices based on the NVIDIA Pascal architecture -#define NVML_DEVICE_ARCH_VOLTA 5 // Devices based on the NVIDIA Volta architecture -#define NVML_DEVICE_ARCH_TURING 6 // Devices based on the NVIDIA Turing architecture -#define NVML_DEVICE_ARCH_AMPERE 7 // Devices based on the NVIDIA Ampere architecture -#define NVML_DEVICE_ARCH_ADA 8 // Devices based on the NVIDIA Ada architecture -#define NVML_DEVICE_ARCH_HOPPER 9 // Devices based on the NVIDIA Hopper architecture - -#define NVML_DEVICE_ARCH_BLACKWELL 10 // Devices based on the NVIDIA Blackwell architecture +#define NVML_DEVICE_ARCH_KEPLER 2 //!< Devices based on the NVIDIA Kepler architecture +#define NVML_DEVICE_ARCH_MAXWELL 3 //!< Devices based on the NVIDIA Maxwell architecture +#define NVML_DEVICE_ARCH_PASCAL 4 //!< Devices based on the NVIDIA Pascal architecture +#define NVML_DEVICE_ARCH_VOLTA 5 //!< Devices based on the NVIDIA Volta architecture +#define NVML_DEVICE_ARCH_TURING 6 //!< Devices based on the NVIDIA Turing architecture +#define NVML_DEVICE_ARCH_AMPERE 7 //!< Devices based on the NVIDIA Ampere architecture +#define NVML_DEVICE_ARCH_ADA 8 //!< Devices based on the NVIDIA Ada architecture +#define 
NVML_DEVICE_ARCH_HOPPER 9 //!< Devices based on the NVIDIA Hopper architecture -#define NVML_DEVICE_ARCH_T23X 11 // Devices based on NVIDIA Orin architecture +#define NVML_DEVICE_ARCH_BLACKWELL 10 //!< Devices based on the NVIDIA Blackwell architecture -#define NVML_DEVICE_ARCH_UNKNOWN 0xffffffff // Anything else, presumably something newer +#define NVML_DEVICE_ARCH_UNKNOWN 0xffffffff //!< Anything else, presumably something newer typedef unsigned int nvmlDeviceArchitecture_t; /** * PCI bus types */ -#define NVML_BUS_TYPE_UNKNOWN 0 -#define NVML_BUS_TYPE_PCI 1 -#define NVML_BUS_TYPE_PCIE 2 -#define NVML_BUS_TYPE_FPCI 3 -#define NVML_BUS_TYPE_AGP 4 +#define NVML_BUS_TYPE_UNKNOWN 0 //!< Unknown bus type. +#define NVML_BUS_TYPE_PCI 1 //!< PCI bus. +#define NVML_BUS_TYPE_PCIE 2 //!< PCI-Express bus. +#define NVML_BUS_TYPE_FPCI 3 //!< FPCI bus. +#define NVML_BUS_TYPE_AGP 4 //!< AGP bus. typedef unsigned int nvmlBusType_t; @@ -1368,38 +1543,38 @@ typedef unsigned int nvmlBusType_t; /** * Device Fan control policy */ -#define NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW 0 -#define NVML_FAN_POLICY_MANUAL 1 +#define NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW 0 //!< Temperature-controlled fan policy. +#define NVML_FAN_POLICY_MANUAL 1 //!< Manual fan control policy. typedef unsigned int nvmlFanControlPolicy_t; /** * Device Power Source */ -#define NVML_POWER_SOURCE_AC 0x00000000 -#define NVML_POWER_SOURCE_BATTERY 0x00000001 -#define NVML_POWER_SOURCE_UNDERSIZED 0x00000002 +#define NVML_POWER_SOURCE_AC 0x00000000 //!< AC power source. +#define NVML_POWER_SOURCE_BATTERY 0x00000001 //!< Battery power source. +#define NVML_POWER_SOURCE_UNDERSIZED 0x00000002 //!< Undersized power source. 
typedef unsigned int nvmlPowerSource_t; /** * Device PCIE link Max Speed */ -#define NVML_PCIE_LINK_MAX_SPEED_INVALID 0x00000000 -#define NVML_PCIE_LINK_MAX_SPEED_2500MBPS 0x00000001 -#define NVML_PCIE_LINK_MAX_SPEED_5000MBPS 0x00000002 -#define NVML_PCIE_LINK_MAX_SPEED_8000MBPS 0x00000003 -#define NVML_PCIE_LINK_MAX_SPEED_16000MBPS 0x00000004 -#define NVML_PCIE_LINK_MAX_SPEED_32000MBPS 0x00000005 -#define NVML_PCIE_LINK_MAX_SPEED_64000MBPS 0x00000006 +#define NVML_PCIE_LINK_MAX_SPEED_INVALID 0x00000000 //!< Invalid PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_2500MBPS 0x00000001 //!< 2500 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_5000MBPS 0x00000002 //!< 5000 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_8000MBPS 0x00000003 //!< 8000 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_16000MBPS 0x00000004 //!< 16000 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_32000MBPS 0x00000005 //!< 32000 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_64000MBPS 0x00000006 //!< 64000 MB/s PCIe link speed. /** * Adaptive clocking status */ -#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED 0x00000000 -#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED 0x00000001 +#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED 0x00000000 //!< Adaptive clocking is disabled. +#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED 0x00000001 //!< Adaptive clocking is enabled. -#define NVML_MAX_GPU_UTILIZATIONS 8 +#define NVML_MAX_GPU_UTILIZATIONS 8 //!< Maximum number of GPU utilization domains. 
/** * Represents the GPU utilization domains @@ -1427,14 +1602,14 @@ typedef struct nvmlGpuDynamicPstatesInfo_st /* * PCIe outbound/inbound atomic operations capability */ -#define NVML_PCIE_ATOMICS_CAP_FETCHADD32 0x01 -#define NVML_PCIE_ATOMICS_CAP_FETCHADD64 0x02 -#define NVML_PCIE_ATOMICS_CAP_SWAP32 0x04 -#define NVML_PCIE_ATOMICS_CAP_SWAP64 0x08 -#define NVML_PCIE_ATOMICS_CAP_CAS32 0x10 -#define NVML_PCIE_ATOMICS_CAP_CAS64 0x20 -#define NVML_PCIE_ATOMICS_CAP_CAS128 0x40 -#define NVML_PCIE_ATOMICS_OPS_MAX 7 +#define NVML_PCIE_ATOMICS_CAP_FETCHADD32 0x01 //!< 32-bit fetch and add. +#define NVML_PCIE_ATOMICS_CAP_FETCHADD64 0x02 //!< 64-bit fetch and add. +#define NVML_PCIE_ATOMICS_CAP_SWAP32 0x04 //!< 32-bit swap. +#define NVML_PCIE_ATOMICS_CAP_SWAP64 0x08 //!< 64-bit swap. +#define NVML_PCIE_ATOMICS_CAP_CAS32 0x10 //!< 32-bit compare and swap. +#define NVML_PCIE_ATOMICS_CAP_CAS64 0x20 //!< 64-bit compare and swap. +#define NVML_PCIE_ATOMICS_CAP_CAS128 0x40 //!< 128-bit compare and swap. +#define NVML_PCIE_ATOMICS_OPS_MAX 7 //!< Maximum number of PCIe atomics operations. /** * Device Scope - This is useful to retrieve the telemetry at GPU and module (e.g. 
GPU + CPU) level @@ -1455,7 +1630,7 @@ typedef struct unsigned int powerValueMw; //!< [out] Power value to retrieve or set in milliwatts } nvmlPowerValue_v2_t; -#define nvmlPowerValue_v2 NVML_STRUCT_VERSION(PowerValue, 2) +#define nvmlPowerValue_v2 NVML_STRUCT_VERSION(PowerValue, 2) //!< Version macro for \a nvmlPowerValue_v2_t /** @} */ @@ -1567,6 +1742,8 @@ typedef enum nvmlDeviceVgpuCapability_enum NVML_DEVICE_VGPU_CAP_COMPUTE_MEDIA_ENGINE_GPU = 7, //!< Set/Get support for compute media engine vGPU profiles NVML_DEVICE_VGPU_CAP_WARM_UPDATE = 8, //!< Query whether the GPU supports FSR and warm update NVML_DEVICE_VGPU_CAP_HOMOGENEOUS_PLACEMENTS = 9, //!< Query whether the GPU supports reporting of placements of timesliced vGPU profiles with identical framebuffer sizes + NVML_DEVICE_VGPU_CAP_MIG_TIMESLICING_SUPPORTED = 10, //!< Query whether the GPU supports timesliced vGPU on MIG + NVML_DEVICE_VGPU_CAP_MIG_TIMESLICING_ENABLED = 11, //!< Set/Get MIG timesliced mode reporting, without impacting the underlying functionality // Keep this last NVML_DEVICE_VGPU_CAP_COUNT } nvmlDeviceVgpuCapability_t; @@ -1598,22 +1775,22 @@ typedef enum nvmlDeviceVgpuCapability_enum /*! * Macros for vGPU instance's virtualization capabilities bitfield. */ -#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 -#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 -#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 +#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 //!< vGPU migration capability. +#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 //!< vGPU migration is not supported. +#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 //!< vGPU migration is supported. /*! * Macros for pGPU's virtualization capabilities bitfield. 
*/ -#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 -#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 -#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 +#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 //!< Physical GPU migration capability. +#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 //!< Physical GPU migration is not supported. +#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 //!< Physical GPU migration is supported. /** * Macros to indicate the vGPU mode of the GPU. */ -#define NVML_VGPU_PGPU_HETEROGENEOUS_MODE 0 -#define NVML_VGPU_PGPU_HOMOGENEOUS_MODE 1 +#define NVML_VGPU_PGPU_HETEROGENEOUS_MODE 0 //!< Heterogeneous vGPU mode. +#define NVML_VGPU_PGPU_HOMOGENEOUS_MODE 1 //!< Homogeneous vGPU mode. /** @} */ @@ -1636,7 +1813,7 @@ typedef struct unsigned int mode; //!< The vGPU heterogeneous mode } nvmlVgpuHeterogeneousMode_v1_t; typedef nvmlVgpuHeterogeneousMode_v1_t nvmlVgpuHeterogeneousMode_t; -#define nvmlVgpuHeterogeneousMode_v1 NVML_STRUCT_VERSION(VgpuHeterogeneousMode, 1) +#define nvmlVgpuHeterogeneousMode_v1 NVML_STRUCT_VERSION(VgpuHeterogeneousMode, 1) //!< Version macro for \a nvmlVgpuHeterogeneousMode_v1_t /** * Structure to store the placement ID of vGPU instance -- version 1 @@ -1647,7 +1824,7 @@ typedef struct unsigned int placementId; //!< Placement ID of the active vGPU instance } nvmlVgpuPlacementId_v1_t; typedef nvmlVgpuPlacementId_v1_t nvmlVgpuPlacementId_t; -#define nvmlVgpuPlacementId_v1 NVML_STRUCT_VERSION(VgpuPlacementId, 1) +#define nvmlVgpuPlacementId_v1 NVML_STRUCT_VERSION(VgpuPlacementId, 1) //!< Version macro for \a nvmlVgpuPlacementId_v1_t /** * Structure to store the list of vGPU placements -- version 1 @@ -1659,7 +1836,7 @@ typedef struct unsigned int count; //!< Count of placement IDs fetched unsigned int *placementIds; //!< Placement IDs for the vGPU type } nvmlVgpuPlacementList_v1_t; -#define nvmlVgpuPlacementList_v1 NVML_STRUCT_VERSION(VgpuPlacementList, 1) +#define 
nvmlVgpuPlacementList_v1 NVML_STRUCT_VERSION(VgpuPlacementList, 1) //!< Version macro for \a nvmlVgpuPlacementList_v1_t /** * Structure to store the list of vGPU placements -- version 2 @@ -1673,7 +1850,7 @@ typedef struct unsigned int mode; //!< IN: The vGPU mode. Either NVML_VGPU_PGPU_HETEROGENEOUS_MODE or NVML_VGPU_PGPU_HOMOGENEOUS_MODE } nvmlVgpuPlacementList_v2_t; typedef nvmlVgpuPlacementList_v2_t nvmlVgpuPlacementList_t; -#define nvmlVgpuPlacementList_v2 NVML_STRUCT_VERSION(VgpuPlacementList, 2) +#define nvmlVgpuPlacementList_v2 NVML_STRUCT_VERSION(VgpuPlacementList, 2) //!< Version macro for \a nvmlVgpuPlacementList_v2_t /** * Structure to store BAR1 size information of vGPU type -- Version 1 @@ -1684,7 +1861,7 @@ typedef struct unsigned long long bar1Size; //!< BAR1 size in megabytes } nvmlVgpuTypeBar1Info_v1_t; typedef nvmlVgpuTypeBar1Info_v1_t nvmlVgpuTypeBar1Info_t; -#define nvmlVgpuTypeBar1Info_v1 NVML_STRUCT_VERSION(VgpuTypeBar1Info, 1) +#define nvmlVgpuTypeBar1Info_v1 NVML_STRUCT_VERSION(VgpuTypeBar1Info, 1) //!< Version macro for \a nvmlVgpuTypeBar1Info_v1_t /** * Structure to store Utilization Value and vgpuInstance @@ -1726,7 +1903,7 @@ typedef struct nvmlVgpuInstanceUtilizationInfo_v1_t *vgpuUtilArray; //!< The array (allocated by caller) in which vGPU utilization are returned } nvmlVgpuInstancesUtilizationInfo_v1_t; typedef nvmlVgpuInstancesUtilizationInfo_v1_t nvmlVgpuInstancesUtilizationInfo_t; -#define nvmlVgpuInstancesUtilizationInfo_v1 NVML_STRUCT_VERSION(VgpuInstancesUtilizationInfo, 1) +#define nvmlVgpuInstancesUtilizationInfo_v1 NVML_STRUCT_VERSION(VgpuInstancesUtilizationInfo, 1) //!< Version macro for \a nvmlVgpuInstancesUtilizationInfo_v1_t /** * Structure to store Utilization Value, vgpuInstance and subprocess information @@ -1771,7 +1948,7 @@ typedef struct nvmlVgpuProcessUtilizationInfo_v1_t *vgpuProcUtilArray; //!< The array (allocated by caller) in which utilization of processes running on vGPU instances are returned } 
nvmlVgpuProcessesUtilizationInfo_v1_t; typedef nvmlVgpuProcessesUtilizationInfo_v1_t nvmlVgpuProcessesUtilizationInfo_t; -#define nvmlVgpuProcessesUtilizationInfo_v1 NVML_STRUCT_VERSION(VgpuProcessesUtilizationInfo, 1) +#define nvmlVgpuProcessesUtilizationInfo_v1 NVML_STRUCT_VERSION(VgpuProcessesUtilizationInfo, 1) //!< Version macro for \a nvmlVgpuProcessesUtilizationInfo_v1_t /** * Structure to store the information of vGPU runtime state -- version 1 @@ -1782,23 +1959,34 @@ typedef struct unsigned long long size; //!< OUT: The runtime state size of the vGPU instance } nvmlVgpuRuntimeState_v1_t; typedef nvmlVgpuRuntimeState_v1_t nvmlVgpuRuntimeState_t; -#define nvmlVgpuRuntimeState_v1 NVML_STRUCT_VERSION(VgpuRuntimeState, 1) +#define nvmlVgpuRuntimeState_v1 NVML_STRUCT_VERSION(VgpuRuntimeState, 1) //!< Version macro for \a nvmlVgpuRuntimeState_v1_t /** * vGPU scheduler policies */ -#define NVML_VGPU_SCHEDULER_POLICY_UNKNOWN 0 -#define NVML_VGPU_SCHEDULER_POLICY_BEST_EFFORT 1 -#define NVML_VGPU_SCHEDULER_POLICY_EQUAL_SHARE 2 -#define NVML_VGPU_SCHEDULER_POLICY_FIXED_SHARE 3 +#define NVML_VGPU_SCHEDULER_POLICY_UNKNOWN 0 //!< Unknown scheduler policy. +#define NVML_VGPU_SCHEDULER_POLICY_BEST_EFFORT 1 //!< Best effort scheduler policy. +#define NVML_VGPU_SCHEDULER_POLICY_EQUAL_SHARE 2 //!< Equal share scheduler policy. +#define NVML_VGPU_SCHEDULER_POLICY_FIXED_SHARE 3 //!< Fixed share scheduler policy. -#define NVML_SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT 3 +#define NVML_SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT 3 //!< Number of supported vGPU scheduler policies. -#define NVML_SCHEDULER_SW_MAX_LOG_ENTRIES 200 +#define NVML_SCHEDULER_SW_MAX_LOG_ENTRIES 200 //!< Maximum number of scheduler log entries. + +/* + * @deprecated Adaptive Round Robin mode is always enabled + */ +#define NVML_VGPU_SCHEDULER_ARR_DEFAULT 0 //!< Default Adaptive Round Robin mode. +#define NVML_VGPU_SCHEDULER_ARR_DISABLE 1 //!< Disable Adaptive Round Robin mode. 
+#define NVML_VGPU_SCHEDULER_ARR_ENABLE 2 //!< Enable Adaptive Round Robin mode. -#define NVML_VGPU_SCHEDULER_ARR_DEFAULT 0 -#define NVML_VGPU_SCHEDULER_ARR_DISABLE 1 -#define NVML_VGPU_SCHEDULER_ARR_ENABLE 2 +/** + * vGPU scheduler engine types + * A GPU or GI may support a subset of engines + */ +#define NVML_VGPU_SCHEDULER_ENGINE_TYPE_GRAPHICS 1 //!< Graphics engine. +#define NVML_VGPU_SCHEDULER_ENGINE_TYPE_NVENC1 2 //!< NVENC1 +#define NVML_VGPU_SCHEDULER_ENGINE_TYPE_NVENC0 3 //!< NVENC0 /** * Union to represent the vGPU Scheduler Parameters @@ -1861,13 +2049,13 @@ typedef union { struct { - unsigned int avgFactor; //!< Average factor in compensating the timeslice for Adaptive Round Robin mode - unsigned int frequency; //!< Frequency for Adaptive Round Robin mode + unsigned int avgFactor; //!< Average factor in compensating the timeslice for Adaptive Round Robin mode. 0 or unspecified uses default. + unsigned int frequency; //!< Frequency for Adaptive Round Robin mode. 0 or unspecified uses default. } vgpuSchedDataWithARR; struct { - unsigned int timeslice; //!< The timeslice in ns(Nanoseconds) for each software run list as configured, or the default value otherwise + unsigned int timeslice; //!< The timeslice in ns(Nanoseconds) for each software run list as configured, or the default value if unspecified or set to 0. 
} vgpuSchedData; } nvmlVgpuSchedulerSetParams_t; @@ -1969,13 +2157,156 @@ typedef struct nvmlGridLicensableFeatures_st * Enum describing the GPU Recovery Action */ typedef enum nvmlDeviceGpuRecoveryAction_s { - NVML_GPU_RECOVERY_ACTION_NONE = 0, - NVML_GPU_RECOVERY_ACTION_GPU_RESET = 1, - NVML_GPU_RECOVERY_ACTION_NODE_REBOOT = 2, - NVML_GPU_RECOVERY_ACTION_DRAIN_P2P = 3, - NVML_GPU_RECOVERY_ACTION_DRAIN_AND_RESET = 4, + NVML_GPU_RECOVERY_ACTION_NONE = 0, //!< No action needed + NVML_GPU_RECOVERY_ACTION_GPU_RESET = 1, //!< Reset Gpu + NVML_GPU_RECOVERY_ACTION_NODE_REBOOT = 2, //!< Reboot Node + NVML_GPU_RECOVERY_ACTION_DRAIN_P2P = 3, //!< Drain P2P + NVML_GPU_RECOVERY_ACTION_DRAIN_AND_RESET = 4, //!< Drain P2P and Reset Gpu } nvmlDeviceGpuRecoveryAction_t; +/** + * Structure to store the vGPU type IDs -- version 1 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int vgpuCount; //!< IN/OUT: Number of vGPU types + nvmlVgpuTypeId_t *vgpuTypeIds; //!< OUT: List of vGPU type IDs +} nvmlVgpuTypeIdInfo_v1_t; +typedef nvmlVgpuTypeIdInfo_v1_t nvmlVgpuTypeIdInfo_t; +#define nvmlVgpuTypeIdInfo_v1 NVML_STRUCT_VERSION(VgpuTypeIdInfo, 1) //!< Version macro for \a nvmlVgpuTypeIdInfo_v1_t + +/** + * Structure to store the maximum number of possible vGPU type IDs -- version 1 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + nvmlVgpuTypeId_t vgpuTypeId; //!< IN: Handle to vGPU type + unsigned int maxInstancePerGI; //!< OUT: Maximum number of vGPU instances per GPU instance +} nvmlVgpuTypeMaxInstance_v1_t; +typedef nvmlVgpuTypeMaxInstance_v1_t nvmlVgpuTypeMaxInstance_t; +#define nvmlVgpuTypeMaxInstance_v1 NVML_STRUCT_VERSION(VgpuTypeMaxInstance, 1) //!< Version macro for \a nvmlVgpuTypeMaxInstance_v1_t + +/** + * Structure to store active vGPU instance information -- Version 1 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int 
vgpuCount; //!< IN/OUT: Count of the active vGPU instances + nvmlVgpuInstance_t *vgpuInstances; //!< IN/OUT: list of active vGPU instances +} nvmlActiveVgpuInstanceInfo_v1_t; +typedef nvmlActiveVgpuInstanceInfo_v1_t nvmlActiveVgpuInstanceInfo_t; +#define nvmlActiveVgpuInstanceInfo_v1 NVML_STRUCT_VERSION(ActiveVgpuInstanceInfo, 1) //!< Version macro for \a nvmlActiveVgpuInstanceInfo_v1_t + +/** + * Structure to set vGPU scheduler state information -- version 1 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int engineId; //!< IN: One of NVML_VGPU_SCHEDULER_ENGINE_TYPE_*. + unsigned int schedulerPolicy; //!< IN: Scheduler policy + unsigned int enableARRMode; //!< IN: Adaptive Round Robin scheduler + nvmlVgpuSchedulerSetParams_t schedulerParams; //!< IN: vGPU Scheduler Parameters +} nvmlVgpuSchedulerState_v1_t; +typedef nvmlVgpuSchedulerState_v1_t nvmlVgpuSchedulerState_t; +#define nvmlVgpuSchedulerState_v1 NVML_STRUCT_VERSION(VgpuSchedulerState, 1) //!< Version macro for \a nvmlVgpuSchedulerState_v1_t + +/** + * Structure to store vGPU scheduler state information -- Version 1 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int engineId; //!< IN: Engine whose software scheduler state info is fetched. One of NVML_VGPU_SCHEDULER_ENGINE_TYPE_*. + unsigned int schedulerPolicy; //!< OUT: Scheduler policy + unsigned int arrMode; //!< OUT: Adaptive Round Robin scheduler mode. One of the NVML_VGPU_SCHEDULER_ARR_*. 
+ nvmlVgpuSchedulerParams_t schedulerParams; //!< OUT: vGPU Scheduler Parameters +} nvmlVgpuSchedulerStateInfo_v1_t; +typedef nvmlVgpuSchedulerStateInfo_v1_t nvmlVgpuSchedulerStateInfo_t; +#define nvmlVgpuSchedulerStateInfo_v1 NVML_STRUCT_VERSION(VgpuSchedulerStateInfo, 1) //!< Version macro for \a nvmlVgpuSchedulerStateInfo_v1_t + +/** + * Structure to store vGPU scheduler log information -- Version 1 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int engineId; //!< IN: Engine whose software runlist log entries are fetched. One of NVML_VGPU_SCHEDULER_ENGINE_TYPE_*. + unsigned int schedulerPolicy; //!< OUT: Scheduler policy + unsigned int arrMode; //!< OUT: Adaptive Round Robin scheduler mode. One of the NVML_VGPU_SCHEDULER_ARR_*. + nvmlVgpuSchedulerParams_t schedulerParams; //!< OUT: vGPU Scheduler Parameters + unsigned int entriesCount; //!< OUT: Count of log entries fetched + nvmlVgpuSchedulerLogEntry_t logEntries[NVML_SCHEDULER_SW_MAX_LOG_ENTRIES]; //!< OUT: Structure to store the state and logs of a software runlist +} nvmlVgpuSchedulerLogInfo_v1_t; +typedef nvmlVgpuSchedulerLogInfo_v1_t nvmlVgpuSchedulerLogInfo_t; +#define nvmlVgpuSchedulerLogInfo_v1 NVML_STRUCT_VERSION(VgpuSchedulerLogInfo, 1) //!< Version macro for \a nvmlVgpuSchedulerLogInfo_v1_t + +/** + * Structure to store creatable vGPU placement information -- version 1 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + nvmlVgpuTypeId_t vgpuTypeId; //!< IN: Handle to vGPU type + unsigned int count; //!< IN/OUT: Count of the placement IDs + unsigned int *placementIds; //!< IN/OUT: Placement IDs for the vGPU type + unsigned int placementSize; //!< OUT: The number of slots occupied by the vGPU type +} nvmlVgpuCreatablePlacementInfo_v1_t; +typedef nvmlVgpuCreatablePlacementInfo_v1_t nvmlVgpuCreatablePlacementInfo_t; +#define nvmlVgpuCreatablePlacementInfo_v1
NVML_STRUCT_VERSION(VgpuCreatablePlacementInfo, 1) //!< Version macro for \a nvmlVgpuCreatablePlacementInfo_v1_t + +/** + * Structure to store vGPU scheduler state information + */ +typedef struct +{ + unsigned int engineId; //!< IN: Engine whose software scheduler state info is fetched. One of NVML_VGPU_SCHEDULER_ENGINE_TYPE_*. + unsigned int schedulerPolicy; //!< OUT: Scheduler policy + unsigned int avgFactor; //!< OUT: Average factor in compensating the timeslice for Adaptive Round Robin mode + unsigned int timeslice; //!< OUT: The timeslice in ns for each software run list as configured, or the default value otherwise +} nvmlVgpuSchedulerStateInfo_v2_t; + +/** +* Structure to store the state and logs of a software runlist +*/ +typedef struct +{ + unsigned long long timestamp; //!< OUT: Timestamp in ns when this software runlist was preempted + unsigned long long timeRunTotal; //!< OUT: Total time in ns this software runlist has run + unsigned long long timeRun; //!< OUT: Time in ns this software runlist ran before preemption + unsigned int swRunlistId; //!< OUT: Software runlist Id + unsigned long long targetTimeSlice; //!< OUT: The actual timeslice after deduction + unsigned long long cumulativePreemptionTime; //!< OUT: Preemption time in ns for this SW runlist + unsigned int weight; //!< OUT: Current weight of this SW runlist +} nvmlVgpuSchedulerLogEntry_v2_t; + +/** +* Structure to store vGPU scheduler log information +*/ +typedef struct +{ + unsigned int engineId; //!< IN: Engine whose software runlist log entries are fetched. One of NVML_VGPU_SCHEDULER_ENGINE_TYPE_*.
+ unsigned int schedulerPolicy; //!< OUT: Scheduler policy + unsigned int avgFactor; //!< OUT: Average factor in compensating the timeslice for Adaptive Round Robin mode + unsigned int timeslice; //!< OUT: The timeslice in ns for each software run list as configured, or the default value otherwise + unsigned int entriesCount; //!< OUT: Count of log entries fetched + nvmlVgpuSchedulerLogEntry_v2_t logEntries[NVML_SCHEDULER_SW_MAX_LOG_ENTRIES]; //!< OUT: Structure to store the state and logs of a software runlist +} nvmlVgpuSchedulerLogInfo_v2_t; + +/** + * Structure to set vGPU scheduler state information + */ +typedef struct +{ + unsigned int engineId; //!< IN: One of NVML_VGPU_SCHEDULER_ENGINE_TYPE_*. + unsigned int schedulerPolicy; //!< IN: Scheduler policy + unsigned int avgFactor; //!< IN: Average factor in compensating the timeslice for Adaptive Round Robin mode. 0 or unspecified uses default. + unsigned int frequency; //!< IN: Frequency for Adaptive Round Robin mode. 0 or unspecified uses default. +} nvmlVgpuSchedulerState_v2_t; + /** @} */ /** @} */ @@ -2026,7 +2357,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_RETIRED_DBE 30 //!< Number of retired pages because of double bit errors #define NVML_FI_DEV_RETIRED_PENDING 31 //!< If any pages are pending retirement. 1=yes. 0=no. -/* NvLink Flit Error Counters */ +/** + * NVLink Flit Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L0 32 //!< NVLink flow control CRC Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L1 33 //!< NVLink flow control CRC Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L2 34 //!< NVLink flow control CRC Error Counter for Lane 2 @@ -2035,7 +2370,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L5 37 //!< NVLink flow control CRC Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL 38 //!< NVLink flow control CRC Error Counter total for all Lanes -/* NvLink CRC Data Error Counters */ +/** + * NVLink CRC Data Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L0 39 //!< NVLink data CRC Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L1 40 //!< NVLink data CRC Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L2 41 //!< NVLink data CRC Error Counter for Lane 2 @@ -2044,7 +2383,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L5 44 //!< NVLink data CRC Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL 45 //!< NvLink data CRC Error Counter total for all Lanes -/* NvLink Replay Error Counters */ +/** + * NVLink Replay Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L0 46 //!< NVLink Replay Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L1 47 //!< NVLink Replay Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L2 48 //!< NVLink Replay Error Counter for Lane 2 @@ -2053,7 +2396,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L5 51 //!< NVLink Replay Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL 52 //!< NVLink Replay Error Counter total for all Lanes -/* NvLink Recovery Error Counters */ +/** + * NVLink Recovery Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L0 53 //!< NVLink Recovery Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L1 54 //!< NVLink Recovery Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L2 55 //!< NVLink Recovery Error Counter for Lane 2 @@ -2104,7 +2451,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { /* Energy Counter */ #define NVML_FI_DEV_TOTAL_ENERGY_CONSUMPTION 83 //!< Total energy consumption for the GPU in mJ since the driver was last reloaded -/* NVLink Speed */ +/** + * NVLink Speed + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L0 84 //!< NVLink Speed in MBps for Link 0 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L1 85 //!< NVLink Speed in MBps for Link 1 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L2 86 //!< NVLink Speed in MBps for Link 2 @@ -2121,7 +2472,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_PCIE_REPLAY_COUNTER 94 //!< PCIe replay counter #define NVML_FI_DEV_PCIE_REPLAY_ROLLOVER_COUNTER 95 //!< PCIe replay rollover counter -/* NvLink Flit Error Counters */ +/** + * NVLink Flit Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L6 96 //!< NVLink flow control CRC Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L7 97 //!< NVLink flow control CRC Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L8 98 //!< NVLink flow control CRC Error Counter for Lane 8 @@ -2129,7 +2484,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L10 100 //!< NVLink flow control CRC Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L11 101 //!< NVLink flow control CRC Error Counter for Lane 11 -/* NvLink CRC Data Error Counters */ +/** + * NVLink CRC Data Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L6 102 //!< NVLink data CRC Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L7 103 //!< NVLink data CRC Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L8 104 //!< NVLink data CRC Error Counter for Lane 8 @@ -2137,7 +2496,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L10 106 //!< NVLink data CRC Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L11 107 //!< NVLink data CRC Error Counter for Lane 11 -/* NvLink Replay Error Counters */ +/** + * NVLink Replay Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L6 108 //!< NVLink Replay Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L7 109 //!< NVLink Replay Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L8 110 //!< NVLink Replay Error Counter for Lane 8 @@ -2145,7 +2508,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L10 112 //!< NVLink Replay Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L11 113 //!< NVLink Replay Error Counter for Lane 11 -/* NvLink Recovery Error Counters */ +/** + * NVLink Recovery Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L6 114 //!< NVLink Recovery Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L7 115 //!< NVLink Recovery Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L8 116 //!< NVLink Recovery Error Counter for Lane 8 @@ -2177,7 +2544,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L10 130 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 10 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L11 131 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 11 -/* NVLink Speed */ +/** + * NVLink Speed + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L6 132 //!< NVLink Speed in MBps for Link 6 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L7 133 //!< NVLink Speed in MBps for Link 7 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L8 134 //!< NVLink Speed in MBps for Link 8 @@ -2234,15 +2605,37 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L11 159 //!< NVLink data ECC Error Counter for Link 11 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_TOTAL 160 //!< NVLink data ECC Error Counter total for all Links +/** + * NVLink Error Replay + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_ERROR_DL_REPLAY 161 //!< NVLink Replay Error Counter //!< This is unsupported for Blackwell+. //!< Please use NVML_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_* +/** + * NVLink Recovery Error Counter + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_ERROR_DL_RECOVERY 162 //!< NVLink Recovery Error Counter //!< This is unsupported for Blackwell+ //!< Please use NVML_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_* + +/** + * NVLink Recovery Error CRC Counter + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_ERROR_DL_CRC 163 //!< NVLink CRC Error Counter //!< This is unsupported for Blackwell+ //!< Please use NVML_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_* + +/** + * NVLink Speed, State and Version field id 164, 165, and 166 + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_GET_SPEED 164 //!< NVLink Speed in MBps #define NVML_FI_DEV_NVLINK_GET_STATE 165 //!< NVLink State - Active,Inactive #define NVML_FI_DEV_NVLINK_GET_VERSION 166 //!< NVLink Version @@ -2308,6 +2701,11 @@ typedef enum nvmlDeviceGpuRecoveryAction_s { #define NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD_MAX 200 //!< Max Nvlink Power Threshold. 
See NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD +/** + * NVLink counter field id 201-225 + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_COUNT_XMIT_PACKETS 201 //!> NVML_GPU_FABRIC_HEALTH_MASK_SHIFT##type) & \ - (NVML_GPU_FABRIC_HEALTH_MASK_WIDTH##type)) + (NVML_GPU_FABRIC_HEALTH_MASK_WIDTH##type)) //!< Macro to get GPU fabric health status. /** * GPU Fabric Health Status Mask for various fields can be tested @@ -3176,31 +3825,54 @@ typedef struct { */ #define NVML_GPU_FABRIC_HEALTH_TEST(var, type, val) \ (NVML_GPU_FABRIC_HEALTH_GET(var, type) == \ - NVML_GPU_FABRIC_HEALTH_MASK##type##val) + NVML_GPU_FABRIC_HEALTH_MASK##type##val) //!< Macro to test GPU fabric health status. /** * GPU Fabric information (v2). * +* @deprecated nvmlGpuFabricInfo_v2_t is deprecated and will be removed in a future release. +* Use nvmlGpuFabricInfo_v3_t instead +* * Version 2 adds the \ref nvmlGpuFabricInfo_v2_t.version field * to the start of the structure, and the \ref nvmlGpuFabricInfo_v2_t.healthMask * field to the end. This structure is not backwards-compatible with * \ref nvmlGpuFabricInfo_t. */ -typedef struct { +typedef struct +{ unsigned int version; //!< Structure version identifier (set to nvmlGpuFabricInfo_v2) unsigned char clusterUuid[NVML_GPU_FABRIC_UUID_LEN]; //!< Uuid of the cluster to which this GPU belongs - nvmlReturn_t status; //!< Error status, if any. Must be checked only if state returns "complete". + nvmlReturn_t status; //!< Probe Error status, if any. Must be checked only if Probe state returns "complete". unsigned int cliqueId; //!< ID of the fabric clique to which this GPU belongs - nvmlGpuFabricState_t state; //!< Current state of GPU registration process - unsigned int healthMask; //!< GPU Fabric health Status Mask + nvmlGpuFabricState_t state; //!< Current Probe State of GPU registration process. See NVML_GPU_FABRIC_STATE_* + unsigned int healthMask; //!< GPU Fabric health Status Mask. 
See NVML_GPU_FABRIC_HEALTH_MASK_* } nvmlGpuFabricInfo_v2_t; -typedef nvmlGpuFabricInfo_v2_t nvmlGpuFabricInfoV_t; - /** * Version identifier value for \ref nvmlGpuFabricInfo_v2_t.version. */ -#define nvmlGpuFabricInfo_v2 NVML_STRUCT_VERSION(GpuFabricInfo, 2) +#define nvmlGpuFabricInfo_v2 NVML_STRUCT_VERSION(GpuFabricInfo, 2) //!< Version macro for \a nvmlGpuFabricInfo_v2_t + +/** +* GPU Fabric information (v3). +*/ +typedef struct +{ + unsigned int version; //!< Structure version identifier (set to nvmlGpuFabricInfo_v3) + unsigned char clusterUuid[NVML_GPU_FABRIC_UUID_LEN]; //!< Uuid of the cluster to which this GPU belongs + nvmlReturn_t status; //!< Probe Error status, if any. Must be checked only if Probe state returns "complete". + unsigned int cliqueId; //!< ID of the fabric clique to which this GPU belongs + nvmlGpuFabricState_t state; //!< Current Probe State of GPU registration process. See NVML_GPU_FABRIC_STATE_* + unsigned int healthMask; //!< GPU Fabric health Status Mask. See NVML_GPU_FABRIC_HEALTH_MASK_* + unsigned char healthSummary; //!< GPU Fabric health summary. See NVML_GPU_FABRIC_HEALTH_SUMMARY_* +} nvmlGpuFabricInfo_v3_t; + +typedef nvmlGpuFabricInfo_v3_t nvmlGpuFabricInfoV_t; + +/** +* Version identifier value for \ref nvmlGpuFabricInfo_v3_t.version. 
+*/ +#define nvmlGpuFabricInfo_v3 NVML_STRUCT_VERSION(GpuFabricInfo, 3) //!< Version macro for \a nvmlGpuFabricInfo_v3_t /** @} */ @@ -3213,8 +3885,9 @@ typedef nvmlGpuFabricInfo_v2_t nvmlGpuFabricInfoV_t; */ /***************************************************************************************************/ -#define NVML_INIT_FLAG_NO_GPUS 1 //!< Don't fail nvmlInit() when no GPUs are found -#define NVML_INIT_FLAG_NO_ATTACH 2 //!< Don't attach GPUs +#define NVML_INIT_FLAG_NO_GPUS (1 << 0) //!< Don't fail nvmlInit() when no GPUs are found +#define NVML_INIT_FLAG_NO_ATTACH (1 << 1) //!< Don't attach GPUs +#define NVML_INIT_FLAG_FORCE_INIT (1 << 2) //!< Force GPU initialization when a previous nvmlInit was called with NO_GPUS and NO_ATTACH flags /** * Initialize NVML, but don't initialize any GPUs yet. @@ -3441,8 +4114,8 @@ nvmlReturn_t DECLDIR nvmlSystemGetCudaDriverVersion_v2(int *cudaDriverVersion); /** * Macros for converting the CUDA driver version number to Major and Minor version numbers. */ -#define NVML_CUDA_DRIVER_VERSION_MAJOR(v) ((v)/1000) -#define NVML_CUDA_DRIVER_VERSION_MINOR(v) (((v)%1000)/10) +#define NVML_CUDA_DRIVER_VERSION_MAJOR(v) ((v)/1000) //!< Macro to extract the major version number from the CUDA driver version. +#define NVML_CUDA_DRIVER_VERSION_MINOR(v) (((v)%1000)/10) //!< Macro to extract the minor version number from the CUDA driver version. /** * Gets name of the process with provided process id @@ -3513,7 +4186,7 @@ typedef struct char branch[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE]; //!< driver branch } nvmlSystemDriverBranchInfo_v1_t; typedef nvmlSystemDriverBranchInfo_v1_t nvmlSystemDriverBranchInfo_t; -#define nvmlSystemDriverBranchInfo_v1 NVML_STRUCT_VERSION(SystemDriverBranchInfo, 1) +#define nvmlSystemDriverBranchInfo_v1 NVML_STRUCT_VERSION(SystemDriverBranchInfo, 1) //!< Version macro for \a nvmlSystemDriverBranchInfo_v1_t /** * Retrieves the driver branch of the NVIDIA driver installed on the system. 
@@ -3835,10 +4508,10 @@ nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex_v2(unsigned int index, nvmlDevic * @see nvmlDeviceGetSerial * @see nvmlDeviceGetHandleByUUID */ -nvmlReturn_t DECLDIR nvmlDeviceGetHandleBySerial(const char *serial, nvmlDevice_t *device); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetHandleBySerial(const char *serial, nvmlDevice_t *device); /** - * Acquire the handle for a particular device, based on its globally unique immutable UUID associated with each device. + * Acquire the handle for a particular device, based on its globally unique immutable UUID (in ASCII format) associated with each device. * * For all products. * @@ -3862,6 +4535,29 @@ nvmlReturn_t DECLDIR nvmlDeviceGetHandleBySerial(const char *serial, nvmlDevice_ */ nvmlReturn_t DECLDIR nvmlDeviceGetHandleByUUID(const char *uuid, nvmlDevice_t *device); +/** + * Acquire the handle for a particular device, based on its globally unique immutable UUID (in either ASCII or binary format) associated with each device. + * See \ref nvmlUUID_v1_t for more information on the UUID struct. The caller must set the appropriate version prior to calling this API. + * + * For all products. 
+ * + * @param[in] uuid The UUID of the target GPU or MIG instance + * @param[out] device Reference in which to return the device handle or MIG device handle + * + * This API causes NVML to initialize the target GPU + * NVML may initialize additional GPUs as it searches for the target GPU + * + * @return + * - \ref NVML_SUCCESS if \a device has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a uuid is invalid, \a device is null or \a uuid->type is invalid + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the provided version is invalid/unsupported + * - \ref NVML_ERROR_NOT_FOUND if \a uuid does not match a valid device on the system + * - \ref NVML_ERROR_GPU_IS_LOST if any GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetHandleByUUIDV(const nvmlUUID_t *uuid, nvmlDevice_t *device); + /** * Acquire the handle for a particular device, based on its PCI bus id. * @@ -3877,6 +4573,11 @@ nvmlReturn_t DECLDIR nvmlDeviceGetHandleByUUID(const char *uuid, nvmlDevice_t *d * instead of NVML_ERROR_NO_PERMISSION. * * @param pciBusId The PCI bus id of the target GPU + * Accept the following formats (all numbers in hexadecimal): + * domain:bus:device.function in format %x:%x:%x.%x + * domain:bus:device in format %x:%x:%x + * bus:device.function in format %x:%x.%x + * * @param device Reference in which to return the device handle * * @return @@ -4178,6 +4879,66 @@ nvmlReturn_t DECLDIR nvmlDeviceClearCpuAffinity(nvmlDevice_t device); * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device \a node is invalid */ nvmlReturn_t DECLDIR nvmlDeviceGetNumaNodeId(nvmlDevice_t device, unsigned int *node); + +/** + * Get the addressing mode for a given GPU. Addressing modes can be one of: + * 1. 
HMM: System allocated memory (malloc, mmap) is addressable from the device (GPU), + * via software-based mirroring of the CPU's page tables, on the GPU. + * 2. ATS: System allocated memory (malloc, mmap) is addressable from the device (GPU), + * via Address Translation Services. This means that there is (effectively) + * a single set of page tables, and the CPU and GPU both use them. + * 3. None: Neither HMM nor ATS is active. + * + * For Turing &tm; or newer fully supported devices. + * Supported on Linux only. + * + * @param[in] device The device handle + * @param[out] mode Pointer to addressing mode of the device + * + * @returns + * - \ref NVML_SUCCESS if \a mode is retrieved successfully + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the provided version is invalid/unsupported + * - \ref NVML_ERROR_NOT_SUPPORTED if request is not supported on the current platform + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a mode is invalid + */ +nvmlReturn_t DECLDIR nvmlDeviceGetAddressingMode(nvmlDevice_t device, nvmlDeviceAddressingMode_t *mode); + +/** + * Get the repair status for TPC/Channel repair + * + * For Ampere &tm; or newer fully supported devices. + * + * @param[in] device The identifier of the target device + * @param[out] repairStatus Reference to \a nvmlRepairStatus_t + * + * @return + * - \ref NVML_SUCCESS if the query was successful + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the provided version is invalid/unsupported + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetRepairStatus(nvmlDevice_t device, nvmlRepairStatus_t *repairStatus); + +/** + * Get the unrepairable memory flag for a given GPU + * + * For Hopper &tm; or newer fully supported devices. 
+ * + * @param[in] device The identifier of the target device + * @param[out] unrepairableMemoryStatus Reference to \a nvmlUnrepairableMemoryStatus_v1_t + * + * @return + * - \ref NVML_SUCCESS if the query was successful + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the provided version is invalid/unsupported + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetUnrepairableMemoryFlag_v1(nvmlDevice_t device, nvmlUnrepairableMemoryStatus_v1_t *unrepairableMemoryStatus); + /** * Retrieve the common ancestor for two devices * For all products. @@ -4693,8 +5454,8 @@ nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t * * See \ref nvmlClockType_t for details on available clock information. * - * \note On GPUs from Fermi family current P0 clocks (reported by \ref nvmlDeviceGetClockInfo) can differ from max clocks - * by few MHz. + * \note Current P0 clocks (reported by \ref nvmlDeviceGetClockInfo) can differ from max clocks + * by a few MHz. * * @param device The identifier of the target device * @param type Identify which clock domain to query @@ -4725,46 +5486,14 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockTyp nvmlReturn_t DECLDIR nvmlDeviceGetGpcClkVfOffset(nvmlDevice_t device, int *offset); /** - * Retrieves the current setting of a clock that applications will use unless an overspec situation occurs. - * Can be changed using \ref nvmlDeviceSetApplicationsClocks. - * - * For Kepler &tm; or newer fully supported devices. 
- * - * @param device The identifier of the target device - * @param clockType Identify which clock domain to query - * @param clockMHz Reference in which to return the clock in MHz - * - * @return - * - \ref NVML_SUCCESS if \a clockMHz has been set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * @deprecated Applications clocks are deprecated and will be removed in CUDA 14.0. */ -nvmlReturn_t DECLDIR nvmlDeviceGetApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz); /** - * Retrieves the default applications clock that GPU boots with or - * defaults to after \ref nvmlDeviceResetApplicationsClocks call. - * - * For Kepler &tm; or newer fully supported devices. 
- * - * @param device The identifier of the target device - * @param clockType Identify which clock domain to query - * @param clockMHz Reference in which to return the default clock in MHz - * - * @return - * - \ref NVML_SUCCESS if \a clockMHz has been set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_UNKNOWN on any unexpected error - * - * \see nvmlDeviceGetApplicationsClock + * @deprecated Applications clocks are deprecated and will be removed in CUDA 14.0. */ -nvmlReturn_t DECLDIR nvmlDeviceGetDefaultApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetDefaultApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz); /** * Retrieves the clock speed for the clock specified by the clock type and clock ID. @@ -4806,7 +5535,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetClock(nvmlDevice_t device, nvmlClockType_t clo nvmlReturn_t DECLDIR nvmlDeviceGetMaxCustomerBoostClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz); /** - * Retrieves the list of possible memory clocks that can be used as an argument for \ref nvmlDeviceSetApplicationsClocks. + * Retrieves the list of possible memory clocks that can be used as an argument for \ref nvmlDeviceSetMemoryLockedClocks. * * For Kepler &tm; or newer fully supported devices. 
* @@ -4825,13 +5554,12 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMaxCustomerBoostClock(nvmlDevice_t device, nvm * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * - * @see nvmlDeviceSetApplicationsClocks - * @see nvmlDeviceGetSupportedGraphicsClocks + * @see nvmlDeviceSetMemoryLockedClocks */ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedMemoryClocks(nvmlDevice_t device, unsigned int *count, unsigned int *clocksMHz); /** - * Retrieves the list of possible graphics clocks that can be used as an argument for \ref nvmlDeviceSetApplicationsClocks. + * Retrieves the list of possible graphics clocks that can be used as an argument for \ref nvmlDeviceSetGpuLockedClocks. * * For Kepler &tm; or newer fully supported devices. * @@ -4851,8 +5579,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedMemoryClocks(nvmlDevice_t device, uns * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * - * @see nvmlDeviceSetApplicationsClocks - * @see nvmlDeviceGetSupportedMemoryClocks + * @see nvmlDeviceSetGpuLockedClocks */ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedGraphicsClocks(nvmlDevice_t device, unsigned int memoryClockMHz, unsigned int *count, unsigned int *clocksMHz); @@ -4908,7 +5635,6 @@ nvmlReturn_t DECLDIR nvmlDeviceGetAutoBoostedClocksEnabled(nvmlDevice_t device, */ nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed(nvmlDevice_t device, unsigned int *speed); - /** * Retrieves the intended operating speed of the device's specified fan. 
* @@ -4995,13 +5721,12 @@ nvmlReturn_t DECLDIR nvmlDeviceGetTargetFanSpeed(nvmlDevice_t device, unsigned i * @param minSpeed The minimum speed allowed to set * @param maxSpeed The maximum speed allowed to set * - * return - * NVML_SUCCESS if speed has been adjusted - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if device is invalid - * NVML_ERROR_NOT_SUPPORTED if the device does not support this - * (doesn't have fans) - * NVML_ERROR_UNKNOWN on any unexpected error + * @return + * - \ref NVML_SUCCESS if speed has been adjusted + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this (doesn't have fans) + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMinMaxFanSpeed(nvmlDevice_t device, unsigned int * minSpeed, unsigned int * maxSpeed); @@ -5013,16 +5738,17 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMinMaxFanSpeed(nvmlDevice_t device, unsigned i * * For all cuda-capable discrete products with fans * - * device The identifier of the target \a device - * policy Reference in which to return the fan control \a policy + * @param device The identifier of the target \a device + * @param fan The index of the target fan, zero indexed. + * @param policy Reference in which to return the fan control \a policy * - * return - * NVML_SUCCESS if \a policy has been populated - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a policy is null or the \a fan given doesn't reference - * a fan that exists. 
- * NVML_ERROR_NOT_SUPPORTED if the \a device is older than Maxwell - * NVML_ERROR_UNKNOWN on any unexpected error + * @return + * - \ref NVML_SUCCESS if \a policy has been populated + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a policy is null or the \a fan given doesn't reference + * a fan that exists. + * - \ref NVML_ERROR_NOT_SUPPORTED if the \a device is older than Maxwell + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetFanControlPolicy_v2(nvmlDevice_t device, unsigned int fan, nvmlFanControlPolicy_t *policy); @@ -5048,7 +5774,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNumFans(nvmlDevice_t device, unsigned int *num /** * @deprecated Use \ref nvmlDeviceGetTemperatureV instead */ -nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatureSensors_t sensorType, unsigned int *temp); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatureSensors_t sensorType, unsigned int *temp); /** * Retrieves the cooler's information. @@ -5086,7 +5812,7 @@ typedef struct typedef nvmlTemperature_v1_t nvmlTemperature_t; -#define nvmlTemperature_v1 NVML_STRUCT_VERSION(Temperature, 1) +#define nvmlTemperature_v1 NVML_STRUCT_VERSION(Temperature, 1) //!< Version macro for \a nvmlTemperature_v1_t /** * Retrieves the current temperature readings (in degrees C) for the given device. 
@@ -5216,7 +5942,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetCurrentClocksEventReasons(nvmlDevice_t device, /** * @deprecated Use \ref nvmlDeviceGetCurrentClocksEventReasons instead */ -nvmlReturn_t DECLDIR nvmlDeviceGetCurrentClocksThrottleReasons(nvmlDevice_t device, unsigned long long *clocksThrottleReasons); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetCurrentClocksThrottleReasons(nvmlDevice_t device, unsigned long long *clocksThrottleReasons); /** * Retrieves bitmask of supported clocks event reasons that can be returned by @@ -5245,10 +5971,10 @@ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedClocksEventReasons(nvmlDevice_t devic /** * @deprecated Use \ref nvmlDeviceGetSupportedClocksEventReasons instead */ -nvmlReturn_t DECLDIR nvmlDeviceGetSupportedClocksThrottleReasons(nvmlDevice_t device, unsigned long long *supportedClocksThrottleReasons); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetSupportedClocksThrottleReasons(nvmlDevice_t device, unsigned long long *supportedClocksThrottleReasons); /** - * Deprecated: Use \ref nvmlDeviceGetPerformanceState. This function exposes an incorrect generalization. + * @deprecated Use \ref nvmlDeviceGetPerformanceState. This function exposes an incorrect generalization. * * Retrieve the current performance state for the device. * @@ -5267,7 +5993,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedClocksThrottleReasons(nvmlDevice_t de * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceGetPowerState(nvmlDevice_t device, nvmlPstates_t *pState); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetPowerState(nvmlDevice_t device, nvmlPstates_t *pState); /** * Retrieve performance monitor samples from the associated subdevice. 
@@ -5311,9 +6037,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMemClkVfOffset(nvmlDevice_t device, int *offse * @return * - \ref NVML_SUCCESS if everything worked * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a type or \a pstate are invalid or both - * \a minClockMHz and \a maxClockMHz are NULL + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a type or \a minClockMHz and \a maxClockMHz are NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_UNKNOWN if \a type or \a pstate are invalid or any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMinMaxClockOfPState(nvmlDevice_t device, nvmlClockType_t type, nvmlPstates_t pstate, unsigned int * minClockMHz, unsigned int * maxClockMHz); @@ -5530,7 +6256,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceModes(nvmlDevice_t device, nvmlDevi nvmlReturn_t DECLDIR nvmlDeviceGetCurrentClockFreqs(nvmlDevice_t device, nvmlDeviceCurrentClockFreqs_t *currentClockFreqs); /** - * This API has been deprecated. + * @deprecated This API has been deprecated. * * Retrieves the power management mode associated with this device. * @@ -5557,7 +6283,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetCurrentClockFreqs(nvmlDevice_t device, nvmlDev * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementMode(nvmlDevice_t device, nvmlEnableState_t *mode); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementMode(nvmlDevice_t device, nvmlEnableState_t *mode); /** * Retrieves the power management limit associated with this device. 
@@ -5650,6 +6376,47 @@ nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementDefaultLimit(nvmlDevice_t devic */ nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage(nvmlDevice_t device, unsigned int *power); +/** + * Retrieves current power mizer mode on this device. + * + * PowerMizerMode provides a hint to the driver as to how to manage the performance of the GPU. + * + * For Maxwell &tm; or newer fully supported devices. + * + * @param device The identifier of the target device + * @param powerMizerMode Reference in which to return the power mizer mode + * + * @return + * - \ref NVML_SUCCESS if \a powerMizerMode has been populated + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a powerMizerMode is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support powerMizerMode readings + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ + +nvmlReturn_t DECLDIR nvmlDeviceGetPowerMizerMode_v1(nvmlDevice_t device, nvmlDevicePowerMizerModes_v1_t *powerMizerMode); + +/** + * Sets the new power mizer mode. + * + * For Maxwell &tm; or newer fully supported devices. + * + * @param device The identifier of the target device + * @param powerMizerMode Reference in which to set the power mizer mode. 
+ * + * @return + * - \ref NVML_SUCCESS if \a powerMizerMode has been applied + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a powerMizerMode is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support setting powerMizerMode + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ + +nvmlReturn_t DECLDIR nvmlDeviceSetPowerMizerMode_v1(nvmlDevice_t device, nvmlDevicePowerMizerModes_v1_t *powerMizerMode); + + /** * Retrieves total energy consumption for this GPU in millijoules (mJ) since the driver was last reloaded * @@ -5741,6 +6508,10 @@ nvmlReturn_t DECLDIR nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, nvmlGpuO * the operating system is under memory pressure, it may resort to utilizing FB memory. * Such actions can result in discrepancies in the accuracy of memory reporting. * + * @note On certain SOC platforms, the integrated GPU (iGPU) does not use a dedicated framebuffer + * but instead shares memory with the system. As a result, \ref NVML_ERROR_NOT_SUPPORTED + * will be returned in this case. 
+ * * @param device The identifier of the target device * @param memory Reference in which to return the memory information * @@ -5750,30 +6521,68 @@ nvmlReturn_t DECLDIR nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, nvmlGpuO * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memory is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_NOT_SUPPORTED if video memory is unsupported on the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t device, nvmlMemory_t *memory); /** + * Retrieves the amount of used, free, reserved and total memory available on the device, in bytes. * nvmlDeviceGetMemoryInfo_v2 accounts separately for reserved memory and includes it in the used memory amount. - */ -nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo_v2(nvmlDevice_t device, nvmlMemory_v2_t *memory); - -/** - * Retrieves the current compute mode for the device. * * For all products. * - * See \ref nvmlComputeMode_t for details on allowed compute modes. + * Enabling ECC reduces the amount of total available memory, due to the extra required parity bits. + * Under WDDM most device memory is allocated and managed on startup by Windows. * - * @param device The identifier of the target device - * @param mode Reference in which to return the current compute mode + * Under Linux and Windows TCC, the reported amount of used memory is equal to the sum of memory allocated + * by all active channels on the device. 
* - * @return - * - \ref NVML_SUCCESS if \a mode has been set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * @note In MIG mode, if device handle is provided, the API returns aggregate + * information, only if the caller has appropriate privileges. Per-instance + * information can be queried by using specific MIG device handles. + * + * @note On systems where GPUs are NUMA nodes, the accuracy of FB memory utilization + * provided by this API depends on the memory accounting of the operating system. + * This is because FB memory is managed by the operating system instead of the NVIDIA GPU driver. + * Typically, pages allocated from FB memory are not released even after + * the process terminates to enhance performance. In scenarios where + * the operating system is under memory pressure, it may resort to utilizing FB memory. + * Such actions can result in discrepancies in the accuracy of memory reporting. + * + * @note On certain SOC platforms, the integrated GPU (iGPU) does not use a dedicated framebuffer + * but instead shares memory with the system. As a result, \ref NVML_ERROR_NOT_SUPPORTED + * will be returned in this case. 
+ * + * @param device The identifier of the target device + * @param memory Reference in which to return the memory information + * + * @return + * - \ref NVML_SUCCESS if \a memory has been populated + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memory is NULL + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_NOT_SUPPORTED if video memory is unsupported on the device + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo_v2(nvmlDevice_t device, nvmlMemory_v2_t *memory); + +/** + * Retrieves the current compute mode for the device. + * + * For all products. + * + * See \ref nvmlComputeMode_t for details on allowed compute modes. + * + * @param device The identifier of the target device + * @param mode Reference in which to return the current compute mode + * + * @return + * - \ref NVML_SUCCESS if \a mode has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * @@ -5808,7 +6617,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int /** * Retrieves the current and pending DRAM Encryption modes for the device. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * Only applicable to devices that support DRAM Encryption * Requires \a NVML_INFOROM_DEN version 1.0 or higher. 
* @@ -6029,7 +6838,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors(nvmlDevice_t device, nvmlMemory * * @see nvmlDeviceClearEccErrorCounts() */ -nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlEccErrorCounts_t *eccCounts); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlEccErrorCounts_t *eccCounts); /** * Retrieves the requested memory error counter for the device. @@ -6210,7 +7019,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetDecoderUtilization(nvmlDevice_t device, unsign /** * Retrieves the current utilization and sampling size in microseconds for the JPG * - * %TURING_OR_NEWER% + * For Turing &tm; or newer fully supported devices. * * @note On MIG-enabled GPUs, querying decoder utilization is not currently supported. * @@ -6231,7 +7040,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetJpgUtilization(nvmlDevice_t device, unsigned i /** * Retrieves the current utilization and sampling size in microseconds for the OFA (Optical Flow Accelerator) * - * %TURING_OR_NEWER% + * For Turing &tm; or newer fully supported devices. * * @note On MIG-enabled GPUs, querying decoder utilization is not currently supported. * @@ -6665,30 +7474,22 @@ nvmlReturn_t DECLDIR nvmlDeviceGetSamples(nvmlDevice_t device, nvmlSamplingType_ nvmlReturn_t DECLDIR nvmlDeviceGetBAR1MemoryInfo(nvmlDevice_t device, nvmlBAR1Memory_t *bar1Memory); /** - * Gets the duration of time during which the device was throttled (lower than requested clocks) due to power - * or thermal constraints. - * - * The method is important to users who are tying to understand if their GPUs throttle at any point during their applications. The - * difference in violation times at two different reference times gives the indication of GPU throttling event. - * - * Violation for thermal capping is not supported at this time. 
- * - * For Kepler &tm; or newer fully supported devices. - * - * @param device The identifier of the target device - * @param perfPolicyType Represents Performance policy which can trigger GPU throttling - * @param violTime Reference to which violation time related information is returned - * + * @deprecated Use \ref nvmlDeviceGetFieldValues to query this data. + * This API will be removed in CUDA 14.0. * - * @return - * - \ref NVML_SUCCESS if violation time is successfully retrieved - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a perfPolicyType is invalid, or \a violTime is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * Translations are as follows: + * + * NVML_PERF_POLICY_POWER -> NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP + * NVML_PERF_POLICY_THERMAL -> NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN + * NVML_PERF_POLICY_SYNC_BOOST -> NVML_FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST + * NVML_PERF_POLICY_BOARD_LIMIT -> NVML_FI_DEV_PERF_POLICY_BOARD_LIMIT + * NVML_PERF_POLICY_LOW_UTILIZATION -> NVML_FI_DEV_PERF_POLICY_LOW_UTILIZATION + * NVML_PERF_POLICY_RELIABILITY -> NVML_FI_DEV_PERF_POLICY_RELIABILITY + * NVML_PERF_POLICY_TOTAL_APP_CLOCKS -> DEPRECATED, Do not use + * NVML_PERF_POLICY_TOTAL_BASE_CLOCKS -> NVML_FI_DEV_PERF_POLICY_TOTAL_BASE_CLOCKS */ -nvmlReturn_t DECLDIR nvmlDeviceGetViolationStatus(nvmlDevice_t device, nvmlPerfPolicyType_t perfPolicyType, nvmlViolationTime_t *violTime); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetViolationStatus(nvmlDevice_t device, nvmlPerfPolicyType_t perfPolicyType, nvmlViolationTime_t *violTime); /** * Gets the device's interrupt number @@ -6709,6 +7510,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetIrqNum(nvmlDevice_t device, unsigned int *irqN /** * Gets the device's core 
count * + * @note On MIG-enabled GPUs, querying the device's core count is currently not supported using this API. + * Please use \ref nvmlDeviceGetGpuInstanceProfileInfo to fetch the MIG device's core count. + * * @param device The identifier of the target device * @param numCores The number of cores for the specified device * @@ -6716,7 +7520,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetIrqNum(nvmlDevice_t device, unsigned int *irqN * - \ref NVML_SUCCESS if GPU core count is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a numCores is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device + * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device, or if \a device is a MIG device. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * */ @@ -6819,7 +7623,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetBusType(nvmlDevice_t device, nvmlBusType_t *ty /** - * Deprecated: Will be deprecated in a future release. Use \ref nvmlDeviceGetGpuFabricInfoV instead + * @deprecated Will be deprecated in a future release. Use \ref nvmlDeviceGetGpuFabricInfoV instead * * Get fabric information associated with the device. 
* @@ -6839,7 +7643,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetBusType(nvmlDevice_t device, nvmlBusType_t *ty * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't support gpu fabric */ -nvmlReturn_t DECLDIR nvmlDeviceGetGpuFabricInfo(nvmlDevice_t device, nvmlGpuFabricInfo_t *gpuFabricInfo); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetGpuFabricInfo(nvmlDevice_t device, nvmlGpuFabricInfo_t *gpuFabricInfo); /** * Versioned wrapper around \ref nvmlDeviceGetGpuFabricInfo that accepts a versioned @@ -7141,7 +7945,41 @@ nvmlReturn_t DECLDIR nvmlDeviceGetSramEccErrorStatus(nvmlDevice_t device, nvmlEccSramErrorStatus_t *status); /** - * @} + * Set new power limit of this device. + * + * For Kepler &tm; or newer fully supported devices. + * Requires root/admin permissions. + * + * See \ref nvmlDeviceGetPowerManagementLimitConstraints to check the allowed ranges of values. + * + * See \ref nvmlPowerValue_v2_t for more information on the struct. + * + * \note Limit is not persistent across reboots or driver unloads. + * Enable persistent mode to prevent driver from unloading when no application is using the device. + * + * This API replaces nvmlDeviceSetPowerManagementLimit. It can be used as a drop-in replacement for the older version. 
+ * + * @param device The identifier of the target device + * @param powerValue Power management limit in milliwatts to set + * + * @return + * - \ref NVML_SUCCESS if the power limit has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a powerValue is NULL or contains invalid values + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see NVML_FI_DEV_POWER_AVERAGE + * @see NVML_FI_DEV_POWER_INSTANT + * @see NVML_FI_DEV_POWER_MIN_LIMIT + * @see NVML_FI_DEV_POWER_MAX_LIMIT + * @see NVML_FI_DEV_POWER_CURRENT_LIMIT + */ +nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit_v2(nvmlDevice_t device, nvmlPowerValue_v2_t *powerValue); + +/** + * @} // @defgroup nvmlDeviceQueries Device Queries */ /** @addtogroup nvmlAccountingStats @@ -7303,7 +8141,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetRetiredPages(nvmlDevice_t device, nvmlPageReti * that this does not match the virtual address used in CUDA, but will match the address information in Xid 63 * * \note nvmlDeviceGetRetiredPages_v2 adds an additional timestamps parameter to return the time of each page's - * retirement. + * retirement. This is supported for Pascal and newer architectures. * * For Kepler &tm; or newer fully supported devices. * @@ -7518,9 +8356,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetProcessesUtilizationInfo(nvmlDevice_t device, /** * Get platform information of this device. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * - * See \ref nvmlPlatformInfo_v1_t for more information on the struct. + * See \ref nvmlPlatformInfo_v2_t for more information on the struct. 
* * @param device The identifier of the target device * @param platformInfo Pointer to the caller-provided structure of nvmlPlatformInfo_t. @@ -7535,6 +8373,77 @@ nvmlReturn_t DECLDIR nvmlDeviceGetProcessesUtilizationInfo(nvmlDevice_t device, */ nvmlReturn_t DECLDIR nvmlDeviceGetPlatformInfo(nvmlDevice_t device, nvmlPlatformInfo_t *platformInfo); +/** + * Retrieves the Per Device Identifier (PDI) associated with this device. + * + * For Pascal &tm; or newer fully supported devices. + * + * See \ref nvmlPdi_v1_t for more information on the struct. + * + * @param[in] device The identifier of the target device + * @param[out] pdi Reference to the caller-provided structure to return the GPU PDI + * + * @return + * - \ref NVML_SUCCESS if \a pdi has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a pdi is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the version is invalid/unsupported + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetPdi(nvmlDevice_t device, nvmlPdi_t *pdi); + +/** + * Set the hostname for the device. + * + * For Blackwell &tm; or newer fully supported devices. + * Requires root/admin permissions. + * Supported on Linux only. + * + * Sets a hostname string for the GPU device. This operation takes effect immediately. + * + * The hostname is not stored persistently across GPU resets or driver reloads. 
+ * + * @param device The identifier of the target device + * @param hostname Reference to the caller-provided \ref nvmlHostname_v1_t struct containing the hostname + * + * @return + * - \ref NVML_SUCCESS if the hostname was set successfully + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a hostname is NULL or contains invalid characters + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlDeviceGetHostname_v1() + */ +nvmlReturn_t DECLDIR nvmlDeviceSetHostname_v1(nvmlDevice_t device, nvmlHostname_v1_t *hostname); + +/** + * Get the hostname for the device. + * + * For Blackwell &tm; or newer fully supported devices. + * Supported on Linux only. + * + * Retrieves the hostname string for the GPU device that was set using \ref nvmlDeviceSetHostname_v1(). 
+ * + * @param device The identifier of the target device + * @param hostname Reference to the caller-provided \ref nvmlHostname_v1_t struct to return the hostname + * + * @return + * - \ref NVML_SUCCESS if the hostname was retrieved successfully + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a hostname is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlDeviceSetHostname_v1() + */ +nvmlReturn_t DECLDIR nvmlDeviceGetHostname_v1(nvmlDevice_t device, nvmlHostname_v1_t *hostname); + /** @} */ /***************************************************************************************************/ @@ -7772,8 +8681,6 @@ typedef enum nvmlClockLimitId_enum { * Set clocks that device will lock to. * * Sets the clocks that the device will be running at to the value in the range of minGpuClockMHz to maxGpuClockMHz. - * Setting this will supersede application clock values and take effect regardless if a cuda app is running. - * See /ref nvmlDeviceSetApplicationsClocks * * Can be used as a setting to request constant performance. * @@ -7792,7 +8699,7 @@ typedef enum nvmlClockLimitId_enum { * * Requires root/admin permissions. * - * After system reboot or driver reload applications clocks go back to their default value. + * After system reboot or driver reload GPU clocks go back to their default value. * See \ref nvmlDeviceResetGpuLockedClocks. * * For Volta &tm; or newer fully supported devices. @@ -7817,7 +8724,7 @@ nvmlReturn_t DECLDIR nvmlDeviceSetGpuLockedClocks(nvmlDevice_t device, unsigned * Resets the gpu clock to the default value * * This is the gpu clock that will be used after system reboot or driver reload. 
- * Default values are idle clocks, but the current values can be changed using \ref nvmlDeviceSetApplicationsClocks. + * Default values are idle clocks. * * @see nvmlDeviceSetGpuLockedClocks * @@ -7839,14 +8746,12 @@ nvmlReturn_t DECLDIR nvmlDeviceResetGpuLockedClocks(nvmlDevice_t device); * Set memory clocks that device will lock to. * * Sets the device's memory clocks to the value in the range of minMemClockMHz to maxMemClockMHz. - * Setting this will supersede application clock values and take effect regardless of whether a cuda app is running. - * See /ref nvmlDeviceSetApplicationsClocks * * Can be used as a setting to request constant performance. * * Requires root/admin permissions. * - * After system reboot or driver reload applications clocks go back to their default value. + * After system reboot or driver reload memory clocks go back to their default value. * See \ref nvmlDeviceResetMemoryLockedClocks. * * For Ampere &tm; or newer fully supported devices. @@ -7871,7 +8776,7 @@ nvmlReturn_t DECLDIR nvmlDeviceSetMemoryLockedClocks(nvmlDevice_t device, unsign * Resets the memory clock to the default value * * This is the memory clock that will be used after system reboot or driver reload. - * Default values are idle clocks, but the current values can be changed using \ref nvmlDeviceSetApplicationsClocks. + * Default values are idle clocks. * * @see nvmlDeviceSetMemoryLockedClocks * @@ -7890,72 +8795,20 @@ nvmlReturn_t DECLDIR nvmlDeviceSetMemoryLockedClocks(nvmlDevice_t device, unsign nvmlReturn_t DECLDIR nvmlDeviceResetMemoryLockedClocks(nvmlDevice_t device); /** - * Set clocks that applications will lock to. - * - * Sets the clocks that compute and graphics applications will be running at. - * e.g. CUDA driver requests these clocks during context creation which means this property - * defines clocks at which CUDA applications will be running unless some overspec event - * occurs (e.g. over power, over thermal or external HW brake). 
- * - * Can be used as a setting to request constant performance. - * - * On Pascal and newer hardware, this will automatically disable automatic boosting of clocks. - * - * On K80 and newer Kepler and Maxwell GPUs, users desiring fixed performance should also call - * \ref nvmlDeviceSetAutoBoostedClocksEnabled to prevent clocks from automatically boosting - * above the clock value being set. + * @deprecated Applications clocks are deprecated and will be removed in CUDA 14.0. * - * For Kepler &tm; or newer non-GeForce fully supported devices and Maxwell or newer GeForce devices. - * Requires root/admin permissions. - * - * See \ref nvmlDeviceGetSupportedMemoryClocks and \ref nvmlDeviceGetSupportedGraphicsClocks - * for details on how to list available clocks combinations. - * - * After system reboot or driver reload applications clocks go back to their default value. - * See \ref nvmlDeviceResetApplicationsClocks. - * - * @param device The identifier of the target device - * @param memClockMHz Requested memory clock in MHz - * @param graphicsClockMHz Requested graphics clock in MHz - * - * @return - * - \ref NVML_SUCCESS if new settings were successfully set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memClockMHz and \a graphicsClockMHz - * is not a valid clock combination - * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation - * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * Please use \ref nvmlDeviceSetMemoryLockedClocks for Memory Clocks and + * \ref nvmlDeviceSetGpuLockedClocks for Graphics Clocks. 
*/ -nvmlReturn_t DECLDIR nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, unsigned int memClockMHz, unsigned int graphicsClockMHz); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, unsigned int memClockMHz, unsigned int graphicsClockMHz); /** - * Resets the application clock to the default value - * - * This is the applications clock that will be used after system reboot or driver reload. - * Default value is constant, but the current value an be changed using \ref nvmlDeviceSetApplicationsClocks. + * @deprecated Applications clocks are deprecated and will be removed in CUDA 14.0. * - * On Pascal and newer hardware, if clocks were previously locked with \ref nvmlDeviceSetApplicationsClocks, - * this call will unlock clocks. This returns clocks their default behavior ofautomatically boosting above - * base clocks as thermal limits allow. - * - * @see nvmlDeviceGetApplicationsClock - * @see nvmlDeviceSetApplicationsClocks - * - * For Fermi &tm; or newer non-GeForce fully supported devices and Maxwell or newer GeForce devices. - * - * @param device The identifier of the target device - * - * @return - * - \ref NVML_SUCCESS if new settings were successfully set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * Please use \ref nvmlDeviceResetMemoryLockedClocks for Memory Clocks and + * \ref nvmlDeviceResetGpuLockedClocks for Graphics Clocks. */ -nvmlReturn_t DECLDIR nvmlDeviceResetApplicationsClocks(nvmlDevice_t device); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceResetApplicationsClocks(nvmlDevice_t device); /** * Try to set the current state of Auto Boosted clocks on a device. 
@@ -8027,13 +8880,12 @@ nvmlReturn_t DECLDIR nvmlDeviceSetDefaultAutoBoostedClocksEnabled(nvmlDevice_t d * @param device The identifier of the target device * @param fan The index of the fan, starting at zero * - * return - * NVML_SUCCESS if speed has been adjusted - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if device is invalid - * NVML_ERROR_NOT_SUPPORTED if the device does not support this - * (doesn't have fans) - * NVML_ERROR_UNKNOWN on any unexpected error + * @return + * - \ref NVML_SUCCESS if speed has been adjusted + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this (doesn't have fans) + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetDefaultFanSpeed_v2(nvmlDevice_t device, unsigned int fan); @@ -8046,16 +8898,16 @@ nvmlReturn_t DECLDIR nvmlDeviceSetDefaultFanSpeed_v2(nvmlDevice_t device, unsign * * For all cuda-capable discrete products with fans * - * device The identifier of the target \a device - * policy The fan control \a policy to set + * @param device The identifier of the target \a device + * @param fan The index of the fan, starting at zero + * @param policy The fan control \a policy to set * - * return - * NVML_SUCCESS if \a policy has been set - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a policy is null or the \a fan given doesn't reference - * a fan that exists. 
- * NVML_ERROR_NOT_SUPPORTED if the \a device is older than Maxwell - * NVML_ERROR_UNKNOWN on any unexpected error + * @return + * - \ref NVML_SUCCESS if \a policy has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a policy is null or the \a fan given doesn't reference a fan that exists. + * - \ref NVML_ERROR_NOT_SUPPORTED if the \a device is older than Maxwell + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetFanControlPolicy(nvmlDevice_t device, unsigned int fan, nvmlFanControlPolicy_t policy); @@ -8176,22 +9028,22 @@ nvmlReturn_t DECLDIR nvmlDeviceSetAPIRestriction(nvmlDevice_t device, nvmlRestri * * For all cuda-capable discrete products with fans that are Maxwell or Newer. * - * device The identifier of the target device - * fan The index of the fan, starting at zero - * speed The target speed of the fan [0-100] in % of max speed + * @param device The identifier of the target device + * @param fan The index of the fan, starting at zero + * @param speed The target speed of the fan [0-100] in % of max speed * * return - * NVML_SUCCESS if the fan speed has been set - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if the device is not valid, or the speed is outside acceptable ranges, - * or if the fan index doesn't reference an actual fan. - * NVML_ERROR_NOT_SUPPORTED if the device is older than Maxwell. - * NVML_ERROR_UNKNOWN if there was an unexpected error. + * - \ref NVML_SUCCESS if the fan speed has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if the device is not valid, or the speed is outside acceptable ranges, + * or if the fan index doesn't reference an actual fan. + * - \ref NVML_ERROR_NOT_SUPPORTED if the device is older than Maxwell. 
+ * - \ref NVML_ERROR_UNKNOWN if there was an unexpected error. */ nvmlReturn_t DECLDIR nvmlDeviceSetFanSpeed_v2(nvmlDevice_t device, unsigned int fan, unsigned int speed); /** - * Deprecated: Will be deprecated in a future release. Use \ref nvmlDeviceSetClockOffsets instead. It works + * @deprecated Will be deprecated in a future release. Use \ref nvmlDeviceSetClockOffsets instead. It works * on Maxwell onwards GPU architectures. * * Set the GPCCLK VF offset value @@ -8206,10 +9058,10 @@ nvmlReturn_t DECLDIR nvmlDeviceSetFanSpeed_v2(nvmlDevice_t device, unsigned int * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceSetGpcClkVfOffset(nvmlDevice_t device, int offset); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceSetGpcClkVfOffset(nvmlDevice_t device, int offset); /** - * Deprecated: Will be deprecated in a future release. Use \ref nvmlDeviceSetClockOffsets instead. It works + * @deprecated Will be deprecated in a future release. Use \ref nvmlDeviceSetClockOffsets instead. It works * on Maxwell onwards GPU architectures. * * Set the MemClk (Memory Clock) VF offset value. It requires elevated privileges. @@ -8224,7 +9076,7 @@ nvmlReturn_t DECLDIR nvmlDeviceSetGpcClkVfOffset(nvmlDevice_t device, int offset * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceSetMemClkVfOffset(nvmlDevice_t device, int offset); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceSetMemClkVfOffset(nvmlDevice_t device, int offset); /** * @} @@ -8288,51 +9140,20 @@ nvmlReturn_t DECLDIR nvmlDeviceSetAccountingMode(nvmlDevice_t device, nvmlEnable */ nvmlReturn_t DECLDIR nvmlDeviceClearAccountingPids(nvmlDevice_t device); -/** - * Set new power limit of this device. - * - * For Kepler &tm; or newer fully supported devices. 
- * Requires root/admin permissions. - * - * See \ref nvmlDeviceGetPowerManagementLimitConstraints to check the allowed ranges of values. - * - * See \ref nvmlPowerValue_v2_t for more information on the struct. - * - * \note Limit is not persistent across reboots or driver unloads. - * Enable persistent mode to prevent driver from unloading when no application is using the device. - * - * This API replaces nvmlDeviceSetPowerManagementLimit. It can be used as a drop-in replacement for the older version. - * - * @param device The identifier of the target device - * @param powerValue Power management limit in milliwatts to set - * - * @return - * - \ref NVML_SUCCESS if \a limit has been set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a powerValue is NULL or contains invalid values - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_UNKNOWN on any unexpected error - * - * @see NVML_FI_DEV_POWER_AVERAGE - * @see NVML_FI_DEV_POWER_INSTANT - * @see NVML_FI_DEV_POWER_MIN_LIMIT - * @see NVML_FI_DEV_POWER_MAX_LIMIT - * @see NVML_FI_DEV_POWER_CURRENT_LIMIT - */ -nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit_v2(nvmlDevice_t device, nvmlPowerValue_v2_t *powerValue); +/** @} */ // @addtogroup nvmlAccountingStats /***************************************************************************************************/ -/** @defgroup NVML NVLink +/** @defgroup NvLink NvLink Methods + * This chapter describes methods that NVML can perform on NVLINK enabled devices. 
* @{ */ /***************************************************************************************************/ -#define NVML_NVLINK_BER_MANTISSA_SHIFT 8 -#define NVML_NVLINK_BER_MANTISSA_WIDTH 0xf +#define NVML_NVLINK_BER_MANTISSA_SHIFT 8 //!< Shift for NVLink BER mantissa. +#define NVML_NVLINK_BER_MANTISSA_WIDTH 0xf //!< Width for NVLink BER mantissa. -#define NVML_NVLINK_BER_EXP_SHIFT 0 -#define NVML_NVLINK_BER_EXP_WIDTH 0xff +#define NVML_NVLINK_BER_EXP_SHIFT 0 //!< Shift for NVLink BER exponent. +#define NVML_NVLINK_BER_EXP_WIDTH 0xff //!< Width for NVLink BER exponent. /** * Nvlink Error counter BER can be obtained using the below macros @@ -8345,11 +9166,26 @@ nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit_v2(nvmlDevice_t device, n /* * NVML_FI_DEV_NVLINK_GET_STATE state enums */ -#define NVML_NVLINK_STATE_INACTIVE 0x0 -#define NVML_NVLINK_STATE_ACTIVE 0x1 -#define NVML_NVLINK_STATE_SLEEP 0x2 +#define NVML_NVLINK_STATE_INACTIVE 0x0 //!< NVLink is inactive. +#define NVML_NVLINK_STATE_ACTIVE 0x1 //!< NVLink is active. +#define NVML_NVLINK_STATE_SLEEP 0x2 //!< NVLink is in sleep state. + +/** + * Represents Nvlink Version + */ +typedef enum nvmlNvlinkVersion_enum +{ + NVML_NVLINK_VERSION_INVALID = 0, //!< NVLink version is invalid + NVML_NVLINK_VERSION_1_0 = 1, //!< NVLink Version 1.0 + NVML_NVLINK_VERSION_2_0 = 2, //!< NVLink Version 2.0 + NVML_NVLINK_VERSION_2_2 = 3, //!< NVLink Version 2.2 + NVML_NVLINK_VERSION_3_0 = 4, //!< NVLink Version 3.0 + NVML_NVLINK_VERSION_3_1 = 5, //!< NVLink Version 3.1 + NVML_NVLINK_VERSION_4_0 = 6, //!< NVLink Version 4.0 + NVML_NVLINK_VERSION_5_0 = 7, //!< NVLink Version 5.0 +} nvmlNvlinkVersion_t; -#define NVML_NVLINK_TOTAL_SUPPORTED_BW_MODES 23 +#define NVML_NVLINK_TOTAL_SUPPORTED_BW_MODES 23 //!< Total supported NVLink bandwidth modes. 
typedef struct { @@ -8358,7 +9194,7 @@ typedef struct unsigned char totalBwModes; } nvmlNvlinkSupportedBwModes_v1_t; typedef nvmlNvlinkSupportedBwModes_v1_t nvmlNvlinkSupportedBwModes_t; -#define nvmlNvlinkSupportedBwModes_v1 NVML_STRUCT_VERSION(NvlinkSupportedBwModes, 1) +#define nvmlNvlinkSupportedBwModes_v1 NVML_STRUCT_VERSION(NvlinkSupportedBwModes, 1) //!< Version macro for \a nvmlNvlinkSupportedBwModes_v1_t typedef struct { @@ -8367,7 +9203,7 @@ typedef struct unsigned char bwMode; } nvmlNvlinkGetBwMode_v1_t; typedef nvmlNvlinkGetBwMode_v1_t nvmlNvlinkGetBwMode_t; -#define nvmlNvlinkGetBwMode_v1 NVML_STRUCT_VERSION(NvlinkGetBwMode, 1) +#define nvmlNvlinkGetBwMode_v1 NVML_STRUCT_VERSION(NvlinkGetBwMode, 1) //!< Version macro for \a nvmlNvlinkGetBwMode_v1_t typedef struct { @@ -8376,19 +9212,56 @@ typedef struct unsigned char bwMode; } nvmlNvlinkSetBwMode_v1_t; typedef nvmlNvlinkSetBwMode_v1_t nvmlNvlinkSetBwMode_t; -#define nvmlNvlinkSetBwMode_v1 NVML_STRUCT_VERSION(NvlinkSetBwMode, 1) +#define nvmlNvlinkSetBwMode_v1 NVML_STRUCT_VERSION(NvlinkSetBwMode, 1) //!< Version macro for \a nvmlNvlinkSetBwMode_v1_t -/** @} */ // @defgroup NVML NVLink +/** + * Struct to represent per device NVLINK information v1 + */ +typedef struct +{ + unsigned int version; //!< IN - the API version number + unsigned int isNvleEnabled; //!< OUT - NVLINK encryption enablement +} nvmlNvLinkInfo_v1_t; +#define nvmlNvLinkInfo_v1 NVML_STRUCT_VERSION(NvLinkInfo, 1) //!< Version macro for \a nvmlNvLinkInfo_v1_t +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_MSE 0x1 //!< MSE ucode type. +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR 0x2 //!< NETIR ucode type. +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_UPHY 0x3 //!< NETIR UPHY ucode type. +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_CLN 0x4 //!< NETIR CLN ucode type. +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_DLN 0x5 //!< NETIR DLN ucode type. +#define NVML_NVLINK_FIRMWARE_VERSION_LENGTH 100 //!< Length of firmware version string. 
-/** @} */ +/** + * Struct to represent NVLINK firmware Semantic versioning and ucode type + */ +typedef struct +{ + unsigned char ucodeType; + unsigned int major; + unsigned int minor; + unsigned int subMinor; +} nvmlNvlinkFirmwareVersion_t; -/***************************************************************************************************/ -/** @defgroup NvLink NvLink Methods - * This chapter describes methods that NVML can perform on NVLINK enabled devices. - * @{ +/** + * Struct to represent NVLINK firmware information */ -/***************************************************************************************************/ +typedef struct +{ + nvmlNvlinkFirmwareVersion_t firmwareVersion[NVML_NVLINK_FIRMWARE_VERSION_LENGTH]; //!< OUT - NVLINK firmware version + unsigned int numValidEntries; //!< OUT - Number of valid firmware entries +} nvmlNvlinkFirmwareInfo_t; + +/** + * Struct to represent per device NVLINK information v2 + */ +typedef struct +{ + unsigned int version; //!< IN - the API version number + unsigned int isNvleEnabled; //!< OUT - NVLINK encryption enablement + nvmlNvlinkFirmwareInfo_t firmwareInfo; //!< OUT - NVLINK Firmware info +} nvmlNvLinkInfo_v2_t; +typedef nvmlNvLinkInfo_v2_t nvmlNvLinkInfo_t; +#define nvmlNvLinkInfo_v2 NVML_STRUCT_VERSION(NvLinkInfo, 2) //!< Version macro for \a nvmlNvLinkInfo_v2_t /** * Retrieves the state of the device's NvLink for the link specified @@ -8417,7 +9290,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried - * @param version Requested NvLink version from nvmlNvlinkVersion_t + * @param version Requested NvLink version from \ref nvmlNvlinkVersion_t * * @return * - \ref NVML_SUCCESS if \a version has been set @@ -8509,7 +9382,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkErrorCounter(nvmlDevice_t device, unsign nvmlReturn_t DECLDIR 
nvmlDeviceResetNvLinkErrorCounters(nvmlDevice_t device, unsigned int link); /** - * Deprecated: Setting utilization counter control is no longer supported. + * @deprecated Setting utilization counter control is no longer supported. * * Set the NVLINK utilization counter control information for the specified counter, 0 or 1. * Please refer to \a nvmlNvLinkUtilizationControl_t for the structure definition. Performs a reset @@ -8530,11 +9403,11 @@ nvmlReturn_t DECLDIR nvmlDeviceResetNvLinkErrorCounters(nvmlDevice_t device, uns * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceSetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, unsigned int counter, +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceSetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control, unsigned int reset); /** - * Deprecated: Getting utilization counter control is no longer supported. + * @deprecated Getting utilization counter control is no longer supported. * * Get the NVLINK utilization counter control information for the specified counter, 0 or 1. * Please refer to \a nvmlNvLinkUtilizationControl_t for the structure definition @@ -8553,12 +9426,12 @@ nvmlReturn_t DECLDIR nvmlDeviceSetNvLinkUtilizationControl(nvmlDevice_t device, * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, unsigned int counter, +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control); /** - * Deprecated: Use \ref nvmlDeviceGetFieldValues with NVML_FI_DEV_NVLINK_THROUGHPUT_* as field values instead. 
+ * @deprecated Use \ref nvmlDeviceGetFieldValues with NVML_FI_DEV_NVLINK_THROUGHPUT_* as field values instead. * * Retrieve the NVLINK utilization counter based on the current control for a specified counter. * In general it is good practice to use \a nvmlDeviceSetNvLinkUtilizationControl @@ -8579,11 +9452,11 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationControl(nvmlDevice_t device, * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link, unsigned int counter, +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link, unsigned int counter, unsigned long long *rxcounter, unsigned long long *txcounter); /** - * Deprecated: Freezing NVLINK utilization counters is no longer supported. + * @deprecated Freezing NVLINK utilization counters is no longer supported. * * Freeze the NVLINK utilization counters * Both the receive and transmit counters are operated on by this function @@ -8603,11 +9476,11 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationCounter(nvmlDevice_t device, * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceFreezeNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceFreezeNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, unsigned int counter, nvmlEnableState_t freeze); /** - * Deprecated: Resetting NVLINK utilization counters is no longer supported. + * @deprecated Resetting NVLINK utilization counters is no longer supported. 
* * Reset the NVLINK utilization counters * Both the receive and transmit counters are operated on by this function @@ -8625,7 +9498,7 @@ nvmlReturn_t DECLDIR nvmlDeviceFreezeNvLinkUtilizationCounter (nvmlDevice_t devi * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceResetNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, unsigned int counter); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceResetNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, unsigned int counter); /** * Get the NVLink device type of the remote device connected over the given link. @@ -8693,7 +9566,7 @@ nvmlReturn_t DECLDIR nvmlSystemGetNvlinkBwMode(unsigned int *nvlinkBwMode); /** * Get the supported NvLink Reduced Bandwidth Modes of the device * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param supportedBwMode Reference to \a nvmlNvlinkSupportedBwModes_t @@ -8710,7 +9583,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvlinkSupportedBwModes(nvmlDevice_t device, /** * Get the NvLink Reduced Bandwidth Mode for the device * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param getBwMode Reference to \a nvmlNvlinkGetBwMode_t @@ -8727,7 +9600,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvlinkBwMode(nvmlDevice_t device, /** * Set the NvLink Reduced Bandwidth Mode for the device * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. 
* * @param device The identifier of the target device * @param setBwMode Reference to \a nvmlNvlinkSetBwMode_t @@ -8742,7 +9615,24 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvlinkBwMode(nvmlDevice_t device, nvmlReturn_t DECLDIR nvmlDeviceSetNvlinkBwMode(nvmlDevice_t device, nvmlNvlinkSetBwMode_t *setBwMode); -/** @} */ +/** + * Query NVLINK information associated with this device. + * + * @param[in] device The identifier of the target device + * @param[out] info Reference to \a nvmlNvLinkInfo_t + * + * @return + * - \ref NVML_SUCCESS if query is success + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a info is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the version is invalid/unsupported + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkInfo(nvmlDevice_t device, nvmlNvLinkInfo_t *info); + +/** @} */ // @defgroup NvLink NvLink Methods /***************************************************************************************************/ /** @defgroup nvmlEvents Event Handling Methods @@ -8778,8 +9668,6 @@ nvmlReturn_t DECLDIR nvmlEventSetCreate(nvmlEventSet_t *set); * * For Linux only. * - * \b IMPORTANT: Operations on \a set are not thread safe - * * This call starts recording of events on specific device. * All events that occurred before this call are not recorded. 
* Checking if some event occurred can be done with \ref nvmlEventSetWait_v2 @@ -8884,16 +9772,108 @@ nvmlReturn_t DECLDIR nvmlEventSetWait_v2(nvmlEventSet_t set, nvmlEventData_t * d */ nvmlReturn_t DECLDIR nvmlEventSetFree(nvmlEventSet_t set); -/** @} */ - -/***************************************************************************************************/ -/** @defgroup nvmlZPI Drain states - * This chapter describes methods that NVML can perform against each device to control their drain state - * and recognition by NVML and NVIDIA kernel driver. These methods can be used with out-of-band tools to - * power on/off GPUs, enable robust reset scenarios, etc. - * @{ +/** + * Create an empty set of system events. + * Event set should be freed by \ref nvmlSystemEventSetFree + * + * For Fermi &tm; or newer fully supported devices. + * @param request Reference to nvmlSystemEventSetCreateRequest_t + * + * @return + * - \ref NVML_SUCCESS if the event has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if request is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH for unsupported version + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlSystemEventSetFree */ -/***************************************************************************************************/ +nvmlReturn_t DECLDIR nvmlSystemEventSetCreate(nvmlSystemEventSetCreateRequest_t *request); + +/** + * Releases system event set + * + * For Fermi &tm; or newer fully supported devices. 
+ * + * @param request Reference to nvmlSystemEventSetFreeRequest_t + * + * @return + * - \ref NVML_SUCCESS if the event has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if request is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH for unsupported version + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlDeviceRegisterEvents + */ +nvmlReturn_t DECLDIR nvmlSystemEventSetFree(nvmlSystemEventSetFreeRequest_t *request); + +/** + * Starts recording of events on system and add the events to specified \ref nvmlSystemEventSet_t + * + * For Linux only. + * + * This call starts recording of events on specific device. + * All events that occurred before this call are not recorded. + * Checking if some event occurred can be done with \ref nvmlSystemEventSetWait + * + * If function reports NVML_ERROR_UNKNOWN, event set is in undefined state and should be freed. + * If function reports NVML_ERROR_NOT_SUPPORTED, event set can still be used. None of the requested eventTypes + * are registered in that case. + * + * @param request Reference to the struct nvmlSystemRegisterEventRequest_t + * + * @return + * - \ref NVML_SUCCESS if the event has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if request is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH for unsupported version + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlSystemEventType + * @see nvmlSystemEventSetWait + * @see nvmlEventSetFree + */ +nvmlReturn_t DECLDIR nvmlSystemRegisterEvents(nvmlSystemRegisterEventRequest_t *request); + +/** + * Waits on system events and delivers events + * + * For Fermi &tm; or newer fully supported devices. + * + * If some events are ready to be delivered at the time of the call, function returns immediately. 
+ * If there are no events ready to be delivered, function sleeps till event arrives + * but not longer than specified timeout. This function in certain conditions can return before + * specified timeout passes (e.g. when interrupt arrives) + * + * If the returned request->numEvent equals request->dataSize, there might be outstanding + * events; it is recommended to call nvmlSystemEventSetWait again to query all the events. + * + * @param request Reference to nvmlSystemEventSetWaitRequest_t + * + * @return + * - \ref NVML_SUCCESS if the event has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if request is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH for unsupported version + * - \ref NVML_ERROR_TIMEOUT if no event notification after timeoutms + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlSystemEventType + * @see nvmlSystemRegisterEvents + */ +nvmlReturn_t DECLDIR nvmlSystemEventSetWait(nvmlSystemEventSetWaitRequest_t *request); + +/** @} */ + +/***************************************************************************************************/ +/** @defgroup nvmlZPI Drain states + * This chapter describes methods that NVML can perform against each device to control their drain state + * and recognition by NVML and NVIDIA kernel driver. These methods can be used with out-of-band tools to + * power on/off GPUs, enable robust reset scenarios, etc. + * @{ + */ +/***************************************************************************************************/ /** * Modify the drain state of a GPU. This method forces a GPU to no longer accept new incoming requests. 
@@ -9122,7 +10102,7 @@ nvmlReturn_t DECLDIR nvmlDeviceSetVirtualizationMode(nvmlDevice_t device, nvmlGp * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid or \a pHeterogeneousMode is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't support this feature + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device doesn't support this feature * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pHeterogeneousMode is invalid * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ @@ -9228,7 +10208,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuTypeSupportedPlacements(nvmlDevice_t devic * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device or \a vgpuTypeId is invalid or \a pPlacementList is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED If \a device or \a vgpuTypeId isn't supported + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device or \a vgpuTypeId isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pPlacementList is invalid * - \ref NVML_ERROR_UNKNOWN On any unexpected error @@ -9301,6 +10281,30 @@ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetRuntimeStateSize(nvmlVgpuInstance_t vgpu */ nvmlReturn_t DECLDIR nvmlDeviceSetVgpuCapabilities(nvmlDevice_t device, nvmlDeviceVgpuCapability_t capability, nvmlEnableState_t state); +/** + * Executes a forced GSP unload operation on a device + * + * For Ada &tm; or newer fully supported devices. + * Forces the unload of the GSP firmware on a device currently operating in vGPU mode. This operation forcibly removes + * the GSP from the targeted GPU and terminates all GSP operations. 
+ * + * @note This is a disruptive operation that will impact any active vGPU instances and should only be used when + * absolutely necessary, such as during error recovery or maintenance operations. + * + * @warning This operation may result in a temporary loss of GPU functionality and should be used with caution. + * + * @param device The identifier of the target device + * + * @return + * - \ref NVML_SUCCESS GSP reset completed successfully + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid or null + * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state, or \a device not in vGPU mode + * - \ref NVML_ERROR_NO_PERMISSION The user doesn't have permission to perform this operation + * - \ref NVML_ERROR_UNKNOWN On any unexpected error during GSP reset operation + */ +nvmlReturn_t DECLDIR nvmlDeviceVgpuForceGspUnload(nvmlDevice_t device); + /** * Retrieve the vGPU Software licensable features. * @@ -9781,7 +10785,7 @@ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetFbUsage(nvmlVgpuInstance_t vgpuInstance, * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlVgpuInstanceGetLicenseStatus(nvmlVgpuInstance_t vgpuInstance, unsigned int *licensed); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlVgpuInstanceGetLicenseStatus(nvmlVgpuInstance_t vgpuInstance, unsigned int *licensed); /** * Retrieve the vGPU type of a vGPU instance. @@ -10054,10 +11058,367 @@ nvmlReturn_t DECLDIR nvmlVgpuTypeGetCapabilities(nvmlVgpuTypeId_t vgpuTypeId, nv */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetMdevUUID(nvmlVgpuInstance_t vgpuInstance, char *mdevUuid, unsigned int size); +/** + * Query the currently creatable vGPU types on a specific GPU Instance. + * + * The function returns an array of vGPU types that can be created for a specified GPU instance. 
This array is stored + * in a caller-supplied buffer, with the buffer's element count passed through \a pVgpus->vgpuCount. The number of + * vGPU types written to the buffer is indicated by \a pVgpus->vgpuCount. If the buffer is too small to hold the vGPU + * type array, the function returns NVML_ERROR_INSUFFICIENT_SIZE and updates \a pVgpus->vgpuCount with the required + * element count. + * + * To determine the creatable vGPUs for a GPU Instance, invoke this function with \a pVgpus->vgpuCount set to 0 and + * \a pVgpus->vgpuTypeIds as NULL. This will result in NVML_ERROR_INSUFFICIENT_SIZE being returned, along with the + * count value in \a pVgpus->vgpuCount. + * + * The creatable vGPU types may differ over time, as there may be restrictions on what type of vGPUs can concurrently + * run on the device. + * + * @param gpuInstance The GPU instance handle + * @param pVgpus Pointer to the caller-provided structure of nvmlVgpuTypeIdInfo_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pVgpus is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_INSUFFICIENT_SIZE If \a pVgpus->vgpuTypeIds buffer is small + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pVgpus is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetCreatableVgpus(nvmlGpuInstance_t gpuInstance, nvmlVgpuTypeIdInfo_t *pVgpus); + +/** + * Retrieve the maximum number of vGPU instances per GPU instance for given vGPU type + * + * @param pMaxInstance Pointer to the caller-provided structure of nvmlVgpuTypeMaxInstance_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref 
NVML_ERROR_INVALID_ARGUMENT If \a pMaxInstance is NULL or \a pMaxInstance->vgpuTypeId is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU or non-MIG vGPU type + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pMaxInstance is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlVgpuTypeGetMaxInstancesPerGpuInstance(nvmlVgpuTypeMaxInstance_t *pMaxInstance); + +/** + * Retrieve the active vGPU instances within a GPU instance. + * + * An array of active vGPU instances is returned in the caller-supplied buffer pointed + * at by \a pVgpuInstanceInfo->vgpuInstances. The array element count is passed in + * \a pVgpuInstanceInfo->vgpuCount, and \a pVgpuInstanceInfo->vgpuCount is used to return + * the number of vGPU instances written to the buffer. + * + * If the supplied buffer is not large enough to accommodate the vGPU instance array, + * the function returns NVML_ERROR_INSUFFICIENT_SIZE, with the element count of + * nvmlVgpuInstance_t array required in \a pVgpuInstanceInfo->vgpuCount. To query the + * number of active vGPU instances, call this function with pVgpuInstanceInfo->vgpuCount = 0 + * and pVgpuInstanceInfo->vgpuTypeIds = NULL. The code will return NVML_ERROR_INSUFFICIENT_SIZE, + * or NVML_SUCCESS if no vGPU Types are active. 
+ * + * @param gpuInstance The GPU instance handle + * @param pVgpuInstanceInfo Pointer to the vGPU instance information structure \a nvmlActiveVgpuInstanceInfo_t + * + * @return + * - \ref NVML_SUCCESS Successful completion + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pVgpuInstanceInfo is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_INSUFFICIENT_SIZE \a pVgpuInstanceInfo->vgpuTypeIds buffer is too small, + * array element count is returned in \a pVgpuInstanceInfo->vgpuCount + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pVgpuInstanceInfo is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetActiveVgpus(nvmlGpuInstance_t gpuInstance, nvmlActiveVgpuInstanceInfo_t *pVgpuInstanceInfo); + +/** + * @deprecated Will be deprecated in a future release. Use \ref nvmlGpuInstanceSetVgpuSchedulerState_v2 instead + * + * Set vGPU scheduler state for the given GPU instance + * + * For Blackwell &tm GB20x; or newer fully supported devices. + * + * Scheduler state and params will be allowed to set only when no VM is running within the GPU instance. + * In \a nvmlVgpuSchedulerState_t, IFF enableARRMode is enabled then provide the avgFactor and frequency + * as input. If enableARRMode is disabled then provide timeslice as input. + * + * The scheduler state change won't persist across module load/unload and GPU Instance creation/deletion. 
+ * + * @param gpuInstance The GPU instance handle + * @param pScheduler Pointer to the caller-provided structure of nvmlVgpuSchedulerState_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pScheduler is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_RESET_REQUIRED If setting the state failed with fatal error, reboot is required + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU or if any vGPU instance exists + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pScheduler is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlGpuInstanceSetVgpuSchedulerState(nvmlGpuInstance_t gpuInstance, nvmlVgpuSchedulerState_t *pScheduler); + +/** + * @deprecated Will be deprecated in a future release. Use \ref nvmlGpuInstanceGetVgpuSchedulerState_v2 instead + * + * Returns the vGPU scheduler state for the given GPU instance. + * The information returned in \a nvmlVgpuSchedulerStateInfo_t is not relevant if the BEST EFFORT policy is set. + * + * For Blackwell &tm GB20x; or newer fully supported devices. 
+ * + * @param gpuInstance The GPU instance handle + * @param pSchedulerStateInfo Reference in which \a pSchedulerStateInfo is returned + * + * @return + * - \ref NVML_SUCCESS vGPU scheduler state is successfully obtained + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pSchedulerStateInfo is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pSchedulerStateInfo is invalid + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuSchedulerState(nvmlGpuInstance_t gpuInstance, nvmlVgpuSchedulerStateInfo_t *pSchedulerStateInfo); + +/** + * @deprecated Will be deprecated in a future release. Use \ref nvmlGpuInstanceGetVgpuSchedulerLog_v2 instead + * + * Returns the vGPU scheduler logs for the given GPU instance. + * \a pSchedulerLogInfo points to a caller-allocated structure to contain the logs. The number of elements returned will + * never exceed \a NVML_SCHEDULER_SW_MAX_LOG_ENTRIES. + * + * To get the entire logs, call the function at least 5 times a second. + * + * For Blackwell &tm GB20x; or newer fully supported devices. 
+ * + * @param gpuInstance The GPU instance handle + * @param pSchedulerLogInfo Reference in which \a pSchedulerLogInfo is written + * + * @return + * - \ref NVML_SUCCESS vGPU scheduler logs are successfully obtained + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pSchedulerLogInfo is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pSchedulerLogInfo is invalid + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuSchedulerLog(nvmlGpuInstance_t gpuInstance, nvmlVgpuSchedulerLogInfo_t *pSchedulerLogInfo); + +/** + * Query the creatable vGPU placement ID of the vGPU type within a GPU instance. + * + * For Blackwell &tm GB20x; or newer fully supported devices. + * + * An array of creatable vGPU placement IDs for the vGPU type ID indicated by \a pCreatablePlacementInfo->vgpuTypeId + * is returned in the caller-supplied buffer of \a pCreatablePlacementInfo->placementIds. Memory needed for the + * placementIds array should be allocated based on maximum instances of a vGPU type per GPU instance which can be + * queried via \ref nvmlVgpuTypeGetMaxInstancesPerGpuInstance(). + * If the provided count by the caller is insufficient, the function will return NVML_ERROR_INSUFFICIENT_SIZE along with + * the number of required entries in \a pCreatablePlacementInfo->count. The caller should then reallocate a buffer with the size + * of pCreatablePlacementInfo->count * sizeof(pCreatablePlacementInfo->placementIds) and invoke the function again. + * The creatable vGPU placement IDs may differ over time, as there may be restrictions on what type of vGPU the + * vGPU instance is running. 
+ * + * @param gpuInstance The GPU instance handle + * @param pCreatablePlacementInfo Pointer to the list of vGPU creatable placement structure \a nvmlVgpuCreatablePlacementInfo_t + * + * @return + * - \ref NVML_SUCCESS Successful completion + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pCreatablePlacementInfo is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_INSUFFICIENT_SIZE If the buffer is small, element count is returned in \a pCreatablePlacementInfo->count + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pCreatablePlacementInfo is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU or vGPU heterogeneous mode is not enabled + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuTypeCreatablePlacements(nvmlGpuInstance_t gpuInstance, nvmlVgpuCreatablePlacementInfo_t *pCreatablePlacementInfo); + +/** + * Get the vGPU heterogeneous mode for the GPU instance. + * + * When in heterogeneous mode, a vGPU can concurrently host timesliced vGPUs with differing framebuffer sizes. + * + * On successful return, the function returns \a pHeterogeneousMode->mode with the current vGPU heterogeneous mode. + * \a pHeterogeneousMode->version is the version number of the structure nvmlVgpuHeterogeneousMode_t, the caller should + * set the correct version number to retrieve the vGPU heterogeneous mode. + * \a pHeterogeneousMode->mode can either be \ref NVML_FEATURE_ENABLED or \ref NVML_FEATURE_DISABLED. + * + * For Blackwell &tm GB20x; or newer fully supported devices. 
+ * + * @param gpuInstance The GPU instance handle + * @param pHeterogeneousMode Pointer to the caller-provided structure of nvmlVgpuHeterogeneousMode_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pHeterogeneousMode is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU or not in MIG mode + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pHeterogeneousMode is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuHeterogeneousMode(nvmlGpuInstance_t gpuInstance, nvmlVgpuHeterogeneousMode_t *pHeterogeneousMode); + +/** + * Enable or disable vGPU heterogeneous mode for the GPU instance. + * + * When in heterogeneous mode, a vGPU can concurrently host timesliced vGPUs with differing framebuffer sizes. + * + * API would return an appropriate error code upon unsuccessful activation. For example, the heterogeneous mode + * set will fail with error \ref NVML_ERROR_IN_USE if any vGPU instance is active within the GPU instance. + * The caller of this API is expected to shutdown the vGPU VMs and retry setting the \a mode. + * On successful return, the function updates the vGPU heterogeneous mode with the user provided \a pHeterogeneousMode->mode. + * \a pHeterogeneousMode->version is the version number of the structure nvmlVgpuHeterogeneousMode_t, the caller should + * set the correct version number to set the vGPU heterogeneous mode. 
+ * + * @param gpuInstance The GPU instance handle + * @param pHeterogeneousMode Pointer to the caller-provided structure of nvmlVgpuHeterogeneousMode_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, + * or \a pHeterogeneousMode is NULL or \a pHeterogeneousMode->mode is invalid + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_IN_USE If the \a gpuInstance is in use + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pHeterogeneousMode is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceSetVgpuHeterogeneousMode(nvmlGpuInstance_t gpuInstance, const nvmlVgpuHeterogeneousMode_t *pHeterogeneousMode); + +/** + * Returns the vGPU scheduler state. + * The information returned in \a nvmlVgpuSchedulerStateInfo_v2_t is not relevant if the BEST EFFORT policy is set. + * + * For Pascal &tm; or newer fully supported devices. + * + * @param device The identifier of the target \a device + * @param pSchedulerStateInfo Reference in which \a pSchedulerStateInfo is returned + * + * @return + * - \ref NVML_SUCCESS vGPU scheduler state is successfully obtained + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pSchedulerStateInfo is NULL or \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device not in vGPU host mode + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerState_v2(nvmlDevice_t device, nvmlVgpuSchedulerStateInfo_v2_t *pSchedulerStateInfo); + +/** + * Returns the vGPU scheduler state for the given GPU instance. + * The information returned in \a nvmlVgpuSchedulerStateInfo_v2_t is not relevant if the BEST EFFORT policy is set. 
+ * + * For Blackwell &tm GB20x; or newer fully supported devices. + * + * @param gpuInstance The GPU instance handle + * @param pSchedulerStateInfo Reference in which \a pSchedulerStateInfo is returned + * + * @return + * - \ref NVML_SUCCESS vGPU scheduler state is successfully obtained + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pSchedulerStateInfo is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuSchedulerState_v2(nvmlGpuInstance_t gpuInstance, nvmlVgpuSchedulerStateInfo_v2_t *pSchedulerStateInfo); + +/** +* Returns the vGPU Software scheduler logs for the device. +* \a pSchedulerLogInfo points to a caller-allocated structure to contain the logs. The number of elements returned will +* never exceed \a NVML_SCHEDULER_SW_MAX_LOG_ENTRIES. +* +* To get the entire logs, call the function at least 5 times a second. +* +* For Pascal &tm; or newer fully supported devices. +* +* @param device The identifier of the target \a device +* @param pSchedulerLogInfo Reference in which \a pSchedulerLogInfo is written +* +* @return +* - \ref NVML_SUCCESS vGPU scheduler logs were successfully obtained +* - \ref NVML_ERROR_INVALID_ARGUMENT If \a pSchedulerLogInfo is NULL or \a device is invalid +* - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device not in vGPU host mode +* - \ref NVML_ERROR_UNKNOWN On any unexpected error +*/ +nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerLog_v2(nvmlDevice_t device, nvmlVgpuSchedulerLogInfo_v2_t *pSchedulerLogInfo); + +/** +* Returns the vGPU scheduler logs for the given GPU instance. +* \a pSchedulerLogInfo points to a caller-allocated structure to contain the logs. 
The number of elements returned will +* never exceed \a NVML_SCHEDULER_SW_MAX_LOG_ENTRIES. +* +* To get the entire logs, call the function at least 5 times a second. +* +* For Blackwell &tm GB20x; or newer fully supported devices. +* +* @param gpuInstance The GPU instance handle +* @param pSchedulerLogInfo Reference in which \a pSchedulerLogInfo is written +* +* @return +* - \ref NVML_SUCCESS vGPU scheduler logs are successfully obtained +* - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized +* - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pSchedulerLogInfo is NULL +* or GPU Instance Id is invalid +* - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU +* - \ref NVML_ERROR_UNKNOWN on any unexpected error +*/ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuSchedulerLog_v2(nvmlGpuInstance_t gpuInstance, nvmlVgpuSchedulerLogInfo_v2_t *pSchedulerLogInfo); + +/** + * Sets the vGPU scheduler state. + * + * For Pascal &tm; or newer fully supported devices. + * + * The scheduler state change won't persist across module load/unload. + * Scheduler state and params will be allowed to set only when no VM is running. + * + * @param device The identifier of the target \a device + * @param pSchedulerState vGPU \a pSchedulerState to set + * + * @return + * - \ref NVML_SUCCESS vGPU scheduler state has been successfully set + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pSchedulerState is NULL or \a device is invalid + * - \ref NVML_ERROR_RESET_REQUIRED If setting \a pSchedulerState failed with fatal error, + * reboot is required to recover from this error. 
+ * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device not in vGPU host mode + * or if any vGPU instance currently exists on the \a device + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceSetVgpuSchedulerState_v2(nvmlDevice_t device, nvmlVgpuSchedulerState_v2_t *pSchedulerState); + +/** + * Set vGPU scheduler state for the given GPU instance + * + * For Blackwell &tm GB20x; or newer fully supported devices. + * + * Scheduler state and params will be allowed to set only when no VM is running within the GPU instance. + * + * The scheduler state change won't persist across module load/unload and GPU Instance creation/deletion. + * + * @param gpuInstance The GPU instance handle + * @param pSchedulerState Pointer to the caller-provided structure of nvmlVgpuSchedulerState_v2_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pSchedulerState is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_RESET_REQUIRED If setting the state failed with fatal error, reboot is required + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU or if any vGPU instance exists + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceSetVgpuSchedulerState_v2(nvmlGpuInstance_t gpuInstance, nvmlVgpuSchedulerState_v2_t *pSchedulerState); + /** @} */ /***************************************************************************************************/ -/** @defgroup nvml vGPU Migration +/** @defgroup nvmlVgpuMigration vGPU Migration * This chapter describes operations that are associated with vGPU Migration. 
* @{ */ @@ -10207,8 +11568,8 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuMetadata(nvmlDevice_t device, nvmlVgpuPgpu * * @return * - \ref NVML_SUCCESS vGPU metadata structure was successfully returned - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuMetadata or \a pgpuMetadata or \a bufferSize are NULL - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a vgpuMetadata or \a pgpuMetadata or \a bufferSize are NULL + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata, nvmlVgpuPgpuMetadata_t *pgpuMetadata, nvmlVgpuPgpuCompatibility_t *compatibilityInfo); @@ -10226,13 +11587,15 @@ nvmlReturn_t DECLDIR nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata, * @return * - \ref NVML_SUCCESS GPU metadata structure was successfully returned * - \ref NVML_ERROR_INSUFFICIENT_SIZE \a pgpuMetadata buffer is too small, required size is returned in \a bufferSize - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a bufferSize is NULL or \a device is invalid; if \a pgpuMetadata is NULL and the value of \a bufferSize is not 0. - * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the system - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a bufferSize is NULL or \a device is invalid; if \a pgpuMetadata is NULL and the value of \a bufferSize is not 0. + * - \ref NVML_ERROR_NOT_SUPPORTED If vGPU is not supported by the system + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPgpuMetadataString(nvmlDevice_t device, char *pgpuMetadata, unsigned int *bufferSize); /** + * @deprecated Will be deprecated in a future release. Use \ref nvmlDeviceGetVgpuSchedulerLog_v2 instead + * * Returns the vGPU Software scheduler logs. * \a pSchedulerLog points to a caller-allocated structure to contain the logs. 
The number of elements returned will * never exceed \a NVML_SCHEDULER_SW_MAX_LOG_ENTRIES. @@ -10246,13 +11609,15 @@ nvmlReturn_t DECLDIR nvmlDeviceGetPgpuMetadataString(nvmlDevice_t device, char * * * @return * - \ref NVML_SUCCESS vGPU scheduler logs were successfully obtained - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pSchedulerLog is NULL or \a device is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state or \a device not in vGPU host mode - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pSchedulerLog is NULL or \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device not in vGPU host mode + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerLog(nvmlDevice_t device, nvmlVgpuSchedulerLog_t *pSchedulerLog); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerLog(nvmlDevice_t device, nvmlVgpuSchedulerLog_t *pSchedulerLog); /** + * @deprecated Will be deprecated in a future release. Use \ref nvmlDeviceGetVgpuSchedulerState_v2 instead + * * Returns the vGPU scheduler state. * The information returned in \a nvmlVgpuSchedulerGetState_t is not relevant if the BEST EFFORT policy is set. 
* @@ -10263,36 +11628,15 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerLog(nvmlDevice_t device, nvmlVgpu * * @return * - \ref NVML_SUCCESS vGPU scheduler state is successfully obtained - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pSchedulerState is NULL or \a device is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state or \a device not in vGPU host mode - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pSchedulerState is NULL or \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device not in vGPU host mode + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerState(nvmlDevice_t device, nvmlVgpuSchedulerGetState_t *pSchedulerState); +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerState(nvmlDevice_t device, nvmlVgpuSchedulerGetState_t *pSchedulerState); /** - * Returns the vGPU scheduler capabilities. - * The list of supported vGPU schedulers returned in \a nvmlVgpuSchedulerCapabilities_t is from - * the NVML_VGPU_SCHEDULER_POLICY_*. This list enumerates the supported scheduler policies - * if the engine is Graphics type. - * The other values in \a nvmlVgpuSchedulerCapabilities_t are also applicable if the engine is - * Graphics type. For other engine types, it is BEST EFFORT policy. - * If ARR is supported and enabled, scheduling frequency and averaging factor are applicable - * else timeSlice is applicable. + * @deprecated Will be deprecated in a future release. Use \ref nvmlDeviceSetVgpuSchedulerState_v2 instead * - * For Pascal &tm; or newer fully supported devices. 
- * - * @param device The identifier of the target \a device - * @param pCapabilities Reference in which \a pCapabilities is written - * - * @return - * - \ref NVML_SUCCESS vGPU scheduler capabilities were successfully obtained - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pCapabilities is NULL or \a device is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state or \a device not in vGPU host mode - * - \ref NVML_ERROR_UNKNOWN on any unexpected error - */ -nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerCapabilities(nvmlDevice_t device, nvmlVgpuSchedulerCapabilities_t *pCapabilities); - -/** * Sets the vGPU scheduler state. * * For Pascal &tm; or newer fully supported devices. @@ -10308,14 +11652,37 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerCapabilities(nvmlDevice_t device, * * @return * - \ref NVML_SUCCESS vGPU scheduler state has been successfully set - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pSchedulerState is NULL or \a device is invalid - * - \ref NVML_ERROR_RESET_REQUIRED if setting \a pSchedulerState failed with fatal error, + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pSchedulerState is NULL or \a device is invalid + * - \ref NVML_ERROR_RESET_REQUIRED If setting \a pSchedulerState failed with fatal error, * reboot is required to overcome from this error. - * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state or \a device not in vGPU host mode + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device not in vGPU host mode * or if any vGPU instance currently exists on the \a device - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +DEPRECATED(13.0) nvmlReturn_t DECLDIR nvmlDeviceSetVgpuSchedulerState(nvmlDevice_t device, nvmlVgpuSchedulerSetState_t *pSchedulerState); + +/** + * Returns the vGPU scheduler capabilities. 
+ * The list of supported vGPU schedulers returned in \a nvmlVgpuSchedulerCapabilities_t is from + * the NVML_VGPU_SCHEDULER_POLICY_*. This list enumerates the supported scheduler policies + * if the engine is Graphics type. + * The other values in \a nvmlVgpuSchedulerCapabilities_t are also applicable if the engine is + * Graphics type. For other engine types, it is BEST EFFORT policy. + * If ARR is supported and enabled, scheduling frequency and averaging factor are applicable + * else timeSlice is applicable. + * + * For Pascal &tm; or newer fully supported devices. + * + * @param device The identifier of the target \a device + * @param pCapabilities Reference in which \a pCapabilities is written + * + * @return + * - \ref NVML_SUCCESS vGPU scheduler capabilities were successfully obtained + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pCapabilities is NULL or \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state or \a device not in vGPU host mode + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceSetVgpuSchedulerState(nvmlDevice_t device, nvmlVgpuSchedulerSetState_t *pSchedulerState); +nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerCapabilities(nvmlDevice_t device, nvmlVgpuSchedulerCapabilities_t *pCapabilities); /* * Virtual GPU (vGPU) version @@ -10376,7 +11743,7 @@ nvmlReturn_t DECLDIR nvmlGetVgpuVersion(nvmlVgpuVersion_t *supported, nvmlVgpuVe */ nvmlReturn_t DECLDIR nvmlSetVgpuVersion(nvmlVgpuVersion_t *vgpuVersion); -/** @} */ +/** @} */ // @defgroup nvmlVgpuMigration vGPU Migration /***************************************************************************************************/ /** @defgroup nvmlUtil vGPU Utilization and Accounting @@ -10748,6 +12115,139 @@ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceInfoByIndex(unsigned int index, nvmlEx /** @} */ +/***************************************************************************************************/ +/** @defgroup 
nvmlGPUPRMAccess PRM Access + * This chapter describes NVML operations that are associated with PRM register reads + * @{ + */ +/***************************************************************************************************/ + +#define NVML_PRM_DATA_MAX_SIZE 496 //!< Maximum size of the PRM data. +/** + * Main PRM input structure + */ +typedef struct +{ + /* I/O parameters */ + unsigned dataSize; //!< Size of the input TLV data. + unsigned status; //!< OUT: status of the PRM command + union { + /* Input data in TLV format */ + unsigned char inData[NVML_PRM_DATA_MAX_SIZE]; //!< IN: Input data in TLV format + /* Output data in TLV format */ + unsigned char outData[NVML_PRM_DATA_MAX_SIZE]; //!< OUT: Output PRM data in TLV format + }; +} nvmlPRMTLV_v1_t; + +/** + * Read or write a GPU PRM register. The input is assumed to be in TLV format in + * network byte order. + * + * For Blackwell &tm; or newer fully supported devices. + * + * Supported on Linux only. + * + * @param device Identifier of target GPU device + * @param buffer Structure holding the input data in TLV format as well as + * the PRM register contents in TLV format (in the case of a successful + * read operation). + * Note: the input data and any returned data shall be in network byte order. 
+ * + * @return + * - \ref NVML_SUCCESS on success + * - \ref NVML_ERROR_INVALID_ARGUMENT if \p device or \p buffer are invalid + * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to perform this operation + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by the device + */ +nvmlReturn_t DECLDIR nvmlDeviceReadWritePRM_v1(nvmlDevice_t device, nvmlPRMTLV_v1_t *buffer); + +/** @} */ + +/** + * PRM Counter IDs + */ +typedef enum +{ + NVML_PRM_COUNTER_ID_NONE = 0, + /* Physical Layer Counters (PPCNT group 0x12) */ + NVML_PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_LINK_DOWN_EVENTS = 1, + NVML_PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_SUCCESSFUL_RECOVERY_EVENTS = 2, + /* Recovery counters (PPCNT group 0x1A) */ + NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TOTAL_SUCCESSFUL_RECOVERY_EVENTS = 101, + NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_SINCE_LAST_RECOVERY = 102, + NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_BETWEEN_LAST_TWO_RECOVERIES = 103, + /* Infiniband PortCounters Attribute (PPCNT group 0x20) */ + NVML_PRM_COUNTER_ID_PPCNT_PORTCOUNTERS_PORT_XMIT_WAIT = 201, + /* PLR counters (PPCNT group 0x22) */ + NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_CODES = 301, + NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_CODE_ERR = 302, + NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_UNCORRECTABLE_CODE = 303, + NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_CODES = 304, + NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_CODES = 305, + NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_EVENTS = 306, + NVML_PRM_COUNTER_ID_PPCNT_PLR_SYNC_EVENTS = 307, + /* PPRM counters */ + NVML_PRM_COUNTER_ID_PPRM_OPER_RECOVERY = 1001, +} nvmlPRMCounterId_t; + +/** + * PRM counter input values + */ +typedef struct +{ + unsigned int localPort; //!< Local port number +} nvmlPRMCounterInput_v1_t; + +/** + * PRM Counter Value Structure + */ +typedef struct +{ + nvmlReturn_t status; //!< Status of the PRM counter read + nvmlValueType_t outputType; //!< Output value type + nvmlValue_t outputValue; //!< Output value +} 
nvmlPRMCounterValue_v1_t; + +/** + * PRM Counter Structure v1 + */ +typedef struct +{ + unsigned int counterId; //!< Counter ID, one of \ref nvmlPRMCounterId_t + /* Input data */ + nvmlPRMCounterInput_v1_t inData; //!< PRM input values + /* Output counter value */ + nvmlPRMCounterValue_v1_t counterValue; //!< Counter value +} nvmlPRMCounter_v1_t; + +/** + * PRM Counter List Structure v1 + */ +typedef struct +{ + unsigned int numCounters; //!< Number of counters + nvmlPRMCounter_v1_t *counters; //!< Pointer to array of PRM counters +} nvmlPRMCounterList_v1_t; + +/** + * Read a list of GPU PRM Counters. + * + * For Blackwell &tm; or newer fully supported devices. + * + * Supported on Linux only. + * + * @param device Identifier of target GPU device + * @param counterList Structure holding the input parameters as well as the retrieved counter values + * + * @return + * - \ref NVML_SUCCESS on success + * - \ref NVML_ERROR_INVALID_ARGUMENT if \p device is invalid or \p counterList is NULL + * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to perform this operation + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by the device + * - \ref NVML_ERROR_UNKNOWN on any other error + */ +nvmlReturn_t DECLDIR nvmlDeviceReadPRMCounters_v1(nvmlDevice_t device, nvmlPRMCounterList_v1_t *counterList); + /***************************************************************************************************/ /** @defgroup nvmlMultiInstanceGPU Multi Instance GPU Management * This chapter describes NVML operations that are associated with Multi Instance GPU management. @@ -10758,12 +12258,12 @@ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceInfoByIndex(unsigned int index, nvmlEx /** * Disable Multi Instance GPU mode. */ -#define NVML_DEVICE_MIG_DISABLE 0x0 +#define NVML_DEVICE_MIG_DISABLE 0x0 //!< Disable Multi Instance GPU mode. /** * Enable Multi Instance GPU mode. 
*/ -#define NVML_DEVICE_MIG_ENABLE 0x1 +#define NVML_DEVICE_MIG_ENABLE 0x1 //!< Enable Multi Instance GPU mode. /** * GPU instance profiles. @@ -10771,20 +12271,38 @@ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceInfoByIndex(unsigned int index, nvmlEx * These macros should be passed to \ref nvmlDeviceGetGpuInstanceProfileInfo to retrieve the * detailed information about a GPU instance such as profile ID, engine counts. */ -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE 0x0 -#define NVML_GPU_INSTANCE_PROFILE_2_SLICE 0x1 -#define NVML_GPU_INSTANCE_PROFILE_3_SLICE 0x2 -#define NVML_GPU_INSTANCE_PROFILE_4_SLICE 0x3 -#define NVML_GPU_INSTANCE_PROFILE_7_SLICE 0x4 -#define NVML_GPU_INSTANCE_PROFILE_8_SLICE 0x5 -#define NVML_GPU_INSTANCE_PROFILE_6_SLICE 0x6 -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV1 0x7 -#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_REV1 0x8 -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV2 0x9 -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_GFX 0xA -#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_GFX 0xB -#define NVML_GPU_INSTANCE_PROFILE_4_SLICE_GFX 0xC -#define NVML_GPU_INSTANCE_PROFILE_COUNT 0xD +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE 0x0 //!< 1_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE 0x1 //!< 2_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_3_SLICE 0x2 //!< 3_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_4_SLICE 0x3 //!< 4_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_7_SLICE 0x4 //!< 7_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_8_SLICE 0x5 //!< 8_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_6_SLICE 0x6 //!< 6_SLICE GPU instance profile. +// 1_SLICE profile with at least one (if supported at all) of Decoder, Encoder, JPEG, OFA engines. +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV1 0x7 //!< 1_SLICE GPU instance profile (rev1). +// 2_SLICE profile with at least one (if supported at all) of Decoder, Encoder, JPEG, OFA engines. 
+#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_REV1 0x8 //!< 2_SLICE GPU instance profile (rev1). +// 1_SLICE profile with twice the amount of memory resources. +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV2 0x9 //!< 1_SLICE GPU instance profile (rev2). +// 1_SLICE gfx capable profile +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_GFX 0x0A //!< 1_SLICE gfx capable profile. +// 2_SLICE gfx capable profile +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_GFX 0x0B //!< 2_SLICE gfx capable profile. +// 4_SLICE gfx capable profile +#define NVML_GPU_INSTANCE_PROFILE_4_SLICE_GFX 0x0C //!< 4_SLICE gfx capable profile. +// 1_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_NO_ME 0x0D //!< 1_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. +// 2_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_NO_ME 0x0E //!< 2_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. +// 1_SLICE profile with all of GPU Decode, Encoder, JPEG, OFA engines. +// Allocation of instance of this profile prevents allocation of +// all but _NO_ME profiles. +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_ALL_ME 0x0F //!< 1_SLICE profile with all of GPU Decode, Encoder, JPEG, OFA engines. +// 2_SLICE profile with all of GPU Decode, Encoder, JPEG, OFA engines. +// Allocation of instance of this profile prevents allocation of +// all but _NO_ME profiles. +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_ALL_ME 0x10 +#define NVML_GPU_INSTANCE_PROFILE_COUNT 0x11 /** * MIG GPU instance profile capability. 
@@ -10792,9 +12310,9 @@ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceInfoByIndex(unsigned int index, nvmlEx * Bit field values representing MIG profile capabilities * \ref nvmlGpuInstanceProfileInfo_v3_t.capabilities */ -#define NVML_GPU_INSTANCE_PROFILE_CAPS_P2P 0x1 -#define NVML_GPU_INTSTANCE_PROFILE_CAPS_P2P 0x1 //!< Deprecated, do not use -#define NVML_GPU_INSTANCE_PROFILE_CAPS_GFX 0x2 +#define NVML_GPU_INSTANCE_PROFILE_CAPS_P2P 0x1 //!< Peer-to-Peer support. +#define NVML_GPU_INTSTANCE_PROFILE_CAPS_P2P 0x1 //!< Deprecated, do not use +#define NVML_GPU_INSTANCE_PROFILE_CAPS_GFX 0x2 //!< GFX support. /** * MIG compute instance profile capability. @@ -10894,26 +12412,24 @@ typedef struct nvmlGpuInstanceInfo_st nvmlGpuInstancePlacement_t placement; //!< Placement for this instance } nvmlGpuInstanceInfo_t; -typedef struct nvmlGpuInstance_st* nvmlGpuInstance_t; - /** * Compute instance profiles. * * These macros should be passed to \ref nvmlGpuInstanceGetComputeInstanceProfileInfo to retrieve the * detailed information about a compute instance such as profile ID, engine counts */ -#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE 0x0 -#define NVML_COMPUTE_INSTANCE_PROFILE_2_SLICE 0x1 -#define NVML_COMPUTE_INSTANCE_PROFILE_3_SLICE 0x2 -#define NVML_COMPUTE_INSTANCE_PROFILE_4_SLICE 0x3 -#define NVML_COMPUTE_INSTANCE_PROFILE_7_SLICE 0x4 -#define NVML_COMPUTE_INSTANCE_PROFILE_8_SLICE 0x5 -#define NVML_COMPUTE_INSTANCE_PROFILE_6_SLICE 0x6 -#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE_REV1 0x7 -#define NVML_COMPUTE_INSTANCE_PROFILE_COUNT 0x8 +#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE 0x0 //!< 1_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_2_SLICE 0x1 //!< 2_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_3_SLICE 0x2 //!< 3_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_4_SLICE 0x3 //!< 4_SLICE compute instance profile. 
+#define NVML_COMPUTE_INSTANCE_PROFILE_7_SLICE 0x4 //!< 7_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_8_SLICE 0x5 //!< 8_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_6_SLICE 0x6 //!< 6_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE_REV1 0x7 //!< 1_SLICE compute instance profile (rev1). +#define NVML_COMPUTE_INSTANCE_PROFILE_COUNT 0x8 //!< Number of compute instance profiles. -#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED 0x0 //!< All the engines except multiprocessors would be shared -#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT 0x1 +#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED 0x0 //!< All the engines except multiprocessors would be shared. +#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT 0x1 //!< Number of engine profiles. typedef struct nvmlComputeInstancePlacement_st { @@ -11057,6 +12573,7 @@ nvmlReturn_t DECLDIR nvmlDeviceSetMigMode(nvmlDevice_t device, unsigned int mode * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a currentMode or \a pendingMode are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't support MIG mode + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMigMode(nvmlDevice_t device, unsigned int *currentMode, unsigned int *pendingMode); @@ -11065,6 +12582,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMigMode(nvmlDevice_t device, unsigned int *cur * * Information provided by this API is immutable throughout the lifetime of a MIG mode. * + * @note This API can be used to enumerate all MIG profiles supported by NVML in a forward compatible + * way by invoking it on \a profile values starting from 0, until the API returns \ref NVML_ERROR_INVALID_ARGUMENT. + * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. 
* @@ -11113,6 +12633,37 @@ nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstanceProfileInfo(nvmlDevice_t device, un nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstanceProfileInfoV(nvmlDevice_t device, unsigned int profile, nvmlGpuInstanceProfileInfo_v2_t *info); +/** + * GPU instance profile query function that accepts profile ID, instead of profile name. + * It accepts a versioned \ref nvmlGpuInstanceProfileInfo_v2_t or later output structure. + * + * @note The caller must set the \ref nvmlGpuInstanceProfileInfo_v2_t.version field to the + * appropriate version prior to calling this function. For example: + * \code + * nvmlGpuInstanceProfileInfo_v2_t profileInfo = + * { .version = nvmlGpuInstanceProfileInfo_v2 }; + * nvmlReturn_t result = nvmlDeviceGetGpuInstanceProfileInfoByIdV(device, + * profileId, + * &profileInfo); + * \endcode + * + * For Ampere &tm; or newer fully supported devices. + * Supported on Linux only. + * + * @param device The identifier of the target device + * @param profileId One of the profile IDs. + * @param info Returns detailed profile information + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a profileId, \a info, or \a info->version are invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled or \a profileId isn't supported + * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation + */ +nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstanceProfileInfoByIdV(nvmlDevice_t device, unsigned int profileId, + nvmlGpuInstanceProfileInfo_v2_t *info); + /** * Get GPU instance placements. * @@ -11304,6 +12855,9 @@ nvmlReturn_t DECLDIR nvmlGpuInstanceGetInfo(nvmlGpuInstance_t gpuInstance, nvmlG * * Information provided by this API is immutable throughout the lifetime of a MIG mode. 
* + * @note This API can be used to enumerate all MIG profiles supported by NVML in a forward compatible + * way by invoking it on \a profile values starting from 0, until the API returns \ref NVML_ERROR_INVALID_ARGUMENT. + * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * @@ -11684,6 +13238,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetDeviceHandleFromMigDeviceHandle(nvmlDevice_t m /***************************************************************************************************/ /** @defgroup GPM NVML GPM + * @note For NVIDIA vGPU Software products + * @note (A) GPM is supported only on MIG-backed vGPU profiles that are allocated all of the instance's frame buffer + * @note (B) No GPM support on Windows * @{ */ /***************************************************************************************************/ @@ -11704,6 +13261,7 @@ typedef enum NVML_GPM_METRIC_ANY_TENSOR_UTIL = 5, //!< Percentage of time the GPU's SMs were doing ANY tensor operations. 0.0 - 100.0 NVML_GPM_METRIC_DFMA_TENSOR_UTIL = 6, //!< Percentage of time the GPU's SMs were doing DFMA tensor operations. 0.0 - 100.0 NVML_GPM_METRIC_HMMA_TENSOR_UTIL = 7, //!< Percentage of time the GPU's SMs were doing HMMA tensor operations. 0.0 - 100.0 + NVML_GPM_METRIC_DMMA_TENSOR_UTIL = 8, //!< Percentage of time the GPU's SMs were doing DMMA tensor operations. 0.0 - 100.0 NVML_GPM_METRIC_IMMA_TENSOR_UTIL = 9, //!< Percentage of time the GPU's SMs were doing IMMA tensor operations. 0.0 - 100.0 NVML_GPM_METRIC_DRAM_BW_UTIL = 10, //!< Percentage of DRAM bw used vs theoretical maximum. 0.0 - 100.0 */ NVML_GPM_METRIC_FP64_UTIL = 11, //!< Percentage of time the GPU's SMs were doing non-tensor FP64 math. 
0.0 - 100.0 @@ -11767,7 +13325,166 @@ typedef enum NVML_GPM_METRIC_NVLINK_L16_TX_PER_SEC = 95, //!< NvLink write bandwidth for link 16 in MiB/sec NVML_GPM_METRIC_NVLINK_L17_RX_PER_SEC = 96, //!< NvLink read bandwidth for link 17 in MiB/sec NVML_GPM_METRIC_NVLINK_L17_TX_PER_SEC = 97, //!< NvLink write bandwidth for link 17 in MiB/sec - NVML_GPM_METRIC_MAX = 98, //!< Maximum value above +1. Note that changing this should also change NVML_GPM_METRICS_GET_VERSION due to struct size change + NVML_GPM_METRIC_C2C_TOTAL_TX_PER_SEC = 100, + NVML_GPM_METRIC_C2C_TOTAL_RX_PER_SEC = 101, + NVML_GPM_METRIC_C2C_DATA_TX_PER_SEC = 102, + NVML_GPM_METRIC_C2C_DATA_RX_PER_SEC = 103, + NVML_GPM_METRIC_C2C_LINK0_TOTAL_TX_PER_SEC = 104, + NVML_GPM_METRIC_C2C_LINK0_TOTAL_RX_PER_SEC = 105, + NVML_GPM_METRIC_C2C_LINK0_DATA_TX_PER_SEC = 106, + NVML_GPM_METRIC_C2C_LINK0_DATA_RX_PER_SEC = 107, + NVML_GPM_METRIC_C2C_LINK1_TOTAL_TX_PER_SEC = 108, + NVML_GPM_METRIC_C2C_LINK1_TOTAL_RX_PER_SEC = 109, + NVML_GPM_METRIC_C2C_LINK1_DATA_TX_PER_SEC = 110, + NVML_GPM_METRIC_C2C_LINK1_DATA_RX_PER_SEC = 111, + NVML_GPM_METRIC_C2C_LINK2_TOTAL_TX_PER_SEC = 112, + NVML_GPM_METRIC_C2C_LINK2_TOTAL_RX_PER_SEC = 113, + NVML_GPM_METRIC_C2C_LINK2_DATA_TX_PER_SEC = 114, + NVML_GPM_METRIC_C2C_LINK2_DATA_RX_PER_SEC = 115, + NVML_GPM_METRIC_C2C_LINK3_TOTAL_TX_PER_SEC = 116, + NVML_GPM_METRIC_C2C_LINK3_TOTAL_RX_PER_SEC = 117, + NVML_GPM_METRIC_C2C_LINK3_DATA_TX_PER_SEC = 118, + NVML_GPM_METRIC_C2C_LINK3_DATA_RX_PER_SEC = 119, + NVML_GPM_METRIC_C2C_LINK4_TOTAL_TX_PER_SEC = 120, + NVML_GPM_METRIC_C2C_LINK4_TOTAL_RX_PER_SEC = 121, + NVML_GPM_METRIC_C2C_LINK4_DATA_TX_PER_SEC = 122, + NVML_GPM_METRIC_C2C_LINK4_DATA_RX_PER_SEC = 123, + NVML_GPM_METRIC_C2C_LINK5_TOTAL_TX_PER_SEC = 124, + NVML_GPM_METRIC_C2C_LINK5_TOTAL_RX_PER_SEC = 125, + NVML_GPM_METRIC_C2C_LINK5_DATA_TX_PER_SEC = 126, + NVML_GPM_METRIC_C2C_LINK5_DATA_RX_PER_SEC = 127, + NVML_GPM_METRIC_C2C_LINK6_TOTAL_TX_PER_SEC = 128, + 
NVML_GPM_METRIC_C2C_LINK6_TOTAL_RX_PER_SEC = 129, + NVML_GPM_METRIC_C2C_LINK6_DATA_TX_PER_SEC = 130, + NVML_GPM_METRIC_C2C_LINK6_DATA_RX_PER_SEC = 131, + NVML_GPM_METRIC_C2C_LINK7_TOTAL_TX_PER_SEC = 132, + NVML_GPM_METRIC_C2C_LINK7_TOTAL_RX_PER_SEC = 133, + NVML_GPM_METRIC_C2C_LINK7_DATA_TX_PER_SEC = 134, + NVML_GPM_METRIC_C2C_LINK7_DATA_RX_PER_SEC = 135, + NVML_GPM_METRIC_C2C_LINK8_TOTAL_TX_PER_SEC = 136, + NVML_GPM_METRIC_C2C_LINK8_TOTAL_RX_PER_SEC = 137, + NVML_GPM_METRIC_C2C_LINK8_DATA_TX_PER_SEC = 138, + NVML_GPM_METRIC_C2C_LINK8_DATA_RX_PER_SEC = 139, + NVML_GPM_METRIC_C2C_LINK9_TOTAL_TX_PER_SEC = 140, + NVML_GPM_METRIC_C2C_LINK9_TOTAL_RX_PER_SEC = 141, + NVML_GPM_METRIC_C2C_LINK9_DATA_TX_PER_SEC = 142, + NVML_GPM_METRIC_C2C_LINK9_DATA_RX_PER_SEC = 143, + NVML_GPM_METRIC_C2C_LINK10_TOTAL_TX_PER_SEC = 144, + NVML_GPM_METRIC_C2C_LINK10_TOTAL_RX_PER_SEC = 145, + NVML_GPM_METRIC_C2C_LINK10_DATA_TX_PER_SEC = 146, + NVML_GPM_METRIC_C2C_LINK10_DATA_RX_PER_SEC = 147, + NVML_GPM_METRIC_C2C_LINK11_TOTAL_TX_PER_SEC = 148, + NVML_GPM_METRIC_C2C_LINK11_TOTAL_RX_PER_SEC = 149, + NVML_GPM_METRIC_C2C_LINK11_DATA_TX_PER_SEC = 150, + NVML_GPM_METRIC_C2C_LINK11_DATA_RX_PER_SEC = 151, + NVML_GPM_METRIC_C2C_LINK12_TOTAL_TX_PER_SEC = 152, + NVML_GPM_METRIC_C2C_LINK12_TOTAL_RX_PER_SEC = 153, + NVML_GPM_METRIC_C2C_LINK12_DATA_TX_PER_SEC = 154, + NVML_GPM_METRIC_C2C_LINK12_DATA_RX_PER_SEC = 155, + NVML_GPM_METRIC_C2C_LINK13_TOTAL_TX_PER_SEC = 156, + NVML_GPM_METRIC_C2C_LINK13_TOTAL_RX_PER_SEC = 157, + NVML_GPM_METRIC_C2C_LINK13_DATA_TX_PER_SEC = 158, + NVML_GPM_METRIC_C2C_LINK13_DATA_RX_PER_SEC = 159, + NVML_GPM_METRIC_HOSTMEM_CACHE_HIT = 160, + NVML_GPM_METRIC_HOSTMEM_CACHE_MISS = 161, + NVML_GPM_METRIC_PEERMEM_CACHE_HIT = 162, + NVML_GPM_METRIC_PEERMEM_CACHE_MISS = 163, + NVML_GPM_METRIC_DRAM_CACHE_HIT = 164, + NVML_GPM_METRIC_DRAM_CACHE_MISS = 165, + NVML_GPM_METRIC_NVENC_0_UTIL = 166, + NVML_GPM_METRIC_NVENC_1_UTIL = 167, + NVML_GPM_METRIC_NVENC_2_UTIL = 168, + 
NVML_GPM_METRIC_NVENC_3_UTIL = 169, + NVML_GPM_METRIC_GR0_CTXSW_CYCLES_ELAPSED = 170, + NVML_GPM_METRIC_GR0_CTXSW_CYCLES_ACTIVE = 171, + NVML_GPM_METRIC_GR0_CTXSW_REQUESTS = 172, + NVML_GPM_METRIC_GR0_CTXSW_CYCLES_PER_REQ = 173, + NVML_GPM_METRIC_GR0_CTXSW_ACTIVE_PCT = 174, + NVML_GPM_METRIC_GR1_CTXSW_CYCLES_ELAPSED = 175, + NVML_GPM_METRIC_GR1_CTXSW_CYCLES_ACTIVE = 176, + NVML_GPM_METRIC_GR1_CTXSW_REQUESTS = 177, + NVML_GPM_METRIC_GR1_CTXSW_CYCLES_PER_REQ = 178, + NVML_GPM_METRIC_GR1_CTXSW_ACTIVE_PCT = 179, + NVML_GPM_METRIC_GR2_CTXSW_CYCLES_ELAPSED = 180, + NVML_GPM_METRIC_GR2_CTXSW_CYCLES_ACTIVE = 181, + NVML_GPM_METRIC_GR2_CTXSW_REQUESTS = 182, + NVML_GPM_METRIC_GR2_CTXSW_CYCLES_PER_REQ = 183, + NVML_GPM_METRIC_GR2_CTXSW_ACTIVE_PCT = 184, + NVML_GPM_METRIC_GR3_CTXSW_CYCLES_ELAPSED = 185, + NVML_GPM_METRIC_GR3_CTXSW_CYCLES_ACTIVE = 186, + NVML_GPM_METRIC_GR3_CTXSW_REQUESTS = 187, + NVML_GPM_METRIC_GR3_CTXSW_CYCLES_PER_REQ = 188, + NVML_GPM_METRIC_GR3_CTXSW_ACTIVE_PCT = 189, + NVML_GPM_METRIC_GR4_CTXSW_CYCLES_ELAPSED = 190, + NVML_GPM_METRIC_GR4_CTXSW_CYCLES_ACTIVE = 191, + NVML_GPM_METRIC_GR4_CTXSW_REQUESTS = 192, + NVML_GPM_METRIC_GR4_CTXSW_CYCLES_PER_REQ = 193, + NVML_GPM_METRIC_GR4_CTXSW_ACTIVE_PCT = 194, + NVML_GPM_METRIC_GR5_CTXSW_CYCLES_ELAPSED = 195, + NVML_GPM_METRIC_GR5_CTXSW_CYCLES_ACTIVE = 196, + NVML_GPM_METRIC_GR5_CTXSW_REQUESTS = 197, + NVML_GPM_METRIC_GR5_CTXSW_CYCLES_PER_REQ = 198, + NVML_GPM_METRIC_GR5_CTXSW_ACTIVE_PCT = 199, + NVML_GPM_METRIC_GR6_CTXSW_CYCLES_ELAPSED = 200, + NVML_GPM_METRIC_GR6_CTXSW_CYCLES_ACTIVE = 201, + NVML_GPM_METRIC_GR6_CTXSW_REQUESTS = 202, + NVML_GPM_METRIC_GR6_CTXSW_CYCLES_PER_REQ = 203, + NVML_GPM_METRIC_GR6_CTXSW_ACTIVE_PCT = 204, + NVML_GPM_METRIC_GR7_CTXSW_CYCLES_ELAPSED = 205, + NVML_GPM_METRIC_GR7_CTXSW_CYCLES_ACTIVE = 206, + NVML_GPM_METRIC_GR7_CTXSW_REQUESTS = 207, + NVML_GPM_METRIC_GR7_CTXSW_CYCLES_PER_REQ = 208, + NVML_GPM_METRIC_GR7_CTXSW_ACTIVE_PCT = 209, + NVML_GPM_METRIC_SM_CYCLES_ELAPSED = 248, //!< The 
GPU's SM cycles elapsed since reboot + NVML_GPM_METRIC_SM_CYCLES_ACTIVE = 249, //!< The GPU's SM activity since reboot + NVML_GPM_METRIC_MMA_CYCLES_ACTIVE = 250, //!< The GPU's SM MMA tensor activity since reboot + NVML_GPM_METRIC_DMMA_CYCLES_ACTIVE = 251, //!< The GPU's SM DMMA tensor activity since reboot + NVML_GPM_METRIC_HMMA_CYCLES_ACTIVE = 252, //!< The GPU's SM HMMA tensor activity since reboot + NVML_GPM_METRIC_IMMA_CYCLES_ACTIVE = 253, //!< The GPU's SM IMMA tensor activity since reboot + NVML_GPM_METRIC_DFMA_CYCLES_ACTIVE = 254, //!< The GPU's SM DFMA tensor activity since reboot + NVML_GPM_METRIC_PCIE_TX = 255, //!< The PCIe TX traffic since reboot + NVML_GPM_METRIC_PCIE_RX = 256, //!< The PCIe RX traffic since reboot + NVML_GPM_METRIC_INTEGER_CYCLES_ACTIVE = 257, //!< The GPU's SM integer activity since reboot + NVML_GPM_METRIC_FP64_CYCLES_ACTIVE = 258, //!< The GPU's SM FP64 activity since reboot + NVML_GPM_METRIC_FP32_CYCLES_ACTIVE = 259, //!< The GPU's SM FP32 activity since reboot + NVML_GPM_METRIC_FP16_CYCLES_ACTIVE = 260, //!< The GPU's SM FP16 activity since reboot + NVML_GPM_METRIC_NVLINK_L0_RX = 261, //!< NvLink read for link 0 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L0_TX = 262, //!< NvLink write for link 0 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L1_RX = 263, //!< NvLink read for link 1 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L1_TX = 264, //!< NvLink write for link 1 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L2_RX = 265, //!< NvLink read for link 2 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L2_TX = 266, //!< NvLink write for link 2 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L3_RX = 267, //!< NvLink read for link 3 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L3_TX = 268, //!< NvLink write for link 3 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L4_RX = 269, //!< NvLink read for link 4 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L4_TX = 270, //!< NvLink write for link 4 in bytes since reboot + 
NVML_GPM_METRIC_NVLINK_L5_RX = 271, //!< NvLink read for link 5 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L5_TX = 272, //!< NvLink write for link 5 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L6_RX = 273, //!< NvLink read for link 6 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L6_TX = 274, //!< NvLink write for link 6 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L7_RX = 275, //!< NvLink read for link 7 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L7_TX = 276, //!< NvLink write for link 7 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L8_RX = 277, //!< NvLink read for link 8 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L8_TX = 278, //!< NvLink write for link 8 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L9_RX = 279, //!< NvLink read for link 9 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L9_TX = 280, //!< NvLink write for link 9 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L10_RX = 281, //!< NvLink read for link 10 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L10_TX = 282, //!< NvLink write for link 10 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L11_RX = 283, //!< NvLink read for link 11 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L11_TX = 284, //!< NvLink write for link 11 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L12_RX = 285, //!< NvLink read for link 12 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L12_TX = 286, //!< NvLink write for link 12 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L13_RX = 287, //!< NvLink read for link 13 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L13_TX = 288, //!< NvLink write for link 13 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L14_RX = 289, //!< NvLink read for link 14 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L14_TX = 290, //!< NvLink write for link 14 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L15_RX = 291, //!< NvLink read for link 15 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L15_TX = 292, //!< NvLink write for link 15 in bytes since reboot + 
NVML_GPM_METRIC_NVLINK_L16_RX = 293, //!< NvLink read for link 16 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L16_TX = 294, //!< NvLink write for link 16 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L17_RX = 295, //!< NvLink read for link 17 in bytes since reboot + NVML_GPM_METRIC_NVLINK_L17_TX = 296, //!< NvLink write for link 17 in bytes since reboot + NVML_GPM_METRIC_MAX = 333, //!< Maximum value above +1 } nvmlGpmMetricId_t; /** @} */ // @defgroup nvmlGpmEnums @@ -11838,6 +13555,17 @@ typedef struct * * For Hopper &tm; or newer fully supported devices. * + * To retrieve metrics, the user must first allocate the two sample buffers at \a metricsGet->sample1 + * and \a metricsGet->sample2 by calling \a nvmlGpmSampleAlloc(). Next, the user should fill in the ID of each metric + * in \a metricsGet->metrics[i].metricId and specify the total number of metrics to retrieve in \a metricsGet->numMetrics, + * The version should be set to NVML_GPM_METRICS_GET_VERSION in \a metricsGet->version. The user then calls the + * \a nvmlGpmSampleGet() API twice to obtain 2 samples of counters. + * + * @note The interval between these two \a nvmlGpmSampleGet() calls should be greater than 100ms due to the + * internal sample refresh rate. Finally, the user calls \a nvmlGpmMetricsGet to retrieve the metrics, which will + * be stored at \a metricsGet->metrics + * + * * @param metricsGet IN/OUT: populated \a nvmlGpmMetricsGet_t struct * * @return @@ -11883,7 +13611,11 @@ nvmlReturn_t DECLDIR nvmlGpmSampleAlloc(nvmlGpmSample_t *gpmSample); * * For Hopper &tm; or newer fully supported devices. * - * @param device Device to get samples for + * @note The interval between two \a nvmlGpmSampleGet() calls should be greater than 100ms due to + * the internal sample refresh rate. + * @note This API supports the device handle and MIG device handle. 
+ * + * @param device The device handle or MIG device handle to get samples for * @param gpmSample Buffer to read samples into * * @return @@ -11900,6 +13632,9 @@ nvmlReturn_t DECLDIR nvmlGpmSampleGet(nvmlDevice_t device, nvmlGpmSample_t gpmSa * * For Hopper &tm; or newer fully supported devices. * + * @note The interval between two \a nvmlGpmMigSampleGet() calls should be greater than 100ms due to + * the internal sample refresh rate. + * * @param device Device to get samples for * @param gpuInstanceId MIG GPU Instance ID * @param gpmSample Buffer to read samples into @@ -11913,7 +13648,11 @@ nvmlReturn_t DECLDIR nvmlGpmMigSampleGet(nvmlDevice_t device, unsigned int gpuIn /** * Indicate whether the supplied device supports GPM * - * @param device NVML device to query for + * For Hopper &tm; or newer fully supported devices. + * + * @note This API supports the device handle and MIG device handle. + * + * @param device The device handle or MIG device handle to query for * @param gpmSupport Structure to indicate GPM support \a nvmlGpmSupport_t. Indicates * GPM support per system for the supplied device * @@ -11963,7 +13702,7 @@ nvmlReturn_t DECLDIR nvmlGpmSetStreamingEnabled(nvmlDevice_t device, unsigned in /** @} */ // @defgroup nvmlGpmFunctions /** @} */ // @defgroup GPM -#define NVML_DEV_CAP_EGM (1 << 0) // Extended GPU memory +#define NVML_DEV_CAP_EGM (1 << 0) //!< Extended GPU memory /** * Device capabilities */ @@ -11995,22 +13734,23 @@ typedef nvmlDeviceCapabilities_v1_t nvmlDeviceCapabilities_t; nvmlReturn_t DECLDIR nvmlDeviceGetCapabilities(nvmlDevice_t device, nvmlDeviceCapabilities_t *caps); + /* * Generic bitmask to hold 255 bits, represented by 8 elements of 32 bits */ -#define NVML_255_MASK_BITS_PER_ELEM 32 -#define NVML_255_MASK_NUM_ELEMS 8 +#define NVML_255_MASK_BITS_PER_ELEM 32 //!< Number of bits per element. +#define NVML_255_MASK_NUM_ELEMS 8 //!< Number of elements. 
#define NVML_255_MASK_BIT_SET(index, nvmlMask) \ - nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) //!< Set bit at index. #define NVML_255_MASK_BIT_GET(index, nvmlMask) \ - nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) //!< Get bit at index. #define NVML_255_MASK_BIT_SET_PTR(index, nvmlMask) \ - nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) //!< Set bit at index. #define NVML_255_MASK_BIT_GET_PTR(index, nvmlMask) \ - nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) //!< Get bit at index. typedef struct { @@ -12044,18 +13784,30 @@ typedef enum NVML_POWER_PROFILE_MAX = 15, } nvmlPowerProfileType_t; +/** + * Enum for operation to perform on the requested profiles + */ +typedef enum +{ + NVML_POWER_PROFILE_OPERATION_CLEAR = 0, //!< Remove the requested profiles from the existing list of requested profiles + NVML_POWER_PROFILE_OPERATION_SET = 1, //!< Add the requested profiles to the existing list of requested profiles + NVML_POWER_PROFILE_OPERATION_SET_AND_OVERWRITE = 2, //!< Overwrite the existing list of requested profiles with just the requested profiles + + NVML_POWER_PROFILE_OPERATION_MAX = 3, //!< Max value above +1 +} nvmlPowerProfileOperation_t; + /** * Profile Metadata */ typedef struct { unsigned int version; //!< the API version number - unsigned int profileId; // NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL) ? 
\ + (field_val - NVML_FI_PWR_SMOOTHING_ENABLED - \ + (NVML_FI_PWR_SMOOTHING_PRIMARY_POWER_FLOOR - NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL - 1)) : \ + (field_val - NVML_FI_PWR_SMOOTHING_ENABLED) \ + ) //!< Index from field value. + +#define NVML_POWER_SMOOTHING_MAX_NUM_PROFILES 5 //!< Maximum number of profiles. +#define NVML_POWER_SMOOTHING_NUM_PROFILE_PARAMS 8 //!< Number of profile parameters. +#define NVML_POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET 0xFFFFFFFFU //!< Admin override not set. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR 0 //!< Percent temperature floor. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE 1 //!< Ramp up rate. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE 2 //!< Ramp down rate. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS 3 //!< Ramp down hysteresis. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_SECONDARY_POWER_FLOOR 4 //!< Secondary power floor value in Watts for a given profile +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_WIN_MULT 5 //!< Primary floor activation window multiplier value for a given profile +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_TAR_WIN_MULT 6 //!< Primary floor target window multiplier value for a given profile +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_OFFSET 7 //!< Primary floor activation offset value in Watts for a given profile -#define NVML_POWER_SMOOTHING_MAX_NUM_PROFILES 5 -#define NVML_POWER_SMOOTHING_NUM_PROFILE_PARAMS 4 -#define NVML_POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET 0xFFFFFFFFU -#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR 0 -#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE 1 -#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE 2 -#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS 3 - -/* +/** * Power Smoothing Structure for Profile information */ typedef struct @@ -12221,10 +14025,10 @@ typedef struct unsigned int paramId; //!< The 
requested paramater ID double value; //!< The requested value for the given parameter } nvmlPowerSmoothingProfile_v1_t; -typedef nvmlPowerSmoothingProfile_v1_t nvmlPowerSmoothingProfile_t; //!< Current version for the power smoothing profile structure -#define nvmlPowerSmoothingProfile_v1 NVML_STRUCT_VERSION(PowerSmoothingProfile, 1) +typedef nvmlPowerSmoothingProfile_v1_t nvmlPowerSmoothingProfile_t; +#define nvmlPowerSmoothingProfile_v1 NVML_STRUCT_VERSION(PowerSmoothingProfile, 1) //!< Version macro for \a nvmlPowerSmoothingProfile_v1_t -/* +/** * Power Smoothing Structure for Feature Enablement */ typedef struct @@ -12232,18 +14036,19 @@ typedef struct unsigned int version; //!< the API version number nvmlEnableState_t state; //!< 0/Disabled or 1/Enabled } nvmlPowerSmoothingState_v1_t; -typedef nvmlPowerSmoothingState_v1_t nvmlPowerSmoothingState_t; //!< Current version for the power smoothing state structure -#define nvmlPowerSmoothingState_v1 NVML_STRUCT_VERSION(PowerSmoothingState, 1) +typedef nvmlPowerSmoothingState_v1_t nvmlPowerSmoothingState_t; +#define nvmlPowerSmoothingState_v1 NVML_STRUCT_VERSION(PowerSmoothingState, 1) //!< Version macro for \a nvmlPowerSmoothingState_v1_t /** - * Activiate a specific preset profile for datacenter power smoothing + * Activate a specific preset profile for datacenter power smoothing. * The API only sets the active preset profile based on the input profileId, * and ignores the other parameters of the structure. + * Requires root/admin permissions. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * @param device The identifier of the target device - * @param profile Reference to \ref nvmlPowerSmoothingProfile_t. + * @param profile Reference to \ref nvmlPowerSmoothingProfile_v1_t. * Note that only \a profile->profileId is used and * the rest of the structure is ignored. 
* @@ -12258,9 +14063,10 @@ nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingActivatePresetProfile(nvmlDevice_t nvmlPowerSmoothingProfile_t *profile); /** - * Update the value of a specific profile parameter contained within \ref nvmlPowerSmoothingProfile_t + * Update the value of a specific profile parameter contained within \ref nvmlPowerSmoothingProfile_v1_t. + * Requires root/admin permissions. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * NVML_POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR expects a value as a percentage from 00.00-100.00% * NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE expects a value in W/s @@ -12268,7 +14074,7 @@ nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingActivatePresetProfile(nvmlDevice_t * NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS expects a value in ms * * @param device The identifier of the target device - * @param profile Reference to \ref nvmlPowerSmoothingProfile_t struct + * @param profile Reference to \ref nvmlPowerSmoothingProfile_v1_t struct * * @return * - \ref NVML_SUCCESS if the Active Profile was successfully set @@ -12280,14 +14086,15 @@ nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingActivatePresetProfile(nvmlDevice_t nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingUpdatePresetProfileParam(nvmlDevice_t device, nvmlPowerSmoothingProfile_t *profile); /** - * Enable or disable the Power Smoothing Feature + * Enable or disable the Power Smoothing Feature. + * Requires root/admin permissions. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. 
* * See \ref nvmlEnableState_t for details on allowed states * * @param device The identifier of the target device - * @param state Reference to \ref nvmlPowerSmoothingState_t + * @param state Reference to \ref nvmlPowerSmoothingState_v1_t * * @return * - \ref NVML_SUCCESS if the feature state was successfully set @@ -12300,6 +14107,62 @@ nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingSetState(nvmlDevice_t device, nvmlPowerSmoothingState_t *state); /** @} */ // @defgroup +/** + * Retrieves the counts of SRAM unique uncorrected ECC errors + * + * For Blackwell &tm; or newer fully supported devices. + * + * Reads SRAM unique uncorrected ECC error counts. The total number of unique errors is returned by + * \a errorCounts->entryCount. Error counts are returned as an array in the caller-supplied buffer pointed at by + * \a errorCounts->entries. Each error count entry holds the location/address of the unique error, the error count and + * whether the error is parity or not. + * + * To read SRAM unique uncorrected ECC error counts, first determine the size of buffer required to hold the error + * counts by invoking the function with \a errorCounts->entries set to NULL. The required array size is returned in + * \a errorCounts->entryCount. The caller should allocate a buffer of size "errorCounts->entryCount * + * sizeof(nvmlEccSramUniqueUncorrectedErrorCounts_t)". Invoke the function again with the allocated buffer passed in + * \a errorCounts->entries. This time \a errorCounts->entryCount will be taken as the entry array size that the caller + * allocates for \a errorCounts->entries. + * + * On successful return of the second query, the function updates \a errorCounts->entries with all unique errors. This + * may fail if \a errorCounts->entryCount is smaller than the actual number of unique errors. This can happen in cases + * where new errors occur after the previous query of \a errorCounts->entryCount. 
Whether the query succeeds or not, + * the latest number of unique errors will be returned in \a errorCounts->entryCount. + * + * @note The query is only supported when ECC mode is enabled. + * + * @param device The identifier of the target device + * @param errorCounts Pointer to caller-supplied struct which returns the unique error count entries + * + * @return + * - \ref NVML_SUCCESS if \a errorCounts has been populated + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a errorCounts->entryCount is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature or ECC mode is not enabled + * - \ref NVML_ERROR_INSUFFICIENT_SIZE if the allocated error entry array is not big enough + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(nvmlDevice_t device, + nvmlEccSramUniqueUncorrectedErrorCounts_t *errorCounts); + +/** + * Set Read-only user shared data (RUSD) settings for GPU. + * Requires root/admin permissions. 
+ * + * @param device The identifier of the target device + * @param settings Reference to \ref nvmlRusdSettings_v1_t struct + * + * @return + * - \ref NVML_SUCCESS if the RUSD setting was successfully set + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid or settings is NULL + * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to change feature state + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by NVIDIA kernel driver + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the input version is not supported + * + **/ +nvmlReturn_t DECLDIR nvmlDeviceSetRusdSettings_v1(nvmlDevice_t device, nvmlRusdSettings_v1_t *settings); + /** * NVML API versioning support */ diff --git a/nvml-wrapper-sys/src/bindings.rs b/nvml-wrapper-sys/src/bindings.rs index 7ce7ba8..20dae09 100644 --- a/nvml-wrapper-sys/src/bindings.rs +++ b/nvml-wrapper-sys/src/bindings.rs @@ -1,4 +1,4 @@ -/* automatically generated by rust-bindgen 0.71.1 */ +/* automatically generated by rust-bindgen 0.72.1 */ #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] @@ -6,8 +6,8 @@ #![allow(dead_code)] use std::os::raw; -pub const NVML_API_VERSION: u32 = 12; -pub const NVML_API_VERSION_STR: &[u8; 3] = b"12\0"; +pub const NVML_API_VERSION: u32 = 13; +pub const NVML_API_VERSION_STR: &[u8; 3] = b"13\0"; pub const NVML_VALUE_NOT_AVAILABLE: i32 = -1; pub const NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE: u32 = 32; pub const NVML_DEVICE_PCI_BUS_ID_BUFFER_V2_SIZE: u32 = 16; @@ -16,11 +16,28 @@ pub const NVML_DEVICE_PCI_BUS_ID_FMT: &[u8; 17] = b"%08X:%02X:%02X.0\0"; pub const NVML_NVLINK_MAX_LINKS: u32 = 18; pub const NVML_MAX_PHYSICAL_BRIDGE: u32 = 128; pub const NVML_MAX_THERMAL_SENSORS_PER_GPU: u32 = 3; +pub const NVML_DEVICE_UUID_ASCII_LEN: u32 = 41; +pub const NVML_DEVICE_UUID_BINARY_LEN: u32 = 16; pub const nvmlFlagDefault: u32 = 0; pub const nvmlFlagForce: u32 = 1; pub const MAX_CLK_DOMAINS: u32 = 32; pub const NVML_MAX_GPU_PERF_PSTATES: u32 = 16; pub const 
NVML_PERF_MODES_BUFFER_SIZE: u32 = 2048; +pub const NVML_POWER_MIZER_MODE_ADAPTIVE: u32 = 0; +pub const NVML_POWER_MIZER_MODE_PREFER_MAXIMUM_PERFORMANCE: u32 = 1; +pub const NVML_POWER_MIZER_MODE_AUTO: u32 = 2; +pub const NVML_POWER_MIZER_MODE_PREFER_CONSISTENT_PERFORMANCE: u32 = 3; +pub const NVML_DEVICE_HOSTNAME_BUFFER_SIZE: u32 = 64; +pub const NVML_RUSD_POLL_NONE: u32 = 0; +pub const NVML_RUSD_POLL_CLOCK: u32 = 1; +pub const NVML_RUSD_POLL_PERF: u32 = 2; +pub const NVML_RUSD_POLL_MEMORY: u32 = 4; +pub const NVML_RUSD_POLL_POWER: u32 = 8; +pub const NVML_RUSD_POLL_THERMAL: u32 = 16; +pub const NVML_RUSD_POLL_PCI: u32 = 32; +pub const NVML_RUSD_POLL_FAN: u32 = 64; +pub const NVML_RUSD_POLL_PROC_UTIL: u32 = 128; +pub const NVML_RUSD_POLL_ALL: i32 = -1; pub const NVML_GSP_FIRMWARE_VERSION_BUF_SIZE: u32 = 64; pub const NVML_DEVICE_ARCH_KEPLER: u32 = 2; pub const NVML_DEVICE_ARCH_MAXWELL: u32 = 3; @@ -31,7 +48,6 @@ pub const NVML_DEVICE_ARCH_AMPERE: u32 = 7; pub const NVML_DEVICE_ARCH_ADA: u32 = 8; pub const NVML_DEVICE_ARCH_HOPPER: u32 = 9; pub const NVML_DEVICE_ARCH_BLACKWELL: u32 = 10; -pub const NVML_DEVICE_ARCH_T23X: u32 = 11; pub const NVML_DEVICE_ARCH_UNKNOWN: u32 = 4294967295; pub const NVML_BUS_TYPE_UNKNOWN: u32 = 0; pub const NVML_BUS_TYPE_PCI: u32 = 1; @@ -90,6 +106,9 @@ pub const NVML_SCHEDULER_SW_MAX_LOG_ENTRIES: u32 = 200; pub const NVML_VGPU_SCHEDULER_ARR_DEFAULT: u32 = 0; pub const NVML_VGPU_SCHEDULER_ARR_DISABLE: u32 = 1; pub const NVML_VGPU_SCHEDULER_ARR_ENABLE: u32 = 2; +pub const NVML_VGPU_SCHEDULER_ENGINE_TYPE_GRAPHICS: u32 = 1; +pub const NVML_VGPU_SCHEDULER_ENGINE_TYPE_NVENC1: u32 = 2; +pub const NVML_VGPU_SCHEDULER_ENGINE_TYPE_NVENC0: u32 = 3; pub const NVML_GRID_LICENSE_STATE_UNKNOWN: u32 = 0; pub const NVML_GRID_LICENSE_STATE_UNINITIALIZED: u32 = 1; pub const NVML_GRID_LICENSE_STATE_UNLICENSED_UNRESTRICTED: u32 = 2; @@ -327,6 +346,10 @@ pub mod field_id { pub const NVML_FI_DEV_PCIE_OUTBOUND_ATOMICS_MASK: u32 = 228; pub const 
NVML_FI_DEV_PCIE_INBOUND_ATOMICS_MASK: u32 = 229; pub const NVML_FI_DEV_GET_GPU_RECOVERY_ACTION: u32 = 230; + pub const NVML_FI_DEV_C2C_LINK_ERROR_INTR: u32 = 231; + pub const NVML_FI_DEV_C2C_LINK_ERROR_REPLAY: u32 = 232; + pub const NVML_FI_DEV_C2C_LINK_ERROR_REPLAY_B2B: u32 = 233; + pub const NVML_FI_DEV_C2C_LINK_POWER_STATE: u32 = 234; pub const NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_0: u32 = 235; pub const NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_1: u32 = 236; pub const NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_2: u32 = 237; @@ -361,7 +384,36 @@ pub mod field_id { pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE: u32 = 266; pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE: u32 = 267; pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL: u32 = 268; - pub const NVML_FI_MAX: u32 = 269; + pub const NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP: u32 = 74; + pub const NVML_FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST: u32 = 76; + pub const NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN: u32 = 269; + pub const NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN: u32 = 270; + pub const NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN: u32 = 271; + pub const NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ: u32 = 272; + pub const NVML_FI_DEV_POWER_SYNC_BALANCING_AF: u32 = 273; + pub const NVML_FI_DEV_EDPP_MULTIPLIER: u32 = 274; + pub const NVML_FI_PWR_SMOOTHING_PRIMARY_POWER_FLOOR: u32 = 275; + pub const NVML_FI_PWR_SMOOTHING_SECONDARY_POWER_FLOOR: u32 = 276; + pub const NVML_FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_OFFSET: u32 = 277; + pub const NVML_FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_POINT: u32 = 278; + pub const NVML_FI_PWR_SMOOTHING_WINDOW_MULTIPLIER: u32 = 279; + pub const NVML_FI_PWR_SMOOTHING_DELAYED_PWR_SMOOTHING_SUPPORTED: u32 = 280; + pub const NVML_FI_PWR_SMOOTHING_PROFILE_SECONDARY_POWER_FLOOR: u32 = 281; + pub const NVML_FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_WIN_MULT: u32 = 282; + pub const 
NVML_FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_TAR_WIN_MULT: u32 = 283; + pub const NVML_FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_OFFSET: u32 = 284; + pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_SECONDARY_POWER_FLOOR: u32 = 285; + pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_WIN_MULT: u32 = 286; + pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_TAR_WIN_MULT: u32 = 287; + pub const NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_OFFSET: u32 = 288; + pub const NVML_FI_DEV_NVLINK_COUNT_RAW_ERRORS_LANE0: u32 = 289; + pub const NVML_FI_DEV_NVLINK_COUNT_RAW_ERRORS_LANE1: u32 = 290; + pub const NVML_FI_DEV_NVLINK_COUNT_RAW_BER_LANE0_V2: u32 = 291; + pub const NVML_FI_DEV_NVLINK_COUNT_RAW_BER_LANE1_V2: u32 = 292; + pub const NVML_FI_DEV_NVLINK_COUNT_RAW_BER_V2: u32 = 293; + pub const NVML_FI_DEV_NVLINK_PLR_XMIT_BLOCKS: u32 = 294; + pub const NVML_FI_DEV_NVLINK_PLR_XMIT_RETRY_BLOCKS: u32 = 295; + pub const NVML_FI_MAX: u32 = 296; } pub const NVML_NVLINK_LOW_POWER_THRESHOLD_UNIT_100US: u32 = 0; pub const NVML_NVLINK_LOW_POWER_THRESHOLD_UNIT_50US: u32 = 1; @@ -371,6 +423,8 @@ pub const NVML_NVLINK_LOW_POWER_THRESHOLD_MIN: u32 = 1; pub const NVML_NVLINK_LOW_POWER_THRESHOLD_MAX: u32 = 8191; pub const NVML_NVLINK_LOW_POWER_THRESHOLD_RESET: u32 = 4294967295; pub const NVML_NVLINK_LOW_POWER_THRESHOLD_DEFAULT: u32 = 4294967295; +pub const NVML_C2C_POWER_STATE_FULL_POWER: u32 = 0; +pub const NVML_C2C_POWER_STATE_LOW_POWER: u32 = 1; pub const nvmlEventTypeNone: u32 = 0; pub const nvmlEventTypeSingleBitEccError: u32 = 1; pub const nvmlEventTypeDoubleBitEccError: u32 = 2; @@ -387,6 +441,9 @@ pub const nvmlEventTypeFatalPoisonError: u32 = 8192; pub const nvmlEventTypeGpuUnavailableError: u32 = 16384; pub const nvmlEventTypeGpuRecoveryAction: u32 = 32768; pub const nvmlEventTypeAll: u32 = 65439; +pub const nvmlSystemEventTypeGpuDriverUnbind: u32 = 1; +pub const nvmlSystemEventTypeGpuDriverBind: u32 = 2; +pub const nvmlSystemEventTypeCount: u32 = 
2; pub const nvmlClocksEventReasonGpuIdle: u32 = 1; pub const nvmlClocksEventReasonApplicationsClocksSetting: u32 = 2; pub const nvmlClocksThrottleReasonUserDefinedClocks: u32 = 2; @@ -426,6 +483,7 @@ pub const NVML_CC_SYSTEM_FEATURE_DISABLED: u32 = 0; pub const NVML_CC_SYSTEM_FEATURE_ENABLED: u32 = 1; pub const NVML_CC_SYSTEM_MULTIGPU_NONE: u32 = 0; pub const NVML_CC_SYSTEM_MULTIGPU_PROTECTED_PCIE: u32 = 1; +pub const NVML_CC_SYSTEM_MULTIGPU_NVLE: u32 = 2; pub const NVML_CC_ACCEPTING_CLIENT_REQUESTS_FALSE: u32 = 0; pub const NVML_CC_ACCEPTING_CLIENT_REQUESTS_TRUE: u32 = 1; pub const NVML_GPU_CERT_CHAIN_SIZE: u32 = 4096; @@ -462,8 +520,29 @@ pub const NVML_GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_TRUE: u32 = 1; pub const NVML_GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_FALSE: u32 = 2; pub const NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_ACCESS_TIMEOUT_RECOVERY: u32 = 6; pub const NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_ACCESS_TIMEOUT_RECOVERY: u32 = 3; +pub const NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NOT_SUPPORTED: u32 = 0; +pub const NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NONE: u32 = 1; +pub const NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCORRECT_SYSGUID: u32 = 2; +pub const NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCORRECT_CHASSIS_SN: u32 = 3; +pub const NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NO_PARTITION: u32 = 4; +pub const NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INSUFFICIENT_NVLINKS: u32 = 5; +pub const NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCOMPATIBLE_GPU_FW: u32 = 6; +pub const NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INVALID_LOCATION: u32 = 7; +pub const NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_GPU_STATE_INVALID: u32 = 8; +pub const NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_INCORRECT_CONFIGURATION: u32 = 8; +pub const NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_INCORRECT_CONFIGURATION: u32 = 15; +pub const NVML_GPU_FABRIC_HEALTH_MASK_PARTITION_ASSIGNED_NOT_SUPPORTED: 
u32 = 0; +pub const NVML_GPU_FABRIC_HEALTH_MASK_PARTITION_ASSIGNED_TRUE: u32 = 1; +pub const NVML_GPU_FABRIC_HEALTH_MASK_PARTITION_ASSIGNED_FALSE: u32 = 2; +pub const NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_PARTITION_ASSIGNED: u32 = 12; +pub const NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_PARTITION_ASSIGNED: u32 = 3; +pub const NVML_GPU_FABRIC_HEALTH_SUMMARY_NOT_SUPPORTED: u32 = 0; +pub const NVML_GPU_FABRIC_HEALTH_SUMMARY_HEALTHY: u32 = 1; +pub const NVML_GPU_FABRIC_HEALTH_SUMMARY_UNHEALTHY: u32 = 2; +pub const NVML_GPU_FABRIC_HEALTH_SUMMARY_LIMITED_CAPACITY: u32 = 3; pub const NVML_INIT_FLAG_NO_GPUS: u32 = 1; pub const NVML_INIT_FLAG_NO_ATTACH: u32 = 2; +pub const NVML_INIT_FLAG_FORCE_INIT: u32 = 4; pub const NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE: u32 = 16; pub const NVML_DEVICE_UUID_BUFFER_SIZE: u32 = 80; pub const NVML_DEVICE_UUID_V2_BUFFER_SIZE: u32 = 96; @@ -484,6 +563,13 @@ pub const NVML_NVLINK_STATE_INACTIVE: u32 = 0; pub const NVML_NVLINK_STATE_ACTIVE: u32 = 1; pub const NVML_NVLINK_STATE_SLEEP: u32 = 2; pub const NVML_NVLINK_TOTAL_SUPPORTED_BW_MODES: u32 = 23; +pub const NVML_NVLINK_FIRMWARE_UCODE_TYPE_MSE: u32 = 1; +pub const NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR: u32 = 2; +pub const NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_UPHY: u32 = 3; +pub const NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_CLN: u32 = 4; +pub const NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_DLN: u32 = 5; +pub const NVML_NVLINK_FIRMWARE_VERSION_LENGTH: u32 = 100; +pub const NVML_PRM_DATA_MAX_SIZE: u32 = 496; pub const NVML_DEVICE_MIG_DISABLE: u32 = 0; pub const NVML_DEVICE_MIG_ENABLE: u32 = 1; pub const NVML_GPU_INSTANCE_PROFILE_1_SLICE: u32 = 0; @@ -499,7 +585,11 @@ pub const NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV2: u32 = 9; pub const NVML_GPU_INSTANCE_PROFILE_1_SLICE_GFX: u32 = 10; pub const NVML_GPU_INSTANCE_PROFILE_2_SLICE_GFX: u32 = 11; pub const NVML_GPU_INSTANCE_PROFILE_4_SLICE_GFX: u32 = 12; -pub const NVML_GPU_INSTANCE_PROFILE_COUNT: u32 = 13; +pub const NVML_GPU_INSTANCE_PROFILE_1_SLICE_NO_ME: u32 = 
13; +pub const NVML_GPU_INSTANCE_PROFILE_2_SLICE_NO_ME: u32 = 14; +pub const NVML_GPU_INSTANCE_PROFILE_1_SLICE_ALL_ME: u32 = 15; +pub const NVML_GPU_INSTANCE_PROFILE_2_SLICE_ALL_ME: u32 = 16; +pub const NVML_GPU_INSTANCE_PROFILE_COUNT: u32 = 17; pub const NVML_GPU_INSTANCE_PROFILE_CAPS_P2P: u32 = 1; pub const NVML_GPU_INTSTANCE_PROFILE_CAPS_P2P: u32 = 1; pub const NVML_GPU_INSTANCE_PROFILE_CAPS_GFX: u32 = 2; @@ -522,12 +612,16 @@ pub const NVML_255_MASK_BITS_PER_ELEM: u32 = 32; pub const NVML_255_MASK_NUM_ELEMS: u32 = 8; pub const NVML_WORKLOAD_POWER_MAX_PROFILES: u32 = 255; pub const NVML_POWER_SMOOTHING_MAX_NUM_PROFILES: u32 = 5; -pub const NVML_POWER_SMOOTHING_NUM_PROFILE_PARAMS: u32 = 4; +pub const NVML_POWER_SMOOTHING_NUM_PROFILE_PARAMS: u32 = 8; pub const NVML_POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET: u32 = 4294967295; pub const NVML_POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR: u32 = 0; pub const NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE: u32 = 1; pub const NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE: u32 = 2; pub const NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS: u32 = 3; +pub const NVML_POWER_SMOOTHING_PROFILE_PARAM_SECONDARY_POWER_FLOOR: u32 = 4; +pub const NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_WIN_MULT: u32 = 5; +pub const NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_TAR_WIN_MULT: u32 = 6; +pub const NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_OFFSET: u32 = 7; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct nvmlDevice_st { @@ -536,6 +630,12 @@ pub struct nvmlDevice_st { pub type nvmlDevice_t = *mut nvmlDevice_st; #[repr(C)] #[derive(Debug, Copy, Clone)] +pub struct nvmlGpuInstance_st { + _unused: [u8; 0], +} +pub type nvmlGpuInstance_t = *mut nvmlGpuInstance_st; +#[repr(C)] +#[derive(Debug, Copy, Clone)] pub struct nvmlPciInfoExt_v1_t { pub version: raw::c_uint, pub domain: raw::c_uint, @@ -656,6 +756,33 @@ pub type nvmlDeviceAttributes_t = nvmlDeviceAttributes_st; pub struct nvmlC2cModeInfo_v1_t { 
pub isC2cEnabled: raw::c_uint, } +pub const nvmlDeviceAddressingModeType_t_NVML_DEVICE_ADDRESSING_MODE_NONE: + nvmlDeviceAddressingModeType_t = 0; +pub const nvmlDeviceAddressingModeType_t_NVML_DEVICE_ADDRESSING_MODE_HMM: + nvmlDeviceAddressingModeType_t = 1; +pub const nvmlDeviceAddressingModeType_t_NVML_DEVICE_ADDRESSING_MODE_ATS: + nvmlDeviceAddressingModeType_t = 2; +pub type nvmlDeviceAddressingModeType_t = raw::c_uint; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlDeviceAddressingMode_v1_t { + pub version: raw::c_uint, + pub value: raw::c_uint, +} +pub type nvmlDeviceAddressingMode_t = nvmlDeviceAddressingMode_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlRepairStatus_v1_t { + pub version: raw::c_uint, + pub bChannelRepairPending: raw::c_uint, + pub bTpcRepairPending: raw::c_uint, +} +pub type nvmlRepairStatus_t = nvmlRepairStatus_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlUnrepairableMemoryStatus_v1_t { + pub bUnrepairableMemory: raw::c_uint, +} #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct nvmlRowRemapperHistogramValues_st { @@ -917,6 +1044,31 @@ pub struct nvmlCoolerInfo_v1_t { pub target: nvmlCoolerTarget_t, } pub type nvmlCoolerInfo_t = nvmlCoolerInfo_v1_t; +pub const nvmlUUIDType_t_NVML_UUID_TYPE_NONE: nvmlUUIDType_t = 0; +pub const nvmlUUIDType_t_NVML_UUID_TYPE_ASCII: nvmlUUIDType_t = 1; +pub const nvmlUUIDType_t_NVML_UUID_TYPE_BINARY: nvmlUUIDType_t = 2; +pub type nvmlUUIDType_t = raw::c_uint; +#[repr(C)] +#[derive(Copy, Clone)] +pub union nvmlUUIDValue_t { + pub str_: [raw::c_char; 41usize], + pub bytes: [raw::c_uchar; 16usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct nvmlUUID_v1_t { + pub version: raw::c_uint, + pub type_: raw::c_uint, + pub value: nvmlUUIDValue_t, +} +pub type nvmlUUID_t = nvmlUUID_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlPdi_v1_t { + pub version: raw::c_uint, + pub value: raw::c_ulonglong, +} +pub type nvmlPdi_t = nvmlPdi_v1_t; pub const 
nvmlEnableState_enum_NVML_FEATURE_DISABLED: nvmlEnableState_enum = 0; pub const nvmlEnableState_enum_NVML_FEATURE_ENABLED: nvmlEnableState_enum = 1; pub type nvmlEnableState_enum = raw::c_uint; @@ -946,7 +1098,7 @@ pub const nvmlBrandType_enum_NVML_BRAND_NVIDIA_RTX: nvmlBrandType_enum = 13; pub const nvmlBrandType_enum_NVML_BRAND_NVIDIA: nvmlBrandType_enum = 14; pub const nvmlBrandType_enum_NVML_BRAND_GEFORCE_RTX: nvmlBrandType_enum = 15; pub const nvmlBrandType_enum_NVML_BRAND_TITAN_RTX: nvmlBrandType_enum = 16; -pub const nvmlBrandType_enum_NVML_BRAND_COUNT: nvmlBrandType_enum = 17; +pub const nvmlBrandType_enum_NVML_BRAND_COUNT: nvmlBrandType_enum = 18; pub type nvmlBrandType_enum = raw::c_uint; pub use self::nvmlBrandType_enum as nvmlBrandType_t; pub const nvmlTemperatureThresholds_enum_NVML_TEMPERATURE_THRESHOLD_SHUTDOWN: @@ -1007,16 +1159,6 @@ pub const nvmlMemoryErrorType_enum_NVML_MEMORY_ERROR_TYPE_UNCORRECTED: nvmlMemor pub const nvmlMemoryErrorType_enum_NVML_MEMORY_ERROR_TYPE_COUNT: nvmlMemoryErrorType_enum = 2; pub type nvmlMemoryErrorType_enum = raw::c_uint; pub use self::nvmlMemoryErrorType_enum as nvmlMemoryErrorType_t; -pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_INVALID: nvmlNvlinkVersion_enum = 0; -pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_1_0: nvmlNvlinkVersion_enum = 1; -pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_2_0: nvmlNvlinkVersion_enum = 2; -pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_2_2: nvmlNvlinkVersion_enum = 3; -pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_3_0: nvmlNvlinkVersion_enum = 4; -pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_3_1: nvmlNvlinkVersion_enum = 5; -pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_4_0: nvmlNvlinkVersion_enum = 6; -pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_5_0: nvmlNvlinkVersion_enum = 7; -pub type nvmlNvlinkVersion_enum = raw::c_uint; -pub use self::nvmlNvlinkVersion_enum as nvmlNvlinkVersion_t; pub const 
nvmlEccCounterType_enum_NVML_VOLATILE_ECC: nvmlEccCounterType_enum = 0; pub const nvmlEccCounterType_enum_NVML_AGGREGATE_ECC: nvmlEccCounterType_enum = 1; pub const nvmlEccCounterType_enum_NVML_ECC_COUNTER_TYPE_COUNT: nvmlEccCounterType_enum = 2; @@ -1093,6 +1235,13 @@ pub struct nvmlDeviceCurrentClockFreqs_v1_t { pub str_: [raw::c_char; 2048usize], } pub type nvmlDeviceCurrentClockFreqs_t = nvmlDeviceCurrentClockFreqs_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlDevicePowerMizerModes_v1_t { + pub currentMode: raw::c_uint, + pub mode: raw::c_uint, + pub supportedPowerMizerModes: raw::c_uint, +} pub const nvmlGom_enum_NVML_GOM_ALL_ON: nvmlGom_enum = 0; pub const nvmlGom_enum_NVML_GOM_COMPUTE: nvmlGom_enum = 1; pub const nvmlGom_enum_NVML_GOM_LOW_DP: nvmlGom_enum = 2; @@ -1135,6 +1284,7 @@ pub const nvmlReturn_enum_NVML_ERROR_DEPRECATED: nvmlReturn_enum = 26; pub const nvmlReturn_enum_NVML_ERROR_NOT_READY: nvmlReturn_enum = 27; pub const nvmlReturn_enum_NVML_ERROR_GPU_NOT_FOUND: nvmlReturn_enum = 28; pub const nvmlReturn_enum_NVML_ERROR_INVALID_STATE: nvmlReturn_enum = 29; +pub const nvmlReturn_enum_NVML_ERROR_RESET_TYPE_NOT_SUPPORTED: nvmlReturn_enum = 30; pub const nvmlReturn_enum_NVML_ERROR_UNKNOWN: nvmlReturn_enum = 999; pub type nvmlReturn_enum = raw::c_uint; pub use self::nvmlReturn_enum as nvmlReturn_t; @@ -1240,6 +1390,36 @@ pub struct nvmlPlatformInfo_v2_t { pub moduleId: raw::c_uchar, } pub type nvmlPlatformInfo_t = nvmlPlatformInfo_v2_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlHostname_v1_t { + pub value: [raw::c_char; 64usize], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlEccSramUniqueUncorrectedErrorEntry_v1_t { + pub unit: raw::c_uint, + pub location: raw::c_uint, + pub sublocation: raw::c_uint, + pub extlocation: raw::c_uint, + pub address: raw::c_uint, + pub isParity: raw::c_uint, + pub count: raw::c_uint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct 
nvmlEccSramUniqueUncorrectedErrorCounts_v1_t { + pub version: raw::c_uint, + pub entryCount: raw::c_uint, + pub entries: *mut nvmlEccSramUniqueUncorrectedErrorEntry_v1_t, +} +pub type nvmlEccSramUniqueUncorrectedErrorCounts_t = nvmlEccSramUniqueUncorrectedErrorCounts_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlRusdSettings_v1_t { + pub version: raw::c_uint, + pub pollMask: raw::c_ulonglong, +} pub type nvmlDeviceArchitecture_t = raw::c_uint; pub type nvmlBusType_t = raw::c_uint; pub type nvmlFanControlPolicy_t = raw::c_uint; @@ -1351,8 +1531,12 @@ pub const nvmlDeviceVgpuCapability_enum_NVML_DEVICE_VGPU_CAP_WARM_UPDATE: nvmlDeviceVgpuCapability_enum = 8; pub const nvmlDeviceVgpuCapability_enum_NVML_DEVICE_VGPU_CAP_HOMOGENEOUS_PLACEMENTS: nvmlDeviceVgpuCapability_enum = 9; +pub const nvmlDeviceVgpuCapability_enum_NVML_DEVICE_VGPU_CAP_MIG_TIMESLICING_SUPPORTED: + nvmlDeviceVgpuCapability_enum = 10; +pub const nvmlDeviceVgpuCapability_enum_NVML_DEVICE_VGPU_CAP_MIG_TIMESLICING_ENABLED: + nvmlDeviceVgpuCapability_enum = 11; pub const nvmlDeviceVgpuCapability_enum_NVML_DEVICE_VGPU_CAP_COUNT: nvmlDeviceVgpuCapability_enum = - 10; + 12; pub type nvmlDeviceVgpuCapability_enum = raw::c_uint; pub use self::nvmlDeviceVgpuCapability_enum as nvmlDeviceVgpuCapability_t; pub type nvmlVgpuTypeId_t = raw::c_uint; @@ -1622,6 +1806,109 @@ pub type nvmlDeviceGpuRecoveryAction_s = raw::c_uint; pub use self::nvmlDeviceGpuRecoveryAction_s as nvmlDeviceGpuRecoveryAction_t; #[repr(C)] #[derive(Debug, Copy, Clone)] +pub struct nvmlVgpuTypeIdInfo_v1_t { + pub version: raw::c_uint, + pub vgpuCount: raw::c_uint, + pub vgpuTypeIds: *mut nvmlVgpuTypeId_t, +} +pub type nvmlVgpuTypeIdInfo_t = nvmlVgpuTypeIdInfo_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlVgpuTypeMaxInstance_v1_t { + pub version: raw::c_uint, + pub vgpuTypeId: nvmlVgpuTypeId_t, + pub maxInstancePerGI: raw::c_uint, +} +pub type nvmlVgpuTypeMaxInstance_t = nvmlVgpuTypeMaxInstance_v1_t; 
+#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlActiveVgpuInstanceInfo_v1_t { + pub version: raw::c_uint, + pub vgpuCount: raw::c_uint, + pub vgpuInstances: *mut nvmlVgpuInstance_t, +} +pub type nvmlActiveVgpuInstanceInfo_t = nvmlActiveVgpuInstanceInfo_v1_t; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct nvmlVgpuSchedulerState_v1_t { + pub version: raw::c_uint, + pub engineId: raw::c_uint, + pub schedulerPolicy: raw::c_uint, + pub enableARRMode: raw::c_uint, + pub schedulerParams: nvmlVgpuSchedulerSetParams_t, +} +pub type nvmlVgpuSchedulerState_t = nvmlVgpuSchedulerState_v1_t; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct nvmlVgpuSchedulerStateInfo_v1_t { + pub version: raw::c_uint, + pub engineId: raw::c_uint, + pub schedulerPolicy: raw::c_uint, + pub arrMode: raw::c_uint, + pub schedulerParams: nvmlVgpuSchedulerParams_t, +} +pub type nvmlVgpuSchedulerStateInfo_t = nvmlVgpuSchedulerStateInfo_v1_t; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct nvmlVgpuSchedulerLogInfo_v1_t { + pub version: raw::c_uint, + pub engineId: raw::c_uint, + pub schedulerPolicy: raw::c_uint, + pub arrMode: raw::c_uint, + pub schedulerParams: nvmlVgpuSchedulerParams_t, + pub entriesCount: raw::c_uint, + pub logEntries: [nvmlVgpuSchedulerLogEntry_t; 200usize], +} +pub type nvmlVgpuSchedulerLogInfo_t = nvmlVgpuSchedulerLogInfo_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlVgpuCreatablePlacementInfo_v1_t { + pub version: raw::c_uint, + pub vgpuTypeId: nvmlVgpuTypeId_t, + pub count: raw::c_uint, + pub placementIds: *mut raw::c_uint, + pub placementSize: raw::c_uint, +} +pub type nvmlVgpuCreatablePlacementInfo_t = nvmlVgpuCreatablePlacementInfo_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlVgpuSchedulerStateInfo_v2_t { + pub engineId: raw::c_uint, + pub schedulerPolicy: raw::c_uint, + pub avgFactor: raw::c_uint, + pub timeslice: raw::c_uint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlVgpuSchedulerLogEntry_v2_t { + pub 
timestamp: raw::c_ulonglong, + pub timeRunTotal: raw::c_ulonglong, + pub timeRun: raw::c_ulonglong, + pub swRunlistId: raw::c_uint, + pub targetTimeSlice: raw::c_ulonglong, + pub cumulativePreemptionTime: raw::c_ulonglong, + pub weight: raw::c_uint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlVgpuSchedulerLogInfo_v2_t { + pub engineId: raw::c_uint, + pub schedulerPolicy: raw::c_uint, + pub avgFactor: raw::c_uint, + pub timeslice: raw::c_uint, + pub entriesCount: raw::c_uint, + pub logEntries: [nvmlVgpuSchedulerLogEntry_v2_t; 200usize], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlVgpuSchedulerState_v2_t { + pub engineId: raw::c_uint, + pub schedulerPolicy: raw::c_uint, + pub avgFactor: raw::c_uint, + pub frequency: raw::c_uint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] pub struct nvmlNvLinkPowerThres_st { pub lowPwrThreshold: raw::c_uint, } @@ -1716,6 +2003,51 @@ pub struct nvmlEventData_st { pub type nvmlEventData_t = nvmlEventData_st; #[repr(C)] #[derive(Debug, Copy, Clone)] +pub struct nvmlSystemEventSet_st { + _unused: [u8; 0], +} +pub type nvmlSystemEventSet_t = *mut nvmlSystemEventSet_st; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlSystemEventSetCreateRequest_v1_t { + pub version: raw::c_uint, + pub set: nvmlSystemEventSet_t, +} +pub type nvmlSystemEventSetCreateRequest_t = nvmlSystemEventSetCreateRequest_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlSystemEventSetFreeRequest_v1_t { + pub version: raw::c_uint, + pub set: nvmlSystemEventSet_t, +} +pub type nvmlSystemEventSetFreeRequest_t = nvmlSystemEventSetFreeRequest_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlSystemRegisterEventRequest_v1_t { + pub version: raw::c_uint, + pub eventTypes: raw::c_ulonglong, + pub set: nvmlSystemEventSet_t, +} +pub type nvmlSystemRegisterEventRequest_t = nvmlSystemRegisterEventRequest_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlSystemEventData_v1_t { + pub 
eventType: raw::c_ulonglong, + pub gpuId: raw::c_uint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlSystemEventSetWaitRequest_v1_t { + pub version: raw::c_uint, + pub timeoutms: raw::c_uint, + pub set: nvmlSystemEventSet_t, + pub data: *mut nvmlSystemEventData_v1_t, + pub dataSize: raw::c_uint, + pub numEvent: raw::c_uint, +} +pub type nvmlSystemEventSetWaitRequest_t = nvmlSystemEventSetWaitRequest_v1_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] pub struct nvmlAccountingStats_st { pub gpuUtilization: raw::c_uint, pub memoryUtilization: raw::c_uint, @@ -1876,7 +2208,18 @@ pub struct nvmlGpuFabricInfo_v2_t { pub state: nvmlGpuFabricState_t, pub healthMask: raw::c_uint, } -pub type nvmlGpuFabricInfoV_t = nvmlGpuFabricInfo_v2_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlGpuFabricInfo_v3_t { + pub version: raw::c_uint, + pub clusterUuid: [raw::c_uchar; 16usize], + pub status: nvmlReturn_t, + pub cliqueId: raw::c_uint, + pub state: nvmlGpuFabricState_t, + pub healthMask: raw::c_uint, + pub healthSummary: raw::c_uchar, +} +pub type nvmlGpuFabricInfoV_t = nvmlGpuFabricInfo_v3_t; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct nvmlSystemDriverBranchInfo_v1_t { @@ -1898,6 +2241,16 @@ pub const nvmlClockLimitId_enum_NVML_CLOCK_LIMIT_ID_TDP: nvmlClockLimitId_enum = pub const nvmlClockLimitId_enum_NVML_CLOCK_LIMIT_ID_UNLIMITED: nvmlClockLimitId_enum = 4294967042; pub type nvmlClockLimitId_enum = raw::c_uint; pub use self::nvmlClockLimitId_enum as nvmlClockLimitId_t; +pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_INVALID: nvmlNvlinkVersion_enum = 0; +pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_1_0: nvmlNvlinkVersion_enum = 1; +pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_2_0: nvmlNvlinkVersion_enum = 2; +pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_2_2: nvmlNvlinkVersion_enum = 3; +pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_3_0: nvmlNvlinkVersion_enum = 4; +pub const 
nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_3_1: nvmlNvlinkVersion_enum = 5; +pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_4_0: nvmlNvlinkVersion_enum = 6; +pub const nvmlNvlinkVersion_enum_NVML_NVLINK_VERSION_5_0: nvmlNvlinkVersion_enum = 7; +pub type nvmlNvlinkVersion_enum = raw::c_uint; +pub use self::nvmlNvlinkVersion_enum as nvmlNvlinkVersion_t; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct nvmlNvlinkSupportedBwModes_v1_t { @@ -1924,6 +2277,34 @@ pub struct nvmlNvlinkSetBwMode_v1_t { pub type nvmlNvlinkSetBwMode_t = nvmlNvlinkSetBwMode_v1_t; #[repr(C)] #[derive(Debug, Copy, Clone)] +pub struct nvmlNvLinkInfo_v1_t { + pub version: raw::c_uint, + pub isNvleEnabled: raw::c_uint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlNvlinkFirmwareVersion_t { + pub ucodeType: raw::c_uchar, + pub major: raw::c_uint, + pub minor: raw::c_uint, + pub subMinor: raw::c_uint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlNvlinkFirmwareInfo_t { + pub firmwareVersion: [nvmlNvlinkFirmwareVersion_t; 100usize], + pub numValidEntries: raw::c_uint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlNvLinkInfo_v2_t { + pub version: raw::c_uint, + pub isNvleEnabled: raw::c_uint, + pub firmwareInfo: nvmlNvlinkFirmwareInfo_t, +} +pub type nvmlNvLinkInfo_t = nvmlNvLinkInfo_v2_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] pub struct nvmlVgpuVersion_st { pub minVersion: raw::c_uint, pub maxVersion: raw::c_uint, @@ -1996,6 +2377,66 @@ pub struct nvmlExcludedDeviceInfo_st { } pub type nvmlExcludedDeviceInfo_t = nvmlExcludedDeviceInfo_st; #[repr(C)] +#[derive(Copy, Clone)] +pub struct nvmlPRMTLV_v1_t { + pub dataSize: raw::c_uint, + pub status: raw::c_uint, + pub __bindgen_anon_1: nvmlPRMTLV_v1_t__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union nvmlPRMTLV_v1_t__bindgen_ty_1 { + pub inData: [raw::c_uchar; 496usize], + pub outData: [raw::c_uchar; 496usize], +} +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_NONE: 
nvmlPRMCounterId_t = 0; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_LINK_DOWN_EVENTS: + nvmlPRMCounterId_t = 1; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_SUCCESSFUL_RECOVERY_EVENTS : nvmlPRMCounterId_t = 2 ; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TOTAL_SUCCESSFUL_RECOVERY_EVENTS : nvmlPRMCounterId_t = 101 ; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_SINCE_LAST_RECOVERY: + nvmlPRMCounterId_t = 102; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_BETWEEN_LAST_TWO_RECOVERIES : nvmlPRMCounterId_t = 103 ; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PORTCOUNTERS_PORT_XMIT_WAIT: + nvmlPRMCounterId_t = 201; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_CODES: nvmlPRMCounterId_t = 301; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_CODE_ERR: nvmlPRMCounterId_t = 302; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_UNCORRECTABLE_CODE: + nvmlPRMCounterId_t = 303; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_CODES: nvmlPRMCounterId_t = 304; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_CODES: nvmlPRMCounterId_t = + 305; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_EVENTS: nvmlPRMCounterId_t = + 306; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPCNT_PLR_SYNC_EVENTS: nvmlPRMCounterId_t = 307; +pub const nvmlPRMCounterId_t_NVML_PRM_COUNTER_ID_PPRM_OPER_RECOVERY: nvmlPRMCounterId_t = 1001; +pub type nvmlPRMCounterId_t = raw::c_uint; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlPRMCounterInput_v1_t { + pub localPort: raw::c_uint, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct nvmlPRMCounterValue_v1_t { + pub status: nvmlReturn_t, + pub outputType: nvmlValueType_t, + pub outputValue: nvmlValue_t, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct nvmlPRMCounter_v1_t { + pub 
counterId: raw::c_uint, + pub inData: nvmlPRMCounterInput_v1_t, + pub counterValue: nvmlPRMCounterValue_v1_t, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct nvmlPRMCounterList_v1_t { + pub numCounters: raw::c_uint, + pub counters: *mut nvmlPRMCounter_v1_t, +} +#[repr(C)] #[derive(Debug, Copy, Clone)] pub struct nvmlGpuInstancePlacement_st { pub start: raw::c_uint, @@ -2065,12 +2506,6 @@ pub struct nvmlGpuInstanceInfo_st { pub type nvmlGpuInstanceInfo_t = nvmlGpuInstanceInfo_st; #[repr(C)] #[derive(Debug, Copy, Clone)] -pub struct nvmlGpuInstance_st { - _unused: [u8; 0], -} -pub type nvmlGpuInstance_t = *mut nvmlGpuInstance_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] pub struct nvmlComputeInstancePlacement_st { pub start: raw::c_uint, pub size: raw::c_uint, @@ -2146,6 +2581,7 @@ pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_INTEGER_UTIL: nvmlGpmMetricId_t = 4; pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_ANY_TENSOR_UTIL: nvmlGpmMetricId_t = 5; pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_DFMA_TENSOR_UTIL: nvmlGpmMetricId_t = 6; pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_HMMA_TENSOR_UTIL: nvmlGpmMetricId_t = 7; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_DMMA_TENSOR_UTIL: nvmlGpmMetricId_t = 8; pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_IMMA_TENSOR_UTIL: nvmlGpmMetricId_t = 9; pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_DRAM_BW_UTIL: nvmlGpmMetricId_t = 10; pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_FP64_UTIL: nvmlGpmMetricId_t = 11; @@ -2209,7 +2645,166 @@ pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L16_RX_PER_SEC: nvmlGpmMetric pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L16_TX_PER_SEC: nvmlGpmMetricId_t = 95; pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L17_RX_PER_SEC: nvmlGpmMetricId_t = 96; pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L17_TX_PER_SEC: nvmlGpmMetricId_t = 97; -pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_MAX: nvmlGpmMetricId_t = 98; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_TOTAL_TX_PER_SEC: 
nvmlGpmMetricId_t = 100; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 101; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 102; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 103; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK0_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 104; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK0_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 105; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK0_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 106; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK0_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 107; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK1_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 108; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK1_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 109; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK1_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 110; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK1_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 111; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK2_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 112; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK2_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 113; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK2_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 114; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK2_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 115; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK3_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 116; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK3_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 117; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK3_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 118; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK3_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 119; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK4_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 120; +pub const 
nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK4_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 121; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK4_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 122; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK4_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 123; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK5_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 124; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK5_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 125; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK5_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 126; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK5_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 127; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK6_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 128; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK6_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 129; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK6_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 130; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK6_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 131; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK7_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 132; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK7_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 133; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK7_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 134; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK7_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 135; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK8_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 136; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK8_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 137; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK8_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 138; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK8_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 139; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK9_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 140; +pub const 
nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK9_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 141; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK9_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 142; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK9_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 143; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK10_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 144; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK10_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 145; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK10_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 146; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK10_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 147; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK11_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 148; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK11_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 149; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK11_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 150; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK11_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 151; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK12_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 152; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK12_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 153; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK12_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 154; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK12_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 155; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK13_TOTAL_TX_PER_SEC: nvmlGpmMetricId_t = 156; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK13_TOTAL_RX_PER_SEC: nvmlGpmMetricId_t = 157; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK13_DATA_TX_PER_SEC: nvmlGpmMetricId_t = 158; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_C2C_LINK13_DATA_RX_PER_SEC: nvmlGpmMetricId_t = 159; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_HOSTMEM_CACHE_HIT: nvmlGpmMetricId_t = 160; +pub const 
nvmlGpmMetricId_t_NVML_GPM_METRIC_HOSTMEM_CACHE_MISS: nvmlGpmMetricId_t = 161; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_PEERMEM_CACHE_HIT: nvmlGpmMetricId_t = 162; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_PEERMEM_CACHE_MISS: nvmlGpmMetricId_t = 163; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_DRAM_CACHE_HIT: nvmlGpmMetricId_t = 164; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_DRAM_CACHE_MISS: nvmlGpmMetricId_t = 165; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVENC_0_UTIL: nvmlGpmMetricId_t = 166; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVENC_1_UTIL: nvmlGpmMetricId_t = 167; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVENC_2_UTIL: nvmlGpmMetricId_t = 168; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVENC_3_UTIL: nvmlGpmMetricId_t = 169; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR0_CTXSW_CYCLES_ELAPSED: nvmlGpmMetricId_t = 170; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR0_CTXSW_CYCLES_ACTIVE: nvmlGpmMetricId_t = 171; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR0_CTXSW_REQUESTS: nvmlGpmMetricId_t = 172; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR0_CTXSW_CYCLES_PER_REQ: nvmlGpmMetricId_t = 173; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR0_CTXSW_ACTIVE_PCT: nvmlGpmMetricId_t = 174; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR1_CTXSW_CYCLES_ELAPSED: nvmlGpmMetricId_t = 175; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR1_CTXSW_CYCLES_ACTIVE: nvmlGpmMetricId_t = 176; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR1_CTXSW_REQUESTS: nvmlGpmMetricId_t = 177; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR1_CTXSW_CYCLES_PER_REQ: nvmlGpmMetricId_t = 178; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR1_CTXSW_ACTIVE_PCT: nvmlGpmMetricId_t = 179; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR2_CTXSW_CYCLES_ELAPSED: nvmlGpmMetricId_t = 180; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR2_CTXSW_CYCLES_ACTIVE: nvmlGpmMetricId_t = 181; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR2_CTXSW_REQUESTS: nvmlGpmMetricId_t = 182; +pub const 
nvmlGpmMetricId_t_NVML_GPM_METRIC_GR2_CTXSW_CYCLES_PER_REQ: nvmlGpmMetricId_t = 183; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR2_CTXSW_ACTIVE_PCT: nvmlGpmMetricId_t = 184; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR3_CTXSW_CYCLES_ELAPSED: nvmlGpmMetricId_t = 185; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR3_CTXSW_CYCLES_ACTIVE: nvmlGpmMetricId_t = 186; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR3_CTXSW_REQUESTS: nvmlGpmMetricId_t = 187; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR3_CTXSW_CYCLES_PER_REQ: nvmlGpmMetricId_t = 188; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR3_CTXSW_ACTIVE_PCT: nvmlGpmMetricId_t = 189; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR4_CTXSW_CYCLES_ELAPSED: nvmlGpmMetricId_t = 190; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR4_CTXSW_CYCLES_ACTIVE: nvmlGpmMetricId_t = 191; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR4_CTXSW_REQUESTS: nvmlGpmMetricId_t = 192; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR4_CTXSW_CYCLES_PER_REQ: nvmlGpmMetricId_t = 193; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR4_CTXSW_ACTIVE_PCT: nvmlGpmMetricId_t = 194; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR5_CTXSW_CYCLES_ELAPSED: nvmlGpmMetricId_t = 195; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR5_CTXSW_CYCLES_ACTIVE: nvmlGpmMetricId_t = 196; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR5_CTXSW_REQUESTS: nvmlGpmMetricId_t = 197; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR5_CTXSW_CYCLES_PER_REQ: nvmlGpmMetricId_t = 198; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR5_CTXSW_ACTIVE_PCT: nvmlGpmMetricId_t = 199; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR6_CTXSW_CYCLES_ELAPSED: nvmlGpmMetricId_t = 200; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR6_CTXSW_CYCLES_ACTIVE: nvmlGpmMetricId_t = 201; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR6_CTXSW_REQUESTS: nvmlGpmMetricId_t = 202; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR6_CTXSW_CYCLES_PER_REQ: nvmlGpmMetricId_t = 203; +pub const 
nvmlGpmMetricId_t_NVML_GPM_METRIC_GR6_CTXSW_ACTIVE_PCT: nvmlGpmMetricId_t = 204; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR7_CTXSW_CYCLES_ELAPSED: nvmlGpmMetricId_t = 205; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR7_CTXSW_CYCLES_ACTIVE: nvmlGpmMetricId_t = 206; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR7_CTXSW_REQUESTS: nvmlGpmMetricId_t = 207; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR7_CTXSW_CYCLES_PER_REQ: nvmlGpmMetricId_t = 208; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_GR7_CTXSW_ACTIVE_PCT: nvmlGpmMetricId_t = 209; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_SM_CYCLES_ELAPSED: nvmlGpmMetricId_t = 248; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_SM_CYCLES_ACTIVE: nvmlGpmMetricId_t = 249; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_MMA_CYCLES_ACTIVE: nvmlGpmMetricId_t = 250; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_DMMA_CYCLES_ACTIVE: nvmlGpmMetricId_t = 251; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_HMMA_CYCLES_ACTIVE: nvmlGpmMetricId_t = 252; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_IMMA_CYCLES_ACTIVE: nvmlGpmMetricId_t = 253; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_DFMA_CYCLES_ACTIVE: nvmlGpmMetricId_t = 254; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_PCIE_TX: nvmlGpmMetricId_t = 255; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_PCIE_RX: nvmlGpmMetricId_t = 256; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_INTEGER_CYCLES_ACTIVE: nvmlGpmMetricId_t = 257; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_FP64_CYCLES_ACTIVE: nvmlGpmMetricId_t = 258; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_FP32_CYCLES_ACTIVE: nvmlGpmMetricId_t = 259; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_FP16_CYCLES_ACTIVE: nvmlGpmMetricId_t = 260; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L0_RX: nvmlGpmMetricId_t = 261; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L0_TX: nvmlGpmMetricId_t = 262; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L1_RX: nvmlGpmMetricId_t = 263; +pub const 
nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L1_TX: nvmlGpmMetricId_t = 264; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L2_RX: nvmlGpmMetricId_t = 265; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L2_TX: nvmlGpmMetricId_t = 266; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L3_RX: nvmlGpmMetricId_t = 267; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L3_TX: nvmlGpmMetricId_t = 268; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L4_RX: nvmlGpmMetricId_t = 269; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L4_TX: nvmlGpmMetricId_t = 270; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L5_RX: nvmlGpmMetricId_t = 271; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L5_TX: nvmlGpmMetricId_t = 272; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L6_RX: nvmlGpmMetricId_t = 273; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L6_TX: nvmlGpmMetricId_t = 274; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L7_RX: nvmlGpmMetricId_t = 275; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L7_TX: nvmlGpmMetricId_t = 276; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L8_RX: nvmlGpmMetricId_t = 277; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L8_TX: nvmlGpmMetricId_t = 278; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L9_RX: nvmlGpmMetricId_t = 279; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L9_TX: nvmlGpmMetricId_t = 280; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L10_RX: nvmlGpmMetricId_t = 281; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L10_TX: nvmlGpmMetricId_t = 282; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L11_RX: nvmlGpmMetricId_t = 283; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L11_TX: nvmlGpmMetricId_t = 284; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L12_RX: nvmlGpmMetricId_t = 285; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L12_TX: nvmlGpmMetricId_t = 286; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L13_RX: 
nvmlGpmMetricId_t = 287; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L13_TX: nvmlGpmMetricId_t = 288; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L14_RX: nvmlGpmMetricId_t = 289; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L14_TX: nvmlGpmMetricId_t = 290; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L15_RX: nvmlGpmMetricId_t = 291; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L15_TX: nvmlGpmMetricId_t = 292; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L16_RX: nvmlGpmMetricId_t = 293; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L16_TX: nvmlGpmMetricId_t = 294; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L17_RX: nvmlGpmMetricId_t = 295; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_NVLINK_L17_TX: nvmlGpmMetricId_t = 296; +pub const nvmlGpmMetricId_t_NVML_GPM_METRIC_MAX: nvmlGpmMetricId_t = 333; pub type nvmlGpmMetricId_t = raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -2239,7 +2834,7 @@ pub struct nvmlGpmMetricsGet_t { pub numMetrics: raw::c_uint, pub sample1: nvmlGpmSample_t, pub sample2: nvmlGpmSample_t, - pub metrics: [nvmlGpmMetric_t; 98usize], + pub metrics: [nvmlGpmMetric_t; 333usize], } #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -2276,6 +2871,15 @@ pub const nvmlPowerProfileType_t_NVML_POWER_PROFILE_HPC: nvmlPowerProfileType_t pub const nvmlPowerProfileType_t_NVML_POWER_PROFILE_MIG: nvmlPowerProfileType_t = 14; pub const nvmlPowerProfileType_t_NVML_POWER_PROFILE_MAX: nvmlPowerProfileType_t = 15; pub type nvmlPowerProfileType_t = raw::c_uint; +pub const nvmlPowerProfileOperation_t_NVML_POWER_PROFILE_OPERATION_CLEAR: + nvmlPowerProfileOperation_t = 0; +pub const nvmlPowerProfileOperation_t_NVML_POWER_PROFILE_OPERATION_SET: + nvmlPowerProfileOperation_t = 1; +pub const nvmlPowerProfileOperation_t_NVML_POWER_PROFILE_OPERATION_SET_AND_OVERWRITE: + nvmlPowerProfileOperation_t = 2; +pub const nvmlPowerProfileOperation_t_NVML_POWER_PROFILE_OPERATION_MAX: + nvmlPowerProfileOperation_t = 3; +pub 
type nvmlPowerProfileOperation_t = raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct nvmlWorkloadPowerProfileInfo_v1_t { @@ -2312,6 +2916,12 @@ pub type nvmlWorkloadPowerProfileRequestedProfiles_t = nvmlWorkloadPowerProfileRequestedProfiles_v1_t; #[repr(C)] #[derive(Debug, Copy, Clone)] +pub struct nvmlWorkloadPowerProfileUpdateProfiles_v1_t { + pub operation: nvmlPowerProfileOperation_t, + pub updateProfilesMask: nvmlMask255_t, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] pub struct nvmlPowerSmoothingProfile_v1_t { pub version: raw::c_uint, pub profileId: raw::c_uint, @@ -2445,6 +3055,10 @@ pub struct NvmlLib { unsafe extern "C" fn(uuid: *const raw::c_char, device: *mut nvmlDevice_t) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceGetHandleByUUIDV: Result< + unsafe extern "C" fn(uuid: *const nvmlUUID_t, device: *mut nvmlDevice_t) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlDeviceGetHandleByPciBusId_v2: Result< unsafe extern "C" fn( pciBusId: *const raw::c_char, @@ -2521,6 +3135,27 @@ pub struct NvmlLib { unsafe extern "C" fn(device: nvmlDevice_t, node: *mut raw::c_uint) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceGetAddressingMode: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + mode: *mut nvmlDeviceAddressingMode_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceGetRepairStatus: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + repairStatus: *mut nvmlRepairStatus_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceGetUnrepairableMemoryFlag_v1: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + unrepairableMemoryStatus: *mut nvmlUnrepairableMemoryStatus_v1_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlDeviceGetTopologyCommonAncestor: Result< unsafe extern "C" fn( device1: nvmlDevice_t, @@ -2953,6 +3588,20 @@ pub struct NvmlLib { unsafe extern "C" fn(device: nvmlDevice_t, power: *mut raw::c_uint) -> nvmlReturn_t, ::libloading::Error, >, + pub 
nvmlDeviceGetPowerMizerMode_v1: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + powerMizerMode: *mut nvmlDevicePowerMizerModes_v1_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceSetPowerMizerMode_v1: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + powerMizerMode: *mut nvmlDevicePowerMizerModes_v1_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlDeviceGetTotalEnergyConsumption: Result< unsafe extern "C" fn(device: nvmlDevice_t, energy: *mut raw::c_ulonglong) -> nvmlReturn_t, ::libloading::Error, @@ -3359,6 +4008,13 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceSetPowerManagementLimit_v2: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + powerValue: *mut nvmlPowerValue_v2_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlDeviceGetAccountingMode: Result< unsafe extern "C" fn(device: nvmlDevice_t, mode: *mut nvmlEnableState_t) -> nvmlReturn_t, ::libloading::Error, @@ -3460,6 +4116,24 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceGetPdi: Result< + unsafe extern "C" fn(device: nvmlDevice_t, pdi: *mut nvmlPdi_t) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceSetHostname_v1: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + hostname: *mut nvmlHostname_v1_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceGetHostname_v1: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + hostname: *mut nvmlHostname_v1_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlUnitSetLedState: Result< unsafe extern "C" fn(unit: nvmlUnit_t, color: nvmlLedColor_t) -> nvmlReturn_t, ::libloading::Error, @@ -3591,13 +4265,6 @@ pub struct NvmlLib { >, pub nvmlDeviceClearAccountingPids: Result nvmlReturn_t, ::libloading::Error>, - pub nvmlDeviceSetPowerManagementLimit_v2: Result< - unsafe extern "C" fn( - device: nvmlDevice_t, - powerValue: *mut nvmlPowerValue_v2_t, - ) -> nvmlReturn_t, - ::libloading::Error, - 
>, pub nvmlDeviceGetNvLinkState: Result< unsafe extern "C" fn( device: nvmlDevice_t, @@ -3734,6 +4401,10 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceGetNvLinkInfo: Result< + unsafe extern "C" fn(device: nvmlDevice_t, info: *mut nvmlNvLinkInfo_t) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlEventSetCreate: Result nvmlReturn_t, ::libloading::Error>, pub nvmlDeviceRegisterEvents: Result< @@ -3761,6 +4432,22 @@ pub struct NvmlLib { >, pub nvmlEventSetFree: Result nvmlReturn_t, ::libloading::Error>, + pub nvmlSystemEventSetCreate: Result< + unsafe extern "C" fn(request: *mut nvmlSystemEventSetCreateRequest_t) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlSystemEventSetFree: Result< + unsafe extern "C" fn(request: *mut nvmlSystemEventSetFreeRequest_t) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlSystemRegisterEvents: Result< + unsafe extern "C" fn(request: *mut nvmlSystemRegisterEventRequest_t) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlSystemEventSetWait: Result< + unsafe extern "C" fn(request: *mut nvmlSystemEventSetWaitRequest_t) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlDeviceModifyDrainState: Result< unsafe extern "C" fn( pciInfo: *mut nvmlPciInfo_t, @@ -3890,6 +4577,8 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceVgpuForceGspUnload: + Result nvmlReturn_t, ::libloading::Error>, pub nvmlDeviceGetGridLicensableFeatures_v4: Result< unsafe extern "C" fn( device: nvmlDevice_t, @@ -4164,6 +4853,108 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlGpuInstanceGetCreatableVgpus: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pVgpus: *mut nvmlVgpuTypeIdInfo_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlVgpuTypeGetMaxInstancesPerGpuInstance: Result< + unsafe extern "C" fn(pMaxInstance: *mut nvmlVgpuTypeMaxInstance_t) -> nvmlReturn_t, + ::libloading::Error, + >, + pub 
nvmlGpuInstanceGetActiveVgpus: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pVgpuInstanceInfo: *mut nvmlActiveVgpuInstanceInfo_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlGpuInstanceSetVgpuSchedulerState: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pScheduler: *mut nvmlVgpuSchedulerState_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlGpuInstanceGetVgpuSchedulerState: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pSchedulerStateInfo: *mut nvmlVgpuSchedulerStateInfo_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlGpuInstanceGetVgpuSchedulerLog: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pSchedulerLogInfo: *mut nvmlVgpuSchedulerLogInfo_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlGpuInstanceGetVgpuTypeCreatablePlacements: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pCreatablePlacementInfo: *mut nvmlVgpuCreatablePlacementInfo_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlGpuInstanceGetVgpuHeterogeneousMode: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pHeterogeneousMode: *mut nvmlVgpuHeterogeneousMode_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlGpuInstanceSetVgpuHeterogeneousMode: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pHeterogeneousMode: *const nvmlVgpuHeterogeneousMode_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceGetVgpuSchedulerState_v2: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + pSchedulerStateInfo: *mut nvmlVgpuSchedulerStateInfo_v2_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlGpuInstanceGetVgpuSchedulerState_v2: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pSchedulerStateInfo: *mut nvmlVgpuSchedulerStateInfo_v2_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceGetVgpuSchedulerLog_v2: Result< + unsafe 
extern "C" fn( + device: nvmlDevice_t, + pSchedulerLogInfo: *mut nvmlVgpuSchedulerLogInfo_v2_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlGpuInstanceGetVgpuSchedulerLog_v2: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pSchedulerLogInfo: *mut nvmlVgpuSchedulerLogInfo_v2_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceSetVgpuSchedulerState_v2: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + pSchedulerState: *mut nvmlVgpuSchedulerState_v2_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlGpuInstanceSetVgpuSchedulerState_v2: Result< + unsafe extern "C" fn( + gpuInstance: nvmlGpuInstance_t, + pSchedulerState: *mut nvmlVgpuSchedulerState_v2_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlVgpuInstanceGetMetadata: Result< unsafe extern "C" fn( vgpuInstance: nvmlVgpuInstance_t, @@ -4210,17 +5001,17 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, - pub nvmlDeviceGetVgpuSchedulerCapabilities: Result< + pub nvmlDeviceSetVgpuSchedulerState: Result< unsafe extern "C" fn( device: nvmlDevice_t, - pCapabilities: *mut nvmlVgpuSchedulerCapabilities_t, + pSchedulerState: *mut nvmlVgpuSchedulerSetState_t, ) -> nvmlReturn_t, ::libloading::Error, >, - pub nvmlDeviceSetVgpuSchedulerState: Result< + pub nvmlDeviceGetVgpuSchedulerCapabilities: Result< unsafe extern "C" fn( device: nvmlDevice_t, - pSchedulerState: *mut nvmlVgpuSchedulerSetState_t, + pCapabilities: *mut nvmlVgpuSchedulerCapabilities_t, ) -> nvmlReturn_t, ::libloading::Error, >, @@ -4313,6 +5104,17 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceReadWritePRM_v1: Result< + unsafe extern "C" fn(device: nvmlDevice_t, buffer: *mut nvmlPRMTLV_v1_t) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceReadPRMCounters_v1: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + counterList: *mut nvmlPRMCounterList_v1_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, pub 
nvmlDeviceSetMigMode: Result< unsafe extern "C" fn( device: nvmlDevice_t, @@ -4345,6 +5147,14 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceGetGpuInstanceProfileInfoByIdV: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + profileId: raw::c_uint, + info: *mut nvmlGpuInstanceProfileInfo_v2_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlDeviceGetGpuInstancePossiblePlacements_v2: Result< unsafe extern "C" fn( device: nvmlDevice_t, @@ -4589,6 +5399,13 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + updateProfiles: *mut nvmlWorkloadPowerProfileUpdateProfiles_v1_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, pub nvmlDevicePowerSmoothingActivatePresetProfile: Result< unsafe extern "C" fn( device: nvmlDevice_t, @@ -4610,6 +5427,20 @@ pub struct NvmlLib { ) -> nvmlReturn_t, ::libloading::Error, >, + pub nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + errorCounts: *mut nvmlEccSramUniqueUncorrectedErrorCounts_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, + pub nvmlDeviceSetRusdSettings_v1: Result< + unsafe extern "C" fn( + device: nvmlDevice_t, + settings: *mut nvmlRusdSettings_v1_t, + ) -> nvmlReturn_t, + ::libloading::Error, + >, #[cfg(feature = "legacy-functions")] pub nvmlInit: Result nvmlReturn_t, ::libloading::Error>, #[cfg(feature = "legacy-functions")] @@ -4787,7 +5618,7 @@ pub struct NvmlLib { impl NvmlLib { pub unsafe fn new

(path: P) -> Result where - P: AsRef<::std::ffi::OsStr>, + P: libloading::AsFilename, { let library = ::libloading::Library::new(path)?; Self::from_library(library) @@ -4840,6 +5671,9 @@ impl NvmlLib { let nvmlDeviceGetHandleByUUID = __library .get(b"nvmlDeviceGetHandleByUUID\0") .map(|sym| *sym); + let nvmlDeviceGetHandleByUUIDV = __library + .get(b"nvmlDeviceGetHandleByUUIDV\0") + .map(|sym| *sym); let nvmlDeviceGetHandleByPciBusId_v2 = __library .get(b"nvmlDeviceGetHandleByPciBusId_v2\0") .map(|sym| *sym); @@ -4863,6 +5697,15 @@ impl NvmlLib { .get(b"nvmlDeviceClearCpuAffinity\0") .map(|sym| *sym); let nvmlDeviceGetNumaNodeId = __library.get(b"nvmlDeviceGetNumaNodeId\0").map(|sym| *sym); + let nvmlDeviceGetAddressingMode = __library + .get(b"nvmlDeviceGetAddressingMode\0") + .map(|sym| *sym); + let nvmlDeviceGetRepairStatus = __library + .get(b"nvmlDeviceGetRepairStatus\0") + .map(|sym| *sym); + let nvmlDeviceGetUnrepairableMemoryFlag_v1 = __library + .get(b"nvmlDeviceGetUnrepairableMemoryFlag_v1\0") + .map(|sym| *sym); let nvmlDeviceGetTopologyCommonAncestor = __library .get(b"nvmlDeviceGetTopologyCommonAncestor\0") .map(|sym| *sym); @@ -5032,6 +5875,12 @@ impl NvmlLib { .get(b"nvmlDeviceGetPowerManagementDefaultLimit\0") .map(|sym| *sym); let nvmlDeviceGetPowerUsage = __library.get(b"nvmlDeviceGetPowerUsage\0").map(|sym| *sym); + let nvmlDeviceGetPowerMizerMode_v1 = __library + .get(b"nvmlDeviceGetPowerMizerMode_v1\0") + .map(|sym| *sym); + let nvmlDeviceSetPowerMizerMode_v1 = __library + .get(b"nvmlDeviceSetPowerMizerMode_v1\0") + .map(|sym| *sym); let nvmlDeviceGetTotalEnergyConsumption = __library .get(b"nvmlDeviceGetTotalEnergyConsumption\0") .map(|sym| *sym); @@ -5195,6 +6044,9 @@ impl NvmlLib { let nvmlDeviceGetSramEccErrorStatus = __library .get(b"nvmlDeviceGetSramEccErrorStatus\0") .map(|sym| *sym); + let nvmlDeviceSetPowerManagementLimit_v2 = __library + .get(b"nvmlDeviceSetPowerManagementLimit_v2\0") + .map(|sym| *sym); let 
nvmlDeviceGetAccountingMode = __library .get(b"nvmlDeviceGetAccountingMode\0") .map(|sym| *sym); @@ -5237,6 +6089,9 @@ impl NvmlLib { let nvmlDeviceGetPlatformInfo = __library .get(b"nvmlDeviceGetPlatformInfo\0") .map(|sym| *sym); + let nvmlDeviceGetPdi = __library.get(b"nvmlDeviceGetPdi\0").map(|sym| *sym); + let nvmlDeviceSetHostname_v1 = __library.get(b"nvmlDeviceSetHostname_v1\0").map(|sym| *sym); + let nvmlDeviceGetHostname_v1 = __library.get(b"nvmlDeviceGetHostname_v1\0").map(|sym| *sym); let nvmlUnitSetLedState = __library.get(b"nvmlUnitSetLedState\0").map(|sym| *sym); let nvmlDeviceSetPersistenceMode = __library .get(b"nvmlDeviceSetPersistenceMode\0") @@ -5302,9 +6157,6 @@ impl NvmlLib { let nvmlDeviceClearAccountingPids = __library .get(b"nvmlDeviceClearAccountingPids\0") .map(|sym| *sym); - let nvmlDeviceSetPowerManagementLimit_v2 = __library - .get(b"nvmlDeviceSetPowerManagementLimit_v2\0") - .map(|sym| *sym); let nvmlDeviceGetNvLinkState = __library.get(b"nvmlDeviceGetNvLinkState\0").map(|sym| *sym); let nvmlDeviceGetNvLinkVersion = __library .get(b"nvmlDeviceGetNvLinkVersion\0") @@ -5357,6 +6209,7 @@ impl NvmlLib { let nvmlDeviceSetNvlinkBwMode = __library .get(b"nvmlDeviceSetNvlinkBwMode\0") .map(|sym| *sym); + let nvmlDeviceGetNvLinkInfo = __library.get(b"nvmlDeviceGetNvLinkInfo\0").map(|sym| *sym); let nvmlEventSetCreate = __library.get(b"nvmlEventSetCreate\0").map(|sym| *sym); let nvmlDeviceRegisterEvents = __library.get(b"nvmlDeviceRegisterEvents\0").map(|sym| *sym); let nvmlDeviceGetSupportedEventTypes = __library @@ -5364,6 +6217,10 @@ impl NvmlLib { .map(|sym| *sym); let nvmlEventSetWait_v2 = __library.get(b"nvmlEventSetWait_v2\0").map(|sym| *sym); let nvmlEventSetFree = __library.get(b"nvmlEventSetFree\0").map(|sym| *sym); + let nvmlSystemEventSetCreate = __library.get(b"nvmlSystemEventSetCreate\0").map(|sym| *sym); + let nvmlSystemEventSetFree = __library.get(b"nvmlSystemEventSetFree\0").map(|sym| *sym); + let nvmlSystemRegisterEvents = 
__library.get(b"nvmlSystemRegisterEvents\0").map(|sym| *sym); + let nvmlSystemEventSetWait = __library.get(b"nvmlSystemEventSetWait\0").map(|sym| *sym); let nvmlDeviceModifyDrainState = __library .get(b"nvmlDeviceModifyDrainState\0") .map(|sym| *sym); @@ -5412,6 +6269,9 @@ impl NvmlLib { let nvmlDeviceSetVgpuCapabilities = __library .get(b"nvmlDeviceSetVgpuCapabilities\0") .map(|sym| *sym); + let nvmlDeviceVgpuForceGspUnload = __library + .get(b"nvmlDeviceVgpuForceGspUnload\0") + .map(|sym| *sym); let nvmlDeviceGetGridLicensableFeatures_v4 = __library .get(b"nvmlDeviceGetGridLicensableFeatures_v4\0") .map(|sym| *sym); @@ -5502,6 +6362,51 @@ impl NvmlLib { let nvmlVgpuInstanceGetMdevUUID = __library .get(b"nvmlVgpuInstanceGetMdevUUID\0") .map(|sym| *sym); + let nvmlGpuInstanceGetCreatableVgpus = __library + .get(b"nvmlGpuInstanceGetCreatableVgpus\0") + .map(|sym| *sym); + let nvmlVgpuTypeGetMaxInstancesPerGpuInstance = __library + .get(b"nvmlVgpuTypeGetMaxInstancesPerGpuInstance\0") + .map(|sym| *sym); + let nvmlGpuInstanceGetActiveVgpus = __library + .get(b"nvmlGpuInstanceGetActiveVgpus\0") + .map(|sym| *sym); + let nvmlGpuInstanceSetVgpuSchedulerState = __library + .get(b"nvmlGpuInstanceSetVgpuSchedulerState\0") + .map(|sym| *sym); + let nvmlGpuInstanceGetVgpuSchedulerState = __library + .get(b"nvmlGpuInstanceGetVgpuSchedulerState\0") + .map(|sym| *sym); + let nvmlGpuInstanceGetVgpuSchedulerLog = __library + .get(b"nvmlGpuInstanceGetVgpuSchedulerLog\0") + .map(|sym| *sym); + let nvmlGpuInstanceGetVgpuTypeCreatablePlacements = __library + .get(b"nvmlGpuInstanceGetVgpuTypeCreatablePlacements\0") + .map(|sym| *sym); + let nvmlGpuInstanceGetVgpuHeterogeneousMode = __library + .get(b"nvmlGpuInstanceGetVgpuHeterogeneousMode\0") + .map(|sym| *sym); + let nvmlGpuInstanceSetVgpuHeterogeneousMode = __library + .get(b"nvmlGpuInstanceSetVgpuHeterogeneousMode\0") + .map(|sym| *sym); + let nvmlDeviceGetVgpuSchedulerState_v2 = __library + 
.get(b"nvmlDeviceGetVgpuSchedulerState_v2\0") + .map(|sym| *sym); + let nvmlGpuInstanceGetVgpuSchedulerState_v2 = __library + .get(b"nvmlGpuInstanceGetVgpuSchedulerState_v2\0") + .map(|sym| *sym); + let nvmlDeviceGetVgpuSchedulerLog_v2 = __library + .get(b"nvmlDeviceGetVgpuSchedulerLog_v2\0") + .map(|sym| *sym); + let nvmlGpuInstanceGetVgpuSchedulerLog_v2 = __library + .get(b"nvmlGpuInstanceGetVgpuSchedulerLog_v2\0") + .map(|sym| *sym); + let nvmlDeviceSetVgpuSchedulerState_v2 = __library + .get(b"nvmlDeviceSetVgpuSchedulerState_v2\0") + .map(|sym| *sym); + let nvmlGpuInstanceSetVgpuSchedulerState_v2 = __library + .get(b"nvmlGpuInstanceSetVgpuSchedulerState_v2\0") + .map(|sym| *sym); let nvmlVgpuInstanceGetMetadata = __library .get(b"nvmlVgpuInstanceGetMetadata\0") .map(|sym| *sym); @@ -5518,12 +6423,12 @@ impl NvmlLib { let nvmlDeviceGetVgpuSchedulerState = __library .get(b"nvmlDeviceGetVgpuSchedulerState\0") .map(|sym| *sym); - let nvmlDeviceGetVgpuSchedulerCapabilities = __library - .get(b"nvmlDeviceGetVgpuSchedulerCapabilities\0") - .map(|sym| *sym); let nvmlDeviceSetVgpuSchedulerState = __library .get(b"nvmlDeviceSetVgpuSchedulerState\0") .map(|sym| *sym); + let nvmlDeviceGetVgpuSchedulerCapabilities = __library + .get(b"nvmlDeviceGetVgpuSchedulerCapabilities\0") + .map(|sym| *sym); let nvmlGetVgpuVersion = __library.get(b"nvmlGetVgpuVersion\0").map(|sym| *sym); let nvmlSetVgpuVersion = __library.get(b"nvmlSetVgpuVersion\0").map(|sym| *sym); let nvmlDeviceGetVgpuUtilization = __library @@ -5559,6 +6464,12 @@ impl NvmlLib { let nvmlGetExcludedDeviceInfoByIndex = __library .get(b"nvmlGetExcludedDeviceInfoByIndex\0") .map(|sym| *sym); + let nvmlDeviceReadWritePRM_v1 = __library + .get(b"nvmlDeviceReadWritePRM_v1\0") + .map(|sym| *sym); + let nvmlDeviceReadPRMCounters_v1 = __library + .get(b"nvmlDeviceReadPRMCounters_v1\0") + .map(|sym| *sym); let nvmlDeviceSetMigMode = __library.get(b"nvmlDeviceSetMigMode\0").map(|sym| *sym); let nvmlDeviceGetMigMode = 
__library.get(b"nvmlDeviceGetMigMode\0").map(|sym| *sym); let nvmlDeviceGetGpuInstanceProfileInfo = __library @@ -5567,6 +6478,9 @@ impl NvmlLib { let nvmlDeviceGetGpuInstanceProfileInfoV = __library .get(b"nvmlDeviceGetGpuInstanceProfileInfoV\0") .map(|sym| *sym); + let nvmlDeviceGetGpuInstanceProfileInfoByIdV = __library + .get(b"nvmlDeviceGetGpuInstanceProfileInfoByIdV\0") + .map(|sym| *sym); let nvmlDeviceGetGpuInstancePossiblePlacements_v2 = __library .get(b"nvmlDeviceGetGpuInstancePossiblePlacements_v2\0") .map(|sym| *sym); @@ -5664,6 +6578,9 @@ impl NvmlLib { let nvmlDeviceWorkloadPowerProfileClearRequestedProfiles = __library .get(b"nvmlDeviceWorkloadPowerProfileClearRequestedProfiles\0") .map(|sym| *sym); + let nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1 = __library + .get(b"nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1\0") + .map(|sym| *sym); let nvmlDevicePowerSmoothingActivatePresetProfile = __library .get(b"nvmlDevicePowerSmoothingActivatePresetProfile\0") .map(|sym| *sym); @@ -5673,6 +6590,12 @@ impl NvmlLib { let nvmlDevicePowerSmoothingSetState = __library .get(b"nvmlDevicePowerSmoothingSetState\0") .map(|sym| *sym); + let nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts = __library + .get(b"nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts\0") + .map(|sym| *sym); + let nvmlDeviceSetRusdSettings_v1 = __library + .get(b"nvmlDeviceSetRusdSettings_v1\0") + .map(|sym| *sym); #[cfg(feature = "legacy-functions")] let nvmlInit = __library.get(b"nvmlInit\0").map(|sym| *sym); #[cfg(feature = "legacy-functions")] @@ -5775,6 +6698,7 @@ impl NvmlLib { nvmlDeviceGetHandleByIndex_v2, nvmlDeviceGetHandleBySerial, nvmlDeviceGetHandleByUUID, + nvmlDeviceGetHandleByUUIDV, nvmlDeviceGetHandleByPciBusId_v2, nvmlDeviceGetName, nvmlDeviceGetBrand, @@ -5788,6 +6712,9 @@ impl NvmlLib { nvmlDeviceSetCpuAffinity, nvmlDeviceClearCpuAffinity, nvmlDeviceGetNumaNodeId, + nvmlDeviceGetAddressingMode, + nvmlDeviceGetRepairStatus, + nvmlDeviceGetUnrepairableMemoryFlag_v1, 
nvmlDeviceGetTopologyCommonAncestor, nvmlDeviceGetTopologyNearestGpus, nvmlDeviceGetP2PStatus, @@ -5855,6 +6782,8 @@ impl NvmlLib { nvmlDeviceGetPowerManagementLimitConstraints, nvmlDeviceGetPowerManagementDefaultLimit, nvmlDeviceGetPowerUsage, + nvmlDeviceGetPowerMizerMode_v1, + nvmlDeviceSetPowerMizerMode_v1, nvmlDeviceGetTotalEnergyConsumption, nvmlDeviceGetEnforcedPowerLimit, nvmlDeviceGetGpuOperationMode, @@ -5918,6 +6847,7 @@ impl NvmlLib { nvmlDeviceGetGspFirmwareVersion, nvmlDeviceGetGspFirmwareMode, nvmlDeviceGetSramEccErrorStatus, + nvmlDeviceSetPowerManagementLimit_v2, nvmlDeviceGetAccountingMode, nvmlDeviceGetAccountingStats, nvmlDeviceGetAccountingPids, @@ -5932,6 +6862,9 @@ impl NvmlLib { nvmlDeviceGetProcessUtilization, nvmlDeviceGetProcessesUtilizationInfo, nvmlDeviceGetPlatformInfo, + nvmlDeviceGetPdi, + nvmlDeviceSetHostname_v1, + nvmlDeviceGetHostname_v1, nvmlUnitSetLedState, nvmlDeviceSetPersistenceMode, nvmlDeviceSetComputeMode, @@ -5957,7 +6890,6 @@ impl NvmlLib { nvmlDeviceSetMemClkVfOffset, nvmlDeviceSetAccountingMode, nvmlDeviceClearAccountingPids, - nvmlDeviceSetPowerManagementLimit_v2, nvmlDeviceGetNvLinkState, nvmlDeviceGetNvLinkVersion, nvmlDeviceGetNvLinkCapability, @@ -5976,11 +6908,16 @@ impl NvmlLib { nvmlDeviceGetNvlinkSupportedBwModes, nvmlDeviceGetNvlinkBwMode, nvmlDeviceSetNvlinkBwMode, + nvmlDeviceGetNvLinkInfo, nvmlEventSetCreate, nvmlDeviceRegisterEvents, nvmlDeviceGetSupportedEventTypes, nvmlEventSetWait_v2, nvmlEventSetFree, + nvmlSystemEventSetCreate, + nvmlSystemEventSetFree, + nvmlSystemRegisterEvents, + nvmlSystemEventSetWait, nvmlDeviceModifyDrainState, nvmlDeviceQueryDrainState, nvmlDeviceRemoveGpu_v2, @@ -5999,6 +6936,7 @@ impl NvmlLib { nvmlVgpuTypeGetFbReservation, nvmlVgpuInstanceGetRuntimeStateSize, nvmlDeviceSetVgpuCapabilities, + nvmlDeviceVgpuForceGspUnload, nvmlDeviceGetGridLicensableFeatures_v4, nvmlGetVgpuDriverCapabilities, nvmlDeviceGetVgpuCapabilities, @@ -6035,14 +6973,29 @@ impl NvmlLib { 
nvmlVgpuInstanceGetGpuPciId, nvmlVgpuTypeGetCapabilities, nvmlVgpuInstanceGetMdevUUID, + nvmlGpuInstanceGetCreatableVgpus, + nvmlVgpuTypeGetMaxInstancesPerGpuInstance, + nvmlGpuInstanceGetActiveVgpus, + nvmlGpuInstanceSetVgpuSchedulerState, + nvmlGpuInstanceGetVgpuSchedulerState, + nvmlGpuInstanceGetVgpuSchedulerLog, + nvmlGpuInstanceGetVgpuTypeCreatablePlacements, + nvmlGpuInstanceGetVgpuHeterogeneousMode, + nvmlGpuInstanceSetVgpuHeterogeneousMode, + nvmlDeviceGetVgpuSchedulerState_v2, + nvmlGpuInstanceGetVgpuSchedulerState_v2, + nvmlDeviceGetVgpuSchedulerLog_v2, + nvmlGpuInstanceGetVgpuSchedulerLog_v2, + nvmlDeviceSetVgpuSchedulerState_v2, + nvmlGpuInstanceSetVgpuSchedulerState_v2, nvmlVgpuInstanceGetMetadata, nvmlDeviceGetVgpuMetadata, nvmlGetVgpuCompatibility, nvmlDeviceGetPgpuMetadataString, nvmlDeviceGetVgpuSchedulerLog, nvmlDeviceGetVgpuSchedulerState, - nvmlDeviceGetVgpuSchedulerCapabilities, nvmlDeviceSetVgpuSchedulerState, + nvmlDeviceGetVgpuSchedulerCapabilities, nvmlGetVgpuVersion, nvmlSetVgpuVersion, nvmlDeviceGetVgpuUtilization, @@ -6056,10 +7009,13 @@ impl NvmlLib { nvmlVgpuInstanceGetLicenseInfo_v2, nvmlGetExcludedDeviceCount, nvmlGetExcludedDeviceInfoByIndex, + nvmlDeviceReadWritePRM_v1, + nvmlDeviceReadPRMCounters_v1, nvmlDeviceSetMigMode, nvmlDeviceGetMigMode, nvmlDeviceGetGpuInstanceProfileInfo, nvmlDeviceGetGpuInstanceProfileInfoV, + nvmlDeviceGetGpuInstanceProfileInfoByIdV, nvmlDeviceGetGpuInstancePossiblePlacements_v2, nvmlDeviceGetGpuInstanceRemainingCapacity, nvmlDeviceCreateGpuInstance, @@ -6097,9 +7053,12 @@ impl NvmlLib { nvmlDeviceWorkloadPowerProfileGetCurrentProfiles, nvmlDeviceWorkloadPowerProfileSetRequestedProfiles, nvmlDeviceWorkloadPowerProfileClearRequestedProfiles, + nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1, nvmlDevicePowerSmoothingActivatePresetProfile, nvmlDevicePowerSmoothingUpdatePresetProfileParam, nvmlDevicePowerSmoothingSetState, + nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts, + nvmlDeviceSetRusdSettings_v1, 
#[cfg(feature = "legacy-functions")] nvmlInit, #[cfg(feature = "legacy-functions")] @@ -6375,6 +7334,16 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(uuid, device) } + pub unsafe fn nvmlDeviceGetHandleByUUIDV( + &self, + uuid: *const nvmlUUID_t, + device: *mut nvmlDevice_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetHandleByUUIDV + .as_ref() + .expect("Expected function, got error."))(uuid, device) + } pub unsafe fn nvmlDeviceGetHandleByPciBusId_v2( &self, pciBusId: *const raw::c_char, @@ -6504,6 +7473,36 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, node) } + pub unsafe fn nvmlDeviceGetAddressingMode( + &self, + device: nvmlDevice_t, + mode: *mut nvmlDeviceAddressingMode_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetAddressingMode + .as_ref() + .expect("Expected function, got error."))(device, mode) + } + pub unsafe fn nvmlDeviceGetRepairStatus( + &self, + device: nvmlDevice_t, + repairStatus: *mut nvmlRepairStatus_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetRepairStatus + .as_ref() + .expect("Expected function, got error."))(device, repairStatus) + } + pub unsafe fn nvmlDeviceGetUnrepairableMemoryFlag_v1( + &self, + device: nvmlDevice_t, + unrepairableMemoryStatus: *mut nvmlUnrepairableMemoryStatus_v1_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetUnrepairableMemoryFlag_v1 + .as_ref() + .expect("Expected function, got error."))(device, unrepairableMemoryStatus) + } pub unsafe fn nvmlDeviceGetTopologyCommonAncestor( &self, device1: nvmlDevice_t, @@ -7215,6 +8214,26 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, power) } + pub unsafe fn nvmlDeviceGetPowerMizerMode_v1( + &self, + device: nvmlDevice_t, + powerMizerMode: *mut nvmlDevicePowerMizerModes_v1_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetPowerMizerMode_v1 + .as_ref() + .expect("Expected function, got error."))(device, powerMizerMode) + } + pub unsafe fn nvmlDeviceSetPowerMizerMode_v1( + &self, + device: 
nvmlDevice_t, + powerMizerMode: *mut nvmlDevicePowerMizerModes_v1_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceSetPowerMizerMode_v1 + .as_ref() + .expect("Expected function, got error."))(device, powerMizerMode) + } pub unsafe fn nvmlDeviceGetTotalEnergyConsumption( &self, device: nvmlDevice_t, @@ -7895,6 +8914,16 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, status) } + pub unsafe fn nvmlDeviceSetPowerManagementLimit_v2( + &self, + device: nvmlDevice_t, + powerValue: *mut nvmlPowerValue_v2_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceSetPowerManagementLimit_v2 + .as_ref() + .expect("Expected function, got error."))(device, powerValue) + } pub unsafe fn nvmlDeviceGetAccountingMode( &self, device: nvmlDevice_t, @@ -8060,6 +9089,36 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, platformInfo) } + pub unsafe fn nvmlDeviceGetPdi( + &self, + device: nvmlDevice_t, + pdi: *mut nvmlPdi_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetPdi + .as_ref() + .expect("Expected function, got error."))(device, pdi) + } + pub unsafe fn nvmlDeviceSetHostname_v1( + &self, + device: nvmlDevice_t, + hostname: *mut nvmlHostname_v1_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceSetHostname_v1 + .as_ref() + .expect("Expected function, got error."))(device, hostname) + } + pub unsafe fn nvmlDeviceGetHostname_v1( + &self, + device: nvmlDevice_t, + hostname: *mut nvmlHostname_v1_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetHostname_v1 + .as_ref() + .expect("Expected function, got error."))(device, hostname) + } pub unsafe fn nvmlUnitSetLedState( &self, unit: nvmlUnit_t, @@ -8303,16 +9362,6 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device) } - pub unsafe fn nvmlDeviceSetPowerManagementLimit_v2( - &self, - device: nvmlDevice_t, - powerValue: *mut nvmlPowerValue_v2_t, - ) -> nvmlReturn_t { - (self - .nvmlDeviceSetPowerManagementLimit_v2 - .as_ref() - .expect("Expected function, got error."))(device, 
powerValue) - } pub unsafe fn nvmlDeviceGetNvLinkState( &self, device: nvmlDevice_t, @@ -8506,6 +9555,16 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, setBwMode) } + pub unsafe fn nvmlDeviceGetNvLinkInfo( + &self, + device: nvmlDevice_t, + info: *mut nvmlNvLinkInfo_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetNvLinkInfo + .as_ref() + .expect("Expected function, got error."))(device, info) + } pub unsafe fn nvmlEventSetCreate(&self, set: *mut nvmlEventSet_t) -> nvmlReturn_t { (self .nvmlEventSetCreate @@ -8550,6 +9609,42 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(set) } + pub unsafe fn nvmlSystemEventSetCreate( + &self, + request: *mut nvmlSystemEventSetCreateRequest_t, + ) -> nvmlReturn_t { + (self + .nvmlSystemEventSetCreate + .as_ref() + .expect("Expected function, got error."))(request) + } + pub unsafe fn nvmlSystemEventSetFree( + &self, + request: *mut nvmlSystemEventSetFreeRequest_t, + ) -> nvmlReturn_t { + (self + .nvmlSystemEventSetFree + .as_ref() + .expect("Expected function, got error."))(request) + } + pub unsafe fn nvmlSystemRegisterEvents( + &self, + request: *mut nvmlSystemRegisterEventRequest_t, + ) -> nvmlReturn_t { + (self + .nvmlSystemRegisterEvents + .as_ref() + .expect("Expected function, got error."))(request) + } + pub unsafe fn nvmlSystemEventSetWait( + &self, + request: *mut nvmlSystemEventSetWaitRequest_t, + ) -> nvmlReturn_t { + (self + .nvmlSystemEventSetWait + .as_ref() + .expect("Expected function, got error."))(request) + } pub unsafe fn nvmlDeviceModifyDrainState( &self, pciInfo: *mut nvmlPciInfo_t, @@ -8732,6 +9827,12 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, capability, state) } + pub unsafe fn nvmlDeviceVgpuForceGspUnload(&self, device: nvmlDevice_t) -> nvmlReturn_t { + (self + .nvmlDeviceVgpuForceGspUnload + .as_ref() + .expect("Expected function, got error."))(device) + } pub unsafe fn nvmlDeviceGetGridLicensableFeatures_v4( &self, 
device: nvmlDevice_t, @@ -9125,6 +10226,155 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(vgpuInstance, mdevUuid, size) } + pub unsafe fn nvmlGpuInstanceGetCreatableVgpus( + &self, + gpuInstance: nvmlGpuInstance_t, + pVgpus: *mut nvmlVgpuTypeIdInfo_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceGetCreatableVgpus + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pVgpus) + } + pub unsafe fn nvmlVgpuTypeGetMaxInstancesPerGpuInstance( + &self, + pMaxInstance: *mut nvmlVgpuTypeMaxInstance_t, + ) -> nvmlReturn_t { + (self + .nvmlVgpuTypeGetMaxInstancesPerGpuInstance + .as_ref() + .expect("Expected function, got error."))(pMaxInstance) + } + pub unsafe fn nvmlGpuInstanceGetActiveVgpus( + &self, + gpuInstance: nvmlGpuInstance_t, + pVgpuInstanceInfo: *mut nvmlActiveVgpuInstanceInfo_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceGetActiveVgpus + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pVgpuInstanceInfo) + } + pub unsafe fn nvmlGpuInstanceSetVgpuSchedulerState( + &self, + gpuInstance: nvmlGpuInstance_t, + pScheduler: *mut nvmlVgpuSchedulerState_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceSetVgpuSchedulerState + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pScheduler) + } + pub unsafe fn nvmlGpuInstanceGetVgpuSchedulerState( + &self, + gpuInstance: nvmlGpuInstance_t, + pSchedulerStateInfo: *mut nvmlVgpuSchedulerStateInfo_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceGetVgpuSchedulerState + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pSchedulerStateInfo) + } + pub unsafe fn nvmlGpuInstanceGetVgpuSchedulerLog( + &self, + gpuInstance: nvmlGpuInstance_t, + pSchedulerLogInfo: *mut nvmlVgpuSchedulerLogInfo_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceGetVgpuSchedulerLog + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pSchedulerLogInfo) + } + pub unsafe fn nvmlGpuInstanceGetVgpuTypeCreatablePlacements( + &self, + 
gpuInstance: nvmlGpuInstance_t, + pCreatablePlacementInfo: *mut nvmlVgpuCreatablePlacementInfo_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceGetVgpuTypeCreatablePlacements + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pCreatablePlacementInfo) + } + pub unsafe fn nvmlGpuInstanceGetVgpuHeterogeneousMode( + &self, + gpuInstance: nvmlGpuInstance_t, + pHeterogeneousMode: *mut nvmlVgpuHeterogeneousMode_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceGetVgpuHeterogeneousMode + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pHeterogeneousMode) + } + pub unsafe fn nvmlGpuInstanceSetVgpuHeterogeneousMode( + &self, + gpuInstance: nvmlGpuInstance_t, + pHeterogeneousMode: *const nvmlVgpuHeterogeneousMode_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceSetVgpuHeterogeneousMode + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pHeterogeneousMode) + } + pub unsafe fn nvmlDeviceGetVgpuSchedulerState_v2( + &self, + device: nvmlDevice_t, + pSchedulerStateInfo: *mut nvmlVgpuSchedulerStateInfo_v2_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetVgpuSchedulerState_v2 + .as_ref() + .expect("Expected function, got error."))(device, pSchedulerStateInfo) + } + pub unsafe fn nvmlGpuInstanceGetVgpuSchedulerState_v2( + &self, + gpuInstance: nvmlGpuInstance_t, + pSchedulerStateInfo: *mut nvmlVgpuSchedulerStateInfo_v2_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceGetVgpuSchedulerState_v2 + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pSchedulerStateInfo) + } + pub unsafe fn nvmlDeviceGetVgpuSchedulerLog_v2( + &self, + device: nvmlDevice_t, + pSchedulerLogInfo: *mut nvmlVgpuSchedulerLogInfo_v2_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetVgpuSchedulerLog_v2 + .as_ref() + .expect("Expected function, got error."))(device, pSchedulerLogInfo) + } + pub unsafe fn nvmlGpuInstanceGetVgpuSchedulerLog_v2( + &self, + gpuInstance: nvmlGpuInstance_t, + pSchedulerLogInfo: *mut nvmlVgpuSchedulerLogInfo_v2_t, 
+ ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceGetVgpuSchedulerLog_v2 + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pSchedulerLogInfo) + } + pub unsafe fn nvmlDeviceSetVgpuSchedulerState_v2( + &self, + device: nvmlDevice_t, + pSchedulerState: *mut nvmlVgpuSchedulerState_v2_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceSetVgpuSchedulerState_v2 + .as_ref() + .expect("Expected function, got error."))(device, pSchedulerState) + } + pub unsafe fn nvmlGpuInstanceSetVgpuSchedulerState_v2( + &self, + gpuInstance: nvmlGpuInstance_t, + pSchedulerState: *mut nvmlVgpuSchedulerState_v2_t, + ) -> nvmlReturn_t { + (self + .nvmlGpuInstanceSetVgpuSchedulerState_v2 + .as_ref() + .expect("Expected function, got error."))(gpuInstance, pSchedulerState) + } pub unsafe fn nvmlVgpuInstanceGetMetadata( &self, vgpuInstance: nvmlVgpuInstance_t, @@ -9191,25 +10441,25 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, pSchedulerState) } - pub unsafe fn nvmlDeviceGetVgpuSchedulerCapabilities( + pub unsafe fn nvmlDeviceSetVgpuSchedulerState( &self, device: nvmlDevice_t, - pCapabilities: *mut nvmlVgpuSchedulerCapabilities_t, + pSchedulerState: *mut nvmlVgpuSchedulerSetState_t, ) -> nvmlReturn_t { (self - .nvmlDeviceGetVgpuSchedulerCapabilities + .nvmlDeviceSetVgpuSchedulerState .as_ref() - .expect("Expected function, got error."))(device, pCapabilities) + .expect("Expected function, got error."))(device, pSchedulerState) } - pub unsafe fn nvmlDeviceSetVgpuSchedulerState( + pub unsafe fn nvmlDeviceGetVgpuSchedulerCapabilities( &self, device: nvmlDevice_t, - pSchedulerState: *mut nvmlVgpuSchedulerSetState_t, + pCapabilities: *mut nvmlVgpuSchedulerCapabilities_t, ) -> nvmlReturn_t { (self - .nvmlDeviceSetVgpuSchedulerState + .nvmlDeviceGetVgpuSchedulerCapabilities .as_ref() - .expect("Expected function, got error."))(device, pSchedulerState) + .expect("Expected function, got error."))(device, pCapabilities) } pub unsafe fn nvmlGetVgpuVersion( &self, 
@@ -9350,6 +10600,26 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(index, info) } + pub unsafe fn nvmlDeviceReadWritePRM_v1( + &self, + device: nvmlDevice_t, + buffer: *mut nvmlPRMTLV_v1_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceReadWritePRM_v1 + .as_ref() + .expect("Expected function, got error."))(device, buffer) + } + pub unsafe fn nvmlDeviceReadPRMCounters_v1( + &self, + device: nvmlDevice_t, + counterList: *mut nvmlPRMCounterList_v1_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceReadPRMCounters_v1 + .as_ref() + .expect("Expected function, got error."))(device, counterList) + } pub unsafe fn nvmlDeviceSetMigMode( &self, device: nvmlDevice_t, @@ -9394,6 +10664,17 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, profile, info) } + pub unsafe fn nvmlDeviceGetGpuInstanceProfileInfoByIdV( + &self, + device: nvmlDevice_t, + profileId: raw::c_uint, + info: *mut nvmlGpuInstanceProfileInfo_v2_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetGpuInstanceProfileInfoByIdV + .as_ref() + .expect("Expected function, got error."))(device, profileId, info) + } pub unsafe fn nvmlDeviceGetGpuInstancePossiblePlacements_v2( &self, device: nvmlDevice_t, @@ -9787,6 +11068,16 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, requestedProfiles) } + pub unsafe fn nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1( + &self, + device: nvmlDevice_t, + updateProfiles: *mut nvmlWorkloadPowerProfileUpdateProfiles_v1_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1 + .as_ref() + .expect("Expected function, got error."))(device, updateProfiles) + } pub unsafe fn nvmlDevicePowerSmoothingActivatePresetProfile( &self, device: nvmlDevice_t, @@ -9817,6 +11108,26 @@ impl NvmlLib { .as_ref() .expect("Expected function, got error."))(device, state) } + pub unsafe fn nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts( + &self, + device: nvmlDevice_t, + errorCounts: *mut 
nvmlEccSramUniqueUncorrectedErrorCounts_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts + .as_ref() + .expect("Expected function, got error."))(device, errorCounts) + } + pub unsafe fn nvmlDeviceSetRusdSettings_v1( + &self, + device: nvmlDevice_t, + settings: *mut nvmlRusdSettings_v1_t, + ) -> nvmlReturn_t { + (self + .nvmlDeviceSetRusdSettings_v1 + .as_ref() + .expect("Expected function, got error."))(device, settings) + } #[cfg(feature = "legacy-functions")] pub unsafe fn nvmlInit(&self) -> nvmlReturn_t { (self diff --git a/nvml-wrapper-sys/src/lib.rs b/nvml-wrapper-sys/src/lib.rs index 2c0dc40..a182c5c 100644 --- a/nvml-wrapper-sys/src/lib.rs +++ b/nvml-wrapper-sys/src/lib.rs @@ -30,10 +30,15 @@ there's a convincing reason to do so; please file an issue. ## NVML Support -These bindings were generated for NVML version 11. Each new version of NVML is +These bindings were generated for NVML version 13. Each new version of NVML is guaranteed to be backwards-compatible according to NVIDIA, so these bindings should be useful regardless of NVML version bumps. +Note that NVML version 13.0 update 1 (and/or driver 580TRD2) [breaks backwards compatibility](https://docs.nvidia.com/deploy/nvml-api/known-issues.html#known-issues): + +> NVML Field Values from #251 - #273 (Power Smoothing, Clock Event Reason, and Sync Power Balancing related field values) have changed between 13.0 and 13.0U1/v580TRD2. +> Any application that is using these field IDs must be recompiled using the NVML header file from CUDA 13.0 Update 1 in order to continue working correctly with NVIDIA drivers v580 TRD2 and beyond. + ### Legacy Functions Sometimes there will be function-level API version bumps in new NVML releases. 
diff --git a/nvml-wrapper/Cargo.toml b/nvml-wrapper/Cargo.toml index 60a31d5..1362b7d 100644 --- a/nvml-wrapper/Cargo.toml +++ b/nvml-wrapper/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "nvml-wrapper" -version = "0.12.0" +version = "0.13.0-utilidata.0" authors = ["Cldfire"] description = "A safe and ergonomic Rust wrapper for the NVIDIA Management Library" readme = "../README.md" @@ -19,14 +19,14 @@ legacy-functions = ["nvml-wrapper-sys/legacy-functions"] serde = ["dep:serde", "dep:serde_derive", "bitflags/serde"] [dependencies] -thiserror = "1.0" -bitflags = "2.4.0" -serde = { version = "1.0", optional = true } -serde_derive = { version = "1.0", optional = true } -nvml-wrapper-sys = { version = "0.9.0", path = "../nvml-wrapper-sys" } -wrapcenum-derive = "0.4.1" -libloading = "0.8.1" -static_assertions = "1.1" +thiserror = "2" +bitflags = "2" +serde = { version = "1", optional = true } +serde_derive = { version = "1", optional = true } +nvml-wrapper-sys = { version = "0.10.0-utilidata.0", path = "../nvml-wrapper-sys" } +wrapcenum-derive = "0.4" +libloading = "0.9" +static_assertions = "1" [dev-dependencies] # Used in the `basic_usage` example diff --git a/nvml-wrapper/src/device.rs b/nvml-wrapper/src/device.rs index d7447c1..e4cf1b6 100644 --- a/nvml-wrapper/src/device.rs +++ b/nvml-wrapper/src/device.rs @@ -7395,7 +7395,6 @@ mod test { let nvml = nvml(); test_with_device(3, &nvml, |device| device.gsp_firmware_version()) } - #[test] fn field_values_for() { let nvml = nvml(); @@ -7429,6 +7428,9 @@ mod test { FieldId(NVML_FI_DEV_ECC_SBE_AGG_TEX), FieldId(NVML_FI_DEV_ECC_DBE_AGG_TEX), FieldId(NVML_FI_DEV_ECC_DBE_AGG_CBU), + FieldId(NVML_FI_DEV_RETIRED_SBE), + FieldId(NVML_FI_DEV_RETIRED_DBE), + FieldId(NVML_FI_DEV_RETIRED_PENDING), FieldId(NVML_FI_DEV_PERF_POLICY_POWER), FieldId(NVML_FI_DEV_PERF_POLICY_THERMAL), FieldId(NVML_FI_DEV_PERF_POLICY_SYNC_BOOST), @@ -7439,6 +7441,63 @@ mod test { FieldId(NVML_FI_DEV_PERF_POLICY_TOTAL_BASE_CLOCKS), 
FieldId(NVML_FI_DEV_MEMORY_TEMP), FieldId(NVML_FI_DEV_TOTAL_ENERGY_CONSUMPTION), + FieldId(NVML_FI_DEV_RETIRED_PENDING_SBE), + FieldId(NVML_FI_DEV_RETIRED_PENDING_DBE), + FieldId(NVML_FI_DEV_PCIE_REPLAY_COUNTER), + FieldId(NVML_FI_DEV_PCIE_REPLAY_ROLLOVER_COUNTER), + FieldId(NVML_FI_DEV_REMAPPED_COR), + FieldId(NVML_FI_DEV_REMAPPED_UNC), + FieldId(NVML_FI_DEV_REMAPPED_PENDING), + FieldId(NVML_FI_DEV_REMAPPED_FAILURE), + FieldId(NVML_FI_DEV_NVSWITCH_CONNECTED_LINK_COUNT), + FieldId(NVML_FI_DEV_PCIE_L0_TO_RECOVERY_COUNTER), + FieldId(NVML_FI_DEV_C2C_LINK_COUNT), + FieldId(NVML_FI_DEV_C2C_LINK_GET_STATUS), + FieldId(NVML_FI_DEV_C2C_LINK_GET_MAX_BW), + FieldId(NVML_FI_DEV_PCIE_COUNT_CORRECTABLE_ERRORS), + FieldId(NVML_FI_DEV_PCIE_COUNT_NAKS_RECEIVED), + FieldId(NVML_FI_DEV_PCIE_COUNT_RECEIVER_ERROR), + FieldId(NVML_FI_DEV_PCIE_COUNT_BAD_TLP), + FieldId(NVML_FI_DEV_PCIE_COUNT_NAKS_SENT), + FieldId(NVML_FI_DEV_PCIE_COUNT_BAD_DLLP), + FieldId(NVML_FI_DEV_PCIE_COUNT_NON_FATAL_ERROR), + FieldId(NVML_FI_DEV_PCIE_COUNT_FATAL_ERROR), + FieldId(NVML_FI_DEV_PCIE_COUNT_UNSUPPORTED_REQ), + FieldId(NVML_FI_DEV_PCIE_COUNT_LCRC_ERROR), + FieldId(NVML_FI_DEV_PCIE_COUNT_LANE_ERROR), + FieldId(NVML_FI_DEV_IS_RESETLESS_MIG_SUPPORTED), + FieldId(NVML_FI_DEV_POWER_AVERAGE), + FieldId(NVML_FI_DEV_POWER_INSTANT), + FieldId(NVML_FI_DEV_POWER_MIN_LIMIT), + FieldId(NVML_FI_DEV_POWER_MAX_LIMIT), + FieldId(NVML_FI_DEV_POWER_DEFAULT_LIMIT), + FieldId(NVML_FI_DEV_POWER_CURRENT_LIMIT), + FieldId(NVML_FI_DEV_ENERGY), + FieldId(NVML_FI_DEV_POWER_REQUESTED_LIMIT), + FieldId(NVML_FI_DEV_TEMPERATURE_SHUTDOWN_TLIMIT), + FieldId(NVML_FI_DEV_TEMPERATURE_SLOWDOWN_TLIMIT), + FieldId(NVML_FI_DEV_TEMPERATURE_MEM_MAX_TLIMIT), + FieldId(NVML_FI_DEV_TEMPERATURE_GPU_MAX_TLIMIT), + FieldId(NVML_FI_DEV_PCIE_COUNT_TX_BYTES), + FieldId(NVML_FI_DEV_PCIE_COUNT_RX_BYTES), + FieldId(NVML_FI_DEV_IS_MIG_MODE_INDEPENDENT_MIG_QUERY_CAPABLE), + FieldId(NVML_FI_DEV_RESET_STATUS), + FieldId(NVML_FI_DEV_DRAIN_AND_RESET_STATUS), + 
FieldId(NVML_FI_DEV_PCIE_OUTBOUND_ATOMICS_MASK), + FieldId(NVML_FI_DEV_PCIE_INBOUND_ATOMICS_MASK), + FieldId(NVML_FI_DEV_GET_GPU_RECOVERY_ACTION), + FieldId(NVML_FI_DEV_C2C_LINK_ERROR_INTR), + FieldId(NVML_FI_DEV_C2C_LINK_ERROR_REPLAY), + FieldId(NVML_FI_DEV_C2C_LINK_ERROR_REPLAY_B2B), + FieldId(NVML_FI_DEV_C2C_LINK_POWER_STATE), + FieldId(NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP), + FieldId(NVML_FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST), + FieldId(NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN), + FieldId(NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN), + FieldId(NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN), + FieldId(NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ), + FieldId(NVML_FI_DEV_POWER_SYNC_BALANCING_AF), + FieldId(NVML_FI_DEV_EDPP_MULTIPLIER), ]) }) } diff --git a/nvml-wrapper/src/lib.rs b/nvml-wrapper/src/lib.rs index cc56ff8..8cf27d6 100644 --- a/nvml-wrapper/src/lib.rs +++ b/nvml-wrapper/src/lib.rs @@ -237,7 +237,7 @@ impl Nvml { Self::init_internal(LIB_PATH) } - fn init_internal(path: impl AsRef) -> Result { + fn init_internal(path: impl libloading::AsFilename) -> Result { let lib = unsafe { let lib = NvmlLib::new(path)?; let sym = nvml_sym(lib.nvmlInit_v2.as_ref())?; @@ -282,7 +282,7 @@ impl Nvml { } fn init_with_flags_internal( - path: impl AsRef, + path: impl libloading::AsFilename, flags: InitFlags, ) -> Result { let lib = unsafe {