From a4866f6c343325b4fcef02a35fcf6c9f8e595a47 Mon Sep 17 00:00:00 2001
From: David Carlier
Date: Tue, 25 Mar 2025 22:08:06 +0000
Subject: [PATCH] Adding device::mig_mode().

To check if the device supports the MultiInstancesGpu feature and its state.
---
 nvml-wrapper/src/device.rs           | 32 ++++++++++++++++++++++++++++
 nvml-wrapper/src/structs/device.rs   | 10 +++++++++
 nvml-wrapper/src/test_utils.rs       |  1 +
 nvml-wrapper/unwrapped_functions.txt |  2 --
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/nvml-wrapper/src/device.rs b/nvml-wrapper/src/device.rs
index 591c640..f77a113 100644
--- a/nvml-wrapper/src/device.rs
+++ b/nvml-wrapper/src/device.rs
@@ -2378,6 +2378,32 @@ impl<'nvml> Device<'nvml> {
         }
     }
 
+    /**
+    Checks whether this `Device` supports Multi-Instance GPU (MIG) and whether it is enabled.
+    Not to be confused with `is_multi_gpu_board`: MIG allows a single GPU
+    to be split into isolated instances, a sort of "NUMA" for GPUs.
+    If the `Device` supports MIG, this returns its current mode (enabled/disabled)
+    and, if set, its pending mode for the next system reboot.
+    # Errors
+
+    * `Uninitialized`, if the library has not been successfully initialized
+    * `InvalidArg`, if this `Device` is invalid
+    * `NotSupported`, if this `Device` does not support this feature
+    * `GpuLost`, if this `Device` has fallen off the bus or is otherwise inaccessible
+    * `Unknown`, on any unexpected error
+    */
+    #[doc(alias = "nvmlDeviceGetMigMode")]
+    pub fn mig_mode(&self) -> Result<MigMode, NvmlError> {
+        let sym = nvml_sym(self.nvml.lib.nvmlDeviceGetMigMode.as_ref())?;
+
+        unsafe {
+            let mut mode: MigMode = mem::zeroed();
+            nvml_try(sym(self.device, &mut mode.current, &mut mode.pending))?;
+
+            Ok(mode)
+        }
+    }
+
     /**
     The name of this `Device`, e.g. "Tesla C2070".
 
@@ -5882,6 +5908,12 @@ mod test {
         test_with_device(3, &nvml, |device| device.is_multi_gpu_board())
     }
 
+    #[test]
+    fn mig_mode() {
+        let nvml = nvml();
+        test_with_device(3, &nvml, |device| device.mig_mode())
+    }
+
     #[test]
     fn name() {
         let nvml = nvml();
diff --git a/nvml-wrapper/src/structs/device.rs b/nvml-wrapper/src/structs/device.rs
index ba1121c..cd9062d 100644
--- a/nvml-wrapper/src/structs/device.rs
+++ b/nvml-wrapper/src/structs/device.rs
@@ -135,3 +135,13 @@ pub struct RetiredPage {
 #[derive(Debug, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct FieldId(pub u32);
+
+/// Returned from `Device.mig_mode()`
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct MigMode {
+    /// Current MIG mode as reported by NVML: `0` when disabled, `1` when enabled.
+    pub current: u32,
+    /// MIG mode that takes effect after the next reboot (same encoding as `current`).
+    pub pending: u32,
+}
diff --git a/nvml-wrapper/src/test_utils.rs b/nvml-wrapper/src/test_utils.rs
index 72f1b8d..f557ba4 100644
--- a/nvml-wrapper/src/test_utils.rs
+++ b/nvml-wrapper/src/test_utils.rs
@@ -105,6 +105,7 @@ impl ShouldPrint for PowerSource {}
 impl ShouldPrint for DeviceArchitecture {}
 impl ShouldPrint for PcieLinkMaxSpeed {}
 impl ShouldPrint for DeviceAttributes {}
+impl ShouldPrint for MigMode {}
 
 #[cfg(target_os = "windows")]
 impl ShouldPrint for DriverModelState {}
diff --git a/nvml-wrapper/unwrapped_functions.txt b/nvml-wrapper/unwrapped_functions.txt
index df7c23b..cfa01f6 100644
--- a/nvml-wrapper/unwrapped_functions.txt
+++ b/nvml-wrapper/unwrapped_functions.txt
@@ -12,11 +12,9 @@ nvmlDeviceGetCapabilities
 nvmlDeviceGetClkMonStatus
 nvmlDeviceGetClockOffsets
 nvmlDeviceGetComputeInstanceId
-nvmlDeviceGetConfComputeGpuCertificate
 nvmlDeviceGetConfComputeMemSizeInfo
 nvmlDeviceGetConfComputeProtectedMemoryUsage
 nvmlDeviceGetCoolerInfo
-nvmlDeviceGetCpuAffinityWithinScope
 nvmlDeviceGetCreatableVgpus
 nvmlDeviceGetCurrentClockFreqs
 nvmlDeviceGetCurrentClocksEventReasons