Skip to content

Commit 0fab3f3

Browse files
committed
Adding device::multi_gpu_instances().
To check if the device supports the MultiInstancesGpu feature and its state.
1 parent 572095f commit 0fab3f3

File tree

4 files changed

+43
-2
lines changed

4 files changed

+43
-2
lines changed

nvml-wrapper/src/device.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2378,6 +2378,32 @@ impl<'nvml> Device<'nvml> {
23782378
}
23792379
}
23802380

2381+
/**
2382+
Checks whether this `Device` supports the Multi-Instance GPU (MIG) feature and whether it is enabled.
2383+
Not to be confused with `is_multi_gpu_board`; MIG is a single GPU
2384+
being able to be split into isolated instances, a sort of "NUMA" for GPU.
2385+
If the `Device` supports MIG, we can have its current mode (enabled/disabled)
2386+
and, if set, its pending mode for the next system reboot.
2387+
# Errors
2388+
2389+
* `Uninitialized`, if the library has not been successfully initialized
2390+
* `InvalidArg`, if this `Device` is invalid
2391+
* `NotSupported`, if this `Device` does not support this feature
2392+
* `GpuLost`, if this `Device` has fallen off the bus or is otherwise inaccessible
2393+
* `Unknown`, on any unexpected error
2394+
*/
2395+
#[doc(alias = "nvmlDeviceGetMigMode")]
2396+
pub fn multi_gpu_instances(&self) -> Result<MigMode, NvmlError> {
2397+
let sym = nvml_sym(self.nvml.lib.nvmlDeviceGetMigMode.as_ref())?;
2398+
2399+
unsafe {
2400+
let mut mode: MigMode = mem::zeroed();
2401+
nvml_try(sym(self.device, &mut mode.current, &mut mode.pending))?;
2402+
2403+
Ok(mode)
2404+
}
2405+
}
2406+
23812407
/**
23822408
The name of this `Device`, e.g. "Tesla C2070".
23832409
@@ -5882,6 +5908,12 @@ mod test {
58825908
test_with_device(3, &nvml, |device| device.is_multi_gpu_board())
58835909
}
58845910

5911+
#[test]
5912+
fn multi_gpu_instances() {
5913+
let nvml = nvml();
5914+
test_with_device(3, &nvml, |device| device.multi_gpu_instances())
5915+
}
5916+
58855917
#[test]
58865918
fn name() {
58875919
let nvml = nvml();

nvml-wrapper/src/structs/device.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,13 @@ pub struct RetiredPage {
135135
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
136136
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
137137
pub struct FieldId(pub u32);
138+
139+
/// Returned from `Device.multi_gpu_instances()`
140+
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
141+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
142+
pub struct MigMode {
143+
/// Whether MIG mode is enabled/disabled.
144+
pub current: u32,
145+
/// Mode set after reboot.
146+
pub pending: u32,
147+
}

nvml-wrapper/src/test_utils.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ impl ShouldPrint for PowerSource {}
105105
impl ShouldPrint for DeviceArchitecture {}
106106
impl ShouldPrint for PcieLinkMaxSpeed {}
107107
impl ShouldPrint for DeviceAttributes {}
108+
impl ShouldPrint for MigMode {}
108109

109110
#[cfg(target_os = "windows")]
110111
impl ShouldPrint for DriverModelState {}

nvml-wrapper/unwrapped_functions.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,9 @@ nvmlDeviceGetCapabilities
1212
nvmlDeviceGetClkMonStatus
1313
nvmlDeviceGetClockOffsets
1414
nvmlDeviceGetComputeInstanceId
15-
nvmlDeviceGetConfComputeGpuCertificate
1615
nvmlDeviceGetConfComputeMemSizeInfo
1716
nvmlDeviceGetConfComputeProtectedMemoryUsage
1817
nvmlDeviceGetCoolerInfo
19-
nvmlDeviceGetCpuAffinityWithinScope
2018
nvmlDeviceGetCreatableVgpus
2119
nvmlDeviceGetCurrentClockFreqs
2220
nvmlDeviceGetCurrentClocksEventReasons

0 commit comments

Comments
 (0)