Skip to content

Commit b6604a0

Browse files
00xcJonathanWoollett-Light
authored and committed
Add support for coalesced MMIO (KVM_CAP_COALESCED_MMIO)
Add support for coalesced MMIO. This performance feature allows guest writes to port and memory space to not trigger VM exits. Instead, the kernel will write an entry into a shared ring buffer for each access, which userspace must consume. The ring buffer is mapped at a certain offset in the vcpu's file descriptor. In order to enable this capability, introduce the KvmCoalescedIoRing struct, which will act as a safe wrapper around the raw mapping of the ring buffer. Since users may not use coalesced MMIO, or it might not be available, store it as an Option in the VcpuFd struct. Signed-off-by: Carlos López <[email protected]>
1 parent 2a102e7 commit b6604a0

File tree

6 files changed

+235
-3
lines changed

6 files changed

+235
-3
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ reg_size as a public method.
1313
userspace MSR handling.
1414
- [[#246](https://github.com/rust-vmm/kvm-ioctls/pull/246)] Add support for
1515
userspace NMI injection (`KVM_NMI` ioctl).
16+
- [[#244](https://github.com/rust-vmm/kvm-ioctls/pull/244)] Add support for
17+
coalesced MMIO (`KVM_CAP_COALESCED_MMIO` / `KVM_CAP_COALESCED_PIO`)
1618

1719
# v0.15.0
1820

src/cap.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ pub enum Cap {
158158
DebugHwBps = KVM_CAP_GUEST_DEBUG_HW_BPS,
159159
DebugHwWps = KVM_CAP_GUEST_DEBUG_HW_WPS,
160160
GetMsrFeatures = KVM_CAP_GET_MSR_FEATURES,
161+
CoalescedPio = KVM_CAP_COALESCED_PIO,
161162
#[cfg(target_arch = "aarch64")]
162163
ArmSve = KVM_CAP_ARM_SVE,
163164
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]

src/ioctls/mod.rs

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@
55
// Use of this source code is governed by a BSD-style license that can be
66
// found in the THIRD-PARTY file.
77

8+
use std::mem::size_of;
89
use std::os::unix::io::AsRawFd;
910
use std::ptr::null_mut;
1011

11-
use kvm_bindings::kvm_run;
12+
use kvm_bindings::{
13+
kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_run, KVM_COALESCED_MMIO_PAGE_OFFSET,
14+
};
1215
use vmm_sys_util::errno;
1316

1417
/// Wrappers over KVM device ioctls.
@@ -26,6 +29,100 @@ pub mod vm;
2629
/// is otherwise a direct mapping to Result.
2730
pub type Result<T> = std::result::Result<T, errno::Error>;
2831

32+
/// A wrapper around the coalesced MMIO ring page.
33+
#[derive(Debug)]
34+
pub(crate) struct KvmCoalescedIoRing {
35+
addr: *mut kvm_coalesced_mmio_ring,
36+
page_size: usize,
37+
}
38+
39+
impl KvmCoalescedIoRing {
40+
/// Maps the coalesced MMIO ring from the vCPU file descriptor.
41+
pub(crate) fn mmap_from_fd<F: AsRawFd>(fd: &F) -> Result<Self> {
42+
// SAFETY: We trust the sysconf libc function and we're calling it
43+
// with a correct parameter.
44+
let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } {
45+
-1 => return Err(errno::Error::last()),
46+
ps => ps as usize,
47+
};
48+
49+
let offset = KVM_COALESCED_MMIO_PAGE_OFFSET * page_size as u32;
50+
// SAFETY: KVM guarantees that there is a page at offset
51+
// KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE if the appropriate
52+
// capability is available. If it is not, the call will simply
53+
// fail.
54+
let addr = unsafe {
55+
libc::mmap(
56+
null_mut(),
57+
page_size,
58+
libc::PROT_READ | libc::PROT_WRITE,
59+
libc::MAP_SHARED,
60+
fd.as_raw_fd(),
61+
offset.into(),
62+
)
63+
};
64+
if addr == libc::MAP_FAILED {
65+
return Err(errno::Error::last());
66+
}
67+
Ok(Self {
68+
addr: addr.cast(),
69+
page_size,
70+
})
71+
}
72+
73+
/// Compute the size of the MMIO ring.
74+
/// Taken from [include/uapi/linux/kvm.h](https://elixir.bootlin.com/linux/v6.6/source/include/uapi/linux/kvm.h#L562)
75+
const fn ring_max(&self) -> usize {
76+
(self.page_size - size_of::<kvm_coalesced_mmio_ring>()) / size_of::<kvm_coalesced_mmio>()
77+
}
78+
79+
/// Gets a mutable reference to the ring
80+
fn ring_mut(&mut self) -> &mut kvm_coalesced_mmio_ring {
81+
// SAFETY: We have a `&mut self` and the pointer is private, so this
82+
// access is exclusive.
83+
unsafe { &mut *self.addr }
84+
}
85+
86+
/// Reads a single entry from the MMIO ring.
87+
///
88+
/// # Returns
89+
///
90+
/// An entry from the MMIO ring buffer, or [`None`] if the ring is empty.
91+
pub(crate) fn read_entry(&mut self) -> Option<kvm_coalesced_mmio> {
92+
let ring_max = self.ring_max();
93+
94+
let ring = self.ring_mut();
95+
if ring.first == ring.last {
96+
return None;
97+
}
98+
99+
let entries = ring.coalesced_mmio.as_ptr();
100+
// SAFETY: `ring.first` is an `u32` coming from mapped memory filled
101+
// by the kernel, so we trust it. `entries` is a pointer coming from
102+
// mmap(), so pointer arithmetic cannot overflow. We have a `&mut self`,
103+
// so nobody else has access to the contents of the pointer.
104+
let elem = unsafe { entries.add(ring.first as usize).read() };
105+
ring.first = (ring.first + 1) % ring_max as u32;
106+
107+
Some(elem)
108+
}
109+
}
110+
111+
impl Drop for KvmCoalescedIoRing {
112+
fn drop(&mut self) {
113+
// SAFETY: This is safe because we mmap the page ourselves, and nobody
114+
// else is holding a reference to it.
115+
unsafe {
116+
libc::munmap(self.addr.cast(), self.page_size);
117+
}
118+
}
119+
}
120+
121+
// SAFETY: See safety comments about [`KvmRunWrapper`].
122+
unsafe impl Send for KvmCoalescedIoRing {}
123+
// SAFETY: See safety comments about [`KvmRunWrapper`].
124+
unsafe impl Sync for KvmCoalescedIoRing {}
125+
29126
/// Safe wrapper over the `kvm_run` struct.
30127
///
31128
/// The wrapper is needed for sending the pointer to `kvm_run` between

src/ioctls/vcpu.rs

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use libc::EINVAL;
1010
use std::fs::File;
1111
use std::os::unix::io::{AsRawFd, RawFd};
1212

13-
use crate::ioctls::{KvmRunWrapper, Result};
13+
use crate::ioctls::{KvmCoalescedIoRing, KvmRunWrapper, Result};
1414
use crate::kvm_ioctls::*;
1515
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1616
use kvm_bindings::{
@@ -169,6 +169,8 @@ pub enum VcpuExit<'a> {
169169
pub struct VcpuFd {
170170
vcpu: File,
171171
kvm_run_ptr: KvmRunWrapper,
172+
/// A pointer to the coalesced MMIO page
173+
coalesced_mmio_ring: Option<KvmCoalescedIoRing>,
172174
}
173175

174176
/// KVM Sync Registers used to tell KVM which registers to sync
@@ -1849,6 +1851,55 @@ impl VcpuFd {
18491851
_ => Err(errno::Error::last()),
18501852
}
18511853
}
1854+
1855+
/// Maps the coalesced MMIO ring page. This allows reading entries from
1856+
/// the ring via [`coalesced_mmio_read()`](VcpuFd::coalesced_mmio_read).
1857+
///
1858+
/// # Returns
1859+
///
1860+
/// Returns an error if the buffer could not be mapped, usually because
1861+
/// `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio))
1862+
/// is not available.
1863+
///
1864+
/// # Examples
1865+
///
1866+
/// ```rust
1867+
/// # use kvm_ioctls::{Kvm, Cap};
1868+
/// let kvm = Kvm::new().unwrap();
1869+
/// let vm = kvm.create_vm().unwrap();
1870+
/// let mut vcpu = vm.create_vcpu(0).unwrap();
1871+
/// if kvm.check_extension(Cap::CoalescedMmio) {
1872+
/// vcpu.map_coalesced_mmio_ring().unwrap();
1873+
/// }
1874+
/// ```
1875+
pub fn map_coalesced_mmio_ring(&mut self) -> Result<()> {
1876+
if self.coalesced_mmio_ring.is_none() {
1877+
let ring = KvmCoalescedIoRing::mmap_from_fd(&self.vcpu)?;
1878+
self.coalesced_mmio_ring = Some(ring);
1879+
}
1880+
Ok(())
1881+
}
1882+
1883+
/// Read a single entry from the coalesced MMIO ring.
1884+
/// For entries to be appended to the ring by the kernel, addresses must be registered
1885+
/// via [`VmFd::register_coalesced_mmio()`](crate::VmFd::register_coalesced_mmio()).
1886+
///
1887+
/// [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring) must have been called beforehand.
1888+
///
1889+
/// See the documentation for `KVM_(UN)REGISTER_COALESCED_MMIO`.
1890+
///
1891+
/// # Returns
1892+
///
1893+
/// * An error if [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring)
1894+
/// was not called beforehand.
1895+
/// * [`Ok<None>`] if the ring is empty.
1896+
/// * [`Ok<Some<kvm_coalesced_mmio>>`] if an entry was successfully read.
1897+
pub fn coalesced_mmio_read(&mut self) -> Result<Option<kvm_coalesced_mmio>> {
1898+
self.coalesced_mmio_ring
1899+
.as_mut()
1900+
.ok_or(errno::Error::new(libc::EIO))
1901+
.map(|ring| ring.read_entry())
1902+
}
18521903
}
18531904

18541905
/// Helper function to create a new `VcpuFd`.
@@ -1857,7 +1908,11 @@ impl VcpuFd {
18571908
/// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because
18581909
/// then it would be exported with the public `VcpuFd` interface.
18591910
pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd {
1860-
VcpuFd { vcpu, kvm_run_ptr }
1911+
VcpuFd {
1912+
vcpu,
1913+
kvm_run_ptr,
1914+
coalesced_mmio_ring: None,
1915+
}
18611916
}
18621917

18631918
impl AsRawFd for VcpuFd {
@@ -2440,6 +2495,7 @@ mod tests {
24402495
kvm_run_ptr: mmap_anonymous(10),
24412496
mmap_size: 10,
24422497
},
2498+
coalesced_mmio_ring: None,
24432499
};
24442500

24452501
assert_eq!(faulty_vcpu_fd.get_regs().unwrap_err().errno(), badf_errno);

src/ioctls/vm.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,6 +1562,68 @@ impl VmFd {
15621562
Err(errno::Error::last())
15631563
}
15641564
}
1565+
1566+
/// Registers an address for coalesced MMIO. Write accesses to the address
1567+
/// will not cause a corresponding [`VcpuExit`](crate::VcpuExit), but
1568+
/// instead will be appended to the MMIO ring buffer. The [`VcpuFd`] can
1569+
/// read entries in the ring buffer via [`VcpuFd::coalesced_mmio_read()`].
1570+
/// If entries are not read the buffer will eventually be full,
1571+
/// preventing further elements from being appended by the kernel.
1572+
///
1573+
/// Needs `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio))
1574+
/// and/or `KVM_CAP_COALESCED_PIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedPio)).
1575+
///
1576+
/// See the documentation for `KVM_REGISTER_COALESCED_MMIO`.
1577+
///
1578+
/// # Arguments
1579+
///
1580+
/// * `addr` - Address being written to.
1581+
/// * `size` - The size of the write for the mechanism to trigger.
1582+
pub fn register_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> {
1583+
let (addr, pio) = match addr {
1584+
IoEventAddress::Pio(addr) => (addr, 1),
1585+
IoEventAddress::Mmio(addr) => (addr, 0),
1586+
};
1587+
let mut zone = kvm_coalesced_mmio_zone {
1588+
addr,
1589+
size,
1590+
..Default::default()
1591+
};
1592+
zone.__bindgen_anon_1.pio = pio;
1593+
1594+
// SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
1595+
// the correct amount of memory from our pointer, and we verify the return result.
1596+
let ret = unsafe { ioctl_with_ref(self, KVM_REGISTER_COALESCED_MMIO(), &zone) };
1597+
if ret != 0 {
1598+
return Err(errno::Error::last());
1599+
}
1600+
Ok(())
1601+
}
1602+
1603+
/// Unregister an address that was previously registered via
1604+
/// [`register_coalesced_mmio()`](VmFd::register_coalesced_mmio).
1605+
///
1606+
/// See the documentation for `KVM_UNREGISTER_COALESCED_MMIO`.
1607+
pub fn unregister_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> {
1608+
let (addr, pio) = match addr {
1609+
IoEventAddress::Pio(addr) => (addr, 1),
1610+
IoEventAddress::Mmio(addr) => (addr, 0),
1611+
};
1612+
let mut zone = kvm_coalesced_mmio_zone {
1613+
addr,
1614+
size,
1615+
..Default::default()
1616+
};
1617+
zone.__bindgen_anon_1.pio = pio;
1618+
1619+
// SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
1620+
// the correct amount of memory from our pointer, and we verify the return result.
1621+
let ret = unsafe { ioctl_with_ref(self, KVM_UNREGISTER_COALESCED_MMIO(), &zone) };
1622+
if ret != 0 {
1623+
return Err(errno::Error::last());
1624+
}
1625+
Ok(())
1626+
}
15651627
}
15661628

15671629
/// Helper function to create a new `VmFd`.

src/kvm_ioctls.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,20 @@ ioctl_io_nr!(KVM_CREATE_IRQCHIP, KVMIO, 0x60);
6161
target_arch = "aarch64"
6262
))]
6363
ioctl_iow_nr!(KVM_IRQ_LINE, KVMIO, 0x61, kvm_irq_level);
64+
/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO */
65+
ioctl_iow_nr!(
66+
KVM_REGISTER_COALESCED_MMIO,
67+
KVMIO,
68+
0x67,
69+
kvm_coalesced_mmio_zone
70+
);
71+
/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO */
72+
ioctl_iow_nr!(
73+
KVM_UNREGISTER_COALESCED_MMIO,
74+
KVMIO,
75+
0x68,
76+
kvm_coalesced_mmio_zone
77+
);
6478
/* Available with KVM_CAP_IRQ_ROUTING */
6579
#[cfg(any(
6680
target_arch = "x86",

0 commit comments

Comments
 (0)