From 824090bf2f2fdf9648bf3f74a161b5a844d0d096 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 30 May 2025 17:43:21 +0100 Subject: [PATCH 1/7] refactor: simplify max memslot checks It is reasonable to assume that we will not have more than u32::MAX memory slots since kernel only returns i32 from a query syscall. Enforce this with `.expect` calls and change the type of `max_memslots` to u32. Signed-off-by: Egor Lazarchuk --- src/vmm/src/vstate/kvm.rs | 7 +++++-- src/vmm/src/vstate/vm.rs | 18 ++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/vmm/src/vstate/kvm.rs b/src/vmm/src/vstate/kvm.rs index fb192d724d1..371b01cf70c 100644 --- a/src/vmm/src/vstate/kvm.rs +++ b/src/vmm/src/vstate/kvm.rs @@ -80,8 +80,11 @@ impl Kvm { } /// Returns the maximal number of memslots allowed in a [`Vm`] - pub fn max_nr_memslots(&self) -> usize { - self.fd.get_nr_memslots() + pub fn max_nr_memslots(&self) -> u32 { + self.fd + .get_nr_memslots() + .try_into() + .expect("Number of vcpus reported by KVM exceeds u32::MAX") } } diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 7a8965a4b9a..9a01fee84ec 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -31,7 +31,7 @@ use crate::{DirtyBitmap, Vcpu, mem_size_mib}; pub struct VmCommon { /// The KVM file descriptor used to access this Vm. pub fd: VmFd, - max_memslots: usize, + max_memslots: u32, /// The guest memory of this Vm. pub guest_memory: GuestMemoryMmap, } @@ -51,8 +51,8 @@ pub enum VmError { EventFd(std::io::Error), /// Failed to create vcpu: {0} CreateVcpu(VcpuError), - /// The number of configured slots is bigger than the maximum reported by KVM - NotEnoughMemorySlots, + /// The number of configured slots is bigger than the maximum reported by KVM: {0} + NotEnoughMemorySlots(u32), /// Memory Error: {0} VmMemory(#[from] vm_memory::Error), } @@ -142,9 +142,9 @@ impl Vm { .guest_memory() .num_regions() .try_into() - .map_err(|_| VmError::NotEnoughMemorySlots)?; - if next_slot as usize >= self.common.max_memslots { - return Err(VmError::NotEnoughMemorySlots); + .expect("Number of existing memory regions exceeds u32::MAX"); + if self.common.max_memslots <= next_slot { + return Err(VmError::NotEnoughMemorySlots(self.common.max_memslots)); } let flags = if region.bitmap().is_some() { @@ -362,9 +362,11 @@ pub(crate) mod tests { let res = vm.register_memory_region(region); - if i >= max_nr_regions { + // For some reason max_nr_regions is considered unused here + #[allow(unused_variables)] + if max_nr_regions <= i { assert!( - matches!(res, Err(VmError::NotEnoughMemorySlots)), + matches!(res, Err(VmError::NotEnoughMemorySlots(max_nr_regions))), "{:?} at iteration {} - max_nr_memslots: {}", res, i, From 70c30dfa0834fab48af368f7bc07aabad49ffbfd Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 30 May 2025 17:55:34 +0100 Subject: [PATCH 2/7] refactor: simplify get_manufacturer_id_from_host Replaces never used error result type wit optional. Signed-off-by: Egor Lazarchuk --- src/vmm/src/arch/aarch64/vcpu.rs | 15 ++++----------- src/vmm/src/persist.rs | 8 ++++---- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/src/vmm/src/arch/aarch64/vcpu.rs b/src/vmm/src/arch/aarch64/vcpu.rs index 59c00c3ff86..5d49dacac19 100644 --- a/src/vmm/src/arch/aarch64/vcpu.rs +++ b/src/vmm/src/arch/aarch64/vcpu.rs @@ -41,25 +41,18 @@ pub enum VcpuArchError { SetMp(kvm_ioctls::Error), /// Failed FamStructWrapper operation: {0} Fam(vmm_sys_util::fam::Error), - /// {0} - GetMidrEl1(String), /// Failed to set/get device attributes for vCPU: {0} DeviceAttribute(kvm_ioctls::Error), } /// Extract the Manufacturer ID from the host. /// The ID is found between bits 24-31 of MIDR_EL1 register. -pub fn get_manufacturer_id_from_host() -> Result { +pub fn get_manufacturer_id_from_host() -> Option { let midr_el1_path = "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1"; - let midr_el1 = std::fs::read_to_string(midr_el1_path).map_err(|err| { - VcpuArchError::GetMidrEl1(format!("Failed to get MIDR_EL1 from host path: {err}")) - })?; + let midr_el1 = std::fs::read_to_string(midr_el1_path).ok()?; let midr_el1_trimmed = midr_el1.trim_end().trim_start_matches("0x"); - let manufacturer_id = u32::from_str_radix(midr_el1_trimmed, 16).map_err(|err| { - VcpuArchError::GetMidrEl1(format!("Invalid MIDR_EL1 found on host: {err}",)) - })?; - - Ok(manufacturer_id >> 24) + let manufacturer_id = u32::from_str_radix(midr_el1_trimmed, 16).ok()?; + Some(manufacturer_id >> 24) } /// Saves states of registers into `state`. diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 4111d8d6c34..f53cee82692 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -255,22 +255,22 @@ pub fn validate_cpu_manufacturer_id(microvm_state: &MicrovmState) { let host_cpu_id = get_manufacturer_id_from_host(); let snapshot_cpu_id = microvm_state.vcpu_states[0].regs.manifacturer_id(); match (host_cpu_id, snapshot_cpu_id) { - (Ok(host_id), Some(snapshot_id)) => { + (Some(host_id), Some(snapshot_id)) => { info!("Host CPU manufacturer ID: {host_id:?}"); info!("Snapshot CPU manufacturer ID: {snapshot_id:?}"); if host_id != snapshot_id { warn!("Host CPU manufacturer ID differs from the snapshotted one",); } } - (Ok(host_id), None) => { + (Some(host_id), None) => { info!("Host CPU manufacturer ID: {host_id:?}"); warn!("Snapshot CPU manufacturer ID: couldn't get from the snapshot"); } - (Err(_), Some(snapshot_id)) => { + (None, Some(snapshot_id)) => { warn!("Host CPU manufacturer ID: couldn't get from the host"); info!("Snapshot CPU manufacturer ID: {snapshot_id:?}"); } - (Err(_), None) => { + (None, None) => { warn!("Host CPU manufacturer ID: couldn't get from the host"); warn!("Snapshot CPU manufacturer ID: couldn't get from the snapshot"); } From fc4aab3e02145a01a14695ba3364d19ae0c52531 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Mon, 9 Jun 2025 16:03:39 +0100 Subject: [PATCH 3/7] feat: add devtool cargo command Add command to run any cargo command from inside container without needing to enter it. Signed-off-by: Egor Lazarchuk --- tools/devtool | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/devtool b/tools/devtool index 9d12c58fd60..bec88f0c4d7 100755 --- a/tools/devtool +++ b/tools/devtool @@ -526,6 +526,22 @@ cmd_build() { return $ret } +cmd_cargo() { + # Check prerequisites + ensure_devctr + ensure_build_dir + + run_devctr \ + --privileged \ + --workdir "$CTR_FC_ROOT_DIR" \ + -- \ + cargo $@ + + # Running as root would have created some root-owned files under the build + # dir. Let's fix that. + cmd_fix_perms +} + function cmd_make_release { ensure_build_dir run_devctr \ From ebfe1c69180cb0e5fd601fc1e0975319182720c7 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 10 Jun 2025 14:09:54 +0100 Subject: [PATCH 4/7] refactor: assert on vcpu TLS init Vcpu TLS must only be initialized once. Enforce this with an assertion. Signed-off-by: Egor Lazarchuk --- src/vmm/src/vstate/vcpu.rs | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index fdd73b9c175..08a597cc4ea 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -51,8 +51,6 @@ pub enum VcpuError { VcpuResponse(KvmVcpuError), /// Cannot spawn a new vCPU thread: {0} VcpuSpawn(io::Error), - /// Cannot clean init vcpu TLS - VcpuTlsInit, /// Vcpu not present in TLS VcpuTlsNotPresent, /// Error with gdb request sent @@ -118,14 +116,11 @@ impl Vcpu { /// /// It is a prerequisite to successfully run `init_thread_local_data()` before using /// `run_on_thread_local()` on the current thread. - /// This function will return an error if there already is a `Vcpu` present in the TLS. - fn init_thread_local_data(&mut self) -> Result<(), VcpuError> { + /// This function will panic if there already is a `Vcpu` present in the TLS. + fn init_thread_local_data(&mut self) { Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { - if cell.get().is_some() { - return Err(VcpuError::VcpuTlsInit); - } + assert!(cell.get().is_none()); cell.set(Some(self as *mut Vcpu)); - Ok(()) }) } @@ -254,8 +249,7 @@ impl Vcpu { .name(format!("fc_vcpu {}", self.kvm_vcpu.index)) .spawn(move || { let filter = &*seccomp_filter; - self.init_thread_local_data() - .expect("Cannot cleanly initialize vcpu TLS."); + self.init_thread_local_data(); // Synchronization to make sure thread local data is initialized. barrier.wait(); self.run(filter); @@ -1034,7 +1028,7 @@ pub(crate) mod tests { } // Initialize vcpu TLS. - vcpu.init_thread_local_data().unwrap(); + vcpu.init_thread_local_data(); // Validate TLS vcpu is the local vcpu by changing the `id` then validating against // the one in TLS. @@ -1056,12 +1050,11 @@ pub(crate) mod tests { } #[test] - fn test_invalid_tls() { + #[should_panic] + fn test_tls_double_init() { let (_, _, mut vcpu) = setup_vcpu(0x1000); - // Initialize vcpu TLS. - vcpu.init_thread_local_data().unwrap(); - // Trying to initialize non-empty TLS should error. - vcpu.init_thread_local_data().unwrap_err(); + vcpu.init_thread_local_data(); + vcpu.init_thread_local_data(); } #[test] @@ -1080,7 +1073,7 @@ pub(crate) mod tests { let handle = std::thread::Builder::new() .name("test_vcpu_kick".to_string()) .spawn(move || { - vcpu.init_thread_local_data().unwrap(); + vcpu.init_thread_local_data(); // Notify TLS was populated. vcpu_barrier.wait(); // Loop for max 1 second to check if the signal handler has run. From 7ee217509f3c19535fc56e23ad31f12352c06ecc Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 10 Jun 2025 14:10:37 +0100 Subject: [PATCH 5/7] refactor: move vcpu signal handler setup to vcpu init Move setting of signal handler into vcpu init to prevent race condition between setting TLS and signal handler using TLS. Signed-off-by: Egor Lazarchuk --- src/vmm/src/lib.rs | 2 -- src/vmm/src/vstate/vcpu.rs | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 29f3b0148ac..2a923637e93 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -375,8 +375,6 @@ impl Vmm { })?; } - Vcpu::register_kick_signal_handler(); - self.vcpus_handles.reserve(vcpu_count); for mut vcpu in vcpus.drain(..) { diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index 08a597cc4ea..7edea01ab78 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -173,7 +173,9 @@ impl Vcpu { /// Registers a signal handler which makes use of TLS and kvm immediate exit to /// kick the vcpu running on the current thread, if there is one. - pub fn register_kick_signal_handler() { + fn register_kick_signal_handler(&mut self) { + self.init_thread_local_data(); + extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) { // SAFETY: This is safe because it's temporarily aliasing the `Vcpu` object, but we are // only reading `vcpu.fd` which does not change for the lifetime of the `Vcpu`. @@ -249,7 +251,7 @@ impl Vcpu { .name(format!("fc_vcpu {}", self.kvm_vcpu.index)) .spawn(move || { let filter = &*seccomp_filter; - self.init_thread_local_data(); + self.register_kick_signal_handler(); // Synchronization to make sure thread local data is initialized. barrier.wait(); self.run(filter); @@ -953,7 +955,6 @@ pub(crate) mod tests { } fn vcpu_configured_for_boot() -> (Vm, VcpuHandle, EventFd) { - Vcpu::register_kick_signal_handler(); // Need enough mem to boot linux. let mem_size = mib_to_bytes(64); let (kvm, vm, mut vcpu) = setup_vcpu(mem_size); @@ -1059,7 +1060,6 @@ pub(crate) mod tests { #[test] fn test_vcpu_kick() { - Vcpu::register_kick_signal_handler(); let (_, vm, mut vcpu) = setup_vcpu(0x1000); let mut kvm_run = @@ -1073,7 +1073,7 @@ pub(crate) mod tests { let handle = std::thread::Builder::new() .name("test_vcpu_kick".to_string()) .spawn(move || { - vcpu.init_thread_local_data(); + vcpu.register_kick_signal_handler(); // Notify TLS was populated. vcpu_barrier.wait(); // Loop for max 1 second to check if the signal handler has run. From 8db599ba14eca0de2ebf52a0d95a1410d06dfa0c Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 10 Jun 2025 14:24:31 +0100 Subject: [PATCH 6/7] refactor: move TLS reset into Vcpu::Drop - The reset must be called only once on vcpu drop, so move it directly into the Drop impl. - Replace errors with asserts since there is an assumption that TLS will always hold correct vcpu pointer for a given thread. - Mark Drop impl as non test only as our unit tests create multiple vcpus which will panic on Drop since they all will be referencing same TLS. Signed-off-by: Egor Lazarchuk --- src/vmm/src/vstate/vcpu.rs | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index 7edea01ab78..2e6f8a85f6f 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -124,26 +124,6 @@ impl Vcpu { }) } - /// Deassociates `self` from the current thread. - /// - /// Should be called if the current `self` had called `init_thread_local_data()` and - /// now needs to move to a different thread. - /// - /// Fails if `self` was not previously associated with the current thread. - fn reset_thread_local_data(&mut self) -> Result<(), VcpuError> { - // Best-effort to clean up TLS. If the `Vcpu` was moved to another thread - // _before_ running this, then there is nothing we can do. - Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { - if let Some(vcpu_ptr) = cell.get() { - if std::ptr::eq(vcpu_ptr, self) { - Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| cell.take()); - return Ok(()); - } - } - Err(VcpuError::VcpuTlsNotPresent) - }) - } - /// Runs `func` for the `Vcpu` associated with the current thread. /// /// It requires that `init_thread_local_data()` was run on this thread. @@ -613,9 +593,19 @@ fn handle_kvm_exit( } } +// During normal runtime there is a strong assumption that vcpus will be +// put on their own threads, thus TLS will be initialized. +// In test we do not put vcpus on separate threads. +#[cfg(not(test))] impl Drop for Vcpu { fn drop(&mut self) { - let _ = self.reset_thread_local_data(); + Self::TLS_VCPU_PTR.with(|cell| { + let vcpu_ptr = cell.get().unwrap(); + assert!(std::ptr::eq(vcpu_ptr, self)); + // Have to do this trick because `update` method is + // not stable on Cell. + Self::TLS_VCPU_PTR.with(|cell| cell.take()); + }) } } @@ -1039,15 +1029,12 @@ pub(crate) mod tests { } // Reset vcpu TLS. - vcpu.reset_thread_local_data().unwrap(); + Vcpu::TLS_VCPU_PTR.with(|cell| cell.take()); // Running on the TLS vcpu after TLS reset should fail. unsafe { Vcpu::run_on_thread_local(|_| ()).unwrap_err(); } - - // Second reset should return error. - vcpu.reset_thread_local_data().unwrap_err(); } #[test] From ab064451b1d0f0e51f0f683011eea597016e61fd Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 10 Jun 2025 14:46:37 +0100 Subject: [PATCH 7/7] refactor: merge run_on_thread_local into signal handler Merge run_on_thread_local into signal handler since it is only used there. The error returned from run_on_thread_local was ignored, so instead replace it with logic to only use vcpu ptr if TLS is initialized, without returning any errors. The reason for not asserting on TLS being initialized here is that during Firecracker shutdown, vcpus will be destroyed and TLS will be reset. If signal will be send to Firecracker during that time, the TLS accessed from a signal handler will be empty. But this is expected, so no assertions/panics are needed. Because Rust is a good language, it does not allow to reference TLS_VCPU_PTR definded inside impl block inside the signal_handler function. So move the TLS_VCPU_PTR definition outside the Vcpu impl block. Signed-off-by: Egor Lazarchuk --- src/vmm/src/vstate/vcpu.rs | 80 +++++++------------------------------- 1 file changed, 14 insertions(+), 66 deletions(-) diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index 2e6f8a85f6f..41cf0417926 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -88,6 +88,8 @@ pub enum CopyKvmFdError { CreateVcpuError(#[from] kvm_ioctls::Error), } +thread_local!(static TLS_VCPU_PTR: VcpuCell = const { Cell::new(None) }); + /// A wrapper around creating and using a vcpu. #[derive(Debug)] pub struct Vcpu { @@ -110,61 +112,35 @@ pub struct Vcpu { } impl Vcpu { - thread_local!(static TLS_VCPU_PTR: VcpuCell = const { Cell::new(None) }); - /// Associates `self` with the current thread. /// /// It is a prerequisite to successfully run `init_thread_local_data()` before using /// `run_on_thread_local()` on the current thread. /// This function will panic if there already is a `Vcpu` present in the TLS. fn init_thread_local_data(&mut self) { - Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { + TLS_VCPU_PTR.with(|cell: &VcpuCell| { assert!(cell.get().is_none()); cell.set(Some(self as *mut Vcpu)); }) } - /// Runs `func` for the `Vcpu` associated with the current thread. - /// - /// It requires that `init_thread_local_data()` was run on this thread. - /// - /// Fails if there is no `Vcpu` associated with the current thread. - /// - /// # Safety - /// - /// This is marked unsafe as it allows temporary aliasing through - /// dereferencing from pointer an already borrowed `Vcpu`. - unsafe fn run_on_thread_local(func: F) -> Result<(), VcpuError> - where - F: FnOnce(&mut Vcpu), - { - Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { - if let Some(vcpu_ptr) = cell.get() { - // SAFETY: Dereferencing here is safe since `TLS_VCPU_PTR` is populated/non-empty, - // and it is being cleared on `Vcpu::drop` so there is no dangling pointer. - let vcpu_ref = unsafe { &mut *vcpu_ptr }; - func(vcpu_ref); - Ok(()) - } else { - Err(VcpuError::VcpuTlsNotPresent) - } - }) - } - /// Registers a signal handler which makes use of TLS and kvm immediate exit to /// kick the vcpu running on the current thread, if there is one. fn register_kick_signal_handler(&mut self) { self.init_thread_local_data(); extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) { - // SAFETY: This is safe because it's temporarily aliasing the `Vcpu` object, but we are - // only reading `vcpu.fd` which does not change for the lifetime of the `Vcpu`. - unsafe { - let _ = Vcpu::run_on_thread_local(|vcpu| { + TLS_VCPU_PTR.with(|cell: &VcpuCell| { + if let Some(vcpu_ptr) = cell.get() { + // SAFETY: Dereferencing here is safe since `TLS_VCPU_PTR` is + // populated/non-empty, and it is being cleared on + // `Vcpu::drop` so there is no dangling pointer. + let vcpu = unsafe { &mut *vcpu_ptr }; + vcpu.kvm_vcpu.fd.set_kvm_immediate_exit(1); fence(Ordering::Release); - }); - } + } + }) } register_signal_handler(sigrtmin() + VCPU_RTSIG_OFFSET, handle_signal) @@ -599,12 +575,12 @@ fn handle_kvm_exit( #[cfg(not(test))] impl Drop for Vcpu { fn drop(&mut self) { - Self::TLS_VCPU_PTR.with(|cell| { + TLS_VCPU_PTR.with(|cell| { let vcpu_ptr = cell.get().unwrap(); assert!(std::ptr::eq(vcpu_ptr, self)); // Have to do this trick because `update` method is // not stable on Cell. - Self::TLS_VCPU_PTR.with(|cell| cell.take()); + TLS_VCPU_PTR.with(|cell| cell.take()); }) } } @@ -1009,34 +985,6 @@ pub(crate) mod tests { assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_some()); } - #[test] - fn test_vcpu_tls() { - let (_, _, mut vcpu) = setup_vcpu(0x1000); - - // Running on the TLS vcpu should fail before we actually initialize it. - unsafe { - Vcpu::run_on_thread_local(|_| ()).unwrap_err(); - } - - // Initialize vcpu TLS. - vcpu.init_thread_local_data(); - - // Validate TLS vcpu is the local vcpu by changing the `id` then validating against - // the one in TLS. - vcpu.kvm_vcpu.index = 12; - unsafe { - Vcpu::run_on_thread_local(|v| assert_eq!(v.kvm_vcpu.index, 12)).unwrap(); - } - - // Reset vcpu TLS. - Vcpu::TLS_VCPU_PTR.with(|cell| cell.take()); - - // Running on the TLS vcpu after TLS reset should fail. - unsafe { - Vcpu::run_on_thread_local(|_| ()).unwrap_err(); - } - } - #[test] #[should_panic] fn test_tls_double_init() {