diff --git a/CHANGELOG.md b/CHANGELOG.md index 65ef73a1e1f..e402b85ad43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ and this project adheres to - [#4921](https://github.com/firecracker-microvm/firecracker/pull/4921): Fixed swagger `CpuConfig` definition to include missing aarch64-specific fields. +- [#4916](https://github.com/firecracker-microvm/firecracker/pull/4916): Fixed + `IovDeque` implementation to work with any host page size. This fixes + virtio-net device on non 4K host kernels. ## [1.10.1] diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index 8fb5392afcf..300afd0ad66 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -20,6 +20,7 @@ use seccomp::FilterError; use seccompiler::BpfThreadMap; use utils::arg_parser::{ArgParser, Argument}; use utils::validators::validate_instance_id; +use vmm::arch::host_page_size; use vmm::builder::StartMicrovmError; use vmm::logger::{ debug, error, info, LoggerConfig, ProcessTimeReporter, StoreMetric, LOGGER, METRICS, @@ -108,6 +109,10 @@ fn main_exec() -> Result<(), MainError> { // Initialize the logger. LOGGER.init().map_err(MainError::SetLogger)?; + // First call to this function updates the value to current + // host page size. + _ = host_page_size(); + // We need this so that we can reset terminal to canonical mode if panic occurs. let stdin = io::stdin(); diff --git a/src/vmm/src/arch/aarch64/mod.rs b/src/vmm/src/arch/aarch64/mod.rs index 08ca1b65edb..a409f330c56 100644 --- a/src/vmm/src/arch/aarch64/mod.rs +++ b/src/vmm/src/arch/aarch64/mod.rs @@ -94,7 +94,8 @@ pub fn initrd_load_addr( guest_mem: &GuestMemoryMmap, initrd_size: usize, ) -> Result { - let round_to_pagesize = |size| (size + (super::PAGE_SIZE - 1)) & !(super::PAGE_SIZE - 1); + let round_to_pagesize = + |size| (size + (super::GUEST_PAGE_SIZE - 1)) & !(super::GUEST_PAGE_SIZE - 1); match GuestAddress(get_fdt_addr(guest_mem)).checked_sub(round_to_pagesize(initrd_size) as u64) { Some(offset) => { if guest_mem.address_in_range(offset) { diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index f5a2f98cb7c..a51055622e4 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt; +use std::sync::LazyLock; +use log::warn; use serde::{Deserialize, Serialize}; /// Module for aarch64 related functionality. @@ -52,8 +54,23 @@ pub struct InitrdConfig { pub size: usize, } -/// Default (smallest) memory page size for the supported architectures. -pub const PAGE_SIZE: usize = 4096; +/// Default page size for the guest OS. +pub const GUEST_PAGE_SIZE: usize = 4096; + +/// Get the size of the host page size. +pub fn host_page_size() -> usize { + /// Default page size for the host OS. + static PAGE_SIZE: LazyLock = LazyLock::new(|| { + // # Safety: Value always valid + let r = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }; + usize::try_from(r).unwrap_or_else(|_| { + warn!("Could not get host page size with sysconf, assuming default 4K host pages"); + 4096 + }) + }); + + *PAGE_SIZE +} impl fmt::Display for DeviceType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { diff --git a/src/vmm/src/arch/x86_64/mod.rs b/src/vmm/src/arch/x86_64/mod.rs index 1066d9734c3..40f4f15607a 100644 --- a/src/vmm/src/arch/x86_64/mod.rs +++ b/src/vmm/src/arch/x86_64/mod.rs @@ -97,7 +97,7 @@ pub fn initrd_load_addr( return Err(ConfigurationError::InitrdAddress); } - let align_to_pagesize = |address| address & !(super::PAGE_SIZE - 1); + let align_to_pagesize = |address| address & !(super::GUEST_PAGE_SIZE - 1); Ok(align_to_pagesize(lowmem_size - initrd_size) as u64) } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 8594da9f077..6131e248d91 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1305,7 +1305,7 @@ pub(crate) mod tests { use crate::vstate::memory::GuestMemory; let image = make_test_bin(); - let mem_size: usize = image.len() * 2 + crate::arch::PAGE_SIZE; + let mem_size: usize = image.len() * 2 + crate::arch::GUEST_PAGE_SIZE; let tempfile = TempFile::new().unwrap(); let mut tempfile = tempfile.into_file(); @@ -1344,7 +1344,7 @@ pub(crate) mod tests { let tempfile = TempFile::new().unwrap(); let mut tempfile = tempfile.into_file(); tempfile.write_all(&image).unwrap(); - let gm = single_region_mem_at(crate::arch::PAGE_SIZE as u64 + 1, image.len() * 2); + let gm = single_region_mem_at(crate::arch::GUEST_PAGE_SIZE as u64 + 1, image.len() * 2); let res = load_initrd(&gm, &mut tempfile); assert!( diff --git a/src/vmm/src/devices/virtio/iov_deque.rs b/src/vmm/src/devices/virtio/iov_deque.rs index 51bf28a49eb..b28d7076f43 100644 --- a/src/vmm/src/devices/virtio/iov_deque.rs +++ b/src/vmm/src/devices/virtio/iov_deque.rs @@ -6,7 +6,7 @@ use std::os::fd::AsRawFd; use libc::{c_int, c_void, iovec, off_t, size_t}; use memfd; -use crate::arch::PAGE_SIZE; +use crate::arch::host_page_size; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum IovDequeError { @@ -77,10 +77,7 @@ pub enum IovDequeError { // pub iov_len: ::size_t, // } // ``` -// -// This value must be a multiple of 256 because this is the maximum number of `iovec` can fit into -// 1 memory page: 256 * sizeof(iovec) == 4096 == PAGE_SIZE. IovDeque only operates with `PAGE_SIZE` -// granularity. + #[derive(Debug)] pub struct IovDeque { pub iov: *mut libc::iovec, @@ -92,18 +89,15 @@ pub struct IovDeque { unsafe impl Send for IovDeque {} impl IovDeque { - const BYTES: usize = L as usize * std::mem::size_of::(); - const _ASSERT: () = assert!(Self::BYTES % PAGE_SIZE == 0); - /// Create a [`memfd`] object that represents a single physical page - fn create_memfd() -> Result { + fn create_memfd(pages_bytes: usize) -> Result { // Create a sealable memfd. let opts = memfd::MemfdOptions::default().allow_sealing(true); let mfd = opts.create("iov_deque")?; // Resize to system page size. mfd.as_file() - .set_len(Self::BYTES.try_into().unwrap()) + .set_len(pages_bytes.try_into().unwrap()) .map_err(IovDequeError::MemfdResize)?; // Add seals to prevent further resizing. @@ -136,13 +130,13 @@ impl IovDeque { /// Allocate memory for our ring buffer /// - /// This will allocate 2 * `Self::BYTES` bytes of virtual memory. - fn allocate_ring_buffer_memory() -> Result<*mut c_void, IovDequeError> { + /// This will allocate 2 * `pages_bytes` bytes of virtual memory. + fn allocate_ring_buffer_memory(pages_bytes: usize) -> Result<*mut c_void, IovDequeError> { // SAFETY: We are calling the system call with valid arguments unsafe { Self::mmap( std::ptr::null_mut(), - Self::BYTES * 2, + pages_bytes * 2, libc::PROT_NONE, libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, -1, @@ -151,18 +145,29 @@ impl IovDeque { } } + /// Calculate a number of bytes in full pages required for + /// the type to operate. + fn pages_bytes() -> usize { + let host_page_size = host_page_size(); + let bytes = L as usize * std::mem::size_of::(); + let num_host_pages = bytes.div_ceil(host_page_size); + num_host_pages * host_page_size + } + /// Create a new [`IovDeque`] that can hold memory described by a single VirtIO queue. pub fn new() -> Result { - let memfd = Self::create_memfd()?; + let pages_bytes = Self::pages_bytes(); + + let memfd = Self::create_memfd(pages_bytes)?; let raw_memfd = memfd.as_file().as_raw_fd(); - let buffer = Self::allocate_ring_buffer_memory()?; + let buffer = Self::allocate_ring_buffer_memory(pages_bytes)?; // Map the first page of virtual memory to the physical page described by the memfd object // SAFETY: We are calling the system call with valid arguments let _ = unsafe { Self::mmap( buffer, - Self::BYTES, + pages_bytes, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED | libc::MAP_FIXED, raw_memfd, @@ -173,17 +178,17 @@ impl IovDeque { // Map the second page of virtual memory to the physical page described by the memfd object // // SAFETY: This is safe because: - // * Both `buffer` and the result of `buffer.add(Self::BYTES)` are within bounds of the + // * Both `buffer` and the result of `buffer.add(pages_bytes)` are within bounds of the // allocation we got from `Self::allocate_ring_buffer_memory`. // * The resulting pointer is the beginning of the second page of our allocation, so it // doesn't wrap around the address space. - let next_page = unsafe { buffer.add(Self::BYTES) }; + let next_page = unsafe { buffer.add(pages_bytes) }; // SAFETY: We are calling the system call with valid arguments let _ = unsafe { Self::mmap( next_page, - Self::BYTES, + pages_bytes, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED | libc::MAP_FIXED, raw_memfd, @@ -311,9 +316,10 @@ impl IovDeque { impl Drop for IovDeque { fn drop(&mut self) { + let pages_bytes = Self::pages_bytes(); // SAFETY: We are passing an address that we got from a previous allocation of `2 * - // Self::BYTES` bytes by calling mmap - let _ = unsafe { libc::munmap(self.iov.cast(), Self::BYTES * 2) }; + // pages_bytes` by calling mmap + let _ = unsafe { libc::munmap(self.iov.cast(), 2 * pages_bytes) }; } } @@ -331,6 +337,18 @@ mod tests { assert_eq!(deque.len(), 0); } + #[test] + fn test_new_less_than_page() { + let deque = super::IovDeque::<128>::new().unwrap(); + assert_eq!(deque.len(), 0); + } + + #[test] + fn test_new_more_than_page() { + let deque = super::IovDeque::<512>::new().unwrap(); + assert_eq!(deque.len(), 0); + } + fn make_iovec(id: u16, len: u16) -> iovec { iovec { iov_base: id as *mut libc::c_void, diff --git a/src/vmm/src/devices/virtio/iovec.rs b/src/vmm/src/devices/virtio/iovec.rs index 9262dff661b..c9893260d9e 100644 --- a/src/vmm/src/devices/virtio/iovec.rs +++ b/src/vmm/src/devices/virtio/iovec.rs @@ -815,13 +815,13 @@ mod verification { use vm_memory::VolatileSlice; use super::IoVecBuffer; + use crate::arch::GUEST_PAGE_SIZE; use crate::devices::virtio::iov_deque::IovDeque; // Redefine `IoVecBufferMut` and `IovDeque` with specific length. Otherwise // Rust will not know what to do. type IoVecBufferMutDefault = super::IoVecBufferMut; type IovDequeDefault = IovDeque; - use crate::arch::PAGE_SIZE; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; // Maximum memory size to use for our buffers. For the time being 1KB. @@ -912,8 +912,8 @@ mod verification { // SAFETY: safe because the layout has non-zero size let mem = unsafe { std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked( - 2 * PAGE_SIZE, - PAGE_SIZE, + 2 * GUEST_PAGE_SIZE, + GUEST_PAGE_SIZE, )) }; IovDequeDefault { diff --git a/src/vmm/src/gdb/target.rs b/src/vmm/src/gdb/target.rs index b8230342b27..6f3f2593c15 100644 --- a/src/vmm/src/gdb/target.rs +++ b/src/vmm/src/gdb/target.rs @@ -33,7 +33,7 @@ use vm_memory::{Bytes, GuestAddress, GuestMemoryError}; use super::arch; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::vcpu::VcpuError as AarchVcpuError; -use crate::arch::PAGE_SIZE; +use crate::arch::GUEST_PAGE_SIZE; use crate::logger::{error, info}; use crate::utils::u64_to_usize; use crate::vstate::vcpu::VcpuSendEventError; @@ -396,7 +396,7 @@ impl MultiThreadBase for FirecrackerTarget { // Compute the amount space left in the page after the gpa let read_len = std::cmp::min( data.len(), - PAGE_SIZE - (u64_to_usize(gpa) & (PAGE_SIZE - 1)), + GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)), ); vmm.guest_memory() @@ -430,7 +430,7 @@ impl MultiThreadBase for FirecrackerTarget { // Compute the amount space left in the page after the gpa let write_len = std::cmp::min( data.len(), - PAGE_SIZE - (u64_to_usize(gpa) & (PAGE_SIZE - 1)), + GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)), ); vmm.guest_memory() diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 3d24dc6f9ac..d213d4d7bb6 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -593,7 +593,7 @@ pub(crate) mod tests { let res = vm.set_kvm_memory_regions(&gm, false); res.unwrap(); - // Trying to set a memory region with a size that is not a multiple of PAGE_SIZE + // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE // will result in error. let gm = single_region_mem(0x10); let res = vm.set_kvm_memory_regions(&gm, false);