Skip to content

Commit

Permalink
feat: update IovDeque to support arbitrary size and host page size
Browse files Browse the repository at this point in the history
Remove restriction on size and host page size.

Signed-off-by: Egor Lazarchuk <[email protected]>
  • Loading branch information
ShadowCurse committed Nov 15, 2024
1 parent ddf2cfb commit 82a4d30
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 20 deletions.
52 changes: 32 additions & 20 deletions src/vmm/src/devices/virtio/iov_deque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,11 @@ pub enum IovDequeError {
// pub iov_len: ::size_t,
// }
// ```
//
// This value must be a multiple of 256 because this is the maximum number of `iovec`s that can fit into
// 1 memory page: 256 * sizeof(iovec) == 4096 == HOST_PAGE_SIZE. IovDeque only operates with
// `HOST_PAGE_SIZE` granularity.

// Ring buffer of `iovec` entries whose capacity in entries is the const parameter `L`.
// `new` backs it with two adjacent virtual mappings of the same memfd.
#[derive(Debug)]
pub struct IovDeque<const L: u16> {
// Start of the mapped buffer, viewed as `iovec` entries (`new` stores `buffer.cast()` here).
pub iov: *mut libc::iovec,
// Byte size of ONE mapping of the buffer: `new` stores `pages_bytes` here, i.e.
// `L * size_of::<iovec>()` rounded up to a whole number of host pages. `drop` unmaps
// `2 * bytes`.
// NOTE(review): the kani harness in `iovec.rs` initialises this field to
// `2 * GUEST_PAGE_SIZE`; confirm that agrees with the single-mapping meaning used here.
pub bytes: u32,
// Initialised to 0 by `new`; presumably the index of the first live entry — TODO confirm.
pub start: u16,
// Initialised to 0 by `new`; presumably the number of live entries — TODO confirm.
pub len: u16,
}
Expand All @@ -92,17 +90,15 @@ pub struct IovDeque<const L: u16> {
unsafe impl<const L: u16> Send for IovDeque<L> {}

impl<const L: u16> IovDeque<L> {
const BYTES: usize = L as usize * std::mem::size_of::<iovec>();

/// Create a [`memfd`] object of `pages_bytes` bytes (a whole number of host pages)
fn create_memfd() -> Result<memfd::Memfd, IovDequeError> {
fn create_memfd(pages_bytes: usize) -> Result<memfd::Memfd, IovDequeError> {
// Create a sealable memfd.
let opts = memfd::MemfdOptions::default().allow_sealing(true);
let mfd = opts.create("iov_deque")?;

// Resize to `pages_bytes`, the ring buffer size rounded up to whole host pages.
mfd.as_file()
.set_len(Self::BYTES.try_into().unwrap())
.set_len(pages_bytes.try_into().unwrap())
.map_err(IovDequeError::MemfdResize)?;

// Add seals to prevent further resizing.
Expand Down Expand Up @@ -135,13 +131,13 @@ impl<const L: u16> IovDeque<L> {

/// Allocate memory for our ring buffer
///
/// This will allocate 2 * `Self::BYTES` bytes of virtual memory.
fn allocate_ring_buffer_memory() -> Result<*mut c_void, IovDequeError> {
/// This will allocate 2 * `pages_bytes` bytes of virtual memory.
fn allocate_ring_buffer_memory(pages_bytes: usize) -> Result<*mut c_void, IovDequeError> {
// SAFETY: We are calling the system call with valid arguments
unsafe {
Self::mmap(
std::ptr::null_mut(),
Self::BYTES * 2,
pages_bytes * 2,
libc::PROT_NONE,
libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-1,
Expand All @@ -152,18 +148,21 @@ impl<const L: u16> IovDeque<L> {

/// Create a new [`IovDeque`] that can hold memory described by a single VirtIO queue.
pub fn new() -> Result<Self, IovDequeError> {
assert!(Self::BYTES % host_page_size() == 0);
let host_page_size = host_page_size();
let bytes = L as usize * std::mem::size_of::<iovec>();
let num_host_pages = bytes.div_ceil(host_page_size);
let pages_bytes = num_host_pages * host_page_size;

let memfd = Self::create_memfd()?;
let memfd = Self::create_memfd(pages_bytes)?;
let raw_memfd = memfd.as_file().as_raw_fd();
let buffer = Self::allocate_ring_buffer_memory()?;
let buffer = Self::allocate_ring_buffer_memory(pages_bytes)?;

// Map the first half (`pages_bytes` bytes) of the virtual memory to the memory described by the memfd object
// SAFETY: We are calling the system call with valid arguments
let _ = unsafe {
Self::mmap(
buffer,
Self::BYTES,
pages_bytes,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_SHARED | libc::MAP_FIXED,
raw_memfd,
Expand All @@ -174,17 +173,17 @@ impl<const L: u16> IovDeque<L> {
// Map the second half (`pages_bytes` bytes) of the virtual memory to the same memfd object
//
// SAFETY: This is safe because:
// * Both `buffer` and the result of `buffer.add(Self::BYTES)` are within bounds of the
// * Both `buffer` and the result of `buffer.add(pages_bytes)` are within bounds of the
// allocation we got from `Self::allocate_ring_buffer_memory`.
// * The resulting pointer is the beginning of the second page of our allocation, so it
// doesn't wrap around the address space.
let next_page = unsafe { buffer.add(Self::BYTES) };
let next_page = unsafe { buffer.add(pages_bytes) };

// SAFETY: We are calling the system call with valid arguments
let _ = unsafe {
Self::mmap(
next_page,
Self::BYTES,
pages_bytes,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_SHARED | libc::MAP_FIXED,
raw_memfd,
Expand All @@ -194,6 +193,7 @@ impl<const L: u16> IovDeque<L> {

Ok(Self {
iov: buffer.cast(),
bytes: u32::try_from(pages_bytes).unwrap(),
start: 0,
len: 0,
})
Expand Down Expand Up @@ -313,8 +313,8 @@ impl<const L: u16> IovDeque<L> {
impl<const L: u16> Drop for IovDeque<L> {
fn drop(&mut self) {
// SAFETY: We are passing an address that we got from a previous allocation of `2 *
// Self::BYTES` bytes by calling mmap
let _ = unsafe { libc::munmap(self.iov.cast(), Self::BYTES * 2) };
// self.bytes` by calling mmap
let _ = unsafe { libc::munmap(self.iov.cast(), usize::try_from(self.bytes).unwrap() * 2) };
}
}

Expand All @@ -332,6 +332,18 @@ mod tests {
assert_eq!(deque.len(), 0);
}

// Construction must succeed when the `L` iovecs occupy less than one host page.
// 128 entries is half of the 256 that fill a 4096-byte page (assumes 4 KiB host pages
// and a 16-byte `iovec` — TODO confirm on other targets).
#[test]
fn test_new_less_than_page() {
let deque = super::IovDeque::<128>::new().unwrap();
// A freshly created deque holds no entries.
assert_eq!(deque.len(), 0);
}

// Construction must succeed when the `L` iovecs need more than one host page.
// 512 entries is twice the 256 that fill a 4096-byte page (assumes 4 KiB host pages
// and a 16-byte `iovec` — TODO confirm on other targets).
#[test]
fn test_new_more_than_page() {
let deque = super::IovDeque::<512>::new().unwrap();
// A freshly created deque holds no entries.
assert_eq!(deque.len(), 0);
}

fn make_iovec(id: u16, len: u16) -> iovec {
iovec {
iov_base: id as *mut libc::c_void,
Expand Down
1 change: 1 addition & 0 deletions src/vmm/src/devices/virtio/iovec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,7 @@ mod verification {
};
IovDequeDefault {
iov: mem.cast(),
bytes: 2 * u32::try_from(GUEST_PAGE_SIZE).unwrap(),
start: kani::any_where(|&start| start < FIRECRACKER_MAX_QUEUE_SIZE),
len: 0,
}
Expand Down

0 comments on commit 82a4d30

Please sign in to comment.