From 1da2abf4b5bc3779da226159b977c556cc650abe Mon Sep 17 00:00:00 2001 From: Yu Li Date: Wed, 7 Jun 2023 19:43:16 +0800 Subject: [PATCH] vmm: memory_manager: align down the rest space of ram_region This commit renames `ram_region_sub_size` to `ram_region_available_size` and aligns its value down to the default page size or hugepage size of the current memory zone, which prevents the memory zone from being split into misaligned parts. If the available size of a ram region is zero, the region is marked as consumed even if it has unused space. Note that there are two ways to use hugepages: 1. Specify `hugepages` for `memory` or `memory-zone`; if `hugepage_size` is not specified, the value is obtained via `statfs` on `/dev/hugepages`. 2. Specify a `file` in hugetlbfs for `memory-zone`; the hugepage size is likewise obtained via `statfs` on the file. The alignment value is the hugepage size if the memory zone uses hugepages; otherwise it is the default page size of the system. 
Fixes: #5463 Signed-off-by: Yu Li --- vmm/src/memory_manager.rs | 118 +++++++++++++++++++++++++++++++++---- vmm/src/seccomp_filters.rs | 1 + 2 files changed, 108 insertions(+), 11 deletions(-) diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index 1e9c22272..262c0c08a 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -31,7 +31,7 @@ use std::convert::TryInto; use std::ffi; use std::fs::{File, OpenOptions}; use std::io::{self, Read}; -use std::ops::Deref; +use std::ops::{BitAnd, Deref, Not, Sub}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::path::PathBuf; use std::result; @@ -330,6 +330,12 @@ pub enum Error { /// Using a directory as a backing file for memory is not supported DirectoryAsBackingFileForMemory, + + /// Failed to stat filesystem + GetFileSystemBlockSize(io::Error), + + /// Memory size is misaligned with default page size or its hugepage size + MisalignedMemorySize, } const ENABLE_FLAG: usize = 0; @@ -353,6 +359,77 @@ fn mmio_address_space_size(phys_bits: u8) -> u64 { (1 << phys_bits) - (1 << 16) } +// The `statfs` function can get information of hugetlbfs, and the hugepage size is in the +// `f_bsize` field. +// +// See: https://github.com/torvalds/linux/blob/v6.3/fs/hugetlbfs/inode.c#L1169 +fn statfs_get_bsize(path: &str) -> Result { + let path = std::ffi::CString::new(path).map_err(|_| Error::InvalidMemoryParameters)?; + let mut buf = std::mem::MaybeUninit::::uninit(); + + // SAFETY: FFI call with a valid path and buffer + let ret = unsafe { libc::statfs(path.as_ptr(), buf.as_mut_ptr()) }; + if ret != 0 { + return Err(Error::GetFileSystemBlockSize( + std::io::Error::last_os_error(), + )); + } + + // SAFETY: `buf` is valid at this point + // Because this value is always positive, just convert it directly. + // Note that the `f_bsize` is `i64` in glibc and `u64` in musl, using `as u64` will be warned + // by `clippy` on musl target. 
To avoid the warning, there should be `as _` instead of + // `as u64`. + let bsize = unsafe { (*buf.as_ptr()).f_bsize } as _; + Ok(bsize) +} + +fn memory_zone_get_align_size(zone: &MemoryZoneConfig) -> Result { + // SAFETY: FFI call. Trivially safe. + let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 }; + + // There is no backend file and the `hugepages` is disabled, just use system page size. + if zone.file.is_none() && !zone.hugepages { + return Ok(page_size); + } + + // The `hugepages` is enabled and the `hugepage_size` is specified, just use it directly. + if zone.hugepages && zone.hugepage_size.is_some() { + return Ok(zone.hugepage_size.unwrap()); + } + + // There are two scenarios here: + // - `hugepages` is enabled but `hugepage_size` is not specified: + // Call `statfs` for `/dev/hugepages` for getting the default size of hugepage + // - The backing file is specified: + // Call `statfs` for the file and get its `f_bsize`. If the value is larger than the page + // size of normal page, just use the `f_bsize` because the file is in a hugetlbfs. If the + // value is less than or equal to the page size, just use the page size. 
+ let path = zone.file.as_ref().map_or(Ok("/dev/hugepages"), |pathbuf| { + pathbuf.to_str().ok_or(Error::InvalidMemoryParameters) + })?; + + let align_size = std::cmp::max(page_size, statfs_get_bsize(path)?); + + Ok(align_size) +} + +#[inline] +fn align_down(val: T, align: T) -> T +where + T: BitAnd + Not + Sub + From, +{ + val & !(align - 1u8.into()) +} + +#[inline] +fn is_aligned(val: T, align: T) -> bool +where + T: BitAnd + Sub + From + PartialEq, +{ + (val & (align - 1u8.into())) == 0u8.into() +} + impl BusDevice for MemoryManager { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { if self.selected_slot < self.hotplug_slots.len() { @@ -451,9 +528,14 @@ impl MemoryManager { let mut zones = zones.to_owned(); let mut mem_regions = Vec::new(); let mut zone = zones.remove(0); - let mut zone_offset = 0; + let mut zone_align_size = memory_zone_get_align_size(&zone)?; + let mut zone_offset = 0u64; let mut memory_zones = HashMap::new(); + if !is_aligned(zone.size, zone_align_size) { + return Err(Error::MisalignedMemorySize); + } + // Add zone id to the list of memory zones. 
memory_zones.insert(zone.id.clone(), MemoryZone::default()); @@ -465,16 +547,20 @@ impl MemoryManager { let mut ram_region_consumed = false; let mut pull_next_zone = false; - let ram_region_sub_size = ram_region.1 - ram_region_offset; - let zone_sub_size = zone.size as usize - zone_offset; + let ram_region_available_size = + align_down(ram_region.1 as u64 - ram_region_offset, zone_align_size); + if ram_region_available_size == 0 { + break; + } + let zone_sub_size = zone.size - zone_offset; - let file_offset = zone_offset as u64; + let file_offset = zone_offset; let region_start = ram_region .0 - .checked_add(ram_region_offset as u64) + .checked_add(ram_region_offset) .ok_or(Error::GuestAddressOverFlow)?; - let region_size = if zone_sub_size <= ram_region_sub_size { - if zone_sub_size == ram_region_sub_size { + let region_size = if zone_sub_size <= ram_region_available_size { + if zone_sub_size == ram_region_available_size { ram_region_consumed = true; } @@ -483,17 +569,23 @@ impl MemoryManager { zone_sub_size } else { - zone_offset += ram_region_sub_size; + zone_offset += ram_region_available_size; ram_region_consumed = true; - ram_region_sub_size + ram_region_available_size }; + info!( + "create ram region for zone {}, region_start: {:#x}, region_size: {:#x}", + zone.id, + region_start.raw_value(), + region_size + ); let region = MemoryManager::create_ram_region( &zone.file, file_offset, region_start, - region_size, + region_size as usize, prefault.unwrap_or(zone.prefault), zone.shared, zone.hugepages, @@ -519,6 +611,10 @@ impl MemoryManager { break; } zone = zones.remove(0); + zone_align_size = memory_zone_get_align_size(&zone)?; + if !is_aligned(zone.size, zone_align_size) { + return Err(Error::MisalignedMemorySize); + } // Check if zone id already exist. In case it does, throw // an error as we need unique identifiers. 
Otherwise, add diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 8bf3d3720..ac41fc3eb 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -609,6 +609,7 @@ fn vmm_thread_rules( (libc::SYS_socketpair, vec![]), #[cfg(target_arch = "x86_64")] (libc::SYS_stat, vec![]), + (libc::SYS_statfs, vec![]), (libc::SYS_statx, vec![]), (libc::SYS_tgkill, vec![]), (libc::SYS_timerfd_create, vec![]),