mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2025-01-06 21:05:18 +00:00
vmm: memory_manager: align down the rest space of ram_region
This commit renames `ram_region_sub_size` to `ram_region_available_size` and aligns its value down to the default page size or the hugepage size of the current memory zone, which prevents the memory zone from being split into misaligned parts. If the available size of a ram region is zero, the region is marked as consumed even if it still has unused space. Note that there are two ways to use hugepages: 1. Specify `hugepages` for `memory` or `memory-zone`; if `hugepage_size` is not specified, the value is obtained by calling `statfs` on `/dev/hugepages`. 2. Specify a `file` in hugetlbfs for `memory-zone`; the hugepage size is likewise obtained by calling `statfs` on the file. The alignment value is the hugepage size if the memory zone uses hugepages; otherwise it is the default page size of the system. Fixes: #5463 Signed-off-by: Yu Li <liyu.yukiteru@bytedance.com>
This commit is contained in:
parent
e5835fdc75
commit
1da2abf4b5
@ -31,7 +31,7 @@ use std::convert::TryInto;
|
|||||||
use std::ffi;
|
use std::ffi;
|
||||||
use std::fs::{File, OpenOptions};
|
use std::fs::{File, OpenOptions};
|
||||||
use std::io::{self, Read};
|
use std::io::{self, Read};
|
||||||
use std::ops::Deref;
|
use std::ops::{BitAnd, Deref, Not, Sub};
|
||||||
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
|
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::result;
|
use std::result;
|
||||||
@ -330,6 +330,12 @@ pub enum Error {
|
|||||||
|
|
||||||
/// Using a directory as a backing file for memory is not supported
|
/// Using a directory as a backing file for memory is not supported
|
||||||
DirectoryAsBackingFileForMemory,
|
DirectoryAsBackingFileForMemory,
|
||||||
|
|
||||||
|
/// Failed to stat filesystem
|
||||||
|
GetFileSystemBlockSize(io::Error),
|
||||||
|
|
||||||
|
/// Memory size is misaligned with default page size or its hugepage size
|
||||||
|
MisalignedMemorySize,
|
||||||
}
|
}
|
||||||
|
|
||||||
const ENABLE_FLAG: usize = 0;
|
const ENABLE_FLAG: usize = 0;
|
||||||
@ -353,6 +359,77 @@ fn mmio_address_space_size(phys_bits: u8) -> u64 {
|
|||||||
(1 << phys_bits) - (1 << 16)
|
(1 << phys_bits) - (1 << 16)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The `statfs` function can get information of hugetlbfs, and the hugepage size is in the
|
||||||
|
// `f_bsize` field.
|
||||||
|
//
|
||||||
|
// See: https://github.com/torvalds/linux/blob/v6.3/fs/hugetlbfs/inode.c#L1169
|
||||||
|
fn statfs_get_bsize(path: &str) -> Result<u64, Error> {
|
||||||
|
let path = std::ffi::CString::new(path).map_err(|_| Error::InvalidMemoryParameters)?;
|
||||||
|
let mut buf = std::mem::MaybeUninit::<libc::statfs>::uninit();
|
||||||
|
|
||||||
|
// SAFETY: FFI call with a valid path and buffer
|
||||||
|
let ret = unsafe { libc::statfs(path.as_ptr(), buf.as_mut_ptr()) };
|
||||||
|
if ret != 0 {
|
||||||
|
return Err(Error::GetFileSystemBlockSize(
|
||||||
|
std::io::Error::last_os_error(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// SAFETY: `buf` is valid at this point
|
||||||
|
// Because this value is always positive, just convert it directly.
|
||||||
|
// Note that the `f_bsize` is `i64` in glibc and `u64` in musl, using `as u64` will be warned
|
||||||
|
// by `clippy` on musl target. To avoid the warning, there should be `as _` instead of
|
||||||
|
// `as u64`.
|
||||||
|
let bsize = unsafe { (*buf.as_ptr()).f_bsize } as _;
|
||||||
|
Ok(bsize)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn memory_zone_get_align_size(zone: &MemoryZoneConfig) -> Result<u64, Error> {
|
||||||
|
// SAFETY: FFI call. Trivially safe.
|
||||||
|
let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 };
|
||||||
|
|
||||||
|
// There is no backend file and the `hugepages` is disabled, just use system page size.
|
||||||
|
if zone.file.is_none() && !zone.hugepages {
|
||||||
|
return Ok(page_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The `hugepages` is enabled and the `hugepage_size` is specified, just use it directly.
|
||||||
|
if zone.hugepages && zone.hugepage_size.is_some() {
|
||||||
|
return Ok(zone.hugepage_size.unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
// There are two scenarios here:
|
||||||
|
// - `hugepages` is enabled but `hugepage_size` is not specified:
|
||||||
|
// Call `statfs` for `/dev/hugepages` for getting the default size of hugepage
|
||||||
|
// - The backing file is specified:
|
||||||
|
// Call `statfs` for the file and get its `f_bsize`. If the value is larger than the page
|
||||||
|
// size of normal page, just use the `f_bsize` because the file is in a hugetlbfs. If the
|
||||||
|
// value is less than or equal to the page size, just use the page size.
|
||||||
|
let path = zone.file.as_ref().map_or(Ok("/dev/hugepages"), |pathbuf| {
|
||||||
|
pathbuf.to_str().ok_or(Error::InvalidMemoryParameters)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let align_size = std::cmp::max(page_size, statfs_get_bsize(path)?);
|
||||||
|
|
||||||
|
Ok(align_size)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Rounds `val` down to the nearest multiple of `align`.
///
/// The result is computed as `val & !(align - 1)`, so it is only meaningful when `align` is a
/// non-zero power of two. Passing `align == 0` makes the subtraction underflow.
#[inline]
fn align_down<T>(val: T, align: T) -> T
where
    T: BitAnd<Output = T> + Not<Output = T> + Sub<Output = T> + From<u8>,
{
    // For a power-of-two `align`, `align - 1` has exactly the low "offset" bits set.
    let low_bits = align - T::from(1u8);
    val & !low_bits
}
|
||||||
|
|
||||||
|
/// Returns `true` when `val` is a multiple of `align`.
///
/// The check is `val & (align - 1) == 0`, so it is only meaningful when `align` is a non-zero
/// power of two. Passing `align == 0` makes the subtraction underflow.
#[inline]
fn is_aligned<T>(val: T, align: T) -> bool
where
    T: BitAnd<Output = T> + Sub<Output = T> + From<u8> + PartialEq,
{
    // For a power-of-two `align`, `align - 1` has exactly the low "offset" bits set; an aligned
    // value has none of them set.
    let low_bits = align - T::from(1u8);
    (val & low_bits) == T::from(0u8)
}
|
||||||
|
|
||||||
impl BusDevice for MemoryManager {
|
impl BusDevice for MemoryManager {
|
||||||
fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
|
fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
|
||||||
if self.selected_slot < self.hotplug_slots.len() {
|
if self.selected_slot < self.hotplug_slots.len() {
|
||||||
@ -451,9 +528,14 @@ impl MemoryManager {
|
|||||||
let mut zones = zones.to_owned();
|
let mut zones = zones.to_owned();
|
||||||
let mut mem_regions = Vec::new();
|
let mut mem_regions = Vec::new();
|
||||||
let mut zone = zones.remove(0);
|
let mut zone = zones.remove(0);
|
||||||
let mut zone_offset = 0;
|
let mut zone_align_size = memory_zone_get_align_size(&zone)?;
|
||||||
|
let mut zone_offset = 0u64;
|
||||||
let mut memory_zones = HashMap::new();
|
let mut memory_zones = HashMap::new();
|
||||||
|
|
||||||
|
if !is_aligned(zone.size, zone_align_size) {
|
||||||
|
return Err(Error::MisalignedMemorySize);
|
||||||
|
}
|
||||||
|
|
||||||
// Add zone id to the list of memory zones.
|
// Add zone id to the list of memory zones.
|
||||||
memory_zones.insert(zone.id.clone(), MemoryZone::default());
|
memory_zones.insert(zone.id.clone(), MemoryZone::default());
|
||||||
|
|
||||||
@ -465,16 +547,20 @@ impl MemoryManager {
|
|||||||
let mut ram_region_consumed = false;
|
let mut ram_region_consumed = false;
|
||||||
let mut pull_next_zone = false;
|
let mut pull_next_zone = false;
|
||||||
|
|
||||||
let ram_region_sub_size = ram_region.1 - ram_region_offset;
|
let ram_region_available_size =
|
||||||
let zone_sub_size = zone.size as usize - zone_offset;
|
align_down(ram_region.1 as u64 - ram_region_offset, zone_align_size);
|
||||||
|
if ram_region_available_size == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let zone_sub_size = zone.size - zone_offset;
|
||||||
|
|
||||||
let file_offset = zone_offset as u64;
|
let file_offset = zone_offset;
|
||||||
let region_start = ram_region
|
let region_start = ram_region
|
||||||
.0
|
.0
|
||||||
.checked_add(ram_region_offset as u64)
|
.checked_add(ram_region_offset)
|
||||||
.ok_or(Error::GuestAddressOverFlow)?;
|
.ok_or(Error::GuestAddressOverFlow)?;
|
||||||
let region_size = if zone_sub_size <= ram_region_sub_size {
|
let region_size = if zone_sub_size <= ram_region_available_size {
|
||||||
if zone_sub_size == ram_region_sub_size {
|
if zone_sub_size == ram_region_available_size {
|
||||||
ram_region_consumed = true;
|
ram_region_consumed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -483,17 +569,23 @@ impl MemoryManager {
|
|||||||
|
|
||||||
zone_sub_size
|
zone_sub_size
|
||||||
} else {
|
} else {
|
||||||
zone_offset += ram_region_sub_size;
|
zone_offset += ram_region_available_size;
|
||||||
ram_region_consumed = true;
|
ram_region_consumed = true;
|
||||||
|
|
||||||
ram_region_sub_size
|
ram_region_available_size
|
||||||
};
|
};
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"create ram region for zone {}, region_start: {:#x}, region_size: {:#x}",
|
||||||
|
zone.id,
|
||||||
|
region_start.raw_value(),
|
||||||
|
region_size
|
||||||
|
);
|
||||||
let region = MemoryManager::create_ram_region(
|
let region = MemoryManager::create_ram_region(
|
||||||
&zone.file,
|
&zone.file,
|
||||||
file_offset,
|
file_offset,
|
||||||
region_start,
|
region_start,
|
||||||
region_size,
|
region_size as usize,
|
||||||
prefault.unwrap_or(zone.prefault),
|
prefault.unwrap_or(zone.prefault),
|
||||||
zone.shared,
|
zone.shared,
|
||||||
zone.hugepages,
|
zone.hugepages,
|
||||||
@ -519,6 +611,10 @@ impl MemoryManager {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
zone = zones.remove(0);
|
zone = zones.remove(0);
|
||||||
|
zone_align_size = memory_zone_get_align_size(&zone)?;
|
||||||
|
if !is_aligned(zone.size, zone_align_size) {
|
||||||
|
return Err(Error::MisalignedMemorySize);
|
||||||
|
}
|
||||||
|
|
||||||
// Check if zone id already exist. In case it does, throw
|
// Check if zone id already exist. In case it does, throw
|
||||||
// an error as we need unique identifiers. Otherwise, add
|
// an error as we need unique identifiers. Otherwise, add
|
||||||
|
@ -609,6 +609,7 @@ fn vmm_thread_rules(
|
|||||||
(libc::SYS_socketpair, vec![]),
|
(libc::SYS_socketpair, vec![]),
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(target_arch = "x86_64")]
|
||||||
(libc::SYS_stat, vec![]),
|
(libc::SYS_stat, vec![]),
|
||||||
|
(libc::SYS_statfs, vec![]),
|
||||||
(libc::SYS_statx, vec![]),
|
(libc::SYS_statx, vec![]),
|
||||||
(libc::SYS_tgkill, vec![]),
|
(libc::SYS_tgkill, vec![]),
|
||||||
(libc::SYS_timerfd_create, vec![]),
|
(libc::SYS_timerfd_create, vec![]),
|
||||||
|
Loading…
Reference in New Issue
Block a user