mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2024-10-01 11:05:46 +00:00
vmm: memory_manager: Apply NUMA policy to memory zones
Relying on the new option 'host_numa_node' from the 'memory-zone' parameter, the user can now define which NUMA node from the host should be used to back the current memory zone. Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
parent
e6f585a31c
commit
1b4591aecc
@ -2217,7 +2217,12 @@ mod tests {
|
|||||||
let mut cmd = GuestCommand::new(&guest);
|
let mut cmd = GuestCommand::new(&guest);
|
||||||
cmd.args(&["--cpus", "boot=1"])
|
cmd.args(&["--cpus", "boot=1"])
|
||||||
.args(&["--memory", "size=0"])
|
.args(&["--memory", "size=0"])
|
||||||
.args(&["--memory-zone", "size=1G", "size=3G,file=/dev/shm"])
|
.args(&[
|
||||||
|
"--memory-zone",
|
||||||
|
"size=1G",
|
||||||
|
"size=3G,file=/dev/shm",
|
||||||
|
"size=1G,host_numa_node=0",
|
||||||
|
])
|
||||||
.args(&["--kernel", guest.fw_path.as_str()])
|
.args(&["--kernel", guest.fw_path.as_str()])
|
||||||
.default_disks()
|
.default_disks()
|
||||||
.default_net();
|
.default_net();
|
||||||
|
@ -23,6 +23,7 @@ use std::convert::TryInto;
|
|||||||
use std::ffi;
|
use std::ffi;
|
||||||
use std::fs::{File, OpenOptions};
|
use std::fs::{File, OpenOptions};
|
||||||
use std::io;
|
use std::io;
|
||||||
|
use std::ops::Deref;
|
||||||
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
|
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::result;
|
use std::result;
|
||||||
@ -47,6 +48,11 @@ const X86_64_IRQ_BASE: u32 = 5;
|
|||||||
|
|
||||||
const HOTPLUG_COUNT: usize = 8;
|
const HOTPLUG_COUNT: usize = 8;
|
||||||
|
|
||||||
|
// Memory policy constants
|
||||||
|
const MPOL_BIND: u32 = 2;
|
||||||
|
const MPOL_MF_STRICT: u32 = 1 << 0;
|
||||||
|
const MPOL_MF_MOVE: u32 = 1 << 1;
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
struct HotPlugState {
|
struct HotPlugState {
|
||||||
base: u64,
|
base: u64,
|
||||||
@ -165,6 +171,13 @@ pub enum Error {
|
|||||||
/// Forbidden operation. Impossible to resize guest memory if it is
|
/// Forbidden operation. Impossible to resize guest memory if it is
|
||||||
/// backed by user defined memory regions.
|
/// backed by user defined memory regions.
|
||||||
InvalidResizeWithMemoryZones,
|
InvalidResizeWithMemoryZones,
|
||||||
|
|
||||||
|
/// It's invalid to try applying a NUMA policy to a memory zone that is
|
||||||
|
/// memory mapped with MAP_SHARED.
|
||||||
|
InvalidSharedMemoryZoneWithHostNuma,
|
||||||
|
|
||||||
|
/// Failed applying NUMA memory policy.
|
||||||
|
ApplyNumaPolicy(io::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
const ENABLE_FLAG: usize = 0;
|
const ENABLE_FLAG: usize = 0;
|
||||||
@ -314,6 +327,7 @@ impl MemoryManager {
|
|||||||
prefault,
|
prefault,
|
||||||
zone.shared,
|
zone.shared,
|
||||||
zone.hugepages,
|
zone.hugepages,
|
||||||
|
zone.host_numa_node,
|
||||||
&ext_regions,
|
&ext_regions,
|
||||||
)?);
|
)?);
|
||||||
|
|
||||||
@ -387,6 +401,14 @@ impl MemoryManager {
|
|||||||
let mut total_ram_size: u64 = 0;
|
let mut total_ram_size: u64 = 0;
|
||||||
for zone in zones.iter() {
|
for zone in zones.iter() {
|
||||||
total_ram_size += zone.size;
|
total_ram_size += zone.size;
|
||||||
|
|
||||||
|
if zone.shared && zone.host_numa_node.is_some() {
|
||||||
|
error!(
|
||||||
|
"Invalid to set host NUMA policy for a memory zone \
|
||||||
|
mapped as 'shared'"
|
||||||
|
);
|
||||||
|
return Err(Error::InvalidSharedMemoryZoneWithHostNuma);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
(total_ram_size, zones)
|
(total_ram_size, zones)
|
||||||
@ -431,6 +453,7 @@ impl MemoryManager {
|
|||||||
false,
|
false,
|
||||||
config.shared,
|
config.shared,
|
||||||
config.hugepages,
|
config.hugepages,
|
||||||
|
None,
|
||||||
&None,
|
&None,
|
||||||
)?);
|
)?);
|
||||||
}
|
}
|
||||||
@ -594,6 +617,33 @@ impl MemoryManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn mbind(
|
||||||
|
addr: *mut u8,
|
||||||
|
len: u64,
|
||||||
|
mode: u32,
|
||||||
|
nodemask: Vec<u64>,
|
||||||
|
maxnode: u64,
|
||||||
|
flags: u32,
|
||||||
|
) -> Result<(), io::Error> {
|
||||||
|
let res = unsafe {
|
||||||
|
libc::syscall(
|
||||||
|
libc::SYS_mbind,
|
||||||
|
addr as *mut libc::c_void,
|
||||||
|
len,
|
||||||
|
mode,
|
||||||
|
nodemask.as_ptr(),
|
||||||
|
maxnode,
|
||||||
|
flags,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
if res < 0 {
|
||||||
|
Err(io::Error::last_os_error())
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn create_ram_region(
|
fn create_ram_region(
|
||||||
file: &Option<PathBuf>,
|
file: &Option<PathBuf>,
|
||||||
@ -603,6 +653,7 @@ impl MemoryManager {
|
|||||||
prefault: bool,
|
prefault: bool,
|
||||||
shared: bool,
|
shared: bool,
|
||||||
hugepages: bool,
|
hugepages: bool,
|
||||||
|
host_numa_node: Option<u64>,
|
||||||
ext_regions: &Option<Vec<MemoryRegion>>,
|
ext_regions: &Option<Vec<MemoryRegion>>,
|
||||||
) -> Result<Arc<GuestRegionMmap>, Error> {
|
) -> Result<Arc<GuestRegionMmap>, Error> {
|
||||||
let mut backing_file: Option<PathBuf> = file.clone();
|
let mut backing_file: Option<PathBuf> = file.clone();
|
||||||
@ -716,6 +767,38 @@ impl MemoryManager {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply NUMA policy if needed.
|
||||||
|
if let Some(node) = host_numa_node {
|
||||||
|
let addr = region.deref().as_ptr();
|
||||||
|
let len = region.deref().size() as u64;
|
||||||
|
let mode = MPOL_BIND;
|
||||||
|
let mut nodemask: Vec<u64> = Vec::new();
|
||||||
|
let flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
|
||||||
|
|
||||||
|
// Linux is kind of buggy in the way it interprets maxnode as it
|
||||||
|
// will cut off the last node. That's why we have to add 1 to what
|
||||||
|
// we would consider as the proper maxnode value.
|
||||||
|
let maxnode = node + 1 + 1;
|
||||||
|
|
||||||
|
// Allocate the right size for the vector.
|
||||||
|
nodemask.resize((node as usize / 64) + 1, 0);
|
||||||
|
|
||||||
|
// Fill the global bitmask through the nodemask vector.
|
||||||
|
let idx = (node / 64) as usize;
|
||||||
|
let shift = node % 64;
|
||||||
|
nodemask[idx] |= 1u64 << shift;
|
||||||
|
|
||||||
|
// Policies are enforced by using MPOL_MF_MOVE flag as it will
|
||||||
|
// force the kernel to move all pages that might have been already
|
||||||
|
// allocated to the proper set of NUMA nodes. MPOL_MF_STRICT is
|
||||||
|
// used to throw an error if MPOL_MF_MOVE didn't succeed.
|
||||||
|
// MPOL_BIND is the selected mode as it specifies a strict policy
|
||||||
|
// that restricts memory allocation to the nodes specified in the
|
||||||
|
// nodemask.
|
||||||
|
Self::mbind(addr, len, mode, nodemask, maxnode, flags)
|
||||||
|
.map_err(Error::ApplyNumaPolicy)?;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(Arc::new(region))
|
Ok(Arc::new(region))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -783,6 +866,7 @@ impl MemoryManager {
|
|||||||
false,
|
false,
|
||||||
self.shared,
|
self.shared,
|
||||||
self.hugepages,
|
self.hugepages,
|
||||||
|
None,
|
||||||
&None,
|
&None,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
@ -289,6 +289,7 @@ fn vmm_thread_rules() -> Result<Vec<SyscallRuleSet>, Error> {
|
|||||||
allow_syscall(libc::SYS_listen),
|
allow_syscall(libc::SYS_listen),
|
||||||
allow_syscall(libc::SYS_lseek),
|
allow_syscall(libc::SYS_lseek),
|
||||||
allow_syscall(libc::SYS_madvise),
|
allow_syscall(libc::SYS_madvise),
|
||||||
|
allow_syscall(libc::SYS_mbind),
|
||||||
allow_syscall(libc::SYS_memfd_create),
|
allow_syscall(libc::SYS_memfd_create),
|
||||||
allow_syscall(libc::SYS_mmap),
|
allow_syscall(libc::SYS_mmap),
|
||||||
allow_syscall(libc::SYS_mprotect),
|
allow_syscall(libc::SYS_mprotect),
|
||||||
|
Loading…
Reference in New Issue
Block a user