From f21c04166a52a88155c17e5196dc089649238843 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Fri, 4 Sep 2020 11:26:43 +0200 Subject: [PATCH] vmm: Move NUMA node list creation to Vm structure Based on the previous changes introducing new options for both memory zones and NUMA configuration, this patch changes the behavior of the NUMA node definition. Instead of relying on the memory zones to define the guest NUMA nodes, everything goes through the --numa parameter. This allows for defining NUMA nodes without associating any particular memory range to it. And in case one wants to associate one or multiple memory ranges to it, the expectation is to describe a list of memory zone through the --numa parameter. Signed-off-by: Sebastien Boeuf --- vmm/src/acpi.rs | 3 +- vmm/src/memory_manager.rs | 66 ++--------------------- vmm/src/vm.rs | 107 ++++++++++++++++++++++++++++++-------- 3 files changed, 90 insertions(+), 86 deletions(-) diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index fc73cfedf..2143e6f24 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -5,6 +5,7 @@ use crate::cpu::CpuManager; use crate::device_manager::DeviceManager; use crate::memory_manager::MemoryManager; +use crate::vm::NumaNodes; use acpi_tables::{ aml::Aml, rsdp::RSDP, @@ -73,6 +74,7 @@ pub fn create_acpi_tables( device_manager: &Arc>, cpu_manager: &Arc>, memory_manager: &Arc>, + numa_nodes: &NumaNodes, ) -> GuestAddress { // RSDP is at the EBDA let rsdp_offset = layout::RSDP_POINTER; @@ -152,7 +154,6 @@ pub fn create_acpi_tables( // SRAT and SLIT // Only created if the NUMA nodes list is not empty. - let numa_nodes = memory_manager.lock().unwrap().numa_nodes().clone(); let (prev_tbl_len, prev_tbl_off) = if numa_nodes.is_empty() { (mcfg.len(), mcfg_offset) } else { diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index 96da6c181..056d2fad9 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -19,7 +19,7 @@ use devices::BusDevice; #[cfg(target_arch = "x86_64")] use libc::{MAP_NORESERVE, MAP_POPULATE, MAP_SHARED, PROT_READ, PROT_WRITE}; -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use std::convert::TryInto; use std::ffi; use std::fs::{File, OpenOptions}; @@ -65,37 +65,6 @@ struct HotPlugState { removing: bool, } -#[derive(Clone)] -pub struct NumaNode { - memory_regions: Vec>, - cpus: Vec, - distances: BTreeMap, -} - -impl NumaNode { - pub fn memory_regions(&self) -> &Vec> { - &self.memory_regions - } - - pub fn cpus(&self) -> &Vec { - &self.cpus - } - - pub fn cpus_mut(&mut self) -> &mut Vec { - &mut self.cpus - } - - pub fn distances(&self) -> &BTreeMap { - &self.distances - } - - pub fn distances_mut(&mut self) -> &mut BTreeMap { - &mut self.distances - } -} - -pub type NumaNodes = BTreeMap; - pub type MemoryZones = HashMap>>; pub struct MemoryManager { @@ -122,7 +91,6 @@ pub struct MemoryManager { sgx_epc_region: Option, use_zones: bool, snapshot_memory_regions: Vec, - numa_nodes: NumaNodes, memory_zones: MemoryZones, } @@ -325,12 +293,11 @@ impl MemoryManager { zones: &[MemoryZoneConfig], prefault: bool, ext_regions: Option>, - ) -> Result<(Vec>, NumaNodes, MemoryZones), Error> { + ) -> Result<(Vec>, MemoryZones), Error> { let mut zones = zones.to_owned(); let mut mem_regions = Vec::new(); let mut zone = zones.remove(0); let mut zone_offset = 0; - let mut numa_nodes = BTreeMap::new(); let mut memory_zones = HashMap::new(); // Add zone id to the list of memory zones. @@ -377,22 +344,6 @@ impl MemoryManager { &ext_regions, )?; - // Fill the list of NUMA nodes. - if let Some(node_id) = zone.guest_numa_node { - if let Some(node) = numa_nodes.get_mut(&node_id) as Option<&mut NumaNode> { - node.memory_regions.push(region.clone()); - } else { - numa_nodes.insert( - node_id, - NumaNode { - memory_regions: vec![region.clone()], - cpus: Vec::new(), - distances: BTreeMap::new(), - }, - ); - } - } - // Add region to the list of regions associated with the // current memory zone. if let Some(memory_zone) = memory_zones.get_mut(&zone.id) { @@ -434,7 +385,7 @@ impl MemoryManager { } } - Ok((mem_regions, numa_nodes, memory_zones)) + Ok((mem_regions, memory_zones)) } pub fn new( @@ -508,7 +459,7 @@ impl MemoryManager { .map(|r| (r.0, r.1)) .collect(); - let (mem_regions, numa_nodes, memory_zones) = + let (mem_regions, memory_zones) = Self::create_memory_regions_from_zones(&ram_regions, &zones, prefault, ext_regions)?; let guest_memory = @@ -600,7 +551,6 @@ impl MemoryManager { sgx_epc_region: None, use_zones, snapshot_memory_regions: Vec::new(), - numa_nodes, memory_zones, })); @@ -1283,14 +1233,6 @@ impl MemoryManager { unsafe { (*stat.as_ptr()).st_nlink as usize > 0 } } - pub fn numa_nodes(&self) -> &NumaNodes { - &self.numa_nodes - } - - pub fn numa_nodes_mut(&mut self) -> &mut NumaNodes { - &mut self.numa_nodes - } - pub fn memory_zones(&self) -> &MemoryZones { &self.memory_zones } diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 9cd7dc9e4..024ebea41 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -23,9 +23,11 @@ extern crate signal_hook; extern crate vm_allocator; extern crate vm_memory; +#[cfg(feature = "acpi")] +use crate::config::NumaConfig; use crate::config::{ - DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, NumaConfig, PmemConfig, - ValidationError, VmConfig, VsockConfig, + DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, PmemConfig, ValidationError, + VmConfig, VsockConfig, }; use crate::cpu; use crate::device_manager::{self, get_win_size, Console, DeviceManager, DeviceManagerError}; @@ -47,7 +49,7 @@ use linux_loader::loader::elf::PvhBootCapability::PvhEntryPresent; use linux_loader::loader::KernelLoader; use seccomp::SeccompAction; use signal_hook::{iterator::Signals, SIGINT, SIGTERM, SIGWINCH}; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::convert::TryInto; use std::ffi::CString; use std::fs::{File, OpenOptions}; @@ -59,7 +61,9 @@ use std::path::PathBuf; use std::sync::{Arc, Mutex, RwLock}; use std::{result, str, thread}; use url::Url; -use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap}; +use vm_memory::{ + Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap, GuestRegionMmap, +}; use vm_migration::{ Migratable, MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable, Transportable, @@ -201,6 +205,29 @@ pub enum Error { } pub type Result = result::Result; +#[derive(Clone)] +pub struct NumaNode { + memory_regions: Vec>, + cpus: Vec, + distances: BTreeMap, +} + +impl NumaNode { + pub fn memory_regions(&self) -> &Vec> { + &self.memory_regions + } + + pub fn cpus(&self) -> &Vec { + &self.cpus + } + + pub fn distances(&self) -> &BTreeMap { + &self.distances + } +} + +pub type NumaNodes = BTreeMap; + #[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq)] pub enum VmState { Created, @@ -259,6 +286,8 @@ pub struct Vm { vm: Arc, #[cfg(target_arch = "x86_64")] saved_clock: Option, + #[cfg(feature = "acpi")] + numa_nodes: NumaNodes, } impl Vm { @@ -314,10 +343,10 @@ impl Vm { .transpose() .map_err(Error::InitramfsFile)?; - // Update NUMA based on NumaConfig. - if let Some(numa_cfg) = config.lock().unwrap().numa.clone() { - Self::update_numa(numa_cfg, &memory_manager)?; - } + // Create NUMA nodes based on NumaConfig. + #[cfg(feature = "acpi")] + let numa_nodes = + Self::create_numa_nodes(config.lock().unwrap().numa.clone(), &memory_manager)?; Ok(Vm { kernel, @@ -333,21 +362,48 @@ impl Vm { vm, #[cfg(target_arch = "x86_64")] saved_clock: _saved_clock, + #[cfg(feature = "acpi")] + numa_nodes, }) } - fn update_numa( - configs: Vec, + #[cfg(feature = "acpi")] + fn create_numa_nodes( + configs: Option>, memory_manager: &Arc>, - ) -> Result<()> { - let mut mm = memory_manager.lock().unwrap(); - let numa_nodes = mm.numa_nodes_mut(); - let existing_nodes: Vec = numa_nodes.keys().cloned().collect(); + ) -> Result { + let mm = memory_manager.lock().unwrap(); + let mm_zones = mm.memory_zones(); + let mut numa_nodes = BTreeMap::new(); + + if let Some(configs) = &configs { + let node_id_list: Vec = configs.iter().map(|cfg| cfg.id).collect(); + + for config in configs.iter() { + if numa_nodes.contains_key(&config.id) { + error!("Can't define twice the same NUMA node"); + return Err(Error::InvalidNumaConfig); + } + + let mut node = NumaNode { + memory_regions: Vec::new(), + cpus: Vec::new(), + distances: BTreeMap::new(), + }; + + if let Some(memory_zones) = &config.memory_zones { + for memory_zone in memory_zones.iter() { + if let Some(mm_zone) = mm_zones.get(memory_zone) { + node.memory_regions.extend(mm_zone.clone()); + } else { + error!("Unknown memory zone '{}'", memory_zone); + return Err(Error::InvalidNumaConfig); + } + } + } - for config in configs.iter() { - if let Some(node) = numa_nodes.get_mut(&config.id) { if let Some(cpus) = &config.cpus { - node.cpus_mut().extend(cpus); + node.cpus.extend(cpus); } if let Some(distances) = &config.distances { @@ -355,21 +411,25 @@ impl Vm { let dest = distance.destination; let dist = distance.distance; - if !existing_nodes.contains(&dest) { + if !node_id_list.contains(&dest) { error!("Unknown destination NUMA node {}", dest); return Err(Error::InvalidNumaConfig); } - node.distances_mut().insert(dest, dist); + if node.distances.contains_key(&dest) { + error!("Destination NUMA node {} has been already set", dest); + return Err(Error::InvalidNumaConfig); + } + + node.distances.insert(dest, dist); } } - } else { - error!("Unknown NUMA node {}", config.id); - return Err(Error::InvalidNumaConfig); + + numa_nodes.insert(config.id, node); } } - Ok(()) + Ok(numa_nodes) } pub fn new( @@ -630,6 +690,7 @@ impl Vm { &self.device_manager, &self.cpu_manager, &self.memory_manager, + &self.numa_nodes, )); }