vmm: Move NUMA node list creation to Vm structure

Based on the previous changes introducing new options for both memory
zones and NUMA configuration, this patch changes the behavior of the
NUMA node definition. Instead of relying on the memory zones to define
the guest NUMA nodes, everything goes through the --numa parameter. This
allows for defining NUMA nodes without associating any particular memory
range to them. And in case one wants to associate one or multiple memory
ranges to a node, the expectation is to describe a list of memory zones
through the --numa parameter.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
Sebastien Boeuf 2020-09-04 11:26:43 +02:00
parent dc42324351
commit f21c04166a
3 changed files with 90 additions and 86 deletions

View File

@ -5,6 +5,7 @@
use crate::cpu::CpuManager;
use crate::device_manager::DeviceManager;
use crate::memory_manager::MemoryManager;
use crate::vm::NumaNodes;
use acpi_tables::{
aml::Aml,
rsdp::RSDP,
@ -73,6 +74,7 @@ pub fn create_acpi_tables(
device_manager: &Arc<Mutex<DeviceManager>>,
cpu_manager: &Arc<Mutex<CpuManager>>,
memory_manager: &Arc<Mutex<MemoryManager>>,
numa_nodes: &NumaNodes,
) -> GuestAddress {
// RSDP is at the EBDA
let rsdp_offset = layout::RSDP_POINTER;
@ -152,7 +154,6 @@ pub fn create_acpi_tables(
// SRAT and SLIT
// Only created if the NUMA nodes list is not empty.
let numa_nodes = memory_manager.lock().unwrap().numa_nodes().clone();
let (prev_tbl_len, prev_tbl_off) = if numa_nodes.is_empty() {
(mcfg.len(), mcfg_offset)
} else {

View File

@ -19,7 +19,7 @@ use devices::BusDevice;
#[cfg(target_arch = "x86_64")]
use libc::{MAP_NORESERVE, MAP_POPULATE, MAP_SHARED, PROT_READ, PROT_WRITE};
use std::collections::{BTreeMap, HashMap};
use std::collections::HashMap;
use std::convert::TryInto;
use std::ffi;
use std::fs::{File, OpenOptions};
@ -65,37 +65,6 @@ struct HotPlugState {
removing: bool,
}
/// Description of a single guest NUMA node: the memory backing it, the
/// vCPUs attached to it, and its distances to other nodes.
#[derive(Clone)]
pub struct NumaNode {
/// Guest memory regions associated with this node.
memory_regions: Vec<Arc<GuestRegionMmap>>,
/// Identifiers of the vCPUs belonging to this node.
cpus: Vec<u8>,
/// Distance map keyed by destination node id; value is the relative
/// distance reported to the guest (e.g. via ACPI SLIT).
distances: BTreeMap<u32, u8>,
}
impl NumaNode {
/// Returns the list of guest memory regions backing this node.
pub fn memory_regions(&self) -> &Vec<Arc<GuestRegionMmap>> {
&self.memory_regions
}
/// Returns the list of vCPU identifiers attached to this node.
pub fn cpus(&self) -> &Vec<u8> {
&self.cpus
}
/// Mutable access to the vCPU list, used while the node is being
/// populated from the NUMA configuration.
pub fn cpus_mut(&mut self) -> &mut Vec<u8> {
&mut self.cpus
}
/// Returns the distances map (destination node id -> distance).
pub fn distances(&self) -> &BTreeMap<u32, u8> {
&self.distances
}
/// Mutable access to the distances map, used while the node is being
/// populated from the NUMA configuration.
pub fn distances_mut(&mut self) -> &mut BTreeMap<u32, u8> {
&mut self.distances
}
}
pub type NumaNodes = BTreeMap<u32, NumaNode>;
pub type MemoryZones = HashMap<String, Vec<Arc<GuestRegionMmap>>>;
pub struct MemoryManager {
@ -122,7 +91,6 @@ pub struct MemoryManager {
sgx_epc_region: Option<SgxEpcRegion>,
use_zones: bool,
snapshot_memory_regions: Vec<MemoryRegion>,
numa_nodes: NumaNodes,
memory_zones: MemoryZones,
}
@ -325,12 +293,11 @@ impl MemoryManager {
zones: &[MemoryZoneConfig],
prefault: bool,
ext_regions: Option<Vec<MemoryRegion>>,
) -> Result<(Vec<Arc<GuestRegionMmap>>, NumaNodes, MemoryZones), Error> {
) -> Result<(Vec<Arc<GuestRegionMmap>>, MemoryZones), Error> {
let mut zones = zones.to_owned();
let mut mem_regions = Vec::new();
let mut zone = zones.remove(0);
let mut zone_offset = 0;
let mut numa_nodes = BTreeMap::new();
let mut memory_zones = HashMap::new();
// Add zone id to the list of memory zones.
@ -377,22 +344,6 @@ impl MemoryManager {
&ext_regions,
)?;
// Fill the list of NUMA nodes.
if let Some(node_id) = zone.guest_numa_node {
if let Some(node) = numa_nodes.get_mut(&node_id) as Option<&mut NumaNode> {
node.memory_regions.push(region.clone());
} else {
numa_nodes.insert(
node_id,
NumaNode {
memory_regions: vec![region.clone()],
cpus: Vec::new(),
distances: BTreeMap::new(),
},
);
}
}
// Add region to the list of regions associated with the
// current memory zone.
if let Some(memory_zone) = memory_zones.get_mut(&zone.id) {
@ -434,7 +385,7 @@ impl MemoryManager {
}
}
Ok((mem_regions, numa_nodes, memory_zones))
Ok((mem_regions, memory_zones))
}
pub fn new(
@ -508,7 +459,7 @@ impl MemoryManager {
.map(|r| (r.0, r.1))
.collect();
let (mem_regions, numa_nodes, memory_zones) =
let (mem_regions, memory_zones) =
Self::create_memory_regions_from_zones(&ram_regions, &zones, prefault, ext_regions)?;
let guest_memory =
@ -600,7 +551,6 @@ impl MemoryManager {
sgx_epc_region: None,
use_zones,
snapshot_memory_regions: Vec::new(),
numa_nodes,
memory_zones,
}));
@ -1283,14 +1233,6 @@ impl MemoryManager {
unsafe { (*stat.as_ptr()).st_nlink as usize > 0 }
}
/// Immutable accessor for the NUMA nodes built by the memory manager.
pub fn numa_nodes(&self) -> &NumaNodes {
&self.numa_nodes
}
/// Mutable accessor for the NUMA nodes, allowing callers (e.g. the Vm)
/// to amend the node list after memory creation.
pub fn numa_nodes_mut(&mut self) -> &mut NumaNodes {
&mut self.numa_nodes
}
/// Returns the map of memory zone id -> guest memory regions.
pub fn memory_zones(&self) -> &MemoryZones {
&self.memory_zones
}

View File

@ -23,9 +23,11 @@ extern crate signal_hook;
extern crate vm_allocator;
extern crate vm_memory;
#[cfg(feature = "acpi")]
use crate::config::NumaConfig;
use crate::config::{
DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, NumaConfig, PmemConfig,
ValidationError, VmConfig, VsockConfig,
DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, PmemConfig, ValidationError,
VmConfig, VsockConfig,
};
use crate::cpu;
use crate::device_manager::{self, get_win_size, Console, DeviceManager, DeviceManagerError};
@ -47,7 +49,7 @@ use linux_loader::loader::elf::PvhBootCapability::PvhEntryPresent;
use linux_loader::loader::KernelLoader;
use seccomp::SeccompAction;
use signal_hook::{iterator::Signals, SIGINT, SIGTERM, SIGWINCH};
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::convert::TryInto;
use std::ffi::CString;
use std::fs::{File, OpenOptions};
@ -59,7 +61,9 @@ use std::path::PathBuf;
use std::sync::{Arc, Mutex, RwLock};
use std::{result, str, thread};
use url::Url;
use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap};
use vm_memory::{
Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap, GuestRegionMmap,
};
use vm_migration::{
Migratable, MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable,
Transportable,
@ -201,6 +205,29 @@ pub enum Error {
}
pub type Result<T> = result::Result<T, Error>;
/// Guest NUMA node definition, now owned by the Vm structure and built
/// exclusively from the --numa configuration (NumaConfig).
#[derive(Clone)]
pub struct NumaNode {
/// Guest memory regions resolved from the memory zones listed in the
/// node's NumaConfig entry.
memory_regions: Vec<Arc<GuestRegionMmap>>,
/// Identifiers of the vCPUs attached to this node.
cpus: Vec<u8>,
/// Distance map keyed by destination node id.
distances: BTreeMap<u32, u8>,
}
impl NumaNode {
/// Returns the guest memory regions associated with this node.
pub fn memory_regions(&self) -> &Vec<Arc<GuestRegionMmap>> {
&self.memory_regions
}
/// Returns the vCPU identifiers attached to this node.
pub fn cpus(&self) -> &Vec<u8> {
&self.cpus
}
/// Returns the distances map (destination node id -> distance).
/// Note: unlike the previous MemoryManager version, no *_mut accessors
/// are exposed; nodes are fully built inside Vm::create_numa_nodes.
pub fn distances(&self) -> &BTreeMap<u32, u8> {
&self.distances
}
}
pub type NumaNodes = BTreeMap<u32, NumaNode>;
#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq)]
pub enum VmState {
Created,
@ -259,6 +286,8 @@ pub struct Vm {
vm: Arc<dyn hypervisor::Vm>,
#[cfg(target_arch = "x86_64")]
saved_clock: Option<hypervisor::ClockData>,
#[cfg(feature = "acpi")]
numa_nodes: NumaNodes,
}
impl Vm {
@ -314,10 +343,10 @@ impl Vm {
.transpose()
.map_err(Error::InitramfsFile)?;
// Update NUMA based on NumaConfig.
if let Some(numa_cfg) = config.lock().unwrap().numa.clone() {
Self::update_numa(numa_cfg, &memory_manager)?;
}
// Create NUMA nodes based on NumaConfig.
#[cfg(feature = "acpi")]
let numa_nodes =
Self::create_numa_nodes(config.lock().unwrap().numa.clone(), &memory_manager)?;
Ok(Vm {
kernel,
@ -333,21 +362,48 @@ impl Vm {
vm,
#[cfg(target_arch = "x86_64")]
saved_clock: _saved_clock,
#[cfg(feature = "acpi")]
numa_nodes,
})
}
fn update_numa(
configs: Vec<NumaConfig>,
#[cfg(feature = "acpi")]
fn create_numa_nodes(
configs: Option<Vec<NumaConfig>>,
memory_manager: &Arc<Mutex<MemoryManager>>,
) -> Result<()> {
let mut mm = memory_manager.lock().unwrap();
let numa_nodes = mm.numa_nodes_mut();
let existing_nodes: Vec<u32> = numa_nodes.keys().cloned().collect();
) -> Result<NumaNodes> {
let mm = memory_manager.lock().unwrap();
let mm_zones = mm.memory_zones();
let mut numa_nodes = BTreeMap::new();
if let Some(configs) = &configs {
let node_id_list: Vec<u32> = configs.iter().map(|cfg| cfg.id).collect();
for config in configs.iter() {
if numa_nodes.contains_key(&config.id) {
error!("Can't define twice the same NUMA node");
return Err(Error::InvalidNumaConfig);
}
let mut node = NumaNode {
memory_regions: Vec::new(),
cpus: Vec::new(),
distances: BTreeMap::new(),
};
if let Some(memory_zones) = &config.memory_zones {
for memory_zone in memory_zones.iter() {
if let Some(mm_zone) = mm_zones.get(memory_zone) {
node.memory_regions.extend(mm_zone.clone());
} else {
error!("Unknown memory zone '{}'", memory_zone);
return Err(Error::InvalidNumaConfig);
}
}
}
for config in configs.iter() {
if let Some(node) = numa_nodes.get_mut(&config.id) {
if let Some(cpus) = &config.cpus {
node.cpus_mut().extend(cpus);
node.cpus.extend(cpus);
}
if let Some(distances) = &config.distances {
@ -355,21 +411,25 @@ impl Vm {
let dest = distance.destination;
let dist = distance.distance;
if !existing_nodes.contains(&dest) {
if !node_id_list.contains(&dest) {
error!("Unknown destination NUMA node {}", dest);
return Err(Error::InvalidNumaConfig);
}
node.distances_mut().insert(dest, dist);
if node.distances.contains_key(&dest) {
error!("Destination NUMA node {} has been already set", dest);
return Err(Error::InvalidNumaConfig);
}
node.distances.insert(dest, dist);
}
}
} else {
error!("Unknown NUMA node {}", config.id);
return Err(Error::InvalidNumaConfig);
numa_nodes.insert(config.id, node);
}
}
Ok(())
Ok(numa_nodes)
}
pub fn new(
@ -630,6 +690,7 @@ impl Vm {
&self.device_manager,
&self.cpu_manager,
&self.memory_manager,
&self.numa_nodes,
));
}