vmm: cpu_manager: Add _PXM ACPI method to each vCPU

In order to allow a hotplugged vCPU to be assigned to the correct NUMA
node in the guest, the DSDT must expose a _PXM method for each vCPU.
This method returns the proximity domain to which the vCPU should be
attached.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Sebastien Boeuf 2021-06-17 12:41:30 +02:00
parent 09c3ddd47d
commit a36ac96444
3 changed files with 48 additions and 5 deletions
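
For illustration, a rough sketch (not part of the patch) of the kind of
per-vCPU device the DSDT ends up containing, written against the in-tree
acpi_tables::aml builder that the code below uses. The "C009" device name
and the _HID/_UID entries are assumed for the example, and to_aml_bytes()
is assumed to be the builder's byte-emitting accessor; the commit itself
only adds the _PXM method.

    use acpi_tables::aml::{self, Aml};

    fn main() {
        let cpu_id: u8 = 9;
        // vCPU 9 belongs to guest NUMA node 0 in the test topology below.
        let proximity_domain: u32 = 0;

        // Emit the AML bytes for a single vCPU device. _PXM takes no
        // arguments, is not serialized, and simply returns the proximity
        // domain so the guest kernel can place the hotplugged vCPU on the
        // right NUMA node.
        let aml_bytes = aml::Device::new(
            "C009".into(),
            vec![
                &aml::Name::new("_HID".into(), &"ACPI0007"),
                &aml::Name::new("_UID".into(), &cpu_id),
                &aml::Method::new(
                    "_PXM".into(),
                    0,
                    false,
                    vec![&aml::Return::new(&proximity_domain)],
                ),
            ],
        )
        .to_aml_bytes();

        assert!(!aml_bytes.is_empty());
    }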


@ -2230,7 +2230,7 @@ mod tests {
let kernel_path = direct_kernel_boot_path();
let mut child = GuestCommand::new(&guest)
-.args(&["--cpus", "boot=6"])
+.args(&["--cpus", "boot=6,max=12"])
.args(&["--memory", "size=0,hotplug_method=virtio-mem"])
.args(&[
"--memory-zone",
@ -2240,9 +2240,9 @@ mod tests {
])
.args(&[
"--numa",
-"guest_numa_id=0,cpus=0-2,distances=1@15:2@20,memory_zones=mem0",
-"guest_numa_id=1,cpus=3-4,distances=0@20:2@25,memory_zones=mem1",
-"guest_numa_id=2,cpus=5,distances=0@25:1@30,memory_zones=mem2",
+"guest_numa_id=0,cpus=0-2:9,distances=1@15:2@20,memory_zones=mem0",
+"guest_numa_id=1,cpus=3-4:6-8,distances=0@20:2@25,memory_zones=mem1",
+"guest_numa_id=2,cpus=5:10-11,distances=0@25:1@30,memory_zones=mem2",
])
.args(&["--kernel", kernel_path.to_str().unwrap()])
.args(&["--cmdline", DIRECT_KERNEL_BOOT_CMDLINE])
@ -2285,6 +2285,13 @@ mod tests {
resize_zone_command(&api_socket, "mem2", "4G");
thread::sleep(std::time::Duration::new(5, 0));
assert!(guest.get_numa_node_memory(2).unwrap_or_default() > 3_840_000);
+// Resize to the maximum number of vCPUs and check that each NUMA
+// node has been assigned the right set of vCPUs.
+resize_command(&api_socket, Some(12), None, None);
+guest.check_numa_node_cpus(0, vec![0, 1, 2, 9]).unwrap();
+guest.check_numa_node_cpus(1, vec![3, 4, 6, 7, 8]).unwrap();
+guest.check_numa_node_cpus(2, vec![5, 10, 11]).unwrap();
});
let _ = child.kill();
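
A note on the --numa syntax exercised above: inside a cpus= value, '-'
expresses a range of vCPU ids and ':' joins groups. A small hypothetical
helper (not part of the tree) that expands such a value the way the
check_numa_node_cpus() assertions expect:

    // Expand a `cpus=` value such as "0-2:9" into the vCPU ids it covers.
    fn expand_cpu_list(spec: &str) -> Vec<u8> {
        let mut cpus = Vec::new();
        for group in spec.split(':') {
            match group.split_once('-') {
                Some((start, end)) => {
                    let (start, end): (u8, u8) =
                        (start.parse().unwrap(), end.parse().unwrap());
                    cpus.extend(start..=end);
                }
                None => cpus.push(group.parse().unwrap()),
            }
        }
        cpus
    }

    fn main() {
        // These match the per-node CPU sets asserted by the test above.
        assert_eq!(expand_cpu_list("0-2:9"), vec![0, 1, 2, 9]);
        assert_eq!(expand_cpu_list("3-4:6-8"), vec![3, 4, 6, 7, 8]);
        assert_eq!(expand_cpu_list("5:10-11"), vec![5, 10, 11]);
    }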


@ -19,6 +19,8 @@ use crate::memory_manager::MemoryManager;
use crate::seccomp_filters::{get_seccomp_filter, Thread};
#[cfg(target_arch = "x86_64")]
use crate::vm::physical_bits;
+#[cfg(feature = "acpi")]
+use crate::vm::NumaNodes;
use crate::GuestMemoryMmap;
use crate::CPU_MANAGER_SNAPSHOT_ID;
#[cfg(feature = "acpi")]
@ -37,6 +39,8 @@ use hypervisor::{vm::VmmOps, CpuState, HypervisorCpuError, VmExit};
use hypervisor::{CpuId, CpuIdEntry};
use libc::{c_void, siginfo_t};
use seccomp::{SeccompAction, SeccompFilter};
+#[cfg(feature = "acpi")]
+use std::collections::BTreeMap;
use std::os::unix::thread::JoinHandleExt;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Barrier, Mutex};
@ -407,6 +411,8 @@ pub struct CpuManager {
#[cfg(feature = "acpi")]
#[cfg_attr(target_arch = "aarch64", allow(dead_code))]
acpi_address: GuestAddress,
+#[cfg(feature = "acpi")]
+proximity_domain_per_cpu: BTreeMap<u8, u32>,
}
const CPU_ENABLE_FLAG: usize = 0;
@ -545,6 +551,7 @@ impl CpuManager {
seccomp_action: SeccompAction,
vmmops: Arc<Box<dyn VmmOps>>,
#[cfg(feature = "tdx")] tdx_enabled: bool,
+#[cfg(feature = "acpi")] numa_nodes: &NumaNodes,
) -> Result<Arc<Mutex<CpuManager>>> {
let guest_memory = memory_manager.lock().unwrap().guest_memory();
let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
@ -579,6 +586,20 @@ impl CpuManager {
.unwrap()
.allocate_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
.ok_or(Error::AllocateMmmioAddress)?;
+#[cfg(feature = "acpi")]
+let proximity_domain_per_cpu: BTreeMap<u8, u32> = {
+    let mut cpu_list = Vec::new();
+    for (proximity_domain, numa_node) in numa_nodes.iter() {
+        for cpu in numa_node.cpus().iter() {
+            cpu_list.push((*cpu, *proximity_domain))
+        }
+    }
+    cpu_list
+}
+.into_iter()
+.collect();
let cpu_manager = Arc::new(Mutex::new(CpuManager {
config: config.clone(),
interrupt_controller: device_manager.interrupt_controller().clone(),
@ -597,6 +618,8 @@ impl CpuManager {
vmmops,
#[cfg(feature = "acpi")]
acpi_address,
+#[cfg(feature = "acpi")]
+proximity_domain_per_cpu,
}));
#[cfg(feature = "acpi")]
@ -1284,6 +1307,7 @@ impl CpuManager {
#[cfg(feature = "acpi")]
struct Cpu {
cpu_id: u8,
+proximity_domain: u32,
}
#[cfg(all(target_arch = "x86_64", feature = "acpi"))]
@ -1341,6 +1365,12 @@ impl Aml for Cpu {
vec![&self.cpu_id],
))],
),
+&aml::Method::new(
+    "_PXM".into(),
+    0,
+    false,
+    vec![&aml::Return::new(&self.proximity_domain)],
+),
// The Linux kernel expects every CPU device to have a _MAT entry
// containing the LAPIC for this processor with the enabled bit set
// even if it is disabled in the MADT (non-boot CPU)
@ -1573,7 +1603,11 @@ impl Aml for CpuManager {
let mut cpu_devices = Vec::new();
for cpu_id in 0..self.config.max_vcpus {
-let cpu_device = Cpu { cpu_id };
+let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0);
+let cpu_device = Cpu {
+    cpu_id,
+    proximity_domain,
+};
cpu_devices.push(cpu_device);
}
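
To picture what the new proximity_domain_per_cpu field holds, here is a
minimal, self-contained sketch. It assumes NumaNodes behaves like an
ordered map from guest NUMA node id (the proximity domain) to the vCPUs
that node contains, which is how the construction above uses it:

    use std::collections::BTreeMap;

    // Simplified stand-in for the vmm's NumaNodes: proximity domain -> vCPUs.
    type NumaNodes = BTreeMap<u32, Vec<u8>>;

    // Invert the per-node vCPU lists into a per-vCPU lookup table, mirroring
    // the proximity_domain_per_cpu construction in CpuManager::new().
    fn proximity_domain_per_cpu(numa_nodes: &NumaNodes) -> BTreeMap<u8, u32> {
        let mut map = BTreeMap::new();
        for (proximity_domain, cpus) in numa_nodes {
            for cpu in cpus {
                map.insert(*cpu, *proximity_domain);
            }
        }
        map
    }

    fn main() {
        // Topology from the integration test: 12 vCPUs over three nodes.
        let mut numa_nodes = NumaNodes::new();
        numa_nodes.insert(0, vec![0, 1, 2, 9]);
        numa_nodes.insert(1, vec![3, 4, 6, 7, 8]);
        numa_nodes.insert(2, vec![5, 10, 11]);

        let map = proximity_domain_per_cpu(&numa_nodes);
        assert_eq!(map.get(&9), Some(&0));
        assert_eq!(map.get(&7), Some(&1));
        // A vCPU not listed in any node falls back to domain 0, matching
        // the unwrap_or(&0) used when generating the DSDT.
        assert_eq!(*map.get(&12).unwrap_or(&0), 0);
    }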


@ -569,6 +569,8 @@ impl Vm {
vm_ops,
#[cfg(feature = "tdx")]
tdx_enabled,
+#[cfg(feature = "acpi")]
+&numa_nodes,
)
.map_err(Error::CpuManager)?;