vmm: cpu_manager: Add _PXM ACPI method to each vCPU

To allow a hotplugged vCPU to be assigned to the correct NUMA node in
the guest, the DSDT table must expose the _PXM method for each vCPU.
This method returns the proximity domain to which the vCPU belongs.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Author: Sebastien Boeuf
Date:   2021-06-17 12:41:30 +02:00
Parent: 09c3ddd47d
Commit: a36ac96444

3 changed files with 48 additions and 5 deletions
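
When a vCPU is hot-added, the Linux ACPI processor driver evaluates _PXM on
the corresponding processor device to find its proximity domain and place the
CPU on the matching NUMA node. A minimal sketch of the method this change
emits, assuming the acpi_tables aml builder used by the VMM is available as a
dependency (the domain value below is illustrative):

    use acpi_tables::aml;

    fn main() {
        let proximity_domain: u32 = 1; // illustrative value
        // Zero-argument, non-serialized method that simply returns the
        // proximity domain, i.e. roughly:
        //   Method (_PXM, 0, NotSerialized) { Return (One) }
        let ret = aml::Return::new(&proximity_domain);
        let _pxm = aml::Method::new("_PXM".into(), 0, false, vec![&ret]);
    }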

@@ -2230,7 +2230,7 @@ mod tests {
         let kernel_path = direct_kernel_boot_path();
 
         let mut child = GuestCommand::new(&guest)
-            .args(&["--cpus", "boot=6"])
+            .args(&["--cpus", "boot=6,max=12"])
             .args(&["--memory", "size=0,hotplug_method=virtio-mem"])
             .args(&[
                 "--memory-zone",
@@ -2240,9 +2240,9 @@ mod tests {
             ])
             .args(&[
                 "--numa",
-                "guest_numa_id=0,cpus=0-2,distances=1@15:2@20,memory_zones=mem0",
-                "guest_numa_id=1,cpus=3-4,distances=0@20:2@25,memory_zones=mem1",
-                "guest_numa_id=2,cpus=5,distances=0@25:1@30,memory_zones=mem2",
+                "guest_numa_id=0,cpus=0-2:9,distances=1@15:2@20,memory_zones=mem0",
+                "guest_numa_id=1,cpus=3-4:6-8,distances=0@20:2@25,memory_zones=mem1",
+                "guest_numa_id=2,cpus=5:10-11,distances=0@25:1@30,memory_zones=mem2",
             ])
             .args(&["--kernel", kernel_path.to_str().unwrap()])
             .args(&["--cmdline", DIRECT_KERNEL_BOOT_CMDLINE])
@@ -2285,6 +2285,13 @@ mod tests {
             resize_zone_command(&api_socket, "mem2", "4G");
             thread::sleep(std::time::Duration::new(5, 0));
             assert!(guest.get_numa_node_memory(2).unwrap_or_default() > 3_840_000);
+
+            // Resize to the maximum amount of CPUs and check each NUMA
+            // node has been assigned the right CPUs set.
+            resize_command(&api_socket, Some(12), None, None);
+            guest.check_numa_node_cpus(0, vec![0, 1, 2, 9]).unwrap();
+            guest.check_numa_node_cpus(1, vec![3, 4, 6, 7, 8]).unwrap();
+            guest.check_numa_node_cpus(2, vec![5, 10, 11]).unwrap();
         });
 
         let _ = child.kill();
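
The cpus=<list> values above use ':' to combine ranges and single CPUs within
one guest NUMA node, and the test resizes the VM to its 12-vCPU maximum before
checking each node's CPU set from inside the guest. A hypothetical sketch of
such a check, assuming the guest kernel exposes the usual
/sys/devices/system/node/node<N>/cpulist files (the parsing helper and the
exact mechanism used by check_numa_node_cpus are assumptions):

    // Parse a sysfs cpulist string such as "0-2,9" into the CPU ids it covers.
    fn parse_cpulist(list: &str) -> Vec<u32> {
        let mut cpus = Vec::new();
        for part in list.trim().split(',') {
            match part.split_once('-') {
                Some((start, end)) => {
                    let (start, end): (u32, u32) =
                        (start.parse().unwrap(), end.parse().unwrap());
                    cpus.extend(start..=end);
                }
                None => cpus.push(part.parse().unwrap()),
            }
        }
        cpus
    }

    fn main() {
        // Expected sets from the test: node 0 = 0-2:9, node 1 = 3-4:6-8,
        // node 2 = 5:10-11.
        assert_eq!(parse_cpulist("0-2,9"), vec![0, 1, 2, 9]);
        assert_eq!(parse_cpulist("3-4,6-8"), vec![3, 4, 6, 7, 8]);
        assert_eq!(parse_cpulist("5,10-11"), vec![5, 10, 11]);
    }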

@@ -19,6 +19,8 @@ use crate::memory_manager::MemoryManager;
 use crate::seccomp_filters::{get_seccomp_filter, Thread};
 #[cfg(target_arch = "x86_64")]
 use crate::vm::physical_bits;
+#[cfg(feature = "acpi")]
+use crate::vm::NumaNodes;
 use crate::GuestMemoryMmap;
 use crate::CPU_MANAGER_SNAPSHOT_ID;
 #[cfg(feature = "acpi")]
@@ -37,6 +39,8 @@ use hypervisor::{vm::VmmOps, CpuState, HypervisorCpuError, VmExit};
 use hypervisor::{CpuId, CpuIdEntry};
 use libc::{c_void, siginfo_t};
 use seccomp::{SeccompAction, SeccompFilter};
+#[cfg(feature = "acpi")]
+use std::collections::BTreeMap;
 use std::os::unix::thread::JoinHandleExt;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Barrier, Mutex};
@@ -407,6 +411,8 @@ pub struct CpuManager {
     #[cfg(feature = "acpi")]
     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
     acpi_address: GuestAddress,
+    #[cfg(feature = "acpi")]
+    proximity_domain_per_cpu: BTreeMap<u8, u32>,
 }
 
 const CPU_ENABLE_FLAG: usize = 0;
@@ -545,6 +551,7 @@ impl CpuManager {
         seccomp_action: SeccompAction,
         vmmops: Arc<Box<dyn VmmOps>>,
         #[cfg(feature = "tdx")] tdx_enabled: bool,
+        #[cfg(feature = "acpi")] numa_nodes: &NumaNodes,
     ) -> Result<Arc<Mutex<CpuManager>>> {
         let guest_memory = memory_manager.lock().unwrap().guest_memory();
         let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
@@ -579,6 +586,20 @@
             .unwrap()
             .allocate_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
             .ok_or(Error::AllocateMmmioAddress)?;
+
+        #[cfg(feature = "acpi")]
+        let proximity_domain_per_cpu: BTreeMap<u8, u32> = {
+            let mut cpu_list = Vec::new();
+            for (proximity_domain, numa_node) in numa_nodes.iter() {
+                for cpu in numa_node.cpus().iter() {
+                    cpu_list.push((*cpu, *proximity_domain))
+                }
+            }
+            cpu_list
+        }
+        .into_iter()
+        .collect();
+
         let cpu_manager = Arc::new(Mutex::new(CpuManager {
             config: config.clone(),
             interrupt_controller: device_manager.interrupt_controller().clone(),
@@ -597,6 +618,8 @@
             vmmops,
             #[cfg(feature = "acpi")]
             acpi_address,
+            #[cfg(feature = "acpi")]
+            proximity_domain_per_cpu,
         }));
 
         #[cfg(feature = "acpi")]
@@ -1284,6 +1307,7 @@ impl CpuManager {
 #[cfg(feature = "acpi")]
 struct Cpu {
     cpu_id: u8,
+    proximity_domain: u32,
 }
 
 #[cfg(all(target_arch = "x86_64", feature = "acpi"))]
@@ -1341,6 +1365,12 @@ impl Aml for Cpu {
                         vec![&self.cpu_id],
                     ))],
                 ),
+                &aml::Method::new(
+                    "_PXM".into(),
+                    0,
+                    false,
+                    vec![&aml::Return::new(&self.proximity_domain)],
+                ),
                 // The Linux kernel expects every CPU device to have a _MAT entry
                 // containing the LAPIC for this processor with the enabled bit set
                 // even it if is disabled in the MADT (non-boot CPU)
@@ -1573,7 +1603,11 @@ impl Aml for CpuManager {
 
         let mut cpu_devices = Vec::new();
         for cpu_id in 0..self.config.max_vcpus {
-            let cpu_device = Cpu { cpu_id };
+            let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0);
+            let cpu_device = Cpu {
+                cpu_id,
+                proximity_domain,
+            };
             cpu_devices.push(cpu_device);
         }
 
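
For every possible vCPU (0..max_vcpus) the DSDT generator looks up the
proximity domain in the map built in CpuManager::new, defaulting to domain 0
when a vCPU is not listed in any NUMA node. A small standalone sketch of that
mapping, using the CPU sets from the integration test above (the values are
illustrative, not taken from a real NumaNodes structure):

    use std::collections::BTreeMap;

    fn main() {
        // cpus=0-2:9 -> node 0, cpus=3-4:6-8 -> node 1, cpus=5:10-11 -> node 2
        let mut proximity_domain_per_cpu: BTreeMap<u8, u32> = BTreeMap::new();
        for &cpu in &[0u8, 1, 2, 9] {
            proximity_domain_per_cpu.insert(cpu, 0);
        }
        for &cpu in &[3u8, 4, 6, 7, 8] {
            proximity_domain_per_cpu.insert(cpu, 1);
        }
        for &cpu in &[5u8, 10, 11] {
            proximity_domain_per_cpu.insert(cpu, 2);
        }

        // Same lookup the Aml implementation performs for each vCPU device.
        for cpu_id in 0u8..12 {
            let proximity_domain = *proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0);
            println!("vCPU {} -> proximity domain {}", cpu_id, proximity_domain);
        }
    }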

@@ -569,6 +569,8 @@ impl Vm {
             vm_ops,
             #[cfg(feature = "tdx")]
             tdx_enabled,
+            #[cfg(feature = "acpi")]
+            &numa_nodes,
         )
         .map_err(Error::CpuManager)?;
 