diff --git a/tests/integration.rs b/tests/integration.rs
index a351123dd..f11d2d96e 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -2230,7 +2230,7 @@ mod tests {
         let kernel_path = direct_kernel_boot_path();
 
         let mut child = GuestCommand::new(&guest)
-            .args(&["--cpus", "boot=6"])
+            .args(&["--cpus", "boot=6,max=12"])
             .args(&["--memory", "size=0,hotplug_method=virtio-mem"])
             .args(&[
                 "--memory-zone",
@@ -2240,9 +2240,9 @@ mod tests {
             ])
             .args(&[
                 "--numa",
-                "guest_numa_id=0,cpus=0-2,distances=1@15:2@20,memory_zones=mem0",
-                "guest_numa_id=1,cpus=3-4,distances=0@20:2@25,memory_zones=mem1",
-                "guest_numa_id=2,cpus=5,distances=0@25:1@30,memory_zones=mem2",
+                "guest_numa_id=0,cpus=0-2:9,distances=1@15:2@20,memory_zones=mem0",
+                "guest_numa_id=1,cpus=3-4:6-8,distances=0@20:2@25,memory_zones=mem1",
+                "guest_numa_id=2,cpus=5:10-11,distances=0@25:1@30,memory_zones=mem2",
             ])
             .args(&["--kernel", kernel_path.to_str().unwrap()])
            .args(&["--cmdline", DIRECT_KERNEL_BOOT_CMDLINE])
@@ -2285,6 +2285,13 @@ mod tests {
         resize_zone_command(&api_socket, "mem2", "4G");
         thread::sleep(std::time::Duration::new(5, 0));
         assert!(guest.get_numa_node_memory(2).unwrap_or_default() > 3_840_000);
+
+        // Resize to the maximum number of vCPUs and check each NUMA
+        // node has been assigned the right set of CPUs.
+        resize_command(&api_socket, Some(12), None, None);
+        guest.check_numa_node_cpus(0, vec![0, 1, 2, 9]).unwrap();
+        guest.check_numa_node_cpus(1, vec![3, 4, 6, 7, 8]).unwrap();
+        guest.check_numa_node_cpus(2, vec![5, 10, 11]).unwrap();
     });
 
     let _ = child.kill();
diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs
index e7fa6ef87..95208a17b 100644
--- a/vmm/src/cpu.rs
+++ b/vmm/src/cpu.rs
@@ -19,6 +19,8 @@ use crate::memory_manager::MemoryManager;
 use crate::seccomp_filters::{get_seccomp_filter, Thread};
 #[cfg(target_arch = "x86_64")]
 use crate::vm::physical_bits;
+#[cfg(feature = "acpi")]
+use crate::vm::NumaNodes;
 use crate::GuestMemoryMmap;
 use crate::CPU_MANAGER_SNAPSHOT_ID;
 #[cfg(feature = "acpi")]
@@ -37,6 +39,8 @@ use hypervisor::{vm::VmmOps, CpuState, HypervisorCpuError, VmExit};
 use hypervisor::{CpuId, CpuIdEntry};
 use libc::{c_void, siginfo_t};
 use seccomp::{SeccompAction, SeccompFilter};
+#[cfg(feature = "acpi")]
+use std::collections::BTreeMap;
 use std::os::unix::thread::JoinHandleExt;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Barrier, Mutex};
@@ -407,6 +411,8 @@ pub struct CpuManager {
     #[cfg(feature = "acpi")]
     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
     acpi_address: GuestAddress,
+    #[cfg(feature = "acpi")]
+    proximity_domain_per_cpu: BTreeMap<u8, u32>,
 }
 
 const CPU_ENABLE_FLAG: usize = 0;
@@ -545,6 +551,7 @@ impl CpuManager {
         seccomp_action: SeccompAction,
         vmmops: Arc<Box<dyn VmmOps>>,
         #[cfg(feature = "tdx")] tdx_enabled: bool,
+        #[cfg(feature = "acpi")] numa_nodes: &NumaNodes,
     ) -> Result<Arc<Mutex<CpuManager>>> {
         let guest_memory = memory_manager.lock().unwrap().guest_memory();
         let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
@@ -579,6 +586,20 @@
             .unwrap()
             .allocate_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
             .ok_or(Error::AllocateMmmioAddress)?;
+
+        #[cfg(feature = "acpi")]
+        let proximity_domain_per_cpu: BTreeMap<u8, u32> = {
+            let mut cpu_list = Vec::new();
+            for (proximity_domain, numa_node) in numa_nodes.iter() {
+                for cpu in numa_node.cpus().iter() {
+                    cpu_list.push((*cpu, *proximity_domain))
+                }
+            }
+            cpu_list
+        }
+        .into_iter()
+        .collect();
+
         let cpu_manager = Arc::new(Mutex::new(CpuManager {
             config: config.clone(),
             interrupt_controller: device_manager.interrupt_controller().clone(),
@@ -597,6 +618,8 @@ impl CpuManager {
             vmmops,
             #[cfg(feature = "acpi")]
             acpi_address,
+            #[cfg(feature = "acpi")]
+            proximity_domain_per_cpu,
         }));
 
         #[cfg(feature = "acpi")]
@@ -1284,6 +1307,7 @@ impl CpuManager {
 #[cfg(feature = "acpi")]
 struct Cpu {
     cpu_id: u8,
+    proximity_domain: u32,
 }
 
 #[cfg(all(target_arch = "x86_64", feature = "acpi"))]
@@ -1341,6 +1365,12 @@ impl Aml for Cpu {
                         vec![&self.cpu_id],
                     ))],
                 ),
+                &aml::Method::new(
+                    "_PXM".into(),
+                    0,
+                    false,
+                    vec![&aml::Return::new(&self.proximity_domain)],
+                ),
                 // The Linux kernel expects every CPU device to have a _MAT entry
                 // containing the LAPIC for this processor with the enabled bit set
                 // even if it is disabled in the MADT (non-boot CPU)
@@ -1573,7 +1603,11 @@ impl Aml for CpuManager {
 
         let mut cpu_devices = Vec::new();
         for cpu_id in 0..self.config.max_vcpus {
-            let cpu_device = Cpu { cpu_id };
+            let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0);
+            let cpu_device = Cpu {
+                cpu_id,
+                proximity_domain,
+            };
             cpu_devices.push(cpu_device);
         }
 
diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs
index f623e83f1..a2262332b 100644
--- a/vmm/src/vm.rs
+++ b/vmm/src/vm.rs
@@ -569,6 +569,8 @@ impl Vm {
             vm_ops,
             #[cfg(feature = "tdx")]
             tdx_enabled,
+            #[cfg(feature = "acpi")]
+            &numa_nodes,
         )
         .map_err(Error::CpuManager)?;
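
Note on the updated --numa arguments: each cpus= value is a list of ':'-separated entries, where an entry is either a single vCPU id or an inclusive "lo-hi" range, so cpus=0-2:9 covers vCPUs 0, 1, 2 and 9. A minimal sketch of that expansion (illustrative only; expand_cpu_list is a hypothetical helper, not the cloud-hypervisor parser):

// Illustrative only: entries are ':'-separated; each entry is either a
// single vCPU id or an inclusive "lo-hi" range.
fn expand_cpu_list(list: &str) -> Result<Vec<u8>, std::num::ParseIntError> {
    let mut cpus = Vec::new();
    for entry in list.split(':') {
        match entry.split_once('-') {
            Some((lo, hi)) => cpus.extend(lo.parse::<u8>()?..=hi.parse::<u8>()?),
            None => cpus.push(entry.parse::<u8>()?),
        }
    }
    Ok(cpus)
}

fn main() {
    // The three CPU lists from the updated --numa test arguments.
    assert_eq!(expand_cpu_list("0-2:9").unwrap(), vec![0, 1, 2, 9]);
    assert_eq!(expand_cpu_list("3-4:6-8").unwrap(), vec![3, 4, 6, 7, 8]);
    assert_eq!(expand_cpu_list("5:10-11").unwrap(), vec![5, 10, 11]);
}

The expanded lists match the sets asserted through check_numa_node_cpus() once the VM is resized to max=12.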
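The heart of the vmm/src/cpu.rs change is the vCPU-to-proximity-domain lookup built in CpuManager::new() and returned by each CPU device's _PXM method. Below is a self-contained sketch of the same flattening, assuming NumaNodes is a BTreeMap keyed by the proximity domain (the guest_numa_id) and that NumaNode exposes its vCPU list through cpus(); the NumaNode stand-in is hypothetical, trimmed to the one field used here:

use std::collections::BTreeMap;

// Hypothetical stand-in for vmm::vm::NumaNode, reduced to its vCPU list.
struct NumaNode {
    cpus: Vec<u8>,
}

impl NumaNode {
    fn cpus(&self) -> &Vec<u8> {
        &self.cpus
    }
}

// Assumed shape: proximity domain id -> node description.
type NumaNodes = BTreeMap<u32, NumaNode>;

// Flatten the per-node vCPU lists into the cpu_id -> proximity_domain
// lookup that CpuManager stores and later queries per CPU device.
fn proximity_domain_per_cpu(numa_nodes: &NumaNodes) -> BTreeMap<u8, u32> {
    numa_nodes
        .iter()
        .flat_map(|(proximity_domain, numa_node)| {
            numa_node
                .cpus()
                .iter()
                .map(move |cpu| (*cpu, *proximity_domain))
        })
        .collect()
}

fn main() {
    // The topology configured by the integration test: three guest NUMA nodes.
    let mut numa_nodes = NumaNodes::new();
    numa_nodes.insert(0, NumaNode { cpus: vec![0, 1, 2, 9] });
    numa_nodes.insert(1, NumaNode { cpus: vec![3, 4, 6, 7, 8] });
    numa_nodes.insert(2, NumaNode { cpus: vec![5, 10, 11] });

    let map = proximity_domain_per_cpu(&numa_nodes);
    assert_eq!(map.get(&9), Some(&0));
    assert_eq!(map.get(&8), Some(&1));
    assert_eq!(map.get(&10), Some(&2));
    // A vCPU missing from every node falls back to proximity domain 0,
    // matching the .unwrap_or(&0) in CpuManager's Aml implementation.
    assert_eq!(*map.get(&42).unwrap_or(&0), 0);
}

Keying the map by vCPU id makes the per-device lookup in the Aml implementation a single get(), with .unwrap_or(&0) placing any unassigned vCPU in proximity domain 0.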