diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 0a0d533ea..f324b6702 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -221,6 +221,26 @@ impl From for super::Error { } } +pub fn get_x2apic_id(cpu_id: u32, topology: Option<(u8, u8, u8)>) -> u32 { + if let Some(t) = topology { + let thread_mask_width = u8::BITS - (t.0 - 1).leading_zeros(); + let core_mask_width = u8::BITS - (t.1 - 1).leading_zeros(); + let die_mask_width = u8::BITS - (t.2 - 1).leading_zeros(); + + let thread_id = cpu_id % (t.0 as u32); + let core_id = cpu_id / (t.0 as u32) % (t.1 as u32); + let die_id = cpu_id / ((t.0 * t.1) as u32) % (t.2 as u32); + let socket_id = cpu_id / ((t.0 * t.1 * t.2) as u32); + + return thread_id + | (core_id << thread_mask_width) + | (die_id << (thread_mask_width + core_mask_width)) + | (socket_id << (thread_mask_width + core_mask_width + die_mask_width)); + } + + cpu_id +} + #[derive(Copy, Clone, Debug)] pub enum CpuidReg { EAX, @@ -777,18 +797,14 @@ pub fn configure_vcpu( cpu_vendor: CpuVendor, topology: Option<(u8, u8, u8)>, ) -> super::Result<()> { + let x2apic_id = get_x2apic_id(id as u32, topology); + // Per vCPU CPUID changes; common are handled via generate_common_cpuid() let mut cpuid = cpuid; - CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(id)); - CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, u32::from(id)); + CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, x2apic_id); + CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, x2apic_id); if matches!(cpu_vendor, CpuVendor::AMD) { - CpuidPatch::set_cpuid_reg( - &mut cpuid, - 0x8000_001e, - Some(0), - CpuidReg::EAX, - u32::from(id), - ); + CpuidPatch::set_cpuid_reg(&mut cpuid, 0x8000_001e, Some(0), CpuidReg::EAX, x2apic_id); } if let Some(t) = topology { @@ -799,7 +815,7 @@ pub fn configure_vcpu( // SAFETY: get host cpuid when eax=1 let mut cpu_ebx = unsafe { core::arch::x86_64::__cpuid(1) }.ebx; cpu_ebx &= 0xffffff; - cpu_ebx |= (id as u32) << 24; + cpu_ebx |= x2apic_id << 24; CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1, None, CpuidReg::EBX, cpu_ebx); // The TSC frequency CPUID leaf should not be included when running with HyperV emulation @@ -896,6 +912,7 @@ pub fn configure_system( serial_number: Option<&str>, uuid: Option<&str>, oem_strings: Option<&[&str]>, + topology: Option<(u8, u8, u8)>, ) -> super::Result<()> { // Write EBDA address to location where ACPICA expects to find it guest_mem @@ -908,7 +925,7 @@ pub fn configure_system( // Place the MP table after the SMIOS table aligned to 16 bytes let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size); let offset = GuestAddress((offset.0 + 16) & !0xf); - mptable::setup_mptable(offset, guest_mem, _num_cpus).map_err(Error::MpTableSetup)?; + mptable::setup_mptable(offset, guest_mem, _num_cpus, topology).map_err(Error::MpTableSetup)?; // Check that the RAM is not smaller than the RSDP start address if let Some(rsdp_addr) = rsdp_addr { @@ -1228,6 +1245,11 @@ fn update_cpuid_topology( cpu_vendor: CpuVendor, id: u8, ) { + let x2apic_id = get_x2apic_id( + id as u32, + Some((threads_per_core, cores_per_die, dies_per_package)), + ); + let thread_width = 8 - (threads_per_core - 1).leading_zeros(); let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width; let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width; @@ -1290,7 +1312,7 @@ fn update_cpuid_topology( 0x8000_001e, Some(0), CpuidReg::EBX, - ((threads_per_core as u32 - 1) << 8) | (id as u32 & 0xff), + ((threads_per_core as u32 - 1) << 8) | (x2apic_id & 0xff), ); CpuidPatch::set_cpuid_reg( cpuid, @@ -1313,9 +1335,7 @@ fn update_cpuid_topology( 0x0000_0001, Some(0), CpuidReg::EBX, - ((id as u32) << 24) - | (8 << 8) - | (((cores_per_die * threads_per_core) as u32) << 16), + (x2apic_id << 24) | (8 << 8) | (((cores_per_die * threads_per_core) as u32) << 16), ); let cpuid_patches = vec![ // Patch tsc deadline timer bit @@ -1420,6 +1440,7 @@ mod tests { None, None, None, + None, ); assert!(config_err.is_err()); @@ -1442,6 +1463,7 @@ mod tests { None, None, None, + None, ) .unwrap(); @@ -1469,6 +1491,7 @@ mod tests { None, None, None, + None, ) .unwrap(); @@ -1482,6 +1505,7 @@ mod tests { None, None, None, + None, ) .unwrap(); } @@ -1510,4 +1534,25 @@ mod tests { assert_eq!(format!("{memmap:?}"), format!("{expected_memmap:?}")); } + + #[test] + fn test_get_x2apic_id() { + let x2apic_id = get_x2apic_id(0, Some((2, 3, 1))); + assert_eq!(x2apic_id, 0); + + let x2apic_id = get_x2apic_id(1, Some((2, 3, 1))); + assert_eq!(x2apic_id, 1); + + let x2apic_id = get_x2apic_id(2, Some((2, 3, 1))); + assert_eq!(x2apic_id, 2); + + let x2apic_id = get_x2apic_id(6, Some((2, 3, 1))); + assert_eq!(x2apic_id, 8); + + let x2apic_id = get_x2apic_id(7, Some((2, 3, 1))); + assert_eq!(x2apic_id, 9); + + let x2apic_id = get_x2apic_id(8, Some((2, 3, 1))); + assert_eq!(x2apic_id, 10); + } } diff --git a/arch/src/x86_64/mptable.rs b/arch/src/x86_64/mptable.rs index 414e21406..38c425b2f 100644 --- a/arch/src/x86_64/mptable.rs +++ b/arch/src/x86_64/mptable.rs @@ -6,7 +6,7 @@ // found in the LICENSE-BSD-3-Clause file. use crate::layout::{APIC_START, HIGH_RAM_START, IOAPIC_START}; -use crate::x86_64::mpspec; +use crate::x86_64::{get_x2apic_id, mpspec}; use crate::GuestMemoryMmap; use libc::c_char; use std::mem; @@ -125,9 +125,18 @@ fn compute_mp_size(num_cpus: u8) -> usize { } /// Performs setup of the MP table for the given `num_cpus`. -pub fn setup_mptable(offset: GuestAddress, mem: &GuestMemoryMmap, num_cpus: u8) -> Result<()> { - if num_cpus as u32 > MAX_SUPPORTED_CPUS { - return Err(Error::TooManyCpus); +pub fn setup_mptable( + offset: GuestAddress, + mem: &GuestMemoryMmap, + num_cpus: u8, + topology: Option<(u8, u8, u8)>, +) -> Result<()> { + if num_cpus > 0 { + let cpu_id_max = num_cpus - 1; + let x2apic_id_max = get_x2apic_id(cpu_id_max.into(), topology); + if x2apic_id_max >= MAX_SUPPORTED_CPUS { + return Err(Error::TooManyCpus); + } } // Used to keep track of the next base pointer into the MP table. @@ -141,7 +150,7 @@ pub fn setup_mptable(offset: GuestAddress, mem: &GuestMemoryMmap, num_cpus: u8) } let mut checksum: u8 = 0; - let ioapicid: u8 = num_cpus + 1; + let ioapicid: u8 = MAX_SUPPORTED_CPUS as u8 + 1; // The checked_add here ensures the all of the following base_mp.unchecked_add's will be without // overflow. @@ -179,7 +188,7 @@ pub fn setup_mptable(offset: GuestAddress, mem: &GuestMemoryMmap, num_cpus: u8) for cpu_id in 0..num_cpus { let mut mpc_cpu = MpcCpuWrapper(mpspec::mpc_cpu::default()); mpc_cpu.0.type_ = mpspec::MP_PROCESSOR as u8; - mpc_cpu.0.apicid = cpu_id; + mpc_cpu.0.apicid = get_x2apic_id(cpu_id as u32, topology) as u8; mpc_cpu.0.apicver = APIC_VERSION; mpc_cpu.0.cpuflag = mpspec::CPU_ENABLED as u8 | if cpu_id == 0 { @@ -312,7 +321,7 @@ mod tests { let mem = GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(num_cpus))]).unwrap(); - setup_mptable(MPTABLE_START, &mem, num_cpus).unwrap(); + setup_mptable(MPTABLE_START, &mem, num_cpus, None).unwrap(); } #[test] @@ -321,7 +330,7 @@ mod tests { let mem = GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(num_cpus) - 1)]) .unwrap(); - assert!(setup_mptable(MPTABLE_START, &mem, num_cpus).is_err()); + assert!(setup_mptable(MPTABLE_START, &mem, num_cpus, None).is_err()); } #[test] @@ -330,7 +339,7 @@ mod tests { let mem = GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(num_cpus))]).unwrap(); - setup_mptable(MPTABLE_START, &mem, num_cpus).unwrap(); + setup_mptable(MPTABLE_START, &mem, num_cpus, None).unwrap(); let mpf_intel: MpfIntelWrapper = mem.read_obj(MPTABLE_START).unwrap(); @@ -346,7 +355,7 @@ mod tests { let mem = GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(num_cpus))]).unwrap(); - setup_mptable(MPTABLE_START, &mem, num_cpus).unwrap(); + setup_mptable(MPTABLE_START, &mem, num_cpus, None).unwrap(); let mpf_intel: MpfIntelWrapper = mem.read_obj(MPTABLE_START).unwrap(); let mpc_offset = GuestAddress(mpf_intel.0.physptr as GuestUsize); @@ -384,7 +393,7 @@ mod tests { .unwrap(); for i in 0..MAX_SUPPORTED_CPUS as u8 { - setup_mptable(MPTABLE_START, &mem, i).unwrap(); + setup_mptable(MPTABLE_START, &mem, i, None).unwrap(); let mpf_intel: MpfIntelWrapper = mem.read_obj(MPTABLE_START).unwrap(); let mpc_offset = GuestAddress(mpf_intel.0.physptr as GuestUsize); @@ -417,7 +426,7 @@ mod tests { let mem = GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(cpus as u8))]).unwrap(); - let result = setup_mptable(MPTABLE_START, &mem, cpus as u8); + let result = setup_mptable(MPTABLE_START, &mem, cpus as u8, None); assert!(result.is_err()); } } diff --git a/tests/integration.rs b/tests/integration.rs index a849f88d1..a79784851 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -1035,6 +1035,40 @@ fn test_cpu_topology(threads_per_core: u8, cores_per_package: u8, packages: u8, .unwrap_or(0), packages ); + + #[cfg(target_arch = "x86_64")] + { + let mut cpu_id = 0; + for package_id in 0..packages { + for core_id in 0..cores_per_package { + for _ in 0..threads_per_core { + assert_eq!( + guest + .ssh_command(&format!("cat /sys/devices/system/cpu/cpu{cpu_id}/topology/physical_package_id")) + .unwrap() + .trim() + .parse::() + .unwrap_or(0), + package_id + ); + + assert_eq!( + guest + .ssh_command(&format!( + "cat /sys/devices/system/cpu/cpu{cpu_id}/topology/core_id" + )) + .unwrap() + .trim() + .parse::() + .unwrap_or(0), + core_id + ); + + cpu_id += 1; + } + } + } + } }); let _ = child.kill(); diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index 3ec748bf2..e89ffa3f2 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -277,7 +277,10 @@ fn create_tpm2_table() -> Sdt { tpm } -fn create_srat_table(numa_nodes: &NumaNodes) -> Sdt { +fn create_srat_table( + numa_nodes: &NumaNodes, + #[cfg(target_arch = "x86_64")] topology: Option<(u8, u8, u8)>, +) -> Sdt { let mut srat = Sdt::new(*b"SRAT", 36, 3, *b"CLOUDH", *b"CHSRAT ", 1); // SRAT reserved 12 bytes srat.append_slice(&[0u8; 12]); @@ -316,6 +319,9 @@ fn create_srat_table(numa_nodes: &NumaNodes) -> Sdt { } for cpu in &node.cpus { + #[cfg(target_arch = "x86_64")] + let x2apic_id = arch::x86_64::get_x2apic_id(*cpu as u32, topology); + #[cfg(target_arch = "aarch64")] let x2apic_id = *cpu as u32; // Flags @@ -752,8 +758,14 @@ pub fn create_acpi_tables( // SRAT and SLIT // Only created if the NUMA nodes list is not empty. if !numa_nodes.is_empty() { + #[cfg(target_arch = "x86_64")] + let topology = cpu_manager.lock().unwrap().get_vcpu_topology(); // SRAT - let srat = create_srat_table(numa_nodes); + let srat = create_srat_table( + numa_nodes, + #[cfg(target_arch = "x86_64")] + topology, + ); let srat_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); guest_mem .write_slice(srat.as_slice(), srat_offset) @@ -851,8 +863,15 @@ pub fn create_acpi_tables_tdx( // SRAT and SLIT // Only created if the NUMA nodes list is not empty. if !numa_nodes.is_empty() { + #[cfg(target_arch = "x86_64")] + let topology = cpu_manager.lock().unwrap().get_vcpu_topology(); + // SRAT - tables.push(create_srat_table(numa_nodes)); + tables.push(create_srat_table( + numa_nodes, + #[cfg(target_arch = "x86_64")] + topology, + )); // SLIT tables.push(create_slit_table(numa_nodes)); diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index ff7eecaee..226b495b3 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -31,6 +31,8 @@ use acpi_tables::{aml, sdt::Sdt, Aml}; use anyhow::anyhow; #[cfg(all(target_arch = "aarch64", feature = "guest_debug"))] use arch::aarch64::regs; +#[cfg(target_arch = "x86_64")] +use arch::x86_64::get_x2apic_id; use arch::EntryPoint; use arch::NumaNodes; #[cfg(target_arch = "aarch64")] @@ -331,12 +333,13 @@ impl Vcpu { /// * `cpu_vendor` - CPU vendor as reported by __cpuid(0x0) pub fn new( id: u8, + apic_id: u8, vm: &Arc, vm_ops: Option>, #[cfg(target_arch = "x86_64")] cpu_vendor: CpuVendor, ) -> Result { let vcpu = vm - .create_vcpu(id, vm_ops) + .create_vcpu(apic_id, vm_ops) .map_err(|e| Error::VcpuCreate(e.into()))?; // Initially the cpuid per vCPU is the one supported by this VM. Ok(Vcpu { @@ -757,8 +760,16 @@ impl CpuManager { fn create_vcpu(&mut self, cpu_id: u8, snapshot: Option) -> Result>> { info!("Creating vCPU: cpu_id = {}", cpu_id); + #[cfg(target_arch = "x86_64")] + let topology = self.get_vcpu_topology(); + #[cfg(target_arch = "x86_64")] + let x2apic_id = arch::x86_64::get_x2apic_id(cpu_id as u32, topology); + #[cfg(target_arch = "aarch64")] + let x2apic_id = cpu_id as u32; + let mut vcpu = Vcpu::new( cpu_id, + x2apic_id as u8, &self.vm, Some(self.vm_ops.clone()), #[cfg(target_arch = "x86_64")] @@ -1334,7 +1345,6 @@ impl CpuManager { .collect() } - #[cfg(target_arch = "aarch64")] pub fn get_vcpu_topology(&self) -> Option<(u8, u8, u8)> { self.config .topology @@ -1353,11 +1363,13 @@ impl CpuManager { madt.write(36, arch::layout::APIC_START.0); for cpu in 0..self.config.max_vcpus { + let x2apic_id = get_x2apic_id(cpu.into(), self.get_vcpu_topology()); + let lapic = LocalX2Apic { r#type: acpi::ACPI_X2APIC_PROCESSOR, length: 16, processor_id: cpu.into(), - apic_id: cpu.into(), + apic_id: x2apic_id, flags: if cpu < self.config.boot_vcpus { 1 << MADT_CPU_ENABLE_FLAG } else { @@ -1808,6 +1820,8 @@ struct Cpu { cpu_id: u8, proximity_domain: u32, dynamic: bool, + #[cfg(target_arch = "x86_64")] + topology: Option<(u8, u8, u8)>, } #[cfg(target_arch = "x86_64")] @@ -1819,11 +1833,13 @@ const MADT_CPU_ONLINE_CAPABLE_FLAG: usize = 1; impl Cpu { #[cfg(target_arch = "x86_64")] fn generate_mat(&self) -> Vec { + let x2apic_id = arch::x86_64::get_x2apic_id(self.cpu_id.into(), self.topology); + let lapic = LocalX2Apic { r#type: crate::acpi::ACPI_X2APIC_PROCESSOR, length: 16, processor_id: self.cpu_id.into(), - apic_id: self.cpu_id.into(), + apic_id: x2apic_id, flags: 1 << MADT_CPU_ENABLE_FLAG, _reserved: 0, }; @@ -2126,6 +2142,8 @@ impl Aml for CpuManager { }; let mut cpu_data_inner: Vec<&dyn Aml> = vec![&hid, &uid, &methods]; + #[cfg(target_arch = "x86_64")] + let topology = self.get_vcpu_topology(); let mut cpu_devices = Vec::new(); for cpu_id in 0..self.config.max_vcpus { let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0); @@ -2133,6 +2151,8 @@ impl Aml for CpuManager { cpu_id, proximity_domain, dynamic: self.dynamic, + #[cfg(target_arch = "x86_64")] + topology, }; cpu_devices.push(cpu_device); diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index fe2caa097..b8d6599d8 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -1175,6 +1175,8 @@ impl Vm { .as_deref() .map(|strings| strings.iter().map(|s| s.as_ref()).collect::>()); + let topology = self.cpu_manager.lock().unwrap().get_vcpu_topology(); + arch::configure_system( &mem, arch::layout::CMDLINE_START, @@ -1185,6 +1187,7 @@ impl Vm { serial_number.as_deref(), uuid.as_deref(), oem_strings.as_deref(), + topology, ) .map_err(Error::ConfigureSystem)?; Ok(())