diff --git a/Cargo.lock b/Cargo.lock index 17e01d3b0..f9e6434f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,6 +77,7 @@ dependencies = [ "vm-fdt", "vm-memory", "vm-migration", + "vmm-sys-util", ] [[package]] diff --git a/arch/Cargo.toml b/arch/Cargo.toml index 7cf964db1..bb516073d 100644 --- a/arch/Cargo.toml +++ b/arch/Cargo.toml @@ -23,6 +23,7 @@ versionize = "0.1.6" versionize_derive = "0.1.4" vm-memory = { version = "0.5.0", features = ["backend-mmap", "backend-bitmap"] } vm-migration = { path = "../vm-migration" } +vmm-sys-util = { version = ">=0.5.0", features = ["with-serde"] } [target.'cfg(target_arch = "aarch64")'.dependencies] fdt_parser = { version = "0.1.3", package = 'fdt'} diff --git a/arch/src/lib.rs b/arch/src/lib.rs index 51378113b..8d7589818 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs @@ -83,9 +83,9 @@ pub mod x86_64; #[cfg(target_arch = "x86_64")] pub use x86_64::{ - arch_memory_regions, configure_system, configure_vcpu, get_host_cpu_phys_bits, - initramfs_load_addr, layout, layout::CMDLINE_MAX_SIZE, layout::CMDLINE_START, regs, CpuidPatch, - CpuidReg, EntryPoint, + arch_memory_regions, configure_system, configure_vcpu, generate_common_cpuid, + get_host_cpu_phys_bits, initramfs_load_addr, layout, layout::CMDLINE_MAX_SIZE, + layout::CMDLINE_START, regs, EntryPoint, }; /// Safe wrapper for `sysconf(_SC_PAGESIZE)`. 
diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 55839a8ad..b2310cc3b 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -15,7 +15,7 @@ pub mod regs; use crate::GuestMemoryMmap; use crate::InitramfsConfig; use crate::RegionType; -use hypervisor::{CpuId, CpuIdEntry, CPUID_FLAG_VALID_INDEX}; +use hypervisor::{CpuId, CpuIdEntry, HypervisorError, CPUID_FLAG_VALID_INDEX}; use linux_loader::loader::bootparam::boot_params; use linux_loader::loader::elf::start_info::{ hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info, @@ -31,6 +31,26 @@ use std::arch::x86_64; #[cfg(feature = "tdx")] pub mod tdx; +// CPUID feature bits +const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // tsc deadline timer ecx bit. +const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit. +const MTRR_EDX_BIT: u8 = 12; // MTRR edx bit. + +// KVM feature bits +const KVM_FEATURE_ASYNC_PF_INT_BIT: u8 = 14; +#[cfg(feature = "tdx")] +const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0; +#[cfg(feature = "tdx")] +const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3; +#[cfg(feature = "tdx")] +const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24; +#[cfg(feature = "tdx")] +const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4; +#[cfg(feature = "tdx")] +const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10; +#[cfg(feature = "tdx")] +const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5; + #[derive(Debug, Copy, Clone)] /// Specifies the entry point address where the guest must start /// executing code, as well as which of the supported boot protocols @@ -156,6 +176,15 @@ pub enum Error { /// Missing SGX_LC CPU feature MissingSgxLaunchControlFeature, + + /// Error getting supported CPUID through the hypervisor (kvm/mshv) API + CpuidGetSupported(HypervisorError), + + /// Error populating CPUID with KVM HyperV emulation details + CpuidKvmHyperV(vmm_sys_util::fam::Error), + + /// Error populating CPUID with CPU identification + CpuidIdentification(vmm_sys_util::fam::Error), } impl From for super::Error { @@ -309,6 +338,168 @@ impl 
CpuidPatch { } } +pub fn generate_common_cpuid( + hypervisor: Arc, + topology: Option<(u8, u8, u8)>, + sgx_epc_sections: Option>, + phys_bits: u8, + kvm_hyperv: bool, + #[cfg(feature = "tdx")] tdx_enabled: bool, +) -> super::Result { + let cpuid_patches = vec![ + // Patch tsc deadline timer bit + CpuidPatch { + function: 1, + index: 0, + flags_bit: None, + eax_bit: None, + ebx_bit: None, + ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT), + edx_bit: None, + }, + // Patch hypervisor bit + CpuidPatch { + function: 1, + index: 0, + flags_bit: None, + eax_bit: None, + ebx_bit: None, + ecx_bit: Some(HYPERVISOR_ECX_BIT), + edx_bit: None, + }, + // Enable MTRR feature + CpuidPatch { + function: 1, + index: 0, + flags_bit: None, + eax_bit: None, + ebx_bit: None, + ecx_bit: None, + edx_bit: Some(MTRR_EDX_BIT), + }, + ]; + + // Supported CPUID + let mut cpuid = hypervisor.get_cpuid().map_err(Error::CpuidGetSupported)?; + + CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches); + + if let Some(t) = topology { + update_cpuid_topology(&mut cpuid, t.0, t.1, t.2); + } + + if let Some(sgx_epc_sections) = sgx_epc_sections { + update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?; + } + + // Update some existing CPUID + for entry in cpuid.as_mut_slice().iter_mut() { + match entry.function { + // Set CPU physical bits + 0x8000_0008 => { + entry.eax = (entry.eax & 0xffff_ff00) | (phys_bits as u32 & 0xff); + } + // Disable KVM_FEATURE_ASYNC_PF_INT + // This is required until we find out why the asynchronous page + // fault is generating unexpected behavior when using interrupt + // mechanism. 
+ // TODO: Re-enable KVM_FEATURE_ASYNC_PF_INT (#2277) + 0x4000_0001 => { + entry.eax &= !(1 << KVM_FEATURE_ASYNC_PF_INT_BIT); + + // These features are not supported by TDX + #[cfg(feature = "tdx")] + if tdx_enabled { + entry.eax &= !(1 << KVM_FEATURE_CLOCKSOURCE_BIT + | 1 << KVM_FEATURE_CLOCKSOURCE2_BIT + | 1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT + | 1 << KVM_FEATURE_ASYNC_PF_BIT + | 1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT + | 1 << KVM_FEATURE_STEAL_TIME_BIT) + } + } + _ => {} + } + } + + // Copy CPU identification string + for i in 0x8000_0002..=0x8000_0004 { + cpuid.retain(|c| c.function != i); + let leaf = unsafe { std::arch::x86_64::__cpuid(i) }; + cpuid + .push(CpuIdEntry { + function: i, + eax: leaf.eax, + ebx: leaf.ebx, + ecx: leaf.ecx, + edx: leaf.edx, + ..Default::default() + }) + .map_err(Error::CpuidIdentification)?; + } + + if kvm_hyperv { + // Remove conflicting entries + cpuid.retain(|c| c.function != 0x4000_0000); + cpuid.retain(|c| c.function != 0x4000_0001); + // See "Hypervisor Top Level Functional Specification" for details + // Compliance with "Hv#1" requires leaves up to 0x4000_000a + cpuid + .push(CpuIdEntry { + function: 0x40000000, + eax: 0x4000000a, // Maximum cpuid leaf + ebx: 0x756e694c, // "Linu" + ecx: 0x564b2078, // "x KV" + edx: 0x7648204d, // "M Hv" + ..Default::default() + }) + .map_err(Error::CpuidKvmHyperV)?; + cpuid + .push(CpuIdEntry { + function: 0x40000001, + eax: 0x31237648, // "Hv#1" + ..Default::default() + }) + .map_err(Error::CpuidKvmHyperV)?; + cpuid + .push(CpuIdEntry { + function: 0x40000002, + eax: 0x3839, // "Build number" + ebx: 0xa0000, // "Version" + ..Default::default() + }) + .map_err(Error::CpuidKvmHyperV)?; + cpuid + .push(CpuIdEntry { + function: 0x4000_0003, + eax: 1 << 1 // AccessPartitionReferenceCounter + | 1 << 2 // AccessSynicRegs + | 1 << 3 // AccessSyntheticTimerRegs + | 1 << 9, // AccessPartitionReferenceTsc + edx: 1 << 3, // CPU dynamic partitioning + ..Default::default() + }) + 
.map_err(Error::CpuidKvmHyperV)?; + cpuid + .push(CpuIdEntry { + function: 0x4000_0004, + eax: 1 << 5, // Recommend relaxed timing + ..Default::default() + }) + .map_err(Error::CpuidKvmHyperV)?; + for i in 0x4000_0005..=0x4000_000a { + cpuid + .push(CpuIdEntry { + function: i, + ..Default::default() + }) + .map_err(Error::CpuidKvmHyperV)?; + } + } + + Ok(cpuid) +} + pub fn configure_vcpu( fd: &Arc, id: u8, @@ -317,7 +508,7 @@ pub fn configure_vcpu( cpuid: CpuId, kvm_hyperv: bool, ) -> super::Result<()> { - // Per vCPU CPUID changes; common are handled via CpuManager::generate_common_cpuid() + // Per vCPU CPUID changes; common are handled via generate_common_cpuid() let mut cpuid = cpuid; CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(id)); CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, u32::from(id)); @@ -614,7 +805,7 @@ pub fn get_host_cpu_phys_bits() -> u8 { } } -pub fn update_cpuid_topology( +fn update_cpuid_topology( cpuid: &mut CpuId, threads_per_core: u8, cores_per_die: u8, @@ -679,7 +870,7 @@ pub fn update_cpuid_topology( // The goal is to update the CPUID sub-leaves to reflect the number of EPC // sections exposed to the guest. -pub fn update_cpuid_sgx(cpuid: &mut CpuId, epc_sections: Vec) -> Result<(), Error> { +fn update_cpuid_sgx(cpuid: &mut CpuId, epc_sections: Vec) -> Result<(), Error> { // Something's wrong if there's no EPC section. 
if epc_sections.is_empty() { return Err(Error::NoSgxEpcSection); diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index d0ebda3dd..315bf7456 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -11,8 +11,6 @@ // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause // -#[cfg(target_arch = "x86_64")] -use crate::config::CpuTopology; use crate::config::CpusConfig; use crate::device_manager::DeviceManager; use crate::memory_manager::MemoryManager; @@ -26,17 +24,13 @@ use crate::CPU_MANAGER_SNAPSHOT_ID; #[cfg(feature = "acpi")] use acpi_tables::{aml, aml::Aml, sdt::Sdt}; use anyhow::anyhow; -#[cfg(target_arch = "x86_64")] -use arch::x86_64::SgxEpcSection; -#[cfg(target_arch = "x86_64")] -use arch::CpuidPatch; use arch::EntryPoint; use devices::interrupt_controller::InterruptController; #[cfg(target_arch = "aarch64")] use hypervisor::kvm::kvm_bindings; -use hypervisor::{vm::VmmOps, CpuState, HypervisorCpuError, VmExit}; #[cfg(target_arch = "x86_64")] -use hypervisor::{CpuId, CpuIdEntry}; +use hypervisor::CpuId; +use hypervisor::{vm::VmmOps, CpuState, HypervisorCpuError, VmExit}; use libc::{c_void, siginfo_t}; use seccomp::{SeccompAction, SeccompFilter}; #[cfg(feature = "acpi")] @@ -56,30 +50,6 @@ use vm_migration::{ use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::signal::{register_signal_handler, SIGRTMIN}; -// CPUID feature bits -#[cfg(target_arch = "x86_64")] -const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // tsc deadline timer ecx bit. -#[cfg(target_arch = "x86_64")] -const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit. -#[cfg(target_arch = "x86_64")] -const MTRR_EDX_BIT: u8 = 12; // Hypervisor ecx bit. 
- -// KVM feature bits -#[cfg(target_arch = "x86_64")] -const KVM_FEATURE_ASYNC_PF_INT_BIT: u8 = 14; -#[cfg(feature = "tdx")] -const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0; -#[cfg(feature = "tdx")] -const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3; -#[cfg(feature = "tdx")] -const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24; -#[cfg(feature = "tdx")] -const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4; -#[cfg(feature = "tdx")] -const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10; -#[cfg(feature = "tdx")] -const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5; - #[cfg(feature = "acpi")] pub const CPU_MANAGER_ACPI_SIZE: usize = 0xc; @@ -94,8 +64,8 @@ pub enum Error { /// Cannot spawn a new vCPU thread. VcpuSpawn(io::Error), - /// Cannot patch the CPU ID - PatchCpuId(anyhow::Error), + /// Cannot generate common CPUID + CommonCpuId(arch::Error), /// Error configuring VCPU VcpuConfiguration(arch::Error), @@ -132,18 +102,6 @@ pub enum Error { /// Failed to allocate MMIO address AllocateMmmioAddress, - /// Error populating CPUID with KVM HyperV emulation details - #[cfg(target_arch = "x86_64")] - CpuidKvmHyperV(vmm_sys_util::fam::Error), - - /// Error populating CPUID with KVM HyperV emulation details - #[cfg(target_arch = "x86_64")] - CpuidSgx(arch::x86_64::Error), - - /// Error populating CPUID with CPU identification - #[cfg(target_arch = "x86_64")] - CpuidIdentification(vmm_sys_util::fam::Error), - #[cfg(feature = "tdx")] InitializeTdx(hypervisor::HypervisorCpuError), } @@ -576,15 +534,19 @@ impl CpuManager { #[cfg(target_arch = "x86_64")] let cpuid = { let phys_bits = physical_bits(config.max_phys_bits); - CpuManager::generate_common_cpuid( + arch::generate_common_cpuid( hypervisor, - &config.topology, + config + .topology + .clone() + .map(|t| (t.threads_per_core, t.cores_per_die, t.dies_per_package)), sgx_epc_sections, phys_bits, config.kvm_hyperv, #[cfg(feature = "tdx")] tdx_enabled, - )? + ) + .map_err(Error::CommonCpuId)? 
}; let device_manager = device_manager.lock().unwrap(); @@ -644,177 +606,6 @@ impl CpuManager { Ok(cpu_manager) } - #[cfg(target_arch = "x86_64")] - fn generate_common_cpuid( - hypervisor: Arc, - topology: &Option, - sgx_epc_sections: Option>, - phys_bits: u8, - kvm_hyperv: bool, - #[cfg(feature = "tdx")] tdx_enabled: bool, - ) -> Result { - let cpuid_patches = vec![ - // Patch tsc deadline timer bit - CpuidPatch { - function: 1, - index: 0, - flags_bit: None, - eax_bit: None, - ebx_bit: None, - ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT), - edx_bit: None, - }, - // Patch hypervisor bit - CpuidPatch { - function: 1, - index: 0, - flags_bit: None, - eax_bit: None, - ebx_bit: None, - ecx_bit: Some(HYPERVISOR_ECX_BIT), - edx_bit: None, - }, - // Enable MTRR feature - CpuidPatch { - function: 1, - index: 0, - flags_bit: None, - eax_bit: None, - ebx_bit: None, - ecx_bit: None, - edx_bit: Some(MTRR_EDX_BIT), - }, - ]; - - // Supported CPUID - let mut cpuid = hypervisor - .get_cpuid() - .map_err(|e| Error::PatchCpuId(e.into()))?; - - CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches); - - if let Some(t) = topology { - arch::x86_64::update_cpuid_topology( - &mut cpuid, - t.threads_per_core, - t.cores_per_die, - t.dies_per_package, - ); - } - - if let Some(sgx_epc_sections) = sgx_epc_sections { - arch::x86_64::update_cpuid_sgx(&mut cpuid, sgx_epc_sections) - .map_err(Error::CpuidSgx)?; - } - - // Update some existing CPUID - for entry in cpuid.as_mut_slice().iter_mut() { - match entry.function { - // Set CPU physical bits - 0x8000_0008 => { - entry.eax = (entry.eax & 0xffff_ff00) | (phys_bits as u32 & 0xff); - } - // Disable KVM_FEATURE_ASYNC_PF_INT - // This is required until we find out why the asynchronous page - // fault is generating unexpected behavior when using interrupt - // mechanism. 
- // TODO: Re-enable KVM_FEATURE_ASYNC_PF_INT (#2277) - 0x4000_0001 => { - entry.eax &= !(1 << KVM_FEATURE_ASYNC_PF_INT_BIT); - - // These features are not supported by TDX - #[cfg(feature = "tdx")] - if tdx_enabled { - entry.eax &= !(1 << KVM_FEATURE_CLOCKSOURCE_BIT - | 1 << KVM_FEATURE_CLOCKSOURCE2_BIT - | 1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT - | 1 << KVM_FEATURE_ASYNC_PF_BIT - | 1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT - | 1 << KVM_FEATURE_STEAL_TIME_BIT) - } - } - _ => {} - } - } - - // Copy CPU identification string - for i in 0x8000_0002..=0x8000_0004 { - cpuid.retain(|c| c.function != i); - let leaf = unsafe { std::arch::x86_64::__cpuid(i) }; - cpuid - .push(CpuIdEntry { - function: i, - eax: leaf.eax, - ebx: leaf.ebx, - ecx: leaf.ecx, - edx: leaf.edx, - ..Default::default() - }) - .map_err(Error::CpuidIdentification)?; - } - - if kvm_hyperv { - // Remove conflicting entries - cpuid.retain(|c| c.function != 0x4000_0000); - cpuid.retain(|c| c.function != 0x4000_0001); - // See "Hypervisor Top Level Functional Specification" for details - // Compliance with "Hv#1" requires leaves up to 0x4000_000a - cpuid - .push(CpuIdEntry { - function: 0x40000000, - eax: 0x4000000a, // Maximum cpuid leaf - ebx: 0x756e694c, // "Linu" - ecx: 0x564b2078, // "x KV" - edx: 0x7648204d, // "M Hv" - ..Default::default() - }) - .map_err(Error::CpuidKvmHyperV)?; - cpuid - .push(CpuIdEntry { - function: 0x40000001, - eax: 0x31237648, // "Hv#1" - ..Default::default() - }) - .map_err(Error::CpuidKvmHyperV)?; - cpuid - .push(CpuIdEntry { - function: 0x40000002, - eax: 0x3839, // "Build number" - ebx: 0xa0000, // "Version" - ..Default::default() - }) - .map_err(Error::CpuidKvmHyperV)?; - cpuid - .push(CpuIdEntry { - function: 0x4000_0003, - eax: 1 << 1 // AccessPartitionReferenceCounter - | 1 << 2 // AccessSynicRegs - | 1 << 3 // AccessSyntheticTimerRegs - | 1 << 9, // AccessPartitionReferenceTsc - edx: 1 << 3, // CPU dynamic partitioning - ..Default::default() - }) - 
.map_err(Error::CpuidKvmHyperV)?; - cpuid - .push(CpuIdEntry { - function: 0x4000_0004, - eax: 1 << 5, // Recommend relaxed timing - ..Default::default() - }) - .map_err(Error::CpuidKvmHyperV)?; - for i in 0x4000_0005..=0x4000_000a { - cpuid - .push(CpuIdEntry { - function: i, - ..Default::default() - }) - .map_err(Error::CpuidKvmHyperV)?; - } - } - - Ok(cpuid) - } - fn create_vcpu( &mut self, cpu_id: u8,