diff --git a/src/main.rs b/src/main.rs index 4051b0df9..89ef302d1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -647,6 +647,7 @@ mod unit_tests { topology: None, kvm_hyperv: false, max_phys_bits: 46, + affinity: None, }, memory: MemoryConfig { size: 536_870_912, diff --git a/vmm/src/api/openapi/cloud-hypervisor.yaml b/vmm/src/api/openapi/cloud-hypervisor.yaml index 9d1d93982..163d811ed 100644 --- a/vmm/src/api/openapi/cloud-hypervisor.yaml +++ b/vmm/src/api/openapi/cloud-hypervisor.yaml @@ -480,6 +480,16 @@ components: default: false description: Virtual machine configuration + CpuAffinity: + type: object + properties: + vcpu: + type: integer + host_cpus: + type: array + items: + type: integer + CpuTopology: type: object properties: @@ -507,9 +517,13 @@ components: default: 1 type: integer topology: - $ref: '#/components/schemas/CpuTopology' + $ref: '#/components/schemas/CpuTopology' max_phys_bits: type: integer + affinity: + type: array + items: + $ref: '#/components/schemas/CpuAffinity' MemoryZoneConfig: required: diff --git a/vmm/src/config.rs b/vmm/src/config.rs index ee3026884..aa0f3efe0 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -398,6 +398,12 @@ impl FromStr for HotplugMethod { } } +#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] +pub struct CpuAffinity { + pub vcpu: u8, + pub host_cpus: Vec, +} + pub enum CpuTopologyParseError { InvalidValue(String), } @@ -453,6 +459,8 @@ pub struct CpusConfig { pub kvm_hyperv: bool, #[serde(default = "default_cpuconfig_max_phys_bits")] pub max_phys_bits: u8, + #[serde(default)] + pub affinity: Option>, } impl CpusConfig { @@ -463,7 +471,8 @@ impl CpusConfig { .add("max") .add("topology") .add("kvm_hyperv") - .add("max_phys_bits"); + .add("max_phys_bits") + .add("affinity"); parser.parse(cpus).map_err(Error::ParseCpus)?; let boot_vcpus: u8 = parser @@ -484,6 +493,17 @@ impl CpusConfig { .convert::("max_phys_bits") .map_err(Error::ParseCpus)? .unwrap_or(DEFAULT_MAX_PHYS_BITS); + let affinity = parser + .convert::>>("affinity") + .map_err(Error::ParseCpus)? + .map(|v| { + v.0.iter() + .map(|(e1, e2)| CpuAffinity { + vcpu: *e1, + host_cpus: e2.clone(), + }) + .collect() + }); Ok(CpusConfig { boot_vcpus, @@ -491,6 +511,7 @@ impl CpusConfig { topology, kvm_hyperv, max_phys_bits, + affinity, }) } } @@ -503,6 +524,7 @@ impl Default for CpusConfig { topology: None, kvm_hyperv: false, max_phys_bits: DEFAULT_MAX_PHYS_BITS, + affinity: None, } } } diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 4f9fe9a20..b10ef03f8 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -33,7 +33,6 @@ use hypervisor::CpuId; use hypervisor::{vm::VmmOps, CpuState, HypervisorCpuError, VmExit}; use libc::{c_void, siginfo_t}; use seccompiler::{apply_filter, SeccompAction}; -#[cfg(feature = "acpi")] use std::collections::BTreeMap; use std::os::unix::thread::JoinHandleExt; use std::sync::atomic::{AtomicBool, Ordering}; @@ -104,6 +103,9 @@ pub enum Error { #[cfg(feature = "tdx")] InitializeTdx(hypervisor::HypervisorCpuError), + + /// Failed scheduling the thread on the expected CPU set. + ScheduleCpuSet, } pub type Result = result::Result; @@ -393,6 +395,7 @@ pub struct CpuManager { acpi_address: GuestAddress, #[cfg(feature = "acpi")] proximity_domain_per_cpu: BTreeMap, + affinity: BTreeMap>, } const CPU_ENABLE_FLAG: usize = 0; @@ -591,6 +594,15 @@ impl CpuManager { .into_iter() .collect(); + let affinity = if let Some(cpu_affinity) = config.affinity.as_ref() { + cpu_affinity + .iter() + .map(|a| (a.vcpu, a.host_cpus.clone())) + .collect() + } else { + BTreeMap::new() + }; + let cpu_manager = Arc::new(Mutex::new(CpuManager { config: config.clone(), interrupt_controller: device_manager.interrupt_controller().clone(), @@ -611,6 +623,7 @@ impl CpuManager { acpi_address, #[cfg(feature = "acpi")] proximity_domain_per_cpu, + affinity, })); #[cfg(feature = "acpi")] @@ -713,7 +726,15 @@ impl CpuManager { .clone(); let panic_vcpu_run_interrupted = vcpu_run_interrupted.clone(); - info!("Starting vCPU: cpu_id = {}", cpu_id); + // Prepare the CPU set the current vCPU is expected to run onto. + let cpuset = self.affinity.get(&cpu_id).map(|host_cpus| { + let mut cpuset: libc::cpu_set_t = unsafe { std::mem::zeroed() }; + unsafe { libc::CPU_ZERO(&mut cpuset) }; + for host_cpu in host_cpus { + unsafe { libc::CPU_SET(*host_cpu as usize, &mut cpuset) }; + } + cpuset + }); // Retrieve seccomp filter for vcpu thread let vcpu_seccomp_filter = get_seccomp_filter(&self.seccomp_action, Thread::Vcpu) @@ -722,10 +743,32 @@ impl CpuManager { #[cfg(target_arch = "x86_64")] let interrupt_controller_clone = self.interrupt_controller.as_ref().cloned(); + info!("Starting vCPU: cpu_id = {}", cpu_id); + let handle = Some( thread::Builder::new() .name(format!("vcpu{}", cpu_id)) .spawn(move || { + // Schedule the thread to run on the expected CPU set + if let Some(cpuset) = cpuset.as_ref() { + let ret = unsafe { + libc::sched_setaffinity( + 0, + std::mem::size_of::(), + cpuset as *const libc::cpu_set_t, + ) + }; + + if ret != 0 { + error!( + "Failed scheduling the vCPU {} on the expected CPU set: {}", + cpu_id, + io::Error::last_os_error() + ); + return; + } + } + // Apply seccomp filter for vcpu thread. if !vcpu_seccomp_filter.is_empty() { if let Err(e) = diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 7119d03d7..8738a3c30 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -492,6 +492,7 @@ fn vmm_thread_rules() -> Result)>, BackendError> { (libc::SYS_rt_sigprocmask, vec![]), (libc::SYS_rt_sigreturn, vec![]), (libc::SYS_sched_getaffinity, vec![]), + (libc::SYS_sched_setaffinity, vec![]), (libc::SYS_sendmsg, vec![]), (libc::SYS_sendto, vec![]), (libc::SYS_set_robust_list, vec![]),