vmm: Add CPU affinity support

With the introduction of a new `affinity` option to the `cpus` parameter, Cloud Hypervisor now lets the user choose the set of host CPUs on which each vCPU is allowed to run. This is useful for achieving CPU pinning, as well as for making sure the VM runs on a specific NUMA node.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
2025-03-20 07:58:55 +00:00 · 2021-11-10 10:43:52 +01:00 · 2021-11-10 10:43:52 +01:00 · 932c8c9713
commit 932c8c9713
parent a4f5ad6076
5 changed files with 85 additions and 4 deletions
--- a/src/main.rs
+++ b/src/main.rs
@ -647,6 +647,7 @@ mod unit_tests {
                    topology: None,
                    kvm_hyperv: false,
                    max_phys_bits: 46,
+                    affinity: None,
                },
                memory: MemoryConfig {
                    size: 536_870_912,
--- a/vmm/src/api/openapi/cloud-hypervisor.yaml
+++ b/vmm/src/api/openapi/cloud-hypervisor.yaml
@ -480,6 +480,16 @@ components:
          default: false
      description: Virtual machine configuration

+    CpuAffinity:
+      type: object
+      properties:
+        vcpu:
+          type: integer
+        host_cpus:
+          type: array
+          items:
+            type: integer
+
    CpuTopology:
      type: object
      properties:
@ -507,9 +517,13 @@ components:
          default: 1
          type: integer
        topology:
-            $ref: '#/components/schemas/CpuTopology'
+          $ref: '#/components/schemas/CpuTopology'
        max_phys_bits:
          type: integer
+        affinity:
+          type: array
+          items:
+            $ref: '#/components/schemas/CpuAffinity'

    MemoryZoneConfig:
      required:
--- a/vmm/src/config.rs
+++ b/vmm/src/config.rs
@ -398,6 +398,12 @@ impl FromStr for HotplugMethod {
    }
 }

+#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
+pub struct CpuAffinity {
+    pub vcpu: u8,
+    pub host_cpus: Vec<u8>,
+}
+
 pub enum CpuTopologyParseError {
    InvalidValue(String),
 }
@ -453,6 +459,8 @@ pub struct CpusConfig {
    pub kvm_hyperv: bool,
    #[serde(default = "default_cpuconfig_max_phys_bits")]
    pub max_phys_bits: u8,
+    #[serde(default)]
+    pub affinity: Option<Vec<CpuAffinity>>,
 }

 impl CpusConfig {
@ -463,7 +471,8 @@ impl CpusConfig {
            .add("max")
            .add("topology")
            .add("kvm_hyperv")
-            .add("max_phys_bits");
+            .add("max_phys_bits")
+            .add("affinity");
        parser.parse(cpus).map_err(Error::ParseCpus)?;

        let boot_vcpus: u8 = parser
@ -484,6 +493,17 @@ impl CpusConfig {
            .convert::<u8>("max_phys_bits")
            .map_err(Error::ParseCpus)?
            .unwrap_or(DEFAULT_MAX_PHYS_BITS);
+        let affinity = parser
+            .convert::<Tuple<u8, Vec<u8>>>("affinity")
+            .map_err(Error::ParseCpus)?
+            .map(|v| {
+                v.0.iter()
+                    .map(|(e1, e2)| CpuAffinity {
+                        vcpu: *e1,
+                        host_cpus: e2.clone(),
+                    })
+                    .collect()
+            });

        Ok(CpusConfig {
            boot_vcpus,
@ -491,6 +511,7 @@ impl CpusConfig {
            topology,
            kvm_hyperv,
            max_phys_bits,
+            affinity,
        })
    }
 }
@ -503,6 +524,7 @@ impl Default for CpusConfig {
            topology: None,
            kvm_hyperv: false,
            max_phys_bits: DEFAULT_MAX_PHYS_BITS,
+            affinity: None,
        }
    }
 }
--- a/vmm/src/cpu.rs
+++ b/vmm/src/cpu.rs
@ -33,7 +33,6 @@ use hypervisor::CpuId;
 use hypervisor::{vm::VmmOps, CpuState, HypervisorCpuError, VmExit};
 use libc::{c_void, siginfo_t};
 use seccompiler::{apply_filter, SeccompAction};
-#[cfg(feature = "acpi")]
 use std::collections::BTreeMap;
 use std::os::unix::thread::JoinHandleExt;
 use std::sync::atomic::{AtomicBool, Ordering};
@ -104,6 +103,9 @@ pub enum Error {

    #[cfg(feature = "tdx")]
    InitializeTdx(hypervisor::HypervisorCpuError),
+
+    /// Failed scheduling the thread on the expected CPU set.
+    ScheduleCpuSet,
 }
 pub type Result<T> = result::Result<T, Error>;

@ -393,6 +395,7 @@ pub struct CpuManager {
    acpi_address: GuestAddress,
    #[cfg(feature = "acpi")]
    proximity_domain_per_cpu: BTreeMap<u8, u32>,
+    affinity: BTreeMap<u8, Vec<u8>>,
 }

 const CPU_ENABLE_FLAG: usize = 0;
@ -591,6 +594,15 @@ impl CpuManager {
        .into_iter()
        .collect();

+        let affinity = if let Some(cpu_affinity) = config.affinity.as_ref() {
+            cpu_affinity
+                .iter()
+                .map(|a| (a.vcpu, a.host_cpus.clone()))
+                .collect()
+        } else {
+            BTreeMap::new()
+        };
+
        let cpu_manager = Arc::new(Mutex::new(CpuManager {
            config: config.clone(),
            interrupt_controller: device_manager.interrupt_controller().clone(),
@ -611,6 +623,7 @@ impl CpuManager {
            acpi_address,
            #[cfg(feature = "acpi")]
            proximity_domain_per_cpu,
+            affinity,
        }));

        #[cfg(feature = "acpi")]
@ -713,7 +726,15 @@ impl CpuManager {
            .clone();
        let panic_vcpu_run_interrupted = vcpu_run_interrupted.clone();

-        info!("Starting vCPU: cpu_id = {}", cpu_id);
+        // Prepare the CPU set the current vCPU is expected to run on.
+        let cpuset = self.affinity.get(&cpu_id).map(|host_cpus| {
+            let mut cpuset: libc::cpu_set_t = unsafe { std::mem::zeroed() };
+            unsafe { libc::CPU_ZERO(&mut cpuset) };
+            for host_cpu in host_cpus {
+                unsafe { libc::CPU_SET(*host_cpu as usize, &mut cpuset) };
+            }
+            cpuset
+        });

        // Retrieve seccomp filter for vcpu thread
        let vcpu_seccomp_filter = get_seccomp_filter(&self.seccomp_action, Thread::Vcpu)
@ -722,10 +743,32 @@ impl CpuManager {
        #[cfg(target_arch = "x86_64")]
        let interrupt_controller_clone = self.interrupt_controller.as_ref().cloned();

+        info!("Starting vCPU: cpu_id = {}", cpu_id);
+
        let handle = Some(
            thread::Builder::new()
                .name(format!("vcpu{}", cpu_id))
                .spawn(move || {
+                    // Schedule the thread to run on the expected CPU set
+                    if let Some(cpuset) = cpuset.as_ref() {
+                        let ret = unsafe {
+                            libc::sched_setaffinity(
+                                0,
+                                std::mem::size_of::<libc::cpu_set_t>(),
+                                cpuset as *const libc::cpu_set_t,
+                            )
+                        };
+
+                        if ret != 0 {
+                            error!(
+                                "Failed scheduling the vCPU {} on the expected CPU set: {}",
+                                cpu_id,
+                                io::Error::last_os_error()
+                            );
+                            return;
+                        }
+                    }
+
                    // Apply seccomp filter for vcpu thread.
                    if !vcpu_seccomp_filter.is_empty() {
                        if let Err(e) =
--- a/vmm/src/seccomp_filters.rs
+++ b/vmm/src/seccomp_filters.rs
@ -492,6 +492,7 @@ fn vmm_thread_rules() -> Result<Vec<(i64, Vec<SeccompRule>)>, BackendError> {
        (libc::SYS_rt_sigprocmask, vec![]),
        (libc::SYS_rt_sigreturn, vec![]),
        (libc::SYS_sched_getaffinity, vec![]),
+        (libc::SYS_sched_setaffinity, vec![]),
        (libc::SYS_sendmsg, vec![]),
        (libc::SYS_sendto, vec![]),
        (libc::SYS_set_robust_list, vec![]),