vmm: Add CPU affinity support

With the introduction of a new `affinity` option to the `cpus`
parameter, Cloud Hypervisor now lets the user choose the set of
host CPUs on which each vCPU is allowed to run.

This is useful for achieving CPU pinning, as well as for making sure
the VM runs on a specific NUMA node.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
Sebastien Boeuf 2021-11-10 10:43:52 +01:00 committed by Rob Bradford
parent a4f5ad6076
commit 932c8c9713
5 changed files with 85 additions and 4 deletions

View File

@ -647,6 +647,7 @@ mod unit_tests {
topology: None,
kvm_hyperv: false,
max_phys_bits: 46,
affinity: None,
},
memory: MemoryConfig {
size: 536_870_912,

View File

@ -480,6 +480,16 @@ components:
default: false
description: Virtual machine configuration
CpuAffinity:
type: object
properties:
vcpu:
type: integer
host_cpus:
type: array
items:
type: integer
CpuTopology:
type: object
properties:
@ -507,9 +517,13 @@ components:
default: 1
type: integer
topology:
$ref: '#/components/schemas/CpuTopology'
$ref: '#/components/schemas/CpuTopology'
max_phys_bits:
type: integer
affinity:
type: array
items:
$ref: '#/components/schemas/CpuAffinity'
MemoryZoneConfig:
required:

View File

@ -398,6 +398,12 @@ impl FromStr for HotplugMethod {
}
}
/// Affinity entry associating one vCPU with a list of host CPUs.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct CpuAffinity {
/// Identifier of the vCPU this entry applies to.
pub vcpu: u8,
/// Host CPUs making up the CPU set for that vCPU.
pub host_cpus: Vec<u8>,
}
pub enum CpuTopologyParseError {
InvalidValue(String),
}
@ -453,6 +459,8 @@ pub struct CpusConfig {
pub kvm_hyperv: bool,
#[serde(default = "default_cpuconfig_max_phys_bits")]
pub max_phys_bits: u8,
#[serde(default)]
pub affinity: Option<Vec<CpuAffinity>>,
}
impl CpusConfig {
@ -463,7 +471,8 @@ impl CpusConfig {
.add("max")
.add("topology")
.add("kvm_hyperv")
.add("max_phys_bits");
.add("max_phys_bits")
.add("affinity");
parser.parse(cpus).map_err(Error::ParseCpus)?;
let boot_vcpus: u8 = parser
@ -484,6 +493,17 @@ impl CpusConfig {
.convert::<u8>("max_phys_bits")
.map_err(Error::ParseCpus)?
.unwrap_or(DEFAULT_MAX_PHYS_BITS);
// Convert the optional `affinity` parameter into a list of `CpuAffinity`
// entries. Conversion errors are reported as `Error::ParseCpus`; when
// `convert` yields no value, `affinity` stays `None`.
let affinity = parser
.convert::<Tuple<u8, Vec<u8>>>("affinity")
.map_err(Error::ParseCpus)?
.map(|v| {
// Each parsed pair is (vCPU id, host CPU list). The pairs are only
// borrowed through `iter()`, hence the clone of the host CPU list.
v.0.iter()
.map(|(e1, e2)| CpuAffinity {
vcpu: *e1,
host_cpus: e2.clone(),
})
.collect()
});
Ok(CpusConfig {
boot_vcpus,
@ -491,6 +511,7 @@ impl CpusConfig {
topology,
kvm_hyperv,
max_phys_bits,
affinity,
})
}
}
@ -503,6 +524,7 @@ impl Default for CpusConfig {
topology: None,
kvm_hyperv: false,
max_phys_bits: DEFAULT_MAX_PHYS_BITS,
affinity: None,
}
}
}

View File

@ -33,7 +33,6 @@ use hypervisor::CpuId;
use hypervisor::{vm::VmmOps, CpuState, HypervisorCpuError, VmExit};
use libc::{c_void, siginfo_t};
use seccompiler::{apply_filter, SeccompAction};
#[cfg(feature = "acpi")]
use std::collections::BTreeMap;
use std::os::unix::thread::JoinHandleExt;
use std::sync::atomic::{AtomicBool, Ordering};
@ -104,6 +103,9 @@ pub enum Error {
#[cfg(feature = "tdx")]
InitializeTdx(hypervisor::HypervisorCpuError),
/// Failed scheduling the thread on the expected CPU set.
ScheduleCpuSet,
}
pub type Result<T> = result::Result<T, Error>;
@ -393,6 +395,7 @@ pub struct CpuManager {
acpi_address: GuestAddress,
#[cfg(feature = "acpi")]
proximity_domain_per_cpu: BTreeMap<u8, u32>,
affinity: BTreeMap<u8, Vec<u8>>,
}
const CPU_ENABLE_FLAG: usize = 0;
@ -591,6 +594,15 @@ impl CpuManager {
.into_iter()
.collect();
// Index the configured affinity entries by vCPU id so that the host CPU
// list of a given vCPU can be looked up with `affinity.get(&cpu_id)`.
// When no affinity is configured the map is simply empty.
let affinity = if let Some(cpu_affinity) = config.affinity.as_ref() {
cpu_affinity
.iter()
// Key: vCPU id, value: an owned copy of its host CPU list.
.map(|a| (a.vcpu, a.host_cpus.clone()))
.collect()
} else {
BTreeMap::new()
};
let cpu_manager = Arc::new(Mutex::new(CpuManager {
config: config.clone(),
interrupt_controller: device_manager.interrupt_controller().clone(),
@ -611,6 +623,7 @@ impl CpuManager {
acpi_address,
#[cfg(feature = "acpi")]
proximity_domain_per_cpu,
affinity,
}));
#[cfg(feature = "acpi")]
@ -713,7 +726,15 @@ impl CpuManager {
.clone();
let panic_vcpu_run_interrupted = vcpu_run_interrupted.clone();
info!("Starting vCPU: cpu_id = {}", cpu_id);
// Prepare the CPU set the current vCPU is expected to run onto.
// `cpuset` is `None` when the affinity map has no entry for this vCPU.
let cpuset = self.affinity.get(&cpu_id).map(|host_cpus| {
// SAFETY: `cpu_set_t` is a plain C bitmask structure for which the
// all-zero bit pattern is a valid (empty) value.
let mut cpuset: libc::cpu_set_t = unsafe { std::mem::zeroed() };
// SAFETY: `cpuset` is a valid, exclusively borrowed `cpu_set_t`.
unsafe { libc::CPU_ZERO(&mut cpuset) };
for host_cpu in host_cpus {
// SAFETY: `cpuset` is a valid, exclusively borrowed `cpu_set_t`.
// NOTE(review): host CPU ids are `u8` (at most 255), presumably always
// below CPU_SETSIZE on supported targets -- confirm.
unsafe { libc::CPU_SET(*host_cpu as usize, &mut cpuset) };
}
cpuset
});
// Retrieve seccomp filter for vcpu thread
let vcpu_seccomp_filter = get_seccomp_filter(&self.seccomp_action, Thread::Vcpu)
@ -722,10 +743,32 @@ impl CpuManager {
#[cfg(target_arch = "x86_64")]
let interrupt_controller_clone = self.interrupt_controller.as_ref().cloned();
info!("Starting vCPU: cpu_id = {}", cpu_id);
let handle = Some(
thread::Builder::new()
.name(format!("vcpu{}", cpu_id))
.spawn(move || {
// Schedule the thread to run on the expected CPU set
// Nothing is done when no CPU set was prepared for this vCPU.
if let Some(cpuset) = cpuset.as_ref() {
// SAFETY: `cpuset` points to a valid `cpu_set_t` and the size passed
// is the size of that type. A pid of 0 designates the calling thread.
let ret = unsafe {
libc::sched_setaffinity(
0,
std::mem::size_of::<libc::cpu_set_t>(),
cpuset as *const libc::cpu_set_t,
)
};
if ret != 0 {
// The failure is only logged; returning here leaves the thread
// closure, so the rest of the closure is skipped for this vCPU.
error!(
"Failed scheduling the vCPU {} on the expected CPU set: {}",
cpu_id,
io::Error::last_os_error()
);
return;
}
}
// Apply seccomp filter for vcpu thread.
if !vcpu_seccomp_filter.is_empty() {
if let Err(e) =

View File

@ -492,6 +492,7 @@ fn vmm_thread_rules() -> Result<Vec<(i64, Vec<SeccompRule>)>, BackendError> {
(libc::SYS_rt_sigprocmask, vec![]),
(libc::SYS_rt_sigreturn, vec![]),
(libc::SYS_sched_getaffinity, vec![]),
(libc::SYS_sched_setaffinity, vec![]),
(libc::SYS_sendmsg, vec![]),
(libc::SYS_sendto, vec![]),
(libc::SYS_set_robust_list, vec![]),