From 6b0df31e5d4134c9a15b0fb22aedac5d4a5de58b Mon Sep 17 00:00:00 2001 From: William Douglas Date: Tue, 1 Mar 2022 10:25:30 -0800 Subject: [PATCH] vmm: Add support for enabling AMX in vm guests AMX is an x86 extension adding hardware units for matrix operations (int and float dot products). The goal of the extension is to provide performance enhancements for these common operations. On Linux, AMX requires requesting the permission from the kernel prior to use. Guests wanting to make use of the feature need to have the request made prior to starting the vm. This change then adds the first --cpus features option amx that when passed will enable AMX usage for guests (needs a 5.17+ kernel) or exits with failure. The activation is done in the CpuManager of the VMM thread as it allows migration and snapshot/restore to work fairly painlessly for AMX enabled workloads. Signed-off-by: William Douglas --- Cargo.toml | 1 + docs/cpu.md | 24 +++++++++++++- option_parser/src/lib.rs | 1 + src/main.rs | 8 +++-- vmm/Cargo.toml | 1 + vmm/src/api/openapi/cloud-hypervisor.yaml | 8 +++++ vmm/src/config.rs | 38 +++++++++++++++++++++-- vmm/src/cpu.rs | 35 +++++++++++++++++++++ vmm/src/lib.rs | 1 + 9 files changed, 111 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b6731885d..5382da015 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ default = ["common", "kvm"] # Common features for all hypervisors common = ["acpi", "cmos", "fwdebug"] acpi = ["vmm/acpi"] +amx = ["vmm/amx"] cmos = ["vmm/cmos"] fwdebug = ["vmm/fwdebug"] gdb = ["vmm/gdb"] diff --git a/docs/cpu.md b/docs/cpu.md index e2742761a..5dd797315 100644 --- a/docs/cpu.md +++ b/docs/cpu.md @@ -17,11 +17,12 @@ struct CpusConfig { kvm_hyperv: bool, max_phys_bits: u8, affinity: Option>, + features: CpuFeatures, } ``` ``` ---cpus boot=,max=,topology=:::,kvm_hyperv=on|off,max_phys_bits=,affinity= +--cpus boot=,max=,topology=:::,kvm_hyperv=on|off,max_phys_bits=,affinity=,features= ``` ### `boot` @@ -187,3 
+188,24 @@ _Example_ In this example, assuming the host has 4 CPUs, vCPU 0 will run exclusively on host CPUs 2 and 3, while vCPU 1 will run exclusively on host CPUs 0 and 1. Because nothing is defined for vCPU 2, it can run on any of the 4 host CPUs. + +### `features` + +Set of CPU features to enable. + +This option allows the user to enable a set of CPU features that are disabled +by default otherwise. + +The currently available feature set is: `amx`. + +The `amx` feature will enable the x86 extension adding hardware units for +matrix operations (int and float dot products). The goal of the extension is to +provide performance enhancements for these common operations. + +_Example_ + +``` +--cpus features=amx +``` + +In this example the amx CPU feature will be enabled for the VMM. diff --git a/option_parser/src/lib.rs b/option_parser/src/lib.rs index 327b982a8..0e4ef2bdb 100644 --- a/option_parser/src/lib.rs +++ b/option_parser/src/lib.rs @@ -313,6 +313,7 @@ impl FromStr for Tuple { } } +#[derive(Default)] pub struct StringList(pub Vec); pub enum StringListParseError { diff --git a/src/main.rs b/src/main.rs index ee693c14c..66a1328f7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -151,7 +151,8 @@ fn create_app<'a>( "boot=,max=,\ topology=:::,\ kvm_hyperv=on|off,max_phys_bits=,\ - affinity=", + affinity=,\ + features=", ) .default_value(default_vcpus) .group("vm-config"), @@ -631,8 +632,8 @@ mod unit_tests { use crate::{create_app, prepare_default_values}; use std::path::PathBuf; use vmm::config::{ - CmdlineConfig, ConsoleConfig, ConsoleOutputMode, CpusConfig, KernelConfig, MemoryConfig, - RngConfig, VmConfig, VmParams, + CmdlineConfig, ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpusConfig, KernelConfig, + MemoryConfig, RngConfig, VmConfig, VmParams, }; fn get_vm_config_from_vec(args: &[&str]) -> VmConfig { @@ -679,6 +680,7 @@ mod unit_tests { kvm_hyperv: false, max_phys_bits: 46, affinity: None, + features: CpuFeatures::default(), }, memory: MemoryConfig { size: 
536_870_912, diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index a285633a8..5e3f86936 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" [features] default = [] acpi = ["acpi_tables","devices/acpi", "arch/acpi"] +amx = [] cmos = ["devices/cmos"] fwdebug = ["devices/fwdebug"] gdb = ["kvm"] diff --git a/vmm/src/api/openapi/cloud-hypervisor.yaml b/vmm/src/api/openapi/cloud-hypervisor.yaml index 533e610ca..4948d03b9 100644 --- a/vmm/src/api/openapi/cloud-hypervisor.yaml +++ b/vmm/src/api/openapi/cloud-hypervisor.yaml @@ -562,6 +562,12 @@ components: items: type: integer + CpuFeatures: + type: object + properties: + amx: + type: boolean + CpuTopology: type: object properties: @@ -596,6 +602,8 @@ components: type: array items: $ref: '#/components/schemas/CpuAffinity' + features: + $ref: '#/components/schemas/CpuFeatures' PlatformConfig: type: object diff --git a/vmm/src/config.rs b/vmm/src/config.rs index bc737eef4..1f9f7fefc 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -54,6 +54,8 @@ pub enum Error { ParseRestoreSourceUrlMissing, /// Error parsing CPU options ParseCpus(OptionParserError), + /// Invalid CPU features + InvalidCpuFeatures(String), /// Error parsing memory options ParseMemory(OptionParserError), /// Error parsing memory zone options @@ -267,7 +269,7 @@ impl fmt::Display for Error { write!(f, "Error parsing --console: invalid console mode given") } ParseCpus(o) => write!(f, "Error parsing --cpus: {}", o), - + InvalidCpuFeatures(o) => write!(f, "Invalid feature in --cpus features list: {}", o), ParseDevice(o) => write!(f, "Error parsing --device: {}", o), ParseDevicePathMissing => write!(f, "Error parsing --device: path missing"), ParseFileSystem(o) => write!(f, "Error parsing --fs: {}", o), @@ -452,6 +454,12 @@ pub struct CpuAffinity { pub host_cpus: Vec, } +#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)] +pub struct CpuFeatures { + #[cfg(all(feature = "amx", target_arch = "x86_64"))] + pub amx: 
bool, +} + pub enum CpuTopologyParseError { InvalidValue(String), } @@ -509,6 +517,8 @@ pub struct CpusConfig { pub max_phys_bits: u8, #[serde(default)] pub affinity: Option>, + #[serde(default)] + pub features: CpuFeatures, } impl CpusConfig { @@ -520,7 +530,8 @@ impl CpusConfig { .add("topology") .add("kvm_hyperv") .add("max_phys_bits") - .add("affinity"); + .add("affinity") + .add("features"); parser.parse(cpus).map_err(Error::ParseCpus)?; let boot_vcpus: u8 = parser @@ -552,6 +563,27 @@ impl CpusConfig { }) .collect() }); + let features_list = parser + .convert::("features") + .map_err(Error::ParseCpus)? + .unwrap_or_default(); + // Some ugliness here as the features being checked might be disabled + // at compile time causing the below allow and the need to specify the + // ref type in the match. + // The issue will go away once kvm_hyperv is moved under the features + // list as it will always be checked for. + #[allow(unused_mut)] + let mut features = CpuFeatures::default(); + for s in features_list.0 { + match >::as_ref(&s) { + #[cfg(all(feature = "amx", target_arch = "x86_64"))] + "amx" => { + features.amx = true; + Ok(()) + } + _ => Err(Error::InvalidCpuFeatures(s)), + }?; + } Ok(CpusConfig { boot_vcpus, @@ -560,6 +592,7 @@ impl CpusConfig { kvm_hyperv, max_phys_bits, affinity, + features, }) } } @@ -573,6 +606,7 @@ impl Default for CpusConfig { kvm_hyperv: false, max_phys_bits: DEFAULT_MAX_PHYS_BITS, affinity: None, + features: CpuFeatures::default(), } } } diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index ddf4c50e2..3c37af3a4 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -128,6 +128,10 @@ pub enum Error { /// CPU hotplug/unplug not supported ResizingNotSupported, + + #[cfg(all(feature = "amx", target_arch = "x86_64"))] + /// Failed to set up AMX. + AmxEnable(anyhow::Error), } pub type Result = result::Result; @@ -598,6 +602,37 @@ impl CpuManager { ) .map_err(Error::CommonCpuId)? 
}; + #[cfg(all(feature = "amx", target_arch = "x86_64"))] + if config.features.amx { + const ARCH_GET_XCOMP_GUEST_PERM: usize = 0x1024; + const ARCH_REQ_XCOMP_GUEST_PERM: usize = 0x1025; + const XFEATURE_XTILEDATA: usize = 18; + const XFEATURE_XTILEDATA_MASK: usize = 1 << XFEATURE_XTILEDATA; + + // SAFETY: the syscall is only modifying kernel internal + // data structures that the kernel is itself expected to safeguard. + let amx_tile = unsafe { + libc::syscall( + libc::SYS_arch_prctl, + ARCH_REQ_XCOMP_GUEST_PERM, + XFEATURE_XTILEDATA, + ) + }; + + if amx_tile != 0 { + return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported"))); + } else { + // SAFETY: the kernel writes the permission bitmap through this pointer, so + // it must come from a mutable borrow of a local that isn't in use elsewhere. + let mut mask: usize = 0; + let result = unsafe { + libc::syscall(libc::SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &mut mask) + }; + if result != 0 || (mask & XFEATURE_XTILEDATA_MASK) != XFEATURE_XTILEDATA_MASK { + return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported"))); + } + } + } let device_manager = device_manager.lock().unwrap(); diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index cbbf9d24d..f3b935e7b 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1885,6 +1885,7 @@ mod unit_tests { kvm_hyperv: false, max_phys_bits: 46, affinity: None, + features: config::CpuFeatures::default(), }, memory: MemoryConfig { size: 536_870_912,