mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2024-11-10 05:40:10 +00:00
f282cc001a
Add API to the hypervisor interface and implement for KVM to allow the special TDX KVM ioctls on the VM and vCPU FDs. Signed-off-by: Rob Bradford <robert.bradford@intel.com>
1496 lines
52 KiB
Rust
1496 lines
52 KiB
Rust
// Copyright © 2019 Intel Corporation
|
|
//
|
|
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
|
|
//
|
|
// Copyright © 2020, Microsoft Corporation
|
|
//
|
|
// Copyright 2018-2019 CrowdStrike, Inc.
|
|
//
|
|
//
|
|
|
|
#[cfg(target_arch = "aarch64")]
|
|
pub use crate::aarch64::{
|
|
check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
|
|
MPIDR_EL1,
|
|
};
|
|
use crate::cpu;
|
|
use crate::device;
|
|
use crate::hypervisor;
|
|
use crate::vm::{self, VmmOps};
|
|
#[cfg(target_arch = "aarch64")]
|
|
use crate::{arm64_core_reg_id, offset__of};
|
|
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
|
|
use serde_derive::{Deserialize, Serialize};
|
|
use std::os::unix::io::{AsRawFd, RawFd};
|
|
use std::result;
|
|
#[cfg(target_arch = "x86_64")]
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
|
use std::sync::Arc;
|
|
#[cfg(target_arch = "x86_64")]
|
|
use vm_memory::Address;
|
|
use vmm_sys_util::eventfd::EventFd;
|
|
// x86_64 dependencies
|
|
#[cfg(target_arch = "x86_64")]
|
|
pub mod x86_64;
|
|
#[cfg(target_arch = "x86_64")]
|
|
use crate::arch::x86::NUM_IOAPIC_PINS;
|
|
#[cfg(target_arch = "aarch64")]
|
|
use aarch64::{RegList, Register, StandardRegisters};
|
|
#[cfg(target_arch = "x86_64")]
|
|
use kvm_bindings::{
|
|
kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
|
|
};
|
|
#[cfg(target_arch = "x86_64")]
|
|
use x86_64::{
|
|
check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS,
|
|
};
|
|
#[cfg(target_arch = "x86_64")]
|
|
pub use x86_64::{
|
|
CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
|
|
Xsave, CPUID_FLAG_VALID_INDEX,
|
|
};
|
|
// aarch64 dependencies
|
|
#[cfg(target_arch = "aarch64")]
|
|
pub mod aarch64;
|
|
pub use kvm_bindings;
|
|
#[cfg(feature = "tdx")]
|
|
use kvm_bindings::KVMIO;
|
|
pub use kvm_bindings::{
|
|
kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
|
|
kvm_userspace_memory_region, KVM_IRQ_ROUTING_MSI, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY,
|
|
KVM_MSI_VALID_DEVID,
|
|
};
|
|
#[cfg(target_arch = "aarch64")]
|
|
use kvm_bindings::{
|
|
kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
|
|
KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
|
|
};
|
|
pub use kvm_ioctls;
|
|
pub use kvm_ioctls::{Cap, Kvm};
|
|
#[cfg(target_arch = "aarch64")]
|
|
use std::mem;
|
|
#[cfg(feature = "tdx")]
|
|
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_expr, ioctl_ioc_nr, ioctl_iowr_nr};
|
|
|
|
///
|
|
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
|
|
///
|
|
pub use {
|
|
kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
|
|
kvm_bindings::kvm_device_attr as DeviceAttr,
|
|
kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
|
|
kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
|
|
kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress,
|
|
kvm_ioctls::VcpuExit,
|
|
};
|
|
|
|
#[cfg(feature = "tdx")]
|
|
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);
|
|
|
|
#[cfg(feature = "tdx")]
|
|
#[repr(u32)]
|
|
enum TdxCommand {
|
|
#[allow(dead_code)]
|
|
Capabilities = 0,
|
|
InitVm,
|
|
InitVcpu,
|
|
InitMemRegion,
|
|
Finalize,
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
|
|
pub struct KvmVmState {}
|
|
|
|
pub use KvmVmState as VmState;
|
|
/// Wrapper over KVM VM ioctls.
|
|
pub struct KvmVm {
|
|
fd: Arc<VmFd>,
|
|
#[cfg(target_arch = "x86_64")]
|
|
msrs: MsrEntries,
|
|
state: KvmVmState,
|
|
}
|
|
|
|
// Returns a `Vec<T>` with a size in bytes at least as large as `size_in_bytes`.
|
|
fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> {
|
|
let rounded_size = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>();
|
|
let mut v = Vec::with_capacity(rounded_size);
|
|
v.resize_with(rounded_size, T::default);
|
|
v
|
|
}
|
|
|
|
// The kvm API has many structs that resemble the following `Foo` structure:
|
|
//
|
|
// ```
|
|
// #[repr(C)]
|
|
// struct Foo {
|
|
// some_data: u32
|
|
// entries: __IncompleteArrayField<__u32>,
|
|
// }
|
|
// ```
|
|
//
|
|
// In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would not
|
|
// include any space for `entries`. To make the allocation large enough while still being aligned
|
|
// for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually be used
|
|
// as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be contiguous
|
|
// with `Foo`. This function is used to make the `Vec<Foo>` with enough space for `count` entries.
|
|
use std::mem::size_of;
|
|
fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> {
|
|
let element_space = count * size_of::<F>();
|
|
let vec_size_bytes = size_of::<T>() + element_space;
|
|
vec_with_size_in_bytes(vec_size_bytes)
|
|
}
|
|
|
|
///
|
|
/// Implementation of Vm trait for KVM
|
|
/// Example:
|
|
/// #[cfg(feature = "kvm")]
|
|
/// extern crate hypervisor
|
|
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
|
|
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
|
|
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
|
|
/// vm.set/get().unwrap()
|
|
///
|
|
impl vm::Vm for KvmVm {
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Sets the address of the three-page region in the VM's address space.
|
|
///
|
|
fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
|
|
self.fd
|
|
.set_tss_address(offset)
|
|
.map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
|
|
}
|
|
///
|
|
/// Creates an in-kernel interrupt controller.
|
|
///
|
|
fn create_irq_chip(&self) -> vm::Result<()> {
|
|
self.fd
|
|
.create_irq_chip()
|
|
.map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
|
|
}
|
|
///
|
|
/// Registers an event that will, when signaled, trigger the `gsi` IRQ.
|
|
///
|
|
fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
|
|
self.fd
|
|
.register_irqfd(fd, gsi)
|
|
.map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
|
|
}
|
|
///
|
|
/// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
|
|
///
|
|
fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
|
|
self.fd
|
|
.unregister_irqfd(fd, gsi)
|
|
.map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
|
|
}
|
|
///
|
|
/// Creates a VcpuFd object from a vcpu RawFd.
|
|
///
|
|
fn create_vcpu(
|
|
&self,
|
|
id: u8,
|
|
vmmops: Option<Arc<Box<dyn VmmOps>>>,
|
|
) -> vm::Result<Arc<dyn cpu::Vcpu>> {
|
|
let vc = self
|
|
.fd
|
|
.create_vcpu(id as u64)
|
|
.map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
|
|
let vcpu = KvmVcpu {
|
|
fd: vc,
|
|
#[cfg(target_arch = "x86_64")]
|
|
msrs: self.msrs.clone(),
|
|
vmmops,
|
|
#[cfg(target_arch = "x86_64")]
|
|
hyperv_synic: AtomicBool::new(false),
|
|
};
|
|
Ok(Arc::new(vcpu))
|
|
}
|
|
///
|
|
/// Registers an event to be signaled whenever a certain address is written to.
|
|
///
|
|
fn register_ioevent(
|
|
&self,
|
|
fd: &EventFd,
|
|
addr: &IoEventAddress,
|
|
datamatch: Option<vm::DataMatch>,
|
|
) -> vm::Result<()> {
|
|
if let Some(dm) = datamatch {
|
|
match dm {
|
|
vm::DataMatch::DataMatch32(kvm_dm32) => self
|
|
.fd
|
|
.register_ioevent(fd, addr, kvm_dm32)
|
|
.map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
|
|
vm::DataMatch::DataMatch64(kvm_dm64) => self
|
|
.fd
|
|
.register_ioevent(fd, addr, kvm_dm64)
|
|
.map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
|
|
}
|
|
} else {
|
|
self.fd
|
|
.register_ioevent(fd, addr, NoDatamatch)
|
|
.map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
|
|
}
|
|
}
|
|
///
|
|
/// Unregisters an event from a certain address it has been previously registered to.
|
|
///
|
|
fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
|
|
self.fd
|
|
.unregister_ioevent(fd, addr, NoDatamatch)
|
|
.map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
|
|
}
|
|
///
|
|
/// Sets the GSI routing table entries, overwriting any previously set
|
|
/// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
|
|
///
|
|
fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
|
|
let mut irq_routing =
|
|
vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
|
|
irq_routing[0].nr = entries.len() as u32;
|
|
irq_routing[0].flags = 0;
|
|
|
|
unsafe {
|
|
let entries_slice: &mut [kvm_irq_routing_entry] =
|
|
irq_routing[0].entries.as_mut_slice(entries.len());
|
|
entries_slice.copy_from_slice(&entries);
|
|
}
|
|
|
|
self.fd
|
|
.set_gsi_routing(&irq_routing[0])
|
|
.map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
|
|
}
|
|
///
|
|
/// Creates a memory region structure that can be used with set_user_memory_region
|
|
///
|
|
fn make_user_memory_region(
|
|
&self,
|
|
slot: u32,
|
|
guest_phys_addr: u64,
|
|
memory_size: u64,
|
|
userspace_addr: u64,
|
|
readonly: bool,
|
|
log_dirty_pages: bool,
|
|
) -> MemoryRegion {
|
|
MemoryRegion {
|
|
slot,
|
|
guest_phys_addr,
|
|
memory_size,
|
|
userspace_addr,
|
|
flags: if readonly { KVM_MEM_READONLY } else { 0 }
|
|
| if log_dirty_pages {
|
|
KVM_MEM_LOG_DIRTY_PAGES
|
|
} else {
|
|
0
|
|
},
|
|
}
|
|
}
|
|
///
|
|
/// Creates/modifies a guest physical memory slot.
|
|
///
|
|
fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
|
|
// Safe because guest regions are guaranteed not to overlap.
|
|
unsafe {
|
|
self.fd
|
|
.set_user_memory_region(user_memory_region)
|
|
.map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into()))
|
|
}
|
|
}
|
|
///
|
|
/// Creates an emulated device in the kernel.
|
|
///
|
|
/// See the documentation for `KVM_CREATE_DEVICE`.
|
|
fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
|
|
let fd = self
|
|
.fd
|
|
.create_device(device)
|
|
.map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
|
|
let device = KvmDevice { fd };
|
|
Ok(Arc::new(device))
|
|
}
|
|
///
|
|
/// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
|
|
///
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
|
|
self.fd
|
|
.get_preferred_target(kvi)
|
|
.map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
fn enable_split_irq(&self) -> vm::Result<()> {
|
|
// Set TSS
|
|
self.fd
|
|
.set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize)
|
|
.map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
|
|
// Create split irqchip
|
|
// Only the local APIC is emulated in kernel, both PICs and IOAPIC
|
|
// are not.
|
|
let mut cap = kvm_enable_cap {
|
|
cap: KVM_CAP_SPLIT_IRQCHIP,
|
|
..Default::default()
|
|
};
|
|
cap.args[0] = NUM_IOAPIC_PINS as u64;
|
|
self.fd
|
|
.enable_cap(&cap)
|
|
.map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
|
|
Ok(())
|
|
}
|
|
/// Retrieve guest clock.
|
|
#[cfg(target_arch = "x86_64")]
|
|
fn get_clock(&self) -> vm::Result<ClockData> {
|
|
self.fd
|
|
.get_clock()
|
|
.map_err(|e| vm::HypervisorVmError::GetClock(e.into()))
|
|
}
|
|
/// Set guest clock.
|
|
#[cfg(target_arch = "x86_64")]
|
|
fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
|
|
self.fd
|
|
.set_clock(data)
|
|
.map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
|
|
}
|
|
/// Checks if a particular `Cap` is available.
|
|
fn check_extension(&self, c: Cap) -> bool {
|
|
self.fd.check_extension(c)
|
|
}
|
|
/// Create a device that is used for passthrough
|
|
fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
|
|
let mut vfio_dev = kvm_create_device {
|
|
type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
|
|
fd: 0,
|
|
flags: 0,
|
|
};
|
|
|
|
self.create_device(&mut vfio_dev)
|
|
.map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
|
|
}
|
|
///
|
|
/// Get the Vm state. Return VM specific data
|
|
///
|
|
fn state(&self) -> vm::Result<VmState> {
|
|
Ok(self.state)
|
|
}
|
|
///
|
|
/// Set the VM state
|
|
///
|
|
fn set_state(&self, _state: VmState) -> vm::Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
///
|
|
/// Get dirty pages bitmap (one bit per page)
|
|
///
|
|
fn get_dirty_log(&self, slot: u32, memory_size: u64) -> vm::Result<Vec<u64>> {
|
|
self.fd
|
|
.get_dirty_log(slot, memory_size as usize)
|
|
.map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
|
|
}
|
|
|
|
///
|
|
/// Initialize TDX for this VM
|
|
///
|
|
#[cfg(feature = "tdx")]
|
|
fn tdx_init(&self, cpuid: &CpuId, max_vcpus: u32) -> vm::Result<()> {
|
|
#[repr(C)]
|
|
struct TdxInitVm {
|
|
max_vcpus: u32,
|
|
reserved: u32,
|
|
attributes: u64,
|
|
cpuid: u64,
|
|
};
|
|
let data = TdxInitVm {
|
|
max_vcpus,
|
|
reserved: 0,
|
|
attributes: 0,
|
|
cpuid: cpuid.as_fam_struct_ptr() as u64,
|
|
};
|
|
|
|
tdx_command(
|
|
&self.fd.as_raw_fd(),
|
|
TdxCommand::InitVm,
|
|
0,
|
|
&data as *const _ as u64,
|
|
)
|
|
.map_err(vm::HypervisorVmError::InitializeTdx)
|
|
}
|
|
|
|
///
|
|
/// Finalize the TDX setup for this VM
|
|
///
|
|
#[cfg(feature = "tdx")]
|
|
fn tdx_finalize(&self) -> vm::Result<()> {
|
|
tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
|
|
.map_err(vm::HypervisorVmError::FinalizeTdx)
|
|
}
|
|
|
|
///
|
|
/// Initialize memory regions for the TDX VM
|
|
///
|
|
#[cfg(feature = "tdx")]
|
|
fn tdx_init_memory_region(
|
|
&self,
|
|
host_address: u64,
|
|
guest_address: u64,
|
|
size: u64,
|
|
measure: bool,
|
|
) -> vm::Result<()> {
|
|
#[repr(C)]
|
|
struct TdxInitMemRegion {
|
|
host_address: u64,
|
|
guest_address: u64,
|
|
pages: u64,
|
|
};
|
|
let data = TdxInitMemRegion {
|
|
host_address,
|
|
guest_address,
|
|
pages: size / 4096,
|
|
};
|
|
|
|
tdx_command(
|
|
&self.fd.as_raw_fd(),
|
|
TdxCommand::InitMemRegion,
|
|
if measure { 1 } else { 0 },
|
|
&data as *const _ as u64,
|
|
)
|
|
.map_err(vm::HypervisorVmError::InitMemRegionTdx)
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "tdx")]
|
|
fn tdx_command(
|
|
fd: &RawFd,
|
|
command: TdxCommand,
|
|
metadata: u32,
|
|
data: u64,
|
|
) -> std::result::Result<(), std::io::Error> {
|
|
#[repr(C)]
|
|
struct TdxIoctlCmd {
|
|
command: TdxCommand,
|
|
metadata: u32,
|
|
data: u64,
|
|
};
|
|
let cmd = TdxIoctlCmd {
|
|
command,
|
|
metadata,
|
|
data,
|
|
};
|
|
let ret = unsafe {
|
|
ioctl_with_val(
|
|
fd,
|
|
KVM_MEMORY_ENCRYPT_OP(),
|
|
&cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
|
|
)
|
|
};
|
|
|
|
if ret < 0 {
|
|
return Err(std::io::Error::last_os_error());
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Wrapper over KVM system ioctls.
|
|
pub struct KvmHypervisor {
|
|
kvm: Kvm,
|
|
}
|
|
/// Enum for KVM related error
|
|
#[derive(Debug)]
|
|
pub enum KvmError {
|
|
CapabilityMissing(Cap),
|
|
}
|
|
pub type KvmResult<T> = result::Result<T, KvmError>;
|
|
impl KvmHypervisor {
|
|
/// Create a hypervisor based on Kvm
|
|
pub fn new() -> hypervisor::Result<KvmHypervisor> {
|
|
let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
|
|
let api_version = kvm_obj.get_api_version();
|
|
|
|
if api_version != kvm_bindings::KVM_API_VERSION as i32 {
|
|
return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
|
|
}
|
|
|
|
Ok(KvmHypervisor { kvm: kvm_obj })
|
|
}
|
|
}
|
|
/// Implementation of Hypervisor trait for KVM
|
|
/// Example:
|
|
/// #[cfg(feature = "kvm")]
|
|
/// extern crate hypervisor
|
|
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
|
|
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
|
|
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
|
|
///
|
|
impl hypervisor::Hypervisor for KvmHypervisor {
|
|
/// Create a KVM vm object and return the object as Vm trait object
|
|
/// Example
|
|
/// # extern crate hypervisor;
|
|
/// # use hypervisor::KvmHypervisor;
|
|
/// use hypervisor::KvmVm;
|
|
/// let hypervisor = KvmHypervisor::new().unwrap();
|
|
/// let vm = hypervisor.create_vm().unwrap()
|
|
///
|
|
fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
|
|
let fd: VmFd;
|
|
loop {
|
|
match self.kvm.create_vm() {
|
|
Ok(res) => fd = res,
|
|
Err(e) => {
|
|
if e.errno() == libc::EINTR {
|
|
// If the error returned is EINTR, which means the
|
|
// ioctl has been interrupted, we have to retry as
|
|
// this can't be considered as a regular error.
|
|
continue;
|
|
} else {
|
|
return Err(hypervisor::HypervisorError::VmCreate(e.into()));
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
let vm_fd = Arc::new(fd);
|
|
|
|
#[cfg(target_arch = "x86_64")]
|
|
{
|
|
let msr_list = self.get_msr_list()?;
|
|
let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
|
|
let mut msrs = MsrEntries::new(num_msrs).unwrap();
|
|
let indices = msr_list.as_slice();
|
|
let msr_entries = msrs.as_mut_slice();
|
|
for (pos, index) in indices.iter().enumerate() {
|
|
msr_entries[pos].index = *index;
|
|
}
|
|
|
|
Ok(Arc::new(KvmVm {
|
|
fd: vm_fd,
|
|
msrs,
|
|
state: VmState {},
|
|
}))
|
|
}
|
|
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
{
|
|
Ok(Arc::new(KvmVm {
|
|
fd: vm_fd,
|
|
state: VmState {},
|
|
}))
|
|
}
|
|
}
|
|
|
|
fn check_required_extensions(&self) -> hypervisor::Result<()> {
|
|
check_required_kvm_extensions(&self.kvm).expect("Missing KVM capabilities");
|
|
Ok(())
|
|
}
|
|
|
|
///
|
|
/// Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure.
|
|
///
|
|
fn get_vcpu_mmap_size(&self) -> hypervisor::Result<usize> {
|
|
self.kvm
|
|
.get_vcpu_mmap_size()
|
|
.map_err(|e| hypervisor::HypervisorError::GetVcpuMmap(e.into()))
|
|
}
|
|
///
|
|
/// Gets the recommended maximum number of VCPUs per VM.
|
|
///
|
|
fn get_max_vcpus(&self) -> hypervisor::Result<usize> {
|
|
Ok(self.kvm.get_max_vcpus())
|
|
}
|
|
///
|
|
/// Gets the recommended number of VCPUs per VM.
|
|
///
|
|
fn get_nr_vcpus(&self) -> hypervisor::Result<usize> {
|
|
Ok(self.kvm.get_nr_vcpus())
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Checks if a particular `Cap` is available.
|
|
///
|
|
fn check_capability(&self, c: Cap) -> bool {
|
|
self.kvm.check_extension(c)
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// X86 specific call to get the system supported CPUID values.
|
|
///
|
|
fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
|
|
self.kvm
|
|
.get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
|
|
.map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Retrieve the list of MSRs supported by KVM.
|
|
///
|
|
fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
|
|
self.kvm
|
|
.get_msr_index_list()
|
|
.map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
|
|
}
|
|
}
|
|
/// Vcpu struct for KVM
|
|
pub struct KvmVcpu {
|
|
fd: VcpuFd,
|
|
#[cfg(target_arch = "x86_64")]
|
|
msrs: MsrEntries,
|
|
vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
|
|
#[cfg(target_arch = "x86_64")]
|
|
hyperv_synic: AtomicBool,
|
|
}
|
|
/// Implementation of Vcpu trait for KVM
|
|
/// Example:
|
|
/// #[cfg(feature = "kvm")]
|
|
/// extern crate hypervisor
|
|
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
|
|
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
|
|
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
|
|
/// let vcpu = vm.create_vcpu(0, None).unwrap();
|
|
/// vcpu.get/set().unwrap()
|
|
///
|
|
impl cpu::Vcpu for KvmVcpu {
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Returns the vCPU general purpose registers.
|
|
///
|
|
fn get_regs(&self) -> cpu::Result<StandardRegisters> {
|
|
self.fd
|
|
.get_regs()
|
|
.map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
|
|
///
|
|
fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_regs(regs)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Returns the vCPU special registers.
|
|
///
|
|
fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
|
|
self.fd
|
|
.get_sregs()
|
|
.map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
|
|
///
|
|
fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_sregs(sregs)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Returns the floating point state (FPU) from the vCPU.
|
|
///
|
|
fn get_fpu(&self) -> cpu::Result<FpuState> {
|
|
self.fd
|
|
.get_fpu()
|
|
.map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioct.
|
|
///
|
|
fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_fpu(fpu)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// X86 specific call to setup the CPUID registers.
|
|
///
|
|
fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_cpuid2(cpuid)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// X86 specific call to enable HyperV SynIC
|
|
///
|
|
fn enable_hyperv_synic(&self) -> cpu::Result<()> {
|
|
// Update the information about Hyper-V SynIC being enabled and
|
|
// emulated as it will influence later which MSRs should be saved.
|
|
self.hyperv_synic.store(true, Ordering::Release);
|
|
|
|
let cap = kvm_enable_cap {
|
|
cap: KVM_CAP_HYPERV_SYNIC,
|
|
..Default::default()
|
|
};
|
|
self.fd
|
|
.enable_cap(&cap)
|
|
.map_err(|e| cpu::HypervisorCpuError::EnableHyperVSynIC(e.into()))
|
|
}
|
|
///
|
|
/// X86 specific call to retrieve the CPUID registers.
|
|
///
|
|
#[cfg(target_arch = "x86_64")]
|
|
fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> {
|
|
self.fd
|
|
.get_cpuid2(num_entries)
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
|
|
///
|
|
fn get_lapic(&self) -> cpu::Result<LapicState> {
|
|
self.fd
|
|
.get_lapic()
|
|
.map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
|
|
///
|
|
fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_lapic(klapic)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Returns the model-specific registers (MSR) for this vCPU.
|
|
///
|
|
fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
|
|
self.fd
|
|
.get_msrs(msrs)
|
|
.map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Setup the model-specific registers (MSR) for this vCPU.
|
|
/// Returns the number of MSR entries actually written.
|
|
///
|
|
fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
|
|
self.fd
|
|
.set_msrs(msrs)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
|
|
}
|
|
///
|
|
/// Returns the vcpu's current "multiprocessing state".
|
|
///
|
|
fn get_mp_state(&self) -> cpu::Result<MpState> {
|
|
self.fd
|
|
.get_mp_state()
|
|
.map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))
|
|
}
|
|
///
|
|
/// Sets the vcpu's current "multiprocessing state".
|
|
///
|
|
fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_mp_state(mp_state)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// X86 specific call that returns the vcpu's current "xsave struct".
|
|
///
|
|
fn get_xsave(&self) -> cpu::Result<Xsave> {
|
|
self.fd
|
|
.get_xsave()
|
|
.map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// X86 specific call that sets the vcpu's current "xsave struct".
|
|
///
|
|
fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_xsave(xsave)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// X86 specific call that returns the vcpu's current "xcrs".
|
|
///
|
|
fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
|
|
self.fd
|
|
.get_xcrs()
|
|
.map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// X86 specific call that sets the vcpu's current "xcrs".
|
|
///
|
|
fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_xcrs(&xcrs)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
|
|
}
|
|
///
|
|
/// Triggers the running of the current virtual CPU returning an exit reason.
|
|
///
|
|
fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
|
|
match self.fd.run() {
|
|
Ok(run) => match run {
|
|
#[cfg(target_arch = "x86_64")]
|
|
VcpuExit::IoIn(addr, data) => {
|
|
if let Some(vmmops) = &self.vmmops {
|
|
return vmmops
|
|
.pio_read(addr.into(), data)
|
|
.map(|_| cpu::VmExit::Ignore)
|
|
.map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
|
|
}
|
|
|
|
Ok(cpu::VmExit::IoIn(addr, data))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
VcpuExit::IoOut(addr, data) => {
|
|
if let Some(vmmops) = &self.vmmops {
|
|
return vmmops
|
|
.pio_write(addr.into(), data)
|
|
.map(|_| cpu::VmExit::Ignore)
|
|
.map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
|
|
}
|
|
|
|
Ok(cpu::VmExit::IoOut(addr, data))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
|
|
#[cfg(target_arch = "x86_64")]
|
|
VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),
|
|
|
|
#[cfg(target_arch = "aarch64")]
|
|
VcpuExit::SystemEvent(event_type, flags) => {
|
|
use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
|
|
// On Aarch64, when the VM is shutdown, run() returns
|
|
// VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
|
|
if event_type == KVM_SYSTEM_EVENT_RESET {
|
|
Ok(cpu::VmExit::Reset)
|
|
} else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
|
|
Ok(cpu::VmExit::Shutdown)
|
|
} else {
|
|
Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
|
|
"Unexpected system event with type 0x{:x}, flags 0x{:x}",
|
|
event_type,
|
|
flags
|
|
)))
|
|
}
|
|
}
|
|
|
|
VcpuExit::MmioRead(addr, data) => {
|
|
if let Some(vmmops) = &self.vmmops {
|
|
return vmmops
|
|
.mmio_read(addr, data)
|
|
.map(|_| cpu::VmExit::Ignore)
|
|
.map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
|
|
}
|
|
|
|
Ok(cpu::VmExit::MmioRead(addr, data))
|
|
}
|
|
VcpuExit::MmioWrite(addr, data) => {
|
|
if let Some(vmmops) = &self.vmmops {
|
|
return vmmops
|
|
.mmio_write(addr, data)
|
|
.map(|_| cpu::VmExit::Ignore)
|
|
.map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
|
|
}
|
|
|
|
Ok(cpu::VmExit::MmioWrite(addr, data))
|
|
}
|
|
VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
|
|
|
|
r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
|
|
"Unexpected exit reason on vcpu run: {:?}",
|
|
r
|
|
))),
|
|
},
|
|
|
|
Err(ref e) => match e.errno() {
|
|
libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
|
|
_ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
|
|
"VCPU error {:?}",
|
|
e
|
|
))),
|
|
},
|
|
}
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Returns currently pending exceptions, interrupts, and NMIs as well as related
|
|
/// states of the vcpu.
|
|
///
|
|
fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
|
|
self.fd
|
|
.get_vcpu_events()
|
|
.map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Sets pending exceptions, interrupts, and NMIs as well as related states
|
|
/// of the vcpu.
|
|
///
|
|
fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_vcpu_events(events)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Let the guest know that it has been paused, which prevents from
|
|
/// potential soft lockups when being resumed.
|
|
///
|
|
fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
|
|
self.fd
|
|
.kvmclock_ctrl()
|
|
.map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()))
|
|
}
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
|
|
self.fd
|
|
.vcpu_init(kvi)
|
|
.map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
|
|
}
|
|
///
|
|
/// Sets the value of one register for this vCPU.
|
|
///
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> {
|
|
self.fd
|
|
.set_one_reg(reg_id, data)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))
|
|
}
|
|
///
|
|
/// Gets the value of one register for this vCPU.
|
|
///
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> {
|
|
self.fd
|
|
.get_one_reg(reg_id)
|
|
.map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))
|
|
}
|
|
///
|
|
/// Gets a list of the guest registers that are supported for the
|
|
/// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
|
|
///
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
|
|
self.fd
|
|
.get_reg_list(reg_list)
|
|
.map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
|
|
}
|
|
///
|
|
/// Save the state of the core registers.
|
|
///
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> {
|
|
let mut off = offset__of!(user_pt_regs, regs);
|
|
// There are 31 user_pt_regs:
|
|
// https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
|
|
// These actually are the general-purpose registers of the Armv8-a
|
|
// architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register).
|
|
for i in 0..31 {
|
|
state.regs.regs[i] = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
|
|
off += std::mem::size_of::<u64>();
|
|
}
|
|
|
|
// We are now entering the "Other register" section of the ARMv8-a architecture.
|
|
// First one, stack pointer.
|
|
let off = offset__of!(user_pt_regs, sp);
|
|
state.regs.sp = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
|
|
|
|
// Second one, the program counter.
|
|
let off = offset__of!(user_pt_regs, pc);
|
|
state.regs.pc = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
|
|
|
|
// Next is the processor state.
|
|
let off = offset__of!(user_pt_regs, pstate);
|
|
state.regs.pstate = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
|
|
|
|
// The stack pointer associated with EL1
|
|
let off = offset__of!(kvm_regs, sp_el1);
|
|
state.sp_el1 = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
|
|
|
|
// Exception Link Register for EL1, when taking an exception to EL1, this register
|
|
// holds the address to which to return afterwards.
|
|
let off = offset__of!(kvm_regs, elr_el1);
|
|
state.elr_el1 = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
|
|
|
|
// Saved Program Status Registers, there are 5 of them used in the kernel.
|
|
let mut off = offset__of!(kvm_regs, spsr);
|
|
for i in 0..KVM_NR_SPSR as usize {
|
|
state.spsr[i] = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
|
|
off += std::mem::size_of::<u64>();
|
|
}
|
|
|
|
// Now moving on to floting point registers which are stored in the user_fpsimd_state in the kernel:
|
|
// https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
|
|
let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
|
|
for i in 0..32 {
|
|
state.fp_regs.vregs[i][0] = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
|
|
off += mem::size_of::<u128>();
|
|
}
|
|
|
|
// Floating-point Status Register
|
|
let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
|
|
state.fp_regs.fpsr = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
|
|
as u32;
|
|
|
|
// Floating-point Control Register
|
|
let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
|
|
state.fp_regs.fpcr = self
|
|
.fd
|
|
.get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
|
|
.map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
|
|
as u32;
|
|
Ok(())
|
|
}
|
|
///
|
|
/// Restore the state of the core registers.
|
|
///
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
|
|
// The function follows the exact identical order from `state`. Look there
|
|
// for some additional info on registers.
|
|
let mut off = offset__of!(user_pt_regs, regs);
|
|
for i in 0..31 {
|
|
self.fd
|
|
.set_one_reg(
|
|
arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
|
|
state.regs.regs[i],
|
|
)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
off += std::mem::size_of::<u64>();
|
|
}
|
|
|
|
let off = offset__of!(user_pt_regs, sp);
|
|
self.fd
|
|
.set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
|
|
let off = offset__of!(user_pt_regs, pc);
|
|
self.fd
|
|
.set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
|
|
let off = offset__of!(user_pt_regs, pstate);
|
|
self.fd
|
|
.set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
|
|
let off = offset__of!(kvm_regs, sp_el1);
|
|
self.fd
|
|
.set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
|
|
let off = offset__of!(kvm_regs, elr_el1);
|
|
self.fd
|
|
.set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
|
|
let mut off = offset__of!(kvm_regs, spsr);
|
|
for i in 0..KVM_NR_SPSR as usize {
|
|
self.fd
|
|
.set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i])
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
off += std::mem::size_of::<u64>();
|
|
}
|
|
|
|
let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
|
|
for i in 0..32 {
|
|
self.fd
|
|
.set_one_reg(
|
|
arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
|
|
state.fp_regs.vregs[i][0],
|
|
)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
off += mem::size_of::<u128>();
|
|
}
|
|
|
|
let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
|
|
self.fd
|
|
.set_one_reg(
|
|
arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
|
|
state.fp_regs.fpsr as u64,
|
|
)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
|
|
let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
|
|
self.fd
|
|
.set_one_reg(
|
|
arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
|
|
state.fp_regs.fpcr as u64,
|
|
)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
|
|
Ok(())
|
|
}
|
|
///
|
|
/// Save the state of the system registers.
|
|
///
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
|
|
// Call KVM_GET_REG_LIST to get all registers available to the guest. For ArmV8 there are
|
|
// around 500 registers.
|
|
let mut reg_list = RegList::new(500).unwrap();
|
|
self.fd
|
|
.get_reg_list(&mut reg_list)
|
|
.map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
|
|
|
|
// At this point reg_list should contain: core registers and system registers.
|
|
// The register list contains the number of registers and their ids. We will be needing to
|
|
// call KVM_GET_ONE_REG on each id in order to save all of them. We carve out from the list
|
|
// the core registers which are represented in the kernel by kvm_regs structure and for which
|
|
// we can calculate the id based on the offset in the structure.
|
|
|
|
reg_list.retain(|regid| *regid != 0);
|
|
reg_list.as_slice().to_vec().sort_unstable();
|
|
|
|
reg_list.retain(|regid| is_system_register(*regid));
|
|
|
|
// Now, for the rest of the registers left in the previously fetched register list, we are
|
|
// simply calling KVM_GET_ONE_REG.
|
|
let indices = reg_list.as_slice();
|
|
for (_pos, index) in indices.iter().enumerate() {
|
|
if _pos > 230 {
|
|
break;
|
|
}
|
|
state.push(kvm_bindings::kvm_one_reg {
|
|
id: *index,
|
|
addr: self
|
|
.fd
|
|
.get_one_reg(*index)
|
|
.map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?,
|
|
});
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
///
|
|
/// Restore the state of the system registers.
|
|
///
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> {
|
|
for reg in state {
|
|
self.fd
|
|
.set_one_reg(reg.id, reg.addr)
|
|
.map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
|
|
}
|
|
Ok(())
|
|
}
|
|
///
|
|
/// Read the MPIDR - Multiprocessor Affinity Register.
|
|
///
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
fn read_mpidr(&self) -> cpu::Result<u64> {
|
|
self.fd
|
|
.get_one_reg(MPIDR_EL1)
|
|
.map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Get the current CPU state
|
|
///
|
|
/// Ordering requirements:
|
|
///
|
|
/// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
|
|
/// vCPU/LAPIC state. As such, it must be done before most everything
|
|
/// else, otherwise we cannot restore everything and expect it to work.
|
|
///
|
|
/// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
|
|
/// still running.
|
|
///
|
|
/// KVM_GET_LAPIC may change state of LAPIC before returning it.
|
|
///
|
|
/// GET_VCPU_EVENTS should probably be last to save. The code looks as
|
|
/// it might as well be affected by internal state modifications of the
|
|
/// GET ioctls.
|
|
///
|
|
/// SREGS saves/restores a pending interrupt, similar to what
|
|
/// VCPU_EVENTS also does.
|
|
///
|
|
/// GET_MSRS requires a pre-populated data structure to do something
|
|
/// meaningful. For SET_MSRS it will then contain good data.
|
|
///
|
|
/// # Example
|
|
///
|
|
/// ```rust
|
|
/// # extern crate hypervisor;
|
|
/// # use hypervisor::KvmHypervisor;
|
|
/// # use std::sync::Arc;
|
|
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
|
|
/// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
|
|
/// let vm = hv.create_vm().expect("new VM fd creation failed");
|
|
/// vm.enable_split_irq().unwrap();
|
|
/// let vcpu = vm.create_vcpu(0, None).unwrap();
|
|
/// let state = vcpu.state().unwrap();
|
|
/// ```
|
|
fn state(&self) -> cpu::Result<CpuState> {
|
|
let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
|
|
let mp_state = self.get_mp_state()?;
|
|
let regs = self.get_regs()?;
|
|
let sregs = self.get_sregs()?;
|
|
let xsave = self.get_xsave()?;
|
|
let xcrs = self.get_xcrs()?;
|
|
let lapic_state = self.get_lapic()?;
|
|
let fpu = self.get_fpu()?;
|
|
|
|
// Try to get all MSRs based on the list previously retrieved from KVM.
|
|
// If the number of MSRs obtained from GET_MSRS is different from the
|
|
// expected amount, we fallback onto a slower method by getting MSRs
|
|
// by chunks. This is the only way to make sure we try to get as many
|
|
// MSRs as possible, even if some MSRs are not supported.
|
|
let mut msr_entries = self.msrs.clone();
|
|
|
|
// Save extra MSRs if the Hyper-V synthetic interrupt controller is
|
|
// emulated.
|
|
if self.hyperv_synic.load(Ordering::Acquire) {
|
|
let hyperv_synic_msrs = vec![
|
|
0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
|
|
0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
|
|
0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
|
|
0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4, 0x400000b5,
|
|
0x400000b6, 0x400000b7,
|
|
];
|
|
for index in hyperv_synic_msrs {
|
|
let msr = kvm_msr_entry {
|
|
index,
|
|
..Default::default()
|
|
};
|
|
msr_entries.push(msr).unwrap();
|
|
}
|
|
}
|
|
|
|
let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize;
|
|
let num_msrs = self.get_msrs(&mut msr_entries)?;
|
|
let msrs = if num_msrs != expected_num_msrs {
|
|
let mut faulty_msr_index = num_msrs;
|
|
let mut msr_entries_tmp =
|
|
MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]).unwrap();
|
|
|
|
loop {
|
|
warn!(
|
|
"Detected faulty MSR 0x{:x} while getting MSRs",
|
|
msr_entries.as_slice()[faulty_msr_index].index
|
|
);
|
|
|
|
let start_pos = faulty_msr_index + 1;
|
|
let mut sub_msr_entries =
|
|
MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]).unwrap();
|
|
let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
|
|
let num_msrs = self.get_msrs(&mut sub_msr_entries)?;
|
|
|
|
for i in 0..num_msrs {
|
|
msr_entries_tmp
|
|
.push(sub_msr_entries.as_slice()[i])
|
|
.map_err(|e| {
|
|
cpu::HypervisorCpuError::GetMsrEntries(anyhow!(
|
|
"Failed adding MSR entries: {:?}",
|
|
e
|
|
))
|
|
})?;
|
|
}
|
|
|
|
if num_msrs == expected_num_msrs {
|
|
break;
|
|
}
|
|
|
|
faulty_msr_index = start_pos + num_msrs;
|
|
}
|
|
|
|
msr_entries_tmp
|
|
} else {
|
|
msr_entries
|
|
};
|
|
|
|
let vcpu_events = self.get_vcpu_events()?;
|
|
|
|
Ok(CpuState {
|
|
cpuid,
|
|
msrs,
|
|
vcpu_events,
|
|
regs,
|
|
sregs,
|
|
fpu,
|
|
lapic_state,
|
|
xsave,
|
|
xcrs,
|
|
mp_state,
|
|
})
|
|
}
|
|
///
|
|
/// Get the current AArch64 CPU state
|
|
///
|
|
#[cfg(target_arch = "aarch64")]
|
|
fn state(&self) -> cpu::Result<CpuState> {
|
|
let mut state = CpuState {
|
|
mp_state: self.get_mp_state()?,
|
|
mpidr: self.read_mpidr()?,
|
|
..Default::default()
|
|
};
|
|
self.core_registers(&mut state.core_regs)?;
|
|
self.system_registers(&mut state.sys_regs)?;
|
|
|
|
Ok(state)
|
|
}
|
|
#[cfg(target_arch = "x86_64")]
|
|
///
|
|
/// Restore the previously saved CPU state
|
|
///
|
|
/// Ordering requirements:
|
|
///
|
|
/// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
|
|
/// still running.
|
|
///
|
|
/// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
|
|
/// if we ever change the BSP, we have to do that before restoring anything.
|
|
/// The same seems to be true for CPUID stuff.
|
|
///
|
|
/// SREGS saves/restores a pending interrupt, similar to what
|
|
/// VCPU_EVENTS also does.
|
|
///
|
|
/// SET_REGS clears pending exceptions unconditionally, thus, it must be
|
|
/// done before SET_VCPU_EVENTS, which restores it.
|
|
///
|
|
/// SET_LAPIC must come after SET_SREGS, because the latter restores
|
|
/// the apic base msr.
|
|
///
|
|
/// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
|
|
/// only restores successfully, when the LAPIC is correctly configured.
|
|
///
|
|
/// Arguments: CpuState
|
|
/// # Example
|
|
///
|
|
/// ```rust
|
|
/// # extern crate hypervisor;
|
|
/// # use hypervisor::KvmHypervisor;
|
|
/// # use std::sync::Arc;
|
|
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
|
|
/// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
|
|
/// let vm = hv.create_vm().expect("new VM fd creation failed");
|
|
/// vm.enable_split_irq().unwrap();
|
|
/// let vcpu = vm.create_vcpu(0, None).unwrap();
|
|
/// let state = vcpu.state().unwrap();
|
|
/// vcpu.set_state(&state).unwrap();
|
|
/// ```
|
|
fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
|
|
self.set_cpuid2(&state.cpuid)?;
|
|
self.set_mp_state(state.mp_state)?;
|
|
self.set_regs(&state.regs)?;
|
|
self.set_sregs(&state.sregs)?;
|
|
self.set_xsave(&state.xsave)?;
|
|
self.set_xcrs(&state.xcrs)?;
|
|
self.set_lapic(&state.lapic_state)?;
|
|
self.set_fpu(&state.fpu)?;
|
|
|
|
// Try to set all MSRs previously stored.
|
|
// If the number of MSRs set from SET_MSRS is different from the
|
|
// expected amount, we fallback onto a slower method by setting MSRs
|
|
// by chunks. This is the only way to make sure we try to set as many
|
|
// MSRs as possible, even if some MSRs are not supported.
|
|
let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize;
|
|
let num_msrs = self.set_msrs(&state.msrs)?;
|
|
if num_msrs != expected_num_msrs {
|
|
let mut faulty_msr_index = num_msrs;
|
|
|
|
loop {
|
|
warn!(
|
|
"Detected faulty MSR 0x{:x} while setting MSRs",
|
|
state.msrs.as_slice()[faulty_msr_index].index
|
|
);
|
|
|
|
let start_pos = faulty_msr_index + 1;
|
|
let sub_msr_entries =
|
|
MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]).unwrap();
|
|
let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
|
|
let num_msrs = self.set_msrs(&sub_msr_entries)?;
|
|
|
|
if num_msrs == expected_num_msrs {
|
|
break;
|
|
}
|
|
|
|
faulty_msr_index = start_pos + num_msrs;
|
|
}
|
|
}
|
|
|
|
self.set_vcpu_events(&state.vcpu_events)?;
|
|
|
|
Ok(())
|
|
}
|
|
///
|
|
/// Restore the previously saved AArch64 CPU state
|
|
///
|
|
#[cfg(target_arch = "aarch64")]
|
|
fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
|
|
self.set_core_registers(&state.core_regs)?;
|
|
self.set_system_registers(&state.sys_regs)?;
|
|
self.set_mp_state(state.mp_state)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
///
|
|
/// Initialize TDX for this CPU
|
|
///
|
|
#[cfg(feature = "tdx")]
|
|
fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
|
|
tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address)
|
|
.map_err(cpu::HypervisorCpuError::InitializeTdx)
|
|
}
|
|
}
|
|
|
|
/// Device struct for KVM
|
|
pub struct KvmDevice {
|
|
fd: DeviceFd,
|
|
}
|
|
|
|
impl device::Device for KvmDevice {
|
|
///
|
|
/// Set device attribute
|
|
///
|
|
fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
|
|
self.fd
|
|
.set_device_attr(attr)
|
|
.map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
|
|
}
|
|
///
|
|
/// Get device attribute
|
|
///
|
|
fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
|
|
self.fd
|
|
.get_device_attr(attr)
|
|
.map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
|
|
}
|
|
}
|
|
|
|
impl AsRawFd for KvmDevice {
|
|
fn as_raw_fd(&self) -> RawFd {
|
|
self.fd.as_raw_fd()
|
|
}
|
|
}
|