// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
    MPIDR_EL1,
};
use crate::cpu;
use crate::device;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, VmmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset__of};
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::collections::HashMap;
#[cfg(target_arch = "aarch64")]
use std::convert::TryInto;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, RwLock};
#[cfg(target_arch = "x86_64")]
use vm_memory::Address;
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::NUM_IOAPIC_PINS;
#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::{
    check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS,
};
#[cfg(target_arch = "x86_64")]
pub use x86_64::{
    CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
    Xsave, CPUID_FLAG_VALID_INDEX,
};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
pub use kvm_bindings;
#[cfg(feature = "tdx")]
use kvm_bindings::KVMIO;
pub use kvm_bindings::{
    kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
    kvm_userspace_memory_region, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
    KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};
#[cfg(target_arch = "aarch64")]
use std::mem;
use thiserror::Error;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_expr, ioctl_ioc_nr, ioctl_iowr_nr};

///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
    kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
    kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
    kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd,
    kvm_ioctls::IoEventAddress, kvm_ioctls::VcpuExit,
};

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    #[allow(dead_code)]
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub struct KvmVmState {}

pub use KvmVmState as VmState;

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}
/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    state: KvmVmState,
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}

///
/// Implementation of Vm trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set/get().unwrap()
///
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<dyn VmmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vc = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: vc,
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vmmops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }
    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }
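
    // Illustrative sketch only (not part of the API): wiring an eventfd to a
    // PIO address via `register_ioevent`, first without a datamatch, then
    // matching a 32-bit value. The port `0x3f8` and value `0xfe` are made-up:
    //
    //     let evt = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    //     let addr = IoEventAddress::Pio(0x3f8);
    //     // Signal `evt` on any write to the port.
    //     vm.register_ioevent(&evt, &addr, None).unwrap();
    //     // Signal only when the guest writes exactly 0xfe.
    //     vm.register_ioevent(&evt, &addr, Some(vm::DataMatch::DataMatch32(0xfe)))
    //         .unwrap();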
    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;

        // Safe because irq_routing[0] was allocated with enough trailing space
        // to hold `entries.len()` routing entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> MemoryRegion {
        MemoryRegion {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
    }
    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        let mut region = user_memory_region;

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }
    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        let mut region = user_memory_region;

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }
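
    // Illustrative sketch only: building a dirty-pages-logged region with
    // `make_user_memory_region` and registering it. Slot number and addresses
    // are made-up values:
    //
    //     let region = vm.make_user_memory_region(
    //         0,               // slot
    //         0x1_0000_0000,   // guest_phys_addr
    //         0x2000_0000,     // memory_size (512 MiB)
    //         host_ptr as u64, // userspace_addr
    //         false,           // readonly
    //         true,            // log_dirty_pages
    //     );
    //     vm.create_user_memory_region(region).unwrap();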
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        let device = KvmDevice { fd };
        Ok(Arc::new(device))
    }
    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Set TSS
        self.fd
            .set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        // Create split irqchip
        // Only the local APIC is emulated in kernel, both PICs and IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        self.fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        self.fd
            .set_clock(data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }
    /// Checks if a particular `Cap` is available.
    fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }
    ///
    /// Get the Vm state. Return VM specific data
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(self.state)
    }
    ///
    /// Set the VM state
    ///
    fn set_state(&self, _state: VmState) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = MemoryRegion {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }
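
    // Illustrative sketch only: a typical live-migration flow toggles logging
    // around repeated bitmap reads (slot/size values are made-up):
    //
    //     vm.start_dirty_log().unwrap();
    //     let bitmap = vm.get_dirty_log(0, 0x1_0000_0000, 0x2000_0000).unwrap();
    //     // ... copy the pages whose bits are set in `bitmap` ...
    //     vm.stop_dirty_log().unwrap();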
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = MemoryRegion {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &CpuId, max_vcpus: u32) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitVm {
            max_vcpus: u32,
            tsc_khz: u32,
            attributes: u64,
            cpuid: u64,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            reserved: [u64; 43],
        }
        let data = TdxInitVm {
            max_vcpus,
            tsc_khz: 0,
            attributes: 1, // TDX1_TD_ATTRIBUTE_DEBUG,
            cpuid: cpuid.as_fam_struct_ptr() as u64,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            reserved: [0; 43],
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }
    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }
    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            if measure { 1 } else { 0 },
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }
}

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    metadata: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        metadata: u32,
        data: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        metadata,
        data,
    };
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}
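
// Illustrative note (sketch only): every TDX operation above funnels through
// `tdx_command`, which packs a `TdxIoctlCmd` and issues the
// `KVM_MEMORY_ENCRYPT_OP` ioctl on the given fd, e.g.:
//
//     tdx_command(&vm_fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)?;
//
// The `metadata` word carries per-command flags (such as the "measure" bit
// for `InitMemRegion`) and `data` is a pointer or value passed as a u64.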
/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}
/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}
pub type KvmResult<T> = result::Result<T, KvmError>;
impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    pub fn new() -> hypervisor::Result<KvmHypervisor> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(KvmHypervisor { kvm: kvm_obj })
    }
}
/// Implementation of Hypervisor trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for KvmHypervisor {
    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(KvmVmType::LegacyVm).unwrap();
    ///
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs = MsrEntries::new(num_msrs).unwrap();
            let indices = msr_list.as_slice();
            let msr_entries = msrs.as_mut_slice();
            for (pos, index) in indices.iter().enumerate() {
                msr_entries[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                state: VmState {},
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                state: VmState {},
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }
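
    // Illustrative sketch only: since `create_vm_with_type` above already
    // retries on EINTR, callers can create a VM in two lines:
    //
    //     let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    //     let vm = kvm.create_vm().expect("new VM fd creation failed");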
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        self.kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by KVM.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }
}
/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: VcpuFd,
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    vmmops: Option<Arc<dyn VmmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}
/// Implementation of Vcpu trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> {
        self.fd
            .set_cpuid2(cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated as it will influence later which MSRs should be saved.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
    }
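
    // Illustrative sketch only: a register round-trip through the accessors
    // above (x86_64), tweaking a made-up entry point:
    //
    //     let mut regs = vcpu.get_regs().unwrap();
    //     regs.rip = 0x1000; // made-up guest entry point
    //     vcpu.set_regs(&regs).unwrap();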
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> {
        self.fd
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }
    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        self.fd
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))
    }
    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .set_mp_state(mp_state)
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
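
    // Illustrative note (sketch only): `get_msrs`/`set_msrs` return how many
    // entries the kernel actually processed, which may be fewer than
    // requested. The save/restore paths in `state`/`set_state` below rely on
    // this count to skip over unsupported MSRs:
    //
    //     let written = vcpu.set_msrs(&msrs).unwrap();
    //     if written != msrs.as_fam_struct_ref().nmsrs as usize {
    //         // entry `written` was rejected; retry from `written + 1`
    //     }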
    ///
    /// Triggers the running of the current virtual CPU returning an exit reason.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoIn(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoOut(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On AArch64, when the VM is shutdown, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioRead(addr, data))
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioWrite(addr, data))
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Let the guest know that it has been paused, which prevents potential
    /// soft lockups when it is resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        self.fd
            .kvmclock_ctrl()
            .map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()))
    }
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }
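
    // Illustrative sketch only: a minimal vCPU loop built on `run`, mapping
    // this module's exit variants (error handling elided):
    //
    //     loop {
    //         match vcpu.run().unwrap() {
    //             cpu::VmExit::Reset => break,     // guest requested reset
    //             cpu::VmExit::Shutdown => break,  // guest shut down
    //             cpu::VmExit::Ignore => continue, // handled by VmmOps/EINTR
    //             _ => continue,                   // handle remaining exits as needed
    //         }
    //     }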
    ///
    /// Sets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> {
        self.fd
            .set_one_reg(reg_id, data)
            .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))
    }
    ///
    /// Gets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(reg_id)
            .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))
    }
    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        self.fd
            .get_reg_list(reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
    }
    ///
    /// Save the state of the core registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> {
        let mut off = offset__of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These actually are the general-purpose registers of the Armv8-a
        // architecture (i.e. x0-x30 when used as 64-bit registers, or w0-w30
        // when used as 32-bit registers).
        for i in 0..31 {
            state.regs.regs[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the ARMv8-a architecture.
        // First one, stack pointer.
        let off = offset__of!(user_pt_regs, sp);
        state.regs.sp = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Second one, the program counter.
        let off = offset__of!(user_pt_regs, pc);
        state.regs.pc = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Next is the processor state.
        let off = offset__of!(user_pt_regs, pstate);
        state.regs.pstate = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // The stack pointer associated with EL1
        let off = offset__of!(kvm_regs, sp_el1);
        state.sp_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Exception Link Register for EL1, when taking an exception to EL1, this register
        // holds the address to which to return afterwards.
        let off = offset__of!(kvm_regs, elr_el1);
        state.elr_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Saved Program Status Registers, there are 5 of them used in the kernel.
        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            state.spsr[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to floating point registers which are stored in the
        // user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            state.fp_regs.vregs[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
                .into();
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        state.fp_regs.fpsr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;

        // Floating-point Control Register
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        state.fp_regs.fpcr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;
        Ok(())
    }
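
    // Illustrative sketch only: `arm64_core_reg_id!` builds the 64-bit
    // KVM_{GET,SET}_ONE_REG id from a register size and the byte offset of
    // the field inside `kvm_regs`, e.g. for the program counter:
    //
    //     let off = offset__of!(user_pt_regs, pc);
    //     let id = arm64_core_reg_id!(KVM_REG_SIZE_U64, off);
    //     let pc = vcpu.get_reg(id).unwrap();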
    ///
    /// Restore the state of the core registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // The function follows the exact identical order from `state`. Look there
        // for some additional info on registers.
        let mut off = offset__of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    state.regs.regs[i],
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset__of!(user_pt_regs, sp);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(user_pt_regs, pc);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(user_pt_regs, pstate);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(kvm_regs, sp_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(kvm_regs, elr_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i])
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    state.fp_regs.vregs[i] as u64,
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpsr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpcr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }
    ///
    /// Save the state of the system registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
        // Call KVM_GET_REG_LIST to get all registers available to the guest. For ArmV8 there are
        // around 500 registers.
        let mut reg_list = RegList::new(500).unwrap();
        self.fd
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system registers.
        // The register list contains the number of registers and their ids. We will need to
        // call KVM_GET_ONE_REG on each id in order to save all of them. We carve out from the
        // list the core registers, which are represented in the kernel by the kvm_regs structure
        // and for which we can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched register list, we are
        // simply calling KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            state.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: self
                    .fd
                    .get_one_reg(*index)
                    .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?,
            });
        }

        Ok(())
    }
    ///
    /// Restore the state of the system registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> {
        for reg in state {
            self.fd
                .set_one_reg(reg.id, reg.addr)
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }
        Ok(())
    }
    ///
    /// Read the MPIDR - Multiprocessor Affinity Register.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn read_mpidr(&self) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(MPIDR_EL1)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))
    }
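
    // Illustrative sketch only: saving then restoring the AArch64 system
    // registers through the helpers above:
    //
    //     let mut sys_regs = Vec::new();
    //     vcpu.system_registers(&mut sys_regs).unwrap();
    //     // ... later, on the restore path ...
    //     vcpu.set_system_registers(&sys_regs).unwrap();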
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as if
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a pre-populated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?;
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fallback onto a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083,
                0x40000084, 0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094,
                0x40000095, 0x40000096, 0x40000097, 0x40000098, 0x40000099, 0x4000009a,
                0x4000009b, 0x4000009c, 0x4000009d, 0x4000009f, 0x400000b0, 0x400000b1,
                0x400000b2, 0x400000b3, 0x400000b4, 0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr).unwrap();
            }
        }

        let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp =
                MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]).unwrap();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries.as_slice()[faulty_msr_index].index
                );

                let start_pos = faulty_msr_index + 1;
                let mut sub_msr_entries =
                    MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]).unwrap();
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                for i in 0..num_msrs {
                    msr_entries_tmp
                        .push(sub_msr_entries.as_slice()[i])
                        .map_err(|e| {
                            cpu::HypervisorCpuError::GetMsrEntries(anyhow!(
                                "Failed adding MSR entries: {:?}",
                                e
                            ))
                        })?;
                }

                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;

        Ok(CpuState {
            cpuid,
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
        })
    }
    ///
    /// Get the current AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = CpuState {
            mp_state: self.get_mp_state()?,
            mpidr: self.read_mpidr()?,
            ..Default::default()
        };
        self.core_registers(&mut state.core_regs)?;
        self.system_registers(&mut state.sys_regs)?;

        Ok(state)
    }
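
    // Illustrative sketch only: the chunked MSR fallback shared by `state`
    // above and `set_state` below. When the kernel stops at entry N, entry N
    // is treated as faulty and the scan resumes at N + 1:
    //
    //     let mut faulty = self.get_msrs(&mut msrs)?; // first rejected index
    //     loop {
    //         let mut rest =
    //             MsrEntries::from_entries(&msrs.as_slice()[faulty + 1..]).unwrap();
    //         let done = self.get_msrs(&mut rest)?;
    //         if done == rest.as_fam_struct_ref().nmsrs as usize {
    //             break; // remainder fully processed
    //         }
    //         faulty += 1 + done; // skip the next faulty entry and continue
    //     }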
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully, when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fallback onto a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs.as_slice()[faulty_msr_index].index
                );

                let start_pos = faulty_msr_index + 1;
                let sub_msr_entries =
                    MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]).unwrap();
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }
    ///
    /// Restore the previously saved AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_core_registers(&state.core_regs)?;
        self.set_system_registers(&state.sys_regs)?;
        self.set_mp_state(state.mp_state)?;

        Ok(())
    }
    ///
    /// Initialize TDX for this CPU
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address)
            .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }
}

/// Device struct for KVM
pub struct KvmDevice {
    fd: DeviceFd,
}

impl device::Device for KvmDevice {
    ///
    /// Set device attribute
    ///
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.fd
            .set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.fd
            .get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
}
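
// Illustrative sketch only: driving a passthrough (VFIO) device attribute
// through the `device::Device` impl above. The group/attr constants are
// assumed to come from kvm_bindings; `group_fd` is a made-up VFIO group fd:
//
//     let dev = vm.create_passthrough_device().unwrap();
//     let attr = kvm_bindings::kvm_device_attr {
//         group: kvm_bindings::KVM_DEV_VFIO_GROUP,
//         attr: kvm_bindings::KVM_DEV_VFIO_GROUP_ADD as u64,
//         addr: &group_fd as *const i32 as u64,
//         ..Default::default()
//     };
//     dev.set_device_attr(&attr).unwrap();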
impl AsRawFd for KvmDevice {
    fn as_raw_fd(&self) -> RawFd {
        self.fd.as_raw_fd()
    }
}