diff --git a/hypervisor/src/cpu.rs b/hypervisor/src/cpu.rs
index 803eaccca..b05be3d21 100644
--- a/hypervisor/src/cpu.rs
+++ b/hypervisor/src/cpu.rs
@@ -443,4 +443,9 @@ pub trait Vcpu: Send + Sync {
     /// Return suspend registers(explicit and intercept suspend registers)
     ///
     fn get_suspend_regs(&self) -> Result<SuspendRegisters>;
+    #[cfg(feature = "kvm")]
+    ///
+    /// Set the "immediate_exit" state
+    ///
+    fn set_immediate_exit(&self, exit: bool);
 }
diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs
index 8e5a3fda6..8d6540605 100644
--- a/hypervisor/src/kvm/mod.rs
+++ b/hypervisor/src/kvm/mod.rs
@@ -1574,6 +1574,13 @@ impl cpu::Vcpu for KvmVcpu {
         tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address)
             .map_err(cpu::HypervisorCpuError::InitializeTdx)
     }
+
+    ///
+    /// Set the "immediate_exit" state
+    ///
+    fn set_immediate_exit(&self, exit: bool) {
+        self.fd.set_kvm_immediate_exit(exit.into());
+    }
 }
 
 /// Device struct for KVM
diff --git a/hypervisor/src/kvm/x86_64/mod.rs b/hypervisor/src/kvm/x86_64/mod.rs
index a2daa09c7..d176d4b5f 100644
--- a/hypervisor/src/kvm/x86_64/mod.rs
+++ b/hypervisor/src/kvm/x86_64/mod.rs
@@ -130,6 +130,9 @@ pub fn check_required_kvm_extensions(kvm: &Kvm) -> KvmResult<()> {
     if !kvm.check_extension(Cap::SetTssAddr) {
         return Err(KvmError::CapabilityMissing(Cap::SetTssAddr));
     }
+    if !kvm.check_extension(Cap::ImmediateExit) {
+        return Err(KvmError::CapabilityMissing(Cap::ImmediateExit));
+    }
     Ok(())
 }
 #[derive(Clone, Serialize, Deserialize)]
diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs
index 9d66e93a6..ef086a9f5 100644
--- a/vmm/src/cpu.rs
+++ b/vmm/src/cpu.rs
@@ -792,6 +792,31 @@ impl CpuManager {
                     // to see them in a consistent order in all threads
                     if vcpu_pause_signalled.load(Ordering::SeqCst) {
+                        // As a pause can be caused by PIO & MMIO exits then we need to ensure they are
+                        // completed by returning to KVM_RUN. From the kernel docs:
+                        //
+                        // For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
+                        // KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
+                        // operations are complete (and guest state is consistent) only after userspace
+                        // has re-entered the kernel with KVM_RUN. The kernel side will first finish
+                        // incomplete operations and then check for pending signals.
+                        // The pending state of the operation is not preserved in state which is
+                        // visible to userspace, thus userspace should ensure that the operation is
+                        // completed before performing a live migration. Userspace can re-enter the
+                        // guest with an unmasked signal pending or with the immediate_exit field set
+                        // to complete pending operations without allowing any further instructions
+                        // to be executed.
+
+                        #[cfg(feature = "kvm")]
+                        {
+                            vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(true);
+                            if !matches!(vcpu.lock().unwrap().run(), Ok(VmExit::Ignore)) {
+                                error!("Unexpected VM exit on \"immediate_exit\" run");
+                                break;
+                            }
+                            vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(false);
+                        }
+
                         vcpu_run_interrupted.store(true, Ordering::SeqCst);
                         while vcpu_pause_signalled.load(Ordering::SeqCst) {
                             thread::park();
                         }
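
A note on the vmm/src/cpu.rs hunk above: with the immediate_exit flag set, KVM_RUN only finishes any in-flight PIO/MMIO operation and then returns to userspace with -EINTR, without executing further guest instructions. Cloud Hypervisor's KVM run() wrapper maps an EINTR/EAGAIN-interrupted run to VmExit::Ignore, which is why anything other than Ok(VmExit::Ignore) is treated as an error here. Below is a minimal standalone sketch of the same drain pattern written directly against the kvm-ioctls crate; it is illustrative only, the helper name drain_pending_exit is made up, and it assumes a kvm-ioctls version that exposes set_kvm_immediate_exit plus the libc crate for the errno constants.

use kvm_ioctls::VcpuFd;

/// Hypothetical helper (not part of the patch): flush any in-flight
/// PIO/MMIO operation without letting the guest run further.
fn drain_pending_exit(vcpu: &mut VcpuFd) {
    // Ask KVM to return to userspace as soon as the pending operation
    // has been completed on the kernel side.
    vcpu.set_kvm_immediate_exit(1);

    // KVM_RUN finishes the pending work and comes back with -EINTR;
    // kvm-ioctls surfaces that as an Err wrapping the errno value.
    match vcpu.run() {
        Err(e) if e.errno() == libc::EINTR || e.errno() == libc::EAGAIN => {
            // Expected: pending state flushed, no guest code executed.
        }
        other => eprintln!("unexpected result on immediate-exit run: {:?}", other),
    }

    // Clear the flag so the next KVM_RUN resumes normal guest execution.
    vcpu.set_kvm_immediate_exit(0);
}

The capability check added in hypervisor/src/kvm/x86_64/mod.rs is the matching guard: callers should verify Cap::ImmediateExit via kvm.check_extension(Cap::ImmediateExit) before relying on this flag.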