From 507912385a416db65f7ea21ed85f0bb6f0f7f38d Mon Sep 17 00:00:00 2001 From: Rob Bradford Date: Mon, 7 Feb 2022 14:36:39 +0000 Subject: [PATCH] vmm: Ensure that PIO and MMIO exits complete before pausing As per this kernel documentation: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN, KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. The pending state of the operation is not preserved in state which is visible to userspace, thus userspace should ensure that the operation is completed before performing a live migration. Userspace can re-enter the guest with an unmasked signal pending or with the immediate_exit field set to complete pending operations without allowing any further instructions to be executed. Since we capture the state as part of the pause and override it as part of the resume, we must ensure the state is consistent; otherwise, we will lose the results of the MMIO or PIO operation that caused the exit from which we paused. 
Fixes: #3658 Signed-off-by: Rob Bradford --- hypervisor/src/cpu.rs | 5 +++++ hypervisor/src/kvm/mod.rs | 7 +++++++ hypervisor/src/kvm/x86_64/mod.rs | 3 +++ vmm/src/cpu.rs | 25 +++++++++++++++++++++++++ 4 files changed, 40 insertions(+) diff --git a/hypervisor/src/cpu.rs b/hypervisor/src/cpu.rs index fd75b86d8..8c15a070e 100644 --- a/hypervisor/src/cpu.rs +++ b/hypervisor/src/cpu.rs @@ -464,4 +464,9 @@ pub trait Vcpu: Send + Sync { /// Return suspend registers(explicit and intercept suspend registers) /// fn get_suspend_regs(&self) -> Result; + #[cfg(feature = "kvm")] + /// + /// Set the "immediate_exit" state + /// + fn set_immediate_exit(&self, exit: bool); } diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index c66a96c79..0ec3a4888 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -1595,6 +1595,13 @@ impl cpu::Vcpu for KvmVcpu { tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address) .map_err(cpu::HypervisorCpuError::InitializeTdx) } + + /// + /// Set the "immediate_exit" state + /// + fn set_immediate_exit(&self, exit: bool) { + self.fd.set_kvm_immediate_exit(exit.into()); + } } /// Device struct for KVM diff --git a/hypervisor/src/kvm/x86_64/mod.rs b/hypervisor/src/kvm/x86_64/mod.rs index a2daa09c7..d176d4b5f 100644 --- a/hypervisor/src/kvm/x86_64/mod.rs +++ b/hypervisor/src/kvm/x86_64/mod.rs @@ -130,6 +130,9 @@ pub fn check_required_kvm_extensions(kvm: &Kvm) -> KvmResult<()> { if !kvm.check_extension(Cap::SetTssAddr) { return Err(KvmError::CapabilityMissing(Cap::SetTssAddr)); } + if !kvm.check_extension(Cap::ImmediateExit) { + return Err(KvmError::CapabilityMissing(Cap::ImmediateExit)); + } Ok(()) } #[derive(Clone, Serialize, Deserialize)] diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index ac5ae69c4..d9eb3d2ee 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -841,6 +841,31 @@ impl CpuManager { // to see them in a consistent order in all threads if vcpu_pause_signalled.load(Ordering::SeqCst) { + // 
As a pause can be caused by PIO & MMIO exits, we need to ensure they are + // completed by returning to KVM_RUN. From the kernel docs: + // + // For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN, + // KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding + // operations are complete (and guest state is consistent) only after userspace + // has re-entered the kernel with KVM_RUN. The kernel side will first finish + // incomplete operations and then check for pending signals. + // The pending state of the operation is not preserved in state which is + // visible to userspace, thus userspace should ensure that the operation is + // completed before performing a live migration. Userspace can re-enter the + // guest with an unmasked signal pending or with the immediate_exit field set + // to complete pending operations without allowing any further instructions + // to be executed. + + #[cfg(feature = "kvm")] + { + vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(true); + if !matches!(vcpu.lock().unwrap().run(), Ok(VmExit::Ignore)) { + error!("Unexpected VM exit on \"immediate_exit\" run"); + break; + } + vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(false); + } + vcpu_run_interrupted.store(true, Ordering::SeqCst); while vcpu_pause_signalled.load(Ordering::SeqCst) { thread::park();