vmm: Ensure that PIO and MMIO exits complete before pausing

As per this kernel documentation:

      For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
      KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
      operations are complete (and guest state is consistent) only after userspace
      has re-entered the kernel with KVM_RUN.  The kernel side will first finish
      incomplete operations and then check for pending signals.

      The pending state of the operation is not preserved in state which is
      visible to userspace, thus userspace should ensure that the operation is
      completed before performing a live migration.  Userspace can re-enter the
      guest with an unmasked signal pending or with the immediate_exit field set
      to complete pending operations without allowing any further instructions
      to be executed.

Since we capture the vCPU state as part of the pause and override it as
part of the resume, we must ensure that state is consistent; otherwise
we will lose the result of the MMIO or PIO operation that caused the
exit from which we paused.
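
For illustration, the resulting pattern looks roughly like this against
the kvm-ioctls VcpuFd API (a minimal sketch; the helper name and the
handling of unexpected exits are ours, not part of this change):

    // Sketch: force completion of any pending PIO/MMIO operation before
    // the vCPU state is captured.
    fn complete_pending_io(vcpu_fd: &mut kvm_ioctls::VcpuFd) {
        // With immediate_exit set, KVM_RUN first finishes the pending
        // operation, then returns to userspace with EINTR before the
        // guest can execute any further instructions.
        vcpu_fd.set_kvm_immediate_exit(1);
        match vcpu_fd.run() {
            Err(e) if e.errno() == libc::EINTR => (), // state now consistent
            other => panic!("unexpected KVM_RUN result: {:?}", other),
        }
        vcpu_fd.set_kvm_immediate_exit(0);
    }

The hypervisor crate's run() wrapper surfaces that EINTR as
VmExit::Ignore, which is what the CpuManager hunk below expects.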

Fixes: #3658

Signed-off-by: Rob Bradford <robert.bradford@intel.com>

@@ -464,4 +464,9 @@ pub trait Vcpu: Send + Sync {
     /// Return suspend registers(explicit and intercept suspend registers)
     ///
     fn get_suspend_regs(&self) -> Result<SuspendRegisters>;
+    #[cfg(feature = "kvm")]
+    ///
+    /// Set the "immediate_exit" state
+    ///
+    fn set_immediate_exit(&self, exit: bool);
 }

@@ -1595,6 +1595,13 @@ impl cpu::Vcpu for KvmVcpu {
         tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address)
             .map_err(cpu::HypervisorCpuError::InitializeTdx)
     }
+
+    ///
+    /// Set the "immediate_exit" state
+    ///
+    fn set_immediate_exit(&self, exit: bool) {
+        self.fd.set_kvm_immediate_exit(exit.into());
+    }
 }
 
 /// Device struct for KVM
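
Note that set_kvm_immediate_exit does not issue an ioctl: the
immediate_exit flag lives in the vCPU's mmap'd kvm_run structure, which
the kernel checks on entry to KVM_RUN. Conceptually it boils down to
something like this (a sketch of the idea, not the crate's actual code):

    // kvm_run is the per-vCPU structure shared with the kernel through
    // the mmap of the vCPU fd; kvm_bindings exposes its immediate_exit
    // field as a plain byte.
    fn set_immediate_exit(run: &mut kvm_bindings::kvm_run, exit: bool) {
        run.immediate_exit = exit.into();
    }

That also means toggling it around a single KVM_RUN call is essentially
free.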

@@ -130,6 +130,9 @@ pub fn check_required_kvm_extensions(kvm: &Kvm) -> KvmResult<()> {
     if !kvm.check_extension(Cap::SetTssAddr) {
         return Err(KvmError::CapabilityMissing(Cap::SetTssAddr));
     }
+    if !kvm.check_extension(Cap::ImmediateExit) {
+        return Err(KvmError::CapabilityMissing(Cap::ImmediateExit));
+    }
     Ok(())
 }
 
 #[derive(Clone, Serialize, Deserialize)]
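
KVM_CAP_IMMEDIATE_EXIT has been available since Linux 4.11, so making
it a hard requirement should be safe on any host kernel we support. A
standalone probe would look something like this (sketch; the message
text is ours):

    use kvm_ioctls::{Cap, Kvm};

    fn main() {
        let kvm = Kvm::new().expect("failed to open /dev/kvm");
        if !kvm.check_extension(Cap::ImmediateExit) {
            eprintln!("host kernel lacks KVM_CAP_IMMEDIATE_EXIT");
        }
    }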

@@ -841,6 +841,31 @@ impl CpuManager {
                         // to see them in a consistent order in all threads
                         if vcpu_pause_signalled.load(Ordering::SeqCst) {
+                            // As a pause can be caused by PIO & MMIO exits, we need to ensure
+                            // they are completed by returning to KVM_RUN. From the kernel docs:
+                            //
+                            // For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
+                            // KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
+                            // operations are complete (and guest state is consistent) only after userspace
+                            // has re-entered the kernel with KVM_RUN. The kernel side will first finish
+                            // incomplete operations and then check for pending signals.
+                            // The pending state of the operation is not preserved in state which is
+                            // visible to userspace, thus userspace should ensure that the operation is
+                            // completed before performing a live migration. Userspace can re-enter the
+                            // guest with an unmasked signal pending or with the immediate_exit field set
+                            // to complete pending operations without allowing any further instructions
+                            // to be executed.
+                            #[cfg(feature = "kvm")]
+                            {
+                                vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(true);
+                                if !matches!(vcpu.lock().unwrap().run(), Ok(VmExit::Ignore)) {
+                                    error!("Unexpected VM exit on \"immediate_exit\" run");
+                                    break;
+                                }
+                                vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(false);
+                            }
+
                             vcpu_run_interrupted.store(true, Ordering::SeqCst);
                             while vcpu_pause_signalled.load(Ordering::SeqCst) {
                                 thread::park();