From e2b5c78dc58a01491dcc03144b263efc0e29281e Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Wed, 24 Jun 2020 16:52:41 +0200 Subject: [PATCH] hypervisor: Re-order vCPU state for storing and restoring Some vCPU states such as MP_STATE can be modified while retrieving other states. For this reason, it's important to follow a specific order that will ensure a state won't be modified after it has been saved. Comments about ordering requirements have been copied over from Firecracker commit 57f4c7ca14a31c5536f188cacb669d2cad32b9ca. This patch also set the previously saved VCPU_EVENTS, as this was missing from the restore codepath. Signed-off-by: Sebastien Boeuf --- hypervisor/src/cpu.rs | 11 +++++ hypervisor/src/kvm/mod.rs | 92 ++++++++++++++++++++++++++++---------- vmm/src/seccomp_filters.rs | 2 + 3 files changed, 82 insertions(+), 23 deletions(-) diff --git a/hypervisor/src/cpu.rs b/hypervisor/src/cpu.rs index 9b70e3d24..9708b1ad2 100644 --- a/hypervisor/src/cpu.rs +++ b/hypervisor/src/cpu.rs @@ -125,6 +125,11 @@ pub enum HypervisorCpuError { #[error("Failed to get Vcpu events: {0}")] GetVcpuEvents(#[source] anyhow::Error), /// + /// Setting Vcpu events error + /// + #[error("Failed to set Vcpu events: {0}")] + SetVcpuEvents(#[source] anyhow::Error), + /// /// Vcpu Init error /// #[error("Failed to init vcpu: {0}")] @@ -255,6 +260,12 @@ pub trait Vcpu: Send + Sync { fn get_vcpu_events(&self) -> Result; #[cfg(target_arch = "x86_64")] /// + /// Sets pending exceptions, interrupts, and NMIs as well as related states + /// of the vcpu. + /// + fn set_vcpu_events(&self, events: &VcpuEvents) -> Result<()>; + #[cfg(target_arch = "x86_64")] + /// /// Let the guest know that it has been paused, which prevents from /// potential soft lockups when being resumed. /// diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index a62b387b1..fb401ca7a 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -528,6 +528,16 @@ impl cpu::Vcpu for KvmVcpu { } #[cfg(target_arch = "x86_64")] /// + /// Sets pending exceptions, interrupts, and NMIs as well as related states + /// of the vcpu. + /// + fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> { + self.fd + .set_vcpu_events(events) + .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into())) + } + #[cfg(target_arch = "x86_64")] + /// /// Let the guest know that it has been paused, which prevents from /// potential soft lockups when being resumed. /// @@ -561,8 +571,29 @@ impl cpu::Vcpu for KvmVcpu { .map_err(|e| cpu::HypervisorCpuError::GetOneReg(e.into())) } #[cfg(target_arch = "x86_64")] + /// /// Get the current CPU state /// + /// Ordering requirements: + /// + /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify + /// vCPU/LAPIC state. As such, it must be done before most everything + /// else, otherwise we cannot restore everything and expect it to work. + /// + /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are + /// still running. + /// + /// KVM_GET_LAPIC may change state of LAPIC before returning it. + /// + /// GET_VCPU_EVENTS should probably be last to save. The code looks as + /// it might as well be affected by internal state modifications of the + /// GET ioctls. + /// + /// SREGS saves/restores a pending interrupt, similar to what + /// VCPU_EVENTS also does. + /// + /// GET_MSRS requires a pre-populated data structure to do something + /// meaningful. For SET_MSRS it will then contain good data. /// /// # Example /// @@ -576,19 +607,18 @@ impl cpu::Vcpu for KvmVcpu { /// vm.enable_split_irq().unwrap(); /// let vcpu = vm.create_vcpu(0).unwrap(); /// let state = vcpu.cpu_state().unwrap(); - /// + /// ``` fn cpu_state(&self) -> cpu::Result { - let mut msrs = boot_msr_entries(); - self.get_msrs(&mut msrs)?; - - let vcpu_events = self.get_vcpu_events()?; + let mp_state = self.get_mp_state()?; let regs = self.get_regs()?; let sregs = self.get_sregs()?; - let lapic_state = self.get_lapic()?; - let fpu = self.get_fpu()?; let xsave = self.get_xsave()?; let xcrs = self.get_xcrs()?; - let mp_state = self.get_mp_state()?; + let lapic_state = self.get_lapic()?; + let fpu = self.get_fpu()?; + let mut msrs = boot_msr_entries(); + self.get_msrs(&mut msrs)?; + let vcpu_events = self.get_vcpu_events()?; Ok(CpuState { msrs, @@ -607,8 +637,30 @@ impl cpu::Vcpu for KvmVcpu { unimplemented!(); } #[cfg(target_arch = "x86_64")] + /// /// Restore the previously saved CPU state /// + /// Ordering requirements: + /// + /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are + /// still running. + /// + /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so + /// if we ever change the BSP, we have to do that before restoring anything. + /// The same seems to be true for CPUID stuff. + /// + /// SREGS saves/restores a pending interrupt, similar to what + /// VCPU_EVENTS also does. + /// + /// SET_REGS clears pending exceptions unconditionally, thus, it must be + /// done before SET_VCPU_EVENTS, which restores it. + /// + /// SET_LAPIC must come after SET_SREGS, because the latter restores + /// the apic base msr. + /// + /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR + /// only restores successfully, when the LAPIC is correctly configured. + /// /// Arguments: CpuState /// # Example /// @@ -623,23 +675,17 @@ impl cpu::Vcpu for KvmVcpu { /// let vcpu = vm.create_vcpu(0).unwrap(); /// let state = vcpu.cpu_state().unwrap(); /// vcpu.set_cpu_state(&state).unwrap(); - /// + /// ``` fn set_cpu_state(&self, state: &CpuState) -> cpu::Result<()> { - self.set_regs(&state.regs)?; - - self.set_fpu(&state.fpu)?; - - self.set_xsave(&state.xsave)?; - - self.set_sregs(&state.sregs)?; - - self.set_xcrs(&state.xcrs)?; - - self.set_msrs(&state.msrs)?; - - self.set_lapic(&state.lapic_state)?; - self.set_mp_state(state.mp_state)?; + self.set_regs(&state.regs)?; + self.set_sregs(&state.sregs)?; + self.set_xsave(&state.xsave)?; + self.set_xcrs(&state.xcrs)?; + self.set_lapic(&state.lapic_state)?; + self.set_fpu(&state.fpu)?; + self.set_msrs(&state.msrs)?; + self.set_vcpu_events(&state.vcpu_events)?; Ok(()) } diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index ab875be8b..427a311c7 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -63,6 +63,7 @@ const KVM_IRQFD: u64 = 0x4020_ae76; const KVM_SET_CLOCK: u64 = 0x4030_ae7b; const KVM_CREATE_PIT2: u64 = 0x4040_ae77; const KVM_IOEVENTFD: u64 = 0x4040_ae79; +const KVM_SET_VCPU_EVENTS: u64 = 0x4040_aea0; const KVM_ENABLE_CAP: u64 = 0x4068_aea3; const KVM_SET_REGS: u64 = 0x4090_ae82; const KVM_SET_SREGS: u64 = 0x4138_ae84; @@ -154,6 +155,7 @@ fn create_vmm_ioctl_seccomp_rule() -> Result, Error> { and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_SREGS)?], and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_TSS_ADDR,)?], and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_USER_MEMORY_REGION,)?], + and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_VCPU_EVENTS,)?], and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_XSAVE,)?], and![Cond::new(1, ArgLen::DWORD, Eq, KVM_SET_XCRS,)?], and![Cond::new(1, ArgLen::DWORD, Eq, SIOCGIFFLAGS)?],