mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2024-12-22 05:35:20 +00:00
vmm: Only return from reset driven I/O once event received
The reset system is asynchronous with an I/O event (PIO or MMIO) for ACPI/i8042/CMOS triggering a write to the reset_evt event handler. The VMM thread will pick up this event on the VMM main loop and then trigger a shutdown in the CpuManager. However since there is some delay between the CPU threads being marked to be killed (through the CpuManager::cpus_kill_signalled bool) it is possible for the guest vCPU that triggered the exit to be re-entered when the vCPU KVM_RUN is called after the I/O exit is completed. This is undesirable and in particular the Linux kernel will attempt to jump to real mode after a CMOS based exit - this is unsupported in nested KVM on AMD on Azure and will trigger an error in KVM_RUN. Solve this problem by spinning in the device that has triggered the reset until the vcpus_kill_signalled boolean has been updated indicating that the VMM thread has received the event and called CpuManager::shutdown(). In particular if this bool is set then the vCPU threads will not re-enter the guest. Signed-off-by: Rob Bradford <rbradford@rivosinc.com>
This commit is contained in:
parent
70cfd1be67
commit
06dc708515
@ -5,7 +5,9 @@
|
||||
|
||||
use super::AcpiNotificationFlags;
|
||||
use acpi_tables::{aml, Aml, AmlSink};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Barrier};
|
||||
use std::thread;
|
||||
use std::time::Instant;
|
||||
use vm_device::interrupt::InterruptSourceGroup;
|
||||
use vm_device::BusDevice;
|
||||
@ -18,14 +20,20 @@ pub const GED_DEVICE_ACPI_SIZE: usize = 0x1;
|
||||
pub struct AcpiShutdownDevice {
|
||||
exit_evt: EventFd,
|
||||
reset_evt: EventFd,
|
||||
vcpus_kill_signalled: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl AcpiShutdownDevice {
|
||||
/// Constructs a device that will signal the given event when the guest requests it.
|
||||
pub fn new(exit_evt: EventFd, reset_evt: EventFd) -> AcpiShutdownDevice {
|
||||
pub fn new(
|
||||
exit_evt: EventFd,
|
||||
reset_evt: EventFd,
|
||||
vcpus_kill_signalled: Arc<AtomicBool>,
|
||||
) -> AcpiShutdownDevice {
|
||||
AcpiShutdownDevice {
|
||||
exit_evt,
|
||||
reset_evt,
|
||||
vcpus_kill_signalled,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -43,6 +51,13 @@ impl BusDevice for AcpiShutdownDevice {
|
||||
if let Err(e) = self.reset_evt.write(1) {
|
||||
error!("Error triggering ACPI reset event: {}", e);
|
||||
}
|
||||
// Spin until we are sure the reset_evt has been handled and that when
|
||||
// we return from the KVM_RUN we will exit rather than re-enter the guest.
|
||||
while !self.vcpus_kill_signalled.load(Ordering::SeqCst) {
|
||||
// This is more effective than thread::yield_now() at
|
||||
// avoiding a priority inversion with the VMM thread
|
||||
thread::sleep(std::time::Duration::from_millis(1));
|
||||
}
|
||||
}
|
||||
// The ACPI DSDT table specifies the S5 sleep state (shutdown) as value 5
|
||||
const S5_SLEEP_VALUE: u8 = 5;
|
||||
@ -53,6 +68,13 @@ impl BusDevice for AcpiShutdownDevice {
|
||||
if let Err(e) = self.exit_evt.write(1) {
|
||||
error!("Error triggering ACPI shutdown event: {}", e);
|
||||
}
|
||||
// Spin until we are sure the reset_evt has been handled and that when
|
||||
// we return from the KVM_RUN we will exit rather than re-enter the guest.
|
||||
while !self.vcpus_kill_signalled.load(Ordering::SeqCst) {
|
||||
// This is more effective than thread::yield_now() at
|
||||
// avoiding a priority inversion with the VMM thread
|
||||
thread::sleep(std::time::Duration::from_millis(1));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
@ -5,7 +5,9 @@
|
||||
use libc::{clock_gettime, gmtime_r, timespec, tm, CLOCK_REALTIME};
|
||||
use std::cmp::min;
|
||||
use std::mem;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Barrier};
|
||||
use std::thread;
|
||||
use vm_device::BusDevice;
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
@ -23,13 +25,19 @@ pub struct Cmos {
|
||||
index: u8,
|
||||
data: [u8; DATA_LEN],
|
||||
reset_evt: EventFd,
|
||||
vcpus_kill_signalled: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl Cmos {
|
||||
/// Constructs a CMOS/RTC device with initial data.
|
||||
/// `mem_below_4g` is the size of memory in bytes below the 32-bit gap.
|
||||
/// `mem_above_4g` is the size of memory in bytes above the 32-bit gap.
|
||||
pub fn new(mem_below_4g: u64, mem_above_4g: u64, reset_evt: EventFd) -> Cmos {
|
||||
pub fn new(
|
||||
mem_below_4g: u64,
|
||||
mem_above_4g: u64,
|
||||
reset_evt: EventFd,
|
||||
vcpus_kill_signalled: Arc<AtomicBool>,
|
||||
) -> Cmos {
|
||||
let mut data = [0u8; DATA_LEN];
|
||||
|
||||
// Extended memory from 16 MB to 4 GB in units of 64 KB
|
||||
@ -50,6 +58,7 @@ impl Cmos {
|
||||
index: 0,
|
||||
data,
|
||||
reset_evt,
|
||||
vcpus_kill_signalled,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -67,6 +76,13 @@ impl BusDevice for Cmos {
|
||||
if self.index == 0x8f && data[0] == 0 {
|
||||
info!("CMOS reset");
|
||||
self.reset_evt.write(1).unwrap();
|
||||
// Spin until we are sure the reset_evt has been handled and that when
|
||||
// we return from the KVM_RUN we will exit rather than re-enter the guest.
|
||||
while !self.vcpus_kill_signalled.load(Ordering::SeqCst) {
|
||||
// This is more effective than thread::yield_now() at
|
||||
// avoiding a priority inversion with the VMM thread
|
||||
thread::sleep(std::time::Duration::from_millis(1));
|
||||
}
|
||||
} else {
|
||||
self.data[(self.index & INDEX_MASK) as usize] = data[0]
|
||||
}
|
||||
|
@ -2,19 +2,27 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE-BSD-3-Clause file.
|
||||
|
||||
use std::sync::{Arc, Barrier};
|
||||
use std::sync::{
|
||||
atomic::{AtomicBool, Ordering},
|
||||
Arc, Barrier,
|
||||
};
|
||||
use std::thread;
|
||||
use vm_device::BusDevice;
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
/// A i8042 PS/2 controller that emulates just enough to shutdown the machine.
|
||||
pub struct I8042Device {
|
||||
reset_evt: EventFd,
|
||||
vcpus_kill_signalled: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl I8042Device {
|
||||
/// Constructs a i8042 device that will signal the given event when the guest requests it.
|
||||
pub fn new(reset_evt: EventFd) -> I8042Device {
|
||||
I8042Device { reset_evt }
|
||||
pub fn new(reset_evt: EventFd, vcpus_kill_signalled: Arc<AtomicBool>) -> I8042Device {
|
||||
I8042Device {
|
||||
reset_evt,
|
||||
vcpus_kill_signalled,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -38,6 +46,13 @@ impl BusDevice for I8042Device {
|
||||
if let Err(e) = self.reset_evt.write(1) {
|
||||
error!("Error triggering i8042 reset event: {}", e);
|
||||
}
|
||||
// Spin until we are sure the reset_evt has been handled and that when
|
||||
// we return from the KVM_RUN we will exit rather than re-enter the guest.
|
||||
while !self.vcpus_kill_signalled.load(Ordering::SeqCst) {
|
||||
// This is more effective than thread::yield_now() at
|
||||
// avoiding a priority inversion with the VMM thread
|
||||
thread::sleep(std::time::Duration::from_millis(1));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
|
@ -6,6 +6,8 @@
|
||||
use devices::legacy::Cmos;
|
||||
use libc::EFD_NONBLOCK;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Arc;
|
||||
use vm_device::BusDevice;
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
@ -25,6 +27,7 @@ fuzz_target!(|bytes| {
|
||||
u64::from_le_bytes(below_4g),
|
||||
u64::from_le_bytes(above_4g),
|
||||
EventFd::new(EFD_NONBLOCK).unwrap(),
|
||||
Arc::new(AtomicBool::default()),
|
||||
);
|
||||
|
||||
let mut i = 16;
|
||||
|
@ -1725,6 +1725,10 @@ impl CpuManager {
|
||||
) {
|
||||
self.interrupt_controller = Some(interrupt_controller);
|
||||
}
|
||||
|
||||
pub(crate) fn vcpus_kill_signalled(&self) -> &Arc<AtomicBool> {
|
||||
&self.vcpus_kill_signalled
|
||||
}
|
||||
}
|
||||
|
||||
struct Cpu {
|
||||
|
@ -1485,8 +1485,16 @@ impl DeviceManager {
|
||||
reset_evt: EventFd,
|
||||
exit_evt: EventFd,
|
||||
) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
|
||||
let vcpus_kill_signalled = self
|
||||
.cpu_manager
|
||||
.lock()
|
||||
.unwrap()
|
||||
.vcpus_kill_signalled()
|
||||
.clone();
|
||||
let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
|
||||
exit_evt, reset_evt,
|
||||
exit_evt,
|
||||
reset_evt,
|
||||
vcpus_kill_signalled,
|
||||
)));
|
||||
|
||||
self.bus_devices
|
||||
@ -1585,9 +1593,16 @@ impl DeviceManager {
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
|
||||
let vcpus_kill_signalled = self
|
||||
.cpu_manager
|
||||
.lock()
|
||||
.unwrap()
|
||||
.vcpus_kill_signalled()
|
||||
.clone();
|
||||
// Add a shutdown device (i8042)
|
||||
let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
|
||||
reset_evt.try_clone().unwrap(),
|
||||
vcpus_kill_signalled.clone(),
|
||||
)));
|
||||
|
||||
self.bus_devices
|
||||
@ -1615,6 +1630,7 @@ impl DeviceManager {
|
||||
mem_below_4g,
|
||||
mem_above_4g,
|
||||
reset_evt,
|
||||
vcpus_kill_signalled,
|
||||
)));
|
||||
|
||||
self.bus_devices
|
||||
|
Loading…
Reference in New Issue
Block a user