vmm: Activate virtio device from VMM thread

When a device is ready to be activated signal to the VMM thread via an
EventFd that there is a device to be activated. When the VMM receives a
notification on the EventFd that there is a device to be activated
notify the device manager to attempt to activate any devices that have
not been activated.

As a side effect the VMM thread will create the virtio device threads.

Fixes: #1863

Signed-off-by: Rob Bradford <robert.bradford@intel.com>
This commit is contained in:
Rob Bradford 2020-11-09 13:29:05 +00:00
parent dee42ebb29
commit 03db48306b
4 changed files with 114 additions and 11 deletions

View File

@ -31,7 +31,7 @@ use std::io::Write;
use std::num::Wrapping; use std::num::Wrapping;
use std::result; use std::result;
use std::sync::atomic::{AtomicU16, AtomicUsize, Ordering}; use std::sync::atomic::{AtomicU16, AtomicUsize, Ordering};
use std::sync::{Arc, Barrier, Mutex}; use std::sync::{atomic::AtomicBool, Arc, Barrier, Mutex};
use vm_allocator::SystemAllocator; use vm_allocator::SystemAllocator;
use vm_device::interrupt::{ use vm_device::interrupt::{
InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig, InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
@ -292,7 +292,7 @@ pub struct VirtioPciDevice {
// Virtio device reference and status // Virtio device reference and status
device: Arc<Mutex<dyn VirtioDevice>>, device: Arc<Mutex<dyn VirtioDevice>>,
device_activated: bool, device_activated: Arc<AtomicBool>,
// PCI interrupts. // PCI interrupts.
interrupt_status: Arc<AtomicUsize>, interrupt_status: Arc<AtomicUsize>,
@ -323,10 +323,17 @@ pub struct VirtioPciDevice {
// Details of bar regions to free // Details of bar regions to free
bar_regions: Vec<(GuestAddress, GuestUsize, PciBarRegionType)>, bar_regions: Vec<(GuestAddress, GuestUsize, PciBarRegionType)>,
// EventFd to signal on to request activation
activate_evt: EventFd,
// Barrier that is used to wait on for activation
activate_barrier: Arc<Barrier>,
} }
impl VirtioPciDevice { impl VirtioPciDevice {
/// Constructs a new PCI transport for the given virtio device. /// Constructs a new PCI transport for the given virtio device.
#[allow(clippy::too_many_arguments)]
pub fn new( pub fn new(
id: String, id: String,
memory: GuestMemoryAtomic<GuestMemoryMmap>, memory: GuestMemoryAtomic<GuestMemoryMmap>,
@ -335,6 +342,7 @@ impl VirtioPciDevice {
iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>>, iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>>,
interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
pci_device_bdf: u32, pci_device_bdf: u32,
activate_evt: EventFd,
) -> Result<Self> { ) -> Result<Self> {
let device_clone = device.clone(); let device_clone = device.clone();
let locked_device = device_clone.lock().unwrap(); let locked_device = device_clone.lock().unwrap();
@ -420,7 +428,7 @@ impl VirtioPciDevice {
msix_config, msix_config,
msix_num, msix_num,
device, device,
device_activated: false, device_activated: Arc::new(AtomicBool::new(false)),
interrupt_status: Arc::new(AtomicUsize::new(0)), interrupt_status: Arc::new(AtomicUsize::new(0)),
virtio_interrupt: None, virtio_interrupt: None,
queues, queues,
@ -432,6 +440,8 @@ impl VirtioPciDevice {
interrupt_source_group, interrupt_source_group,
cap_pci_cfg_info: VirtioPciCfgCapInfo::default(), cap_pci_cfg_info: VirtioPciCfgCapInfo::default(),
bar_regions: vec![], bar_regions: vec![],
activate_evt,
activate_barrier: Arc::new(Barrier::new(2)),
}; };
if let Some(msix_config) = &virtio_pci_device.msix_config { if let Some(msix_config) = &virtio_pci_device.msix_config {
@ -447,14 +457,15 @@ impl VirtioPciDevice {
fn state(&self) -> VirtioPciDeviceState { fn state(&self) -> VirtioPciDeviceState {
VirtioPciDeviceState { VirtioPciDeviceState {
device_activated: self.device_activated, device_activated: self.device_activated.load(Ordering::Acquire),
interrupt_status: self.interrupt_status.load(Ordering::Acquire), interrupt_status: self.interrupt_status.load(Ordering::Acquire),
queues: self.queues.clone(), queues: self.queues.clone(),
} }
} }
fn set_state(&mut self, state: &VirtioPciDeviceState) -> std::result::Result<(), Error> { fn set_state(&mut self, state: &VirtioPciDeviceState) -> std::result::Result<(), Error> {
self.device_activated = state.device_activated; self.device_activated
.store(state.device_activated, Ordering::Release);
self.interrupt_status self.interrupt_status
.store(state.interrupt_status, Ordering::Release); .store(state.interrupt_status, Ordering::Release);
@ -656,14 +667,19 @@ impl VirtioPciDevice {
self.queue_evts.split_off(0), self.queue_evts.split_off(0),
) )
.expect("Failed to activate device"); .expect("Failed to activate device");
self.device_activated = true; self.device_activated.store(true, Ordering::SeqCst);
info!("Waiting for barrier");
self.activate_barrier.wait();
info!("Barrier released");
} }
} }
} }
} }
fn needs_activation(&self) -> bool { fn needs_activation(&self) -> bool {
!self.device_activated && self.is_driver_ready() && self.are_queues_valid() !self.device_activated.load(Ordering::SeqCst)
&& self.is_driver_ready()
&& self.are_queues_valid()
} }
} }
@ -1001,17 +1017,20 @@ impl PciDevice for VirtioPciDevice {
}; };
// Try and activate the device if the driver status has changed // Try and activate the device if the driver status has changed
self.maybe_activate(); if self.needs_activation() {
self.activate_evt.write(1).ok();
return Some(self.activate_barrier.clone());
}
// Device has been reset by the driver // Device has been reset by the driver
if self.device_activated && self.is_driver_init() { if self.device_activated.load(Ordering::SeqCst) && self.is_driver_init() {
let mut device = self.device.lock().unwrap(); let mut device = self.device.lock().unwrap();
if let Some((virtio_interrupt, mut queue_evts)) = device.reset() { if let Some((virtio_interrupt, mut queue_evts)) = device.reset() {
// Upon reset the device returns its interrupt EventFD and it's queue EventFDs // Upon reset the device returns its interrupt EventFD and it's queue EventFDs
self.virtio_interrupt = Some(virtio_interrupt); self.virtio_interrupt = Some(virtio_interrupt);
self.queue_evts.append(&mut queue_evts); self.queue_evts.append(&mut queue_evts);
self.device_activated = false; self.device_activated.store(false, Ordering::SeqCst);
// Reset queue readiness (changes queue_enable), queue sizes // Reset queue readiness (changes queue_enable), queue sizes
// and selected_queue as per spec for reset // and selected_queue as per spec for reset
@ -1128,7 +1147,10 @@ impl Snapshottable for VirtioPciDevice {
// Then we can activate the device, as we know at this point that // Then we can activate the device, as we know at this point that
// the virtqueues are in the right state and the device is ready // the virtqueues are in the right state and the device is ready
// to be activated, which will spawn each virtio worker thread. // to be activated, which will spawn each virtio worker thread.
if self.device_activated && self.is_driver_ready() && self.are_queues_valid() { if self.device_activated.load(Ordering::SeqCst)
&& self.is_driver_ready()
&& self.are_queues_valid()
{
if let Some(virtio_interrupt) = self.virtio_interrupt.take() { if let Some(virtio_interrupt) = self.virtio_interrupt.take() {
if self.memory.is_some() { if self.memory.is_some() {
let mem = self.memory.as_ref().unwrap().clone(); let mem = self.memory.as_ref().unwrap().clone();

View File

@ -776,6 +776,10 @@ pub struct DeviceManager {
// Possible handle to the virtio-balloon device // Possible handle to the virtio-balloon device
balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>, balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
// Virtio Device activation EventFd to allow the VMM thread to trigger device
// activation and thus start the threads from the VMM thread
activate_evt: EventFd,
} }
impl DeviceManager { impl DeviceManager {
@ -788,6 +792,7 @@ impl DeviceManager {
reset_evt: &EventFd, reset_evt: &EventFd,
seccomp_action: SeccompAction, seccomp_action: SeccompAction,
#[cfg(feature = "acpi")] numa_nodes: NumaNodes, #[cfg(feature = "acpi")] numa_nodes: NumaNodes,
activate_evt: &EventFd,
) -> DeviceManagerResult<Arc<Mutex<Self>>> { ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
let device_tree = Arc::new(Mutex::new(DeviceTree::new())); let device_tree = Arc::new(Mutex::new(DeviceTree::new()));
@ -844,6 +849,9 @@ impl DeviceManager {
#[cfg(feature = "acpi")] #[cfg(feature = "acpi")]
numa_nodes, numa_nodes,
balloon: None, balloon: None,
activate_evt: activate_evt
.try_clone()
.map_err(DeviceManagerError::EventFd)?,
}; };
#[cfg(feature = "acpi")] #[cfg(feature = "acpi")]
@ -2739,6 +2747,9 @@ impl DeviceManager {
iommu_mapping_cb, iommu_mapping_cb,
interrupt_manager, interrupt_manager,
pci_device_bdf, pci_device_bdf,
self.activate_evt
.try_clone()
.map_err(DeviceManagerError::EventFd)?,
) )
.map_err(DeviceManagerError::VirtioDevice)?; .map_err(DeviceManagerError::VirtioDevice)?;
@ -2834,6 +2845,18 @@ impl DeviceManager {
Ok(()) Ok(())
} }
pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
// Find virtio pci devices and activate any pending ones
for (_, any_device) in self.pci_devices.iter() {
if let Ok(virtio_pci_device) =
Arc::clone(any_device).downcast::<Mutex<VirtioPciDevice>>()
{
virtio_pci_device.lock().unwrap().maybe_activate();
}
}
Ok(())
}
pub fn notify_hotplug( pub fn notify_hotplug(
&self, &self,
_notification_type: HotPlugNotificationFlags, _notification_type: HotPlugNotificationFlags,

View File

@ -131,6 +131,10 @@ pub enum Error {
/// Cannot apply seccomp filter /// Cannot apply seccomp filter
#[error("Error applying seccomp filter: {0}")] #[error("Error applying seccomp filter: {0}")]
ApplySeccompFilter(seccomp::Error), ApplySeccompFilter(seccomp::Error),
/// Error activating virtio devices
#[error("Error activating virtio devices: {0:?}")]
ActivateVirtioDevices(VmError),
} }
pub type Result<T> = result::Result<T, Error>; pub type Result<T> = result::Result<T, Error>;
@ -140,6 +144,7 @@ pub enum EpollDispatch {
Reset, Reset,
Stdin, Stdin,
Api, Api,
ActivateVirtioDevices,
} }
pub struct EpollContext { pub struct EpollContext {
@ -281,6 +286,7 @@ pub struct Vmm {
vm_config: Option<Arc<Mutex<VmConfig>>>, vm_config: Option<Arc<Mutex<VmConfig>>>,
seccomp_action: SeccompAction, seccomp_action: SeccompAction,
hypervisor: Arc<dyn hypervisor::Hypervisor>, hypervisor: Arc<dyn hypervisor::Hypervisor>,
activate_evt: EventFd,
} }
impl Vmm { impl Vmm {
@ -293,6 +299,7 @@ impl Vmm {
let mut epoll = EpollContext::new().map_err(Error::Epoll)?; let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
let exit_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; let exit_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
if unsafe { libc::isatty(libc::STDIN_FILENO as i32) } != 0 { if unsafe { libc::isatty(libc::STDIN_FILENO as i32) } != 0 {
epoll.add_stdin().map_err(Error::Epoll)?; epoll.add_stdin().map_err(Error::Epoll)?;
@ -306,6 +313,10 @@ impl Vmm {
.add_event(&reset_evt, EpollDispatch::Reset) .add_event(&reset_evt, EpollDispatch::Reset)
.map_err(Error::Epoll)?; .map_err(Error::Epoll)?;
epoll
.add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
.map_err(Error::Epoll)?;
epoll epoll
.add_event(&api_evt, EpollDispatch::Api) .add_event(&api_evt, EpollDispatch::Api)
.map_err(Error::Epoll)?; .map_err(Error::Epoll)?;
@ -320,6 +331,7 @@ impl Vmm {
vm_config: None, vm_config: None,
seccomp_action, seccomp_action,
hypervisor, hypervisor,
activate_evt,
}) })
} }
@ -328,6 +340,10 @@ impl Vmm {
if self.vm.is_none() { if self.vm.is_none() {
let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
let activate_evt = self
.activate_evt
.try_clone()
.map_err(VmError::EventFdClone)?;
if let Some(ref vm_config) = self.vm_config { if let Some(ref vm_config) = self.vm_config {
let vm = Vm::new( let vm = Vm::new(
@ -336,6 +352,7 @@ impl Vmm {
reset_evt, reset_evt,
&self.seccomp_action, &self.seccomp_action,
self.hypervisor.clone(), self.hypervisor.clone(),
activate_evt,
)?; )?;
self.vm = Some(vm); self.vm = Some(vm);
} }
@ -397,6 +414,10 @@ impl Vmm {
let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
let activate_evt = self
.activate_evt
.try_clone()
.map_err(VmError::EventFdClone)?;
let vm = Vm::new_from_snapshot( let vm = Vm::new_from_snapshot(
&snapshot, &snapshot,
@ -406,6 +427,7 @@ impl Vmm {
restore_cfg.prefault, restore_cfg.prefault,
&self.seccomp_action, &self.seccomp_action,
self.hypervisor.clone(), self.hypervisor.clone(),
activate_evt,
)?; )?;
self.vm = Some(vm); self.vm = Some(vm);
@ -443,6 +465,10 @@ impl Vmm {
let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
let activate_evt = self
.activate_evt
.try_clone()
.map_err(VmError::EventFdClone)?;
// The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
// an event sitting in the shared reset_evt. Without doing this we get very early reboots // an event sitting in the shared reset_evt. Without doing this we get very early reboots
@ -456,6 +482,7 @@ impl Vmm {
reset_evt, reset_evt,
&self.seccomp_action, &self.seccomp_action,
self.hypervisor.clone(), self.hypervisor.clone(),
activate_evt,
)?); )?);
} }
@ -676,6 +703,10 @@ impl Vmm {
let reset_evt = self.reset_evt.try_clone().map_err(|e| { let reset_evt = self.reset_evt.try_clone().map_err(|e| {
MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
})?; })?;
let activate_evt = self.activate_evt.try_clone().map_err(|e| {
MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
})?;
self.vm_config = Some(Arc::new(Mutex::new(config))); self.vm_config = Some(Arc::new(Mutex::new(config)));
let vm = Vm::new_from_migration( let vm = Vm::new_from_migration(
self.vm_config.clone().unwrap(), self.vm_config.clone().unwrap(),
@ -683,6 +714,7 @@ impl Vmm {
reset_evt, reset_evt,
&self.seccomp_action, &self.seccomp_action,
self.hypervisor.clone(), self.hypervisor.clone(),
activate_evt,
) )
.map_err(|e| { .map_err(|e| {
MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
@ -1065,6 +1097,13 @@ impl Vmm {
vm.handle_stdin().map_err(Error::Stdin)?; vm.handle_stdin().map_err(Error::Stdin)?;
} }
} }
EpollDispatch::ActivateVirtioDevices => {
if let Some(ref vm) = self.vm {
vm.activate_virtio_devices()
.map_err(Error::ActivateVirtioDevices)?;
}
self.activate_evt.read().map_err(Error::EventFdRead)?;
}
EpollDispatch::Api => { EpollDispatch::Api => {
// Consume the event. // Consume the event.
self.api_evt.read().map_err(Error::EventFdRead)?; self.api_evt.read().map_err(Error::EventFdRead)?;

View File

@ -227,6 +227,9 @@ pub enum Error {
/// Failed setting the VmmOps interface. /// Failed setting the VmmOps interface.
SetVmmOpsInterface(hypervisor::HypervisorVmError), SetVmmOpsInterface(hypervisor::HypervisorVmError),
/// Cannot activate virtio devices
ActivateVirtioDevices(device_manager::DeviceManagerError),
} }
pub type Result<T> = result::Result<T, Error>; pub type Result<T> = result::Result<T, Error>;
@ -488,6 +491,7 @@ impl Vm {
seccomp_action: &SeccompAction, seccomp_action: &SeccompAction,
hypervisor: Arc<dyn hypervisor::Hypervisor>, hypervisor: Arc<dyn hypervisor::Hypervisor>,
#[cfg(feature = "kvm")] _saved_clock: Option<hypervisor::ClockData>, #[cfg(feature = "kvm")] _saved_clock: Option<hypervisor::ClockData>,
activate_evt: EventFd,
) -> Result<Self> { ) -> Result<Self> {
config config
.lock() .lock()
@ -509,6 +513,7 @@ impl Vm {
seccomp_action.clone(), seccomp_action.clone(),
#[cfg(feature = "acpi")] #[cfg(feature = "acpi")]
numa_nodes.clone(), numa_nodes.clone(),
&activate_evt,
) )
.map_err(Error::DeviceManager)?; .map_err(Error::DeviceManager)?;
@ -646,6 +651,7 @@ impl Vm {
reset_evt: EventFd, reset_evt: EventFd,
seccomp_action: &SeccompAction, seccomp_action: &SeccompAction,
hypervisor: Arc<dyn hypervisor::Hypervisor>, hypervisor: Arc<dyn hypervisor::Hypervisor>,
activate_evt: EventFd,
) -> Result<Self> { ) -> Result<Self> {
#[cfg(all(feature = "kvm", target_arch = "x86_64"))] #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
hypervisor.check_required_extensions().unwrap(); hypervisor.check_required_extensions().unwrap();
@ -682,6 +688,7 @@ impl Vm {
hypervisor, hypervisor,
#[cfg(feature = "kvm")] #[cfg(feature = "kvm")]
None, None,
activate_evt,
)?; )?;
// The device manager must create the devices from here as it is part // The device manager must create the devices from here as it is part
@ -704,6 +711,7 @@ impl Vm {
prefault: bool, prefault: bool,
seccomp_action: &SeccompAction, seccomp_action: &SeccompAction,
hypervisor: Arc<dyn hypervisor::Hypervisor>, hypervisor: Arc<dyn hypervisor::Hypervisor>,
activate_evt: EventFd,
) -> Result<Self> { ) -> Result<Self> {
#[cfg(all(feature = "kvm", target_arch = "x86_64"))] #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
hypervisor.check_required_extensions().unwrap(); hypervisor.check_required_extensions().unwrap();
@ -746,6 +754,7 @@ impl Vm {
hypervisor, hypervisor,
#[cfg(feature = "kvm")] #[cfg(feature = "kvm")]
None, None,
activate_evt,
) )
} }
@ -755,6 +764,7 @@ impl Vm {
reset_evt: EventFd, reset_evt: EventFd,
seccomp_action: &SeccompAction, seccomp_action: &SeccompAction,
hypervisor: Arc<dyn hypervisor::Hypervisor>, hypervisor: Arc<dyn hypervisor::Hypervisor>,
activate_evt: EventFd,
) -> Result<Self> { ) -> Result<Self> {
#[cfg(all(feature = "kvm", target_arch = "x86_64"))] #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
hypervisor.check_required_extensions().unwrap(); hypervisor.check_required_extensions().unwrap();
@ -781,6 +791,7 @@ impl Vm {
hypervisor, hypervisor,
#[cfg(feature = "kvm")] #[cfg(feature = "kvm")]
None, None,
activate_evt,
) )
} }
@ -1739,6 +1750,14 @@ impl Vm {
pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
self.device_manager.lock().unwrap().device_tree() self.device_manager.lock().unwrap().device_tree()
} }
pub fn activate_virtio_devices(&self) -> Result<()> {
self.device_manager
.lock()
.unwrap()
.activate_virtio_devices()
.map_err(Error::ActivateVirtioDevices)
}
} }
impl Pausable for Vm { impl Pausable for Vm {