Mirror of https://github.com/cloud-hypervisor/cloud-hypervisor.git
vmm: Create a single VFIO container
For most use cases, there is no need to create multiple VFIO containers, as it causes unwanted behaviors. In particular, when passing multiple devices from the same IOMMU group, we need to use the same container so that it can properly list the groups that have already been opened. The correct logic was already there in vfio-ioctls, but it was incorrectly used from our VMM implementation.

For the special case where we put a VFIO device behind a vIOMMU, we must create one container per device, as we need to control the DMA mappings per device, and this is performed at the container level. Because we must keep one container per device, the vIOMMU use case prevents multiple devices attached to the same IOMMU group from being passed through the VM. This is a limitation we are fine with, especially since the vIOMMU doesn't let us group multiple devices in the same group from a guest perspective.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
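The container policy described above can be sketched roughly as follows. This is a minimal illustration, not the actual patch code: the Manager struct, its Result alias, and the behind_viommu flag are stand-ins for the VMM's DeviceManager, DeviceManagerResult, and device_cfg.iommu, while create_vfio_container and the vfio_container field mirror the names introduced in the diff below.

use std::sync::Arc;

// Stand-in for vfio_ioctls::VfioContainer; only the sharing policy matters here.
struct VfioContainer;
type Result<T> = std::result::Result<T, ()>;

struct Manager {
    // The single shared container, reused by every VFIO device that is not
    // placed behind the virtual IOMMU.
    vfio_container: Option<Arc<VfioContainer>>,
}

impl Manager {
    fn create_vfio_container(&self) -> Result<Arc<VfioContainer>> {
        Ok(Arc::new(VfioContainer))
    }

    // Mirrors the branch added to add_vfio_device(): a dedicated container per
    // device behind the vIOMMU, otherwise one container shared by all devices.
    fn container_for_device(&mut self, behind_viommu: bool) -> Result<Arc<VfioContainer>> {
        if behind_viommu {
            // DMA map/unmap happens at the container level, so per-device
            // mappings require a per-device container.
            self.create_vfio_container()
        } else if let Some(container) = &self.vfio_container {
            // Reuse the shared container so devices from the same IOMMU group
            // are opened against the container that already tracks their group.
            Ok(Arc::clone(container))
        } else {
            // First non-vIOMMU VFIO device: create the shared container and
            // store it for the devices that follow.
            let container = self.create_vfio_container()?;
            self.vfio_container = Some(Arc::clone(&container));
            Ok(container)
        }
    }
}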
This commit is contained in:
parent
a5d77cdee9
commit
a6040d7a30
@@ -25,7 +25,6 @@ use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use libc::EFD_NONBLOCK;
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::io;
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
@@ -428,7 +427,7 @@ struct MemEpollHandler {
kill_evt: EventFd,
pause_evt: EventFd,
hugepages: bool,
dma_mapping_handlers: Arc<Mutex<BTreeMap<u32, Arc<dyn ExternalDmaMapping>>>>,
dma_mapping_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

impl MemEpollHandler {
@@ -505,11 +504,10 @@ impl MemEpollHandler {
.unwrap()
.set_range(first_block_index, nb_blocks, plug);

let handlers = self.dma_mapping_handlers.lock().unwrap();
if plug {
let mut gpa = addr;
for _ in 0..nb_blocks {
for (_, handler) in handlers.iter() {
if let Some(handler) = &self.dma_mapping_handler {
if let Err(e) = handler.map(gpa, gpa, config.block_size) {
error!(
"failed DMA mapping addr 0x{:x} size 0x{:x}: {}",
@@ -524,7 +522,7 @@ impl MemEpollHandler {

config.plugged_size += size;
} else {
for (_, handler) in handlers.iter() {
if let Some(handler) = &self.dma_mapping_handler {
if let Err(e) = handler.unmap(addr, size) {
error!(
"failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
@@ -549,11 +547,10 @@ impl MemEpollHandler {

// Remaining plugged blocks are unmapped.
if config.plugged_size > 0 {
let handlers = self.dma_mapping_handlers.lock().unwrap();
for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
if *plugged {
let gpa = config.addr + (idx as u64 * config.block_size);
for (_, handler) in handlers.iter() {
if let Some(handler) = &self.dma_mapping_handler {
if let Err(e) = handler.unmap(gpa, config.block_size) {
error!(
"failed DMA unmapping addr 0x{:x} size 0x{:x}: {}",
@@ -744,7 +741,7 @@ pub struct Mem {
config: Arc<Mutex<VirtioMemConfig>>,
seccomp_action: SeccompAction,
hugepages: bool,
dma_mapping_handlers: Arc<Mutex<BTreeMap<u32, Arc<dyn ExternalDmaMapping>>>>,
dma_mapping_handler: Option<Arc<dyn ExternalDmaMapping>>,
blocks_state: Arc<Mutex<BlocksState>>,
exit_evt: EventFd,
}
@@ -832,7 +829,7 @@ impl Mem {
config: Arc::new(Mutex::new(config)),
seccomp_action,
hugepages,
dma_mapping_handlers: Arc::new(Mutex::new(BTreeMap::new())),
dma_mapping_handler: None,
blocks_state: Arc::new(Mutex::new(BlocksState(vec![
false;
(config.region_size / config.block_size)
@@ -844,7 +841,6 @@ impl Mem {

pub fn add_dma_mapping_handler(
&mut self,
device_id: u32,
handler: Arc<dyn ExternalDmaMapping>,
) -> result::Result<(), Error> {
let config = self.config.lock().unwrap();
@@ -860,34 +856,7 @@ impl Mem {
}
}

self.dma_mapping_handlers
.lock()
.unwrap()
.insert(device_id, handler);

Ok(())
}

pub fn remove_dma_mapping_handler(&mut self, device_id: u32) -> result::Result<(), Error> {
let handler = self
.dma_mapping_handlers
.lock()
.unwrap()
.remove(&device_id)
.ok_or(Error::InvalidDmaMappingHandler)?;

let config = self.config.lock().unwrap();

if config.plugged_size > 0 {
for (idx, plugged) in self.blocks_state.lock().unwrap().inner().iter().enumerate() {
if *plugged {
let gpa = config.addr + (idx as u64 * config.block_size);
handler
.unmap(gpa, config.block_size)
.map_err(Error::DmaUnmap)?;
}
}
}
self.dma_mapping_handler = Some(handler);

Ok(())
}
@@ -946,7 +915,7 @@ impl VirtioDevice for Mem {
kill_evt,
pause_evt,
hugepages: self.hugepages,
dma_mapping_handlers: Arc::clone(&self.dma_mapping_handlers),
dma_mapping_handler: self.dma_mapping_handler.clone(),
};

handler
@@ -265,7 +265,7 @@ pub enum DeviceManagerError {
VfioMapRegion(pci::VfioPciError),

/// Failed to DMA map VFIO device.
VfioDmaMap(pci::VfioPciError),
VfioDmaMap(vfio_ioctls::VfioError),

/// Failed to DMA unmap VFIO device.
VfioDmaUnmap(pci::VfioPciError),
@@ -370,7 +370,7 @@ pub enum DeviceManagerError {
VirtioMemRangeAllocation,

/// Failed updating guest memory for VFIO PCI device.
UpdateMemoryForVfioPciDevice(pci::VfioPciError),
UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

/// Trying to use a directory for pmem but no size specified
PmemWithDirectorySizeMissing,
@@ -402,6 +402,9 @@ pub enum DeviceManagerError {
/// Missing virtio-balloon, can't proceed as expected.
MissingVirtioBalloon,

/// Missing virtual IOMMU device
MissingVirtualIommu,

/// Failed to do power button notification
PowerButtonNotification(io::Error),
@@ -841,6 +844,11 @@ pub struct DeviceManager {
// Passthrough device handle
passthrough_device: Option<Arc<dyn hypervisor::Device>>,

// VFIO container
// Only one container can be created, therefore it is stored as part of the
// DeviceManager to be reused.
vfio_container: Option<Arc<VfioContainer>>,

// Paravirtualized IOMMU
iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
@@ -961,6 +969,7 @@ impl DeviceManager {
msi_interrupt_manager,
legacy_interrupt_manager: None,
passthrough_device: None,
vfio_container: None,
iommu_device: None,
iommu_attached_devices: None,
pci_devices_up: 0,
@@ -2790,28 +2799,12 @@ impl DeviceManager {
self.add_vfio_device(pci, device_cfg)
}

fn add_vfio_device(
&mut self,
pci: &mut PciBus,
device_cfg: &mut DeviceConfig,
) -> DeviceManagerResult<(u32, String)> {
fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
let passthrough_device = self
.passthrough_device
.as_ref()
.ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

// We need to shift the device id since the 3 first bits
// are dedicated to the PCI function, and we know we don't
// do multifunction. Also, because we only support one PCI
// bus, the bus 0, we don't need to add anything to the
// global device ID.
let pci_device_bdf = pci
.next_device_id()
.map_err(DeviceManagerError::NextPciDeviceId)?
<< 3;

let memory = self.memory_manager.lock().unwrap().guest_memory();

// Safe because we know the RawFd is valid.
//
// This dup() is mandatory to be able to give full ownership of the
@@ -2830,31 +2823,94 @@ impl DeviceManager {
// 1. When running on KVM or MSHV, passthrough_device wraps around DeviceFd.
// 2. The conversion here extracts the raw fd and then turns the raw fd into a DeviceFd
// of the same (correct) type.
let vfio_container = Arc::new(
Ok(Arc::new(
VfioContainer::new(Arc::new(unsafe { DeviceFd::from_raw_fd(dup_device_fd) }))
.map_err(DeviceManagerError::VfioCreate)?,
);
))
}

let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
.map_err(DeviceManagerError::VfioCreate)?;
fn add_vfio_device(
&mut self,
pci: &mut PciBus,
device_cfg: &mut DeviceConfig,
) -> DeviceManagerResult<(u32, String)> {
// We need to shift the device id since the 3 first bits
// are dedicated to the PCI function, and we know we don't
// do multifunction. Also, because we only support one PCI
// bus, the bus 0, we don't need to add anything to the
// global device ID.
let pci_device_bdf = pci
.next_device_id()
.map_err(DeviceManagerError::NextPciDeviceId)?
<< 3;

let mut needs_dma_mapping = false;

// Here we create a new VFIO container for two reasons. Either this is
// the first VFIO device, meaning we need a new VFIO container, which
// will be shared with other VFIO devices. Or the new VFIO device is
// attached to a vIOMMU, meaning we must create a dedicated VFIO
// container. In the vIOMMU use case, we can't let all devices under
// the same VFIO container since we couldn't map/unmap memory for each
// device. That's simply because the map/unmap operations happen at the
// VFIO container level.
let vfio_container = if device_cfg.iommu {
let vfio_container = self.create_vfio_container()?;

let vfio_mapping = Arc::new(VfioDmaMapping::new(
Arc::clone(&vfio_container),
Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
));

let vfio_mapping = Arc::new(VfioDmaMapping::new(
Arc::clone(&vfio_container),
Arc::new(memory),
));
if device_cfg.iommu {
if let Some(iommu) = &self.iommu_device {
iommu
.lock()
.unwrap()
.add_external_mapping(pci_device_bdf, vfio_mapping);
} else {
return Err(DeviceManagerError::MissingVirtualIommu);
}

vfio_container
} else if let Some(vfio_container) = &self.vfio_container {
Arc::clone(vfio_container)
} else {
let vfio_container = self.create_vfio_container()?;
needs_dma_mapping = true;
self.vfio_container = Some(Arc::clone(&vfio_container));

vfio_container
};

let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
.map_err(DeviceManagerError::VfioCreate)?;

if needs_dma_mapping {
// Register DMA mapping in IOMMU.
// Do not register virtio-mem regions, as they are handled directly by
// virtio-mem device itself.
for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
for region in zone.regions() {
vfio_container
.vfio_dma_map(
region.start_addr().raw_value(),
region.len() as u64,
region.as_ptr() as u64,
)
.map_err(DeviceManagerError::VfioDmaMap)?;
}
}

let vfio_mapping = Arc::new(VfioDmaMapping::new(
Arc::clone(&vfio_container),
Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
));

for virtio_mem_device in self.virtio_mem_devices.iter() {
virtio_mem_device
.lock()
.unwrap()
.add_dma_mapping_handler(pci_device_bdf, vfio_mapping.clone())
.add_dma_mapping_handler(vfio_mapping.clone())
.map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
}
}
@@ -2910,21 +2966,6 @@ impl DeviceManager {
});
}

// Register DMA mapping in IOMMU.
// Do not register virtio-mem regions, as they are handled directly by
// virtio-mem device itself.
for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
for region in zone.regions() {
vfio_pci_device
.dma_map(
region.start_addr().raw_value(),
region.len() as u64,
region.as_ptr() as u64,
)
.map_err(DeviceManagerError::VfioDmaMap)?;
}
}

let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

self.add_pci_device(
@@ -3266,8 +3307,19 @@ impl DeviceManager {
.map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
}

#[allow(clippy::single_match)]
// Take care of updating the memory for VFIO PCI devices.
if let Some(vfio_container) = &self.vfio_container {
vfio_container
.vfio_dma_map(
new_region.start_addr().raw_value(),
new_region.len() as u64,
new_region.as_ptr() as u64,
)
.map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
}

#[allow(clippy::single_match)]
// Take care of updating the memory for vfio-user devices.
{
let device_tree = self.device_tree.lock().unwrap();
for pci_device_node in device_tree.pci_devices() {
@@ -3276,18 +3328,6 @@ impl DeviceManager {
.as_ref()
.ok_or(DeviceManagerError::MissingPciDevice)?
{
#[cfg(feature = "kvm")]
PciDeviceHandle::Vfio(vfio_pci_device) => {
vfio_pci_device
.lock()
.unwrap()
.dma_map(
new_region.start_addr().raw_value(),
new_region.len() as u64,
new_region.as_ptr() as u64,
)
.map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
}
PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
vfio_user_pci_device
.lock()
@@ -3466,38 +3506,11 @@ impl DeviceManager {
.pci_device_handle
.ok_or(DeviceManagerError::MissingPciDevice)?;
let (pci_device, bus_device, virtio_device) = match pci_device_handle {
PciDeviceHandle::Vfio(vfio_pci_device) => {
{
// Unregister DMA mapping in IOMMU.
// Do not unregister the virtio-mem region, as it is
// directly handled by the virtio-mem device.
let dev = vfio_pci_device.lock().unwrap();
for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
for region in zone.regions() {
dev.dma_unmap(region.start_addr().raw_value(), region.len() as u64)
.map_err(DeviceManagerError::VfioDmaUnmap)?;
}
}

// Unregister the VFIO mapping handler from all virtio-mem
// devices.
if !dev.iommu_attached() {
for virtio_mem_device in self.virtio_mem_devices.iter() {
virtio_mem_device
.lock()
.unwrap()
.remove_dma_mapping_handler(pci_device_bdf)
.map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
}
}
}

(
Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
None as Option<VirtioDeviceArc>,
)
}
PciDeviceHandle::Vfio(vfio_pci_device) => (
Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
None as Option<VirtioDeviceArc>,
),
PciDeviceHandle::Virtio(virtio_pci_device) => {
let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {