Compare commits

..

4 Commits

Author SHA1 Message Date
Andrew Carp
3fa02b34ca virtio-devices: Attach and detach endpoints from domain properly
Properly detach a device from a domain if that device is already
attached to another domain on an attach request (following section
5.13.6.3.2 of the virtio-iommu spec). Resolves nested virtualization
reboot.

Signed-off-by: Andrew Carp <acarp@crusoeenergy.com>
2024-04-01 09:19:04 +00:00
Andrew Carp
5668f02eb6 virtio-devices: Map previously attached endpoints
Ensures that any endpoints already attached to the domain are properly
mapped to a new endpoint on said endpoint's attach request. This is done
by searching for all previous mappings in the domain and then issuing map
requests for the newly attached endpoint.

Signed-off-by: Andrew Carp <acarp@crusoeenergy.com>
2024-04-01 09:19:04 +00:00
Andrew Carp
045964deee virtio-devices: Map mmio over virtio-iommu
Add infrastructure to lookup the host address for mmio regions on
external dma mapping requests. This specifically resolves vfio
passthrough for virtio-iommu, allowing for nested virtualization to pass
external devices through.

Fixes #6110

Signed-off-by: Andrew Carp <acarp@crusoeenergy.com>
2024-04-01 09:16:30 +00:00
Andrew Carp
a5e2460d95 virtio-devices: Move VfioDmaMapping to be in the pci crate
VfioUserDmaMapping is already in the pci crate; this moves
VfioDmaMapping to match the behavior. This is a necessary change to
allow the VfioDmaMapping trait to have access to MmioRegion memory
without creating a circular dependency. The VfioDmaMapping trait
needs to have access to mmio regions to map external devices over
mmio (a follow-up commit).

Signed-off-by: Andrew Carp <acarp@crusoeenergy.com>
2024-04-01 09:16:30 +00:00
6 changed files with 209 additions and 100 deletions

View File

@ -26,7 +26,7 @@ pub use self::device::{
}; };
pub use self::msi::{msi_num_enabled_vectors, MsiCap, MsiConfig}; pub use self::msi::{msi_num_enabled_vectors, MsiCap, MsiConfig};
pub use self::msix::{MsixCap, MsixConfig, MsixTableEntry, MSIX_CONFIG_ID, MSIX_TABLE_ENTRY_SIZE}; pub use self::msix::{MsixCap, MsixConfig, MsixTableEntry, MSIX_CONFIG_ID, MSIX_TABLE_ENTRY_SIZE};
pub use self::vfio::{VfioPciDevice, VfioPciError}; pub use self::vfio::{MmioRegion, VfioDmaMapping, VfioPciDevice, VfioPciError};
pub use self::vfio_user::{VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError}; pub use self::vfio_user::{VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError};
use serde::de::Visitor; use serde::de::Visitor;
use std::fmt::{self, Display}; use std::fmt::{self, Display};

View File

@ -32,11 +32,12 @@ use vm_allocator::page_size::{
align_page_size_down, align_page_size_up, is_4k_aligned, is_4k_multiple, is_page_size_aligned, align_page_size_down, align_page_size_up, is_4k_aligned, is_4k_multiple, is_page_size_aligned,
}; };
use vm_allocator::{AddressAllocator, SystemAllocator}; use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_device::interrupt::{ use vm_device::interrupt::{
InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig, InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
}; };
use vm_device::{BusDevice, Resource}; use vm_device::{BusDevice, Resource};
use vm_memory::{Address, GuestAddress, GuestUsize}; use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsize};
use vm_migration::{ use vm_migration::{
Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
}; };
@ -274,6 +275,48 @@ pub struct MmioRegion {
pub(crate) index: u32, pub(crate) index: u32,
pub(crate) user_memory_regions: Vec<UserMemoryRegion>, pub(crate) user_memory_regions: Vec<UserMemoryRegion>,
} }
/// Address queries over a collection of MMIO regions.
trait MmioRegionRange {
/// Reports whether the `size` bytes starting at `guest_addr` are covered by
/// the collection.
fn check_range(&self, guest_addr: u64, size: u64) -> bool;
/// Translates `guest_addr` into the matching user address, or returns an
/// `io::Error` when no translation is available.
fn find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error>;
}
impl MmioRegionRange for Vec<MmioRegion> {
    /// Checks whether a guest address range is within one of the mmio regions.
    ///
    /// Returns `true` only when `[guest_addr, guest_addr + size)` fits entirely
    /// inside a single region. A request whose end overflows `u64` is never in
    /// range.
    fn check_range(&self, guest_addr: u64, size: u64) -> bool {
        // The end of the request is the same for every region: compute it once.
        let Some(guest_addr_end) = guest_addr.checked_add(size) else {
            return false;
        };

        self.iter().any(|region| {
            let region_start = region.start.raw_value();
            // A region whose end cannot be represented is skipped rather than
            // aborting the search, so it cannot hide the regions that follow.
            region_start
                .checked_add(region.length)
                .map_or(false, |region_end| {
                    guest_addr >= region_start && guest_addr_end <= region_end
                })
        })
    }

    /// Locates the user region address for a guest address within all mmio
    /// regions.
    ///
    /// # Errors
    /// Returns an `io::Error` of kind `Other` when no user memory region
    /// contains `guest_addr`.
    fn find_user_address(&self, guest_addr: u64) -> Result<u64, io::Error> {
        self.iter()
            .flat_map(|region| region.user_memory_regions.iter())
            .find_map(|user_region| {
                // Compare offsets instead of computing `start + size`, which
                // would overflow for a region ending at the top of the address
                // space.
                let offset = guest_addr.checked_sub(user_region.start)?;
                if offset < user_region.size {
                    user_region.host_addr.checked_add(offset)
                } else {
                    None
                }
            })
            .ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!("unable to find user address: 0x{guest_addr:x}"),
                )
            })
    }
}
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum VfioError { pub enum VfioError {
#[error("Kernel VFIO error: {0}")] #[error("Kernel VFIO error: {0}")]
@ -1885,3 +1928,80 @@ impl Snapshottable for VfioPciDevice {
} }
impl Transportable for VfioPciDevice {} impl Transportable for VfioPciDevice {}
impl Migratable for VfioPciDevice {} impl Migratable for VfioPciDevice {}
/// This structure implements the ExternalDmaMapping trait. It is meant to
/// be used when the caller tries to provide a way to update the mappings
/// associated with a specific VFIO container.
pub struct VfioDmaMapping<M: GuestAddressSpace> {
/// VFIO container on which the DMA map and unmap requests are issued.
container: Arc<VfioContainer>,
/// Guest memory used to translate a guest physical address into a host
/// address.
memory: Arc<M>,
/// MMIO regions consulted when the guest physical address is not backed by
/// guest memory.
mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}
impl<M: GuestAddressSpace> VfioDmaMapping<M> {
    /// Builds a DMA mapping object from its three shared handles.
    ///
    /// # Parameters
    /// * `container`: VFIO container object.
    /// * `memory`: guest memory to mmap.
    /// * `mmio_regions`: mmio_regions to mmap.
    pub fn new(
        container: Arc<VfioContainer>,
        memory: Arc<M>,
        mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
    ) -> Self {
        Self {
            container,
            memory,
            mmio_regions,
        }
    }
}
impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioDmaMapping<M> {
    /// Maps `size` bytes at `iova` onto the host memory backing guest physical
    /// address `gpa` in the VFIO container.
    ///
    /// The host address is looked up in guest memory first and, failing that,
    /// in the registered mmio regions.
    ///
    /// # Errors
    /// Returns an `io::Error` when `gpa` cannot be translated or when the
    /// container rejects the mapping.
    fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), io::Error> {
        let mem = self.memory.memory();
        let guest_addr = GuestAddress(gpa);
        let user_addr = if mem.check_range(guest_addr, size as usize) {
            mem.get_host_address(guest_addr).map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!("unable to retrieve user address for gpa 0x{gpa:x} from guest memory region: {e}")
                )
            })? as u64
        } else {
            // Hold a single guard across the range check and the lookup so the
            // region list cannot change between the two steps.
            let mmio_regions = self.mmio_regions.lock().unwrap();
            if !mmio_regions.check_range(gpa, size) {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("failed to locate guest address 0x{gpa:x} in guest memory"),
                ));
            }
            mmio_regions.find_user_address(gpa)?
        };

        self.container
            .vfio_dma_map(iova, size, user_addr)
            .map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "failed to map memory for VFIO container, \
                         iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}"
                    ),
                )
            })
    }

    /// Removes the mapping of `size` bytes at `iova` from the VFIO container.
    ///
    /// # Errors
    /// Returns an `io::Error` when the container rejects the unmapping.
    fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), io::Error> {
        self.container.vfio_dma_unmap(iova, size).map_err(|e| {
            io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "failed to unmap memory for VFIO container, \
                     iova 0x{iova:x}, size 0x{size:x}: {e:?}"
                ),
            )
        })
    }
}

View File

@ -407,6 +407,15 @@ impl Request {
let bypass = let bypass =
(req.flags & VIRTIO_IOMMU_ATTACH_F_BYPASS) == VIRTIO_IOMMU_ATTACH_F_BYPASS; (req.flags & VIRTIO_IOMMU_ATTACH_F_BYPASS) == VIRTIO_IOMMU_ATTACH_F_BYPASS;
let mut old_domain_id = domain_id;
if let Some(&id) = mapping.endpoints.read().unwrap().get(&endpoint) {
old_domain_id = id;
}
if old_domain_id != domain_id {
detach_endpoint_from_domain(endpoint, old_domain_id, mapping, ext_mapping)?;
}
// Add endpoint associated with specific domain // Add endpoint associated with specific domain
mapping mapping
.endpoints .endpoints
@ -414,6 +423,19 @@ impl Request {
.unwrap() .unwrap()
.insert(endpoint, domain_id); .insert(endpoint, domain_id);
// If any other mappings exist in the domain for other containers,
// make sure to issue these mappings for the new endpoint/container
if let Some(domain_mappings) = &mapping.domains.read().unwrap().get(&domain_id)
{
if let Some(ext_map) = ext_mapping.get(&endpoint) {
for (virt_start, addr_map) in &domain_mappings.mappings {
ext_map
.map(*virt_start, addr_map.gpa, addr_map.size)
.map_err(Error::ExternalUnmapping)?;
}
}
}
// Add new domain with no mapping if the entry didn't exist yet // Add new domain with no mapping if the entry didn't exist yet
let mut domains = mapping.domains.write().unwrap(); let mut domains = mapping.domains.write().unwrap();
let domain = Domain { let domain = Domain {
@ -439,22 +461,7 @@ impl Request {
let endpoint = req.endpoint; let endpoint = req.endpoint;
// Remove endpoint associated with specific domain // Remove endpoint associated with specific domain
mapping.endpoints.write().unwrap().remove(&endpoint); detach_endpoint_from_domain(endpoint, domain_id, mapping, ext_mapping)?;
// After all endpoints have been successfully detached from a
// domain, the domain can be removed. This means we must remove
// the mappings associated with this domain.
if mapping
.endpoints
.write()
.unwrap()
.iter()
.filter(|(_, &d)| d == domain_id)
.count()
== 0
{
mapping.domains.write().unwrap().remove(&domain_id);
}
} }
VIRTIO_IOMMU_T_MAP => { VIRTIO_IOMMU_T_MAP => {
if desc_size_left != size_of::<VirtioIommuReqMap>() { if desc_size_left != size_of::<VirtioIommuReqMap>() {
@ -491,7 +498,9 @@ impl Request {
.map(|(&e, _)| e) .map(|(&e, _)| e)
.collect(); .collect();
// Trigger external mapping if necessary. // For viommu all endpoints receive their own VFIO container, as a result
// Each endpoint within the domain needs to be separately mapped, as the
// mapping is done on a per-container level, not a per-domain level
for endpoint in endpoints { for endpoint in endpoints {
if let Some(ext_map) = ext_mapping.get(&endpoint) { if let Some(ext_map) = ext_mapping.get(&endpoint) {
let size = req.virt_end - req.virt_start + 1; let size = req.virt_end - req.virt_start + 1;
@ -640,6 +649,41 @@ impl Request {
} }
} }
/// Detaches `endpoint` from the domain `domain_id`.
///
/// The endpoint is removed from the endpoint table, every mapping recorded for
/// the domain is unmapped through the endpoint's external DMA handler (when it
/// has one), and the domain itself is dropped once no endpoint refers to it.
///
/// # Errors
/// Returns `Error::ExternalUnmapping` when an external unmap request fails.
fn detach_endpoint_from_domain(
    endpoint: u32,
    domain_id: u32,
    mapping: &Arc<IommuMapping>,
    ext_mapping: &BTreeMap<u32, Arc<dyn ExternalDmaMapping>>,
) -> result::Result<(), Error> {
    // Remove endpoint associated with specific domain
    mapping.endpoints.write().unwrap().remove(&endpoint);

    // Trigger external unmapping for the endpoint if necessary.
    if let Some(domain_mappings) = &mapping.domains.read().unwrap().get(&domain_id) {
        if let Some(ext_map) = ext_mapping.get(&endpoint) {
            for (virt_start, addr_map) in &domain_mappings.mappings {
                ext_map
                    .unmap(*virt_start, addr_map.size)
                    .map_err(Error::ExternalUnmapping)?;
            }
        }
    }

    // After all endpoints have been detached from a domain, the domain can be
    // removed. Only a shared lock is needed to look for remaining users, and
    // the search stops at the first one found.
    let domain_in_use = mapping
        .endpoints
        .read()
        .unwrap()
        .iter()
        .any(|(_, &d)| d == domain_id);
    if !domain_in_use {
        mapping.domains.write().unwrap().remove(&domain_id);
    }

    Ok(())
}
struct IommuEpollHandler { struct IommuEpollHandler {
mem: GuestMemoryAtomic<GuestMemoryMmap>, mem: GuestMemoryAtomic<GuestMemoryMmap>,
request_queue: Queue, request_queue: Queue,

View File

@ -2,8 +2,6 @@
// //
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
pub mod vfio;
/// Trait meant for triggering the DMA mapping update related to an external /// Trait meant for triggering the DMA mapping update related to an external
/// device not managed fully through virtio. It is dedicated to virtio-iommu /// device not managed fully through virtio. It is dedicated to virtio-iommu
/// in order to trigger the map update anytime the mapping is updated from the /// in order to trigger the map update anytime the mapping is updated from the

View File

@ -1,70 +0,0 @@
// Copyright © 2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
use crate::dma_mapping::ExternalDmaMapping;
use std::io;
use std::sync::Arc;
use vfio_ioctls::VfioContainer;
use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory};
/// This structure implements the ExternalDmaMapping trait. It is meant to
/// be used when the caller tries to provide a way to update the mappings
/// associated with a specific VFIO container.
pub struct VfioDmaMapping<M: GuestAddressSpace> {
/// VFIO container on which the DMA map and unmap requests are issued.
container: Arc<VfioContainer>,
/// Guest memory used to translate a guest physical address into a host
/// address.
memory: Arc<M>,
}
impl<M: GuestAddressSpace> VfioDmaMapping<M> {
    /// Builds a DMA mapping object from a container and the guest memory.
    ///
    /// # Parameters
    /// * `container`: VFIO container object.
    /// * `memory`: guest memory to mmap.
    pub fn new(container: Arc<VfioContainer>, memory: Arc<M>) -> Self {
        Self { container, memory }
    }
}
impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioDmaMapping<M> {
    /// Maps `size` bytes at `iova` onto the host memory backing guest physical
    /// address `gpa` in the VFIO container.
    ///
    /// # Errors
    /// Returns an `io::Error` when `gpa` cannot be translated into a host
    /// address or when the container rejects the mapping.
    fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), io::Error> {
        let mem = self.memory.memory();
        let guest_addr = GuestAddress(gpa);
        if !mem.check_range(guest_addr, size as usize) {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "failed to convert guest address 0x{gpa:x} into \
                     host user virtual address"
                ),
            ));
        }

        // Report a failed translation to the caller instead of panicking.
        let user_addr = mem.get_host_address(guest_addr).map_err(|e| {
            io::Error::new(
                io::ErrorKind::Other,
                format!("unable to retrieve user address for gpa 0x{gpa:x} from guest memory region: {e}"),
            )
        })? as u64;

        self.container
            .vfio_dma_map(iova, size, user_addr)
            .map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "failed to map memory for VFIO container, \
                         iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}"
                    ),
                )
            })
    }

    /// Removes the mapping of `size` bytes at `iova` from the VFIO container.
    ///
    /// # Errors
    /// Returns an `io::Error` when the container rejects the unmapping.
    fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), io::Error> {
        self.container.vfio_dma_unmap(iova, size).map_err(|e| {
            io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "failed to unmap memory for VFIO container, \
                     iova 0x{iova:x}, size 0x{size:x}: {e:?}"
                ),
            )
        })
    }
}

View File

@ -58,8 +58,8 @@ use libc::{
O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
}; };
use pci::{ use pci::{
DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping,
VfioUserPciDevice, VfioUserPciDeviceError, VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError,
}; };
use rate_limiter::group::RateLimiterGroup; use rate_limiter::group::RateLimiterGroup;
use seccompiler::SeccompAction; use seccompiler::SeccompAction;
@ -85,7 +85,6 @@ use virtio_devices::{
}; };
use virtio_devices::{Endpoint, IommuMapping}; use virtio_devices::{Endpoint, IommuMapping};
use vm_allocator::{AddressAllocator, SystemAllocator}; use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::dma_mapping::vfio::VfioDmaMapping;
use vm_device::dma_mapping::ExternalDmaMapping; use vm_device::dma_mapping::ExternalDmaMapping;
use vm_device::interrupt::{ use vm_device::interrupt::{
InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
@ -966,6 +965,8 @@ pub struct DeviceManager {
snapshot: Option<Snapshot>, snapshot: Option<Snapshot>,
rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>, rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,
mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
} }
impl DeviceManager { impl DeviceManager {
@ -1196,6 +1197,7 @@ impl DeviceManager {
acpi_platform_addresses: AcpiPlatformAddresses::default(), acpi_platform_addresses: AcpiPlatformAddresses::default(),
snapshot, snapshot,
rate_limit_groups, rate_limit_groups,
mmio_regions: Arc::new(Mutex::new(Vec::new())),
}; };
let device_manager = Arc::new(Mutex::new(device_manager)); let device_manager = Arc::new(Mutex::new(device_manager));
@ -3424,6 +3426,7 @@ impl DeviceManager {
let vfio_mapping = Arc::new(VfioDmaMapping::new( let vfio_mapping = Arc::new(VfioDmaMapping::new(
Arc::clone(&vfio_container), Arc::clone(&vfio_container),
Arc::new(self.memory_manager.lock().unwrap().guest_memory()), Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
Arc::clone(&self.mmio_regions),
)); ));
if let Some(iommu) = &self.iommu_device { if let Some(iommu) = &self.iommu_device {
@ -3468,6 +3471,7 @@ impl DeviceManager {
let vfio_mapping = Arc::new(VfioDmaMapping::new( let vfio_mapping = Arc::new(VfioDmaMapping::new(
Arc::clone(&vfio_container), Arc::clone(&vfio_container),
Arc::new(self.memory_manager.lock().unwrap().guest_memory()), Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
Arc::clone(&self.mmio_regions),
)); ));
for virtio_mem_device in self.virtio_mem_devices.iter() { for virtio_mem_device in self.virtio_mem_devices.iter() {
@ -3530,6 +3534,10 @@ impl DeviceManager {
.map_mmio_regions() .map_mmio_regions()
.map_err(DeviceManagerError::VfioMapRegion)?; .map_err(DeviceManagerError::VfioMapRegion)?;
for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
self.mmio_regions.lock().unwrap().push(mmio_region);
}
let mut node = device_node!(vfio_name, vfio_pci_device); let mut node = device_node!(vfio_name, vfio_pci_device);
// Update the device tree with correct resource information. // Update the device tree with correct resource information.
@ -4201,12 +4209,21 @@ impl DeviceManager {
let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
// No need to remove any virtio-mem mapping here as the container outlives all devices // No need to remove any virtio-mem mapping here as the container outlives all devices
PciDeviceHandle::Vfio(vfio_pci_device) => ( PciDeviceHandle::Vfio(vfio_pci_device) => {
for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
self.mmio_regions
.lock()
.unwrap()
.retain(|x| x.start != mmio_region.start)
}
(
Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
false, false,
), )
}
PciDeviceHandle::Virtio(virtio_pci_device) => { PciDeviceHandle::Virtio(virtio_pci_device) => {
let dev = virtio_pci_device.lock().unwrap(); let dev = virtio_pci_device.lock().unwrap();
let bar_addr = dev.config_bar_addr(); let bar_addr = dev.config_bar_addr();