pci: vfio: Implement support for sparse mmap

Reorganize the code to leverage the same mechanics implemented for
vfio-user, which are aimed at supporting sparse memory mappings for a
single region.

Relying on the capabilities returned by the vfio-ioctls crate, we create
a list of sparse areas depending on whether we get the SPARSE_MMAP or
MSIX_MAPPABLE capability, or a single sparse area covering the whole
region in case no relevant capability is found.

The list of sparse areas is then used to create both the memory mappings
in the Cloud Hypervisor address space and the hypervisor user memory
regions.
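
The per-area handling boils down to something like the following sketch
(assuming the libc crate; map_sparse_areas, region_start and region_offset
are illustrative names, and the real code in map_mmio_regions() below also
honors the region's protection flags and registers each mapping through
vm.create_user_memory_region()):

    use std::os::unix::io::RawFd;
    use std::ptr::null_mut;

    // For every sparse area, mmap the matching file range of the VFIO region
    // and return (guest_start, size, host_addr) triples that the caller can
    // register as hypervisor user memory regions.
    fn map_sparse_areas(
        fd: RawFd,
        region_start: u64,    // guest address of the BAR
        region_offset: u64,   // file offset of the region within the VFIO fd
        areas: &[(u64, u64)], // (offset, size) pairs inside the region
    ) -> std::io::Result<Vec<(u64, u64, u64)>> {
        let mut mappings = Vec::new();
        for &(offset, size) in areas {
            // SAFETY: mapping a file-backed range; the result is checked below.
            let host_addr = unsafe {
                libc::mmap(
                    null_mut(),
                    size as usize,
                    libc::PROT_READ | libc::PROT_WRITE,
                    libc::MAP_SHARED,
                    fd,
                    (region_offset + offset) as libc::off_t,
                )
            };
            if host_addr == libc::MAP_FAILED {
                return Err(std::io::Error::last_os_error());
            }
            mappings.push((region_start + offset, size, host_addr as u64));
        }
        Ok(mappings)
    }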

This allows the MmioRegion structure to be simplified.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
commit 1108bd1967
parent 707cea2182
Author: Sebastien Boeuf <sebastien.boeuf@intel.com>
Date:   2022-05-04 12:31:02 +02:00


@@ -18,7 +18,9 @@ use std::ptr::null_mut;
 use std::sync::{Arc, Barrier, Mutex};
 use thiserror::Error;
 use vfio_bindings::bindings::vfio::*;
-use vfio_ioctls::{VfioContainer, VfioDevice, VfioIrq, VfioRegionInfoCap};
+use vfio_ioctls::{
+    VfioContainer, VfioDevice, VfioIrq, VfioRegionInfoCap, VfioRegionSparseMmapArea,
+};
 use vm_allocator::{AddressAllocator, SystemAllocator};
 use vm_device::interrupt::{
     InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
@@ -29,6 +31,8 @@ use vmm_sys_util::eventfd::EventFd;
 
 #[derive(Debug, Error)]
 pub enum VfioPciError {
+    #[error("Failed to create user memory region: {0}")]
+    CreateUserMemoryRegion(#[source] HypervisorVmError),
     #[error("Failed to DMA map: {0}")]
     DmaMap(#[source] vfio_ioctls::VfioError),
     #[error("Failed to DMA unmap: {0}")]
@@ -39,10 +43,14 @@ pub enum VfioPciError {
     EnableMsi(#[source] VfioError),
     #[error("Failed to enable MSI-x: {0}")]
     EnableMsix(#[source] VfioError),
-    #[error("Failed to map VFIO PCI region into guest: {0}")]
-    MapRegionGuest(#[source] HypervisorVmError),
+    #[error("Failed to mmap the area")]
+    MmapArea,
     #[error("Failed to notifier's eventfd")]
     MissingNotifier,
+    #[error("Invalid region alignment")]
+    RegionAlignment,
+    #[error("Invalid region size")]
+    RegionSize,
 }
 
 #[derive(Copy, Clone)]
@@ -223,8 +231,6 @@ pub struct MmioRegion {
     pub length: GuestUsize,
     pub(crate) type_: PciBarRegionType,
     pub(crate) index: u32,
-    pub(crate) host_addr: Option<u64>,
-    pub(crate) mmap_size: Option<usize>,
     pub(crate) user_memory_regions: Vec<UserMemoryRegion>,
 }
 
 #[derive(Debug, Error)]
@@ -545,8 +551,6 @@ impl VfioCommon {
                 length: region_size,
                 type_: region_type,
                 index: bar_id as u32,
-                host_addr: None,
-                mmap_size: None,
                 user_memory_regions: Vec::new(),
             });
 
@@ -1058,71 +1062,84 @@ impl VfioPciDevice {
         (size & 0xfff) == 0
     }
 
-    fn generate_user_memory_regions<F>(
+    fn generate_sparse_areas(
+        caps: &[VfioRegionInfoCap],
         region_index: u32,
         region_start: u64,
         region_size: u64,
-        host_addr: u64,
-        mem_slot: F,
         vfio_msix: Option<&VfioMsix>,
-    ) -> Vec<UserMemoryRegion>
-    where
-        F: Fn() -> u32,
-    {
-        if !Self::is_4k_aligned(region_start) {
-            error!(
-                "Region start address 0x{:x} must be at least aligned on 4KiB",
-                region_start
-            );
-        }
-        if !Self::is_4k_multiple(region_size) {
-            error!(
-                "Region size 0x{:x} must be at least a multiple of 4KiB",
-                region_size
-            );
-        }
-
-        // Using a BtreeMap as the list provided through the iterator is sorted
-        // by key. This ensures proper split of the whole region.
-        let mut inter_ranges = BTreeMap::new();
-        if let Some(msix) = vfio_msix {
-            if region_index == msix.cap.table_bir() {
-                let (offset, size) = msix.cap.table_range();
-                let base = region_start + offset;
-                inter_ranges.insert(base, size);
-            }
-            if region_index == msix.cap.pba_bir() {
-                let (offset, size) = msix.cap.pba_range();
-                let base = region_start + offset;
-                inter_ranges.insert(base, size);
-            }
-        }
-
-        let mut user_memory_regions = Vec::new();
-        let mut new_start = region_start;
-        for (range_start, range_size) in inter_ranges {
-            if range_start > new_start {
-                user_memory_regions.push(UserMemoryRegion {
-                    slot: mem_slot(),
-                    start: new_start,
-                    size: range_start - new_start,
-                    host_addr: host_addr + new_start - region_start,
-                });
-            }
-            new_start = Self::align_4k(range_start + range_size);
-        }
-        if region_start + region_size > new_start {
-            user_memory_regions.push(UserMemoryRegion {
-                slot: mem_slot(),
-                start: new_start,
-                size: region_start + region_size - new_start,
-                host_addr: host_addr + new_start - region_start,
-            });
-        }
-
-        user_memory_regions
+    ) -> Result<Vec<VfioRegionSparseMmapArea>, VfioPciError> {
+        for cap in caps {
+            match cap {
+                VfioRegionInfoCap::SparseMmap(sparse_mmap) => return Ok(sparse_mmap.areas.clone()),
+                VfioRegionInfoCap::MsixMappable => {
+                    if !Self::is_4k_aligned(region_start) {
+                        error!(
+                            "Region start address 0x{:x} must be at least aligned on 4KiB",
+                            region_start
+                        );
+                        return Err(VfioPciError::RegionAlignment);
+                    }
+                    if !Self::is_4k_multiple(region_size) {
+                        error!(
+                            "Region size 0x{:x} must be at least a multiple of 4KiB",
+                            region_size
+                        );
+                        return Err(VfioPciError::RegionSize);
+                    }
+
+                    // In case the region contains the MSI-X vectors table or
+                    // the MSI-X PBA table, we must calculate the subregions
+                    // around them, leading to a list of sparse areas.
+                    // We want to make sure we will still trap MMIO accesses
+                    // to these MSI-X specific ranges.
+                    //
+                    // Using a BtreeMap as the list provided through the iterator is sorted
+                    // by key. This ensures proper split of the whole region.
+                    let mut inter_ranges = BTreeMap::new();
+                    if let Some(msix) = vfio_msix {
+                        if region_index == msix.cap.table_bir() {
+                            let (offset, size) = msix.cap.table_range();
+                            inter_ranges.insert(offset, size);
+                        }
+                        if region_index == msix.cap.pba_bir() {
+                            let (offset, size) = msix.cap.pba_range();
+                            inter_ranges.insert(offset, size);
+                        }
+                    }
+
+                    let mut sparse_areas = Vec::new();
+                    let mut current_offset = 0;
+                    for (range_offset, range_size) in inter_ranges {
+                        if range_offset > current_offset {
+                            sparse_areas.push(VfioRegionSparseMmapArea {
+                                offset: current_offset,
+                                size: range_offset - current_offset,
+                            });
+                        }
+                        current_offset = Self::align_4k(range_offset + range_size);
+                    }
+
+                    if region_size > current_offset {
+                        sparse_areas.push(VfioRegionSparseMmapArea {
+                            offset: current_offset,
+                            size: region_size - current_offset,
+                        });
+                    }
+
+                    return Ok(sparse_areas);
+                }
+                _ => {}
+            }
+        }
+
+        // In case no relevant capabilities have been found, create a single
+        // sparse area corresponding to the entire MMIO region.
+        Ok(vec![VfioRegionSparseMmapArea {
+            offset: 0,
+            size: region_size,
+        }])
     }
 
     /// Map MMIO regions into the guest, and avoid VM exits when the guest tries
@@ -1173,42 +1190,46 @@ impl VfioPciDevice {
                 }
 
                 let mmap_size = self.device.get_region_size(region.index);
-                let offset = self.device.get_region_offset(region.index);
-
-                let host_addr = unsafe {
-                    libc::mmap(
-                        null_mut(),
-                        mmap_size as usize,
-                        prot,
-                        libc::MAP_SHARED,
-                        fd,
-                        offset as libc::off_t,
-                    )
-                };
-
-                if host_addr == libc::MAP_FAILED {
-                    error!(
-                        "Could not mmap region index {}: {}",
-                        region.index,
-                        io::Error::last_os_error()
-                    );
-                    continue;
-                }
-
-                // In case the region that is being mapped contains the MSI-X
-                // vectors table or the MSI-X PBA table, we must adjust what
-                // is being declared through the hypervisor. We want to make
-                // sure we will still trap MMIO accesses to these MSI-X
-                // specific ranges.
-                let user_memory_regions = Self::generate_user_memory_regions(
+                let mmap_offset = self.device.get_region_offset(region.index);
+
+                let sparse_areas = Self::generate_sparse_areas(
+                    &caps,
                     region.index,
-                    region.start.raw_value(),
+                    region.start.0,
                     mmap_size,
-                    host_addr as u64,
-                    &mem_slot,
                     self.common.interrupt.msix.as_ref(),
-                );
-
-                for user_memory_region in user_memory_regions.iter() {
+                )?;
+
+                let mut user_memory_regions = Vec::new();
+                for area in sparse_areas.iter() {
+                    let host_addr = unsafe {
+                        libc::mmap(
+                            null_mut(),
+                            area.size as usize,
+                            prot,
+                            libc::MAP_SHARED,
+                            fd,
+                            mmap_offset as libc::off_t + area.offset as libc::off_t,
+                        )
+                    };
+
+                    if host_addr == libc::MAP_FAILED {
+                        error!(
+                            "Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}",
+                            area.offset,
+                            area.size,
+                            std::io::Error::last_os_error()
+                        );
+                        return Err(VfioPciError::MmapArea);
+                    }
+
+                    let user_memory_region = UserMemoryRegion {
+                        slot: mem_slot(),
+                        start: region.start.0 + area.offset,
+                        size: area.size,
+                        host_addr: host_addr as u64,
+                    };
+
                     let mem_region = vm.make_user_memory_region(
                         user_memory_region.slot,
                         user_memory_region.start,
@@ -1219,12 +1240,11 @@ impl VfioPciDevice {
                     );
 
                     vm.create_user_memory_region(mem_region)
-                        .map_err(VfioPciError::MapRegionGuest)?;
+                        .map_err(VfioPciError::CreateUserMemoryRegion)?;
+
+                    user_memory_regions.push(user_memory_region);
                 }
 
-                // Update the region with memory mapped info.
-                region.host_addr = Some(host_addr as u64);
-                region.mmap_size = Some(mmap_size as usize);
                 region.user_memory_regions = user_memory_regions;
             }
         }
@@ -1248,10 +1268,13 @@ impl VfioPciDevice {
                 if let Err(e) = self.vm.remove_user_memory_region(r) {
                     error!("Could not remove the userspace memory region: {}", e);
                 }
-            }
 
-            if let (Some(host_addr), Some(mmap_size)) = (region.host_addr, region.mmap_size) {
-                let ret = unsafe { libc::munmap(host_addr as *mut libc::c_void, mmap_size) };
+                let ret = unsafe {
+                    libc::munmap(
+                        user_memory_region.host_addr as *mut libc::c_void,
+                        user_memory_region.size as usize,
+                    )
+                };
                 if ret != 0 {
                     error!(
                         "Could not unmap region {}, error:{}",