mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2025-01-03 11:25:20 +00:00
pci: vfio: Implement support for sparse mmap
Reorganize the code to leverage the same mechanics implemented for vfio-user, aimed at supporting sparse memory mappings for a single region. Relying on the capabilities returned by the vfio-ioctls crate, we create a list of sparse areas depending on whether we get the SPARSE_MMAP or the MSIX_MAPPABLE capability, or a single sparse area in case we couldn't find any relevant capability. The list of sparse areas is then used to create both the memory mappings in the Cloud Hypervisor address space and the hypervisor user memory regions. This allows for the simplification of the MmioRegion structure. Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
This commit is contained in:
parent
707cea2182
commit
1108bd1967
229
pci/src/vfio.rs
229
pci/src/vfio.rs
@ -18,7 +18,9 @@ use std::ptr::null_mut;
|
|||||||
use std::sync::{Arc, Barrier, Mutex};
|
use std::sync::{Arc, Barrier, Mutex};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use vfio_bindings::bindings::vfio::*;
|
use vfio_bindings::bindings::vfio::*;
|
||||||
use vfio_ioctls::{VfioContainer, VfioDevice, VfioIrq, VfioRegionInfoCap};
|
use vfio_ioctls::{
|
||||||
|
VfioContainer, VfioDevice, VfioIrq, VfioRegionInfoCap, VfioRegionSparseMmapArea,
|
||||||
|
};
|
||||||
use vm_allocator::{AddressAllocator, SystemAllocator};
|
use vm_allocator::{AddressAllocator, SystemAllocator};
|
||||||
use vm_device::interrupt::{
|
use vm_device::interrupt::{
|
||||||
InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
|
InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
|
||||||
@ -29,6 +31,8 @@ use vmm_sys_util::eventfd::EventFd;
|
|||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum VfioPciError {
|
pub enum VfioPciError {
|
||||||
|
#[error("Failed to create user memory region: {0}")]
|
||||||
|
CreateUserMemoryRegion(#[source] HypervisorVmError),
|
||||||
#[error("Failed to DMA map: {0}")]
|
#[error("Failed to DMA map: {0}")]
|
||||||
DmaMap(#[source] vfio_ioctls::VfioError),
|
DmaMap(#[source] vfio_ioctls::VfioError),
|
||||||
#[error("Failed to DMA unmap: {0}")]
|
#[error("Failed to DMA unmap: {0}")]
|
||||||
@ -39,10 +43,14 @@ pub enum VfioPciError {
|
|||||||
EnableMsi(#[source] VfioError),
|
EnableMsi(#[source] VfioError),
|
||||||
#[error("Failed to enable MSI-x: {0}")]
|
#[error("Failed to enable MSI-x: {0}")]
|
||||||
EnableMsix(#[source] VfioError),
|
EnableMsix(#[source] VfioError),
|
||||||
#[error("Failed to map VFIO PCI region into guest: {0}")]
|
#[error("Failed to mmap the area")]
|
||||||
MapRegionGuest(#[source] HypervisorVmError),
|
MmapArea,
|
||||||
#[error("Failed to notifier's eventfd")]
|
#[error("Failed to notifier's eventfd")]
|
||||||
MissingNotifier,
|
MissingNotifier,
|
||||||
|
#[error("Invalid region alignment")]
|
||||||
|
RegionAlignment,
|
||||||
|
#[error("Invalid region size")]
|
||||||
|
RegionSize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
@ -223,8 +231,6 @@ pub struct MmioRegion {
|
|||||||
pub length: GuestUsize,
|
pub length: GuestUsize,
|
||||||
pub(crate) type_: PciBarRegionType,
|
pub(crate) type_: PciBarRegionType,
|
||||||
pub(crate) index: u32,
|
pub(crate) index: u32,
|
||||||
pub(crate) host_addr: Option<u64>,
|
|
||||||
pub(crate) mmap_size: Option<usize>,
|
|
||||||
pub(crate) user_memory_regions: Vec<UserMemoryRegion>,
|
pub(crate) user_memory_regions: Vec<UserMemoryRegion>,
|
||||||
}
|
}
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
@ -545,8 +551,6 @@ impl VfioCommon {
|
|||||||
length: region_size,
|
length: region_size,
|
||||||
type_: region_type,
|
type_: region_type,
|
||||||
index: bar_id as u32,
|
index: bar_id as u32,
|
||||||
host_addr: None,
|
|
||||||
mmap_size: None,
|
|
||||||
user_memory_regions: Vec::new(),
|
user_memory_regions: Vec::new(),
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -1058,71 +1062,84 @@ impl VfioPciDevice {
|
|||||||
(size & 0xfff) == 0
|
(size & 0xfff) == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
fn generate_user_memory_regions<F>(
|
fn generate_sparse_areas(
|
||||||
|
caps: &[VfioRegionInfoCap],
|
||||||
region_index: u32,
|
region_index: u32,
|
||||||
region_start: u64,
|
region_start: u64,
|
||||||
region_size: u64,
|
region_size: u64,
|
||||||
host_addr: u64,
|
|
||||||
mem_slot: F,
|
|
||||||
vfio_msix: Option<&VfioMsix>,
|
vfio_msix: Option<&VfioMsix>,
|
||||||
) -> Vec<UserMemoryRegion>
|
) -> Result<Vec<VfioRegionSparseMmapArea>, VfioPciError> {
|
||||||
where
|
for cap in caps {
|
||||||
F: Fn() -> u32,
|
match cap {
|
||||||
{
|
VfioRegionInfoCap::SparseMmap(sparse_mmap) => return Ok(sparse_mmap.areas.clone()),
|
||||||
if !Self::is_4k_aligned(region_start) {
|
VfioRegionInfoCap::MsixMappable => {
|
||||||
error!(
|
if !Self::is_4k_aligned(region_start) {
|
||||||
"Region start address 0x{:x} must be at least aligned on 4KiB",
|
error!(
|
||||||
region_start
|
"Region start address 0x{:x} must be at least aligned on 4KiB",
|
||||||
);
|
region_start
|
||||||
}
|
);
|
||||||
if !Self::is_4k_multiple(region_size) {
|
return Err(VfioPciError::RegionAlignment);
|
||||||
error!(
|
}
|
||||||
"Region size 0x{:x} must be at least a multiple of 4KiB",
|
if !Self::is_4k_multiple(region_size) {
|
||||||
region_size
|
error!(
|
||||||
);
|
"Region size 0x{:x} must be at least a multiple of 4KiB",
|
||||||
}
|
region_size
|
||||||
|
);
|
||||||
|
return Err(VfioPciError::RegionSize);
|
||||||
|
}
|
||||||
|
|
||||||
// Using a BtreeMap as the list provided through the iterator is sorted
|
// In case the region contains the MSI-X vectors table or
|
||||||
// by key. This ensures proper split of the whole region.
|
// the MSI-X PBA table, we must calculate the subregions
|
||||||
let mut inter_ranges = BTreeMap::new();
|
// around them, leading to a list of sparse areas.
|
||||||
if let Some(msix) = vfio_msix {
|
// We want to make sure we will still trap MMIO accesses
|
||||||
if region_index == msix.cap.table_bir() {
|
// to these MSI-X specific ranges.
|
||||||
let (offset, size) = msix.cap.table_range();
|
//
|
||||||
let base = region_start + offset;
|
// Using a BtreeMap as the list provided through the iterator is sorted
|
||||||
inter_ranges.insert(base, size);
|
// by key. This ensures proper split of the whole region.
|
||||||
}
|
let mut inter_ranges = BTreeMap::new();
|
||||||
if region_index == msix.cap.pba_bir() {
|
if let Some(msix) = vfio_msix {
|
||||||
let (offset, size) = msix.cap.pba_range();
|
if region_index == msix.cap.table_bir() {
|
||||||
let base = region_start + offset;
|
let (offset, size) = msix.cap.table_range();
|
||||||
inter_ranges.insert(base, size);
|
inter_ranges.insert(offset, size);
|
||||||
|
}
|
||||||
|
if region_index == msix.cap.pba_bir() {
|
||||||
|
let (offset, size) = msix.cap.pba_range();
|
||||||
|
inter_ranges.insert(offset, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut sparse_areas = Vec::new();
|
||||||
|
let mut current_offset = 0;
|
||||||
|
for (range_offset, range_size) in inter_ranges {
|
||||||
|
if range_offset > current_offset {
|
||||||
|
sparse_areas.push(VfioRegionSparseMmapArea {
|
||||||
|
offset: current_offset,
|
||||||
|
size: range_offset - current_offset,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
current_offset = Self::align_4k(range_offset + range_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if region_size > current_offset {
|
||||||
|
sparse_areas.push(VfioRegionSparseMmapArea {
|
||||||
|
offset: current_offset,
|
||||||
|
size: region_size - current_offset,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(sparse_areas);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut user_memory_regions = Vec::new();
|
// In case no relevant capabilities have been found, create a single
|
||||||
let mut new_start = region_start;
|
// sparse area corresponding to the entire MMIO region.
|
||||||
for (range_start, range_size) in inter_ranges {
|
Ok(vec![VfioRegionSparseMmapArea {
|
||||||
if range_start > new_start {
|
offset: 0,
|
||||||
user_memory_regions.push(UserMemoryRegion {
|
size: region_size,
|
||||||
slot: mem_slot(),
|
}])
|
||||||
start: new_start,
|
|
||||||
size: range_start - new_start,
|
|
||||||
host_addr: host_addr + new_start - region_start,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
new_start = Self::align_4k(range_start + range_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
if region_start + region_size > new_start {
|
|
||||||
user_memory_regions.push(UserMemoryRegion {
|
|
||||||
slot: mem_slot(),
|
|
||||||
start: new_start,
|
|
||||||
size: region_start + region_size - new_start,
|
|
||||||
host_addr: host_addr + new_start - region_start,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
user_memory_regions
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Map MMIO regions into the guest, and avoid VM exits when the guest tries
|
/// Map MMIO regions into the guest, and avoid VM exits when the guest tries
|
||||||
@ -1173,42 +1190,46 @@ impl VfioPciDevice {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mmap_size = self.device.get_region_size(region.index);
|
let mmap_size = self.device.get_region_size(region.index);
|
||||||
let offset = self.device.get_region_offset(region.index);
|
let mmap_offset = self.device.get_region_offset(region.index);
|
||||||
|
|
||||||
let host_addr = unsafe {
|
let sparse_areas = Self::generate_sparse_areas(
|
||||||
libc::mmap(
|
&caps,
|
||||||
null_mut(),
|
|
||||||
mmap_size as usize,
|
|
||||||
prot,
|
|
||||||
libc::MAP_SHARED,
|
|
||||||
fd,
|
|
||||||
offset as libc::off_t,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
if host_addr == libc::MAP_FAILED {
|
|
||||||
error!(
|
|
||||||
"Could not mmap region index {}: {}",
|
|
||||||
region.index,
|
|
||||||
io::Error::last_os_error()
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// In case the region that is being mapped contains the MSI-X
|
|
||||||
// vectors table or the MSI-X PBA table, we must adjust what
|
|
||||||
// is being declared through the hypervisor. We want to make
|
|
||||||
// sure we will still trap MMIO accesses to these MSI-X
|
|
||||||
// specific ranges.
|
|
||||||
let user_memory_regions = Self::generate_user_memory_regions(
|
|
||||||
region.index,
|
region.index,
|
||||||
region.start.raw_value(),
|
region.start.0,
|
||||||
mmap_size,
|
mmap_size,
|
||||||
host_addr as u64,
|
|
||||||
&mem_slot,
|
|
||||||
self.common.interrupt.msix.as_ref(),
|
self.common.interrupt.msix.as_ref(),
|
||||||
);
|
)?;
|
||||||
for user_memory_region in user_memory_regions.iter() {
|
|
||||||
|
let mut user_memory_regions = Vec::new();
|
||||||
|
for area in sparse_areas.iter() {
|
||||||
|
let host_addr = unsafe {
|
||||||
|
libc::mmap(
|
||||||
|
null_mut(),
|
||||||
|
area.size as usize,
|
||||||
|
prot,
|
||||||
|
libc::MAP_SHARED,
|
||||||
|
fd,
|
||||||
|
mmap_offset as libc::off_t + area.offset as libc::off_t,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
if host_addr == libc::MAP_FAILED {
|
||||||
|
error!(
|
||||||
|
"Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}",
|
||||||
|
area.offset,
|
||||||
|
area.size,
|
||||||
|
std::io::Error::last_os_error()
|
||||||
|
);
|
||||||
|
return Err(VfioPciError::MmapArea);
|
||||||
|
}
|
||||||
|
|
||||||
|
let user_memory_region = UserMemoryRegion {
|
||||||
|
slot: mem_slot(),
|
||||||
|
start: region.start.0 + area.offset,
|
||||||
|
size: area.size,
|
||||||
|
host_addr: host_addr as u64,
|
||||||
|
};
|
||||||
|
|
||||||
let mem_region = vm.make_user_memory_region(
|
let mem_region = vm.make_user_memory_region(
|
||||||
user_memory_region.slot,
|
user_memory_region.slot,
|
||||||
user_memory_region.start,
|
user_memory_region.start,
|
||||||
@ -1219,12 +1240,11 @@ impl VfioPciDevice {
|
|||||||
);
|
);
|
||||||
|
|
||||||
vm.create_user_memory_region(mem_region)
|
vm.create_user_memory_region(mem_region)
|
||||||
.map_err(VfioPciError::MapRegionGuest)?;
|
.map_err(VfioPciError::CreateUserMemoryRegion)?;
|
||||||
|
|
||||||
|
user_memory_regions.push(user_memory_region);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the region with memory mapped info.
|
|
||||||
region.host_addr = Some(host_addr as u64);
|
|
||||||
region.mmap_size = Some(mmap_size as usize);
|
|
||||||
region.user_memory_regions = user_memory_regions;
|
region.user_memory_regions = user_memory_regions;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1248,10 +1268,13 @@ impl VfioPciDevice {
|
|||||||
if let Err(e) = self.vm.remove_user_memory_region(r) {
|
if let Err(e) = self.vm.remove_user_memory_region(r) {
|
||||||
error!("Could not remove the userspace memory region: {}", e);
|
error!("Could not remove the userspace memory region: {}", e);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if let (Some(host_addr), Some(mmap_size)) = (region.host_addr, region.mmap_size) {
|
let ret = unsafe {
|
||||||
let ret = unsafe { libc::munmap(host_addr as *mut libc::c_void, mmap_size) };
|
libc::munmap(
|
||||||
|
user_memory_region.host_addr as *mut libc::c_void,
|
||||||
|
user_memory_region.size as usize,
|
||||||
|
)
|
||||||
|
};
|
||||||
if ret != 0 {
|
if ret != 0 {
|
||||||
error!(
|
error!(
|
||||||
"Could not unmap region {}, error:{}",
|
"Could not unmap region {}, error:{}",
|
||||||
|
Loading…
Reference in New Issue
Block a user