vfio: Map MMIO regions into the guest

VFIO explicitly tells us if an MMIO region can be mapped into the guest
address space or not. Except for MSI-X table BARs, we try to map them
into the guest whenever VFIO allows us to do so. This avoids unnecessary
VM exits when the guest tries to access those regions.

Signed-off-by: Zhang, Xiong Y <xiong.y.zhang@intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
This commit is contained in:
Chao Peng 2019-07-14 18:27:38 +02:00 committed by Samuel Ortiz
parent c93d5361b8
commit b746dd7116
2 changed files with 177 additions and 3 deletions

View File

@ -100,6 +100,13 @@ impl fmt::Display for VfioError {
}
}
/// A `vfio_region_info` header immediately followed by a variable-length
/// byte buffer for region capability data.
///
/// `#[repr(C)]` keeps the fields in declaration order, so `cap_info` starts
/// right after `region_info`. Instances are allocated with
/// `vec_with_array_field`, which reserves room for the trailing bytes.
#[repr(C)]
#[derive(Debug, Default)]
struct vfio_region_info_with_cap {
// Fixed-size region description passed to VFIO_DEVICE_GET_REGION_INFO.
region_info: vfio_region_info,
// Zero-sized marker for the trailing capability bytes; their actual length
// is chosen at allocation time.
cap_info: __IncompleteArrayField<u8>,
}
/// Wrapper owning the `File` handle of a VFIO container.
// NOTE(review): presumably the opened VFIO container device node; confirm
// against the constructor, which is not visible in this chunk.
struct VfioContainer {
container: File,
}
@ -369,6 +376,7 @@ struct VfioRegion {
flags: u32,
size: u64,
offset: u64,
mmap: (u64, u64),
}
struct VfioIrq {
@ -425,8 +433,10 @@ impl VfioDeviceInfo {
let mut regions: Vec<VfioRegion> = Vec::new();
for i in VFIO_PCI_BAR0_REGION_INDEX..self.num_regions {
let argsz: u32 = mem::size_of::<vfio_region_info>() as u32;
let mut reg_info = vfio_region_info {
argsz: mem::size_of::<vfio_region_info>() as u32,
argsz,
flags: 0,
index: i,
cap_offset: 0,
@ -435,7 +445,7 @@ impl VfioDeviceInfo {
};
// Safe as we are the owner of dev and reg_info which are valid value,
// and we verify the return value.
let ret = unsafe {
let mut ret = unsafe {
ioctl_with_mut_ref(&self.device, VFIO_DEVICE_GET_REGION_INFO(), &mut reg_info)
};
if ret < 0 {
@ -443,10 +453,61 @@ impl VfioDeviceInfo {
continue;
}
let mut mmap_size: u64 = reg_info.size;
let mut mmap_offset: u64 = 0;
if reg_info.flags & VFIO_REGION_INFO_FLAG_CAPS != 0 && reg_info.argsz > argsz {
let cap_len: usize = (reg_info.argsz - argsz) as usize;
let mut region_with_cap =
vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
region_with_cap[0].region_info.argsz = reg_info.argsz;
region_with_cap[0].region_info.flags = 0;
region_with_cap[0].region_info.index = i;
region_with_cap[0].region_info.cap_offset = 0;
region_with_cap[0].region_info.size = 0;
region_with_cap[0].region_info.offset = 0;
// Safe as we are the owner of dev and region_info which are valid value,
// and we verify the return value.
ret = unsafe {
ioctl_with_mut_ref(
&self.device,
VFIO_DEVICE_GET_REGION_INFO(),
&mut (region_with_cap[0].region_info),
)
};
if ret < 0 {
error!("Could not get region #{} info", i);
continue;
}
// region_with_cap[0].cap_info may contain vfio_region_info_cap_sparse_mmap
// struct or vfio_region_info_cap_type struct. Both of them begin with
// vfio_info_cap_header.
// So it is safe to convert cap_info into a vfio_info_cap_header pointer first,
// and safe to access its elements through this pointer.
#[allow(clippy::cast_ptr_alignment)]
let cap_header =
unsafe { region_with_cap[0].cap_info.as_ptr() as *const vfio_info_cap_header };
if unsafe { u32::from((*cap_header).id) } == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
// cap_info is vfio_region_sparse_mmap here
// so safe to convert cap_info into vfio_info_region_sparse_mmap pointer, and
// safe to access its elements through this pointer.
#[allow(clippy::cast_ptr_alignment)]
let sparse_mmap = unsafe {
region_with_cap[0].cap_info.as_ptr()
as *const vfio_region_info_cap_sparse_mmap
};
let mmap_area = unsafe {
(*sparse_mmap).areas.as_ptr() as *const vfio_region_sparse_mmap_area
};
mmap_size = unsafe { (*mmap_area).size };
mmap_offset = unsafe { (*mmap_area).offset };
}
}
let region = VfioRegion {
flags: reg_info.flags,
size: reg_info.size,
offset: reg_info.offset,
mmap: (mmap_offset, mmap_size),
};
debug!("Region #{}", i);
@ -610,6 +671,44 @@ impl VfioDevice {
self.disable_irq(VFIO_PCI_MSIX_IRQ_INDEX)
}
/// Returns the flags of the region at `index`.
///
/// An out-of-range `index` yields `0`.
pub fn get_region_flags(&self, index: u32) -> u32 {
    self.regions
        .get(index as usize)
        .map_or(0, |region| region.flags)
}
/// Returns the offset of the region at `index`.
///
/// An out-of-range `index` yields `0`.
pub fn get_region_offset(&self, index: u32) -> u64 {
    self.regions
        .get(index as usize)
        .map_or(0, |region| region.offset)
}
/// Returns the `(offset, size)` mmap window recorded for the region at `index`.
///
/// An out-of-range `index` is logged as a warning and yields `(0, 0)`.
pub fn get_region_mmap(&self, index: u32) -> (u64, u64) {
    if let Some(region) = self.regions.get(index as usize) {
        return region.mmap;
    }

    warn!("get_region_mmap with invalid index: {}", index);
    (0, 0)
}
/// Returns the size of the region at `index`.
///
/// An out-of-range `index` is logged as a warning and yields `0`.
pub fn get_region_size(&self, index: u32) -> u64 {
    if let Some(region) = self.regions.get(index as usize) {
        return region.size;
    }

    warn!("get_region_size with invalid index: {}", index);
    0
}
/// Read region's data from VFIO device into buf
/// index: region num
/// buf: data destination and buf length is read size

View File

@ -11,7 +11,9 @@ use crate::vec_with_array_field;
use crate::vfio_device::VfioDevice;
use byteorder::{ByteOrder, LittleEndian};
use devices::BusDevice;
use kvm_bindings::{kvm_irq_routing, kvm_irq_routing_entry, KVM_IRQ_ROUTING_MSI};
use kvm_bindings::{
kvm_irq_routing, kvm_irq_routing_entry, kvm_userspace_memory_region, KVM_IRQ_ROUTING_MSI,
};
use kvm_ioctls::*;
use pci::{
MsiCap, MsixCap, MsixConfig, PciBarConfiguration, PciBarRegionType, PciCapabilityID,
@ -19,6 +21,7 @@ use pci::{
MSIX_TABLE_ENTRY_SIZE,
};
use std::os::unix::io::AsRawFd;
use std::ptr::null_mut;
use std::sync::Arc;
use std::{fmt, io};
use vfio_bindings::bindings::vfio::*;
@ -32,6 +35,7 @@ pub enum VfioPciError {
EventFd(io::Error),
IrqFd(io::Error),
NewVfioPciDevice,
MapRegionGuest(io::Error),
SetGsiRouting(io::Error),
}
pub type Result<T> = std::result::Result<T, VfioPciError>;
@ -43,6 +47,9 @@ impl fmt::Display for VfioPciError {
VfioPciError::EventFd(e) => write!(f, "failed to create eventfd: {}", e),
VfioPciError::IrqFd(e) => write!(f, "failed to register irqfd: {}", e),
VfioPciError::NewVfioPciDevice => write!(f, "failed to create VFIO PCI device"),
VfioPciError::MapRegionGuest(e) => {
write!(f, "failed to map VFIO PCI region into guest: {}", e)
}
VfioPciError::SetGsiRouting(e) => write!(f, "failed to set GSI routes for KVM: {}", e),
}
}
@ -578,6 +585,74 @@ impl VfioPciDevice {
}
None
}
/// Map MMIO regions into the guest, and avoid VM exits when the guest tries
/// to reach those regions.
///
/// Each region whose VFIO flags allow mmap is mapped from the device fd into
/// this process and then registered with KVM as a userspace memory region.
/// KVM slots are taken consecutively starting at `mem_slots`; a slot is only
/// consumed when a region is registered successfully.
///
/// Regions holding the MSI-X table or the MSI-X PBA are never mapped, and a
/// region that fails to mmap is logged and skipped.
///
/// # Errors
///
/// Returns `VfioPciError::MapRegionGuest` when KVM rejects a memory region.
/// The host mapping created for that region is unmapped before returning.
pub fn map_mmio_regions(&mut self, vm: &Arc<VmFd>, mem_slots: u32) -> Result<()> {
    let mut slot = mem_slots;
    let fd = self.device.as_raw_fd();

    for region in self.mmio_regions.iter() {
        // We want to skip the mapping of the BAR containing the MSI-X
        // table (or PBA) even if it is mappable. The reason is we need to
        // trap any access to the MSI-X table and update the GSI routing
        // accordingly.
        if let Some(msix) = &self.interrupt.msix {
            if region.index == msix.cap.table_bir() || region.index == msix.cap.pba_bir() {
                continue;
            }
        }

        let region_flags = self.device.get_region_flags(region.index);
        if region_flags & VFIO_REGION_INFO_FLAG_MMAP == 0 {
            continue;
        }

        // Mirror the region's access rights in the host mapping.
        let mut prot = 0;
        if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
            prot |= libc::PROT_READ;
        }
        if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
            prot |= libc::PROT_WRITE;
        }

        // The mmap window is relative to the start of the region, so it is
        // added to both the file offset and the guest physical address.
        let (mmap_offset, mmap_size) = self.device.get_region_mmap(region.index);
        let offset = self.device.get_region_offset(region.index) + mmap_offset;

        // SAFETY: we let the kernel pick the address (null hint), so no
        // existing mapping is replaced, and the result is checked against
        // MAP_FAILED before use.
        let host_addr = unsafe {
            libc::mmap(
                null_mut(),
                mmap_size as usize,
                prot,
                libc::MAP_SHARED,
                fd,
                offset as libc::off_t,
            )
        };

        if host_addr == libc::MAP_FAILED {
            error!(
                "Could not mmap regions, error:{}",
                io::Error::last_os_error()
            );
            continue;
        }

        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: region.start.raw_value() + mmap_offset,
            memory_size: mmap_size,
            userspace_addr: host_addr as u64,
            flags: 0,
        };

        // Safe because the guest regions are guaranteed not to overlap.
        if let Err(e) = unsafe { vm.set_user_memory_region(mem_region) } {
            // KVM did not take the mapping, so release it instead of
            // leaking it on the error path.
            // SAFETY: host_addr and mmap_size describe exactly the mapping
            // created above, which nothing else references.
            if unsafe { libc::munmap(host_addr, mmap_size as usize) } != 0 {
                error!(
                    "Could not unmap region, error:{}",
                    io::Error::last_os_error()
                );
            }
            return Err(VfioPciError::MapRegionGuest(e));
        }

        slot += 1;
    }

    Ok(())
}
}
impl Drop for VfioPciDevice {