From b746dd7116b49fc793d2e231f932ffd293664c7e Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Sun, 14 Jul 2019 18:27:38 +0200 Subject: [PATCH] vfio: Map MMIO regions into the guest VFIO explicitly tells us if an MMIO region can be mapped into the guest address space or not. Except for MSI-X table BARs, we try to map them into the guest whenever VFIO allows us to do so. This avoids unnecessary VM exits when the guest tries to access those regions. Signed-off-by: Zhang, Xiong Y Signed-off-by: Chao Peng Signed-off-by: Samuel Ortiz --- vfio/src/vfio_device.rs | 103 +++++++++++++++++++++++++++++++++++++++- vfio/src/vfio_pci.rs | 77 +++++++++++++++++++++++++++++- 2 files changed, 177 insertions(+), 3 deletions(-) diff --git a/vfio/src/vfio_device.rs b/vfio/src/vfio_device.rs index 0ee39a991..49d881195 100644 --- a/vfio/src/vfio_device.rs +++ b/vfio/src/vfio_device.rs @@ -100,6 +100,13 @@ impl fmt::Display for VfioError { } } +#[repr(C)] +#[derive(Debug, Default)] +struct vfio_region_info_with_cap { + region_info: vfio_region_info, + cap_info: __IncompleteArrayField, +} + struct VfioContainer { container: File, } @@ -369,6 +376,7 @@ struct VfioRegion { flags: u32, size: u64, offset: u64, + mmap: (u64, u64), } struct VfioIrq { @@ -425,8 +433,10 @@ impl VfioDeviceInfo { let mut regions: Vec = Vec::new(); for i in VFIO_PCI_BAR0_REGION_INDEX..self.num_regions { + let argsz: u32 = mem::size_of::() as u32; + let mut reg_info = vfio_region_info { - argsz: mem::size_of::() as u32, + argsz, flags: 0, index: i, cap_offset: 0, @@ -435,7 +445,7 @@ impl VfioDeviceInfo { }; // Safe as we are the owner of dev and reg_info which are valid value, // and we verify the return value. 
- let ret = unsafe { + let mut ret = unsafe { ioctl_with_mut_ref(&self.device, VFIO_DEVICE_GET_REGION_INFO(), &mut reg_info) }; if ret < 0 { @@ -443,10 +453,61 @@ impl VfioDeviceInfo { continue; } + let mut mmap_size: u64 = reg_info.size; + let mut mmap_offset: u64 = 0; + if reg_info.flags & VFIO_REGION_INFO_FLAG_CAPS != 0 && reg_info.argsz > argsz { + let cap_len: usize = (reg_info.argsz - argsz) as usize; + let mut region_with_cap = + vec_with_array_field::(cap_len); + region_with_cap[0].region_info.argsz = reg_info.argsz; + region_with_cap[0].region_info.flags = 0; + region_with_cap[0].region_info.index = i; + region_with_cap[0].region_info.cap_offset = 0; + region_with_cap[0].region_info.size = 0; + region_with_cap[0].region_info.offset = 0; + // Safe as we are the owner of dev and region_info which are valid value, + // and we verify the return value. + ret = unsafe { + ioctl_with_mut_ref( + &self.device, + VFIO_DEVICE_GET_REGION_INFO(), + &mut (region_with_cap[0].region_info), + ) + }; + if ret < 0 { + error!("Could not get region #{} info", i); + continue; + } + // region_with_cap[0].cap_info may contain vfio_region_info_cap_sparse_mmap + // struct or vfio_region_info_cap_type struct. Both of them begin with + // vfio_info_cap_header. + // so safe to convert cap_info into vfio_info_cap_header pointer first, and + // safe to access its elements through this pointer. + #[allow(clippy::cast_ptr_alignment)] + let cap_header = + unsafe { region_with_cap[0].cap_info.as_ptr() as *const vfio_info_cap_header }; + if unsafe { u32::from((*cap_header).id) } == VFIO_REGION_INFO_CAP_SPARSE_MMAP { + // cap_info is vfio_region_sparse_mmap here + // so safe to convert cap_info into vfio_info_region_sparse_mmap pointer, and + // safe to access its elements through this pointer. 
+ #[allow(clippy::cast_ptr_alignment)] + let sparse_mmap = unsafe { + region_with_cap[0].cap_info.as_ptr() + as *const vfio_region_info_cap_sparse_mmap + }; + let mmap_area = unsafe { + (*sparse_mmap).areas.as_ptr() as *const vfio_region_sparse_mmap_area + }; + mmap_size = unsafe { (*mmap_area).size }; + mmap_offset = unsafe { (*mmap_area).offset }; + } + } + let region = VfioRegion { flags: reg_info.flags, size: reg_info.size, offset: reg_info.offset, + mmap: (mmap_offset, mmap_size), }; debug!("Region #{}", i); @@ -610,6 +671,44 @@ impl VfioDevice { self.disable_irq(VFIO_PCI_MSIX_IRQ_INDEX) } + /// get a region's flag + pub fn get_region_flags(&self, index: u32) -> u32 { + match self.regions.get(index as usize) { + Some(v) => v.flags, + None => 0, + } + } + + /// get a region's offset + pub fn get_region_offset(&self, index: u32) -> u64 { + match self.regions.get(index as usize) { + Some(v) => v.offset, + None => 0, + } + } + + /// get a region's mmap info + pub fn get_region_mmap(&self, index: u32) -> (u64, u64) { + match self.regions.get(index as usize) { + Some(v) => v.mmap, + None => { + warn!("get_region_mmap with invalid index: {}", index); + (0, 0) + } + } + } + + /// get a region's size + pub fn get_region_size(&self, index: u32) -> u64 { + match self.regions.get(index as usize) { + Some(v) => v.size, + None => { + warn!("get_region_size with invalid index: {}", index); + 0 + } + } + } + /// Read region's data from VFIO device into buf /// index: region num /// buf: data destination and buf length is read size diff --git a/vfio/src/vfio_pci.rs b/vfio/src/vfio_pci.rs index 9ace535a3..f25be3b19 100644 --- a/vfio/src/vfio_pci.rs +++ b/vfio/src/vfio_pci.rs @@ -11,7 +11,9 @@ use crate::vec_with_array_field; use crate::vfio_device::VfioDevice; use byteorder::{ByteOrder, LittleEndian}; use devices::BusDevice; -use kvm_bindings::{kvm_irq_routing, kvm_irq_routing_entry, KVM_IRQ_ROUTING_MSI}; +use kvm_bindings::{ + kvm_irq_routing, kvm_irq_routing_entry, 
kvm_userspace_memory_region, KVM_IRQ_ROUTING_MSI, +}; use kvm_ioctls::*; use pci::{ MsiCap, MsixCap, MsixConfig, PciBarConfiguration, PciBarRegionType, PciCapabilityID, @@ -19,6 +21,7 @@ use pci::{ MSIX_TABLE_ENTRY_SIZE, }; use std::os::unix::io::AsRawFd; +use std::ptr::null_mut; use std::sync::Arc; use std::{fmt, io}; use vfio_bindings::bindings::vfio::*; @@ -32,6 +35,7 @@ pub enum VfioPciError { EventFd(io::Error), IrqFd(io::Error), NewVfioPciDevice, + MapRegionGuest(io::Error), SetGsiRouting(io::Error), } pub type Result = std::result::Result; @@ -43,6 +47,9 @@ impl fmt::Display for VfioPciError { VfioPciError::EventFd(e) => write!(f, "failed to create eventfd: {}", e), VfioPciError::IrqFd(e) => write!(f, "failed to register irqfd: {}", e), VfioPciError::NewVfioPciDevice => write!(f, "failed to create VFIO PCI device"), + VfioPciError::MapRegionGuest(e) => { + write!(f, "failed to map VFIO PCI region into guest: {}", e) + } VfioPciError::SetGsiRouting(e) => write!(f, "failed to set GSI routes for KVM: {}", e), } } @@ -578,6 +585,74 @@ impl VfioPciDevice { } None } + + /// Map MMIO regions into the guest, and avoid VM exits when the guest tries + /// to reach those regions. + pub fn map_mmio_regions(&mut self, vm: &Arc, mem_slots: u32) -> Result<()> { + let mut slot = mem_slots; + let fd = self.device.as_raw_fd(); + + for region in self.mmio_regions.iter() { + // We want to skip the mapping of the BAR containing the MSI-X + // table even if it is mappable. The reason is we need to trap + // any access to the MSI-X table and update the GSI routing + // accordingly. 
+ if let Some(msix) = &self.interrupt.msix { + if region.index == msix.cap.table_bir() || region.index == msix.cap.pba_bir() { + continue; + } + } + + let region_flags = self.device.get_region_flags(region.index); + if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 { + let mut prot = 0; + if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 { + prot |= libc::PROT_READ; + } + if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 { + prot |= libc::PROT_WRITE; + } + let (mmap_offset, mmap_size) = self.device.get_region_mmap(region.index); + let offset = self.device.get_region_offset(region.index) + mmap_offset; + + let host_addr = unsafe { + libc::mmap( + null_mut(), + mmap_size as usize, + prot, + libc::MAP_SHARED, + fd, + offset as libc::off_t, + ) + }; + + if host_addr == libc::MAP_FAILED { + error!( + "Could not mmap regions, error:{}", + io::Error::last_os_error() + ); + continue; + } + + let mem_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: region.start.raw_value() + mmap_offset, + memory_size: mmap_size as u64, + userspace_addr: host_addr as u64, + flags: 0, + }; + + // Safe because the guest regions are guaranteed not to overlap. + unsafe { + vm.set_user_memory_region(mem_region) + .map_err(VfioPciError::MapRegionGuest)?; + } + slot += 1; + } + } + + Ok(()) + } } impl Drop for VfioPciDevice {