cloud-hypervisor/pci/src/vfio_user.rs
Thomas Barrett 45b01d592a vmm: assign each pci segment 32-bit mmio allocator
Signed-off-by: Thomas Barrett <tbarrett@crusoeenergy.com>
2023-11-20 15:33:50 -08:00

605 lines
19 KiB
Rust

// Copyright © 2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::vfio::{UserMemoryRegion, Vfio, VfioCommon, VfioError, VFIO_COMMON_ID};
use crate::{BarReprogrammingParams, PciBarConfiguration, VfioPciError};
use crate::{PciBdf, PciDevice, PciDeviceError, PciSubclass};
use hypervisor::HypervisorVmError;
use std::any::Any;
use std::os::unix::prelude::AsRawFd;
use std::ptr::null_mut;
use std::sync::{Arc, Barrier, Mutex};
use std::u32;
use thiserror::Error;
use vfio_bindings::bindings::vfio::*;
use vfio_ioctls::VfioIrq;
use vfio_user::{Client, Error as VfioUserError};
use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig};
use vm_device::{BusDevice, Resource};
use vm_memory::bitmap::AtomicBitmap;
use vm_memory::{
Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap,
};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;
pub struct VfioUserPciDevice {
id: String,
vm: Arc<dyn hypervisor::Vm>,
client: Arc<Mutex<Client>>,
common: VfioCommon,
memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
}
#[derive(Error, Debug)]
pub enum VfioUserPciDeviceError {
#[error("Client error: {0}")]
Client(#[source] VfioUserError),
#[error("Failed to map VFIO PCI region into guest: {0}")]
MapRegionGuest(#[source] HypervisorVmError),
#[error("Failed to DMA map: {0}")]
DmaMap(#[source] VfioUserError),
#[error("Failed to DMA unmap: {0}")]
DmaUnmap(#[source] VfioUserError),
#[error("Failed to initialize legacy interrupts: {0}")]
InitializeLegacyInterrupts(#[source] VfioPciError),
#[error("Failed to create VfioCommon: {0}")]
CreateVfioCommon(#[source] VfioPciError),
}
#[derive(Copy, Clone)]
enum PciVfioUserSubclass {
VfioUserSubclass = 0xff,
}
impl PciSubclass for PciVfioUserSubclass {
fn get_register_value(&self) -> u8 {
*self as u8
}
}
impl VfioUserPciDevice {
#[allow(clippy::too_many_arguments)]
pub fn new(
id: String,
vm: &Arc<dyn hypervisor::Vm>,
client: Arc<Mutex<Client>>,
msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
bdf: PciBdf,
memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
snapshot: Option<Snapshot>,
) -> Result<Self, VfioUserPciDeviceError> {
let resettable = client.lock().unwrap().resettable();
if resettable {
client
.lock()
.unwrap()
.reset()
.map_err(VfioUserPciDeviceError::Client)?;
}
let vfio_wrapper = VfioUserClientWrapper {
client: client.clone(),
};
let common = VfioCommon::new(
msi_interrupt_manager,
legacy_interrupt_group,
Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
&PciVfioUserSubclass::VfioUserSubclass,
bdf,
vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID),
)
.map_err(VfioUserPciDeviceError::CreateVfioCommon)?;
Ok(Self {
id,
vm: vm.clone(),
client,
common,
memory_slot,
})
}
pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> {
for mmio_region in &mut self.common.mmio_regions {
let region_flags = self
.client
.lock()
.unwrap()
.region(mmio_region.index)
.unwrap()
.flags;
let file_offset = self
.client
.lock()
.unwrap()
.region(mmio_region.index)
.unwrap()
.file_offset
.clone();
let sparse_areas = self
.client
.lock()
.unwrap()
.region(mmio_region.index)
.unwrap()
.sparse_areas
.clone();
if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
let mut prot = 0;
if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
prot |= libc::PROT_READ;
}
if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
prot |= libc::PROT_WRITE;
}
let mmaps = if sparse_areas.is_empty() {
vec![vfio_region_sparse_mmap_area {
offset: 0,
size: mmio_region.length,
}]
} else {
sparse_areas
};
for s in mmaps.iter() {
// SAFETY: FFI call with correct arguments
let host_addr = unsafe {
libc::mmap(
null_mut(),
s.size as usize,
prot,
libc::MAP_SHARED,
file_offset.as_ref().unwrap().file().as_raw_fd(),
file_offset.as_ref().unwrap().start() as libc::off_t
+ s.offset as libc::off_t,
)
};
if host_addr == libc::MAP_FAILED {
error!(
"Could not mmap regions, error:{}",
std::io::Error::last_os_error()
);
continue;
}
let user_memory_region = UserMemoryRegion {
slot: (self.memory_slot)(),
start: mmio_region.start.0 + s.offset,
size: s.size,
host_addr: host_addr as u64,
};
mmio_region.user_memory_regions.push(user_memory_region);
let mem_region = self.vm.make_user_memory_region(
user_memory_region.slot,
user_memory_region.start,
user_memory_region.size,
user_memory_region.host_addr,
false,
false,
);
self.vm
.create_user_memory_region(mem_region)
.map_err(VfioUserPciDeviceError::MapRegionGuest)?;
}
}
}
Ok(())
}
pub fn unmap_mmio_regions(&mut self) {
for mmio_region in self.common.mmio_regions.iter() {
for user_memory_region in mmio_region.user_memory_regions.iter() {
// Remove region
let r = self.vm.make_user_memory_region(
user_memory_region.slot,
user_memory_region.start,
user_memory_region.size,
user_memory_region.host_addr,
false,
false,
);
if let Err(e) = self.vm.remove_user_memory_region(r) {
error!("Could not remove the userspace memory region: {}", e);
}
// Remove mmaps
// SAFETY: FFI call with correct arguments
let ret = unsafe {
libc::munmap(
user_memory_region.host_addr as *mut libc::c_void,
user_memory_region.size as usize,
)
};
if ret != 0 {
error!(
"Could not unmap region {}, error:{}",
mmio_region.index,
std::io::Error::last_os_error()
);
}
}
}
}
pub fn dma_map(
&mut self,
region: &GuestRegionMmap<AtomicBitmap>,
) -> Result<(), VfioUserPciDeviceError> {
let (fd, offset) = match region.file_offset() {
Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
None => return Ok(()),
};
self.client
.lock()
.unwrap()
.dma_map(offset, region.start_addr().raw_value(), region.len(), fd)
.map_err(VfioUserPciDeviceError::DmaMap)
}
pub fn dma_unmap(
&mut self,
region: &GuestRegionMmap<AtomicBitmap>,
) -> Result<(), VfioUserPciDeviceError> {
self.client
.lock()
.unwrap()
.dma_unmap(region.start_addr().raw_value(), region.len())
.map_err(VfioUserPciDeviceError::DmaUnmap)
}
}
impl BusDevice for VfioUserPciDevice {
fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
self.read_bar(base, offset, data)
}
fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
self.write_bar(base, offset, data)
}
}
#[repr(u32)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[allow(dead_code)]
enum Regions {
Bar0,
Bar1,
Bar2,
Bar3,
Bar4,
Bar5,
Rom,
Config,
Vga,
Migration,
}
struct VfioUserClientWrapper {
client: Arc<Mutex<Client>>,
}
impl Vfio for VfioUserClientWrapper {
fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
self.client
.lock()
.unwrap()
.region_read(index, offset, data)
.ok();
}
fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
self.client
.lock()
.unwrap()
.region_write(index, offset, data)
.ok();
}
fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
self.client
.lock()
.unwrap()
.get_irq_info(irq_index)
.ok()
.map(|i| VfioIrq {
index: i.index,
flags: i.flags,
count: i.count,
})
}
fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
info!(
"Enabling IRQ {:x} number of fds = {:?}",
irq_index,
event_fds.len()
);
let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect();
// Batch into blocks of 16 fds as sendmsg() has a size limit
let mut sent_fds = 0;
let num_fds = event_fds.len() as u32;
while sent_fds < num_fds {
let remaining_fds = num_fds - sent_fds;
let count = if remaining_fds > 16 {
16
} else {
remaining_fds
};
self.client
.lock()
.unwrap()
.set_irqs(
irq_index,
VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
sent_fds,
count,
&fds[sent_fds as usize..(sent_fds + count) as usize],
)
.map_err(VfioError::VfioUser)?;
sent_fds += count;
}
Ok(())
}
fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
info!("Disabling IRQ {:x}", irq_index);
self.client
.lock()
.unwrap()
.set_irqs(
irq_index,
VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
0,
0,
&[],
)
.map_err(VfioError::VfioUser)
}
fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
info!("Unmasking IRQ {:x}", irq_index);
self.client
.lock()
.unwrap()
.set_irqs(
irq_index,
VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
0,
1,
&[],
)
.map_err(VfioError::VfioUser)
}
}
impl PciDevice for VfioUserPciDevice {
fn allocate_bars(
&mut self,
allocator: &Arc<Mutex<SystemAllocator>>,
mmio32_allocator: &mut AddressAllocator,
mmio64_allocator: &mut AddressAllocator,
resources: Option<Vec<Resource>>,
) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
self.common
.allocate_bars(allocator, mmio32_allocator, mmio64_allocator, resources)
}
fn free_bars(
&mut self,
allocator: &mut SystemAllocator,
mmio32_allocator: &mut AddressAllocator,
mmio64_allocator: &mut AddressAllocator,
) -> Result<(), PciDeviceError> {
self.common
.free_bars(allocator, mmio32_allocator, mmio64_allocator)
}
fn as_any(&mut self) -> &mut dyn Any {
self
}
fn detect_bar_reprogramming(
&mut self,
reg_idx: usize,
data: &[u8],
) -> Option<BarReprogrammingParams> {
self.common
.configuration
.detect_bar_reprogramming(reg_idx, data)
}
fn write_config_register(
&mut self,
reg_idx: usize,
offset: u64,
data: &[u8],
) -> Option<Arc<Barrier>> {
self.common.write_config_register(reg_idx, offset, data)
}
fn read_config_register(&mut self, reg_idx: usize) -> u32 {
self.common.read_config_register(reg_idx)
}
fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
self.common.read_bar(base, offset, data)
}
fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
self.common.write_bar(base, offset, data)
}
fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> {
info!("Moving BAR 0x{:x} -> 0x{:x}", old_base, new_base);
for mmio_region in self.common.mmio_regions.iter_mut() {
if mmio_region.start.raw_value() == old_base {
mmio_region.start = GuestAddress(new_base);
for user_memory_region in mmio_region.user_memory_regions.iter_mut() {
// Remove old region
let old_region = self.vm.make_user_memory_region(
user_memory_region.slot,
user_memory_region.start,
user_memory_region.size,
user_memory_region.host_addr,
false,
false,
);
self.vm
.remove_user_memory_region(old_region)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
// Update the user memory region with the correct start address.
if new_base > old_base {
user_memory_region.start += new_base - old_base;
} else {
user_memory_region.start -= old_base - new_base;
}
// Insert new region
let new_region = self.vm.make_user_memory_region(
user_memory_region.slot,
user_memory_region.start,
user_memory_region.size,
user_memory_region.host_addr,
false,
false,
);
self.vm
.create_user_memory_region(new_region)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
}
info!("Moved bar 0x{:x} -> 0x{:x}", old_base, new_base);
}
}
Ok(())
}
fn id(&self) -> Option<String> {
Some(self.id.clone())
}
}
impl Drop for VfioUserPciDevice {
fn drop(&mut self) {
self.unmap_mmio_regions();
if let Some(msix) = &self.common.interrupt.msix {
if msix.bar.enabled() {
self.common.disable_msix();
}
}
if let Some(msi) = &self.common.interrupt.msi {
if msi.cfg.enabled() {
self.common.disable_msi()
}
}
if self.common.interrupt.intx_in_use() {
self.common.disable_intx();
}
if let Err(e) = self.client.lock().unwrap().shutdown() {
error!("Failed shutting down vfio-user client: {}", e);
}
}
}
impl Pausable for VfioUserPciDevice {}
impl Snapshottable for VfioUserPciDevice {
fn id(&self) -> String {
self.id.clone()
}
fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
let mut vfio_pci_dev_snapshot = Snapshot::default();
// Snapshot VfioCommon
vfio_pci_dev_snapshot.add_snapshot(self.common.id(), self.common.snapshot()?);
Ok(vfio_pci_dev_snapshot)
}
}
impl Transportable for VfioUserPciDevice {}
impl Migratable for VfioUserPciDevice {}
pub struct VfioUserDmaMapping<M: GuestAddressSpace> {
client: Arc<Mutex<Client>>,
memory: Arc<M>,
}
impl<M: GuestAddressSpace> VfioUserDmaMapping<M> {
pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self {
Self { client, memory }
}
}
impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> {
fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> {
let mem = self.memory.memory();
let guest_addr = GuestAddress(gpa);
let region = mem.find_region(guest_addr);
if let Some(region) = region {
let file_offset = region.file_offset().unwrap();
let offset = (GuestAddress(gpa).checked_offset_from(region.start_addr())).unwrap()
+ file_offset.start();
self.client
.lock()
.unwrap()
.dma_map(offset, iova, size, file_offset.file().as_raw_fd())
.map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::Other,
format!("Error mapping region: {e}"),
)
})
} else {
Err(std::io::Error::new(
std::io::ErrorKind::Other,
format!("Region not found for 0x{gpa:x}"),
))
}
}
fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
self.client
.lock()
.unwrap()
.dma_unmap(iova, size)
.map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::Other,
format!("Error unmapping region: {e}"),
)
})
}
}