// Copyright © 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause // extern crate devices; extern crate pci; extern crate vm_allocator; use crate::vfio_device::VfioDevice; use devices::BusDevice; use kvm_ioctls::*; use pci::{ PciBarConfiguration, PciBarRegionType, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass, }; use std::sync::Arc; use vfio_bindings::bindings::vfio::*; use vm_allocator::SystemAllocator; use vm_memory::{Address, GuestAddress, GuestUsize}; #[derive(Copy, Clone)] enum PciVfioSubclass { VfioSubclass = 0xff, } impl PciSubclass for PciVfioSubclass { fn get_register_value(&self) -> u8 { *self as u8 } } #[derive(Copy, Clone)] struct MmioRegion { start: GuestAddress, length: GuestUsize, index: u32, } struct VfioPciConfig { device: Arc, } impl VfioPciConfig { fn new(device: Arc) -> Self { VfioPciConfig { device } } fn read_config_byte(&self, offset: u32) -> u8 { let mut data: [u8; 1] = [0]; self.device .region_read(VFIO_PCI_CONFIG_REGION_INDEX, data.as_mut(), offset.into()); data[0] } fn read_config_word(&self, offset: u32) -> u16 { let mut data: [u8; 2] = [0, 0]; self.device .region_read(VFIO_PCI_CONFIG_REGION_INDEX, data.as_mut(), offset.into()); u16::from_le_bytes(data) } fn read_config_dword(&self, offset: u32) -> u32 { let mut data: [u8; 4] = [0, 0, 0, 0]; self.device .region_read(VFIO_PCI_CONFIG_REGION_INDEX, data.as_mut(), offset.into()); u32::from_le_bytes(data) } fn write_config_dword(&self, buf: u32, offset: u32) { let data: [u8; 4] = buf.to_le_bytes(); self.device .region_write(VFIO_PCI_CONFIG_REGION_INDEX, &data, offset.into()) } } /// VfioPciDevice represents a VFIO PCI device. /// This structure implements the BusDevice and PciDevice traits. /// /// A VfioPciDevice is bound to a VfioDevice and is also a PCI device. /// The VMM creates a VfioDevice, then assigns it to a VfioPciDevice, /// which then gets added to the PCI bus. pub struct VfioPciDevice { vm_fd: Arc, device: Arc, vfio_pci_configuration: VfioPciConfig, configuration: PciConfiguration, mmio_regions: Vec, } impl VfioPciDevice { /// Constructs a new Vfio Pci device for the given Vfio device pub fn new( vm_fd: &Arc, allocator: &mut SystemAllocator, device: VfioDevice, ) -> Result { let device = Arc::new(device); device.reset(); let configuration = PciConfiguration::new( 0, 0, PciClassCode::Other, &PciVfioSubclass::VfioSubclass, None, PciHeaderType::Device, 0, 0, None, ); let vfio_pci_configuration = VfioPciConfig::new(Arc::clone(&device)); let mut vfio_pci_device = VfioPciDevice { vm_fd: vm_fd.clone(), device, configuration, vfio_pci_configuration, mmio_regions: Vec::new(), }; Ok(vfio_pci_device) } fn find_region(&self, addr: u64) -> Option { for region in self.mmio_regions.iter() { if addr >= region.start.raw_value() && addr < region.start.unchecked_add(region.length).raw_value() { return Some(*region); } } None } } impl Drop for VfioPciDevice { fn drop(&mut self) { if self.device.unset_dma_map().is_err() { error!("failed to remove all guest memory regions from iommu table"); } } } impl BusDevice for VfioPciDevice { fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { self.read_bar(base, offset, data) } fn write(&mut self, base: u64, offset: u64, data: &[u8]) { self.write_bar(base, offset, data) } } // First BAR offset in the PCI config space. const PCI_CONFIG_BAR_OFFSET: u32 = 0x10; // First BAR register index const PCI_CONFIG_BAR0_INDEX: usize = 4; // Capability register offset in the PCI config space. const PCI_CONFIG_CAPABILITY_OFFSET: u32 = 0x34; // IO BAR when first BAR bit is 1. const PCI_CONFIG_IO_BAR: u32 = 0x1; // Memory BAR flags (lower 4 bits). const PCI_CONFIG_MEMORY_BAR_FLAG_MASK: u32 = 0xf; // 64-bit memory bar flag. const PCI_CONFIG_MEMORY_BAR_64BIT: u32 = 0x4; // PCI config register size (4 bytes). const PCI_CONFIG_REGISTER_SIZE: usize = 4; // Number of BARs for a PCI device const BAR_NUMS: usize = 6; impl PciDevice for VfioPciDevice { fn allocate_bars( &mut self, allocator: &mut SystemAllocator, ) -> std::result::Result, PciDeviceError> { let mut ranges = Vec::new(); let mut bar_id = VFIO_PCI_BAR0_REGION_INDEX as u32; // Going through all regular regions to compute the BAR size. // We're not saving the BAR address to restore it, because we // are going to allocate a guest address for each BAR and write // that new address back. while bar_id < VFIO_PCI_ROM_REGION_INDEX { let mut lsb_size: u32 = 0xffff_ffff; let mut msb_size = 0; let mut region_size: u64; let bar_addr: GuestAddress; // Read the BAR size (Starts by all 1s to the BAR) let bar_offset = PCI_CONFIG_BAR_OFFSET + bar_id * 4; self.vfio_pci_configuration .write_config_dword(lsb_size, bar_offset); lsb_size = self.vfio_pci_configuration.read_config_dword(bar_offset); // We've just read the BAR size back. Or at least its LSB. let lsb_flag = lsb_size & PCI_CONFIG_MEMORY_BAR_FLAG_MASK; if lsb_size == 0 { bar_id += 1; continue; } // Is this an IO BAR? let io_bar = match lsb_flag & PCI_CONFIG_IO_BAR { PCI_CONFIG_IO_BAR => true, _ => false, }; // Is this a 64-bit BAR? let is_64bit_bar = match lsb_flag & PCI_CONFIG_MEMORY_BAR_64BIT { PCI_CONFIG_MEMORY_BAR_64BIT => true, _ => false, }; // By default, the region type is 32 bits memory BAR. let mut region_type = PciBarRegionType::Memory32BitRegion; if io_bar { // IO BAR region_type = PciBarRegionType::IORegion; // Clear first bit. lsb_size &= 0xffff_fffc; // Find the first bit that's set to 1. let first_bit = lsb_size.trailing_zeros(); region_size = 2u64.pow(first_bit); // We need to allocate a guest PIO address range for that BAR. bar_addr = allocator .allocate_io_addresses(None, region_size, Some(0x4)) .ok_or_else(|| PciDeviceError::IoAllocationFailed(region_size))?; } else { if is_64bit_bar { // 64 bits Memory BAR region_type = PciBarRegionType::Memory64BitRegion; msb_size = 0xffff_ffff; let msb_bar_offset: u32 = PCI_CONFIG_BAR_OFFSET + (bar_id + 1) * 4; self.vfio_pci_configuration .write_config_dword(msb_bar_offset, msb_size); msb_size = self .vfio_pci_configuration .read_config_dword(msb_bar_offset); } // Clear the first four bytes from our LSB. lsb_size &= 0xffff_fff0; region_size = u64::from(msb_size); region_size <<= 32; region_size |= u64::from(lsb_size); // Find the first that's set to 1. let first_bit = region_size.trailing_zeros(); region_size = 2u64.pow(first_bit); // We need to allocate a guest MMIO address range for that BAR. if is_64bit_bar { bar_addr = allocator .allocate_mmio_addresses(None, region_size, Some(0x1000)) .ok_or_else(|| PciDeviceError::IoAllocationFailed(region_size))?; } else { bar_addr = allocator .allocate_mmio_hole_addresses(None, region_size, Some(0x1000)) .ok_or_else(|| PciDeviceError::IoAllocationFailed(region_size))?; } } // We can now build our BAR configuration block. let config = PciBarConfiguration::default() .set_register_index(bar_id as usize) .set_address(bar_addr.raw_value()) .set_size(region_size) .set_region_type(region_type); self.configuration .add_pci_bar(&config) .map_err(|e| PciDeviceError::IoRegistrationFailed(bar_addr.raw_value(), e))?; ranges.push((bar_addr, region_size, region_type)); self.mmio_regions.push(MmioRegion { start: bar_addr, length: region_size, index: bar_id as u32, }); bar_id += 1; if is_64bit_bar { bar_id += 1; } } if self.device.setup_dma_map().is_err() { error!("failed to add all guest memory regions into iommu table"); } Ok(ranges) } fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) { // When the guest wants to write to a BAR, we trap it into // our local configuration space. We're not reprogramming // VFIO device. if reg_idx >= PCI_CONFIG_BAR0_INDEX && reg_idx < PCI_CONFIG_BAR0_INDEX + BAR_NUMS { // We keep our local cache updated with the BARs. // We'll read it back from there when the guest is asking // for BARs (see read_config_register()). return self .configuration .write_config_register(reg_idx, offset, data); } let reg = (reg_idx * PCI_CONFIG_REGISTER_SIZE) as u64; self.device .region_write(VFIO_PCI_CONFIG_REGION_INDEX, data, reg + offset); } fn read_config_register(&self, reg_idx: usize) -> u32 { // When reading the BARs, we trap it and return what comes // from our local configuration space. We want the guest to // use that and not the VFIO device BARs as it does not map // with the guest address space. if reg_idx >= PCI_CONFIG_BAR0_INDEX && reg_idx < PCI_CONFIG_BAR0_INDEX + BAR_NUMS { return self.configuration.read_reg(reg_idx); } // The config register read comes from the VFIO device itself. self.vfio_pci_configuration .read_config_dword((reg_idx * 4) as u32) } fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) { let addr = base + offset; if let Some(region) = self.find_region(addr) { let offset = addr - region.start.raw_value(); self.device.region_read(region.index, data, offset); } } fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) { let addr = base + offset; if let Some(region) = self.find_region(addr) { let offset = addr - region.start.raw_value(); self.device.region_write(region.index, data, offset); } } }