vfio: Initial PCI support

This brings the initial PCI support to the VFIO crate.
The VfioPciDevice is the main structure and holds an inner VfioDevice.

VfioPciDevice implements the PCI trait, leaving the IRQ assignments
empty as this will be driven by both the guest and the VFIO PCI device,
not by the VMM.

As we must trap BAR programming from the guest (We don't want to program
the actual device with guest addresses), we use our local PCI
configuration cache to read and write BARs.

Signed-off-by: Zhang, Xiong Y <xiong.y.zhang@intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
This commit is contained in:
Samuel Ortiz 2019-07-15 09:50:55 +02:00
parent 2cec3aad7f
commit db5b4763c2
3 changed files with 364 additions and 1 deletions

View File

@ -5,11 +5,14 @@ authors = ["The Cloud Hypervisor Authors"]
[dependencies]
byteorder = ">=1.2.1"
devices = { path = "../devices" }
kvm-bindings = "0.1"
kvm-ioctls = { git = "https://github.com/rust-vmm/kvm-ioctls", branch = "master" }
libc = ">=0.2.39"
log = "*"
pci = { path = "../pci" }
vfio-bindings = { path = "../vfio-bindings" }
vm-allocator = { path = "../vm-allocator" }
vmm-sys-util = { git = "https://github.com/rust-vmm/vmm-sys-util" }
[dependencies.vm-memory]

View File

@ -5,22 +5,26 @@
//#![deny(missing_docs)]
//! Virtual Function I/O (VFIO) API
extern crate byteorder;
extern crate devices;
extern crate kvm_bindings;
extern crate kvm_ioctls;
extern crate log;
extern crate pci;
extern crate vfio_bindings;
extern crate vm_allocator;
extern crate vm_memory;
#[macro_use]
extern crate vmm_sys_util;
mod vfio_device;
mod vfio_ioctls;
mod vfio_pci;
use std::mem::size_of;
pub use vfio_device::{VfioDevice, VfioError};
pub use vfio_pci::VfioPciDevice;
// Returns a `Vec<T>` with a size in bytes at least as large as `size_in_bytes`.
fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> {

356
vfio/src/vfio_pci.rs Normal file
View File

@ -0,0 +1,356 @@
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
extern crate devices;
extern crate pci;
extern crate vm_allocator;
use crate::vfio_device::VfioDevice;
use devices::BusDevice;
use kvm_ioctls::*;
use pci::{
PciBarConfiguration, PciBarRegionType, PciClassCode, PciConfiguration, PciDevice,
PciDeviceError, PciHeaderType, PciSubclass,
};
use std::sync::Arc;
use vfio_bindings::bindings::vfio::*;
use vm_allocator::SystemAllocator;
use vm_memory::{Address, GuestAddress, GuestUsize};
#[derive(Copy, Clone)]
enum PciVfioSubclass {
VfioSubclass = 0xff,
}
impl PciSubclass for PciVfioSubclass {
fn get_register_value(&self) -> u8 {
*self as u8
}
}
#[derive(Copy, Clone)]
struct MmioRegion {
start: GuestAddress,
length: GuestUsize,
index: u32,
}
struct VfioPciConfig {
device: Arc<VfioDevice>,
}
impl VfioPciConfig {
fn new(device: Arc<VfioDevice>) -> Self {
VfioPciConfig { device }
}
fn read_config_byte(&self, offset: u32) -> u8 {
let mut data: [u8; 1] = [0];
self.device
.region_read(VFIO_PCI_CONFIG_REGION_INDEX, data.as_mut(), offset.into());
data[0]
}
fn read_config_word(&self, offset: u32) -> u16 {
let mut data: [u8; 2] = [0, 0];
self.device
.region_read(VFIO_PCI_CONFIG_REGION_INDEX, data.as_mut(), offset.into());
u16::from_le_bytes(data)
}
fn read_config_dword(&self, offset: u32) -> u32 {
let mut data: [u8; 4] = [0, 0, 0, 0];
self.device
.region_read(VFIO_PCI_CONFIG_REGION_INDEX, data.as_mut(), offset.into());
u32::from_le_bytes(data)
}
fn write_config_dword(&self, buf: u32, offset: u32) {
let data: [u8; 4] = buf.to_le_bytes();
self.device
.region_write(VFIO_PCI_CONFIG_REGION_INDEX, &data, offset.into())
}
}
/// VfioPciDevice represents a VFIO PCI device.
/// This structure implements the BusDevice and PciDevice traits.
///
/// A VfioPciDevice is bound to a VfioDevice and is also a PCI device.
/// The VMM creates a VfioDevice, then assigns it to a VfioPciDevice,
/// which then gets added to the PCI bus.
pub struct VfioPciDevice {
vm_fd: Arc<VmFd>,
device: Arc<VfioDevice>,
vfio_pci_configuration: VfioPciConfig,
configuration: PciConfiguration,
mmio_regions: Vec<MmioRegion>,
}
impl VfioPciDevice {
/// Constructs a new Vfio Pci device for the given Vfio device
pub fn new(
vm_fd: &Arc<VmFd>,
allocator: &mut SystemAllocator,
device: VfioDevice,
) -> Result<Self> {
let device = Arc::new(device);
device.reset();
let configuration = PciConfiguration::new(
0,
0,
PciClassCode::Other,
&PciVfioSubclass::VfioSubclass,
None,
PciHeaderType::Device,
0,
0,
None,
);
let vfio_pci_configuration = VfioPciConfig::new(Arc::clone(&device));
let mut vfio_pci_device = VfioPciDevice {
vm_fd: vm_fd.clone(),
device,
configuration,
vfio_pci_configuration,
mmio_regions: Vec::new(),
};
Ok(vfio_pci_device)
}
fn find_region(&self, addr: u64) -> Option<MmioRegion> {
for region in self.mmio_regions.iter() {
if addr >= region.start.raw_value()
&& addr < region.start.unchecked_add(region.length).raw_value()
{
return Some(*region);
}
}
None
}
}
impl Drop for VfioPciDevice {
fn drop(&mut self) {
if self.device.unset_dma_map().is_err() {
error!("failed to remove all guest memory regions from iommu table");
}
}
}
impl BusDevice for VfioPciDevice {
fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
self.read_bar(base, offset, data)
}
fn write(&mut self, base: u64, offset: u64, data: &[u8]) {
self.write_bar(base, offset, data)
}
}
// First BAR offset in the PCI config space.
const PCI_CONFIG_BAR_OFFSET: u32 = 0x10;
// First BAR register index
const PCI_CONFIG_BAR0_INDEX: usize = 4;
// Capability register offset in the PCI config space.
const PCI_CONFIG_CAPABILITY_OFFSET: u32 = 0x34;
// IO BAR when first BAR bit is 1.
const PCI_CONFIG_IO_BAR: u32 = 0x1;
// Memory BAR flags (lower 4 bits).
const PCI_CONFIG_MEMORY_BAR_FLAG_MASK: u32 = 0xf;
// 64-bit memory bar flag.
const PCI_CONFIG_MEMORY_BAR_64BIT: u32 = 0x4;
// PCI config register size (4 bytes).
const PCI_CONFIG_REGISTER_SIZE: usize = 4;
// Number of BARs for a PCI device
const BAR_NUMS: usize = 6;
impl PciDevice for VfioPciDevice {
fn allocate_bars(
&mut self,
allocator: &mut SystemAllocator,
) -> std::result::Result<Vec<(GuestAddress, GuestUsize, PciBarRegionType)>, PciDeviceError>
{
let mut ranges = Vec::new();
let mut bar_id = VFIO_PCI_BAR0_REGION_INDEX as u32;
// Going through all regular regions to compute the BAR size.
// We're not saving the BAR address to restore it, because we
// are going to allocate a guest address for each BAR and write
// that new address back.
while bar_id < VFIO_PCI_ROM_REGION_INDEX {
let mut lsb_size: u32 = 0xffff_ffff;
let mut msb_size = 0;
let mut region_size: u64;
let bar_addr: GuestAddress;
// Read the BAR size (Starts by all 1s to the BAR)
let bar_offset = PCI_CONFIG_BAR_OFFSET + bar_id * 4;
self.vfio_pci_configuration
.write_config_dword(lsb_size, bar_offset);
lsb_size = self.vfio_pci_configuration.read_config_dword(bar_offset);
// We've just read the BAR size back. Or at least its LSB.
let lsb_flag = lsb_size & PCI_CONFIG_MEMORY_BAR_FLAG_MASK;
if lsb_size == 0 {
bar_id += 1;
continue;
}
// Is this an IO BAR?
let io_bar = match lsb_flag & PCI_CONFIG_IO_BAR {
PCI_CONFIG_IO_BAR => true,
_ => false,
};
// Is this a 64-bit BAR?
let is_64bit_bar = match lsb_flag & PCI_CONFIG_MEMORY_BAR_64BIT {
PCI_CONFIG_MEMORY_BAR_64BIT => true,
_ => false,
};
// By default, the region type is 32 bits memory BAR.
let mut region_type = PciBarRegionType::Memory32BitRegion;
if io_bar {
// IO BAR
region_type = PciBarRegionType::IORegion;
// Clear first bit.
lsb_size &= 0xffff_fffc;
// Find the first bit that's set to 1.
let first_bit = lsb_size.trailing_zeros();
region_size = 2u64.pow(first_bit);
// We need to allocate a guest PIO address range for that BAR.
bar_addr = allocator
.allocate_io_addresses(None, region_size, Some(0x4))
.ok_or_else(|| PciDeviceError::IoAllocationFailed(region_size))?;
} else {
if is_64bit_bar {
// 64 bits Memory BAR
region_type = PciBarRegionType::Memory64BitRegion;
msb_size = 0xffff_ffff;
let msb_bar_offset: u32 = PCI_CONFIG_BAR_OFFSET + (bar_id + 1) * 4;
self.vfio_pci_configuration
.write_config_dword(msb_bar_offset, msb_size);
msb_size = self
.vfio_pci_configuration
.read_config_dword(msb_bar_offset);
}
// Clear the first four bytes from our LSB.
lsb_size &= 0xffff_fff0;
region_size = u64::from(msb_size);
region_size <<= 32;
region_size |= u64::from(lsb_size);
// Find the first that's set to 1.
let first_bit = region_size.trailing_zeros();
region_size = 2u64.pow(first_bit);
// We need to allocate a guest MMIO address range for that BAR.
if is_64bit_bar {
bar_addr = allocator
.allocate_mmio_addresses(None, region_size, Some(0x1000))
.ok_or_else(|| PciDeviceError::IoAllocationFailed(region_size))?;
} else {
bar_addr = allocator
.allocate_mmio_hole_addresses(None, region_size, Some(0x1000))
.ok_or_else(|| PciDeviceError::IoAllocationFailed(region_size))?;
}
}
// We can now build our BAR configuration block.
let config = PciBarConfiguration::default()
.set_register_index(bar_id as usize)
.set_address(bar_addr.raw_value())
.set_size(region_size)
.set_region_type(region_type);
self.configuration
.add_pci_bar(&config)
.map_err(|e| PciDeviceError::IoRegistrationFailed(bar_addr.raw_value(), e))?;
ranges.push((bar_addr, region_size, region_type));
self.mmio_regions.push(MmioRegion {
start: bar_addr,
length: region_size,
index: bar_id as u32,
});
bar_id += 1;
if is_64bit_bar {
bar_id += 1;
}
}
if self.device.setup_dma_map().is_err() {
error!("failed to add all guest memory regions into iommu table");
}
Ok(ranges)
}
fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
// When the guest wants to write to a BAR, we trap it into
// our local configuration space. We're not reprogramming
// VFIO device.
if reg_idx >= PCI_CONFIG_BAR0_INDEX && reg_idx < PCI_CONFIG_BAR0_INDEX + BAR_NUMS {
// We keep our local cache updated with the BARs.
// We'll read it back from there when the guest is asking
// for BARs (see read_config_register()).
return self
.configuration
.write_config_register(reg_idx, offset, data);
}
let reg = (reg_idx * PCI_CONFIG_REGISTER_SIZE) as u64;
self.device
.region_write(VFIO_PCI_CONFIG_REGION_INDEX, data, reg + offset);
}
fn read_config_register(&self, reg_idx: usize) -> u32 {
// When reading the BARs, we trap it and return what comes
// from our local configuration space. We want the guest to
// use that and not the VFIO device BARs as it does not map
// with the guest address space.
if reg_idx >= PCI_CONFIG_BAR0_INDEX && reg_idx < PCI_CONFIG_BAR0_INDEX + BAR_NUMS {
return self.configuration.read_reg(reg_idx);
}
// The config register read comes from the VFIO device itself.
self.vfio_pci_configuration
.read_config_dword((reg_idx * 4) as u32)
}
fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
let addr = base + offset;
if let Some(region) = self.find_region(addr) {
let offset = addr - region.start.raw_value();
self.device.region_read(region.index, data, offset);
}
}
fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) {
let addr = base + offset;
if let Some(region) = self.find_region(addr) {
let offset = addr - region.start.raw_value();
self.device.region_write(region.index, data, offset);
}
}
}