pci: vfio: Filter out some PCI extended capabilities

There are PCI extended capabilities that can't be passed through to the VM
as they would be unusable from a guest perspective. That's why we
introduce a way to patch what is returned to the guest when the PCI
configuration space is accessed. The list of patches is created while
parsing the extended capabilities, and is based in particular on the
presence of the SRIOV and Resizable BAR capabilities.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Signed-off-by: Steven Dake <sdake@lambdal.com>
This commit is contained in:
Sebastien Boeuf 2022-08-08 14:43:04 +02:00
parent 9f89e0a4e0
commit e45e3df64d
4 changed files with 177 additions and 6 deletions

View File

@ -190,7 +190,7 @@ pub trait PciProgrammingInterface {
#[derive(PartialEq, Eq, Copy, Clone)] #[derive(PartialEq, Eq, Copy, Clone)]
#[allow(dead_code)] #[allow(dead_code)]
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
#[repr(C)] #[repr(u8)]
pub enum PciCapabilityId { pub enum PciCapabilityId {
ListId = 0, ListId = 0,
PowerManagement = 0x01, PowerManagement = 0x01,
@ -244,6 +244,118 @@ impl From<u8> for PciCapabilityId {
} }
} }
/// Identifiers of PCI Express extended capabilities.
///
/// Each discriminant is the 16-bit capability ID, so `variant as u16` yields the raw ID and
/// [`From<u16>`] performs the reverse mapping.
///
/// NOTE(review): a few variant names carry spelling slips (`...Intepretation`,
/// `...Virtualzation`, `LaneMargening...`, and `ThpRequester`, presumably meant as "TPH").
/// They are part of the public API and are therefore left untouched here.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
#[allow(dead_code)]
#[repr(u16)]
pub enum PciExpressCapabilityId {
    NullCapability = 0x0000,
    AdvancedErrorReporting = 0x0001,
    VirtualChannelMultiFunctionVirtualChannelNotPresent = 0x0002,
    DeviceSerialNumber = 0x0003,
    PowerBudgeting = 0x0004,
    RootComplexLinkDeclaration = 0x0005,
    RootComplexInternalLinkControl = 0x0006,
    RootComplexEventCollectorEndpointAssociation = 0x0007,
    MultiFunctionVirtualChannel = 0x0008,
    VirtualChannelMultiFunctionVirtualChannelPresent = 0x0009,
    RootComplexRegisterBlock = 0x000a,
    VendorSpecificExtendedCapability = 0x000b,
    ConfigurationAccessCorrelation = 0x000c,
    AccessControlServices = 0x000d,
    AlternativeRoutingIdentificationIntepretation = 0x000e,
    AddressTranslationServices = 0x000f,
    SingleRootIoVirtualization = 0x0010,
    DeprecatedMultiRootIoVirtualzation = 0x0011,
    Multicast = 0x0012,
    PageRequestInterface = 0x0013,
    ReservedForAmd = 0x0014,
    ResizeableBar = 0x0015,
    DynamicPowerAllocation = 0x0016,
    ThpRequester = 0x0017,
    LatencyToleranceReporting = 0x0018,
    SecondaryPciExpress = 0x0019,
    ProtocolMultiplexing = 0x001a,
    ProcessAddressSpaceId = 0x001b,
    LnRequestor = 0x001c,
    DownstreamPortContainment = 0x001d,
    L1PmSubstates = 0x001e,
    PrecisionTimeMeasurement = 0x001f,
    PciExpressOverMphy = 0x0020,
    FRSQueueing = 0x0021,
    ReadinessTimeReporting = 0x0022,
    DesignatedVendorSpecificExtendedCapability = 0x0023,
    VfResizeableBar = 0x0024,
    DataLinkFeature = 0x0025,
    PhysicalLayerSixteenGts = 0x0026,
    LaneMargeningAtTheReceiver = 0x0027,
    HierarchyId = 0x0028,
    NativePcieEnclosureManagement = 0x0029,
    PhysicalLayerThirtyTwoGts = 0x002a,
    AlternateProtocol = 0x002b,
    SystemFirmwareIntermediary = 0x002c,
    ShadowFunctions = 0x002d,
    DataObjectExchange = 0x002e,
    /// Catch-all: also returned by `From<u16>` for every ID without a dedicated variant.
    Reserved = 0x002f,
    ExtendedCapabilitiesAbsence = 0xffff,
}

impl From<u16> for PciExpressCapabilityId {
    /// Maps a raw 16-bit extended capability ID to its variant.
    ///
    /// IDs `0x0000..=0x002e` map to the variant with the same discriminant, `0xffff` maps to
    /// `ExtendedCapabilitiesAbsence`, and every other value maps to `Reserved`.
    fn from(c: u16) -> Self {
        use PciExpressCapabilityId as Id;

        // Lookup table indexed by capability ID; entry `i` is the variant whose
        // discriminant is `i`. The fixed length makes a missing entry a compile error.
        const BY_ID: [Id; 0x2f] = [
            Id::NullCapability,
            Id::AdvancedErrorReporting,
            Id::VirtualChannelMultiFunctionVirtualChannelNotPresent,
            Id::DeviceSerialNumber,
            Id::PowerBudgeting,
            Id::RootComplexLinkDeclaration,
            Id::RootComplexInternalLinkControl,
            Id::RootComplexEventCollectorEndpointAssociation,
            Id::MultiFunctionVirtualChannel,
            Id::VirtualChannelMultiFunctionVirtualChannelPresent,
            Id::RootComplexRegisterBlock,
            Id::VendorSpecificExtendedCapability,
            Id::ConfigurationAccessCorrelation,
            Id::AccessControlServices,
            Id::AlternativeRoutingIdentificationIntepretation,
            Id::AddressTranslationServices,
            Id::SingleRootIoVirtualization,
            Id::DeprecatedMultiRootIoVirtualzation,
            Id::Multicast,
            Id::PageRequestInterface,
            Id::ReservedForAmd,
            Id::ResizeableBar,
            Id::DynamicPowerAllocation,
            Id::ThpRequester,
            Id::LatencyToleranceReporting,
            Id::SecondaryPciExpress,
            Id::ProtocolMultiplexing,
            Id::ProcessAddressSpaceId,
            Id::LnRequestor,
            Id::DownstreamPortContainment,
            Id::L1PmSubstates,
            Id::PrecisionTimeMeasurement,
            Id::PciExpressOverMphy,
            Id::FRSQueueing,
            Id::ReadinessTimeReporting,
            Id::DesignatedVendorSpecificExtendedCapability,
            Id::VfResizeableBar,
            Id::DataLinkFeature,
            Id::PhysicalLayerSixteenGts,
            Id::LaneMargeningAtTheReceiver,
            Id::HierarchyId,
            Id::NativePcieEnclosureManagement,
            Id::PhysicalLayerThirtyTwoGts,
            Id::AlternateProtocol,
            Id::SystemFirmwareIntermediary,
            Id::ShadowFunctions,
            Id::DataObjectExchange,
        ];

        if c == 0xffff {
            return Id::ExtendedCapabilitiesAbsence;
        }

        // Anything past the end of the table has no dedicated variant.
        BY_ID
            .get(usize::from(c))
            .copied()
            .unwrap_or(Id::Reserved)
    }
}
/// A PCI capability list. Devices can optionally specify capabilities in their configuration space. /// A PCI capability list. Devices can optionally specify capabilities in their configuration space.
pub trait PciCapability { pub trait PciCapability {
fn bytes(&self) -> &[u8]; fn bytes(&self) -> &[u8];

View File

@ -17,7 +17,7 @@ mod vfio_user;
pub use self::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; pub use self::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError};
pub use self::configuration::{ pub use self::configuration::{
PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityId, PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityId,
PciClassCode, PciConfiguration, PciHeaderType, PciMassStorageSubclass, PciClassCode, PciConfiguration, PciExpressCapabilityId, PciHeaderType, PciMassStorageSubclass,
PciNetworkControllerSubclass, PciProgrammingInterface, PciSerialBusSubClass, PciSubclass, PciNetworkControllerSubclass, PciProgrammingInterface, PciSerialBusSubClass, PciSubclass,
}; };
pub use self::device::{ pub use self::device::{

View File

@ -6,14 +6,14 @@
use crate::{ use crate::{
msi_num_enabled_vectors, BarReprogrammingParams, MsiCap, MsiConfig, MsixCap, MsixConfig, msi_num_enabled_vectors, BarReprogrammingParams, MsiCap, MsiConfig, MsixCap, MsixConfig,
PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciBdf, PciCapabilityId, PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciBdf, PciCapabilityId,
PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciExpressCapabilityId,
MSIX_TABLE_ENTRY_SIZE, PciHeaderType, PciSubclass, MSIX_TABLE_ENTRY_SIZE,
}; };
use anyhow::anyhow; use anyhow::anyhow;
use byteorder::{ByteOrder, LittleEndian}; use byteorder::{ByteOrder, LittleEndian};
use hypervisor::HypervisorVmError; use hypervisor::HypervisorVmError;
use std::any::Any; use std::any::Any;
use std::collections::BTreeMap; use std::collections::{BTreeMap, HashMap};
use std::io; use std::io;
use std::os::unix::io::AsRawFd; use std::os::unix::io::AsRawFd;
use std::ptr::null_mut; use std::ptr::null_mut;
@ -390,6 +390,11 @@ struct VfioCommonState {
impl VersionMapped for VfioCommonState {} impl VersionMapped for VfioCommonState {}
/// A fix-up for one 32-bit PCI configuration register, applied to the value read from the
/// device before it is returned by a config-space read.
///
/// The patched value is computed as `(value & !mask) | patch`.
pub(crate) struct ConfigPatch {
    /// Bits of the register value that are cleared before `patch` is OR-ed in.
    mask: u32,
    /// Bits OR-ed into the register value after the `mask` bits have been cleared.
    patch: u32,
}
pub(crate) struct VfioCommon { pub(crate) struct VfioCommon {
pub(crate) configuration: PciConfiguration, pub(crate) configuration: PciConfiguration,
pub(crate) mmio_regions: Vec<MmioRegion>, pub(crate) mmio_regions: Vec<MmioRegion>,
@ -397,6 +402,7 @@ pub(crate) struct VfioCommon {
pub(crate) msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, pub(crate) msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
pub(crate) legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, pub(crate) legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
pub(crate) vfio_wrapper: Arc<dyn Vfio>, pub(crate) vfio_wrapper: Arc<dyn Vfio>,
pub(crate) patches: HashMap<usize, ConfigPatch>,
} }
impl VfioCommon { impl VfioCommon {
@ -694,6 +700,9 @@ impl VfioCommon {
.vfio_wrapper .vfio_wrapper
.read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET); .read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET);
let mut pci_express_cap_found = false;
let mut power_management_cap_found = false;
while cap_next != 0 { while cap_next != 0 {
let cap_id = self.vfio_wrapper.read_config_byte(cap_next.into()); let cap_id = self.vfio_wrapper.read_config_byte(cap_next.into());
@ -719,11 +728,50 @@ impl VfioCommon {
} }
} }
} }
PciCapabilityId::PciExpress => pci_express_cap_found = true,
PciCapabilityId::PowerManagement => power_management_cap_found = true,
_ => {} _ => {}
}; };
cap_next = self.vfio_wrapper.read_config_byte((cap_next + 1).into()); cap_next = self.vfio_wrapper.read_config_byte((cap_next + 1).into());
} }
if pci_express_cap_found && power_management_cap_found {
self.parse_extended_capabilities();
}
}
fn parse_extended_capabilities(&mut self) {
let mut current_offset = PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET;
loop {
let ext_cap_hdr = self.vfio_wrapper.read_config_dword(current_offset);
let cap_id: u16 = (ext_cap_hdr & 0xffff) as u16;
let cap_next: u16 = ((ext_cap_hdr >> 20) & 0xfff) as u16;
match PciExpressCapabilityId::from(cap_id) {
PciExpressCapabilityId::AlternativeRoutingIdentificationIntepretation
| PciExpressCapabilityId::ResizeableBar
| PciExpressCapabilityId::SingleRootIoVirtualization => {
let reg_idx = (current_offset / 4) as usize;
self.patches.insert(
reg_idx,
ConfigPatch {
mask: 0x0000_ffff,
patch: PciExpressCapabilityId::NullCapability as u32,
},
);
}
_ => {}
}
if cap_next == 0 {
break;
}
current_offset = cap_next.into();
}
} }
pub(crate) fn enable_intx(&mut self) -> Result<(), VfioPciError> { pub(crate) fn enable_intx(&mut self) -> Result<(), VfioPciError> {
@ -1011,7 +1059,13 @@ impl VfioCommon {
}; };
// The config register read comes from the VFIO device itself. // The config register read comes from the VFIO device itself.
self.vfio_wrapper.read_config_dword((reg_idx * 4) as u32) & mask let mut value = self.vfio_wrapper.read_config_dword((reg_idx * 4) as u32) & mask;
if let Some(config_patch) = self.patches.get(&reg_idx) {
value = (value & !config_patch.mask) | config_patch.patch;
}
value
} }
fn state(&self) -> VfioCommonState { fn state(&self) -> VfioCommonState {
@ -1196,6 +1250,7 @@ impl VfioPciDevice {
msi_interrupt_manager, msi_interrupt_manager,
legacy_interrupt_group, legacy_interrupt_group,
vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>, vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
patches: HashMap::new(),
}; };
// No need to parse capabilities from the device if on the restore path. // No need to parse capabilities from the device if on the restore path.
@ -1511,6 +1566,8 @@ impl BusDevice for VfioPciDevice {
const PCI_CONFIG_BAR_OFFSET: u32 = 0x10; const PCI_CONFIG_BAR_OFFSET: u32 = 0x10;
// Capability register offset in the PCI config space. // Capability register offset in the PCI config space.
const PCI_CONFIG_CAPABILITY_OFFSET: u32 = 0x34; const PCI_CONFIG_CAPABILITY_OFFSET: u32 = 0x34;
// Extended capabilities register offset in the PCI config space.
const PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET: u32 = 0x100;
// IO BAR when first BAR bit is 1. // IO BAR when first BAR bit is 1.
const PCI_CONFIG_IO_BAR: u32 = 0x1; const PCI_CONFIG_IO_BAR: u32 = 0x1;
// 64-bit memory bar flag. // 64-bit memory bar flag.

View File

@ -11,6 +11,7 @@ use crate::{
use anyhow::anyhow; use anyhow::anyhow;
use hypervisor::HypervisorVmError; use hypervisor::HypervisorVmError;
use std::any::Any; use std::any::Any;
use std::collections::HashMap;
use std::os::unix::prelude::AsRawFd; use std::os::unix::prelude::AsRawFd;
use std::ptr::null_mut; use std::ptr::null_mut;
use std::sync::{Arc, Barrier, Mutex}; use std::sync::{Arc, Barrier, Mutex};
@ -112,6 +113,7 @@ impl VfioUserPciDevice {
msi_interrupt_manager, msi_interrupt_manager,
legacy_interrupt_group, legacy_interrupt_group,
vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>, vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
patches: HashMap::new(),
}; };
// No need to parse capabilities from the device if on the restore path. // No need to parse capabilities from the device if on the restore path.