pci: vfio: Filter out some PCI extended capabilities

There are PCI extended capabilities that can't be passed through to the
VM as they would be unusable from a guest perspective. That's why we
introduce a way to patch what is returned to the guest when the PCI
configuration space is accessed. The list of patches is built while
parsing the extended capabilities, and depends in particular on the
presence of the SR-IOV and Resizable BAR capabilities.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Signed-off-by: Steven Dake <sdake@lambdal.com>
This commit is contained in:
Sebastien Boeuf 2022-08-08 14:43:04 +02:00
parent 9f89e0a4e0
commit e45e3df64d
4 changed files with 177 additions and 6 deletions

View File

@ -190,7 +190,7 @@ pub trait PciProgrammingInterface {
#[derive(PartialEq, Eq, Copy, Clone)]
#[allow(dead_code)]
#[allow(non_camel_case_types)]
#[repr(C)]
#[repr(u8)]
pub enum PciCapabilityId {
ListId = 0,
PowerManagement = 0x01,
@ -244,6 +244,118 @@ impl From<u8> for PciCapabilityId {
}
}
/// Identifiers of PCI Express extended capabilities.
///
/// Each discriminant is the 16-bit capability ID carried in the low half of
/// an extended capability header. A few variant names contain historical
/// misspellings; they are kept as-is because the names are part of the
/// public interface.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[allow(dead_code)]
#[repr(u16)]
pub enum PciExpressCapabilityId {
    NullCapability = 0x0000,
    AdvancedErrorReporting = 0x0001,
    VirtualChannelMultiFunctionVirtualChannelNotPresent = 0x0002,
    DeviceSerialNumber = 0x0003,
    PowerBudgeting = 0x0004,
    RootComplexLinkDeclaration = 0x0005,
    RootComplexInternalLinkControl = 0x0006,
    RootComplexEventCollectorEndpointAssociation = 0x0007,
    MultiFunctionVirtualChannel = 0x0008,
    VirtualChannelMultiFunctionVirtualChannelPresent = 0x0009,
    RootComplexRegisterBlock = 0x000a,
    VendorSpecificExtendedCapability = 0x000b,
    ConfigurationAccessCorrelation = 0x000c,
    AccessControlServices = 0x000d,
    AlternativeRoutingIdentificationIntepretation = 0x000e,
    AddressTranslationServices = 0x000f,
    SingleRootIoVirtualization = 0x0010,
    DeprecatedMultiRootIoVirtualzation = 0x0011,
    Multicast = 0x0012,
    PageRequestInterface = 0x0013,
    ReservedForAmd = 0x0014,
    ResizeableBar = 0x0015,
    DynamicPowerAllocation = 0x0016,
    ThpRequester = 0x0017,
    LatencyToleranceReporting = 0x0018,
    SecondaryPciExpress = 0x0019,
    ProtocolMultiplexing = 0x001a,
    ProcessAddressSpaceId = 0x001b,
    LnRequestor = 0x001c,
    DownstreamPortContainment = 0x001d,
    L1PmSubstates = 0x001e,
    PrecisionTimeMeasurement = 0x001f,
    PciExpressOverMphy = 0x0020,
    FRSQueueing = 0x0021,
    ReadinessTimeReporting = 0x0022,
    DesignatedVendorSpecificExtendedCapability = 0x0023,
    VfResizeableBar = 0x0024,
    DataLinkFeature = 0x0025,
    PhysicalLayerSixteenGts = 0x0026,
    LaneMargeningAtTheReceiver = 0x0027,
    HierarchyId = 0x0028,
    NativePcieEnclosureManagement = 0x0029,
    PhysicalLayerThirtyTwoGts = 0x002a,
    AlternateProtocol = 0x002b,
    SystemFirmwareIntermediary = 0x002c,
    ShadowFunctions = 0x002d,
    DataObjectExchange = 0x002e,
    /// Also the fallback produced by `From<u16>` for every ID that has no
    /// dedicated variant.
    Reserved = 0x002f,
    ExtendedCapabilitiesAbsence = 0xffff,
}

impl From<u16> for PciExpressCapabilityId {
    /// Maps a raw 16-bit extended capability ID to its variant.
    ///
    /// IDs `0x0000..=0x002e` and `0xffff` map to their dedicated variants;
    /// every other value maps to `Reserved`.
    fn from(id: u16) -> Self {
        type Id = PciExpressCapabilityId;

        // Variants for the contiguous range 0x0000..=0x002e, ordered so that
        // the index of each entry equals its capability ID.
        const BY_ID: [Id; 0x2f] = [
            Id::NullCapability,
            Id::AdvancedErrorReporting,
            Id::VirtualChannelMultiFunctionVirtualChannelNotPresent,
            Id::DeviceSerialNumber,
            Id::PowerBudgeting,
            Id::RootComplexLinkDeclaration,
            Id::RootComplexInternalLinkControl,
            Id::RootComplexEventCollectorEndpointAssociation,
            Id::MultiFunctionVirtualChannel,
            Id::VirtualChannelMultiFunctionVirtualChannelPresent,
            Id::RootComplexRegisterBlock,
            Id::VendorSpecificExtendedCapability,
            Id::ConfigurationAccessCorrelation,
            Id::AccessControlServices,
            Id::AlternativeRoutingIdentificationIntepretation,
            Id::AddressTranslationServices,
            Id::SingleRootIoVirtualization,
            Id::DeprecatedMultiRootIoVirtualzation,
            Id::Multicast,
            Id::PageRequestInterface,
            Id::ReservedForAmd,
            Id::ResizeableBar,
            Id::DynamicPowerAllocation,
            Id::ThpRequester,
            Id::LatencyToleranceReporting,
            Id::SecondaryPciExpress,
            Id::ProtocolMultiplexing,
            Id::ProcessAddressSpaceId,
            Id::LnRequestor,
            Id::DownstreamPortContainment,
            Id::L1PmSubstates,
            Id::PrecisionTimeMeasurement,
            Id::PciExpressOverMphy,
            Id::FRSQueueing,
            Id::ReadinessTimeReporting,
            Id::DesignatedVendorSpecificExtendedCapability,
            Id::VfResizeableBar,
            Id::DataLinkFeature,
            Id::PhysicalLayerSixteenGts,
            Id::LaneMargeningAtTheReceiver,
            Id::HierarchyId,
            Id::NativePcieEnclosureManagement,
            Id::PhysicalLayerThirtyTwoGts,
            Id::AlternateProtocol,
            Id::SystemFirmwareIntermediary,
            Id::ShadowFunctions,
            Id::DataObjectExchange,
        ];

        match BY_ID.get(usize::from(id)) {
            Some(&known) => known,
            None if id == 0xffff => Id::ExtendedCapabilitiesAbsence,
            None => Id::Reserved,
        }
    }
}
/// A PCI capability list. Devices can optionally specify capabilities in their configuration space.
pub trait PciCapability {
fn bytes(&self) -> &[u8];

View File

@ -17,7 +17,7 @@ mod vfio_user;
pub use self::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError};
pub use self::configuration::{
PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityId,
PciClassCode, PciConfiguration, PciHeaderType, PciMassStorageSubclass,
PciClassCode, PciConfiguration, PciExpressCapabilityId, PciHeaderType, PciMassStorageSubclass,
PciNetworkControllerSubclass, PciProgrammingInterface, PciSerialBusSubClass, PciSubclass,
};
pub use self::device::{

View File

@ -6,14 +6,14 @@
use crate::{
msi_num_enabled_vectors, BarReprogrammingParams, MsiCap, MsiConfig, MsixCap, MsixConfig,
PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciBdf, PciCapabilityId,
PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass,
MSIX_TABLE_ENTRY_SIZE,
PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciExpressCapabilityId,
PciHeaderType, PciSubclass, MSIX_TABLE_ENTRY_SIZE,
};
use anyhow::anyhow;
use byteorder::{ByteOrder, LittleEndian};
use hypervisor::HypervisorVmError;
use std::any::Any;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use std::io;
use std::os::unix::io::AsRawFd;
use std::ptr::null_mut;
@ -390,6 +390,11 @@ struct VfioCommonState {
impl VersionMapped for VfioCommonState {}
/// Override applied to the value of one 32-bit PCI configuration register
/// before it is returned from a config space read.
///
/// The patched value is computed as `(value & !mask) | patch`.
pub(crate) struct ConfigPatch {
    /// Bits of the register value that are cleared before `patch` is applied.
    mask: u32,
    /// Bits OR-ed into the register value once the masked bits are cleared.
    patch: u32,
}
pub(crate) struct VfioCommon {
pub(crate) configuration: PciConfiguration,
pub(crate) mmio_regions: Vec<MmioRegion>,
@ -397,6 +402,7 @@ pub(crate) struct VfioCommon {
pub(crate) msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
pub(crate) legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
pub(crate) vfio_wrapper: Arc<dyn Vfio>,
pub(crate) patches: HashMap<usize, ConfigPatch>,
}
impl VfioCommon {
@ -694,6 +700,9 @@ impl VfioCommon {
.vfio_wrapper
.read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET);
let mut pci_express_cap_found = false;
let mut power_management_cap_found = false;
while cap_next != 0 {
let cap_id = self.vfio_wrapper.read_config_byte(cap_next.into());
@ -719,11 +728,50 @@ impl VfioCommon {
}
}
}
PciCapabilityId::PciExpress => pci_express_cap_found = true,
PciCapabilityId::PowerManagement => power_management_cap_found = true,
_ => {}
};
cap_next = self.vfio_wrapper.read_config_byte((cap_next + 1).into());
}
if pci_express_cap_found && power_management_cap_found {
self.parse_extended_capabilities();
}
}
/// Walks the PCI Express extended capability list of the device and records
/// a configuration space patch for every capability that must be hidden
/// from the guest.
///
/// For the ARI, Resizable BAR and SR-IOV capabilities, a patch is stored
/// for the register holding the capability header. The patch replaces the
/// 16-bit capability ID with `NullCapability` and leaves the upper 16 bits
/// of the header (which include the next-capability pointer) untouched, so
/// the rest of the list stays reachable.
///
/// The walk is defensive against malformed lists: it stops on a header
/// reporting the absence of extended capabilities, on a next pointer that
/// does not land in the extended configuration space, and after the
/// maximum number of capabilities the space can hold, so a cyclic list
/// cannot hang the VMM.
fn parse_extended_capabilities(&mut self) {
    // Size in bytes of the PCI Express configuration space.
    const PCIE_CONFIG_SPACE_SIZE: u32 = 0x1000;
    // An extended capability occupies at least 8 bytes, which bounds the
    // number of entries a well-formed list can contain.
    const MAX_EXTENDED_CAPABILITIES: u32 =
        (PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET) / 8;

    let mut current_offset = PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET;

    for _ in 0..MAX_EXTENDED_CAPABILITIES {
        let ext_cap_hdr = self.vfio_wrapper.read_config_dword(current_offset);

        // Header layout: bits 15:0 capability ID, bits 31:20 next pointer.
        let cap_id = (ext_cap_hdr & 0xffff) as u16;
        // The two low bits of the next pointer are reserved and must be
        // masked by software, which also keeps the offset dword aligned.
        let cap_next = (ext_cap_hdr >> 20) & 0xffc;

        match PciExpressCapabilityId::from(cap_id) {
            PciExpressCapabilityId::AlternativeRoutingIdentificationIntepretation
            | PciExpressCapabilityId::ResizeableBar
            | PciExpressCapabilityId::SingleRootIoVirtualization => {
                let reg_idx = (current_offset / 4) as usize;
                self.patches.insert(
                    reg_idx,
                    ConfigPatch {
                        mask: 0x0000_ffff,
                        patch: PciExpressCapabilityId::NullCapability as u32,
                    },
                );
            }
            // Nothing valid can follow a header that reports no extended
            // capabilities (this is also what an all-ones read decodes to).
            PciExpressCapabilityId::ExtendedCapabilitiesAbsence => break,
            _ => {}
        }

        // A zero pointer terminates the list. Any other value below the
        // start of the extended space is invalid and must not be followed,
        // otherwise legacy config registers would be parsed as headers.
        if cap_next < PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET {
            break;
        }

        current_offset = cap_next;
    }
}
pub(crate) fn enable_intx(&mut self) -> Result<(), VfioPciError> {
@ -1011,7 +1059,13 @@ impl VfioCommon {
};
// The config register read comes from the VFIO device itself.
self.vfio_wrapper.read_config_dword((reg_idx * 4) as u32) & mask
let mut value = self.vfio_wrapper.read_config_dword((reg_idx * 4) as u32) & mask;
if let Some(config_patch) = self.patches.get(&reg_idx) {
value = (value & !config_patch.mask) | config_patch.patch;
}
value
}
fn state(&self) -> VfioCommonState {
@ -1196,6 +1250,7 @@ impl VfioPciDevice {
msi_interrupt_manager,
legacy_interrupt_group,
vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
patches: HashMap::new(),
};
// No need to parse capabilities from the device if on the restore path.
@ -1511,6 +1566,8 @@ impl BusDevice for VfioPciDevice {
const PCI_CONFIG_BAR_OFFSET: u32 = 0x10;
// Capability register offset in the PCI config space.
const PCI_CONFIG_CAPABILITY_OFFSET: u32 = 0x34;
// Offset of the first PCI Express extended capability header in the PCI config space.
const PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET: u32 = 0x100;
// IO BAR when first BAR bit is 1.
const PCI_CONFIG_IO_BAR: u32 = 0x1;
// 64-bit memory bar flag.

View File

@ -11,6 +11,7 @@ use crate::{
use anyhow::anyhow;
use hypervisor::HypervisorVmError;
use std::any::Any;
use std::collections::HashMap;
use std::os::unix::prelude::AsRawFd;
use std::ptr::null_mut;
use std::sync::{Arc, Barrier, Mutex};
@ -112,6 +113,7 @@ impl VfioUserPciDevice {
msi_interrupt_manager,
legacy_interrupt_group,
vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
patches: HashMap::new(),
};
// No need to parse capabilities from the device if on the restore path.