vmm: add NVIDIA GPUDirect P2P support

On platforms where PCIe P2P is supported, inject a PCI capability into
NVIDIA GPUs to indicate support.

Signed-off-by: Thomas Barrett <tbarrett@crusoeenergy.com>
This commit is contained in:
Thomas Barrett 2024-02-01 20:41:10 +00:00 committed by Rob Bradford
parent 05ec6190da
commit b750c332aa
6 changed files with 71 additions and 11 deletions

View File

@ -419,6 +419,7 @@ pub(crate) struct VfioCommon {
pub(crate) legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>, pub(crate) legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
pub(crate) vfio_wrapper: Arc<dyn Vfio>, pub(crate) vfio_wrapper: Arc<dyn Vfio>,
pub(crate) patches: HashMap<usize, ConfigPatch>, pub(crate) patches: HashMap<usize, ConfigPatch>,
x_nv_gpudirect_clique: Option<u8>,
} }
impl VfioCommon { impl VfioCommon {
@ -429,6 +430,7 @@ impl VfioCommon {
subclass: &dyn PciSubclass, subclass: &dyn PciSubclass,
bdf: PciBdf, bdf: PciBdf,
snapshot: Option<Snapshot>, snapshot: Option<Snapshot>,
x_nv_gpudirect_clique: Option<u8>,
) -> Result<Self, VfioPciError> { ) -> Result<Self, VfioPciError> {
let pci_configuration_state = let pci_configuration_state =
vm_migration::versioned_state_from_id(snapshot.as_ref(), PCI_CONFIGURATION_ID) vm_migration::versioned_state_from_id(snapshot.as_ref(), PCI_CONFIGURATION_ID)
@ -465,6 +467,7 @@ impl VfioCommon {
legacy_interrupt_group, legacy_interrupt_group,
vfio_wrapper, vfio_wrapper,
patches: HashMap::new(), patches: HashMap::new(),
x_nv_gpudirect_clique,
}; };
let state: Option<VfioCommonState> = snapshot let state: Option<VfioCommonState> = snapshot
@ -859,15 +862,15 @@ impl VfioCommon {
} }
pub(crate) fn parse_capabilities(&mut self, bdf: PciBdf) { pub(crate) fn parse_capabilities(&mut self, bdf: PciBdf) {
let mut cap_next = self let mut cap_iter = self
.vfio_wrapper .vfio_wrapper
.read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET); .read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET);
let mut pci_express_cap_found = false; let mut pci_express_cap_found = false;
let mut power_management_cap_found = false; let mut power_management_cap_found = false;
while cap_next != 0 { while cap_iter != 0 {
let cap_id = self.vfio_wrapper.read_config_byte(cap_next.into()); let cap_id = self.vfio_wrapper.read_config_byte(cap_iter.into());
match PciCapabilityId::from(cap_id) { match PciCapabilityId::from(cap_id) {
PciCapabilityId::MessageSignalledInterrupts => { PciCapabilityId::MessageSignalledInterrupts => {
@ -875,8 +878,8 @@ impl VfioCommon {
if irq_info.count > 0 { if irq_info.count > 0 {
// Parse capability only if the VFIO device // Parse capability only if the VFIO device
// supports MSI. // supports MSI.
let msg_ctl = self.parse_msi_capabilities(cap_next); let msg_ctl = self.parse_msi_capabilities(cap_iter);
self.initialize_msi(msg_ctl, cap_next as u32, None); self.initialize_msi(msg_ctl, cap_iter as u32, None);
} }
} }
} }
@ -886,8 +889,8 @@ impl VfioCommon {
if irq_info.count > 0 { if irq_info.count > 0 {
// Parse capability only if the VFIO device // Parse capability only if the VFIO device
// supports MSI-X. // supports MSI-X.
let msix_cap = self.parse_msix_capabilities(cap_next); let msix_cap = self.parse_msix_capabilities(cap_iter);
self.initialize_msix(msix_cap, cap_next as u32, bdf, None); self.initialize_msix(msix_cap, cap_iter as u32, bdf, None);
} }
} }
} }
@ -896,7 +899,16 @@ impl VfioCommon {
_ => {} _ => {}
}; };
cap_next = self.vfio_wrapper.read_config_byte((cap_next + 1).into()); let cap_next = self.vfio_wrapper.read_config_byte((cap_iter + 1).into());
if cap_next == 0 {
break;
}
cap_iter = cap_next;
}
if let Some(clique_id) = self.x_nv_gpudirect_clique {
self.add_nv_gpudirect_clique_cap(cap_iter, clique_id);
} }
if pci_express_cap_found && power_management_cap_found { if pci_express_cap_found && power_management_cap_found {
@ -904,6 +916,37 @@ impl VfioCommon {
} }
} }
/// Registers the config-space patches that expose an NVIDIA GPUDirect P2P
/// clique capability at offset 0xD4 and link it to the capability found at
/// `cap_iter`.
///
/// Three entries are inserted into `self.patches`, keyed by dword register
/// index (byte offset / 4):
///
/// 1. The register containing `cap_iter`: only bits 8..16 are masked in and
///    set to 0xD4, i.e. the byte following the capability ID (its "next
///    capability" pointer) is redirected to the new capability.
/// 2. The register at 0xD4: fully replaced with `0x5008_0009`, whose bytes
///    from lowest to highest are 0x09, 0x00, 0x08 and 0x50 (ASCII 'P').
///    Read as a PCI capability header this is: vendor-specific capability
///    ID, end-of-list next pointer, length 8, followed by the first
///    signature byte.
/// 3. The register at 0xD8: fully replaced with the bytes 0x32 ('2') and
///    0x50 ('P') in the low half-word and `clique_id` shifted to bit 19.
///
/// * `cap_iter` - config-space byte offset of the capability whose next
///   pointer should be redirected to 0xD4.
/// * `clique_id` - GPUDirect clique identifier to advertise.
///
/// NOTE(review): if `cap_iter` is 0 the first patch lands on register 0
/// rather than on a capability header - confirm callers only pass a real
/// capability offset.
/// NOTE(review): `clique_id` is not range-checked here; values >= 32 spill
/// into the top byte of the second dword - presumably the valid range is
/// narrower than `u8`, verify against the NVIDIA/QEMU definition.
fn add_nv_gpudirect_clique_cap(&mut self, cap_iter: u8, clique_id: u8) {
    // Turing, Ampere, Hopper, and Lovelace GPUs have dedicated space
    // at 0xD4 for this capability.
    const CLIQUE_CAP_OFFSET: u32 = 0xd4;

    // Dword register indices of the capability being re-linked and of the
    // injected capability itself.
    let link_reg = usize::from(cap_iter / 4);
    let clique_reg = (CLIQUE_CAP_OFFSET / 4) as usize;

    // Insertion order matters only if `link_reg` and `clique_reg` coincide,
    // in which case the full-dword patch must win; keep the link patch first.
    let config_patches = [
        (
            link_reg,
            ConfigPatch {
                mask: 0x0000_ff00,
                patch: CLIQUE_CAP_OFFSET << 8,
            },
        ),
        (
            clique_reg,
            ConfigPatch {
                mask: 0xffff_ffff,
                patch: 0x5008_0009,
            },
        ),
        (
            clique_reg + 1,
            ConfigPatch {
                mask: 0xffff_ffff,
                patch: 0x5032 | (u32::from(clique_id) << 19),
            },
        ),
    ];

    for (reg_idx, config_patch) in config_patches {
        self.patches.insert(reg_idx, config_patch);
    }
}
fn parse_extended_capabilities(&mut self) { fn parse_extended_capabilities(&mut self) {
let mut current_offset = PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET; let mut current_offset = PCI_CONFIG_EXTENDED_CAPABILITY_OFFSET;
@ -1351,6 +1394,7 @@ impl VfioPciDevice {
bdf: PciBdf, bdf: PciBdf,
memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>, memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
snapshot: Option<Snapshot>, snapshot: Option<Snapshot>,
x_nv_gpudirect_clique: Option<u8>,
) -> Result<Self, VfioPciError> { ) -> Result<Self, VfioPciError> {
let device = Arc::new(device); let device = Arc::new(device);
device.reset(); device.reset();
@ -1364,6 +1408,7 @@ impl VfioPciDevice {
&PciVfioSubclass::VfioSubclass, &PciVfioSubclass::VfioSubclass,
bdf, bdf,
vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID), vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID),
x_nv_gpudirect_clique,
)?; )?;
let vfio_pci_device = VfioPciDevice { let vfio_pci_device = VfioPciDevice {

View File

@ -94,6 +94,7 @@ impl VfioUserPciDevice {
&PciVfioUserSubclass::VfioUserSubclass, &PciVfioUserSubclass::VfioUserSubclass,
bdf, bdf,
vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID), vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID),
None,
) )
.map_err(VfioUserPciDeviceError::CreateVfioCommon)?; .map_err(VfioUserPciDeviceError::CreateVfioCommon)?;

View File

@ -1046,7 +1046,9 @@ components:
format: int16 format: int16
id: id:
type: string type: string
x_nv_gpudirect_clique:
type: integer
format: int8
TpmConfig: TpmConfig:
required: required:
- socket - socket

View File

@ -1746,7 +1746,12 @@ impl DeviceConfig {
pub fn parse(device: &str) -> Result<Self> { pub fn parse(device: &str) -> Result<Self> {
let mut parser = OptionParser::new(); let mut parser = OptionParser::new();
parser.add("path").add("id").add("iommu").add("pci_segment"); parser
.add("path")
.add("id")
.add("iommu")
.add("pci_segment")
.add("x_nv_gpudirect_clique");
parser.parse(device).map_err(Error::ParseDevice)?; parser.parse(device).map_err(Error::ParseDevice)?;
let path = parser let path = parser
@ -1763,12 +1768,15 @@ impl DeviceConfig {
.convert::<u16>("pci_segment") .convert::<u16>("pci_segment")
.map_err(Error::ParseDevice)? .map_err(Error::ParseDevice)?
.unwrap_or_default(); .unwrap_or_default();
let x_nv_gpudirect_clique = parser
.convert::<u8>("x_nv_gpudirect_clique")
.map_err(Error::ParseDevice)?;
Ok(DeviceConfig { Ok(DeviceConfig {
path, path,
iommu, iommu,
id, id,
pci_segment, pci_segment,
x_nv_gpudirect_clique,
}) })
} }
@ -3324,6 +3332,7 @@ mod tests {
id: None, id: None,
iommu: false, iommu: false,
pci_segment: 0, pci_segment: 0,
x_nv_gpudirect_clique: None,
} }
} }

View File

@ -3510,6 +3510,7 @@ impl DeviceManager {
pci_device_bdf, pci_device_bdf,
Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()), vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
device_cfg.x_nv_gpudirect_clique,
) )
.map_err(DeviceManagerError::VfioPciCreate)?; .map_err(DeviceManagerError::VfioPciCreate)?;

View File

@ -434,6 +434,8 @@ pub struct DeviceConfig {
pub id: Option<String>, pub id: Option<String>,
#[serde(default)] #[serde(default)]
pub pci_segment: u16, pub pci_segment: u16,
#[serde(default)]
pub x_nv_gpudirect_clique: Option<u8>,
} }
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]