util: permit existing binding to VFIO variant driver

Before a PCI device can be assigned to a guest with VFIO, that device
must be bound to the vfio-pci driver rather than to the device's
normal host driver. The vfio-pci driver provides APIs that permit QEMU
to perform all the necessary operations to make the device accessible
to the guest.

In the past vfio-pci was the only driver that supplied these APIs, but
there are now vendor/device-specific "VFIO variant" drivers that
provide the basic vfio-pci driver functionality/API while adding
support for device-specific operations (for example these
device-specific drivers may support live migration of certain
devices).  All that is needed to make this functionality available is
to bind the vendor-specific "VFIO variant" driver to the device
(rather than the generic vfio-pci driver, which will continue to work,
just without the extra functionality).

But until now libvirt has required that all PCI devices being assigned
to a guest with VFIO specifically have the "vfio-pci" driver bound to
the device. So even if the user manually binds a shiny new
vendor-specific VFIO variant driver to the device (and puts
"managed='no'" in the config to prevent libvirt from changing the
binding), libvirt will just fail during startup of the guest (or
during hotplug) because the driver bound to the device isn't exactly
"vfio-pci".

Beginning with kernel 6.1, it's possible to determine from the sysfs
directory for a device whether the currently-bound driver is a VFIO
driver (either vfio-pci itself or a VFIO variant) - if it is, the
device directory will have a subdirectory called "vfio-dev". We can
use that to appropriately widen the list of drivers that libvirt will
allow for VFIO device assignment.

This patch doesn't remove the explicit check for the exact "vfio-pci"
driver (since that would cause systems with pre-6.1 kernels to behave
incorrectly), but adds an additional check for the vfio-dev directory,
so that any VFIO variant driver is acceptable for libvirt to continue
setting up for VFIO device assignment.

Signed-off-by: Laine Stump <laine@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
This commit is contained in:
Laine Stump 2023-06-02 14:34:51 -04:00
parent 222b66974e
commit 6ce071f609
4 changed files with 87 additions and 23 deletions

View File

@ -743,9 +743,8 @@ virHostdevPreparePCIDevicesImpl(virHostdevManager *mgr,
mgr->inactivePCIHostdevs) < 0) mgr->inactivePCIHostdevs) < 0)
goto reattachdevs; goto reattachdevs;
} else { } else {
g_autofree char *driverPath = NULL; g_autofree char *drvName = NULL;
g_autofree char *driverName = NULL; virPCIStubDriver drvType;
int stub;
/* Unmanaged devices should already have been marked as /* Unmanaged devices should already have been marked as
* inactive: if that's the case, we can simply move on */ * inactive: if that's the case, we can simply move on */
@ -765,19 +764,17 @@ virHostdevPreparePCIDevicesImpl(virHostdevManager *mgr,
* information about active / inactive device across * information about active / inactive device across
* daemon restarts has been implemented */ * daemon restarts has been implemented */
if (virPCIDeviceGetCurrentDriverPathAndName(pci, &driverPath, if (virPCIDeviceGetCurrentDriverNameAndType(pci, &drvName,
&driverName) < 0) { &drvType) < 0) {
goto reattachdevs; goto reattachdevs;
} }
stub = virPCIStubDriverTypeFromString(driverName); if (drvType > VIR_PCI_STUB_DRIVER_NONE) {
if (stub > VIR_PCI_STUB_DRIVER_NONE &&
stub < VIR_PCI_STUB_DRIVER_LAST) {
/* The device is bound to a known stub driver: store this /* The device is bound to a known stub driver: store this
* information and add a copy to the inactive list */ * information and add a copy to the inactive list */
virPCIDeviceSetStubDriverType(pci, stub); virPCIDeviceSetStubDriverType(pci, drvType);
virPCIDeviceSetStubDriverName(pci, drvName);
VIR_DEBUG("Adding PCI device %s to inactive list", VIR_DEBUG("Adding PCI device %s to inactive list",
virPCIDeviceGetName(pci)); virPCIDeviceGetName(pci));
@ -2291,18 +2288,13 @@ virHostdevPrepareOneNVMeDevice(virHostdevManager *hostdev_mgr,
/* Let's check if all PCI devices are NVMe disks. */ /* Let's check if all PCI devices are NVMe disks. */
for (i = 0; i < virPCIDeviceListCount(pciDevices); i++) { for (i = 0; i < virPCIDeviceListCount(pciDevices); i++) {
virPCIDevice *pci = virPCIDeviceListGet(pciDevices, i); virPCIDevice *pci = virPCIDeviceListGet(pciDevices, i);
g_autofree char *drvPath = NULL;
g_autofree char *drvName = NULL; g_autofree char *drvName = NULL;
int stub = VIR_PCI_STUB_DRIVER_NONE; virPCIStubDriver drvType;
if (virPCIDeviceGetCurrentDriverPathAndName(pci, &drvPath, &drvName) < 0) if (virPCIDeviceGetCurrentDriverNameAndType(pci, &drvName, &drvType) < 0)
goto cleanup; goto cleanup;
if (drvName) if (drvType == VIR_PCI_STUB_DRIVER_VFIO || STREQ_NULLABLE(drvName, "nvme"))
stub = virPCIStubDriverTypeFromString(drvName);
if (stub == VIR_PCI_STUB_DRIVER_VFIO ||
STREQ_NULLABLE(drvName, "nvme"))
continue; continue;
VIR_WARN("Suspicious NVMe disk assignment. PCI device " VIR_WARN("Suspicious NVMe disk assignment. PCI device "

View File

@ -3074,6 +3074,7 @@ virPCIDeviceFileIterate;
virPCIDeviceFree; virPCIDeviceFree;
virPCIDeviceGetAddress; virPCIDeviceGetAddress;
virPCIDeviceGetConfigPath; virPCIDeviceGetConfigPath;
virPCIDeviceGetCurrentDriverNameAndType;
virPCIDeviceGetCurrentDriverPathAndName; virPCIDeviceGetCurrentDriverPathAndName;
virPCIDeviceGetIOMMUGroupDev; virPCIDeviceGetIOMMUGroupDev;
virPCIDeviceGetIOMMUGroupList; virPCIDeviceGetIOMMUGroupList;

View File

@ -280,6 +280,73 @@ virPCIDeviceGetCurrentDriverPathAndName(virPCIDevice *dev,
} }
/**
 * virPCIDeviceGetCurrentDriverNameAndType:
 * @dev: PCI device whose current driver binding is queried
 * @drvName: filled in with the name of the driver bound to @dev; may
 *           be NULL on return when no driver name was found
 * @drvType: filled in with the stub driver type corresponding to the
 *           bound driver, or VIR_PCI_STUB_DRIVER_NONE if there is none
 *
 * Look up the driver currently bound to @dev and classify it as a
 * stub driver type. Classification happens in two steps:
 *
 *  1) if the driver name itself maps to a known stub driver type,
 *     that type is reported;
 *
 *  2) otherwise the device's sysfs directory is checked for a
 *     "vfio-dev" subdirectory. As of kernel 6.1 that subdirectory is
 *     present whenever the bound driver is vfio-pci or one of the
 *     vfio "variant" drivers (drivers offering the standard vfio-pci
 *     functionality plus device-specific extras). For example, if
 *     /sys/bus/pci/devices/0000:04:11.4/vfio-dev exists, the driver
 *     bound to 0000:04:11.4 is treated as VIR_PCI_STUB_DRIVER_VFIO.
 *
 * When neither step matches (including on kernels too old to create
 * "vfio-dev"), @drvType is set to VIR_PCI_STUB_DRIVER_NONE.
 *
 * Returns 0 on success, or -1 if the driver lookup done by
 * virPCIDeviceGetCurrentDriverPathAndName() fails; in that case
 * @drvType is not written by this function (error reporting is
 * presumably done by that helper).
 */
int
virPCIDeviceGetCurrentDriverNameAndType(virPCIDevice *dev,
                                        char **drvName,
                                        virPCIStubDriver *drvType)
{
    g_autofree char *boundDrvPath = NULL;
    g_autofree char *vfioDevPath = NULL;
    int knownType;

    if (virPCIDeviceGetCurrentDriverPathAndName(dev, &boundDrvPath, drvName) < 0)
        return -1;

    /* No driver name was reported, so there is nothing to classify. */
    if (*drvName == NULL) {
        *drvType = VIR_PCI_STUB_DRIVER_NONE;
        return 0;
    }

    /* Step 1: the driver name exactly matches a known stub driver. */
    knownType = virPCIStubDriverTypeFromString(*drvName);
    if (knownType > VIR_PCI_STUB_DRIVER_NONE) {
        *drvType = knownType;
        return 0;
    }

    /* Step 2: an unrecognized name may still be a vfio variant
     * driver; the "vfio-dev" directory under the device's sysfs
     * entry is what tells us.
     */
    vfioDevPath = virPCIFile(dev->name, "vfio-dev");

    if (!virFileIsDir(vfioDevPath)) {
        VIR_DEBUG("Driver %s is NOT a vfio_pci driver, or kernel is too old",
                  *drvName);
        *drvType = VIR_PCI_STUB_DRIVER_NONE;
        return 0;
    }

    VIR_DEBUG("Driver %s is a vfio_pci driver", *drvName);
    *drvType = VIR_PCI_STUB_DRIVER_VFIO;
    return 0;
}
static int static int
virPCIDeviceConfigOpenInternal(virPCIDevice *dev, bool readonly, bool fatal) virPCIDeviceConfigOpenInternal(virPCIDevice *dev, bool readonly, bool fatal)
{ {
@ -1007,8 +1074,8 @@ virPCIDeviceReset(virPCIDevice *dev,
virPCIDeviceList *activeDevs, virPCIDeviceList *activeDevs,
virPCIDeviceList *inactiveDevs) virPCIDeviceList *inactiveDevs)
{ {
g_autofree char *drvPath = NULL;
g_autofree char *drvName = NULL; g_autofree char *drvName = NULL;
virPCIStubDriver drvType;
int ret = -1; int ret = -1;
int fd = -1; int fd = -1;
int hdrType = -1; int hdrType = -1;
@ -1034,15 +1101,16 @@ virPCIDeviceReset(virPCIDevice *dev,
* reset it whenever appropriate, so doing it ourselves would just * reset it whenever appropriate, so doing it ourselves would just
* be redundant. * be redundant.
*/ */
if (virPCIDeviceGetCurrentDriverPathAndName(dev, &drvPath, &drvName) < 0) if (virPCIDeviceGetCurrentDriverNameAndType(dev, &drvName, &drvType) < 0)
goto cleanup; goto cleanup;
if (virPCIStubDriverTypeFromString(drvName) == VIR_PCI_STUB_DRIVER_VFIO) { if (drvType == VIR_PCI_STUB_DRIVER_VFIO) {
VIR_DEBUG("Device %s is bound to vfio-pci - skip reset",
dev->name); VIR_DEBUG("Device %s is bound to %s - skip reset", dev->name, drvName);
ret = 0; ret = 0;
goto cleanup; goto cleanup;
} }
VIR_DEBUG("Resetting device %s", dev->name); VIR_DEBUG("Resetting device %s", dev->name);
if ((fd = virPCIDeviceConfigOpenWrite(dev)) < 0) if ((fd = virPCIDeviceConfigOpenWrite(dev)) < 0)

View File

@ -283,6 +283,9 @@ int virPCIDeviceRebind(virPCIDevice *dev);
int virPCIDeviceGetCurrentDriverPathAndName(virPCIDevice *dev, int virPCIDeviceGetCurrentDriverPathAndName(virPCIDevice *dev,
char **path, char **path,
char **name); char **name);
int virPCIDeviceGetCurrentDriverNameAndType(virPCIDevice *dev,
char **drvName,
virPCIStubDriver *drvType);
int virPCIDeviceIsPCIExpress(virPCIDevice *dev); int virPCIDeviceIsPCIExpress(virPCIDevice *dev);
int virPCIDeviceHasPCIExpressLink(virPCIDevice *dev); int virPCIDeviceHasPCIExpressLink(virPCIDevice *dev);