libvirt/src/util/virpci.h
Laine Stump b67eaa6351 util: save the correct VF's info when using a dual port SRIOV NIC in single port mode
Mellanox ConnectX-3 dual port SRIOV NICs present a bit of a challenge
when assigning one of their VFs to a guest using VFIO device
assignment.

These NICs have only a single PCI PF device, and that single PF has
two netdevs sharing the single PCI address - one for port 1 and one
for port 2. When a VF is created it can also have 2 netdevs, or it can
be setup in "single port" mode, where the VF has only a single netdev,
and that netdev is connected either to port 1 or to port 2.

When the VF is created in dual port mode, you get/set the MAC
address/vlan tag for the port 1 VF by sending a netlink message to the
PF's port1 netdev, and you get/set the MAC address/vlan tag for the
port 2 VF by sending a netlink message to the PF's port 2 netdev. (Of
course libvirt doesn't have any way to describe MAC/vlan info for 2
ports in a single hostdev interface, so that's a bit of a moot point)

When the VF is created in single port mode, you can *set* the MAC/vlan
info by sending a netlink message to *either* PF netdev - the driver
is smart enough to understand that there's only a single netdev, and
set the MAC/vlan for that netdev. When you want to *get* it, however,
the driver is more accurate - it will return 00:00:00:00:00:00 for the
MAC if you request it from the port 1 PF netdev when the VF was
configured to be single port on port 2, or if you request if from the
port 2 PF netdev when the VF was configured to be single port on port
1.

Based on this information, when *getting* the MAC/vlan info (to save
the original setting prior to assignment), we determine the correct PF
netdev by matching phys_port_id between VF and PF.

(IMPORTANT NOTE: this implies that to do PCI device assignment of the
VFs on dual port Mellanox cards using <interface type='hostdev'>
(i.e. if you want the MAC address/vlan tag to be set), not only must
the VFs be configured in single port mode, but also the VFs *must* be
bound to the host VF net driver, and libvirt must use managed='yes')

By the time libvirt is ready to set the new MAC/vlan tag, the VF has
already been unbound from the host net driver and bound to
vfio-pci. This isn't problematic though because, as stated earlier,
when a VF is created in single port mode, commands to configure it can
be sent to either the port 1 PF netdev or the port 2 PF netdev.

When it is time to restore the original MAC/vlan tag, again the VF
will *not* be bound to a host net driver, so it won't be possible to
learn from sysfs whether to use the port 1 or port 2 PF netdev for the
netlink commands. And again, it doesn't matter which netdev you
use. However, we must keep in mind that we saved the original settings
to a file called "${PF}_${VFNUM}". To solve this problem, we just
check for the existence of ${PF1}_${VFNUM} and ${PF2}_${VFNUM}, and
use whichever one we find (since we know that only one can be there)
2017-08-11 19:05:20 -04:00

253 lines
9.6 KiB
C

/*
* virpci.h: helper APIs for managing host PCI devices
*
* Copyright (C) 2009, 2011-2015 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*
* Authors:
* Mark McLoughlin <markmc@redhat.com>
*/
#ifndef __VIR_PCI_H__
# define __VIR_PCI_H__
# include "internal.h"
# include "virobject.h"
# include "virutil.h"
typedef struct _virPCIDevice virPCIDevice;
typedef virPCIDevice *virPCIDevicePtr;
typedef struct _virPCIDeviceAddress virPCIDeviceAddress;
typedef virPCIDeviceAddress *virPCIDeviceAddressPtr;
typedef struct _virPCIDeviceList virPCIDeviceList;
typedef virPCIDeviceList *virPCIDeviceListPtr;
struct _virPCIDeviceAddress {
unsigned int domain;
unsigned int bus;
unsigned int slot;
unsigned int function;
int multi; /* virTristateSwitch */
};
typedef enum {
VIR_PCI_STUB_DRIVER_NONE = 0,
VIR_PCI_STUB_DRIVER_XEN,
VIR_PCI_STUB_DRIVER_KVM,
VIR_PCI_STUB_DRIVER_VFIO,
VIR_PCI_STUB_DRIVER_LAST
} virPCIStubDriver;
VIR_ENUM_DECL(virPCIStubDriver);
typedef enum {
VIR_PCIE_LINK_SPEED_NA = 0,
VIR_PCIE_LINK_SPEED_25,
VIR_PCIE_LINK_SPEED_5,
VIR_PCIE_LINK_SPEED_8,
VIR_PCIE_LINK_SPEED_16,
VIR_PCIE_LINK_SPEED_LAST
} virPCIELinkSpeed;
VIR_ENUM_DECL(virPCIELinkSpeed)
typedef enum {
VIR_PCI_HEADER_ENDPOINT = 0,
VIR_PCI_HEADER_PCI_BRIDGE,
VIR_PCI_HEADER_CARDBUS_BRIDGE,
VIR_PCI_HEADER_LAST
} virPCIHeaderType;
VIR_ENUM_DECL(virPCIHeader)
typedef struct _virPCIELink virPCIELink;
typedef virPCIELink *virPCIELinkPtr;
struct _virPCIELink {
int port;
virPCIELinkSpeed speed;
unsigned int width;
};
typedef struct _virPCIEDeviceInfo virPCIEDeviceInfo;
typedef virPCIEDeviceInfo *virPCIEDeviceInfoPtr;
struct _virPCIEDeviceInfo {
/* Not all PCI Express devices have link. For example this 'Root Complex
* Integrated Endpoint' and 'Root Complex Event Collector' don't have it. */
virPCIELink *link_cap; /* PCIe device link capabilities */
virPCIELink *link_sta; /* Actually negotiated capabilities */
};
virPCIDevicePtr virPCIDeviceNew(unsigned int domain,
unsigned int bus,
unsigned int slot,
unsigned int function);
virPCIDevicePtr virPCIDeviceCopy(virPCIDevicePtr dev);
void virPCIDeviceFree(virPCIDevicePtr dev);
const char *virPCIDeviceGetName(virPCIDevicePtr dev);
const char *virPCIDeviceGetConfigPath(virPCIDevicePtr dev);
int virPCIDeviceDetach(virPCIDevicePtr dev,
virPCIDeviceListPtr activeDevs,
virPCIDeviceListPtr inactiveDevs);
int virPCIDeviceReattach(virPCIDevicePtr dev,
virPCIDeviceListPtr activeDevs,
virPCIDeviceListPtr inactiveDevs);
int virPCIDeviceReset(virPCIDevicePtr dev,
virPCIDeviceListPtr activeDevs,
virPCIDeviceListPtr inactiveDevs);
void virPCIDeviceSetManaged(virPCIDevice *dev,
bool managed);
bool virPCIDeviceGetManaged(virPCIDevice *dev);
void virPCIDeviceSetStubDriver(virPCIDevicePtr dev,
virPCIStubDriver driver);
virPCIStubDriver virPCIDeviceGetStubDriver(virPCIDevicePtr dev);
virPCIDeviceAddressPtr virPCIDeviceGetAddress(virPCIDevicePtr dev);
int virPCIDeviceSetUsedBy(virPCIDevice *dev,
const char *drv_name,
const char *dom_name);
void virPCIDeviceGetUsedBy(virPCIDevice *dev,
const char **drv_name,
const char **dom_name);
bool virPCIDeviceGetUnbindFromStub(virPCIDevicePtr dev);
void virPCIDeviceSetUnbindFromStub(virPCIDevice *dev,
bool unbind);
bool virPCIDeviceGetRemoveSlot(virPCIDevicePtr dev);
void virPCIDeviceSetRemoveSlot(virPCIDevice *dev,
bool remove_slot);
bool virPCIDeviceGetReprobe(virPCIDevicePtr dev);
void virPCIDeviceSetReprobe(virPCIDevice *dev,
bool reprobe);
virPCIDeviceListPtr virPCIDeviceListNew(void);
int virPCIDeviceListAdd(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
int virPCIDeviceListAddCopy(virPCIDeviceListPtr list, virPCIDevicePtr dev);
virPCIDevicePtr virPCIDeviceListGet(virPCIDeviceListPtr list,
int idx);
size_t virPCIDeviceListCount(virPCIDeviceListPtr list);
virPCIDevicePtr virPCIDeviceListSteal(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
virPCIDevicePtr virPCIDeviceListStealIndex(virPCIDeviceListPtr list,
int idx);
void virPCIDeviceListDel(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
virPCIDevicePtr virPCIDeviceListFind(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
virPCIDevicePtr
virPCIDeviceListFindByIDs(virPCIDeviceListPtr list,
unsigned int domain,
unsigned int bus,
unsigned int slot,
unsigned int function);
int virPCIDeviceListFindIndex(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
/*
* Callback that will be invoked once for each file
* associated with / used for PCI host device access.
*
* Should return 0 if successfully processed, or
* -1 to indicate error and abort iteration
*/
typedef int (*virPCIDeviceFileActor)(virPCIDevicePtr dev,
const char *path, void *opaque);
int virPCIDeviceFileIterate(virPCIDevicePtr dev,
virPCIDeviceFileActor actor,
void *opaque);
typedef int (*virPCIDeviceAddressActor)(virPCIDeviceAddressPtr addr,
void *opaque);
int virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddressPtr orig,
virPCIDeviceAddressActor actor,
void *opaque);
virPCIDeviceListPtr virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev);
int virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddressPtr devAddr,
virPCIDeviceAddressPtr **iommuGroupDevices,
size_t *nIommuGroupDevices);
int virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddressPtr addr);
char *virPCIDeviceGetIOMMUGroupDev(virPCIDevicePtr dev);
int virPCIDeviceIsAssignable(virPCIDevicePtr dev,
int strict_acs_check);
int virPCIDeviceWaitForCleanup(virPCIDevicePtr dev, const char *matcher);
virPCIDeviceAddressPtr
virPCIGetDeviceAddressFromSysfsLink(const char *device_link);
int virPCIGetPhysicalFunction(const char *vf_sysfs_path,
virPCIDeviceAddressPtr *pf);
int virPCIGetVirtualFunctions(const char *sysfs_path,
virPCIDeviceAddressPtr **virtual_functions,
size_t *num_virtual_functions,
unsigned int *max_virtual_functions);
int virPCIIsVirtualFunction(const char *vf_sysfs_device_link);
int virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
const char *vf_sysfs_device_link,
int *vf_index);
int virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
char **pci_sysfs_device_link);
int virPCIGetNetName(const char *device_link_sysfs_path,
size_t idx,
char *physPortID,
char **netname);
int virPCIGetSysfsFile(char *virPCIDeviceName,
char **pci_sysfs_device_link)
ATTRIBUTE_RETURN_CHECK;
int virPCIGetAddrString(unsigned int domain,
unsigned int bus,
unsigned int slot,
unsigned int function,
char **pciConfigAddr)
ATTRIBUTE_NONNULL(5) ATTRIBUTE_RETURN_CHECK;
int virPCIDeviceAddressParse(char *address, virPCIDeviceAddressPtr bdf);
int virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
int pfNetDevIdx,
char **pfname,
int *vf_index);
int virPCIDeviceUnbind(virPCIDevicePtr dev);
int virPCIDeviceRebind(virPCIDevicePtr dev);
int virPCIDeviceGetDriverPathAndName(virPCIDevicePtr dev,
char **path,
char **name);
int virPCIDeviceIsPCIExpress(virPCIDevicePtr dev);
int virPCIDeviceHasPCIExpressLink(virPCIDevicePtr dev);
int virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
int *ca_port,
unsigned int *cap_speed,
unsigned int *cap_width,
unsigned int *sta_speed,
unsigned int *sta_width);
int virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType);
void virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev);
#endif /* __VIR_PCI_H__ */