libvirt/src/util/virpci.h

300 lines
11 KiB
C
Raw Normal View History

/*
2012-12-13 14:52:25 +00:00
* virpci.h: helper APIs for managing host PCI devices
*
* Copyright (C) 2009, 2011-2015 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "internal.h"
#include "virmdev.h"
#include "virobject.h"
#include "virenum.h"
#include "virpcivpd.h"
/* Forward typedefs for the three main types of this header. Only
 * _virPCIDeviceAddress has its body spelled out further down in the part of
 * the header visible here; the other two are used through pointers. */
typedef struct _virPCIDevice virPCIDevice;
typedef struct _virPCIDeviceAddress virPCIDeviceAddress;
typedef struct _virPCIDeviceList virPCIDeviceList;
/* Automatic-pointer cleanup for virPCIDeviceList goes through
 * virObjectUnref. */
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceList, virObjectUnref);
/* Upper bounds for the two zPCI identifiers: the uid is capped at
 * UINT16_MAX and the fid at UINT32_MAX. */
#define VIR_DOMAIN_DEVICE_ZPCI_MAX_UID UINT16_MAX
#define VIR_DOMAIN_DEVICE_ZPCI_MAX_FID UINT32_MAX
conf: fix zPCI address auto-generation on s390 Let us fix the issues with zPCI address validation and auto-generation on s390. Currently, there are two issues with handling the ZPCI address extension. Firstly, when the uid is to be auto-generated with a specified fid, i.e.: ... <address type='pci'> <zpci fid='0x0000001f'/> </address> ... we expect uid='0x0001' (or the next available uid for the domain). However, we get a parsing error: $ virsh define zpci.xml error: XML error: Invalid PCI address uid='0x0000', must be > 0x0000 and <= 0xffff Secondly, when the uid is specified explicitly with the invalid numerical value '0x0000', we actually expect the parsing error above. However, the domain is being defined and the uid value is silently changed to a valid value. The first issue is a bug and the second one is undesired behaviour, and both issues are related to how we (in-band) signal invalid values for uid and fid. So let's fix the XML parsing to do validation based on what is actually specified in the XML. The first issue is also related to the current code behaviour, which is, if either uid or fid is specified by the user, it is incorrectly assumed that both uid and fid are specified. This bug is fixed by identifying when the user-specified ZPCI address is incomplete and auto-generating the missing ZPCI address. Signed-off-by: Bjoern Walk <bwalk@linux.ibm.com> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> Signed-off-by: Shalini Chellathurai Saroja <shalini@linux.ibm.com> Reviewed-by: Andrea Bolognani <abologna@redhat.com>
2020-06-18 08:25:15 +00:00
typedef struct _virZPCIDeviceAddressID virZPCIDeviceAddressID;
typedef struct _virZPCIDeviceAddress virZPCIDeviceAddress;

/* One zPCI identifier plus a flag recording whether a value was given.
 * Tracking "set" separately from the number means no numeric value has to
 * double as an in-band "invalid/unset" marker, so validation can be based on
 * what was actually specified. */
struct _virZPCIDeviceAddressID {
    unsigned int value;
    bool isSet;
};

/* zPCI extension of a PCI address: a uid and a fid. Each carries its own
 * isSet flag, so one can be specified while the other is left unset. */
struct _virZPCIDeviceAddress {
    virZPCIDeviceAddressID uid; /* exempt from syntax-check */
    virZPCIDeviceAddressID fid;
    /* Don't forget to update virPCIDeviceAddressCopy if needed. */
};
/* printf-style template for a textual PCI address: a 4-digit hex field, two
 * 2-digit hex fields and a decimal field after the dot.
 * NOTE(review): presumably fed domain, bus, slot, function in that order —
 * confirm at the call sites. */
#define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d"
util: Add phys_port_name support on virPCIGetNetName virPCIGetNetName is used to get the name of the netdev associated with a particular PCI device. This is used when we have a VF name, but need the PF name in order to send a netlink command (e.g. in order to get/set the MAC address of the VF). In simple cases there is a single netdev associated with any PCI device, so it is easy to figure out the PF netdev for a VF - just look for the PCI device that has the VF listed in its "virtfns" directory; the only name in the "net" subdirectory of that PCI device's sysfs directory is the PF netdev that is upstream of the VF in question. In some cases there can be more than one netdev in a PCI device's net directory though. In the past, the only case of this was for SR-IOV NICs that could have multiple PF's per PCI device. In this case, all PF netdevs associated with a PCI address would be listed in the "net" subdirectory of the PCI device's directory in sysfs. At the same time, all VF netdevs and all PF netdevs have a phys_port_id in their sysfs, so the way to learn the correct PF netdev for a particular VF netdev is to search through the list of devices in the net subdirectory of the PF's PCI device, looking for the one netdev with a "phys_port_id" matching that of the VF netdev. But starting in kernel 5.8, the NVIDIA Mellanox driver began linking the VFs' representor netdevs to the PF PCI address [1], and so the VF representor netdevs would also show up in the net subdirectory. However, all of the devices that do so also only have a single PF netdev for any given PCI address. This means that the net directory of the PCI device can still hold multiple net devices, but only one of them will be the PF netdev (the others are VF representors): $ ls '/sys/bus/pci/devices/0000:82:00.0/net' ens1f0 eth0 eth1 In this case the way to find the PF device is to look at the "phys_port_name" attribute of each netdev in sysfs. 
All PF devices have a phys_port_name matching a particular regex (p[0-9]+$)|(p[0-9]+s[0-9]+$) Since there can only be one PF in the entire list of devices, once we match that regex, we've found the PF netdev. [1] - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/ commit/?id=123f0f53dd64b67e34142485fe866a8a581f12f1 Co-Authored-by: Moshe Levi <moshele@nvidia.com> Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com> Reviewed-by: Adrian Chiris <adrianc@nvidia.com> Reviewed-by: Laine Stump <laine@redhat.com>
2021-01-21 12:15:22 +00:00
/* Represents the format of a PF's phys_port_name in switchdev mode:
 * 'p%u' or 'p%us%u'. Newline is checked since the value is read from a
 * sysfs file.
 */
#define VIR_PF_PHYS_PORT_NAME_REGEX "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
/* A PCI address (domain, bus, slot, function) together with a tristate
 * switch, a set of extension flags and the embedded zPCI extension. */
struct _virPCIDeviceAddress {
unsigned int domain;
unsigned int bus;
unsigned int slot;
unsigned int function;
virTristateSwitch multi; /* NOTE(review): presumably the multifunction setting — confirm */
int extFlags; /* enum virPCIDeviceAddressExtensionFlags */
virZPCIDeviceAddress zpci; /* stored by value, not by pointer */
/* Don't forget to update virPCIDeviceAddressCopy if needed. */
};
/* Stub driver selection for a PCI device. NONE is pinned to 0; LAST is the
 * final enumerator and therefore equals the number of entries before it. */
typedef enum {
VIR_PCI_STUB_DRIVER_NONE = 0,
VIR_PCI_STUB_DRIVER_XEN,
VIR_PCI_STUB_DRIVER_VFIO,
VIR_PCI_STUB_DRIVER_LAST
} virPCIStubDriver;
/* Declares the enum helper functions for virPCIStubDriver. */
VIR_ENUM_DECL(virPCIStubDriver);
/* PCI Express link speed. NA is pinned to 0; LAST is the final enumerator.
 * NOTE(review): the _25/_5/_8/_16 suffixes presumably stand for 2.5, 5, 8
 * and 16 GT/s — confirm against the string table in the implementation. */
typedef enum {
VIR_PCIE_LINK_SPEED_NA = 0,
VIR_PCIE_LINK_SPEED_25,
VIR_PCIE_LINK_SPEED_5,
VIR_PCIE_LINK_SPEED_8,
VIR_PCIE_LINK_SPEED_16,
VIR_PCIE_LINK_SPEED_LAST
} virPCIELinkSpeed;
/* Declares the enum helper functions for virPCIELinkSpeed. */
VIR_ENUM_DECL(virPCIELinkSpeed);
/* PCI header kind: endpoint, PCI bridge or CardBus bridge. ENDPOINT is
 * pinned to 0; LAST is the final enumerator. */
typedef enum {
VIR_PCI_HEADER_ENDPOINT = 0,
VIR_PCI_HEADER_PCI_BRIDGE,
VIR_PCI_HEADER_CARDBUS_BRIDGE,
VIR_PCI_HEADER_LAST
} virPCIHeaderType;
/* NOTE(review): the helper declaration uses the prefix virPCIHeader, not the
 * typedef name virPCIHeaderType — keep in mind when looking up the helpers. */
VIR_ENUM_DECL(virPCIHeader);
/* Description of one PCI Express link: a port number, a link speed and a
 * link width. */
typedef struct _virPCIELink virPCIELink;
struct _virPCIELink {
int port; /* signed, unlike width */
virPCIELinkSpeed speed;
unsigned int width;
};
/* PCI Express information for a device: two separately pointed-to link
 * descriptions, one for capabilities and one for the negotiated state. */
typedef struct _virPCIEDeviceInfo virPCIEDeviceInfo;
struct _virPCIEDeviceInfo {
/* Not all PCI Express devices have a link. For example, 'Root Complex
 * Integrated Endpoint' and 'Root Complex Event Collector' don't have one. */
virPCIELink *link_cap; /* PCIe device link capabilities */
virPCIELink *link_sta; /* Actually negotiated capabilities */
};
/* Lifecycle of a virPCIDevice: create one from an address (taken as a
 * pointer to const), duplicate an existing one, and free one.
 * virPCIDeviceFree is also the function registered for automatic cleanup of
 * virPCIDevice pointers at the end of this header. */
virPCIDevice *virPCIDeviceNew(const virPCIDeviceAddress *address);
virPCIDevice *virPCIDeviceCopy(virPCIDevice *dev);
void virPCIDeviceFree(virPCIDevice *dev);
/* String accessors; the results are const and so not meant to be modified
 * by the caller. */
const char *virPCIDeviceGetName(virPCIDevice *dev);
const char *virPCIDeviceGetConfigPath(virPCIDevice *dev);
/* Detach, reattach and reset a single device. All three take the same pair
 * of device lists (activeDevs, inactiveDevs) alongside the device and report
 * their outcome as an int.
 * NOTE(review): the success/failure convention is not visible in this
 * header — presumably 0 / -1; confirm against the implementation. */
int virPCIDeviceDetach(virPCIDevice *dev,
virPCIDeviceList *activeDevs,
virPCIDeviceList *inactiveDevs);
int virPCIDeviceReattach(virPCIDevice *dev,
virPCIDeviceList *activeDevs,
virPCIDeviceList *inactiveDevs);
int virPCIDeviceReset(virPCIDevice *dev,
virPCIDeviceList *activeDevs,
virPCIDeviceList *inactiveDevs);
/* Per-device property accessors, one Set/Get pair per property: managed,
 * stub driver, used-by, unbind-from-stub, remove-slot and reprobe, plus a
 * getter for the device's address. */
void virPCIDeviceSetManaged(virPCIDevice *dev,
bool managed);
bool virPCIDeviceGetManaged(virPCIDevice *dev);
void virPCIDeviceSetStubDriver(virPCIDevice *dev,
virPCIStubDriver driver);
virPCIStubDriver virPCIDeviceGetStubDriver(virPCIDevice *dev);
virPCIDeviceAddress *virPCIDeviceGetAddress(virPCIDevice *dev);
/* "Used by" is a (driver name, domain name) pair. The setter is the only
 * setter in this group that returns a status; the getter hands both names
 * back through out-parameters. */
int virPCIDeviceSetUsedBy(virPCIDevice *dev,
const char *drv_name,
const char *dom_name);
void virPCIDeviceGetUsedBy(virPCIDevice *dev,
const char **drv_name,
const char **dom_name);
bool virPCIDeviceGetUnbindFromStub(virPCIDevice *dev);
void virPCIDeviceSetUnbindFromStub(virPCIDevice *dev,
bool unbind);
bool virPCIDeviceGetRemoveSlot(virPCIDevice *dev);
void virPCIDeviceSetRemoveSlot(virPCIDevice *dev,
bool remove_slot);
bool virPCIDeviceGetReprobe(virPCIDevice *dev);
void virPCIDeviceSetReprobe(virPCIDevice *dev,
bool reprobe);
/* virPCIDeviceList container API: creation, insertion, positional access,
 * counting, removal and lookup.
 * NOTE(review): positions are passed and returned as int (Get, StealIndex,
 * FindIndex) while the count is a size_t — mind the signedness when
 * iterating. */
virPCIDeviceList *virPCIDeviceListNew(void);
int virPCIDeviceListAdd(virPCIDeviceList *list,
virPCIDevice *dev);
/* NOTE(review): presumably adds a duplicate of dev rather than dev itself —
 * confirm against the implementation. */
int virPCIDeviceListAddCopy(virPCIDeviceList *list, virPCIDevice *dev);
virPCIDevice *virPCIDeviceListGet(virPCIDeviceList *list,
int idx);
size_t virPCIDeviceListCount(virPCIDeviceList *list);
/* The Steal variants return a device, selected by address or by position;
 * Del selects by address and returns nothing. */
virPCIDevice *virPCIDeviceListSteal(virPCIDeviceList *list,
virPCIDeviceAddress *devAddr);
virPCIDevice *virPCIDeviceListStealIndex(virPCIDeviceList *list,
int idx);
void virPCIDeviceListDel(virPCIDeviceList *list,
virPCIDeviceAddress *devAddr);
/* Lookups: by address structure, by the four individual address numbers, or
 * returning a position instead of the device. */
virPCIDevice *virPCIDeviceListFind(virPCIDeviceList *list,
virPCIDeviceAddress *devAddr);
virPCIDevice *
virPCIDeviceListFindByIDs(virPCIDeviceList *list,
unsigned int domain,
unsigned int bus,
unsigned int slot,
unsigned int function);
int virPCIDeviceListFindIndex(virPCIDeviceList *list,
virPCIDeviceAddress *devAddr);
/*
 * Callback that will be invoked once for each file
 * associated with / used for PCI host device access.
 *
 * Should return 0 if successfully processed, or
 * -1 to indicate error and abort iteration
 */
typedef int (*virPCIDeviceFileActor)(virPCIDevice *dev,
const char *path, void *opaque);
/* Runs actor over the files of dev; opaque is the caller's pointer that the
 * callback type receives as its last argument. */
int virPCIDeviceFileIterate(virPCIDevice *dev,
virPCIDeviceFileActor actor,
void *opaque);
/* Callback type receiving one PCI address plus the caller's opaque
 * pointer. */
typedef int (*virPCIDeviceAddressActor)(virPCIDeviceAddress *addr,
void *opaque);
/* Runs actor over addresses related to orig through its IOMMU group.
 * NOTE(review): whether orig itself is visited is not visible here —
 * confirm against the implementation. */
int virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddress *orig,
virPCIDeviceAddressActor actor,
void *opaque);
/* IOMMU group queries, available both for a virPCIDevice and for a bare
 * address: the group's members (as a device list, or as an array of
 * addresses with its length returned through out-parameters), the group
 * number, and a string naming the group's device. */
virPCIDeviceList *virPCIDeviceGetIOMMUGroupList(virPCIDevice *dev);
int virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddress *devAddr,
virPCIDeviceAddress ***iommuGroupDevices,
size_t *nIommuGroupDevices);
int virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddress *addr);
/* Returns a non-const char *. NOTE(review): presumably a newly allocated
 * string the caller must free — confirm. */
char *virPCIDeviceAddressGetIOMMUGroupDev(const virPCIDeviceAddress *devAddr);
bool virPCIDeviceExists(const virPCIDeviceAddress *addr);
char *virPCIDeviceGetIOMMUGroupDev(virPCIDevice *dev);
/* strict_acs_check is an int used as a flag. */
int virPCIDeviceIsAssignable(virPCIDevice *dev,
int strict_acs_check);
/* Derives a PCI address from a sysfs device link path. */
virPCIDeviceAddress *
virPCIGetDeviceAddressFromSysfsLink(const char *device_link);
/* Looks up the physical function belonging to the virtual function at
 * vf_sysfs_path; the PF's address is handed back through pf. */
int virPCIGetPhysicalFunction(const char *vf_sysfs_path,
virPCIDeviceAddress **pf);
/* One virtual function: a pointer to its PCI address and the name of its
 * network interface. */
struct virPCIVirtualFunction {
virPCIDeviceAddress *addr;
char *ifname;
};
/* Array of virtual functions with its element count.
 * NOTE(review): maxfunctions is presumably the allocated capacity of the
 * array — confirm. */
struct _virPCIVirtualFunctionList {
struct virPCIVirtualFunction *functions;
size_t nfunctions;
size_t maxfunctions;
};
typedef struct _virPCIVirtualFunctionList virPCIVirtualFunctionList;
/* Destructor for the list; also registered below as the automatic-pointer
 * cleanup function for virPCIVirtualFunctionList. */
void virPCIVirtualFunctionListFree(virPCIVirtualFunctionList *list);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIVirtualFunctionList, virPCIVirtualFunctionListFree);
/* Collects the virtual functions of the physical function at sysfs_path
 * into a list returned through vfs.
 *
 * pfNetDevName is the netdev name of the PF. Pass it when the netdev names
 * of the VFs are wanted as well; pass NULL when they are not. The PF's
 * phys_port_id is derived from this name inside the function when needed,
 * so a PF without a readable phys_port_id must not prevent the VF names
 * from being retrieved. */
int virPCIGetVirtualFunctionsFull(const char *sysfs_path,
                                  virPCIVirtualFunctionList **vfs,
                                  const char *pfNetDevName);
/* Same inputs as virPCIGetVirtualFunctionsFull minus the PF netdev name. */
int virPCIGetVirtualFunctions(const char *sysfs_path,
virPCIVirtualFunctionList **vfs);
/* Returns an int rather than a bool. NOTE(review): presumably so an error
 * can be told apart from yes/no — confirm the exact values. */
int virPCIIsVirtualFunction(const char *vf_sysfs_device_link);
/* Determines the index of a VF under its PF, both given as sysfs device
 * links; the index comes back through vf_index. */
int virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
const char *vf_sysfs_device_link,
int *vf_index);
/* Produces the sysfs device link for an address through the
 * pci_sysfs_device_link out-parameter. */
int virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddress *addr,
char **pci_sysfs_device_link);
/* Looks up a network device name for the PCI device at
 * device_link_sysfs_path and hands it back through netname.
 * NOTE(review): idx and physPortNetDevName presumably choose among several
 * netdevs of the same PCI device — confirm against the implementation. */
int virPCIGetNetName(const char *device_link_sysfs_path,
size_t idx,
const char *physPortNetDevName,
char **netname);
/* Predicates and helpers operating on virPCIDeviceAddress values. */
/* NOTE(review): report presumably controls whether an error is raised for
 * an invalid address — confirm. */
bool virPCIDeviceAddressIsValid(virPCIDeviceAddress *addr,
bool report);
bool virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress *addr);
bool virPCIDeviceAddressEqual(const virPCIDeviceAddress *addr1,
const virPCIDeviceAddress *addr2);
/* Copies src into dst. The struct definitions carry reminders to keep this
 * function in sync whenever fields are added. */
void virPCIDeviceAddressCopy(virPCIDeviceAddress *dst,
const virPCIDeviceAddress *src);
/* addr is annotated as non-NULL (argument 1). */
char *virPCIDeviceAddressAsString(const virPCIDeviceAddress *addr)
ATTRIBUTE_NONNULL(1);
/* Parses a textual address into bdf. Note that address is taken as a
 * non-const char *. */
int virPCIDeviceAddressParse(char *address, virPCIDeviceAddress *bdf);
conf: fix zPCI address auto-generation on s390 Let us fix the issues with zPCI address validation and auto-generation on s390. Currently, there are two issues with handling the ZPCI address extension. Firstly, when the uid is to be auto-generated with a specified fid, i.e.: ... <address type='pci'> <zpci fid='0x0000001f'/> </address> ... we expect uid='0x0001' (or the next available uid for the domain). However, we get a parsing error: $ virsh define zpci.xml error: XML error: Invalid PCI address uid='0x0000', must be > 0x0000 and <= 0xffff Secondly, when the uid is specified explicitly with the invalid numerical value '0x0000', we actually expect the parsing error above. However, the domain is being defined and the uid value is silently changed to a valid value. The first issue is a bug and the second one is undesired behaviour, and both issues are related to how we (in-band) signal invalid values for uid and fid. So let's fix the XML parsing to do validation based on what is actually specified in the XML. The first issue is also related to the current code behaviour, which is, if either uid or fid is specified by the user, it is incorrectly assumed that both uid and fid are specified. This bug is fixed by identifying when the user-specified ZPCI address is incomplete and auto-generating the missing ZPCI address. Signed-off-by: Bjoern Walk <bwalk@linux.ibm.com> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> Signed-off-by: Shalini Chellathurai Saroja <shalini@linux.ibm.com> Reviewed-by: Andrea Bolognani <abologna@redhat.com>
2020-06-18 08:25:15 +00:00
/* Predicates over a zPCI address (uid and fid, each with its own isSet
 * flag).
 * NOTE(review): presumably "incomplete" means only one of uid/fid is set
 * and "present" means at least one is set — confirm against the
 * implementation. */
bool virZPCIDeviceAddressIsIncomplete(const virZPCIDeviceAddress *addr);
bool virZPCIDeviceAddressIsPresent(const virZPCIDeviceAddress *addr);
/* Looks up information about the virtual function at vf_sysfs_device_path:
 * the name of its physical function's netdev (returned through pfname) and
 * the VF's index (returned through vf_index).
 *
 * A single PF PCI device can expose more than one netdev (e.g. one per port
 * on a dual-port NIC).
 * NOTE(review): pfNetDevIdx presumably selects which of the PF's netdevs is
 * reported, and the int result is presumably 0 / -1 — confirm both against
 * the implementation. */
int virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
                                 int pfNetDevIdx,
                                 char **pfname,
                                 int *vf_index);
/* VPD access: a presence check and a getter returning a
 * virPCIVPDResource. */
bool virPCIDeviceHasVPD(virPCIDevice *dev);
virPCIVPDResource * virPCIDeviceGetVPD(virPCIDevice *dev);
/* Driver binding helpers. */
int virPCIDeviceUnbind(virPCIDevice *dev);
int virPCIDeviceRebind(virPCIDevice *dev);
/* Reports the device's driver as two strings through the path and name
 * out-parameters. */
int virPCIDeviceGetDriverPathAndName(virPCIDevice *dev,
char **path,
char **name);
/* PCI Express queries. Both return int rather than bool.
 * NOTE(review): presumably so a failure can be reported separately —
 * confirm the exact values. */
int virPCIDeviceIsPCIExpress(virPCIDevice *dev);
int virPCIDeviceHasPCIExpressLink(virPCIDevice *dev);
/* Returns link capability and link status values through out-parameters.
 * Speeds are handed back as unsigned int, not as virPCIELinkSpeed. */
int virPCIDeviceGetLinkCapSta(virPCIDevice *dev,
int *ca_port,
unsigned int *cap_speed,
unsigned int *cap_width,
unsigned int *sta_speed,
unsigned int *sta_width);
/* The header type is returned through an int, not a virPCIHeaderType. */
int virPCIGetHeaderType(virPCIDevice *dev, int *hdrType);
/* Destructors; both are registered as automatic-pointer cleanup functions
 * at the end of this header. */
void virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev);
void virPCIDeviceAddressFree(virPCIDeviceAddress *address);
/* Automatic-pointer cleanup registrations: each type is paired with its
 * own Free function declared above. */
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree);