libvirt/src/util/virpci.h

288 lines
11 KiB
C
Raw Normal View History

/*
2012-12-13 14:52:25 +00:00
* virpci.h: helper APIs for managing host PCI devices
*
* Copyright (C) 2009, 2011-2015 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "internal.h"
#include "virmdev.h"
#include "virobject.h"
#include "virenum.h"
typedef struct _virPCIDevice virPCIDevice;
typedef virPCIDevice *virPCIDevicePtr;
typedef struct _virPCIDeviceAddress virPCIDeviceAddress;
typedef virPCIDeviceAddress *virPCIDeviceAddressPtr;
typedef struct _virPCIDeviceList virPCIDeviceList;
typedef virPCIDeviceList *virPCIDeviceListPtr;
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceList, virObjectUnref);
#define VIR_DOMAIN_DEVICE_ZPCI_MAX_UID UINT16_MAX
#define VIR_DOMAIN_DEVICE_ZPCI_MAX_FID UINT32_MAX
conf: fix zPCI address auto-generation on s390 Let us fix the issues with zPCI address validation and auto-generation on s390. Currently, there are two issues with handling the ZPCI address extension. Firstly, when the uid is to be auto-generated with a specified fid, .i.e.: ... <address type='pci'> <zpci fid='0x0000001f'/> </address> ... we expect uid='0x0001' (or the next available uid for the domain). However, we get a parsing error: $ virsh define zpci.xml error: XML error: Invalid PCI address uid='0x0000', must be > 0x0000 and <= 0xffff Secondly, when the uid is specified explicitly with the invalid numerical value '0x0000', we actually expect the parsing error above. However, the domain is being defined and the uid value is silently changed to a valid value. The first issue is a bug and the second one is undesired behaviour, and both issues are related to how we (in-band) signal invalid values for uid and fid. So let's fix the XML parsing to do validation based on what is actually specified in the XML. The first issue is also related to the current code behaviour, which is, if either uid or fid is specified by the user, it is incorrectly assumed that both uid and fid are specified. This bug is fixed by identifying when the user specified ZPCI address is incomplete and auto-generating the missing ZPCI address. Signed-off-by: Bjoern Walk <bwalk@linux.ibm.com> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> Signed-off-by: Shalini Chellathurai Saroja <shalini@linux.ibm.com> Reviewed-by: Andrea Bolognani <abologna@redhat.com>
2020-06-18 08:25:15 +00:00
typedef struct _virZPCIDeviceAddressID virZPCIDeviceAddressID;
typedef struct _virZPCIDeviceAddress virZPCIDeviceAddress;
typedef virZPCIDeviceAddress *virZPCIDeviceAddressPtr;
conf: fix zPCI address auto-generation on s390 Let us fix the issues with zPCI address validation and auto-generation on s390. Currently, there are two issues with handling the ZPCI address extension. Firstly, when the uid is to be auto-generated with a specified fid, .i.e.: ... <address type='pci'> <zpci fid='0x0000001f'/> </address> ... we expect uid='0x0001' (or the next available uid for the domain). However, we get a parsing error: $ virsh define zpci.xml error: XML error: Invalid PCI address uid='0x0000', must be > 0x0000 and <= 0xffff Secondly, when the uid is specified explicitly with the invalid numerical value '0x0000', we actually expect the parsing error above. However, the domain is being defined and the uid value is silently changed to a valid value. The first issue is a bug and the second one is undesired behaviour, and both issues are related to how we (in-band) signal invalid values for uid and fid. So let's fix the XML parsing to do validation based on what is actually specified in the XML. The first issue is also related to the current code behaviour, which is, if either uid or fid is specified by the user, it is incorrectly assumed that both uid and fid are specified. This bug is fixed by identifying when the user specified ZPCI address is incomplete and auto-generating the missing ZPCI address. Signed-off-by: Bjoern Walk <bwalk@linux.ibm.com> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> Signed-off-by: Shalini Chellathurai Saroja <shalini@linux.ibm.com> Reviewed-by: Andrea Bolognani <abologna@redhat.com>
2020-06-18 08:25:15 +00:00
struct _virZPCIDeviceAddressID {
unsigned int value;
bool isSet;
};
struct _virZPCIDeviceAddress {
conf: fix zPCI address auto-generation on s390 Let us fix the issues with zPCI address validation and auto-generation on s390. Currently, there are two issues with handling the ZPCI address extension. Firstly, when the uid is to be auto-generated with a specified fid, .i.e.: ... <address type='pci'> <zpci fid='0x0000001f'/> </address> ... we expect uid='0x0001' (or the next available uid for the domain). However, we get a parsing error: $ virsh define zpci.xml error: XML error: Invalid PCI address uid='0x0000', must be > 0x0000 and <= 0xffff Secondly, when the uid is specified explicitly with the invalid numerical value '0x0000', we actually expect the parsing error above. However, the domain is being defined and the uid value is silently changed to a valid value. The first issue is a bug and the second one is undesired behaviour, and both issues are related to how we (in-band) signal invalid values for uid and fid. So let's fix the XML parsing to do validation based on what is actually specified in the XML. The first issue is also related to the current code behaviour, which is, if either uid or fid is specified by the user, it is incorrectly assumed that both uid and fid are specified. This bug is fixed by identifying when the user specified ZPCI address is incomplete and auto-generating the missing ZPCI address. Signed-off-by: Bjoern Walk <bwalk@linux.ibm.com> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> Signed-off-by: Shalini Chellathurai Saroja <shalini@linux.ibm.com> Reviewed-by: Andrea Bolognani <abologna@redhat.com>
2020-06-18 08:25:15 +00:00
virZPCIDeviceAddressID uid; /* exempt from syntax-check */
virZPCIDeviceAddressID fid;
/* Don't forget to update virPCIDeviceAddressCopy if needed. */
};
#define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d"
util: Add phys_port_name support on virPCIGetNetName virPCIGetNetName is used to get the name of the netdev associated with a particular PCI device. This is used when we have a VF name, but need the PF name in order to send a netlink command (e.g. in order to get/set the MAC address of the VF). In simple cases there is a single netdev associated with any PCI device, so it is easy to figure out the PF netdev for a VF - just look for the PCI device that has the VF listed in its "virtfns" directory; the only name in the "net" subdirectory of that PCI device's sysfs directory is the PF netdev that is upstream of the VF in question. In some cases there can be more than one netdev in a PCI device's net directory though. In the past, the only case of this was for SR-IOV NICs that could have multiple PF's per PCI device. In this case, all PF netdevs associated with a PCI address would be listed in the "net" subdirectory of the PCI device's directory in sysfs. At the same time, all VF netdevs and all PF netdevs have a phys_port_id in their sysfs, so the way to learn the correct PF netdev for a particular VF netdev is to search through the list of devices in the net subdirectory of the PF's PCI device, looking for the one netdev with a "phys_port_id" matching that of the VF netdev. But starting in kernel 5.8, the NVIDIA Mellanox driver began linking the VFs' representor netdevs to the PF PCI address [1], and so the VF representor netdevs would also show up in the net subdirectory. However, all of the devices that do so also only have a single PF netdev for any given PCI address. This means that the net directory of the PCI device can still hold multiple net devices, but only one of them will be the PF netdev (the others are VF representors): $ ls '/sys/bus/pci/devices/0000:82:00.0/net' ens1f0 eth0 eth1 In this case the way to find the PF device is to look at the "phys_port_name" attribute of each netdev in sysfs. All PF devices have a phys_port_name matching a particular regex (p[0-9]+$)|(p[0-9]+s[0-9]+$) Since there can only be one PF in the entire list of devices, once we match that regex, we've found the PF netdev. [1] - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/ commit/?id=123f0f53dd64b67e34142485fe866a8a581f12f1 Co-Authored-by: Moshe Levi <moshele@nvidia.com> Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com> Reviewed-by: Adrian Chiris <adrianc@nvidia.com> Reviewed-by: Laine Stump <laine@redhat.com>
2021-01-21 12:15:22 +00:00
/* Represents format of PF's phys_port_name in switchdev mode:
* 'p%u' or 'p%us%u'. New line checked since value is readed from sysfs file.
*/
#define VIR_PF_PHYS_PORT_NAME_REGEX "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
struct _virPCIDeviceAddress {
unsigned int domain;
unsigned int bus;
unsigned int slot;
unsigned int function;
int multi; /* virTristateSwitch */
int extFlags; /* enum virPCIDeviceAddressExtensionFlags */
virZPCIDeviceAddress zpci;
/* Don't forget to update virPCIDeviceAddressCopy if needed. */
};
typedef enum {
VIR_PCI_STUB_DRIVER_NONE = 0,
VIR_PCI_STUB_DRIVER_XEN,
VIR_PCI_STUB_DRIVER_VFIO,
VIR_PCI_STUB_DRIVER_LAST
} virPCIStubDriver;
VIR_ENUM_DECL(virPCIStubDriver);
typedef enum {
VIR_PCIE_LINK_SPEED_NA = 0,
VIR_PCIE_LINK_SPEED_25,
VIR_PCIE_LINK_SPEED_5,
VIR_PCIE_LINK_SPEED_8,
VIR_PCIE_LINK_SPEED_16,
VIR_PCIE_LINK_SPEED_LAST
} virPCIELinkSpeed;
VIR_ENUM_DECL(virPCIELinkSpeed);
typedef enum {
VIR_PCI_HEADER_ENDPOINT = 0,
VIR_PCI_HEADER_PCI_BRIDGE,
VIR_PCI_HEADER_CARDBUS_BRIDGE,
VIR_PCI_HEADER_LAST
} virPCIHeaderType;
VIR_ENUM_DECL(virPCIHeader);
typedef struct _virPCIELink virPCIELink;
typedef virPCIELink *virPCIELinkPtr;
struct _virPCIELink {
int port;
virPCIELinkSpeed speed;
unsigned int width;
};
typedef struct _virPCIEDeviceInfo virPCIEDeviceInfo;
typedef virPCIEDeviceInfo *virPCIEDeviceInfoPtr;
struct _virPCIEDeviceInfo {
/* Not all PCI Express devices have link. For example this 'Root Complex
* Integrated Endpoint' and 'Root Complex Event Collector' don't have it. */
virPCIELink *link_cap; /* PCIe device link capabilities */
virPCIELink *link_sta; /* Actually negotiated capabilities */
};
virPCIDevicePtr virPCIDeviceNew(unsigned int domain,
unsigned int bus,
unsigned int slot,
unsigned int function);
virPCIDevicePtr virPCIDeviceCopy(virPCIDevicePtr dev);
void virPCIDeviceFree(virPCIDevicePtr dev);
const char *virPCIDeviceGetName(virPCIDevicePtr dev);
const char *virPCIDeviceGetConfigPath(virPCIDevicePtr dev);
int virPCIDeviceDetach(virPCIDevicePtr dev,
virPCIDeviceListPtr activeDevs,
virPCIDeviceListPtr inactiveDevs);
int virPCIDeviceReattach(virPCIDevicePtr dev,
virPCIDeviceListPtr activeDevs,
virPCIDeviceListPtr inactiveDevs);
int virPCIDeviceReset(virPCIDevicePtr dev,
virPCIDeviceListPtr activeDevs,
virPCIDeviceListPtr inactiveDevs);
void virPCIDeviceSetManaged(virPCIDevice *dev,
bool managed);
bool virPCIDeviceGetManaged(virPCIDevice *dev);
void virPCIDeviceSetStubDriver(virPCIDevicePtr dev,
virPCIStubDriver driver);
virPCIStubDriver virPCIDeviceGetStubDriver(virPCIDevicePtr dev);
virPCIDeviceAddressPtr virPCIDeviceGetAddress(virPCIDevicePtr dev);
int virPCIDeviceSetUsedBy(virPCIDevice *dev,
const char *drv_name,
const char *dom_name);
void virPCIDeviceGetUsedBy(virPCIDevice *dev,
const char **drv_name,
const char **dom_name);
bool virPCIDeviceGetUnbindFromStub(virPCIDevicePtr dev);
void virPCIDeviceSetUnbindFromStub(virPCIDevice *dev,
bool unbind);
bool virPCIDeviceGetRemoveSlot(virPCIDevicePtr dev);
void virPCIDeviceSetRemoveSlot(virPCIDevice *dev,
bool remove_slot);
bool virPCIDeviceGetReprobe(virPCIDevicePtr dev);
void virPCIDeviceSetReprobe(virPCIDevice *dev,
bool reprobe);
virPCIDeviceListPtr virPCIDeviceListNew(void);
int virPCIDeviceListAdd(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
int virPCIDeviceListAddCopy(virPCIDeviceListPtr list, virPCIDevicePtr dev);
virPCIDevicePtr virPCIDeviceListGet(virPCIDeviceListPtr list,
int idx);
size_t virPCIDeviceListCount(virPCIDeviceListPtr list);
virPCIDevicePtr virPCIDeviceListSteal(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
virPCIDevicePtr virPCIDeviceListStealIndex(virPCIDeviceListPtr list,
int idx);
void virPCIDeviceListDel(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
virPCIDevicePtr virPCIDeviceListFind(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
virPCIDevicePtr
virPCIDeviceListFindByIDs(virPCIDeviceListPtr list,
unsigned int domain,
unsigned int bus,
unsigned int slot,
unsigned int function);
int virPCIDeviceListFindIndex(virPCIDeviceListPtr list,
virPCIDevicePtr dev);
/*
* Callback that will be invoked once for each file
* associated with / used for PCI host device access.
*
* Should return 0 if successfully processed, or
* -1 to indicate error and abort iteration
*/
typedef int (*virPCIDeviceFileActor)(virPCIDevicePtr dev,
const char *path, void *opaque);
int virPCIDeviceFileIterate(virPCIDevicePtr dev,
virPCIDeviceFileActor actor,
void *opaque);
typedef int (*virPCIDeviceAddressActor)(virPCIDeviceAddressPtr addr,
void *opaque);
int virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddressPtr orig,
virPCIDeviceAddressActor actor,
void *opaque);
virPCIDeviceListPtr virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev);
int virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddressPtr devAddr,
virPCIDeviceAddressPtr **iommuGroupDevices,
size_t *nIommuGroupDevices);
int virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddressPtr addr);
char *virPCIDeviceAddressGetIOMMUGroupDev(const virPCIDeviceAddress *devAddr);
char *virPCIDeviceGetIOMMUGroupDev(virPCIDevicePtr dev);
int virPCIDeviceIsAssignable(virPCIDevicePtr dev,
int strict_acs_check);
virPCIDeviceAddressPtr
virPCIGetDeviceAddressFromSysfsLink(const char *device_link);
int virPCIGetPhysicalFunction(const char *vf_sysfs_path,
virPCIDeviceAddressPtr *pf);
int virPCIGetVirtualFunctions(const char *sysfs_path,
virPCIDeviceAddressPtr **virtual_functions,
size_t *num_virtual_functions,
unsigned int *max_virtual_functions);
int virPCIIsVirtualFunction(const char *vf_sysfs_device_link);
int virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
const char *vf_sysfs_device_link,
int *vf_index);
int virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
char **pci_sysfs_device_link);
int virPCIGetNetName(const char *device_link_sysfs_path,
size_t idx,
char *physPortID,
char **netname);
bool virPCIDeviceAddressIsValid(virPCIDeviceAddressPtr addr,
bool report);
bool virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress *addr);
bool virPCIDeviceAddressEqual(const virPCIDeviceAddress *addr1,
const virPCIDeviceAddress *addr2);
void virPCIDeviceAddressCopy(virPCIDeviceAddressPtr dst,
const virPCIDeviceAddress *src);
char *virPCIDeviceAddressAsString(const virPCIDeviceAddress *addr)
ATTRIBUTE_NONNULL(1);
int virPCIDeviceAddressParse(char *address, virPCIDeviceAddressPtr bdf);
conf: fix zPCI address auto-generation on s390 Let us fix the issues with zPCI address validation and auto-generation on s390. Currently, there are two issues with handling the ZPCI address extension. Firstly, when the uid is to be auto-generated with a specified fid, .i.e.: ... <address type='pci'> <zpci fid='0x0000001f'/> </address> ... we expect uid='0x0001' (or the next available uid for the domain). However, we get a parsing error: $ virsh define zpci.xml error: XML error: Invalid PCI address uid='0x0000', must be > 0x0000 and <= 0xffff Secondly, when the uid is specified explicitly with the invalid numerical value '0x0000', we actually expect the parsing error above. However, the domain is being defined and the uid value is silently changed to a valid value. The first issue is a bug and the second one is undesired behaviour, and both issues are related to how we (in-band) signal invalid values for uid and fid. So let's fix the XML parsing to do validation based on what is actually specified in the XML. The first issue is also related to the current code behaviour, which is, if either uid or fid is specified by the user, it is incorrectly assumed that both uid and fid are specified. This bug is fixed by identifying when the user specified ZPCI address is incomplete and auto-generating the missing ZPCI address. Signed-off-by: Bjoern Walk <bwalk@linux.ibm.com> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> Signed-off-by: Shalini Chellathurai Saroja <shalini@linux.ibm.com> Reviewed-by: Andrea Bolognani <abologna@redhat.com>
2020-06-18 08:25:15 +00:00
bool virZPCIDeviceAddressIsIncomplete(const virZPCIDeviceAddress *addr);
bool virZPCIDeviceAddressIsPresent(const virZPCIDeviceAddress *addr);
int virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
util: save the correct VF's info when using a dual port SRIOV NIC in single port mode Mellanox ConnectX-3 dual port SRIOV NICs present a bit of a challenge when assigning one of their VFs to a guest using VFIO device assignment. These NICs have only a single PCI PF device, and that single PF has two netdevs sharing the single PCI address - one for port 1 and one for port 2. When a VF is created it can also have 2 netdevs, or it can be setup in "single port" mode, where the VF has only a single netdev, and that netdev is connected either to port 1 or to port 2. When the VF is created in dual port mode, you get/set the MAC address/vlan tag for the port 1 VF by sending a netlink message to the PF's port1 netdev, and you get/set the MAC address/vlan tag for the port 2 VF by sending a netlink message to the PF's port 2 netdev. (Of course libvirt doesn't have any way to describe MAC/vlan info for 2 ports in a single hostdev interface, so that's a bit of a moot point) When the VF is created in single port mode, you can *set* the MAC/vlan info by sending a netlink message to *either* PF netdev - the driver is smart enough to understand that there's only a single netdev, and set the MAC/vlan for that netdev. When you want to *get* it, however, the driver is more accurate - it will return 00:00:00:00:00:00 for the MAC if you request it from the port 1 PF netdev when the VF was configured to be single port on port 2, or if you request if from the port 2 PF netdev when the VF was configured to be single port on port 1. Based on this information, when *getting* the MAC/vlan info (to save the original setting prior to assignment), we determine the correct PF netdev by matching phys_port_id between VF and PF. (IMPORTANT NOTE: this implies that to do PCI device assignment of the VFs on dual port Mellanox cards using <interface type='hostdev'> (i.e. if you want the MAC address/vlan tag to be set), not only must the VFs be configured in single port mode, but also the VFs *must* be bound to the host VF net driver, and libvirt must use managed='yes') By the time libvirt is ready to set the new MAC/vlan tag, the VF has already been unbound from the host net driver and bound to vfio-pci. This isn't problematic though because, as stated earlier, when a VF is created in single port mode, commands to configure it can be sent to either the port 1 PF netdev or the port 2 PF netdev. When it is time to restore the original MAC/vlan tag, again the VF will *not* be bound to a host net driver, so it won't be possible to learn from sysfs whether to use the port 1 or port 2 PF netdev for the netlink commands. And again, it doesn't matter which netdev you use. However, we must keep in mind that we saved the original settings to a file called "${PF}_${VFNUM}". To solve this problem, we just check for the existence of ${PF1}_${VFNUM} and ${PF2}_${VFNUM}, and use whichever one we find (since we know that only one can be there)
2017-08-08 00:25:57 +00:00
int pfNetDevIdx,
char **pfname,
int *vf_index);
int virPCIDeviceUnbind(virPCIDevicePtr dev);
int virPCIDeviceRebind(virPCIDevicePtr dev);
int virPCIDeviceGetDriverPathAndName(virPCIDevicePtr dev,
char **path,
char **name);
int virPCIDeviceIsPCIExpress(virPCIDevicePtr dev);
int virPCIDeviceHasPCIExpressLink(virPCIDevicePtr dev);
int virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
int *ca_port,
unsigned int *cap_speed,
unsigned int *cap_width,
unsigned int *sta_speed,
unsigned int *sta_width);
int virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType);
void virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev);
void virPCIDeviceAddressFree(virPCIDeviceAddressPtr address);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree);