util: save the correct VF's info when using a dual port SRIOV NIC in single port mode

Mellanox ConnectX-3 dual port SRIOV NICs present a bit of a challenge
when assigning one of their VFs to a guest using VFIO device
assignment.

These NICs have only a single PCI PF device, and that single PF has
two netdevs sharing the single PCI address - one for port 1 and one
for port 2. When a VF is created it can also have 2 netdevs, or it can
be setup in "single port" mode, where the VF has only a single netdev,
and that netdev is connected either to port 1 or to port 2.

When the VF is created in dual port mode, you get/set the MAC
address/vlan tag for the port 1 VF by sending a netlink message to the
PF's port1 netdev, and you get/set the MAC address/vlan tag for the
port 2 VF by sending a netlink message to the PF's port 2 netdev. (Of
course libvirt doesn't have any way to describe MAC/vlan info for 2
ports in a single hostdev interface, so that's a bit of a moot point)

When the VF is created in single port mode, you can *set* the MAC/vlan
info by sending a netlink message to *either* PF netdev - the driver
is smart enough to understand that there's only a single netdev, and
set the MAC/vlan for that netdev. When you want to *get* it, however,
the driver is more accurate - it will return 00:00:00:00:00:00 for the
MAC if you request it from the port 1 PF netdev when the VF was
configured to be single port on port 2, or if you request if from the
port 2 PF netdev when the VF was configured to be single port on port
1.

Based on this information, when *getting* the MAC/vlan info (to save
the original setting prior to assignment), we determine the correct PF
netdev by matching phys_port_id between VF and PF.

(IMPORTANT NOTE: this implies that to do PCI device assignment of the
VFs on dual port Mellanox cards using <interface type='hostdev'>
(i.e. if you want the MAC address/vlan tag to be set), not only must
the VFs be configured in single port mode, but also the VFs *must* be
bound to the host VF net driver, and libvirt must use managed='yes')

By the time libvirt is ready to set the new MAC/vlan tag, the VF has
already been unbound from the host net driver and bound to
vfio-pci. This isn't problematic though because, as stated earlier,
when a VF is created in single port mode, commands to configure it can
be sent to either the port 1 PF netdev or the port 2 PF netdev.

When it is time to restore the original MAC/vlan tag, again the VF
will *not* be bound to a host net driver, so it won't be possible to
learn from sysfs whether to use the port 1 or port 2 PF netdev for the
netlink commands. And again, it doesn't matter which netdev you
use. However, we must keep in mind that we saved the original settings
to a file called "${PF}_${VFNUM}". To solve this problem, we just
check for the existence of ${PF1}_${VFNUM} and ${PF2}_${VFNUM}, and
use whichever one we find (since we know that only one can be there)
This commit is contained in:
Laine Stump 2017-08-07 20:25:57 -04:00
parent 39d136b67b
commit b67eaa6351
3 changed files with 53 additions and 9 deletions

View File

@ -307,7 +307,9 @@ virHostdevIsVirtualFunction(virDomainHostdevDefPtr hostdev)
static int
virHostdevNetDevice(virDomainHostdevDefPtr hostdev, char **linkdev,
virHostdevNetDevice(virDomainHostdevDefPtr hostdev,
int pfNetDevIdx,
char **linkdev,
int *vf)
{
int ret = -1;
@ -317,9 +319,10 @@ virHostdevNetDevice(virDomainHostdevDefPtr hostdev, char **linkdev,
return ret;
if (virPCIIsVirtualFunction(sysfs_path) == 1) {
if (virPCIGetVirtualFunctionInfo(sysfs_path, linkdev,
vf) < 0)
if (virPCIGetVirtualFunctionInfo(sysfs_path, pfNetDevIdx,
linkdev, vf) < 0) {
goto cleanup;
}
} else {
/* In practice this should never happen, since we currently
* only support assigning SRIOV VFs via <interface
@ -444,7 +447,7 @@ virHostdevSaveNetConfig(virDomainHostdevDefPtr hostdev,
goto cleanup;
}
if (virHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
if (virHostdevNetDevice(hostdev, -1, &linkdev, &vf) < 0)
goto cleanup;
if (virNetDevSaveNetConfig(linkdev, vf, stateDir, true) < 0)
@ -482,7 +485,7 @@ virHostdevSetNetConfig(virDomainHostdevDefPtr hostdev,
if (!virHostdevIsPCINetDevice(hostdev))
return 0;
if (virHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
if (virHostdevNetDevice(hostdev, -1, &linkdev, &vf) < 0)
goto cleanup;
vlan = virDomainNetGetActualVlan(hostdev->parent.data.net);
@ -545,7 +548,7 @@ virHostdevRestoreNetConfig(virDomainHostdevDefPtr hostdev,
return ret;
}
if (virHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
if (virHostdevNetDevice(hostdev, 0, &linkdev, &vf) < 0)
return ret;
virtPort = virDomainNetGetActualVirtPortProfile(
@ -565,6 +568,18 @@ virHostdevRestoreNetConfig(virDomainHostdevDefPtr hostdev,
ret = virNetDevReadNetConfig(linkdev, vf, oldStateDir,
&adminMAC, &vlan, &MAC);
if (ret < 0) {
/* see if the config was saved using the PF's "port 2"
* netdev for the file name.
*/
VIR_FREE(linkdev);
if (virHostdevNetDevice(hostdev, 1, &linkdev, &vf) >= 0) {
ret = virNetDevReadNetConfig(linkdev, vf, stateDir,
&adminMAC, &vlan, &MAC);
}
}
if (ret == 0) {
/* if a MAC was stored for the VF, we should now restore
* that as the adminMAC. We have to do it this way because

View File

@ -2935,10 +2935,14 @@ virPCIGetNetName(const char *device_link_sysfs_path,
int
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
char **pfname, int *vf_index)
int pfNetDevIdx,
char **pfname,
int *vf_index)
{
virPCIDeviceAddressPtr pf_config_address = NULL;
char *pf_sysfs_device_path = NULL;
char *vfname = NULL;
char *vfPhysPortID = NULL;
int ret = -1;
if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
@ -2957,8 +2961,28 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
goto cleanup;
}
if (virPCIGetNetName(pf_sysfs_device_path, 0, NULL, pfname) < 0)
/* If the caller hasn't asked for a specific pfNetDevIdx, and VF
* is bound to a netdev, learn that netdev's phys_port_id (if
* available). This can be used to disambiguate when the PF has
* multiple netdevs. If the VF isn't bound to a netdev, then we
* return netdev[pfNetDevIdx] on the PF, which may or may not be
* correct.
*/
if (pfNetDevIdx == -1) {
if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
goto cleanup;
if (vfname) {
if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0)
goto cleanup;
}
pfNetDevIdx = 0;
}
if (virPCIGetNetName(pf_sysfs_device_path,
pfNetDevIdx, vfPhysPortID, pfname) < 0) {
goto cleanup;
}
if (!*pfname) {
/* this shouldn't be possible. A VF can't exist unless its
@ -2974,6 +2998,8 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
cleanup:
VIR_FREE(pf_config_address);
VIR_FREE(pf_sysfs_device_path);
VIR_FREE(vfname);
VIR_FREE(vfPhysPortID);
return ret;
}
@ -3044,6 +3070,7 @@ virPCIGetNetName(const char *device_link_sysfs_path ATTRIBUTE_UNUSED,
int
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED,
int pfNetDevIdx ATTRIBUTE_UNUSED,
char **pfname ATTRIBUTE_UNUSED,
int *vf_index ATTRIBUTE_UNUSED)
{

View File

@ -226,7 +226,9 @@ int virPCIGetAddrString(unsigned int domain,
int virPCIDeviceAddressParse(char *address, virPCIDeviceAddressPtr bdf);
int virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
char **pfname, int *vf_index);
int pfNetDevIdx,
char **pfname,
int *vf_index);
int virPCIDeviceUnbind(virPCIDevicePtr dev);
int virPCIDeviceRebind(virPCIDevicePtr dev);