mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2024-12-22 21:55:25 +00:00
nodedev: add switchdev to NIC capabilities
Add functionality to libvirt that allows querying an interface for the availability of switchdev offloading NIC capabilities. The switchdev mode was introduced in kernel 4.8; the iproute2 devlink command can retrieve the switchdev NIC feature, for example: devlink dev eswitch show pci/0000:03:00.0. This feature is needed for OpenStack so that it can make a scheduling decision based on whether the NIC is in hardware offload (switchdev) mode or regular SR-IOV (legacy) mode, and select the appropriate hypervisors with the requested capability; see [1]. [1] - https://specs.openstack.org/openstack/nova-specs/specs/pike/approved/enable-sriov-nic-features.html Reviewed-by: Laine Stump <laine@laine.org> Reviewed-by: John Ferlan <jferlan@redhat.com>
This commit is contained in:
parent
8703813aae
commit
8708ca01c0
13
configure.ac
13
configure.ac
@ -627,6 +627,19 @@ if test "$with_linux" = "yes"; then
|
||||
AC_CHECK_HEADERS([linux/btrfs.h])
|
||||
fi
|
||||
|
||||
dnl
dnl check for kernel headers required by devlink
dnl
dnl AC_CHECK_DECLS runs its "found" action once for every symbol that is
dnl declared, so defining HAVE_DECL_DEVLINK from that action would enable
dnl the devlink code as soon as any single symbol exists.  Record a miss in
dnl a shell variable instead and define the macro only when every required
dnl declaration is present.
dnl
if test "$with_linux" = "yes"; then
    AC_CHECK_HEADERS([linux/devlink.h])
    have_decl_devlink=yes
    AC_CHECK_DECLS([DEVLINK_GENL_VERSION, DEVLINK_GENL_NAME, DEVLINK_ATTR_MAX, DEVLINK_CMD_ESWITCH_GET, DEVLINK_ATTR_BUS_NAME, DEVLINK_ATTR_DEV_NAME, DEVLINK_ATTR_ESWITCH_MODE, DEVLINK_ESWITCH_MODE_SWITCHDEV],
                   [],
                   [have_decl_devlink=no],
                   [[#include <linux/devlink.h>]])
    if test "$have_decl_devlink" = "yes"; then
        AC_DEFINE([HAVE_DECL_DEVLINK],
                  [1],
                  [whether devlink declarations are available])
    fi
fi
|
||||
|
||||
dnl Allow perl/python overrides
|
||||
AC_PATH_PROGS([PYTHON], [python2 python])
|
||||
if test -z "$PYTHON"; then
|
||||
|
@ -227,6 +227,7 @@
|
||||
<dt><code>rxhash</code></dt><dd>receive-hashing</dd>
|
||||
<dt><code>rdma</code></dt><dd>remote-direct-memory-access</dd>
|
||||
<dt><code>txudptnl</code></dt><dd>tx-udp-tunnel-segmentation</dd>
|
||||
<dt><code>switchdev</code></dt><dd>kernel-forward-plane-offload</dd>
|
||||
</dl>
|
||||
</dd>
|
||||
<dt><code>capability</code></dt>
|
||||
|
@ -59,6 +59,10 @@
|
||||
# include <net/if_dl.h>
|
||||
#endif
|
||||
|
||||
#if HAVE_DECL_DEVLINK
|
||||
# include <linux/devlink.h>
|
||||
#endif
|
||||
|
||||
#ifndef IFNAMSIZ
|
||||
# define IFNAMSIZ 16
|
||||
#endif
|
||||
@ -2481,7 +2485,8 @@ VIR_ENUM_IMPL(virNetDevFeature,
|
||||
"ntuple",
|
||||
"rxhash",
|
||||
"rdma",
|
||||
"txudptnl")
|
||||
"txudptnl",
|
||||
"switchdev")
|
||||
|
||||
#ifdef __linux__
|
||||
int
|
||||
@ -3115,6 +3120,181 @@ virNetDevGetEthtoolFeatures(virBitmapPtr bitmap,
|
||||
}
|
||||
|
||||
|
||||
# if HAVE_DECL_DEVLINK
|
||||
/**
 * virNetDevPutExtraHeader
 * reserve and prepare room for an extra header
 * This function sets to zero the room that is required to put the extra
 * header after the initial Netlink header. This function also increases
 * the nlmsg_len field.
 *
 * @nlh: pointer to Netlink header
 * @size: size of the extra header that we want to put
 *
 * The caller must make sure the buffer that @nlh lives in has at least
 * NLMSG_ALIGN(@size) bytes available past the current nlmsg_len.
 *
 * Returns pointer to the start of the extended header
 */
static void *
virNetDevPutExtraHeader(struct nlmsghdr *nlh,
                        size_t size)
{
    /* The extra header starts right after what the message holds so far. */
    char *ptr = (char *)nlh + nlh->nlmsg_len;
    size_t len = NLMSG_ALIGN(size);

    /* Clear the whole aligned region, padding included, so that header
     * fields the caller does not set explicitly are sent as zero instead
     * of whatever the buffer happened to contain. */
    memset(ptr, 0, len);
    nlh->nlmsg_len += len;
    return ptr;
}
|
||||
|
||||
|
||||
/**
|
||||
* virNetDevGetFamilyId:
|
||||
* This function supplies the devlink family id
|
||||
*
|
||||
* @family_name: the name of the family to query
|
||||
*
|
||||
* Returns family id or 0 on failure.
|
||||
*/
|
||||
static uint32_t
|
||||
virNetDevGetFamilyId(const char *family_name)
|
||||
{
|
||||
struct nl_msg *nl_msg = NULL;
|
||||
struct nlmsghdr *resp = NULL;
|
||||
struct genlmsghdr* gmsgh = NULL;
|
||||
struct nlattr *tb[CTRL_ATTR_MAX + 1] = {NULL, };
|
||||
unsigned int recvbuflen;
|
||||
uint32_t family_id = 0;
|
||||
|
||||
if (!(nl_msg = nlmsg_alloc_simple(GENL_ID_CTRL,
|
||||
NLM_F_REQUEST | NLM_F_ACK))) {
|
||||
virReportOOMError();
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (!(gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr))))
|
||||
goto cleanup;
|
||||
|
||||
gmsgh->cmd = CTRL_CMD_GETFAMILY;
|
||||
gmsgh->version = DEVLINK_GENL_VERSION;
|
||||
|
||||
if (nla_put_string(nl_msg, CTRL_ATTR_FAMILY_NAME, family_name) < 0) {
|
||||
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
||||
_("allocated netlink buffer is too small"));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, NETLINK_GENERIC, 0) < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (nlmsg_parse(resp, sizeof(struct nlmsghdr), tb, CTRL_CMD_MAX, NULL) < 0) {
|
||||
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
||||
_("malformed netlink response message"));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (tb[CTRL_ATTR_FAMILY_ID] == NULL)
|
||||
goto cleanup;
|
||||
|
||||
family_id = *(uint32_t *)RTA_DATA(tb[CTRL_ATTR_FAMILY_ID]);
|
||||
|
||||
cleanup:
|
||||
nlmsg_free(nl_msg);
|
||||
VIR_FREE(resp);
|
||||
return family_id;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* virNetDevSwitchdevFeature
|
||||
* This function checks for the availability of Switchdev feature
|
||||
* and add it to bitmap
|
||||
*
|
||||
* @ifname: name of the interface
|
||||
* @out: add Switchdev feature if exist to bitmap
|
||||
*
|
||||
* Returns 0 on success, -1 on failure.
|
||||
*/
|
||||
static int
|
||||
virNetDevSwitchdevFeature(const char *ifname,
|
||||
virBitmapPtr *out)
|
||||
{
|
||||
struct nl_msg *nl_msg = NULL;
|
||||
struct nlmsghdr *resp = NULL;
|
||||
unsigned int recvbuflen;
|
||||
struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {NULL, };
|
||||
virPCIDevicePtr pci_device_ptr = NULL;
|
||||
struct genlmsghdr* gmsgh = NULL;
|
||||
const char *pci_name;
|
||||
char *pfname = NULL;
|
||||
int is_vf = -1;
|
||||
int ret = -1;
|
||||
uint32_t family_id;
|
||||
|
||||
if ((family_id = virNetDevGetFamilyId(DEVLINK_GENL_NAME)) <= 0)
|
||||
return ret;
|
||||
|
||||
if ((is_vf = virNetDevIsVirtualFunction(ifname)) < 0)
|
||||
return ret;
|
||||
|
||||
if (is_vf == 1 && virNetDevGetPhysicalFunction(ifname, &pfname) < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (!(nl_msg = nlmsg_alloc_simple(family_id,
|
||||
NLM_F_REQUEST | NLM_F_ACK))) {
|
||||
virReportOOMError();
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (!(gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr))))
|
||||
goto cleanup;
|
||||
|
||||
gmsgh->cmd = DEVLINK_CMD_ESWITCH_GET;
|
||||
gmsgh->version = DEVLINK_GENL_VERSION;
|
||||
|
||||
pci_device_ptr = pfname ? virNetDevGetPCIDevice(pfname) :
|
||||
virNetDevGetPCIDevice(ifname);
|
||||
if (pci_device_ptr == NULL)
|
||||
goto cleanup;
|
||||
|
||||
pci_name = virPCIDeviceGetName(pci_device_ptr);
|
||||
|
||||
if (nla_put(nl_msg, DEVLINK_ATTR_BUS_NAME, strlen("pci")+1, "pci") < 0 ||
|
||||
nla_put(nl_msg, DEVLINK_ATTR_DEV_NAME, strlen(pci_name)+1, pci_name) < 0) {
|
||||
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
||||
_("allocated netlink buffer is too small"));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, NETLINK_GENERIC, 0) < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (nlmsg_parse(resp, sizeof(struct genlmsghdr), tb, DEVLINK_ATTR_MAX, NULL) < 0) {
|
||||
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
||||
_("malformed netlink response message"));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (tb[DEVLINK_ATTR_ESWITCH_MODE] &&
|
||||
*(int *)RTA_DATA(tb[DEVLINK_ATTR_ESWITCH_MODE]) == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
|
||||
ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_SWITCHDEV));
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
cleanup:
|
||||
nlmsg_free(nl_msg);
|
||||
virPCIDeviceFree(pci_device_ptr);
|
||||
VIR_FREE(resp);
|
||||
VIR_FREE(pfname);
|
||||
return ret;
|
||||
}
|
||||
# else
|
||||
static int
|
||||
virNetDevSwitchdevFeature(const char *ifname ATTRIBUTE_UNUSED,
|
||||
virBitmapPtr *out ATTRIBUTE_UNUSED)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
# endif
|
||||
|
||||
|
||||
# if HAVE_DECL_ETHTOOL_GFEATURES
|
||||
/**
|
||||
* virNetDevGFeatureAvailable
|
||||
@ -3315,6 +3495,9 @@ virNetDevGetFeatures(const char *ifname,
|
||||
if (virNetDevRDMAFeature(ifname, out) < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (virNetDevSwitchdevFeature(ifname, out) < 0)
|
||||
goto cleanup;
|
||||
|
||||
ret = 0;
|
||||
cleanup:
|
||||
VIR_FORCE_CLOSE(fd);
|
||||
|
@ -112,6 +112,7 @@ typedef enum {
|
||||
VIR_NET_DEV_FEAT_RXHASH,
|
||||
VIR_NET_DEV_FEAT_RDMA,
|
||||
VIR_NET_DEV_FEAT_TXUDPTNL,
|
||||
VIR_NET_DEV_FEAT_SWITCHDEV,
|
||||
VIR_NET_DEV_FEAT_LAST
|
||||
} virNetDevFeature;
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
<feature name='rxhash'/>
|
||||
<feature name='rdma'/>
|
||||
<feature name='txudptnl'/>
|
||||
<feature name='switchdev'/>
|
||||
<capability type='80211'/>
|
||||
</capability>
|
||||
</device>
|
||||
|
@ -15,6 +15,7 @@
|
||||
<feature name='rxhash'/>
|
||||
<feature name='rdma'/>
|
||||
<feature name='txudptnl'/>
|
||||
<feature name='switchdev'/>
|
||||
<capability type='80203'/>
|
||||
</capability>
|
||||
</device>
|
||||
|
Loading…
Reference in New Issue
Block a user