diff --git a/configure.ac b/configure.ac index a46b9c693b..c9509c7f96 100644 --- a/configure.ac +++ b/configure.ac @@ -627,6 +627,19 @@ if test "$with_linux" = "yes"; then AC_CHECK_HEADERS([linux/btrfs.h]) fi +dnl +dnl check for kernel headers required by devlink +dnl +if test "$with_linux" = "yes"; then + AC_CHECK_HEADERS([linux/devlink.h]) + AC_CHECK_DECLS([DEVLINK_GENL_VERSION, DEVLINK_GENL_NAME, DEVLINK_ATTR_MAX, DEVLINK_CMD_ESWITCH_GET, DEVLINK_ATTR_BUS_NAME, DEVLINK_ATTR_DEV_NAME, DEVLINK_ATTR_ESWITCH_MODE, DEVLINK_ESWITCH_MODE_SWITCHDEV], + [AC_DEFINE([HAVE_DECL_DEVLINK], + [1], + [whether devlink declarations are available])], + [], + [[#include <linux/devlink.h>]]) +fi + dnl Allow perl/python overrides AC_PATH_PROGS([PYTHON], [python2 python]) if test -z "$PYTHON"; then diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in index 4d935b50f9..29244a8984 100644 --- a/docs/formatnode.html.in +++ b/docs/formatnode.html.in @@ -227,6 +227,7 @@
rxhash
receive-hashing
rdma
remote-direct-memory-access
txudptnl
tx-udp-tunnel-segmentation
+
switchdev
kernel-forward-plane-offload
capability
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index 51a6e42c5c..0406939254 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -59,6 +59,10 @@
 # include
 #endif
 
+#if HAVE_DECL_DEVLINK
+# include <linux/devlink.h>
+#endif
+
 #ifndef IFNAMSIZ
 # define IFNAMSIZ 16
 #endif
@@ -2481,7 +2485,8 @@ VIR_ENUM_IMPL(virNetDevFeature,
               "ntuple",
               "rxhash",
               "rdma",
-              "txudptnl")
+              "txudptnl",
+              "switchdev")
 
 #ifdef __linux__
 int
@@ -3115,6 +3120,203 @@ virNetDevGetEthtoolFeatures(virBitmapPtr bitmap,
 }
 
 
+# if HAVE_DECL_DEVLINK
+/**
+ * virNetDevPutExtraHeader
+ * reserve and prepare room for an extra header
+ * This function sets to zero the room that is required to put the extra
+ * header after the initial Netlink header. This function also increases
+ * the nlmsg_len field.
+ *
+ * No bounds check is done here: the caller must make sure the message
+ * buffer has room for NLMSG_ALIGN(@size) more bytes.
+ *
+ * @nlh: pointer to Netlink header
+ * @size: size of the extra header that we want to put
+ *
+ * Returns pointer to the start of the extended header
+ */
+static void *
+virNetDevPutExtraHeader(struct nlmsghdr *nlh,
+                        size_t size)
+{
+    char *ptr = (char *)nlh + nlh->nlmsg_len;
+    size_t len = NLMSG_ALIGN(size);
+
+    /* Zero the whole aligned area so that no stale bytes are sent */
+    memset(ptr, 0, len);
+    nlh->nlmsg_len += len;
+    return ptr;
+}
+
+
+/**
+ * virNetDevGetFamilyId:
+ * This function supplies the devlink family id
+ *
+ * Sends a CTRL_CMD_GETFAMILY request to the generic netlink controller
+ * and reads CTRL_ATTR_FAMILY_ID from the reply.
+ *
+ * @family_name: the name of the family to query
+ *
+ * Returns family id or 0 on failure.
+ */
+static uint32_t
+virNetDevGetFamilyId(const char *family_name)
+{
+    struct nl_msg *nl_msg = NULL;
+    struct nlmsghdr *resp = NULL;
+    struct genlmsghdr *gmsgh = NULL;
+    struct nlattr *tb[CTRL_ATTR_MAX + 1] = {NULL, };
+    unsigned int recvbuflen;
+    uint32_t family_id = 0;
+
+    if (!(nl_msg = nlmsg_alloc_simple(GENL_ID_CTRL,
+                                      NLM_F_REQUEST | NLM_F_ACK))) {
+        virReportOOMError();
+        goto cleanup;
+    }
+
+    if (!(gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr))))
+        goto cleanup;
+
+    gmsgh->cmd = CTRL_CMD_GETFAMILY;
+    gmsgh->version = DEVLINK_GENL_VERSION;
+
+    if (nla_put_string(nl_msg, CTRL_ATTR_FAMILY_NAME, family_name) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("allocated netlink buffer is too small"));
+        goto cleanup;
+    }
+
+    if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, NETLINK_GENERIC, 0) < 0)
+        goto cleanup;
+
+    /* The attributes follow the generic netlink header, and tb[] holds
+     * CTRL_ATTR_MAX + 1 entries, so that is the largest type to parse. */
+    if (nlmsg_parse(resp, sizeof(struct genlmsghdr), tb, CTRL_ATTR_MAX, NULL) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("malformed netlink response message"));
+        goto cleanup;
+    }
+
+    if (tb[CTRL_ATTR_FAMILY_ID] == NULL)
+        goto cleanup;
+
+    /* CTRL_ATTR_FAMILY_ID is a 16 bit attribute */
+    family_id = nla_get_u16(tb[CTRL_ATTR_FAMILY_ID]);
+
+ cleanup:
+    nlmsg_free(nl_msg);
+    VIR_FREE(resp);
+    return family_id;
+}
+
+
+/**
+ * virNetDevSwitchdevFeature
+ * This function checks for the availability of Switchdev feature
+ * and add it to bitmap
+ *
+ * The eswitch mode is queried through devlink for the PCI device of
+ * @ifname (or of its physical function when @ifname is a VF). When the
+ * devlink family id or the PCI device cannot be obtained, the feature
+ * is simply not added and 0 is returned.
+ *
+ * @ifname: name of the interface
+ * @out: add Switchdev feature if exist to bitmap
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+virNetDevSwitchdevFeature(const char *ifname,
+                          virBitmapPtr *out)
+{
+    struct nl_msg *nl_msg = NULL;
+    struct nlmsghdr *resp = NULL;
+    unsigned int recvbuflen;
+    struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {NULL, };
+    virPCIDevicePtr pci_device_ptr = NULL;
+    struct genlmsghdr *gmsgh = NULL;
+    const char *pci_name;
+    char *pfname = NULL;
+    int is_vf = -1;
+    int ret = -1;
+    uint32_t family_id;
+
+    /* Without a devlink family id there is no eswitch mode to query */
+    if ((family_id = virNetDevGetFamilyId(DEVLINK_GENL_NAME)) == 0)
+        return 0;
+
+    if ((is_vf = virNetDevIsVirtualFunction(ifname)) < 0)
+        return ret;
+
+    if (is_vf == 1 && virNetDevGetPhysicalFunction(ifname, &pfname) < 0)
+        goto cleanup;
+
+    if (!(nl_msg = nlmsg_alloc_simple(family_id,
+                                      NLM_F_REQUEST | NLM_F_ACK))) {
+        virReportOOMError();
+        goto cleanup;
+    }
+
+    if (!(gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr))))
+        goto cleanup;
+
+    gmsgh->cmd = DEVLINK_CMD_ESWITCH_GET;
+    gmsgh->version = DEVLINK_GENL_VERSION;
+
+    pci_device_ptr = pfname ? virNetDevGetPCIDevice(pfname) :
+                              virNetDevGetPCIDevice(ifname);
+    /* No PCI device, then no feature bit to check/add */
+    if (pci_device_ptr == NULL) {
+        ret = 0;
+        goto cleanup;
+    }
+
+    pci_name = virPCIDeviceGetName(pci_device_ptr);
+
+    if (nla_put(nl_msg, DEVLINK_ATTR_BUS_NAME, strlen("pci")+1, "pci") < 0 ||
+        nla_put(nl_msg, DEVLINK_ATTR_DEV_NAME, strlen(pci_name)+1, pci_name) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("allocated netlink buffer is too small"));
+        goto cleanup;
+    }
+
+    if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, NETLINK_GENERIC, 0) < 0)
+        goto cleanup;
+
+    if (nlmsg_parse(resp, sizeof(struct genlmsghdr), tb, DEVLINK_ATTR_MAX, NULL) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("malformed netlink response message"));
+        goto cleanup;
+    }
+
+    /* DEVLINK_ATTR_ESWITCH_MODE is a 16 bit attribute */
+    if (tb[DEVLINK_ATTR_ESWITCH_MODE] &&
+        nla_get_u16(tb[DEVLINK_ATTR_ESWITCH_MODE]) == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
+        ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_SWITCHDEV));
+    }
+
+    ret = 0;
+
+ cleanup:
+    nlmsg_free(nl_msg);
+    virPCIDeviceFree(pci_device_ptr);
+    VIR_FREE(resp);
+    VIR_FREE(pfname);
+    return ret;
+}
+# else
+static int
+virNetDevSwitchdevFeature(const char *ifname ATTRIBUTE_UNUSED,
+                          virBitmapPtr *out ATTRIBUTE_UNUSED)
+{
+    return 0;
+}
+# endif
+
+
 # if HAVE_DECL_ETHTOOL_GFEATURES
 /**
  * virNetDevGFeatureAvailable
@@ -3315,6 +3517,9 @@ virNetDevGetFeatures(const char *ifname,
     if (virNetDevRDMAFeature(ifname, out) < 0)
         goto cleanup;
 
+    if (virNetDevSwitchdevFeature(ifname, out) < 0)
+        goto cleanup;
+
     ret = 0;
  cleanup:
     VIR_FORCE_CLOSE(fd);
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 9205c0e86c..71eaf45e30 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -112,6 +112,7 @@ typedef enum {
     VIR_NET_DEV_FEAT_RXHASH,
     VIR_NET_DEV_FEAT_RDMA,
     VIR_NET_DEV_FEAT_TXUDPTNL,
+    VIR_NET_DEV_FEAT_SWITCHDEV,
     VIR_NET_DEV_FEAT_LAST
 } virNetDevFeature;
 
diff --git a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml index d4c96e8533..88252e6a4e 100644 --- a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml +++ b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml @@ -15,6 +15,7 @@ +
diff --git a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml index 71bf90e20e..f77dfcc3e0 100644 --- a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml +++ b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml @@ -15,6 +15,7 @@ +