libvirt/src/util/virnetdevmacvlan.c

672 lines
21 KiB
C
Raw Normal View History

/*
* Copyright (C) 2010-2011 Red Hat, Inc.
* Copyright (C) 2010 IBM Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Authors:
* Stefan Berger <stefanb@us.ibm.com>
*
* Notes:
* netlink: http://lovezutto.googlepages.com/netlink.pdf
* iproute2 package
*
*/
#include <config.h>
#include "virnetdevmacvlan.h"
#include "util.h"
#include "virterror_internal.h"
#define VIR_FROM_THIS VIR_FROM_NET
#define virNetDevError(code, ...) \
virReportErrorHelper(VIR_FROM_NET, code, __FILE__, \
__FUNCTION__, __LINE__, __VA_ARGS__)
VIR_ENUM_IMPL(virNetDevMacVLanMode, VIR_NETDEV_MACVLAN_MODE_LAST,
"vepa",
"private",
"bridge",
"passthrough")
#if WITH_MACVTAP
# include <stdint.h>
# include <stdio.h>
# include <errno.h>
# include <fcntl.h>
# include <sys/socket.h>
# include <sys/ioctl.h>
# include <linux/if.h>
# include <linux/if_tun.h>
/* Older kernels lacked this enum value. */
# if !HAVE_DECL_MACVLAN_MODE_PASSTHRU
# define MACVLAN_MODE_PASSTHRU 8
# endif
# include "memory.h"
# include "logging.h"
# include "interface.h"
# include "uuid.h"
# include "virfile.h"
# include "netlink.h"
# include "virnetdev.h"
# define MACVTAP_NAME_PREFIX "macvtap"
# define MACVTAP_NAME_PATTERN "macvtap%d"
/**
* virNetDevMacVLanCreate:
*
* @ifname: The name the interface is supposed to have; optional parameter
* @type: The type of device, i.e., "macvtap"
* @macaddress: The MAC address of the device
* @srcdev: The name of the 'link' device
* @macvlan_mode: The macvlan mode to use
* @retry: Pointer to integer that will be '1' upon return if an interface
* with the same name already exists and it is worth to try
* again with a different name
*
* Create a macvtap device with the given properties.
*
* Returns 0 on success, -1 on fatal error.
*/
int
virNetDevMacVLanCreate(const char *ifname,
const char *type,
const unsigned char *macaddress,
const char *srcdev,
uint32_t macvlan_mode,
int *retry)
{
int rc = 0;
struct nlmsghdr *resp;
struct nlmsgerr *err;
struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
int ifindex;
unsigned char *recvbuf = NULL;
unsigned int recvbuflen;
struct nl_msg *nl_msg;
struct nlattr *linkinfo, *info_data;
if (ifaceGetIndex(true, srcdev, &ifindex) < 0)
return -1;
*retry = 0;
nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
if (!nl_msg) {
virReportOOMError();
return -1;
}
if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
goto buffer_too_small;
if (nla_put_u32(nl_msg, IFLA_LINK, ifindex) < 0)
goto buffer_too_small;
if (nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, macaddress) < 0)
goto buffer_too_small;
if (ifname &&
nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
goto buffer_too_small;
if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
goto buffer_too_small;
if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
goto buffer_too_small;
if (macvlan_mode > 0) {
if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
goto buffer_too_small;
if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(macvlan_mode),
&macvlan_mode) < 0)
goto buffer_too_small;
nla_nest_end(nl_msg, info_data);
}
nla_nest_end(nl_msg, linkinfo);
if (nlComm(nl_msg, &recvbuf, &recvbuflen, 0) < 0) {
rc = -1;
goto cleanup;
}
if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL)
goto malformed_resp;
resp = (struct nlmsghdr *)recvbuf;
switch (resp->nlmsg_type) {
case NLMSG_ERROR:
err = (struct nlmsgerr *)NLMSG_DATA(resp);
if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
goto malformed_resp;
switch (err->error) {
case 0:
break;
case -EEXIST:
*retry = 1;
rc = -1;
break;
default:
virReportSystemError(-err->error,
_("error creating %s type of interface"),
type);
rc = -1;
}
break;
case NLMSG_DONE:
break;
default:
goto malformed_resp;
}
cleanup:
nlmsg_free(nl_msg);
VIR_FREE(recvbuf);
return rc;
malformed_resp:
nlmsg_free(nl_msg);
virNetDevError(VIR_ERR_INTERNAL_ERROR, "%s",
_("malformed netlink response message"));
VIR_FREE(recvbuf);
return -1;
buffer_too_small:
nlmsg_free(nl_msg);
virNetDevError(VIR_ERR_INTERNAL_ERROR, "%s",
_("allocated netlink buffer is too small"));
return -1;
}
/**
* virNetDevMacVLanDelete:
*
* @ifname: Name of the interface
*
* Tear down an interface with the given name.
*
* Returns 0 on success, -1 on fatal error.
*/
int virNetDevMacVLanDelete(const char *ifname)
{
int rc = 0;
struct nlmsghdr *resp;
struct nlmsgerr *err;
struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
unsigned char *recvbuf = NULL;
unsigned int recvbuflen;
struct nl_msg *nl_msg;
nl_msg = nlmsg_alloc_simple(RTM_DELLINK,
NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
if (!nl_msg) {
virReportOOMError();
return -1;
}
if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
goto buffer_too_small;
if (nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
goto buffer_too_small;
if (nlComm(nl_msg, &recvbuf, &recvbuflen, 0) < 0) {
rc = -1;
goto cleanup;
}
if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL)
goto malformed_resp;
resp = (struct nlmsghdr *)recvbuf;
switch (resp->nlmsg_type) {
case NLMSG_ERROR:
err = (struct nlmsgerr *)NLMSG_DATA(resp);
if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
goto malformed_resp;
if (err->error) {
virReportSystemError(-err->error,
_("error destroying %s interface"),
ifname);
rc = -1;
}
break;
case NLMSG_DONE:
break;
default:
goto malformed_resp;
}
cleanup:
nlmsg_free(nl_msg);
VIR_FREE(recvbuf);
return rc;
malformed_resp:
nlmsg_free(nl_msg);
virNetDevError(VIR_ERR_INTERNAL_ERROR, "%s",
_("malformed netlink response message"));
VIR_FREE(recvbuf);
return -1;
buffer_too_small:
nlmsg_free(nl_msg);
virNetDevError(VIR_ERR_INTERNAL_ERROR, "%s",
_("allocated netlink buffer is too small"));
return -1;
}
/**
* virNetDevMacVLanTapOpen:
* Open the macvtap's tap device.
* @ifname: Name of the macvtap interface
* @retries : Number of retries in case udev for example may need to be
* waited for to create the tap chardev
* Returns negative value in case of error, the file descriptor otherwise.
*/
static
int virNetDevMacVLanTapOpen(const char *ifname,
int retries)
{
FILE *file;
char path[64];
int ifindex;
char tapname[50];
int tapfd;
if (snprintf(path, sizeof(path),
"/sys/class/net/%s/ifindex", ifname) >= sizeof(path)) {
virReportSystemError(errno,
"%s",
_("buffer for ifindex path is too small"));
return -1;
}
file = fopen(path, "r");
if (!file) {
virReportSystemError(errno,
_("cannot open macvtap file %s to determine "
"interface index"), path);
return -1;
}
if (fscanf(file, "%d", &ifindex) != 1) {
virReportSystemError(errno,
"%s",_("cannot determine macvtap's tap device "
"interface index"));
VIR_FORCE_FCLOSE(file);
return -1;
}
VIR_FORCE_FCLOSE(file);
if (snprintf(tapname, sizeof(tapname),
"/dev/tap%d", ifindex) >= sizeof(tapname)) {
virReportSystemError(errno,
"%s",
_("internal buffer for tap device is too small"));
return -1;
}
while (1) {
/* may need to wait for udev to be done */
tapfd = open(tapname, O_RDWR);
if (tapfd < 0 && retries > 0) {
retries--;
usleep(20000);
continue;
}
break;
}
if (tapfd < 0)
virReportSystemError(errno,
_("cannot open macvtap tap device %s"),
tapname);
return tapfd;
}
/**
* virNetDevMacVLanTapSetup:
* @tapfd: file descriptor of the macvtap tap
* @vnet_hdr: 1 to enable IFF_VNET_HDR, 0 to disable it
*
* Returns 0 on success, -1 in case of fatal error, error code otherwise.
*
* Turn the IFF_VNET_HDR flag, if requested and available, make sure
* it's off in the other cases.
* A fatal error is defined as the VNET_HDR flag being set but it cannot
* be turned off for some reason. This is reported with -1. Other fatal
* error is not being able to read the interface flags. In that case the
* macvtap device should not be used.
*/
static int
virNetDevMacVLanTapSetup(int tapfd, int vnet_hdr)
{
unsigned int features;
struct ifreq ifreq;
short new_flags = 0;
int rc_on_fail = 0;
const char *errmsg = NULL;
memset(&ifreq, 0, sizeof(ifreq));
if (ioctl(tapfd, TUNGETIFF, &ifreq) < 0) {
virReportSystemError(errno, "%s",
_("cannot get interface flags on macvtap tap"));
return -1;
}
new_flags = ifreq.ifr_flags;
if ((ifreq.ifr_flags & IFF_VNET_HDR) && !vnet_hdr) {
new_flags = ifreq.ifr_flags & ~IFF_VNET_HDR;
rc_on_fail = -1;
errmsg = _("cannot clean IFF_VNET_HDR flag on macvtap tap");
} else if ((ifreq.ifr_flags & IFF_VNET_HDR) == 0 && vnet_hdr) {
if (ioctl(tapfd, TUNGETFEATURES, &features) < 0) {
virReportSystemError(errno, "%s",
_("cannot get feature flags on macvtap tap"));
return -1;
}
if ((features & IFF_VNET_HDR)) {
new_flags = ifreq.ifr_flags | IFF_VNET_HDR;
errmsg = _("cannot set IFF_VNET_HDR flag on macvtap tap");
}
}
if (new_flags != ifreq.ifr_flags) {
ifreq.ifr_flags = new_flags;
if (ioctl(tapfd, TUNSETIFF, &ifreq) < 0) {
virReportSystemError(errno, "%s", errmsg);
return rc_on_fail;
}
}
return 0;
}
static const uint32_t modeMap[VIR_NETDEV_MACVLAN_MODE_LAST] = {
[VIR_NETDEV_MACVLAN_MODE_VEPA] = MACVLAN_MODE_VEPA,
[VIR_NETDEV_MACVLAN_MODE_PRIVATE] = MACVLAN_MODE_PRIVATE,
[VIR_NETDEV_MACVLAN_MODE_BRIDGE] = MACVLAN_MODE_BRIDGE,
[VIR_NETDEV_MACVLAN_MODE_PASSTHRU] = MACVLAN_MODE_PASSTHRU,
};
/**
* virNetDevMacVLanCreateWithVPortProfile:
* Create an instance of a macvtap device and open its tap character
* device.
* @tgifname: Interface name that the macvtap is supposed to have. May
* be NULL if this function is supposed to choose a name
* @macaddress: The MAC address for the macvtap device
* @linkdev: The interface name of the NIC to connect to the external bridge
* @mode: int describing the mode for 'bridge', 'vepa', 'private' or 'passthru'.
* @vnet_hdr: 1 to enable IFF_VNET_HDR, 0 to disable it
* @vmuuid: The UUID of the VM the macvtap belongs to
* @virtPortProfile: pointer to object holding the virtual port profile data
* @res_ifname: Pointer to a string pointer where the actual name of the
* interface will be stored into if everything succeeded. It is up
* to the caller to free the string.
*
* Returns file descriptor of the tap device in case of success,
* negative value otherwise with error reported.
*
*/
int virNetDevMacVLanCreateWithVPortProfile(const char *tgifname,
const unsigned char *macaddress,
const char *linkdev,
enum virNetDevMacVLanMode mode,
int vnet_hdr,
const unsigned char *vmuuid,
virNetDevVPortProfilePtr virtPortProfile,
char **res_ifname,
enum virNetDevVPortProfileOp vmOp,
char *stateDir,
virNetDevBandwidthPtr bandwidth)
{
const char *type = "macvtap";
int c, rc;
char ifname[IFNAMSIZ];
int retries, do_retry = 0;
uint32_t macvtapMode;
const char *cr_ifname;
int ifindex;
macvtapMode = modeMap[mode];
*res_ifname = NULL;
VIR_DEBUG("%s: VM OPERATION: %s", __FUNCTION__, virNetDevVPortProfileOpTypeToString(vmOp));
/** Note: When using PASSTHROUGH mode with MACVTAP devices the link
* device's MAC address must be set to the VMs MAC address. In
* order to not confuse the first switch or bridge in line this MAC
* address must be reset when the VM is shut down.
* This is especially important when using SRIOV capable cards that
* emulate their switch in firmware.
*/
if (mode == VIR_NETDEV_MACVLAN_MODE_PASSTHRU) {
if (ifaceReplaceMacAddress(macaddress, linkdev, stateDir) < 0) {
return -1;
}
}
if (tgifname) {
if(ifaceGetIndex(false, tgifname, &ifindex) == 0) {
if (STRPREFIX(tgifname,
MACVTAP_NAME_PREFIX)) {
goto create_name;
}
virReportSystemError(errno,
_("Interface %s already exists"), tgifname);
return -1;
}
cr_ifname = tgifname;
rc = virNetDevMacVLanCreate(tgifname, type, macaddress, linkdev,
macvtapMode, &do_retry);
if (rc < 0)
return -1;
} else {
create_name:
retries = 5;
for (c = 0; c < 8192; c++) {
snprintf(ifname, sizeof(ifname), MACVTAP_NAME_PATTERN, c);
if (ifaceGetIndex(false, ifname, &ifindex) == -ENODEV) {
rc = virNetDevMacVLanCreate(ifname, type, macaddress, linkdev,
macvtapMode, &do_retry);
if (rc == 0)
break;
if (do_retry && --retries)
continue;
return -1;
}
}
cr_ifname = ifname;
}
if (virNetDevVPortProfileAssociate(cr_ifname,
virtPortProfile,
macaddress,
linkdev,
vmuuid, vmOp) < 0) {
rc = -1;
goto link_del_exit;
}
if (virNetDevSetOnline(cr_ifname, true) < 0) {
rc = -1;
goto disassociate_exit;
}
rc = virNetDevMacVLanTapOpen(cr_ifname, 10);
if (rc >= 0) {
if (virNetDevMacVLanTapSetup(rc, vnet_hdr) < 0) {
VIR_FORCE_CLOSE(rc); /* sets rc to -1 */
goto disassociate_exit;
}
*res_ifname = strdup(cr_ifname);
} else
goto disassociate_exit;
if (virNetDevBandwidthSet(cr_ifname, bandwidth) < 0) {
virNetDevError(VIR_ERR_INTERNAL_ERROR,
_("cannot set bandwidth limits on %s"),
cr_ifname);
rc = -1;
goto disassociate_exit;
}
return rc;
disassociate_exit:
ignore_value(virNetDevVPortProfileDisassociate(cr_ifname,
virtPortProfile,
macaddress,
linkdev,
vmOp));
link_del_exit:
ignore_value(virNetDevMacVLanDelete(cr_ifname));
return rc;
}
/**
* virNetDevMacVLanDeleteWithVPortProfile:
* @ifname : The name of the macvtap interface
* @linkdev: The interface name of the NIC to connect to the external bridge
* @virtPortProfile: pointer to object holding the virtual port profile data
*
* Delete an interface given its name. Disassociate
* it with the switch if port profile parameters
* were provided.
*/
int virNetDevMacVLanDeleteWithVPortProfile(const char *ifname,
const unsigned char *macaddr,
const char *linkdev,
int mode,
virNetDevVPortProfilePtr virtPortProfile,
char *stateDir)
{
int ret = 0;
if (mode == VIR_NETDEV_MACVLAN_MODE_PASSTHRU) {
ifaceRestoreMacAddress(linkdev, stateDir);
}
if (ifname) {
if (virNetDevVPortProfileDisassociate(ifname,
virtPortProfile,
macaddr,
linkdev,
VIR_NETDEV_VPORT_PROFILE_OP_DESTROY) < 0)
ret = -1;
if (virNetDevMacVLanDelete(ifname) < 0)
ret = -1;
}
return ret;
}
#else /* ! WITH_MACVTAP */
int virNetDevMacVLanCreate(const char *ifname ATTRIBUTE_UNUSED,
const char *type ATTRIBUTE_UNUSED,
const unsigned char *macaddress ATTRIBUTE_UNUSED,
const char *srcdev ATTRIBUTE_UNUSED,
uint32_t macvlan_mode ATTRIBUTE_UNUSED,
int *retry ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("Cannot create macvlan devices on this platform"));
return -1;
}
int virNetDevMacVLanDelete(const char *ifname ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("Cannot create macvlan devices on this platform"));
return -1;
}
int virNetDevMacVLanCreateWithVPortProfile(const char *ifname ATTRIBUTE_UNUSED,
const unsigned char *macaddress ATTRIBUTE_UNUSED,
const char *linkdev ATTRIBUTE_UNUSED,
enum virNetDevMacVLanMode mode ATTRIBUTE_UNUSED,
int vnet_hdr ATTRIBUTE_UNUSED,
const unsigned char *vmuuid ATTRIBUTE_UNUSED,
virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
char **res_ifname ATTRIBUTE_UNUSED,
enum virNetDevVPortProfileOp vmop ATTRIBUTE_UNUSED,
char *stateDir ATTRIBUTE_UNUSED,
virNetDevBandwidthPtr bandwidth ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("Cannot create macvlan devices on this platform"));
return -1;
}
int virNetDevMacVLanDeleteWithVPortProfile(const char *ifname ATTRIBUTE_UNUSED,
const unsigned char *macaddress ATTRIBUTE_UNUSED,
const char *linkdev ATTRIBUTE_UNUSED,
int mode ATTRIBUTE_UNUSED,
virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
char *stateDir ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("Cannot create macvlan devices on this platform"));
return -1;
}
#endif /* ! WITH_MACVTAP */