libvirt/src/util/macvtap.c
Matthias Bolte 45ef4d5e8a Implement virVMOperationType{To|From}String independent from WITH_MACVTAP
As these symbols are exported independently of WITH_MACVTAP.
2010-12-03 18:06:42 +01:00

1617 lines
43 KiB
C

/*
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (C) 2010 IBM Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Authors:
* Stefan Berger <stefanb@us.ibm.com>
*
* Notes:
* netlink: http://lovezutto.googlepages.com/netlink.pdf
* iproute2 package
*
*/
#include <config.h>
#include <stdint.h>
#if WITH_MACVTAP || WITH_VIRTUALPORT
# include <stdio.h>
# include <errno.h>
# include <fcntl.h>
# include <c-ctype.h>
# include <sys/socket.h>
# include <sys/ioctl.h>
# include <linux/if.h>
# include <linux/netlink.h>
# include <linux/rtnetlink.h>
# include <linux/if_tun.h>
# include <netlink/msg.h>
#endif /* WITH_MACVTAP || WITH_VIRTUALPORT */
#include "util.h"
#include "macvtap.h"
#if WITH_MACVTAP || WITH_VIRTUALPORT
# include "memory.h"
# include "logging.h"
# include "interface.h"
# include "conf/domain_conf.h"
# include "virterror_internal.h"
# include "uuid.h"
# include "files.h"
/* Error domain for this file; presumably consumed by the generic
 * virReport*Error() macros, which are defined outside this file. */
# define VIR_FROM_THIS VIR_FROM_NET
/* Report an error in the VIR_FROM_NET domain, tagging it with the
 * current file, function and line. */
# define macvtapError(code, ...) \
virReportErrorHelper(NULL, VIR_FROM_NET, code, __FILE__, \
__FUNCTION__, __LINE__, __VA_ARGS__)
/* Prefix identifying interface names that openMacvtapTap() treats as
 * auto-generated macvtap names. */
# define MACVTAP_NAME_PREFIX "macvtap"
/* printf pattern used by openMacvtapTap() to generate candidate names. */
# define MACVTAP_NAME_PATTERN "macvtap%d"
# define MICROSEC_PER_SEC (1000 * 1000)
/* NOTE(review): NLMSGBUF_SIZE and RATTBUF_SIZE are not referenced by the
 * code visible in this file -- confirm whether they are still needed. */
# define NLMSGBUF_SIZE 256
# define RATTBUF_SIZE 64
/* Seconds nlComm() waits in select() for a netlink response. */
# define NETLINK_ACK_TIMEOUT_S 2
/* Total time and polling interval (both in microseconds) that
 * doPortProfileOpCommon() uses while waiting for a port-profile status. */
# define STATUS_POLL_TIMEOUT_USEC (10 * MICROSEC_PER_SEC)
# define STATUS_POLL_INTERVL_USEC (MICROSEC_PER_SEC / 8)
/* File from which getLldpadPid() reads the pid of the lldpad daemon. */
# define LLDPAD_PID_FILE "/var/run/lldpad.pid"
/* Virtual port operations understood by the doPortProfileOp8021Qb{g,h}()
 * helpers in this file.  Numbering starts at 1. */
enum virVirtualPortOp {
    ASSOCIATE = 1,
    DISASSOCIATE,
    PREASSOCIATE,
};
/**
 * nlComm:
 * @nl_msg: pointer to netlink message
 * @respbuf: pointer to pointer where response buffer will be allocated
 * @respbuflen: pointer to integer holding the size of the response buffer
 *              on return of the function.
 * @nl_pid: the pid of the process to talk to, i.e., pid = 0 for kernel
 *
 * Send the given message to the netlink layer and receive response.
 * The function waits at most NETLINK_ACK_TIMEOUT_S seconds for the
 * response to arrive.
 *
 * Returns 0 on success, -1 on error. In case of error, no response
 * buffer will be returned (*respbuf is freed and reset to NULL and
 * *respbuflen is set to 0 on every error path taken after the netlink
 * handle has been allocated).
 */
static
int nlComm(struct nl_msg *nl_msg,
           unsigned char **respbuf, unsigned int *respbuflen,
           int nl_pid)
{
    int rc = 0;
    struct sockaddr_nl nladdr = {
            .nl_family = AF_NETLINK,
            .nl_pid    = nl_pid,
            .nl_groups = 0,
    };
    ssize_t nbytes;
    struct timeval tv = {
        .tv_sec = NETLINK_ACK_TIMEOUT_S,
    };
    fd_set readfds;
    int fd;
    int n;
    int received;
    struct nl_handle *nlhandle = nl_handle_alloc();
    struct nlmsghdr *nlmsg = nlmsg_hdr(nl_msg);

    if (!nlhandle) {
        virReportSystemError(errno,
                             "%s", _("cannot allocate nlhandle for netlink"));
        return -1;
    }

    if (nl_connect(nlhandle, NETLINK_ROUTE) < 0) {
        virReportSystemError(errno,
                             "%s", _("cannot connect to netlink socket"));
        rc = -1;
        goto err_exit;
    }

    nlmsg_set_dst(nl_msg, &nladdr);

    nlmsg->nlmsg_pid = getpid();

    nbytes = nl_send_auto_complete(nlhandle, nl_msg);
    if (nbytes < 0) {
        virReportSystemError(errno,
                             "%s", _("cannot send to netlink socket"));
        rc = -1;
        goto err_exit;
    }

    fd = nl_socket_get_fd(nlhandle);

    FD_ZERO(&readfds);
    FD_SET(fd, &readfds);

    n = select(fd + 1, &readfds, NULL, NULL, &tv);
    if (n <= 0) {
        if (n < 0)
            virReportSystemError(errno, "%s",
                                 _("error in select call"));
        if (n == 0)
            virReportSystemError(ETIMEDOUT, "%s",
                                 _("no valid netlink response was received"));
        rc = -1;
        goto err_exit;
    }

    /* Keep the result in a signed variable: storing it directly into the
     * unsigned *respbuflen would turn a negative error return into a huge
     * positive length that the "<= 0" check can never catch. */
    received = nl_recv(nlhandle, &nladdr, respbuf, NULL);
    if (received <= 0) {
        virReportSystemError(errno,
                             "%s", _("nl_recv failed"));
        rc = -1;
    } else {
        *respbuflen = received;
    }

err_exit:
    if (rc == -1) {
        VIR_FREE(*respbuf);
        *respbuf = NULL;
        *respbuflen = 0;
    }

    nl_handle_destroy(nlhandle);
    return rc;
}
# if WITH_MACVTAP
static int
link_add(const char *type,
const unsigned char *macaddress, int macaddrsize,
const char *ifname,
const char *srcdev,
uint32_t macvlan_mode,
int *retry)
{
int rc = 0;
struct nlmsghdr *resp;
struct nlmsgerr *err;
struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
int ifindex;
unsigned char *recvbuf = NULL;
unsigned int recvbuflen;
struct nl_msg *nl_msg;
struct nlattr *linkinfo, *info_data;
if (ifaceGetIndex(true, srcdev, &ifindex) != 0)
return -1;
*retry = 0;
nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
if (!nl_msg) {
virReportOOMError();
return -1;
}
if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
goto buffer_too_small;
if (nla_put_u32(nl_msg, IFLA_LINK, ifindex) < 0)
goto buffer_too_small;
if (nla_put(nl_msg, IFLA_ADDRESS, macaddrsize, macaddress) < 0)
goto buffer_too_small;
if (ifname &&
nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
goto buffer_too_small;
if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
goto buffer_too_small;
if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
goto buffer_too_small;
if (macvlan_mode > 0) {
if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
goto buffer_too_small;
if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(macvlan_mode),
&macvlan_mode) < 0)
goto buffer_too_small;
nla_nest_end(nl_msg, info_data);
}
nla_nest_end(nl_msg, linkinfo);
if (nlComm(nl_msg, &recvbuf, &recvbuflen, 0) < 0) {
rc = -1;
goto err_exit;
}
if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL)
goto malformed_resp;
resp = (struct nlmsghdr *)recvbuf;
switch (resp->nlmsg_type) {
case NLMSG_ERROR:
err = (struct nlmsgerr *)NLMSG_DATA(resp);
if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
goto malformed_resp;
switch (err->error) {
case 0:
break;
case -EEXIST:
*retry = 1;
rc = -1;
break;
default:
virReportSystemError(-err->error,
_("error creating %s type of interface"),
type);
rc = -1;
}
break;
case NLMSG_DONE:
break;
default:
goto malformed_resp;
}
err_exit:
nlmsg_free(nl_msg);
VIR_FREE(recvbuf);
return rc;
malformed_resp:
nlmsg_free(nl_msg);
macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
_("malformed netlink response message"));
VIR_FREE(recvbuf);
return -1;
buffer_too_small:
nlmsg_free(nl_msg);
macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
_("allocated netlink buffer is too small"));
return -1;
}
static int
link_del(const char *ifname)
{
int rc = 0;
struct nlmsghdr *resp;
struct nlmsgerr *err;
struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
unsigned char *recvbuf = NULL;
unsigned int recvbuflen;
struct nl_msg *nl_msg;
nl_msg = nlmsg_alloc_simple(RTM_DELLINK,
NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
if (!nl_msg) {
virReportOOMError();
return -1;
}
if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
goto buffer_too_small;
if (nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
goto buffer_too_small;
if (nlComm(nl_msg, &recvbuf, &recvbuflen, 0) < 0) {
rc = -1;
goto err_exit;
}
if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL)
goto malformed_resp;
resp = (struct nlmsghdr *)recvbuf;
switch (resp->nlmsg_type) {
case NLMSG_ERROR:
err = (struct nlmsgerr *)NLMSG_DATA(resp);
if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
goto malformed_resp;
if (err->error) {
virReportSystemError(-err->error,
_("error destroying %s interface"),
ifname);
rc = -1;
}
break;
case NLMSG_DONE:
break;
default:
goto malformed_resp;
}
err_exit:
nlmsg_free(nl_msg);
VIR_FREE(recvbuf);
return rc;
malformed_resp:
nlmsg_free(nl_msg);
macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
_("malformed netlink response message"));
VIR_FREE(recvbuf);
return -1;
buffer_too_small:
nlmsg_free(nl_msg);
macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
_("allocated netlink buffer is too small"));
return -1;
}
/* Open the character device that belongs to a macvtap interface.
 * @ifname: name of the macvtap interface
 * @retries: how many additional open() attempts to make (20ms apart)
 *           when the device node cannot be opened yet, e.g. because
 *           udev has not created it
 *
 * The interface index is read from /sys/class/net/<ifname>/ifindex and
 * the device opened is /dev/tap<ifindex>.
 *
 * Returns the file descriptor on success, a negative value on error.
 */
static
int openTap(const char *ifname,
            int retries)
{
    char sysfs_path[64];
    char devnode[50];
    FILE *fp;
    int idx;
    int fd;

    if (snprintf(sysfs_path, sizeof(sysfs_path),
                 "/sys/class/net/%s/ifindex", ifname) >= sizeof(sysfs_path)) {
        virReportSystemError(errno,
                             "%s",
                             _("buffer for ifindex path is too small"));
        return -1;
    }

    fp = fopen(sysfs_path, "r");
    if (fp == NULL) {
        virReportSystemError(errno,
                             _("cannot open macvtap file %s to determine "
                               "interface index"), sysfs_path);
        return -1;
    }

    if (fscanf(fp, "%d", &idx) != 1) {
        virReportSystemError(errno,
                             "%s",_("cannot determine macvtap's tap device "
                             "interface index"));
        VIR_FORCE_FCLOSE(fp);
        return -1;
    }

    VIR_FORCE_FCLOSE(fp);

    if (snprintf(devnode, sizeof(devnode),
                 "/dev/tap%d", idx) >= sizeof(devnode)) {
        virReportSystemError(errno,
                             "%s",
                             _("internal buffer for tap device is too small"));
        return -1;
    }

    /* the device node may show up late; retry a bounded number of times */
    for (;;) {
        fd = open(devnode, O_RDWR);
        if (fd >= 0 || retries <= 0)
            break;
        retries--;
        usleep(20000);
    }

    if (fd < 0)
        virReportSystemError(errno,
                             _("cannot open macvtap tap device %s"),
                             devnode);

    return fd;
}
/* Translate a virDomainNetdevMacvtapType value into the matching
 * MACVLAN_MODE_* constant.  VEPA is used for the VEPA value and for
 * every value that is not explicitly handled. */
static uint32_t
macvtapModeFromInt(enum virDomainNetdevMacvtapType mode)
{
    if (mode == VIR_DOMAIN_NETDEV_MACVTAP_MODE_PRIVATE)
        return MACVLAN_MODE_PRIVATE;

    if (mode == VIR_DOMAIN_NETDEV_MACVTAP_MODE_BRIDGE)
        return MACVLAN_MODE_BRIDGE;

    return MACVLAN_MODE_VEPA;
}
/**
* configMacvtapTap:
* @tapfd: file descriptor of the macvtap tap
* @vnet_hdr: 1 to enable IFF_VNET_HDR, 0 to disable it
*
* Returns 0 on success, -1 in case of fatal error, error code otherwise.
*
* Turn the IFF_VNET_HDR flag, if requested and available, make sure
* it's off in the other cases.
* A fatal error is defined as the VNET_HDR flag being set but it cannot
* be turned off for some reason. This is reported with -1. Other fatal
* error is not being able to read the interface flags. In that case the
* macvtap device should not be used.
*/
static int
configMacvtapTap(int tapfd, int vnet_hdr)
{
unsigned int features;
struct ifreq ifreq;
short new_flags = 0;
int rc_on_fail = 0;
const char *errmsg = NULL;
memset(&ifreq, 0, sizeof(ifreq));
if (ioctl(tapfd, TUNGETIFF, &ifreq) < 0) {
virReportSystemError(errno, "%s",
_("cannot get interface flags on macvtap tap"));
return -1;
}
new_flags = ifreq.ifr_flags;
if ((ifreq.ifr_flags & IFF_VNET_HDR) && !vnet_hdr) {
new_flags = ifreq.ifr_flags & ~IFF_VNET_HDR;
rc_on_fail = -1;
errmsg = _("cannot clean IFF_VNET_HDR flag on macvtap tap");
} else if ((ifreq.ifr_flags & IFF_VNET_HDR) == 0 && vnet_hdr) {
if (ioctl(tapfd, TUNGETFEATURES, &features) != 0)
return errno;
if ((features & IFF_VNET_HDR)) {
new_flags = ifreq.ifr_flags | IFF_VNET_HDR;
errmsg = _("cannot set IFF_VNET_HDR flag on macvtap tap");
}
}
if (new_flags != ifreq.ifr_flags) {
ifreq.ifr_flags = new_flags;
if (ioctl(tapfd, TUNSETIFF, &ifreq) < 0) {
virReportSystemError(errno, "%s", errmsg);
return rc_on_fail;
}
}
return 0;
}
/**
 * openMacvtapTap:
 * Create an instance of a macvtap device and open its tap character
 * device.
 * @tgifname: Interface name that the macvtap is supposed to have. May
 *    be NULL if this function is supposed to choose a name
 * @macaddress: The MAC address for the macvtap device
 * @linkdev: The interface name of the NIC to connect to the external bridge
 * @mode: int describing the mode for 'bridge', 'vepa' or 'private'.
 * @vnet_hdr: 1 to enable IFF_VNET_HDR, 0 to disable it
 * @vmuuid: The UUID of the VM the macvtap belongs to
 * @virtPortProfile: pointer to object holding the virtual port profile data
 * @res_ifname: Pointer to a string pointer where the actual name of the
 *     interface will be stored into if everything succeeded. It is up
 *     to the caller to free the string.
 * @vmOp: The VM operation being performed; passed on to the port profile
 *     association/disassociation calls
 *
 * If @tgifname names an existing interface whose name starts with
 * MACVTAP_NAME_PREFIX, a new name is generated instead; any other
 * existing interface is an error.
 *
 * Returns file descriptor of the tap device in case of success,
 * negative value otherwise with error reported. On failure after the
 * link was created, the link is deleted again and *res_ifname is NULL.
 *
 */
int
openMacvtapTap(const char *tgifname,
               const unsigned char *macaddress,
               const char *linkdev,
               int mode,
               int vnet_hdr,
               const unsigned char *vmuuid,
               virVirtualPortProfileParamsPtr virtPortProfile,
               char **res_ifname,
               enum virVMOperationType vmOp)
{
    const char *type = "macvtap";
    int c, rc;
    char ifname[IFNAMSIZ];
    int retries, do_retry = 0;
    uint32_t macvtapMode = macvtapModeFromInt(mode);
    const char *cr_ifname;
    int ifindex;

    *res_ifname = NULL;

    VIR_DEBUG("%s: VM OPERATION: %s", __FUNCTION__, virVMOperationTypeToString(vmOp));

    if (tgifname) {
        if(ifaceGetIndex(false, tgifname, &ifindex) == 0) {
            if (STRPREFIX(tgifname,
                          MACVTAP_NAME_PREFIX)) {
                goto create_name;
            }
            /* errno is not meaningful here (the lookup succeeded), so
             * report the condition explicitly */
            virReportSystemError(EEXIST,
                                 _("Interface %s already exists"), tgifname);
            return -1;
        }
        cr_ifname = tgifname;
        rc = link_add(type, macaddress, 6, tgifname, linkdev,
                      macvtapMode, &do_retry);
        if (rc)
            return -1;
    } else {
create_name:
        retries = 5;
        for (c = 0; c < 8192; c++) {
            snprintf(ifname, sizeof(ifname), MACVTAP_NAME_PATTERN, c);
            if (ifaceGetIndex(false, ifname, &ifindex) == ENODEV) {
                rc = link_add(type, macaddress, 6, ifname, linkdev,
                              macvtapMode, &do_retry);
                if (rc == 0)
                    break;

                if (do_retry && --retries)
                    continue;
                return -1;
            }
        }
        /* every candidate name was in use: no link has been created, so
         * there is nothing to associate, bring up or open */
        if (c == 8192) {
            macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
                         _("no free macvtap interface name available"));
            return -1;
        }
        cr_ifname = ifname;
    }

    if (vpAssociatePortProfileId(cr_ifname,
                                 macaddress,
                                 linkdev,
                                 virtPortProfile,
                                 vmuuid, vmOp) != 0) {
        rc = -1;
        goto link_del_exit;
    }

    rc = ifaceUp(cr_ifname);
    if (rc != 0) {
        virReportSystemError(errno,
                             _("cannot 'up' interface %s -- another "
                             "macvtap device may be 'up' and have the same "
                             "MAC address"),
                             cr_ifname);
        rc = -1;
        goto disassociate_exit;
    }

    rc = openTap(cr_ifname, 10);
    if (rc < 0)
        goto disassociate_exit;

    if (configMacvtapTap(rc, vnet_hdr) < 0) {
        VIR_FORCE_CLOSE(rc); /* sets rc to -1 */
        goto disassociate_exit;
    }

    /* the caller needs the name to delete the device later; do not
     * hand out a file descriptor without it */
    if (!(*res_ifname = strdup(cr_ifname))) {
        virReportOOMError();
        VIR_FORCE_CLOSE(rc); /* sets rc to -1 */
        goto disassociate_exit;
    }

    return rc;

disassociate_exit:
    vpDisassociatePortProfileId(cr_ifname,
                                macaddress,
                                linkdev,
                                virtPortProfile,
                                vmOp);

link_del_exit:
    link_del(cr_ifname);

    return rc;
}
/**
 * delMacvtap:
 * @ifname: name of the macvtap interface; nothing is done when NULL
 * @macaddr: MAC address handed to the disassociation call
 * @linkdev: The interface name of the NIC to connect to the external bridge
 * @virtPortProfile: pointer to object holding the virtual port profile data
 *
 * Disassociate the interface from its port profile (using the
 * VIR_VM_OP_DESTROY operation) and then delete the interface.
 */
void
delMacvtap(const char *ifname,
           const unsigned char *macaddr,
           const char *linkdev,
           virVirtualPortProfileParamsPtr virtPortProfile)
{
    if (!ifname)
        return;

    vpDisassociatePortProfileId(ifname, macaddr, linkdev, virtPortProfile,
                                VIR_VM_OP_DESTROY);
    link_del(ifname);
}
# endif /* WITH_MACVTAP */
# ifdef IFLA_PORT_MAX
/* Attribute policy passed to nlmsg_parse() in link_dump() for the
 * top-level IFLA_* attributes: IFLA_VF_PORTS is declared as a nested
 * attribute; all other entries are left zero-initialized. */
static struct nla_policy ifla_policy[IFLA_MAX + 1] =
{
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
};
/* Attribute policy passed to nla_parse_nested() in getPortProfileStatus()
 * for the IFLA_PORT_* attributes: IFLA_PORT_RESPONSE is declared as a
 * 16 bit unsigned value. */
static struct nla_policy ifla_port_policy[IFLA_PORT_MAX + 1] =
{
[IFLA_PORT_RESPONSE] = { .type = NLA_U16 },
};
static uint32_t
getLldpadPid(void) {
int fd;
uint32_t pid = 0;
fd = open(LLDPAD_PID_FILE, O_RDONLY);
if (fd >= 0) {
char buffer[10];
if (saferead(fd, buffer, sizeof(buffer)) <= sizeof(buffer)) {
unsigned int res;
char *endptr;
if (virStrToLong_ui(buffer, &endptr, 10, &res) == 0
&& (*endptr == '\0' || c_isspace(*endptr))
&& res != 0) {
pid = res;
} else {
macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
_("error parsing pid of lldpad"));
}
}
} else {
virReportSystemError(errno,
_("Error opening file %s"), LLDPAD_PID_FILE);
}
VIR_FORCE_CLOSE(fd);
return pid;
}
static int
link_dump(bool nltarget_kernel, const char *ifname, int ifindex,
struct nlattr **tb, unsigned char **recvbuf)
{
int rc = 0;
struct nlmsghdr *resp;
struct nlmsgerr *err;
struct ifinfomsg ifinfo = {
.ifi_family = AF_UNSPEC,
.ifi_index = ifindex
};
unsigned int recvbuflen;
uint32_t pid = 0;
struct nl_msg *nl_msg;
*recvbuf = NULL;
nl_msg = nlmsg_alloc_simple(RTM_GETLINK, NLM_F_REQUEST);
if (!nl_msg) {
virReportOOMError();
return -1;
}
if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
goto buffer_too_small;
if (ifindex < 0 && ifname) {
if (nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
goto buffer_too_small;
}
if (!nltarget_kernel) {
pid = getLldpadPid();
if (pid == 0) {
rc = -1;
goto err_exit;
}
}
if (nlComm(nl_msg, recvbuf, &recvbuflen, pid) < 0) {
rc = -1;
goto err_exit;
}
if (recvbuflen < NLMSG_LENGTH(0) || *recvbuf == NULL)
goto malformed_resp;
resp = (struct nlmsghdr *)*recvbuf;
switch (resp->nlmsg_type) {
case NLMSG_ERROR:
err = (struct nlmsgerr *)NLMSG_DATA(resp);
if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
goto malformed_resp;
if (err->error) {
virReportSystemError(-err->error,
_("error dumping %s (%d) interface"),
ifname, ifindex);
rc = -1;
}
break;
case GENL_ID_CTRL:
case NLMSG_DONE:
if (nlmsg_parse(resp, sizeof(struct ifinfomsg),
tb, IFLA_MAX, ifla_policy)) {
goto malformed_resp;
}
break;
default:
goto malformed_resp;
}
if (rc != 0)
VIR_FREE(*recvbuf);
err_exit:
nlmsg_free(nl_msg);
return rc;
malformed_resp:
nlmsg_free(nl_msg);
macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
_("malformed netlink response message"));
VIR_FREE(*recvbuf);
return -1;
buffer_too_small:
nlmsg_free(nl_msg);
macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
_("allocated netlink buffer is too small"));
return -1;
}
/**
 * ifaceGetNthParent
 *
 * @ifindex : the index of the interface, or a value <= 0 if the index
 *            is to be looked up from ifname
 * @ifname : the name of the interface; ignored if ifindex is valid
 * @nthParent : the nth parent interface to get
 * @parent_ifindex : pointer to int
 * @parent_ifname : pointer to buffer of size IFNAMSIZ
 * @nth : the nth parent that is actually returned; if for example eth0.100
 *        was given and the 100th parent is to be returned, then eth0 will
 *        most likely be returned with nth set to 1 since the chain does
 *        not have more interfaces. May be NULL if the caller is not
 *        interested. Left at 0 when not even the interface itself could
 *        be queried.
 *
 * Get the nth parent interface of the given interface. 0 is the interface
 * itself.
 *
 * Return 0 on success, != 0 otherwise
 */
static int
ifaceGetNthParent(int ifindex, const char *ifname, unsigned int nthParent,
                  int *parent_ifindex, char *parent_ifname,
                  unsigned int *nth)
{
    int rc = 0;
    struct nlattr *tb[IFLA_MAX + 1] = { NULL, };
    unsigned char *recvbuf = NULL;
    bool end = false;
    unsigned int i = 0;

    if (nth)
        *nth = 0;

    if (ifindex <= 0 && ifaceGetIndex(true, ifname, &ifindex) != 0)
        return 1;

    while (!end && i <= nthParent) {
        rc = link_dump(true, ifname, ifindex, tb, &recvbuf);
        if (rc)
            break;

        if (tb[IFLA_IFNAME]) {
            if (!virStrcpy(parent_ifname, (char*)RTA_DATA(tb[IFLA_IFNAME]),
                           IFNAMSIZ)) {
                macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
                             _("buffer for root interface name is too small"));
                VIR_FREE(recvbuf);
                return 1;
            }
            *parent_ifindex = ifindex;
        }

        if (tb[IFLA_LINK]) {
            /* continue with the device this one is linked to */
            ifindex = *(int *)RTA_DATA(tb[IFLA_LINK]);
            ifname = NULL;
        } else
            end = true;

        VIR_FREE(recvbuf);

        i++;
    }

    /* i counts completed iterations; when the very first query failed
     * it is still 0 and "i - 1" would wrap around */
    if (nth && i > 0)
        *nth = i - 1;

    return rc;
}
/**
* getPortProfileStatus
*
* tb: top level netlink response attributes + values
* vf: The virtual function used in the request
* instanceId: instanceId of the interface (vm uuid in case of 802.1Qbh)
* is8021Qbg: whether this function is call for 8021Qbg
* status: pointer to a uint16 where the status will be written into
*
* Get the status from the IFLA_PORT_RESPONSE field; Returns 0 in
* case of success, != 0 otherwise with error having been reported
*/
static int
getPortProfileStatus(struct nlattr **tb, int32_t vf,
const unsigned char *instanceId,
bool nltarget_kernel,
bool is8021Qbg,
uint16_t *status)
{
int rc = 1;
const char *msg = NULL;
struct nlattr *tb_port[IFLA_PORT_MAX + 1] = { NULL, };
if (vf == PORT_SELF_VF && nltarget_kernel) {
if (tb[IFLA_PORT_SELF]) {
if (nla_parse_nested(tb_port, IFLA_PORT_MAX, tb[IFLA_PORT_SELF],
ifla_port_policy)) {
msg = _("error parsing IFLA_PORT_SELF part");
goto err_exit;
}
} else {
msg = _("IFLA_PORT_SELF is missing");
goto err_exit;
}
} else {
if (tb[IFLA_VF_PORTS]) {
int rem;
bool found = false;
struct nlattr *tb_vf_ports = { NULL, };
nla_for_each_nested(tb_vf_ports, tb[IFLA_VF_PORTS], rem) {
if (nla_type(tb_vf_ports) != IFLA_VF_PORT) {
msg = _("error while iterating over IFLA_VF_PORTS part");
goto err_exit;
}
if (nla_parse_nested(tb_port, IFLA_PORT_MAX, tb_vf_ports,
ifla_port_policy)) {
msg = _("error parsing IFLA_VF_PORT part");
goto err_exit;
}
if (instanceId &&
tb_port[IFLA_PORT_INSTANCE_UUID] &&
!memcmp(instanceId,
(unsigned char *)
RTA_DATA(tb_port[IFLA_PORT_INSTANCE_UUID]),
VIR_UUID_BUFLEN) &&
tb_port[IFLA_PORT_VF] &&
vf == *(uint32_t *)RTA_DATA(tb_port[IFLA_PORT_VF])) {
found = true;
break;
}
}
if (!found) {
msg = _("Could not find netlink response with "
"expected parameters");
goto err_exit;
}
} else {
msg = _("IFLA_VF_PORTS is missing");
goto err_exit;
}
}
if (tb_port[IFLA_PORT_RESPONSE]) {
*status = *(uint16_t *)RTA_DATA(tb_port[IFLA_PORT_RESPONSE]);
rc = 0;
} else {
if (is8021Qbg) {
/* no in-progress here; may be missing */
*status = PORT_PROFILE_RESPONSE_INPROGRESS;
rc = 0;
} else {
msg = _("no IFLA_PORT_RESPONSE found in netlink message");
goto err_exit;
}
}
err_exit:
if (msg)
macvtapError(VIR_ERR_INTERNAL_ERROR, "%s", msg);
return rc;
}
/**
 * doPortProfileOpSetLink:
 * @nltarget_kernel: true to send the request to the kernel (pid 0),
 *                   false to send it to the pid read by getLldpadPid()
 * @ifname: interface name to add as IFLA_IFNAME; may be NULL
 * @ifindex: interface index placed into the request header
 * @macaddr: MAC address (6 bytes) for IFLA_VF_MAC; may be NULL
 * @vlanid: VLAN id for IFLA_VF_VLAN; the VF info list is only added
 *          when @macaddr is given and @vlanid >= 0
 * @profileId: string for IFLA_PORT_PROFILE; may be NULL
 * @portVsi: data for IFLA_PORT_VSI_TYPE; may be NULL
 * @instanceId: VIR_UUID_BUFLEN bytes for IFLA_PORT_INSTANCE_UUID; may be NULL
 * @hostUUID: VIR_UUID_BUFLEN bytes for IFLA_PORT_HOST_UUID; may be NULL
 * @vf: virtual function index, or PORT_SELF_VF
 * @op: value for IFLA_PORT_REQUEST
 *
 * Build an RTM_SETLINK request carrying the port profile attributes and
 * send it via nlComm(). For @vf == PORT_SELF_VF with a kernel target the
 * port attributes are nested in IFLA_PORT_SELF, otherwise in
 * IFLA_VF_PORTS / IFLA_VF_PORT.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
doPortProfileOpSetLink(bool nltarget_kernel,
                       const char *ifname, int ifindex,
                       const unsigned char *macaddr,
                       int vlanid,
                       const char *profileId,
                       struct ifla_port_vsi *portVsi,
                       const unsigned char *instanceId,
                       const unsigned char *hostUUID,
                       int32_t vf,
                       uint8_t op)
{
    int rc = 0;
    struct nlmsghdr *resp;
    struct nlmsgerr *err;
    struct ifinfomsg ifinfo = {
        .ifi_family = AF_UNSPEC,
        .ifi_index  = ifindex,
    };
    unsigned char *recvbuf = NULL;
    unsigned int recvbuflen = 0;
    uint32_t pid = 0;
    struct nl_msg *nl_msg;
    struct nlattr *vfports = NULL, *vfport;

    nl_msg = nlmsg_alloc_simple(RTM_SETLINK, NLM_F_REQUEST);
    if (!nl_msg) {
        virReportOOMError();
        return -1;
    }

    if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
        goto buffer_too_small;

    if (ifname &&
        nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
        goto buffer_too_small;

    if (macaddr && vlanid >= 0) {
        struct nlattr *vfinfolist, *vfinfo;
        struct ifla_vf_mac ifla_vf_mac = {
            .vf = vf,
            .mac = { 0, },
        };
        struct ifla_vf_vlan ifla_vf_vlan = {
            .vf = vf,
            .vlan = vlanid,
            .qos = 0,
        };

        memcpy(ifla_vf_mac.mac, macaddr, 6);

        if (!(vfinfolist = nla_nest_start(nl_msg, IFLA_VFINFO_LIST)))
            goto buffer_too_small;

        if (!(vfinfo = nla_nest_start(nl_msg, IFLA_VF_INFO)))
            goto buffer_too_small;

        /* nla_put() signals failure with a negative value; the result
         * must be compared directly (a negated result is never < 0) */
        if (nla_put(nl_msg, IFLA_VF_MAC, sizeof(ifla_vf_mac),
                    &ifla_vf_mac) < 0)
            goto buffer_too_small;

        if (nla_put(nl_msg, IFLA_VF_VLAN, sizeof(ifla_vf_vlan),
                    &ifla_vf_vlan) < 0)
            goto buffer_too_small;

        nla_nest_end(nl_msg, vfinfo);
        nla_nest_end(nl_msg, vfinfolist);
    }

    if (vf == PORT_SELF_VF && nltarget_kernel) {
        if (!(vfport = nla_nest_start(nl_msg, IFLA_PORT_SELF)))
            goto buffer_too_small;
    } else {
        if (!(vfports = nla_nest_start(nl_msg, IFLA_VF_PORTS)))
            goto buffer_too_small;

        /* begin nesting vfports */
        if (!(vfport = nla_nest_start(nl_msg, IFLA_VF_PORT)))
            goto buffer_too_small;
    }

    if (profileId) {
        if (nla_put(nl_msg, IFLA_PORT_PROFILE, strlen(profileId) + 1,
                    profileId) < 0)
            goto buffer_too_small;
    }

    if (portVsi) {
        if (nla_put(nl_msg, IFLA_PORT_VSI_TYPE, sizeof(*portVsi),
                    portVsi) < 0)
            goto buffer_too_small;
    }

    if (instanceId) {
        if (nla_put(nl_msg, IFLA_PORT_INSTANCE_UUID, VIR_UUID_BUFLEN,
                    instanceId) < 0)
            goto buffer_too_small;
    }

    if (hostUUID) {
        if (nla_put(nl_msg, IFLA_PORT_HOST_UUID, VIR_UUID_BUFLEN,
                    hostUUID) < 0)
            goto buffer_too_small;
    }

    if (vf != PORT_SELF_VF) {
        if (nla_put(nl_msg, IFLA_PORT_VF, sizeof(vf), &vf) < 0)
            goto buffer_too_small;
    }

    if (nla_put(nl_msg, IFLA_PORT_REQUEST, sizeof(op), &op) < 0)
        goto buffer_too_small;

    /* end nesting of vport */
    nla_nest_end(nl_msg, vfport);

    if (vfports) {
        /* end nesting of vfports */
        nla_nest_end(nl_msg, vfports);
    }

    if (!nltarget_kernel) {
        pid = getLldpadPid();
        if (pid == 0) {
            rc = -1;
            goto err_exit;
        }
    }

    if (nlComm(nl_msg, &recvbuf, &recvbuflen, pid) < 0) {
        rc = -1;
        goto err_exit;
    }

    if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL)
        goto malformed_resp;

    resp = (struct nlmsghdr *)recvbuf;

    switch (resp->nlmsg_type) {
    case NLMSG_ERROR:
        err = (struct nlmsgerr *)NLMSG_DATA(resp);
        if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
            goto malformed_resp;

        if (err->error) {
            virReportSystemError(-err->error,
                _("error during virtual port configuration of ifindex %d"),
                ifindex);
            rc = -1;
        }
        break;

    case NLMSG_DONE:
        break;

    default:
        goto malformed_resp;
    }

err_exit:
    nlmsg_free(nl_msg);

    VIR_FREE(recvbuf);

    return rc;

malformed_resp:
    nlmsg_free(nl_msg);

    macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
                 _("malformed netlink response message"));
    VIR_FREE(recvbuf);
    return -1;

buffer_too_small:
    nlmsg_free(nl_msg);

    macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
                 _("allocated netlink buffer is too small"));
    return -1;
}
/**
 * doPortProfileOpCommon:
 *
 * Send a port profile request with doPortProfileOpSetLink() (all
 * parameters are handed through unchanged) and then poll the link with
 * link_dump()/getPortProfileStatus() every STATUS_POLL_INTERVL_USEC
 * microseconds until the status is no longer "in progress" or
 * STATUS_POLL_TIMEOUT_USEC has been used up.
 *
 * A NULL @profileId makes the status lookup treat the request as an
 * 802.1Qbg one.
 *
 * Returns 0 on success, -ETIMEDOUT if the status was still "in progress"
 * when polling stopped, another value != 0 on any other failure.
 */
static int
doPortProfileOpCommon(bool nltarget_kernel,
                      const char *ifname, int ifindex,
                      const unsigned char *macaddr,
                      int vlanid,
                      const char *profileId,
                      struct ifla_port_vsi *portVsi,
                      const unsigned char *instanceId,
                      const unsigned char *hostUUID,
                      int32_t vf,
                      uint8_t op)
{
    const int max_polls = STATUS_POLL_TIMEOUT_USEC / STATUS_POLL_INTERVL_USEC;
    struct nlattr *attrs[IFLA_MAX + 1] = { NULL , };
    unsigned char *reply = NULL;
    bool is8021Qbg = (profileId == NULL);
    uint16_t status = 0;
    int attempt;
    int rc;

    rc = doPortProfileOpSetLink(nltarget_kernel, ifname, ifindex, macaddr,
                                vlanid, profileId, portVsi, instanceId,
                                hostUUID, vf, op);
    if (rc) {
        macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
                     _("sending of PortProfileRequest failed."));
        return rc;
    }

    for (attempt = 0; attempt < max_polls; attempt++) {
        rc = link_dump(nltarget_kernel, NULL, ifindex, attrs, &reply);
        if (rc)
            goto cleanup;

        rc = getPortProfileStatus(attrs, vf, instanceId, nltarget_kernel,
                                  is8021Qbg, &status);
        if (rc)
            goto cleanup;

        if (status == PORT_PROFILE_RESPONSE_SUCCESS ||
            status == PORT_VDP_RESPONSE_SUCCESS)
            break;

        if (status != PORT_PROFILE_RESPONSE_INPROGRESS) {
            virReportSystemError(EINVAL,
                    _("error %d during port-profile setlink on "
                      "interface %s (%d)"),
                    status, ifname, ifindex);
            rc = 1;
            break;
        }

        /* still in progress: wait a little and ask again */
        usleep(STATUS_POLL_INTERVL_USEC);

        VIR_FREE(reply);
    }

    if (status == PORT_PROFILE_RESPONSE_INPROGRESS) {
        macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
                     _("port-profile setlink timed out"));
        rc = -ETIMEDOUT;
    }

cleanup:
    VIR_FREE(reply);

    return rc;
}
# endif /* IFLA_PORT_MAX */
# ifdef IFLA_VF_PORT_MAX
/**
 * getPhysdevAndVlan:
 * @ifname: name of the interface to start from
 * @root_ifindex: receives the index of the last interface in the chain
 * @root_ifname: buffer of size IFNAMSIZ receiving the name of that
 *               interface
 * @vlanid: receives the first VLAN id that ifaceGetVlanID() yields while
 *          walking the chain, or -1 if none was found
 *
 * Walk up the chain of parent interfaces, one step at a time, until an
 * interface without a further parent is reached.
 *
 * Returns 0 on success, the error of ifaceGetNthParent() otherwise.
 */
static int
getPhysdevAndVlan(const char *ifname, int *root_ifindex, char *root_ifname,
                  int *vlanid)
{
    unsigned int levels;
    int cur_index = -1;
    int err;

    *vlanid = -1;

    while (true) {
        err = ifaceGetNthParent(cur_index, ifname, 1,
                                root_ifindex, root_ifname, &levels);
        if (err)
            return err;

        /* no step up was possible: the root has been reached */
        if (levels == 0)
            break;

        if (*vlanid == -1 && ifaceGetVlanID(root_ifname, vlanid))
            *vlanid = -1;

        cur_index = *root_ifindex;
        ifname = NULL;
    }

    return 0;
}
# endif
/**
 * doPortProfileOp8021Qbg:
 * @ifname: name of the interface whose physical device is looked up
 * @macaddr: MAC address handed to the request
 * @virtPort: port profile parameters (the 802.1Qbg member of the union
 *            is read)
 * @virtPortOp: PREASSOCIATE, ASSOCIATE or DISASSOCIATE
 *
 * Run an 802.1Qbg port profile operation. The request is addressed to
 * the lldpad daemon rather than the kernel (nltarget_kernel is false)
 * and refers to the physical device and VLAN found by
 * getPhysdevAndVlan(); a missing VLAN id is sent as 0.
 *
 * When IFLA_VF_PORT_MAX is not defined at compile time, an error is
 * reported and 1 is returned.
 *
 * Returns 0 on success, != 0 otherwise.
 */
static int
doPortProfileOp8021Qbg(const char *ifname,
                       const unsigned char *macaddr,
                       const virVirtualPortProfileParamsPtr virtPort,
                       enum virVirtualPortOp virtPortOp)
{
    int rc;

# ifndef IFLA_VF_PORT_MAX

    (void)ifname;
    (void)macaddr;
    (void)virtPort;
    (void)virtPortOp;
    macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
                 _("Kernel VF Port support was missing at compile time."));
    rc = 1;

# else /* IFLA_VF_PORT_MAX */

    int op = PORT_REQUEST_ASSOCIATE;
    struct ifla_port_vsi portVsi = {
        .vsi_mgr_id       = virtPort->u.virtPort8021Qbg.managerID,
        .vsi_type_version = virtPort->u.virtPort8021Qbg.typeIDVersion,
    };
    bool nltarget_kernel = false;
    int vlanid;
    int physdev_ifindex = 0;
    char physdev_ifname[IFNAMSIZ] = { 0, };
    int vf = PORT_SELF_VF;

    if (getPhysdevAndVlan(ifname, &physdev_ifindex, physdev_ifname,
                          &vlanid) != 0) {
        rc = 1;
        goto err_exit;
    }

    if (vlanid < 0)
        vlanid = 0;

    /* split the type id into its three low-order bytes */
    portVsi.vsi_type_id[2] = virtPort->u.virtPort8021Qbg.typeID >> 16;
    portVsi.vsi_type_id[1] = virtPort->u.virtPort8021Qbg.typeID >> 8;
    portVsi.vsi_type_id[0] = virtPort->u.virtPort8021Qbg.typeID;

    switch (virtPortOp) {
    case PREASSOCIATE:
        op = PORT_REQUEST_PREASSOCIATE;
        break;
    case ASSOCIATE:
        op = PORT_REQUEST_ASSOCIATE;
        break;
    case DISASSOCIATE:
        op = PORT_REQUEST_DISASSOCIATE;
        break;
    default:
        /* report the value that was actually rejected, not the
         * netlink request code 'op' which still holds its initializer */
        macvtapError(VIR_ERR_INTERNAL_ERROR,
                     _("operation type %d not supported"), virtPortOp);
        rc = 1;
        goto err_exit;
    }

    rc = doPortProfileOpCommon(nltarget_kernel,
                               physdev_ifname, physdev_ifindex,
                               macaddr,
                               vlanid,
                               NULL,
                               &portVsi,
                               virtPort->u.virtPort8021Qbg.instanceID,
                               NULL,
                               vf,
                               op);

err_exit:

# endif /* IFLA_VF_PORT_MAX */

    return rc;
}
# ifdef IFLA_VF_PORT_MAX
/**
 * getPhysfn:
 * @linkdev: name of the link device
 * @vf: receives the VF index; currently always PORT_SELF_VF
 * @physfndev: receives the name of the physical function device;
 *             currently this is @linkdev itself (not a copy)
 *
 * SR-IOV virtual functions are not handled yet, so the link device is
 * always treated as its own physical function.
 *
 * Returns 0 on success, != 0 otherwise.
 */
static int
getPhysfn(const char *linkdev,
          int32_t *vf,
          char **physfndev)
{
    bool is_virtfn = false;

    if (is_virtfn) {
        /* XXX: if linkdev is SR-IOV VF, then set vf = VF index
         * XXX: and set linkdev = PF device
         * XXX: need to use get_physical_function_linux() or
         * XXX: something like that to get PF
         * XXX: device and figure out VF index
         */
        return 1;
    }

    /* Not SR-IOV VF: physfndev is linkdev and VF index
     * refers to linkdev self
     */
    *vf = PORT_SELF_VF;
    *physfndev = (char *)linkdev;

    return 0;
}
# endif /* IFLA_VF_PORT_MAX */
/**
 * doPortProfileOp8021Qbh:
 * @ifname: name of the link device
 * @virtPort: port profile parameters (the 802.1Qbh profile id is read)
 * @vm_uuid: UUID of the VM, sent as instance id when associating
 * @virtPortOp: ASSOCIATE or DISASSOCIATE
 *
 * Run an 802.1Qbh port profile operation against the kernel. When an
 * association times out, a disassociation is sent. After a successful
 * association the interface is brought up; a disassociation is followed
 * by bringing it down.
 *
 * When IFLA_VF_PORT_MAX is not defined at compile time, an error is
 * reported and 1 is returned.
 *
 * Returns 0 on success, != 0 otherwise.
 */
static int
doPortProfileOp8021Qbh(const char *ifname,
                       const virVirtualPortProfileParamsPtr virtPort,
                       const unsigned char *vm_uuid,
                       enum virVirtualPortOp virtPortOp)
{
    int rc;

# ifndef IFLA_VF_PORT_MAX

    (void)ifname;
    (void)virtPort;
    (void)vm_uuid;
    (void)virtPortOp;
    macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
                 _("Kernel VF Port support was missing at compile time."));
    rc = 1;

# else /* IFLA_VF_PORT_MAX */

    const unsigned char *no_mac = NULL;
    unsigned char host_uuid[VIR_UUID_BUFLEN];
    bool to_kernel = true;
    char *pf_name;
    int32_t vf_index;
    int pf_ifindex;
    int no_vlan = -1;

    rc = getPhysfn(ifname, &vf_index, &pf_name);
    if (rc)
        goto cleanup;

    if (ifaceGetIndex(true, pf_name, &pf_ifindex) != 0) {
        rc = 1;
        goto cleanup;
    }

    if (virtPortOp == ASSOCIATE) {
        rc = virGetHostUUID(host_uuid);
        if (rc)
            goto cleanup;

        rc = doPortProfileOpCommon(to_kernel, NULL, pf_ifindex,
                                   no_mac,
                                   no_vlan,
                                   virtPort->u.virtPort8021Qbh.profileID,
                                   NULL,
                                   vm_uuid,
                                   host_uuid,
                                   vf_index,
                                   PORT_REQUEST_ASSOCIATE);
        if (rc == -ETIMEDOUT) {
            /* Association timed out, disassociate */
            doPortProfileOpCommon(to_kernel, NULL, pf_ifindex,
                                  NULL, 0, NULL, NULL, NULL, NULL,
                                  vf_index,
                                  PORT_REQUEST_DISASSOCIATE);
        }
        if (!rc)
            ifaceUp(ifname);
    } else if (virtPortOp == DISASSOCIATE) {
        rc = doPortProfileOpCommon(to_kernel, NULL, pf_ifindex,
                                   NULL, 0, NULL, NULL, NULL, NULL,
                                   vf_index,
                                   PORT_REQUEST_DISASSOCIATE);
        ifaceDown(ifname);
    } else {
        macvtapError(VIR_ERR_INTERNAL_ERROR,
                     _("operation type %d not supported"), virtPortOp);
        rc = 1;
    }

cleanup:

# endif /* IFLA_VF_PORT_MAX */

    return rc;
}
/**
* vpAssociatePortProfile
*
* @macvtap_ifname: The name of the macvtap device
* @virtPort: pointer to the object holding port profile parameters
* @vmuuid : the UUID of the virtual machine
* @vmOp : The VM operation (i.e., create, no-op)
*
* Associate a port on a swtich with a profile. This function
* may notify a kernel driver or an external daemon to run
* the setup protocol. If profile parameters were not supplied
* by the user, then this function returns without doing
* anything.
*
* Returns 0 in case of success, != 0 otherwise with error
* having been reported.
*/
int
vpAssociatePortProfileId(const char *macvtap_ifname,
const unsigned char *macvtap_macaddr,
const char *linkdev,
const virVirtualPortProfileParamsPtr virtPort,
const unsigned char *vmuuid,
enum virVMOperationType vmOp)
{
int rc = 0;
VIR_DEBUG("Associating port profile '%p' on link device '%s'",
virtPort, macvtap_ifname);
VIR_DEBUG("%s: VM OPERATION: %s", __FUNCTION__, virVMOperationTypeToString(vmOp));
if (vmOp == VIR_VM_OP_NO_OP)
return 0;
switch (virtPort->virtPortType) {
case VIR_VIRTUALPORT_NONE:
case VIR_VIRTUALPORT_TYPE_LAST:
break;
case VIR_VIRTUALPORT_8021QBG:
rc = doPortProfileOp8021Qbg(macvtap_ifname, macvtap_macaddr,
virtPort,
(vmOp == VIR_VM_OP_MIGRATE_IN_START)
? PREASSOCIATE
: ASSOCIATE);
break;
case VIR_VIRTUALPORT_8021QBH:
/* avoid associating twice */
if (vmOp == VIR_VM_OP_MIGRATE_IN_FINISH)
break;
rc = doPortProfileOp8021Qbh(linkdev, virtPort,
vmuuid,
ASSOCIATE);
break;
}
return rc;
}
/**
 * vpDisassociatePortProfileId
 *
 * @macvtap_ifname: The name of the macvtap device
 * @macvtap_macaddr : The MAC address of the macvtap
 * @linkdev: The link device in case of macvtap
 * @virtPort: point to object holding port profile parameters
 * @vmOp: The VM operation; for 802.1Qbh nothing is done when it is
 *        VIR_VM_OP_MIGRATE_IN_FINISH
 *
 * Disassociate a port from its profile. Nothing is done when the port
 * type is neither 802.1Qbg nor 802.1Qbh.
 *
 * Returns 0 in case of success, != 0 otherwise with error
 * having been reported.
 */
int
vpDisassociatePortProfileId(const char *macvtap_ifname,
                            const unsigned char *macvtap_macaddr,
                            const char *linkdev,
                            const virVirtualPortProfileParamsPtr virtPort,
                            enum virVMOperationType vmOp)
{
    VIR_DEBUG("Disassociating port profile id '%p' on link device '%s' ",
              virtPort, macvtap_ifname);

    VIR_DEBUG("%s: VM OPERATION: %s", __FUNCTION__, virVMOperationTypeToString(vmOp));

    if (virtPort->virtPortType == VIR_VIRTUALPORT_8021QBG)
        return doPortProfileOp8021Qbg(macvtap_ifname, macvtap_macaddr,
                                      virtPort, DISASSOCIATE);

    if (virtPort->virtPortType == VIR_VIRTUALPORT_8021QBH) {
        /* avoid disassociating twice */
        if (vmOp == VIR_VM_OP_MIGRATE_IN_FINISH)
            return 0;

        return doPortProfileOp8021Qbh(linkdev, virtPort, NULL, DISASSOCIATE);
    }

    /* VIR_VIRTUALPORT_NONE and everything else: nothing to do */
    return 0;
}
#endif /* WITH_MACVTAP || WITH_VIRTUALPORT */
/* String names of the VM operations, as returned by
 * virVMOperationTypeToString(). This sits outside the
 * WITH_MACVTAP || WITH_VIRTUALPORT guard, so it is compiled regardless
 * of those settings.
 * NOTE(review): the order presumably has to match enum
 * virVMOperationType, which is declared outside this file -- confirm
 * when adding entries. */
VIR_ENUM_IMPL(virVMOperation, VIR_VM_OP_LAST,
"create",
"save",
"restore",
"destroy",
"migrate out",
"migrate in start",
"migrate in finish",
"no-op")