network: wait for DAD to finish for bridge IPv6 addresses

commit db488c79 assumed that dnsmasq would complete IPv6 DAD before
daemonizing, but in reality it doesn't wait, which creates problems
when libvirt's bridge driver sets the matching "dummy tap device" to
IFF_DOWN prior to DAD completing.

This patch waits for DAD completion by periodically polling the kernel
using netlink to check whether there are any IPv6 addresses assigned
to bridge which have a 'tentative' state (if there are any in this
state, then DAD hasn't yet finished). After DAD is finished, execution
continues. To avoid an endless hang in case something was wrong with
the kernel's DAD, we wait a maximum of 5 seconds.
This commit is contained in:
Maxim Perevedentsev 2015-10-20 18:44:18 +03:00 committed by Laine Stump
parent 131e7245a8
commit 0f7436ca54
4 changed files with 147 additions and 2 deletions

View File

@ -1792,6 +1792,7 @@ virNetDevSetRcvMulti;
virNetDevSetupControl;
virNetDevSysfsFile;
virNetDevValidateConfig;
virNetDevWaitDadFinish;
# util/virnetdevbandwidth.h

View File

@ -2025,6 +2025,32 @@ networkAddRouteToBridge(virNetworkObjPtr network,
return 0;
}
static int
networkWaitDadFinish(virNetworkObjPtr network)
{
virNetworkIpDefPtr ipdef;
virSocketAddrPtr *addrs = NULL, addr = NULL;
size_t naddrs = 0;
int ret = -1;
VIR_DEBUG("Begin waiting for IPv6 DAD on network %s", network->def->name);
while ((ipdef = virNetworkDefGetIpByIndex(network->def,
AF_INET6, naddrs))) {
addr = &ipdef->address;
if (VIR_APPEND_ELEMENT_COPY(addrs, naddrs, addr) < 0)
goto cleanup;
}
ret = (naddrs == 0) ? 0 : virNetDevWaitDadFinish(addrs, naddrs);
cleanup:
VIR_FREE(addrs);
VIR_DEBUG("Finished waiting for IPv6 DAD on network %s with status %d",
network->def->name, ret);
return ret;
}
static int
networkStartNetworkVirtual(virNetworkDriverStatePtr driver,
virNetworkObjPtr network)
@ -2159,8 +2185,14 @@ networkStartNetworkVirtual(virNetworkDriverStatePtr driver,
if (v6present && networkStartRadvd(driver, network) < 0)
goto err4;
/* DAD has happened (dnsmasq waits for it), dnsmasq is now bound to the
* bridge's IPv6 address, so we can now set the dummy tun down.
/* dnsmasq does not wait for DAD to complete before daemonizing,
* so we need to wait for it ourselves.
*/
if (v6present && networkWaitDadFinish(network) < 0)
goto err4;
/* DAD has finished, dnsmasq is now bound to the
* bridge's IPv6 address, so we can set the dummy tun down.
*/
if (tapfd >= 0) {
if (virNetDevSetOnline(macTapIfName, false) < 0)

View File

@ -96,6 +96,7 @@ VIR_LOG_INIT("util.netdev");
# define FEATURE_BIT_IS_SET(blocks, index, field) \
(FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index))
#endif
#define VIR_DAD_WAIT_TIMEOUT 5 /* seconds */
typedef enum {
VIR_MCAST_TYPE_INDEX_TOKEN,
@ -1305,6 +1306,106 @@ int virNetDevClearIPAddress(const char *ifname,
return ret;
}
/* return true if there is a known address with 'tentative' flag set */
static bool
virNetDevParseDadStatus(struct nlmsghdr *nlh, int len,
virSocketAddrPtr *addrs, size_t count)
{
struct ifaddrmsg *ifaddrmsg_ptr;
unsigned int ifaddrmsg_len;
struct rtattr *rtattr_ptr;
size_t i;
struct in6_addr *addr;
for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
if (NLMSG_PAYLOAD(nlh, 0) < sizeof(struct ifaddrmsg)) {
/* Message without payload is the last one. */
break;
}
ifaddrmsg_ptr = (struct ifaddrmsg *)NLMSG_DATA(nlh);
if (!(ifaddrmsg_ptr->ifa_flags & IFA_F_TENTATIVE)) {
/* Not tentative: we are not interested in this entry. */
continue;
}
ifaddrmsg_len = IFA_PAYLOAD(nlh);
rtattr_ptr = (struct rtattr *) IFA_RTA(ifaddrmsg_ptr);
for (; RTA_OK(rtattr_ptr, ifaddrmsg_len);
rtattr_ptr = RTA_NEXT(rtattr_ptr, ifaddrmsg_len)) {
if (RTA_PAYLOAD(rtattr_ptr) != sizeof(struct in6_addr)) {
/* No address: ignore. */
continue;
}
/* We check only known addresses. */
for (i = 0; i < count; i++) {
addr = &addrs[i]->data.inet6.sin6_addr;
if (!memcmp(addr, RTA_DATA(rtattr_ptr),
sizeof(struct in6_addr))) {
/* We found matching tentative address. */
return true;
}
}
}
}
return false;
}
/* return after DAD finishes for all known IPv6 addresses or an error */
int
virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count)
{
struct nl_msg *nlmsg = NULL;
struct ifaddrmsg ifa;
struct nlmsghdr *resp = NULL;
unsigned int recvbuflen;
int ret = -1;
bool dad = true;
time_t max_time = time(NULL) + VIR_DAD_WAIT_TIMEOUT;
if (!(nlmsg = nlmsg_alloc_simple(RTM_GETADDR,
NLM_F_REQUEST | NLM_F_DUMP))) {
virReportOOMError();
return -1;
}
memset(&ifa, 0, sizeof(ifa));
/* DAD is for IPv6 adresses only. */
ifa.ifa_family = AF_INET6;
if (nlmsg_append(nlmsg, &ifa, sizeof(ifa), NLMSG_ALIGNTO) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("allocated netlink buffer is too small"));
goto cleanup;
}
/* Periodically query netlink until DAD finishes on all known addresses. */
while (dad && time(NULL) < max_time) {
if (virNetlinkCommand(nlmsg, &resp, &recvbuflen, 0, 0,
NETLINK_ROUTE, 0) < 0)
goto cleanup;
if (virNetlinkGetErrorCode(resp, recvbuflen) < 0) {
virReportError(VIR_ERR_SYSTEM_ERROR, "%s",
_("error reading DAD state information"));
goto cleanup;
}
/* Parse response. */
dad = virNetDevParseDadStatus(resp, recvbuflen, addrs, count);
if (dad)
usleep(1000 * 10);
VIR_FREE(resp);
}
/* Check timeout. */
ret = dad ? -1 : 0;
cleanup:
VIR_FREE(resp);
nlmsg_free(nlmsg);
return ret;
}
#else /* defined(__linux__) && defined(HAVE_LIBNL) */
int virNetDevSetIPAddress(const char *ifname,
@ -1424,6 +1525,15 @@ int virNetDevClearIPAddress(const char *ifname,
return ret;
}
/* return after DAD finishes for all known IPv6 addresses or an error */
int
virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count)
{
virReportSystemError(ENOSYS, "%s",
_("Unable to wait for IPv6 DAD on this platform"));
return -1;
}
#endif /* defined(__linux__) && defined(HAVE_LIBNL) */
/**

View File

@ -105,6 +105,8 @@ int virNetDevClearIPAddress(const char *ifname,
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
int virNetDevGetIPAddress(const char *ifname, virSocketAddrPtr addr)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
int virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
int virNetDevSetMAC(const char *ifname,