From 3cc52164b1a406b53b952f8e161ff8d7cb3d0574 Mon Sep 17 00:00:00 2001 From: Laine Stump Date: Thu, 3 May 2012 10:39:04 -0400 Subject: [PATCH] util: fix libvirtd startup failure due to netlink error This is part of the solution to the problem detailed in: https://bugzilla.redhat.com/show_bug.cgi?id=816465 and further detailed in https://www.redhat.com/archives/libvir-list/2012-May/msg00202.htm A short explanation is included in the comments of the patch itself. Note that this patch by itself breaks communication between lldpad and libvirtd, so the other 3 patches in the series must be applied at the same time as this patch. (cherry picked from commit 642973135c54b93242c4548ef27d591b52b0994c) Conflicts: daemon/libvirtd.c --- daemon/libvirtd.c | 7 +++++ src/libvirt_private.syms | 2 ++ src/util/virnetlink.c | 65 ++++++++++++++++++++++++++++++++++++++++ src/util/virnetlink.h | 5 +++- 4 files changed, 78 insertions(+), 1 deletion(-) diff --git a/daemon/libvirtd.c b/daemon/libvirtd.c index 1ffbfbbd33..19729f478d 100644 --- a/daemon/libvirtd.c +++ b/daemon/libvirtd.c @@ -1483,6 +1483,12 @@ int main(int argc, char **argv) { use_polkit_dbus = config->auth_unix_rw == REMOTE_AUTH_POLKIT || config->auth_unix_ro == REMOTE_AUTH_POLKIT; + + if (virNetlinkStartup() < 0) { + ret = VIR_DAEMON_ERR_INIT; + goto cleanup; + } + if (!(srv = virNetServerNew(config->min_workers, config->max_workers, config->prio_workers, @@ -1620,6 +1626,7 @@ cleanup: virNetServerProgramFree(qemuProgram); virNetServerClose(srv); virNetServerFree(srv); + virNetlinkShutdown(); if (statuswrite != -1) { if (ret != 0) { /* Tell parent of daemon what failed */ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index e3020e0e0e..6fcdd874a9 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1327,6 +1327,8 @@ virNetlinkEventRemoveClient; virNetlinkEventServiceIsRunning; virNetlinkEventServiceStop; virNetlinkEventServiceStart; +virNetlinkShutdown; +virNetlinkStartup; # virnetmessage.h diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c index a5e6ca9724..3577ecc9da 100644 --- a/src/util/virnetlink.c +++ b/src/util/virnetlink.c @@ -88,9 +88,62 @@ static int nextWatch = 1; # define NETLINK_EVENT_ALLOC_EXTENT 10 static virNetlinkEventSrvPrivatePtr server = NULL; +static struct nl_handle *placeholder_nlhandle = NULL; /* Function definitions */ +/** + * virNetlinkStartup: + * + * Perform any initialization that needs to take place before the + * program starts up worker threads. This is currently used to assure + * that an nl_handle is allocated prior to any attempts to bind a + * netlink socket. For a discussion of why this is necessary, please + * see the following email message: + * + * https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html + * + * The short version is that, without this placeholder allocation of + * an nl_handle that is never used, it is possible for nl_connect() in + * one thread to collide with a direct bind() of a netlink socket in + * another thread, leading to failure of the operation (which could + * lead to failure of libvirtd to start). Since getaddrinfo() (used by + * libvirtd in virSocketAddrParse, which is called quite frequently + * during startup) directly calls bind() on a netlink socket, this is + * actually a very common occurrence (15-20% failure rate on some + * hardware). + * + * Returns 0 on success, -1 on failure. + */ +int +virNetlinkStartup(void) +{ + if (placeholder_nlhandle) + return 0; + placeholder_nlhandle = nl_handle_alloc(); + if (!placeholder_nlhandle) { + virReportSystemError(errno, "%s", + _("cannot allocate placeholder nlhandle for netlink")); + return -1; + } + return 0; +} + +/** + * virNetlinkShutdown: + * + * Undo any initialization done by virNetlinkStartup. This currently + * destroys the placeholder nl_handle. + */ +void +virNetlinkShutdown(void) +{ + if (placeholder_nlhandle) { + nl_handle_destroy(placeholder_nlhandle); + placeholder_nlhandle = NULL; + } +} + /** * virNetlinkCommand: * @nlmsg: pointer to netlink message @@ -536,6 +589,18 @@ static const char *unsupported = N_("libnl was not available at build time"); static const char *unsupported = N_("not supported on non-linux platforms"); # endif +int +virNetlinkStartup(void) +{ + return 0; +} + +void +virNetlinkShutdown(void) +{ + return; +} + int virNetlinkCommand(struct nl_msg *nl_msg ATTRIBUTE_UNUSED, unsigned char **respbuf ATTRIBUTE_UNUSED, unsigned int *respbuflen ATTRIBUTE_UNUSED, diff --git a/src/util/virnetlink.h b/src/util/virnetlink.h index a72612e4a8..93df59ad82 100644 --- a/src/util/virnetlink.h +++ b/src/util/virnetlink.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2011 Red Hat, Inc. + * Copyright (C) 2010-2012 Red Hat, Inc. * Copyright (C) 2010-2012 IBM Corporation * * This library is free software; you can redistribute it and/or @@ -35,6 +35,9 @@ struct nlattr; # endif /* __linux__ */ +int virNetlinkStartup(void); +void virNetlinkShutdown(void); + int virNetlinkCommand(struct nl_msg *nl_msg, unsigned char **respbuf, unsigned int *respbuflen, int nl_pid);