util: fix libvirtd startup failure due to netlink error

This is part of the solution to the problem detailed in:

  https://bugzilla.redhat.com/show_bug.cgi?id=816465

and further detailed in

  https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html

A short explanation is included in the comments of the patch itself.

Note that this patch by itself breaks communication between lldpad and
libvirtd, so the other 3 patches in the series must be applied at the
same time as this patch.
This commit is contained in:
Laine Stump 2012-05-03 10:39:04 -04:00
parent f300c194fd
commit 642973135c
4 changed files with 77 additions and 1 deletions

View File

@ -1007,6 +1007,11 @@ int main(int argc, char **argv) {
goto cleanup;
}
if (virNetlinkStartup() < 0) {
ret = VIR_DAEMON_ERR_INIT;
goto cleanup;
}
if (!(srv = virNetServerNew(config->min_workers,
config->max_workers,
config->prio_workers,
@ -1143,6 +1148,7 @@ cleanup:
virNetServerProgramFree(qemuProgram);
virNetServerClose(srv);
virNetServerFree(srv);
virNetlinkShutdown();
if (statuswrite != -1) {
if (ret != 0) {
/* Tell parent of daemon what failed */

View File

@ -1335,6 +1335,8 @@ virNetlinkEventRemoveClient;
virNetlinkEventServiceIsRunning;
virNetlinkEventServiceStop;
virNetlinkEventServiceStart;
virNetlinkShutdown;
virNetlinkStartup;
# virnetmessage.h

View File

@ -98,9 +98,62 @@ static int nextWatch = 1;
# define NETLINK_EVENT_ALLOC_EXTENT 10
static virNetlinkEventSrvPrivatePtr server = NULL;
/* Placeholder netlink handle: allocated once by virNetlinkStartup() and
 * destroyed by virNetlinkShutdown(). It is never used for communication;
 * it only needs to exist before any netlink socket is bound. */
static struct nl_handle *placeholder_nlhandle = NULL;
/* Function definitions */
/**
 * virNetlinkStartup:
 *
 * One-time netlink initialization that must run before the program
 * spawns any worker threads. At present its only job is to make sure
 * a placeholder nl_handle has been allocated before anything tries to
 * bind a netlink socket. The handle itself is never used.
 *
 * Why this matters: without the placeholder, an nl_connect() issued
 * in one thread can collide with a direct bind() of a netlink socket
 * in another thread, causing the operation to fail (and potentially
 * preventing libvirtd from starting). getaddrinfo() - which libvirtd
 * reaches via virSocketAddrParse, called very often during startup -
 * performs exactly such a direct bind(), so the collision is common
 * in practice (15-20% failure rate on some hardware). The full
 * discussion is in this email message:
 *
 * https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html
 *
 * Calling this function again after a successful call is harmless:
 * the existing placeholder is kept and 0 is returned.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virNetlinkStartup(void)
{
    /* Allocate only when no placeholder exists yet; a NULL result
     * from the allocation is the sole failure condition. */
    if (!placeholder_nlhandle &&
        !(placeholder_nlhandle = nl_handle_alloc())) {
        virReportSystemError(errno, "%s",
                             _("cannot allocate placeholder nlhandle for netlink"));
        return -1;
    }

    return 0;
}
/**
 * virNetlinkShutdown:
 *
 * Reverse whatever virNetlinkStartup set up. Right now that means
 * destroying the placeholder nl_handle, if one was allocated, and
 * clearing the pointer so a later virNetlinkStartup allocates afresh.
 * Does nothing when no placeholder exists.
 */
void
virNetlinkShutdown(void)
{
    if (!placeholder_nlhandle)
        return;

    nl_handle_destroy(placeholder_nlhandle);
    placeholder_nlhandle = NULL;
}
/**
* virNetlinkCommand:
* @nlmsg: pointer to netlink message
@ -546,6 +599,18 @@ static const char *unsupported = N_("libnl was not available at build time");
static const char *unsupported = N_("not supported on non-linux platforms");
# endif
/**
 * virNetlinkStartup:
 *
 * Stub used when netlink support is not compiled in: there is nothing
 * to initialize, so this always reports success.
 *
 * Returns 0.
 */
int
virNetlinkStartup(void)
{
    return 0;
}
/**
 * virNetlinkShutdown:
 *
 * Stub used when netlink support is not compiled in: the matching
 * virNetlinkStartup stub sets nothing up, so there is nothing to undo.
 */
void
virNetlinkShutdown(void)
{
    /* intentionally empty */
}
int virNetlinkCommand(struct nl_msg *nl_msg ATTRIBUTE_UNUSED,
unsigned char **respbuf ATTRIBUTE_UNUSED,
unsigned int *respbuflen ATTRIBUTE_UNUSED,

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2010-2011 Red Hat, Inc.
* Copyright (C) 2010-2012 Red Hat, Inc.
* Copyright (C) 2010-2012 IBM Corporation
*
* This library is free software; you can redistribute it and/or
@ -35,6 +35,9 @@ struct nlattr;
# endif /* __linux__ */
/* Process-wide netlink initialization; call before starting worker
 * threads. Returns 0 on success, -1 on failure. */
int virNetlinkStartup(void);
/* Undo the initialization performed by virNetlinkStartup(). */
void virNetlinkShutdown(void);
int virNetlinkCommand(struct nl_msg *nl_msg,
unsigned char **respbuf, unsigned int *respbuflen,
int nl_pid);