From 30fb2276d88b275dc2aad6ddd28c100d944b59a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 14 Mar 2018 12:16:11 +0000 Subject: [PATCH] qemu: support passing pre-opened UNIX socket listen FD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a race condition when spawning QEMU where libvirt has spawned QEMU but the monitor socket is not yet open. Libvirt has to repeatedly try to connect() to QEMU's monitor until eventually it succeeds, or times out. We use kill() to check if QEMU is still alive so we avoid waiting a long time if QEMU exited, but having a timeout at all is still unpleasant. With QEMU 2.12 we can pass in a pre-opened FD for UNIX domain or TCP sockets. If libvirt has called bind() and listen() on this FD, then we have a guarantee that libvirt can immediately call connect() and succeed without any race. Although we only really care about this for the monitor socket and agent socket, this patch does FD passing for all UNIX socket based character devices since there appears to be no downside to it. We don't do FD passing for TCP sockets, however, because it is only possible to pass a single FD, while some hostnames may require listening on multiple FDs to cover IPv4 and IPv6 concurrently. Reviewed-by: John Ferlan Signed-off-by: Daniel P. Berrangé --- src/qemu/qemu_command.c | 64 ++++++++++++++++++- src/qemu/qemu_command.h | 4 ++ .../disk-drive-write-cache.x86_64-latest.args | 3 +- ...irtio-scsi-reservations.x86_64-latest.args | 3 +- .../genid-auto.x86_64-latest.args | 3 +- .../qemuxml2argvdata/genid.x86_64-latest.args | 3 +- .../vhost-vsock-auto.x86_64-latest.args | 3 +- .../vhost-vsock.x86_64-latest.args | 3 +- tests/qemuxml2argvmock.c | 19 ++++++ 9 files changed, 91 insertions(+), 14 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 2f5cf4e70e..e85c5ef804 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -4890,6 +4890,56 @@ qemuBuildChrChardevReconnectStr(virBufferPtr buf, } +int +qemuOpenChrChardevUNIXSocket(const virDomainChrSourceDef *dev) +{ + struct sockaddr_un addr; + socklen_t addrlen = sizeof(addr); + int fd; + + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { + virReportSystemError(errno, "%s", + _("Unable to create UNIX socket")); + goto error; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + if (virStrcpyStatic(addr.sun_path, dev->data.nix.path) == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("UNIX socket path '%s' too long"), + dev->data.nix.path); + goto error; + } + + if (unlink(dev->data.nix.path) < 0 && errno != ENOENT) { + virReportSystemError(errno, + _("Unable to unlink %s"), + dev->data.nix.path); + goto error; + } + + if (bind(fd, (struct sockaddr *)&addr, addrlen) < 0) { + virReportSystemError(errno, + _("Unable to bind to UNIX socket path '%s'"), + dev->data.nix.path); + goto error; + } + + if (listen(fd, 1) < 0) { + virReportSystemError(errno, + _("Unable to listen to UNIX socket path '%s'"), + dev->data.nix.path); + goto error; + } + + return fd; + + error: + VIR_FORCE_CLOSE(fd); + return -1; +} + /* This function outputs a -chardev command line option which describes only the * host side of the character device */ static char * @@ -5026,8 +5076,18 @@ qemuBuildChrChardevStr(virLogManagerPtr logManager, break; case VIR_DOMAIN_CHR_TYPE_UNIX: - virBufferAsprintf(&buf, "socket,id=%s,path=", charAlias); - virQEMUBuildBufferEscapeComma(&buf, dev->data.nix.path); + if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_CHARDEV_FD_PASS)) { + int fd = qemuOpenChrChardevUNIXSocket(dev); + if (fd < 0) + goto cleanup; + + virBufferAsprintf(&buf, "socket,id=%s,fd=%d", charAlias, fd); + + virCommandPassFD(cmd, fd, VIR_COMMAND_PASS_FD_CLOSE_PARENT); + } else { + virBufferAsprintf(&buf, "socket,id=%s,path=", charAlias); + virQEMUBuildBufferEscapeComma(&buf, dev->data.nix.path); + } if (dev->data.nix.listen) virBufferAdd(&buf, nowait ? ",server,nowait" : ",server", -1); diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index aeb13ce18a..da75645ac5 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -72,6 +72,10 @@ int qemuBuildTLSx509BackendProps(const char *tlspath, virQEMUCapsPtr qemuCaps, virJSONValuePtr *propsret); +/* Open a UNIX socket for chardev FD passing */ +int +qemuOpenChrChardevUNIXSocket(const virDomainChrSourceDef *dev); + /* Generate '-device' string for chardev device */ int qemuBuildChrDeviceStr(char **deviceStr, diff --git a/tests/qemuxml2argvdata/disk-drive-write-cache.x86_64-latest.args b/tests/qemuxml2argvdata/disk-drive-write-cache.x86_64-latest.args index a63c5b7477..9e5b611351 100644 --- a/tests/qemuxml2argvdata/disk-drive-write-cache.x86_64-latest.args +++ b/tests/qemuxml2argvdata/disk-drive-write-cache.x86_64-latest.args @@ -17,8 +17,7 @@ file=/tmp/lib/domain--1-QEMUGuest1/master-key.aes \ -display none \ -no-user-config \ -nodefaults \ --chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\ -server,nowait \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ -mon chardev=charmonitor,id=monitor,mode=control \ -rtc base=utc \ -no-shutdown \ diff --git a/tests/qemuxml2argvdata/disk-virtio-scsi-reservations.x86_64-latest.args b/tests/qemuxml2argvdata/disk-virtio-scsi-reservations.x86_64-latest.args index 90843a19f5..1173dac674 100644 --- a/tests/qemuxml2argvdata/disk-virtio-scsi-reservations.x86_64-latest.args +++ b/tests/qemuxml2argvdata/disk-virtio-scsi-reservations.x86_64-latest.args @@ -19,8 +19,7 @@ path=/tmp/lib/domain--1-QEMUGuest1/pr-helper0.sock \ -display none \ -no-user-config \ -nodefaults \ --chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\ -server,nowait \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ -mon chardev=charmonitor,id=monitor,mode=control \ -rtc base=utc \ -no-shutdown \ diff --git a/tests/qemuxml2argvdata/genid-auto.x86_64-latest.args b/tests/qemuxml2argvdata/genid-auto.x86_64-latest.args index ce163020b9..c25e73b6e2 100644 --- a/tests/qemuxml2argvdata/genid-auto.x86_64-latest.args +++ b/tests/qemuxml2argvdata/genid-auto.x86_64-latest.args @@ -18,8 +18,7 @@ file=/tmp/lib/domain--1-QEMUGuest1/master-key.aes \ -display none \ -no-user-config \ -nodefaults \ --chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\ -server,nowait \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ -mon chardev=charmonitor,id=monitor,mode=control \ -rtc base=utc \ -no-shutdown \ diff --git a/tests/qemuxml2argvdata/genid.x86_64-latest.args b/tests/qemuxml2argvdata/genid.x86_64-latest.args index 54e00f4bdb..704e5d93e5 100644 --- a/tests/qemuxml2argvdata/genid.x86_64-latest.args +++ b/tests/qemuxml2argvdata/genid.x86_64-latest.args @@ -18,8 +18,7 @@ file=/tmp/lib/domain--1-QEMUGuest1/master-key.aes \ -display none \ -no-user-config \ -nodefaults \ --chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\ -server,nowait \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ -mon chardev=charmonitor,id=monitor,mode=control \ -rtc base=utc \ -no-shutdown \ diff --git a/tests/qemuxml2argvdata/vhost-vsock-auto.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-vsock-auto.x86_64-latest.args index dd9b60ba3e..a56d5a3efe 100644 --- a/tests/qemuxml2argvdata/vhost-vsock-auto.x86_64-latest.args +++ b/tests/qemuxml2argvdata/vhost-vsock-auto.x86_64-latest.args @@ -17,8 +17,7 @@ file=/tmp/lib/domain--1-test/master-key.aes \ -display none \ -no-user-config \ -nodefaults \ --chardev socket,id=charmonitor,path=/tmp/lib/domain--1-test/monitor.sock,\ -server,nowait \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ -mon chardev=charmonitor,id=monitor,mode=control \ -rtc base=utc \ -no-shutdown \ diff --git a/tests/qemuxml2argvdata/vhost-vsock.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-vsock.x86_64-latest.args index 907af8bb99..922795abfd 100644 --- a/tests/qemuxml2argvdata/vhost-vsock.x86_64-latest.args +++ b/tests/qemuxml2argvdata/vhost-vsock.x86_64-latest.args @@ -17,8 +17,7 @@ file=/tmp/lib/domain--1-test/master-key.aes \ -display none \ -no-user-config \ -nodefaults \ --chardev socket,id=charmonitor,path=/tmp/lib/domain--1-test/monitor.sock,\ -server,nowait \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ -mon chardev=charmonitor,id=monitor,mode=control \ -rtc base=utc \ -no-shutdown \ diff --git a/tests/qemuxml2argvmock.c b/tests/qemuxml2argvmock.c index a4de7f0c46..4df92cf396 100644 --- a/tests/qemuxml2argvmock.c +++ b/tests/qemuxml2argvmock.c @@ -37,8 +37,10 @@ #include "virtpm.h" #include "virutil.h" #include "qemu/qemu_interface.h" +#include "qemu/qemu_command.h" #include #include +#include #define VIR_FROM_THIS VIR_FROM_NONE @@ -214,3 +216,20 @@ qemuInterfaceOpenVhostNet(virDomainDefPtr def ATTRIBUTE_UNUSED, vhostfd[i] = STDERR_FILENO + 42 + i; return 0; } + + +int +qemuOpenChrChardevUNIXSocket(const virDomainChrSourceDef *dev ATTRIBUTE_UNUSED) + +{ + /* We need to return an FD number for a UNIX listener socket, + * which will be given to QEMU via a CLI arg. We need a fixed + * number to get stable tests. This is obviously not a real + * FD number, so when virCommand closes the FD in the parent + * it will get EINVAL, but that's (hopefully) not going to + * be a problem.... + */ + if (fcntl(1729, F_GETFD) != -1) + abort(); + return 1729; +}