libvirt/src/qemu/qemu_monitor.c

4475 lines
112 KiB
C
Raw Normal View History

/*
* qemu_monitor.c: interaction with QEMU monitor console
*
* Copyright (C) 2006-2015 Red Hat, Inc.
* Copyright (C) 2006 Daniel P. Berrange
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <poll.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <fcntl.h>
#include "qemu_monitor.h"
#include "qemu_monitor_text.h"
#include "qemu_monitor_json.h"
#include "qemu_domain.h"
#include "qemu_process.h"
#include "virerror.h"
2012-12-12 18:06:53 +00:00
#include "viralloc.h"
2012-12-12 17:59:27 +00:00
#include "virlog.h"
#include "virfile.h"
#include "virprocess.h"
#include "virobject.h"
#include "virprobe.h"
#include "virstring.h"
#include "virtime.h"
#ifdef WITH_DTRACE_PROBES
# include "libvirt_qemu_probes.h"
#endif
#define LIBVIRT_QEMU_MONITOR_PRIV_H_ALLOW
#include "qemu_monitor_priv.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
VIR_LOG_INIT("qemu.qemu_monitor");
#define DEBUG_IO 0
#define DEBUG_RAW_IO 0
/* We read from QEMU until seeing a \r\n pair to indicate a
* completed reply or event. To avoid memory denial-of-service
* though, we must have a size limit on amount of data we
* buffer. 10 MB is large enough that it ought to cope with
* normal QEMU replies, and small enough that we're not
* consuming unreasonable mem.
*/
#define QEMU_MONITOR_MAX_RESPONSE (10 * 1024 * 1024)
struct _qemuMonitor {
virObjectLockable parent;
virCond notify;
int fd;
/* Represents the watch number to be used for updating and
* unregistering the monitor @fd for events in the event loop:
* > 0: valid watch number
* = 0: not registered
* < 0: an error occurred during the registration of @fd */
int watch;
int hasSendFD;
virDomainObjPtr vm;
qemuMonitorCallbacksPtr cb;
void *callbackOpaque;
/* If there's a command being processed this will be
* non-NULL */
qemuMonitorMessagePtr msg;
/* Buffer incoming data ready for Text/QMP monitor
* code to process & find message boundaries */
size_t bufferOffset;
size_t bufferLength;
char *buffer;
/* If anything went wrong, this will be fed back
* the next monitor msg */
virError lastError;
int nextSerial;
bool json;
bool waitGreeting;
/* cache of query-command-line-options results */
virJSONValuePtr options;
/* If found, path to the virtio memballoon driver */
char *balloonpath;
bool ballooninit;
/* Log file context of the qemu process to dig for usable info */
qemuMonitorReportDomainLogError logFunc;
void *logOpaque;
virFreeCallback logDestroy;
};
/**
* QEMU_CHECK_MONITOR_FULL:
* @mon: monitor pointer variable to check, evaluated multiple times, no parentheses
* @exit: statement that is used to exit the function
*
* This macro checks that the monitor is valid for given operation and exits
* the function if not. The macro also adds a debug statement regarding the
* monitor.
*/
#define QEMU_CHECK_MONITOR_FULL(mon, exit) \
do { \
if (!mon) { \
virReportError(VIR_ERR_INVALID_ARG, "%s", \
_("monitor must not be NULL")); \
exit; \
} \
VIR_DEBUG("mon:%p vm:%p json:%d fd:%d", \
mon, mon->vm, mon->json, mon->fd); \
if (!mon->json) { \
virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", \
_("JSON monitor is required")); \
exit; \
} \
} while (0)
/* Check monitor and return NULL on error */
#define QEMU_CHECK_MONITOR_NULL(mon) \
QEMU_CHECK_MONITOR_FULL(mon, return NULL)
/* Check monitor and return -1 on error */
#define QEMU_CHECK_MONITOR(mon) \
QEMU_CHECK_MONITOR_FULL(mon, return -1)
/* Check monitor and jump to the provided label */
#define QEMU_CHECK_MONITOR_GOTO(mon, label) \
QEMU_CHECK_MONITOR_FULL(mon, goto label)
static virClassPtr qemuMonitorClass;
static void qemuMonitorDispose(void *obj);
static int qemuMonitorOnceInit(void)
{
if (!VIR_CLASS_NEW(qemuMonitor, virClassForObjectLockable()))
return -1;
return 0;
}
VIR_ONCE_GLOBAL_INIT(qemuMonitor);
VIR_ENUM_IMPL(qemuMonitorMigrationStatus,
QEMU_MONITOR_MIGRATION_STATUS_LAST,
"inactive", "setup",
"active", "pre-switchover",
"device", "postcopy-active",
"completed", "failed",
"cancelling", "cancelled",
);
VIR_ENUM_IMPL(qemuMonitorVMStatus,
QEMU_MONITOR_VM_STATUS_LAST,
"debug", "inmigrate", "internal-error", "io-error", "paused",
"postmigrate", "prelaunch", "finish-migrate", "restore-vm",
"running", "save-vm", "shutdown", "watchdog", "guest-panicked",
);
2012-01-19 17:58:58 +01:00
typedef enum {
QEMU_MONITOR_BLOCK_IO_STATUS_OK,
QEMU_MONITOR_BLOCK_IO_STATUS_FAILED,
QEMU_MONITOR_BLOCK_IO_STATUS_NOSPACE,
QEMU_MONITOR_BLOCK_IO_STATUS_LAST
} qemuMonitorBlockIOStatus;
VIR_ENUM_DECL(qemuMonitorBlockIOStatus);
2012-01-19 17:58:58 +01:00
VIR_ENUM_IMPL(qemuMonitorBlockIOStatus,
QEMU_MONITOR_BLOCK_IO_STATUS_LAST,
"ok", "failed", "nospace",
);
2012-01-19 17:58:58 +01:00
VIR_ENUM_IMPL(qemuMonitorDumpStatus,
QEMU_MONITOR_DUMP_STATUS_LAST,
"none", "active", "completed", "failed",
);
char *
qemuMonitorEscapeArg(const char *in)
{
int len = 0;
size_t i, j;
char *out;
/* To pass through the QEMU monitor, we need to use escape
sequences: \r, \n, \", \\
*/
for (i = 0; in[i] != '\0'; i++) {
switch (in[i]) {
case '\r':
case '\n':
case '"':
case '\\':
len += 2;
break;
default:
len += 1;
break;
}
}
if (VIR_ALLOC_N(out, len + 1) < 0)
return NULL;
for (i = j = 0; in[i] != '\0'; i++) {
switch (in[i]) {
case '\r':
out[j++] = '\\';
out[j++] = 'r';
break;
case '\n':
out[j++] = '\\';
out[j++] = 'n';
break;
case '"':
case '\\':
out[j++] = '\\';
out[j++] = in[i];
break;
default:
out[j++] = in[i];
break;
}
}
out[j] = '\0';
return out;
}
char *
qemuMonitorUnescapeArg(const char *in)
{
size_t i, j;
char *out;
int len = strlen(in);
char next;
if (VIR_ALLOC_N(out, len + 1) < 0)
return NULL;
for (i = j = 0; i < len; ++i) {
next = in[i];
if (in[i] == '\\') {
++i;
switch (in[i]) {
case 'r':
next = '\r';
break;
case 'n':
next = '\n';
break;
case '"':
case '\\':
next = in[i];
break;
default:
/* invalid input (including trailing '\' at end of in) */
VIR_FREE(out);
return NULL;
}
}
out[j++] = next;
}
out[j] = '\0';
return out;
}
2010-04-27 10:11:46 +02:00
#if DEBUG_RAW_IO
# include <c-ctype.h>
static char *
qemuMonitorEscapeNonPrintable(const char *text)
{
size_t i;
virBuffer buf = VIR_BUFFER_INITIALIZER;
for (i = 0; text[i] != '\0'; i++) {
if (c_isprint(text[i]) ||
text[i] == '\n' ||
(text[i] == '\r' && text[i + 1] == '\n'))
virBufferAddChar(&buf, text[i]);
else
virBufferAsprintf(&buf, "0x%02x", text[i]);
}
return virBufferContentAndReset(&buf);
}
#endif
static void
qemuMonitorDispose(void *obj)
{
qemuMonitorPtr mon = obj;
VIR_DEBUG("mon=%p", mon);
if (mon->cb && mon->cb->destroy)
(mon->cb->destroy)(mon, mon->vm, mon->callbackOpaque);
qemu_migration: Avoid crashing if domain dies too quickly I've noticed a SIGSEGV-ing libvirtd on the destination when the qemu died too quickly = in Prepare phase. What is happening here is: 1) [Thread 3493] We are in qemuMigrationPrepareAny() and calling qemuProcessStart() which subsequently calls qemuProcessWaitForMonitor() and qemuConnectMonitor(). So far so good. The qemuMonitorOpen() succeeds, however switching monitor to QMP mode fails as qemu died meanwhile. That is qemuMonitorSetCapabilities() returns -1. 2013-10-08 15:54:10.629+0000: 3493: debug : qemuMonitorSetCapabilities:1356 : mon=0x14a53da0 2013-10-08 15:54:10.630+0000: 3493: debug : qemuMonitorJSONCommandWithFd:262 : Send command '{"execute":"qmp_capabilities","id":"libvirt-1"}' for write with FD -1 2013-10-08 15:54:10.630+0000: 3493: debug : virEventPollUpdateHandle:147 : EVENT_POLL_UPDATE_HANDLE: watch=17 events=13 ... 2013-10-08 15:54:10.631+0000: 3493: debug : qemuMonitorSend:956 : QEMU_MONITOR_SEND_MSG: mon=0x14a53da0 msg={"execute":"qmp_capabilities","id":"libvirt-1"} fd=-1 2013-10-08 15:54:10.631+0000: 3262: debug : virEventPollRunOnce:641 : Poll got 1 event(s) 2) [Thread 3262] The event loop is trying to do the talking to monitor. However, qemu is dead already, remember? 2013-10-08 15:54:13.436+0000: 3262: error : qemuMonitorIORead:551 : Unable to read from monitor: Connection reset by peer 2013-10-08 15:54:13.516+0000: 3262: debug : virFileClose:90 : Closed fd 25 ... 2013-10-08 15:54:13.533+0000: 3493: debug : qemuMonitorSend:968 : Send command resulted in error internal error: early end of file from monitor: possible problem: 3) [Thread 3493] qemuProcessStart() failed. No big deal. Go to the 'endjob' label and subsequently to the 'cleanup'. Since the domain is not persistent and ret is -1, the qemuDomainRemoveInactive() is called. This has an (unpleasant) effect of virObjectUnref()-in the @vm object. Unpleasant because the event loop which is about to trigger EOF callback still holds a pointer to the @vm (not the reference). See the valgrind output below. 4) [Thread 3262] So the event loop starts triggering EOF: 2013-10-08 15:54:13.542+0000: 3262: debug : qemuMonitorIO:729 : Triggering EOF callback 2013-10-08 15:54:13.543+0000: 3262: debug : qemuProcessHandleMonitorEOF:294 : Received EOF on 0x14549110 'migt10' And the monitor is cleaned up. This results in calling qemuProcessHandleMonitorEOF with the @vm pointer passed. The pointer is kept in qemuMonitor struct. ==3262== Thread 1: ==3262== Invalid read of size 4 ==3262== at 0x77ECCAA: pthread_mutex_lock (in /lib64/libpthread-2.15.so) ==3262== by 0x52FAA06: virMutexLock (virthreadpthread.c:85) ==3262== by 0x52E3891: virObjectLock (virobject.c:320) ==3262== by 0x11626743: qemuProcessHandleMonitorEOF (qemu_process.c:296) ==3262== by 0x11642593: qemuMonitorIO (qemu_monitor.c:730) ==3262== by 0x52BD526: virEventPollDispatchHandles (vireventpoll.c:501) ==3262== by 0x52BDD49: virEventPollRunOnce (vireventpoll.c:648) ==3262== by 0x52BBC68: virEventRunDefaultImpl (virevent.c:274) ==3262== by 0x542D3D9: virNetServerRun (virnetserver.c:1112) ==3262== by 0x11F368: main (libvirtd.c:1513) ==3262== Address 0x14549128 is 24 bytes inside a block of size 136 free'd ==3262== at 0x4C2AF5C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) ==3262== by 0x529B1FF: virFree (viralloc.c:580) ==3262== by 0x52E3703: virObjectUnref (virobject.c:270) ==3262== by 0x531557E: virDomainObjListRemove (domain_conf.c:2355) ==3262== by 0x1160E899: qemuDomainRemoveInactive (qemu_domain.c:2061) ==3262== by 0x1163A0C6: qemuMigrationPrepareAny (qemu_migration.c:2450) ==3262== by 0x1163A923: qemuMigrationPrepareDirect (qemu_migration.c:2626) ==3262== by 0x11682D71: qemuDomainMigratePrepare3Params (qemu_driver.c:10309) ==3262== by 0x53B0976: virDomainMigratePrepare3Params (libvirt.c:7266) ==3262== by 0x1502D3: remoteDispatchDomainMigratePrepare3Params (remote.c:4797) ==3262== by 0x12DECA: remoteDispatchDomainMigratePrepare3ParamsHelper (remote_dispatch.h:5741) ==3262== by 0x54322EB: virNetServerProgramDispatchCall (virnetserverprogram.c:435) The mon->vm is set in qemuMonitorOpenInternal() which is the correct place to increase @vm ref counter. The correct place to decrease the ref counter is then qemuMonitorDispose(). Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
2013-10-08 16:50:33 +02:00
virObjectUnref(mon->vm);
virResetError(&mon->lastError);
virCondDestroy(&mon->notify);
VIR_FREE(mon->buffer);
virJSONValueFree(mon->options);
VIR_FREE(mon->balloonpath);
}
static int
qemuMonitorOpenUnix(const char *monitor,
pid_t cpid,
bool retry,
unsigned long long timeout)
{
struct sockaddr_un addr;
int monfd;
virTimeBackOffVar timebackoff;
int ret = -1;
if ((monfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
virReportSystemError(errno,
"%s", _("failed to create socket"));
return -1;
}
memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
if (virStrcpyStatic(addr.sun_path, monitor) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Monitor path %s too big for destination"), monitor);
goto error;
}
if (retry) {
if (virTimeBackOffStart(&timebackoff, 1, timeout * 1000) < 0)
goto error;
while (virTimeBackOffWait(&timebackoff)) {
ret = connect(monfd, (struct sockaddr *)&addr, sizeof(addr));
if (ret == 0)
break;
if ((errno == ENOENT || errno == ECONNREFUSED) &&
(!cpid || virProcessKill(cpid, 0) == 0)) {
/* ENOENT : Socket may not have shown up yet
* ECONNREFUSED : Leftover socket hasn't been removed yet */
continue;
}
virReportSystemError(errno, "%s",
_("failed to connect to monitor socket"));
goto error;
}
if (ret != 0) {
virReportSystemError(errno, "%s",
_("monitor socket did not show up"));
goto error;
}
} else {
ret = connect(monfd, (struct sockaddr *) &addr, sizeof(addr));
if (ret < 0) {
virReportSystemError(errno, "%s",
_("failed to connect to monitor socket"));
goto error;
}
}
return monfd;
error:
VIR_FORCE_CLOSE(monfd);
return -1;
}
static int
qemuMonitorOpenPty(const char *monitor)
{
int monfd;
if ((monfd = open(monitor, O_RDWR)) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unable to open monitor path %s"), monitor);
return -1;
}
return monfd;
}
/* This method processes data that has been received
* from the monitor. Looking for async events and
* replies/errors.
*/
static int
qemuMonitorIOProcess(qemuMonitorPtr mon)
{
int len;
qemuMonitorMessagePtr msg = NULL;
/* See if there's a message & whether its ready for its reply
* ie whether its completed writing all its data */
if (mon->msg && mon->msg->txOffset == mon->msg->txLength)
msg = mon->msg;
#if DEBUG_IO
2010-03-26 17:00:50 +01:00
# if DEBUG_RAW_IO
char *str1 = qemuMonitorEscapeNonPrintable(msg ? msg->txBuffer : "");
char *str2 = qemuMonitorEscapeNonPrintable(mon->buffer);
VIR_ERROR(_("Process %d %p %p [[[[%s]]][[[%s]]]"), (int)mon->bufferOffset, mon->msg, msg, str1, str2);
VIR_FREE(str1);
VIR_FREE(str2);
2010-03-26 17:00:50 +01:00
# else
VIR_DEBUG("Process %d", (int)mon->bufferOffset);
2010-03-26 17:00:50 +01:00
# endif
#endif
PROBE_QUIET(QEMU_MONITOR_IO_PROCESS, "mon=%p buf=%s len=%zu",
mon, mon->buffer, mon->bufferOffset);
len = qemuMonitorJSONIOProcess(mon,
mon->buffer, mon->bufferOffset,
msg);
if (len < 0)
return -1;
if (len && mon->waitGreeting)
mon->waitGreeting = false;
if (len < mon->bufferOffset) {
memmove(mon->buffer, mon->buffer + len, mon->bufferOffset - len);
mon->bufferOffset -= len;
} else {
VIR_FREE(mon->buffer);
mon->bufferOffset = mon->bufferLength = 0;
}
#if DEBUG_IO
VIR_DEBUG("Process done %d used %d", (int)mon->bufferOffset, len);
#endif
/* As the monitor mutex was unlocked in qemuMonitorJSONIOProcess()
* while dealing with qemu event, mon->msg could be changed which
* means the above 'msg' may be invalid, thus we use 'mon->msg' here */
if (mon->msg && mon->msg->finished)
virCondBroadcast(&mon->notify);
return len;
}
/* Call this function while holding the monitor lock. */
static int
qemuMonitorIOWriteWithFD(qemuMonitorPtr mon,
const char *data,
size_t len,
int fd)
{
struct msghdr msg;
struct iovec iov[1];
int ret;
char control[CMSG_SPACE(sizeof(int))];
struct cmsghdr *cmsg;
memset(&msg, 0, sizeof(msg));
memset(control, 0, sizeof(control));
iov[0].iov_base = (void *)data;
iov[0].iov_len = len;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
cmsg = CMSG_FIRSTHDR(&msg);
/* Some static analyzers, like clang 2.6-0.6.pre2, fail to see
that our use of CMSG_FIRSTHDR will not return NULL. */
sa_assert(cmsg);
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
do {
ret = sendmsg(mon->fd, &msg, 0);
} while (ret < 0 && errno == EINTR);
return ret;
}
/*
* Called when the monitor is able to write data
* Call this function while holding the monitor lock.
*/
static int
qemuMonitorIOWrite(qemuMonitorPtr mon)
{
int done;
char *buf;
size_t len;
/* If no active message, or fully transmitted, the no-op */
if (!mon->msg || mon->msg->txOffset == mon->msg->txLength)
return 0;
if (mon->msg->txFD != -1 && !mon->hasSendFD) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Monitor does not support sending of file descriptors"));
return -1;
}
buf = mon->msg->txBuffer + mon->msg->txOffset;
len = mon->msg->txLength - mon->msg->txOffset;
if (mon->msg->txFD == -1)
done = write(mon->fd, buf, len);
else
done = qemuMonitorIOWriteWithFD(mon, buf, len, mon->msg->txFD);
PROBE(QEMU_MONITOR_IO_WRITE,
"mon=%p buf=%s len=%zu ret=%d errno=%d",
mon, buf, len, done, done < 0 ? errno : 0);
if (mon->msg->txFD != -1) {
PROBE(QEMU_MONITOR_IO_SEND_FD,
"mon=%p fd=%d ret=%d errno=%d",
mon, mon->msg->txFD, done, done < 0 ? errno : 0);
}
if (done < 0) {
if (errno == EAGAIN)
return 0;
virReportSystemError(errno, "%s",
_("Unable to write to monitor"));
return -1;
}
mon->msg->txOffset += done;
return done;
}
/*
* Called when the monitor has incoming data to read
* Call this function while holding the monitor lock.
*
* Returns -1 on error, or number of bytes read
*/
static int
qemuMonitorIORead(qemuMonitorPtr mon)
{
size_t avail = mon->bufferLength - mon->bufferOffset;
int ret = 0;
if (avail < 1024) {
if (mon->bufferLength >= QEMU_MONITOR_MAX_RESPONSE) {
virReportSystemError(ERANGE,
_("No complete monitor response found in %d bytes"),
QEMU_MONITOR_MAX_RESPONSE);
return -1;
}
if (VIR_REALLOC_N(mon->buffer,
mon->bufferLength + 1024) < 0)
return -1;
mon->bufferLength += 1024;
avail += 1024;
}
/* Read as much as we can get into our buffer,
until we block on EAGAIN, or hit EOF */
while (avail > 1) {
int got;
got = read(mon->fd,
mon->buffer + mon->bufferOffset,
avail - 1);
if (got < 0) {
if (errno == EAGAIN)
break;
virReportSystemError(errno, "%s",
_("Unable to read from monitor"));
ret = -1;
break;
}
if (got == 0)
break;
ret += got;
avail -= got;
mon->bufferOffset += got;
mon->buffer[mon->bufferOffset] = '\0';
}
#if DEBUG_IO
VIR_DEBUG("Now read %d bytes of data", (int)mon->bufferOffset);
#endif
return ret;
}
static void
qemuMonitorUpdateWatch(qemuMonitorPtr mon)
{
int events =
VIR_EVENT_HANDLE_HANGUP |
VIR_EVENT_HANDLE_ERROR;
if (!mon->watch)
return;
if (mon->lastError.code == VIR_ERR_OK) {
events |= VIR_EVENT_HANDLE_READABLE;
if ((mon->msg && mon->msg->txOffset < mon->msg->txLength) &&
!mon->waitGreeting)
events |= VIR_EVENT_HANDLE_WRITABLE;
}
virEventUpdateHandle(mon->watch, events);
}
static void
qemuMonitorIO(int watch, int fd, int events, void *opaque)
{
qemuMonitorPtr mon = opaque;
bool error = false;
bool eof = false;
bool hangup = false;
virObjectRef(mon);
/* lock access to the monitor and protect fd */
virObjectLock(mon);
#if DEBUG_IO
VIR_DEBUG("Monitor %p I/O on watch %d fd %d events %d", mon, watch, fd, events);
#endif
if (mon->fd == -1 || mon->watch == 0) {
virObjectUnlock(mon);
virObjectUnref(mon);
return;
}
if (mon->fd != fd || mon->watch != watch) {
if (events & (VIR_EVENT_HANDLE_HANGUP | VIR_EVENT_HANDLE_ERROR))
eof = true;
virReportError(VIR_ERR_INTERNAL_ERROR,
_("event from unexpected fd %d!=%d / watch %d!=%d"),
mon->fd, fd, mon->watch, watch);
error = true;
} else if (mon->lastError.code != VIR_ERR_OK) {
if (events & (VIR_EVENT_HANDLE_HANGUP | VIR_EVENT_HANDLE_ERROR))
eof = true;
error = true;
} else {
if (events & VIR_EVENT_HANDLE_WRITABLE) {
if (qemuMonitorIOWrite(mon) < 0) {
error = true;
if (errno == ECONNRESET)
hangup = true;
}
events &= ~VIR_EVENT_HANDLE_WRITABLE;
}
if (!error &&
events & VIR_EVENT_HANDLE_READABLE) {
int got = qemuMonitorIORead(mon);
events &= ~VIR_EVENT_HANDLE_READABLE;
if (got < 0) {
error = true;
if (errno == ECONNRESET)
hangup = true;
} else if (got == 0) {
eof = true;
} else {
/* Ignore hangup/error events if we read some data, to
* give time for that data to be consumed */
events = 0;
if (qemuMonitorIOProcess(mon) < 0)
error = true;
}
}
if (events & VIR_EVENT_HANDLE_HANGUP) {
hangup = true;
if (!error) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("End of file from qemu monitor"));
eof = true;
events &= ~VIR_EVENT_HANDLE_HANGUP;
}
}
if (!error && !eof &&
events & VIR_EVENT_HANDLE_ERROR) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Invalid file descriptor while waiting for monitor"));
eof = true;
events &= ~VIR_EVENT_HANDLE_ERROR;
}
if (!error && events) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unhandled event %d for monitor fd %d"),
events, mon->fd);
error = true;
}
}
if (error || eof) {
if (hangup && mon->logFunc != NULL) {
/* Check if an error message from qemu is available and if so, use
* it to overwrite the actual message. It's done only in early
* startup phases or during incoming migration when the message
* from qemu is certainly more interesting than a
* "connection reset by peer" message.
*/
mon->logFunc(mon,
_("qemu unexpectedly closed the monitor"),
mon->logOpaque);
virCopyLastError(&mon->lastError);
virResetLastError();
}
if (mon->lastError.code != VIR_ERR_OK) {
/* Already have an error, so clear any new error */
virResetLastError();
} else {
if (virGetLastErrorCode() == VIR_ERR_OK)
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Error while processing monitor IO"));
virCopyLastError(&mon->lastError);
virResetLastError();
}
VIR_DEBUG("Error on monitor %s", NULLSTR(mon->lastError.message));
/* If IO process resulted in an error & we have a message,
* then wakeup that waiter */
if (mon->msg && !mon->msg->finished) {
mon->msg->finished = 1;
virCondSignal(&mon->notify);
}
}
qemuMonitorUpdateWatch(mon);
/* We have to unlock to avoid deadlock against command thread,
* but is this safe ? I think it is, because the callback
* will try to acquire the virDomainObjPtr mutex next */
if (eof) {
qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
virDomainObjPtr vm = mon->vm;
/* Make sure anyone waiting wakes up now */
virCondSignal(&mon->notify);
virObjectUnlock(mon);
VIR_DEBUG("Triggering EOF callback");
(eofNotify)(mon, vm, mon->callbackOpaque);
qemuMonitorIO: Don't use @mon after it's unrefed https://bugzilla.redhat.com/show_bug.cgi?id=1018267 The aim of virObject refing and urefing is to tell where the object is to be used and when is no longer needed. Hence any object shouldn't be used after it has been unrefed, as we might be the last to hold the reference. The better way is to call virObjectUnref() *after* the last object usage. In this specific case, the monitor EOF handler was called after the qemuMonitorIO called virObjectUnref. Not only that @mon was disposed (which is not used in the handler anyway) but the @mon->vm which is causing a SIGSEGV: 2013-11-15 10:17:54.425+0000: 20110: error : qemuMonitorIO:688 : internal error: early end of file from monitor: possible problem: qemu-kvm: -incoming tcp:01.01.01.0:49152: Failed to bind socket: Cannot assign requested address Program received signal SIGSEGV, Segmentation fault. qemuProcessHandleMonitorEOF (mon=<optimized out>, vm=0x7fb728004170) at qemu/qemu_process.c:299 299 if (priv->beingDestroyed) { (gdb) p *priv Cannot access memory at address 0x0 (gdb) p vm $1 = (virDomainObj *) 0x7fb728004170 (gdb) p *vm $2 = {parent = {parent = {magic = 3735928559, refs = 0, klass = 0xdeadbeef}, lock = {lock = {__data = {__lock = 2, __count = 0, __owner = 20110, __nusers = 1, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "\002\000\000\000\000\000\000\000\216N\000\000\001", '\000' <repeats 26 times>, __align = 2}}}, pid = 0, state = {state = 0, reason = 0}, autostart = 0, persistent = 0, updated = 0, def = 0x0, newDef = 0x0, snapshots = 0x0, current_snapshot = 0x0, hasManagedSave = false, privateData = 0x0, privateDataFreeFunc = 0x0, taint = 304} Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
2013-11-15 11:26:47 +01:00
virObjectUnref(mon);
} else if (error) {
qemuMonitorErrorNotifyCallback errorNotify = mon->cb->errorNotify;
virDomainObjPtr vm = mon->vm;
/* Make sure anyone waiting wakes up now */
virCondSignal(&mon->notify);
virObjectUnlock(mon);
VIR_DEBUG("Triggering error callback");
(errorNotify)(mon, vm, mon->callbackOpaque);
qemuMonitorIO: Don't use @mon after it's unrefed https://bugzilla.redhat.com/show_bug.cgi?id=1018267 The aim of virObject refing and urefing is to tell where the object is to be used and when is no longer needed. Hence any object shouldn't be used after it has been unrefed, as we might be the last to hold the reference. The better way is to call virObjectUnref() *after* the last object usage. In this specific case, the monitor EOF handler was called after the qemuMonitorIO called virObjectUnref. Not only that @mon was disposed (which is not used in the handler anyway) but the @mon->vm which is causing a SIGSEGV: 2013-11-15 10:17:54.425+0000: 20110: error : qemuMonitorIO:688 : internal error: early end of file from monitor: possible problem: qemu-kvm: -incoming tcp:01.01.01.0:49152: Failed to bind socket: Cannot assign requested address Program received signal SIGSEGV, Segmentation fault. qemuProcessHandleMonitorEOF (mon=<optimized out>, vm=0x7fb728004170) at qemu/qemu_process.c:299 299 if (priv->beingDestroyed) { (gdb) p *priv Cannot access memory at address 0x0 (gdb) p vm $1 = (virDomainObj *) 0x7fb728004170 (gdb) p *vm $2 = {parent = {parent = {magic = 3735928559, refs = 0, klass = 0xdeadbeef}, lock = {lock = {__data = {__lock = 2, __count = 0, __owner = 20110, __nusers = 1, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "\002\000\000\000\000\000\000\000\216N\000\000\001", '\000' <repeats 26 times>, __align = 2}}}, pid = 0, state = {state = 0, reason = 0}, autostart = 0, persistent = 0, updated = 0, def = 0x0, newDef = 0x0, snapshots = 0x0, current_snapshot = 0x0, hasManagedSave = false, privateData = 0x0, privateDataFreeFunc = 0x0, taint = 304} Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
2013-11-15 11:26:47 +01:00
virObjectUnref(mon);
} else {
virObjectUnlock(mon);
virObjectUnref(mon);
}
}
static qemuMonitorPtr
qemuMonitorOpenInternal(virDomainObjPtr vm,
int fd,
bool hasSendFD,
bool json,
qemuMonitorCallbacksPtr cb,
void *opaque)
{
qemuMonitorPtr mon;
if (!cb->eofNotify) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("EOF notify callback must be supplied"));
return NULL;
}
if (!cb->errorNotify) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Error notify callback must be supplied"));
return NULL;
}
if (qemuMonitorInitialize() < 0)
return NULL;
if (!(mon = virObjectLockableNew(qemuMonitorClass)))
return NULL;
if (virCondInit(&mon->notify) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("cannot initialize monitor condition"));
goto cleanup;
}
mon->fd = fd;
mon->hasSendFD = hasSendFD;
qemu_migration: Avoid crashing if domain dies too quickly I've noticed a SIGSEGV-ing libvirtd on the destination when the qemu died too quickly = in Prepare phase. What is happening here is: 1) [Thread 3493] We are in qemuMigrationPrepareAny() and calling qemuProcessStart() which subsequently calls qemuProcessWaitForMonitor() and qemuConnectMonitor(). So far so good. The qemuMonitorOpen() succeeds, however switching monitor to QMP mode fails as qemu died meanwhile. That is qemuMonitorSetCapabilities() returns -1. 2013-10-08 15:54:10.629+0000: 3493: debug : qemuMonitorSetCapabilities:1356 : mon=0x14a53da0 2013-10-08 15:54:10.630+0000: 3493: debug : qemuMonitorJSONCommandWithFd:262 : Send command '{"execute":"qmp_capabilities","id":"libvirt-1"}' for write with FD -1 2013-10-08 15:54:10.630+0000: 3493: debug : virEventPollUpdateHandle:147 : EVENT_POLL_UPDATE_HANDLE: watch=17 events=13 ... 2013-10-08 15:54:10.631+0000: 3493: debug : qemuMonitorSend:956 : QEMU_MONITOR_SEND_MSG: mon=0x14a53da0 msg={"execute":"qmp_capabilities","id":"libvirt-1"} fd=-1 2013-10-08 15:54:10.631+0000: 3262: debug : virEventPollRunOnce:641 : Poll got 1 event(s) 2) [Thread 3262] The event loop is trying to do the talking to monitor. However, qemu is dead already, remember? 2013-10-08 15:54:13.436+0000: 3262: error : qemuMonitorIORead:551 : Unable to read from monitor: Connection reset by peer 2013-10-08 15:54:13.516+0000: 3262: debug : virFileClose:90 : Closed fd 25 ... 2013-10-08 15:54:13.533+0000: 3493: debug : qemuMonitorSend:968 : Send command resulted in error internal error: early end of file from monitor: possible problem: 3) [Thread 3493] qemuProcessStart() failed. No big deal. Go to the 'endjob' label and subsequently to the 'cleanup'. Since the domain is not persistent and ret is -1, the qemuDomainRemoveInactive() is called. This has an (unpleasant) effect of virObjectUnref()-in the @vm object. Unpleasant because the event loop which is about to trigger EOF callback still holds a pointer to the @vm (not the reference). See the valgrind output below. 4) [Thread 3262] So the event loop starts triggering EOF: 2013-10-08 15:54:13.542+0000: 3262: debug : qemuMonitorIO:729 : Triggering EOF callback 2013-10-08 15:54:13.543+0000: 3262: debug : qemuProcessHandleMonitorEOF:294 : Received EOF on 0x14549110 'migt10' And the monitor is cleaned up. This results in calling qemuProcessHandleMonitorEOF with the @vm pointer passed. The pointer is kept in qemuMonitor struct. ==3262== Thread 1: ==3262== Invalid read of size 4 ==3262== at 0x77ECCAA: pthread_mutex_lock (in /lib64/libpthread-2.15.so) ==3262== by 0x52FAA06: virMutexLock (virthreadpthread.c:85) ==3262== by 0x52E3891: virObjectLock (virobject.c:320) ==3262== by 0x11626743: qemuProcessHandleMonitorEOF (qemu_process.c:296) ==3262== by 0x11642593: qemuMonitorIO (qemu_monitor.c:730) ==3262== by 0x52BD526: virEventPollDispatchHandles (vireventpoll.c:501) ==3262== by 0x52BDD49: virEventPollRunOnce (vireventpoll.c:648) ==3262== by 0x52BBC68: virEventRunDefaultImpl (virevent.c:274) ==3262== by 0x542D3D9: virNetServerRun (virnetserver.c:1112) ==3262== by 0x11F368: main (libvirtd.c:1513) ==3262== Address 0x14549128 is 24 bytes inside a block of size 136 free'd ==3262== at 0x4C2AF5C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) ==3262== by 0x529B1FF: virFree (viralloc.c:580) ==3262== by 0x52E3703: virObjectUnref (virobject.c:270) ==3262== by 0x531557E: virDomainObjListRemove (domain_conf.c:2355) ==3262== by 0x1160E899: qemuDomainRemoveInactive (qemu_domain.c:2061) ==3262== by 0x1163A0C6: qemuMigrationPrepareAny (qemu_migration.c:2450) ==3262== by 0x1163A923: qemuMigrationPrepareDirect (qemu_migration.c:2626) ==3262== by 0x11682D71: qemuDomainMigratePrepare3Params (qemu_driver.c:10309) ==3262== by 0x53B0976: virDomainMigratePrepare3Params (libvirt.c:7266) ==3262== by 0x1502D3: remoteDispatchDomainMigratePrepare3Params (remote.c:4797) ==3262== by 0x12DECA: remoteDispatchDomainMigratePrepare3ParamsHelper (remote_dispatch.h:5741) ==3262== by 0x54322EB: virNetServerProgramDispatchCall (virnetserverprogram.c:435) The mon->vm is set in qemuMonitorOpenInternal() which is the correct place to increase @vm ref counter. The correct place to decrease the ref counter is then qemuMonitorDispose(). Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
2013-10-08 16:50:33 +02:00
mon->vm = virObjectRef(vm);
mon->json = json;
if (json)
mon->waitGreeting = true;
mon->cb = cb;
mon->callbackOpaque = opaque;
if (virSetCloseExec(mon->fd) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("Unable to set monitor close-on-exec flag"));
goto cleanup;
}
if (virSetNonBlock(mon->fd) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("Unable to put monitor into non-blocking mode"));
goto cleanup;
}
virObjectLock(mon);
if (!qemuMonitorRegister(mon)) {
virObjectUnlock(mon);
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("unable to register monitor events"));
goto cleanup;
}
PROBE(QEMU_MONITOR_NEW,
"mon=%p refs=%d fd=%d",
object: require maximal alignment in base class Recent changes to events (commit 8a29ffcf) resulted in new compile failures on some targets (such as ARM OMAP5): conf/domain_event.c: In function 'virDomainEventDispatchDefaultFunc': conf/domain_event.c:1198:30: error: cast increases required alignment of target type [-Werror=cast-align] conf/domain_event.c:1314:34: error: cast increases required alignment of target type [-Werror=cast-align] cc1: all warnings being treated as errors The error is due to alignment; the base class is merely aligned to the worst of 'int' and 'void*', while the child class must be aligned to a 'long long'. The solution is to include a 'long long' (and for good measure, a function pointer) in the base class to ensure correct alignment regardless of what a child class may add, but to wrap the inclusion in a union so as to not incur any wasted space. On a typical x86_64 platform, the base class remains 16 bytes; on i686, the base class remains 12 bytes; and on the impacted ARM platform, the base class grows from 12 bytes to 16 bytes due to the increase of alignment from 4 to 8 bytes. Reported by Michele Paolino and others. * src/util/virobject.h (_virObject): Use a union to ensure that subclasses never have stricter alignment than the parent. * src/util/virobject.c (virObjectNew, virObjectUnref) (virObjectRef): Adjust clients. * src/libvirt.c (virConnectRef, virDomainRef, virNetworkRef) (virInterfaceRef, virStoragePoolRef, virStorageVolRef) (virNodeDeviceRef, virSecretRef, virStreamRef, virNWFilterRef) (virDomainSnapshotRef): Likewise. * src/qemu/qemu_monitor.c (qemuMonitorOpenInternal) (qemuMonitorClose): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-12-12 16:01:15 -07:00
mon, mon->parent.parent.u.s.refs, mon->fd);
virObjectUnlock(mon);
return mon;
cleanup:
/* We don't want the 'destroy' callback invoked during
* cleanup from construction failure, because that can
* give a double-unref on virDomainObjPtr in the caller,
* so kill the callbacks now.
*/
mon->cb = NULL;
/* The caller owns 'fd' on failure */
mon->fd = -1;
qemuMonitorClose(mon);
return NULL;
}
#define QEMU_DEFAULT_MONITOR_WAIT 30
/**
* qemuMonitorOpen:
* @vm: domain object
* @config: monitor configuration
* @json: enable JSON on the monitor
* @timeout: number of seconds to add to default timeout
* @cb: monitor event handles
* @opaque: opaque data for @cb
*
* Opens the monitor for running qemu. It may happen that it
* takes some time for qemu to create the monitor socket (e.g.
* because kernel is zeroing configured hugepages), therefore we
* wait up to default + timeout seconds for the monitor to show
* up after which a failure is claimed.
*
* Returns monitor object, NULL on error.
*/
qemuMonitorPtr
qemuMonitorOpen(virDomainObjPtr vm,
virDomainChrSourceDefPtr config,
bool json,
bool retry,
unsigned long long timeout,
qemuMonitorCallbacksPtr cb,
void *opaque)
{
int fd;
bool hasSendFD = false;
qemuMonitorPtr ret;
timeout += QEMU_DEFAULT_MONITOR_WAIT;
switch (config->type) {
case VIR_DOMAIN_CHR_TYPE_UNIX:
hasSendFD = true;
if ((fd = qemuMonitorOpenUnix(config->data.nix.path,
vm->pid, retry, timeout)) < 0)
return NULL;
break;
case VIR_DOMAIN_CHR_TYPE_PTY:
if ((fd = qemuMonitorOpenPty(config->data.file.path)) < 0)
return NULL;
break;
default:
virReportError(VIR_ERR_INTERNAL_ERROR,
_("unable to handle monitor type: %s"),
virDomainChrTypeToString(config->type));
return NULL;
}
ret = qemuMonitorOpenInternal(vm, fd, hasSendFD, json, cb, opaque);
if (!ret)
VIR_FORCE_CLOSE(fd);
return ret;
}
qemuMonitorPtr
qemuMonitorOpenFD(virDomainObjPtr vm,
int sockfd,
bool json,
qemuMonitorCallbacksPtr cb,
void *opaque)
{
return qemuMonitorOpenInternal(vm, sockfd, true, json, cb, opaque);
}
/**
* qemuMonitorRegister:
* @mon: QEMU monitor
*
* Registers the monitor in the event loop. The caller has to hold the
* lock for @mon.
*
* Returns true in case of success, false otherwise
*/
bool
qemuMonitorRegister(qemuMonitorPtr mon)
{
virObjectRef(mon);
if ((mon->watch = virEventAddHandle(mon->fd,
VIR_EVENT_HANDLE_HANGUP |
VIR_EVENT_HANDLE_ERROR |
VIR_EVENT_HANDLE_READABLE,
qemuMonitorIO,
mon,
virObjectFreeCallback)) < 0) {
virObjectUnref(mon);
return false;
}
return true;
}
void
qemuMonitorUnregister(qemuMonitorPtr mon)
{
if (mon->watch) {
virEventRemoveHandle(mon->watch);
mon->watch = 0;
}
}
void
qemuMonitorClose(qemuMonitorPtr mon)
{
if (!mon)
return;
virObjectLock(mon);
PROBE(QEMU_MONITOR_CLOSE,
object: require maximal alignment in base class Recent changes to events (commit 8a29ffcf) resulted in new compile failures on some targets (such as ARM OMAP5): conf/domain_event.c: In function 'virDomainEventDispatchDefaultFunc': conf/domain_event.c:1198:30: error: cast increases required alignment of target type [-Werror=cast-align] conf/domain_event.c:1314:34: error: cast increases required alignment of target type [-Werror=cast-align] cc1: all warnings being treated as errors The error is due to alignment; the base class is merely aligned to the worst of 'int' and 'void*', while the child class must be aligned to a 'long long'. The solution is to include a 'long long' (and for good measure, a function pointer) in the base class to ensure correct alignment regardless of what a child class may add, but to wrap the inclusion in a union so as to not incur any wasted space. On a typical x86_64 platform, the base class remains 16 bytes; on i686, the base class remains 12 bytes; and on the impacted ARM platform, the base class grows from 12 bytes to 16 bytes due to the increase of alignment from 4 to 8 bytes. Reported by Michele Paolino and others. * src/util/virobject.h (_virObject): Use a union to ensure that subclasses never have stricter alignment than the parent. * src/util/virobject.c (virObjectNew, virObjectUnref) (virObjectRef): Adjust clients. * src/libvirt.c (virConnectRef, virDomainRef, virNetworkRef) (virInterfaceRef, virStoragePoolRef, virStorageVolRef) (virNodeDeviceRef, virSecretRef, virStreamRef, virNWFilterRef) (virDomainSnapshotRef): Likewise. * src/qemu/qemu_monitor.c (qemuMonitorOpenInternal) (qemuMonitorClose): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2013-12-12 16:01:15 -07:00
"mon=%p refs=%d", mon, mon->parent.parent.u.s.refs);
qemu: Fix two use-after-free situations There were multiple race conditions that could lead to segmentation faults. The first precondition for this is qemuProcessLaunch must fail sometime shortly after starting the new QEMU process. The second precondition for the segmentation faults is that the new QEMU process dies - or to be more precise the QEMU monitor has to be closed irregularly. If both happens during qemuProcessStart (starting a domain) there are race windows between the thread with the event loop (T1) and the thread that is starting the domain (T2). First segmentation fault scenario: If qemuProcessLaunch fails during qemuProcessStart the code branches to the 'stop' path where 'qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL)' will set the log function of the monitor to NULL (done in T2). In the meantime the event loop of T1 will wake up with an EOF event for the QEMU monitor because the QEMU process has died. The crash occurs if T1 has checked 'mon->logFunc != NULL' in qemuMonitorIO just before the logFunc was set to NULL by T2. If this situation occurs T1 will try to call mon->logFunc which leads to the segmentation fault. Solution: Require the monitor lock for setting the log function. Backtrace: 0 0x0000000000000000 in ?? () 1 0x000003ffe9e45316 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe08aa860) at ../../src/qemu/qemu_monitor.c:727 2 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 3 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 4 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 5 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 6 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Second segmentation fault scenario: If qemuProcessLaunch fails it will unref the log context and with invoking qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL) qemuDomainLogContextFree() will be invoked. qemuDomainLogContextFree() invokes virNetClientClose() to close the client and cleans everything up (including unref of _virLogManager.client) when virNetClientClose() returns. When T1 is now trying to report 'qemu unexpectedly closed the monitor' libvirtd will crash because the client has already been freed. Solution: As the critical section in qemuMonitorIO is protected with the monitor lock we can use the same solution as proposed for the first segmentation fault. Backtrace: 0 virClassIsDerivedFrom (klass=0x3100979797979797, parent=0x2aa000d92f0) at ../../src/util/virobject.c:169 1 0x000003fffda659e6 in virObjectIsClass (anyobj=<optimized out>, klass=<optimized out>) at ../../src/util/virobject.c:365 2 0x000003fffda65a24 in virObjectLock (anyobj=0x3ffe08c1db0) at ../../src/util/virobject.c:317 3 0x000003fffdba4688 in virNetClientIOEventLoop (client=client@entry=0x3ffe08c1db0, thiscall=thiscall@entry=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1668 4 0x000003fffdba4b4c in virNetClientIO (client=client@entry=0x3ffe08c1db0, thiscall=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1944 5 0x000003fffdba4d42 in virNetClientSendInternal (client=client@entry=0x3ffe08c1db0, msg=msg@entry=0x2aa000cc710, expectReply=expectReply@entry=true, nonBlock=nonBlock@entry=false) at ../../src/rpc/virnetclient.c:2116 6 0x000003fffdba6268 in virNetClientSendWithReply (client=0x3ffe08c1db0, msg=0x2aa000cc710) at ../../src/rpc/virnetclient.c:2144 7 0x000003fffdba6e8e in virNetClientProgramCall (prog=0x3ffe08c1120, client=<optimized out>, serial=<optimized out>, proc=<optimized out>, noutfds=<optimized out>, outfds=0x0, ninfds=0x0, infds=0x0, args_filter=0x3fffdb64440 <xdr_virLogManagerProtocolDomainReadLogFileArgs>, args=0x3ffffffe010, ret_filter=0x3fffdb644c0 <xdr_virLogManagerProtocolDomainReadLogFileRet>, ret=0x3ffffffe008) at ../../src/rpc/virnetclientprogram.c:329 8 0x000003fffdb64042 in virLogManagerDomainReadLogFile (mgr=<optimized out>, path=<optimized out>, inode=<optimized out>, offset=<optimized out>, maxlen=<optimized out>, flags=0) at ../../src/logging/log_manager.c:272 9 0x000003ffe9e0315c in qemuDomainLogContextRead (ctxt=0x3ffe08c2980, msg=0x3ffffffe1c0) at ../../src/qemu/qemu_domain.c:4422 10 0x000003ffe9e280a8 in qemuProcessReadLog (logCtxt=<optimized out>, msg=msg@entry=0x3ffffffe288) at ../../src/qemu/qemu_process.c:1800 11 0x000003ffe9e28206 in qemuProcessReportLogError (logCtxt=<optimized out>, msgprefix=0x3ffe9ec276a "qemu unexpectedly closed the monitor") at ../../src/qemu/qemu_process.c:1836 12 0x000003ffe9e28306 in qemuProcessMonitorReportLogError (mon=mon@entry=0x3ffe085cf10, msg=<optimized out>, opaque=<optimized out>) at ../../src/qemu/qemu_process.c:1856 13 0x000003ffe9e452b6 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe085cf10) at ../../src/qemu/qemu_monitor.c:726 14 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 15 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 16 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 17 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 18 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Other code parts where the same problem was possible to occur are fixed as well (qemuMigrationFinish, qemuProcessStart, and qemuDomainSaveImageStartVM). Signed-off-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com> Reported-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
2017-04-03 10:24:35 +02:00
qemuMonitorSetDomainLogLocked(mon, NULL, NULL, NULL);
if (mon->fd >= 0) {
qemuMonitorUnregister(mon);
VIR_FORCE_CLOSE(mon->fd);
}
/* In case another thread is waiting for its monitor command to be
* processed, we need to wake it up with appropriate error set.
*/
if (mon->msg) {
if (mon->lastError.code == VIR_ERR_OK) {
virErrorPtr err = virSaveLastError();
virReportError(VIR_ERR_OPERATION_FAILED, "%s",
_("QEMU monitor was closed"));
virCopyLastError(&mon->lastError);
if (err) {
virSetError(err);
virFreeError(err);
} else {
virResetLastError();
}
}
mon->msg->finished = 1;
virCondSignal(&mon->notify);
}
/* Propagate existing monitor error in case the current thread has no
* error set.
*/
if (mon->lastError.code != VIR_ERR_OK && virGetLastErrorCode() == VIR_ERR_OK)
virSetError(&mon->lastError);
virObjectUnlock(mon);
virObjectUnref(mon);
}
char *
qemuMonitorNextCommandID(qemuMonitorPtr mon)
{
char *id;
ignore_value(virAsprintf(&id, "libvirt-%d", ++mon->nextSerial));
return id;
}
/* for use only in the test suite */
void
qemuMonitorResetCommandID(qemuMonitorPtr mon)
{
mon->nextSerial = 0;
}
int
qemuMonitorSend(qemuMonitorPtr mon,
qemuMonitorMessagePtr msg)
{
int ret = -1;
/* Check whether qemu quit unexpectedly */
if (mon->lastError.code != VIR_ERR_OK) {
VIR_DEBUG("Attempt to send command while error is set %s",
NULLSTR(mon->lastError.message));
virSetError(&mon->lastError);
return -1;
}
mon->msg = msg;
qemuMonitorUpdateWatch(mon);
PROBE(QEMU_MONITOR_SEND_MSG,
"mon=%p msg=%s fd=%d",
mon, mon->msg->txBuffer, mon->msg->txFD);
while (!mon->msg->finished) {
if (virCondWait(&mon->notify, &mon->parent.lock) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to wait on monitor condition"));
goto cleanup;
}
}
if (mon->lastError.code != VIR_ERR_OK) {
VIR_DEBUG("Send command resulted in error %s",
NULLSTR(mon->lastError.message));
virSetError(&mon->lastError);
goto cleanup;
}
ret = 0;
cleanup:
mon->msg = NULL;
qemuMonitorUpdateWatch(mon);
return ret;
}
/**
* This function returns a new virError object; the caller is responsible
* for freeing it.
*/
virErrorPtr
qemuMonitorLastError(qemuMonitorPtr mon)
{
if (mon->lastError.code == VIR_ERR_OK)
return NULL;
return virErrorCopyNew(&mon->lastError);
}
virJSONValuePtr
qemuMonitorGetOptions(qemuMonitorPtr mon)
{
return mon->options;
}
void
qemuMonitorSetOptions(qemuMonitorPtr mon, virJSONValuePtr options)
{
mon->options = options;
}
/**
* Search the qom objects for the balloon driver object by its known names
* of "virtio-balloon-pci" or "virtio-balloon-ccw". The entry for the driver
* will be found by using function "qemuMonitorJSONFindLinkPath".
*
* Once found, check the entry to ensure it has the correct property listed.
* If it does not, then obtaining statistics from QEMU will not be possible.
* This feature was added to QEMU 1.5.
*/
static void
qemuMonitorInitBalloonObjectPath(qemuMonitorPtr mon,
virDomainMemballoonDefPtr balloon)
{
ssize_t i, nprops = 0;
char *path = NULL;
const char *name;
qemuMonitorJSONListPathPtr *bprops = NULL;
if (mon->balloonpath) {
return;
} else if (mon->ballooninit) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Cannot determine balloon device path"));
return;
}
mon->ballooninit = true;
switch (balloon->info.type) {
case VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI:
name = "virtio-balloon-pci";
break;
case VIR_DOMAIN_DEVICE_ADDRESS_TYPE_CCW:
name = "virtio-balloon-ccw";
break;
default:
return;
}
if (qemuMonitorJSONFindLinkPath(mon, name, balloon->info.alias, &path) < 0)
return;
nprops = qemuMonitorJSONGetObjectListPaths(mon, path, &bprops);
if (nprops < 0)
goto cleanup;
for (i = 0; i < nprops; i++) {
if (STREQ(bprops[i]->name, "guest-stats-polling-interval")) {
VIR_DEBUG("Found Balloon Object Path %s", path);
mon->balloonpath = path;
path = NULL;
goto cleanup;
}
}
/* If we get here, we found the path, but not the property */
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Property 'guest-stats-polling-interval' "
"not found on memory balloon driver."));
cleanup:
for (i = 0; i < nprops; i++)
qemuMonitorJSONListPathFree(bprops[i]);
VIR_FREE(bprops);
VIR_FREE(path);
return;
}
/**
* To update video memory size in status XML we need to load correct values from
* QEMU. This is supported only with JSON monitor.
*
* Returns 0 on success, -1 on failure and sets proper error message.
*/
int
qemuMonitorUpdateVideoMemorySize(qemuMonitorPtr mon,
virDomainVideoDefPtr video,
const char *videoName)
{
int ret = -1;
char *path = NULL;
QEMU_CHECK_MONITOR(mon);
if (mon->json) {
ret = qemuMonitorJSONFindLinkPath(mon, videoName,
video->info.alias, &path);
if (ret < 0) {
if (ret == -2)
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Failed to find QOM Object path for "
"device '%s'"), videoName);
return -1;
}
ret = qemuMonitorJSONUpdateVideoMemorySize(mon, video, path);
VIR_FREE(path);
return ret;
}
return 0;
}
/**
* To update video vram64 size in status XML we need to load correct value from
* QEMU. This is supported only with JSON monitor.
*
* Returns 0 on success, -1 on failure and sets proper error message.
*/
int
qemuMonitorUpdateVideoVram64Size(qemuMonitorPtr mon,
virDomainVideoDefPtr video,
const char *videoName)
{
int ret = -1;
char *path = NULL;
QEMU_CHECK_MONITOR(mon);
if (mon->json) {
ret = qemuMonitorJSONFindLinkPath(mon, videoName,
video->info.alias, &path);
if (ret < 0) {
if (ret == -2)
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Failed to find QOM Object path for "
"device '%s'"), videoName);
return -1;
}
ret = qemuMonitorJSONUpdateVideoVram64Size(mon, video, path);
VIR_FREE(path);
return ret;
}
return 0;
}
int
qemuMonitorHMPCommandWithFd(qemuMonitorPtr mon,
const char *cmd,
int scm_fd,
char **reply)
{
char *json_cmd = NULL;
int ret = -1;
QEMU_CHECK_MONITOR(mon);
/* hack to avoid complicating each call to text monitor functions */
json_cmd = qemuMonitorUnescapeArg(cmd);
if (!json_cmd) {
VIR_DEBUG("Could not unescape command: %s", cmd);
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Unable to unescape command"));
goto cleanup;
}
ret = qemuMonitorJSONHumanCommandWithFd(mon, json_cmd, scm_fd, reply);
cleanup:
VIR_FREE(json_cmd);
return ret;
}
/* Ensure proper locking around callbacks. */
#define QEMU_MONITOR_CALLBACK(mon, ret, callback, ...) \
do { \
virObjectRef(mon); \
virObjectUnlock(mon); \
if ((mon)->cb && (mon)->cb->callback) \
(ret) = (mon)->cb->callback(mon, __VA_ARGS__, \
(mon)->callbackOpaque); \
virObjectLock(mon); \
virObjectUnref(mon); \
} while (0)
int
qemuMonitorEmitEvent(qemuMonitorPtr mon, const char *event,
long long seconds, unsigned int micros,
const char *details)
{
int ret = -1;
VIR_DEBUG("mon=%p event=%s", mon, event);
QEMU_MONITOR_CALLBACK(mon, ret, domainEvent, mon->vm, event, seconds,
micros, details);
return ret;
}
int
qemuMonitorEmitShutdown(qemuMonitorPtr mon, virTristateBool guest)
{
int ret = -1;
VIR_DEBUG("mon=%p guest=%u", mon, guest);
QEMU_MONITOR_CALLBACK(mon, ret, domainShutdown, mon->vm, guest);
return ret;
}
int
qemuMonitorEmitReset(qemuMonitorPtr mon)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainReset, mon->vm);
return ret;
}
int
qemuMonitorEmitPowerdown(qemuMonitorPtr mon)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainPowerdown, mon->vm);
return ret;
}
int
qemuMonitorEmitStop(qemuMonitorPtr mon)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainStop, mon->vm);
return ret;
}
int
qemuMonitorEmitResume(qemuMonitorPtr mon)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainResume, mon->vm);
return ret;
}
int
qemuMonitorEmitGuestPanic(qemuMonitorPtr mon,
qemuMonitorEventPanicInfoPtr info)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainGuestPanic, mon->vm, info);
return ret;
}
int
qemuMonitorEmitRTCChange(qemuMonitorPtr mon, long long offset)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainRTCChange, mon->vm, offset);
return ret;
}
int
qemuMonitorEmitWatchdog(qemuMonitorPtr mon, int action)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainWatchdog, mon->vm, action);
return ret;
}
int
qemuMonitorEmitIOError(qemuMonitorPtr mon,
const char *diskAlias,
const char *nodename,
int action,
const char *reason)
Add support for an explicit IO error event This introduces a new event type VIR_DOMAIN_EVENT_ID_IO_ERROR This event includes the action that is about to be taken as a result of the watchdog triggering typedef enum { VIR_DOMAIN_EVENT_IO_ERROR_NONE = 0, VIR_DOMAIN_EVENT_IO_ERROR_PAUSE, VIR_DOMAIN_EVENT_IO_ERROR_REPORT, } virDomainEventIOErrorAction; In addition it has the source path of the disk that had the error and its unique device alias. It does not include the target device name (/dev/sda), since this would preclude triggering IO errors from other file backed devices (eg serial ports connected to a file) Thus there is a new callback definition for this event type typedef void (*virConnectDomainEventIOErrorCallback)(virConnectPtr conn, virDomainPtr dom, const char *srcPath, const char *devAlias, int action, void *opaque); This is currently wired up to the QEMU block IO error events * daemon/remote.c: Dispatch IO error events to client * examples/domain-events/events-c/event-test.c: Watch for IO error events * include/libvirt/libvirt.h.in: Define new IO error event ID and callback signature * src/conf/domain_event.c, src/conf/domain_event.h, src/libvirt_private.syms: Extend API to handle IO error events * src/qemu/qemu_driver.c: Connect to the QEMU monitor event for block IO errors and emit a libvirt IO error event * src/remote/remote_driver.c: Receive and dispatch IO error events to application * src/remote/remote_protocol.x: Wire protocol definition for IO error events * src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h, src/qemu/qemu_monitor_json.c: Watch for BLOCK_IO_ERROR event from QEMU monitor
2010-03-18 19:37:44 +00:00
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainIOError, mon->vm,
diskAlias, nodename, action, reason);
Add support for an explicit IO error event This introduces a new event type VIR_DOMAIN_EVENT_ID_IO_ERROR This event includes the action that is about to be taken as a result of the watchdog triggering typedef enum { VIR_DOMAIN_EVENT_IO_ERROR_NONE = 0, VIR_DOMAIN_EVENT_IO_ERROR_PAUSE, VIR_DOMAIN_EVENT_IO_ERROR_REPORT, } virDomainEventIOErrorAction; In addition it has the source path of the disk that had the error and its unique device alias. It does not include the target device name (/dev/sda), since this would preclude triggering IO errors from other file backed devices (eg serial ports connected to a file) Thus there is a new callback definition for this event type typedef void (*virConnectDomainEventIOErrorCallback)(virConnectPtr conn, virDomainPtr dom, const char *srcPath, const char *devAlias, int action, void *opaque); This is currently wired up to the QEMU block IO error events * daemon/remote.c: Dispatch IO error events to client * examples/domain-events/events-c/event-test.c: Watch for IO error events * include/libvirt/libvirt.h.in: Define new IO error event ID and callback signature * src/conf/domain_event.c, src/conf/domain_event.h, src/libvirt_private.syms: Extend API to handle IO error events * src/qemu/qemu_driver.c: Connect to the QEMU monitor event for block IO errors and emit a libvirt IO error event * src/remote/remote_driver.c: Receive and dispatch IO error events to application * src/remote/remote_protocol.x: Wire protocol definition for IO error events * src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h, src/qemu/qemu_monitor_json.c: Watch for BLOCK_IO_ERROR event from QEMU monitor
2010-03-18 19:37:44 +00:00
return ret;
}
int
qemuMonitorEmitGraphics(qemuMonitorPtr mon,
int phase,
int localFamily,
const char *localNode,
const char *localService,
int remoteFamily,
const char *remoteNode,
const char *remoteService,
const char *authScheme,
const char *x509dname,
const char *saslUsername)
Add domain events for graphics network clients This introduces a new event type VIR_DOMAIN_EVENT_ID_GRAPHICS The same event can be emitted in 3 scenarios typedef enum { VIR_DOMAIN_EVENT_GRAPHICS_CONNECT = 0, VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE, VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT, } virDomainEventGraphicsPhase; Connect/disconnect are triggered at socket accept/close. The initialize phase is immediately after the protocol setup and authentication has completed. ie when the client is authorized and about to start interacting with the graphical desktop This event comes with *a lot* of potential information - IP address, port & address family of client - IP address, port & address family of server - Authentication scheme (arbitrary string) - Authenticated subject identity. A subject may have multiple identities with some authentication schemes. For example, vencrypt+sasl results in a x509dname and saslUsername identities. This results in a very complicated callback :-( typedef enum { VIR_DOMAIN_EVENT_GRAPHICS_ADDRESS_IPV4, VIR_DOMAIN_EVENT_GRAPHICS_ADDRESS_IPV6, } virDomainEventGraphicsAddressType; struct _virDomainEventGraphicsAddress { int family; const char *node; const char *service; }; typedef struct _virDomainEventGraphicsAddress virDomainEventGraphicsAddress; typedef virDomainEventGraphicsAddress *virDomainEventGraphicsAddressPtr; struct _virDomainEventGraphicsSubject { int nidentity; struct { const char *type; const char *name; } *identities; }; typedef struct _virDomainEventGraphicsSubject virDomainEventGraphicsSubject; typedef virDomainEventGraphicsSubject *virDomainEventGraphicsSubjectPtr; typedef void (*virConnectDomainEventGraphicsCallback)(virConnectPtr conn, virDomainPtr dom, int phase, virDomainEventGraphicsAddressPtr local, virDomainEventGraphicsAddressPtr remote, const char *authScheme, virDomainEventGraphicsSubjectPtr subject, void *opaque); The wire protocol is similarly complex struct remote_domain_event_graphics_address { int family; remote_nonnull_string node; remote_nonnull_string service; }; const REMOTE_DOMAIN_EVENT_GRAPHICS_IDENTITY_MAX = 20; struct remote_domain_event_graphics_identity { remote_nonnull_string type; remote_nonnull_string name; }; struct remote_domain_event_graphics_msg { remote_nonnull_domain dom; int phase; remote_domain_event_graphics_address local; remote_domain_event_graphics_address remote; remote_nonnull_string authScheme; remote_domain_event_graphics_identity subject<REMOTE_DOMAIN_EVENT_GRAPHICS_IDENTITY_MAX>; }; This is currently implemented in QEMU for the VNC graphics protocol, but designed to be usable with SPICE graphics in the future too. * daemon/remote.c: Dispatch graphics events to client * examples/domain-events/events-c/event-test.c: Watch for graphics events * include/libvirt/libvirt.h.in: Define new graphics event ID and callback signature * src/conf/domain_event.c, src/conf/domain_event.h, src/libvirt_private.syms: Extend API to handle graphics events * src/qemu/qemu_driver.c: Connect to the QEMU monitor event for VNC events and emit a libvirt graphics event * src/remote/remote_driver.c: Receive and dispatch graphics events to application * src/remote/remote_protocol.x: Wire protocol definition for graphics events * src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h, src/qemu/qemu_monitor_json.c: Watch for VNC_CONNECTED, VNC_INITIALIZED & VNC_DISCONNETED events from QEMU monitor
2010-03-19 13:27:45 +00:00
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainGraphics, mon->vm, phase,
localFamily, localNode, localService,
remoteFamily, remoteNode, remoteService,
authScheme, x509dname, saslUsername);
Add domain events for graphics network clients This introduces a new event type VIR_DOMAIN_EVENT_ID_GRAPHICS The same event can be emitted in 3 scenarios typedef enum { VIR_DOMAIN_EVENT_GRAPHICS_CONNECT = 0, VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE, VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT, } virDomainEventGraphicsPhase; Connect/disconnect are triggered at socket accept/close. The initialize phase is immediately after the protocol setup and authentication has completed. ie when the client is authorized and about to start interacting with the graphical desktop This event comes with *a lot* of potential information - IP address, port & address family of client - IP address, port & address family of server - Authentication scheme (arbitrary string) - Authenticated subject identity. A subject may have multiple identities with some authentication schemes. For example, vencrypt+sasl results in a x509dname and saslUsername identities. This results in a very complicated callback :-( typedef enum { VIR_DOMAIN_EVENT_GRAPHICS_ADDRESS_IPV4, VIR_DOMAIN_EVENT_GRAPHICS_ADDRESS_IPV6, } virDomainEventGraphicsAddressType; struct _virDomainEventGraphicsAddress { int family; const char *node; const char *service; }; typedef struct _virDomainEventGraphicsAddress virDomainEventGraphicsAddress; typedef virDomainEventGraphicsAddress *virDomainEventGraphicsAddressPtr; struct _virDomainEventGraphicsSubject { int nidentity; struct { const char *type; const char *name; } *identities; }; typedef struct _virDomainEventGraphicsSubject virDomainEventGraphicsSubject; typedef virDomainEventGraphicsSubject *virDomainEventGraphicsSubjectPtr; typedef void (*virConnectDomainEventGraphicsCallback)(virConnectPtr conn, virDomainPtr dom, int phase, virDomainEventGraphicsAddressPtr local, virDomainEventGraphicsAddressPtr remote, const char *authScheme, virDomainEventGraphicsSubjectPtr subject, void *opaque); The wire protocol is similarly complex struct remote_domain_event_graphics_address { int family; remote_nonnull_string node; remote_nonnull_string service; }; const REMOTE_DOMAIN_EVENT_GRAPHICS_IDENTITY_MAX = 20; struct remote_domain_event_graphics_identity { remote_nonnull_string type; remote_nonnull_string name; }; struct remote_domain_event_graphics_msg { remote_nonnull_domain dom; int phase; remote_domain_event_graphics_address local; remote_domain_event_graphics_address remote; remote_nonnull_string authScheme; remote_domain_event_graphics_identity subject<REMOTE_DOMAIN_EVENT_GRAPHICS_IDENTITY_MAX>; }; This is currently implemented in QEMU for the VNC graphics protocol, but designed to be usable with SPICE graphics in the future too. * daemon/remote.c: Dispatch graphics events to client * examples/domain-events/events-c/event-test.c: Watch for graphics events * include/libvirt/libvirt.h.in: Define new graphics event ID and callback signature * src/conf/domain_event.c, src/conf/domain_event.h, src/libvirt_private.syms: Extend API to handle graphics events * src/qemu/qemu_driver.c: Connect to the QEMU monitor event for VNC events and emit a libvirt graphics event * src/remote/remote_driver.c: Receive and dispatch graphics events to application * src/remote/remote_protocol.x: Wire protocol definition for graphics events * src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h, src/qemu/qemu_monitor_json.c: Watch for VNC_CONNECTED, VNC_INITIALIZED & VNC_DISCONNETED events from QEMU monitor
2010-03-19 13:27:45 +00:00
return ret;
}
int
qemuMonitorEmitTrayChange(qemuMonitorPtr mon,
const char *devAlias,
const char *devid,
int reason)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainTrayChange, mon->vm,
devAlias, devid, reason);
return ret;
}
int
qemuMonitorEmitPMWakeup(qemuMonitorPtr mon)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainPMWakeup, mon->vm);
return ret;
}
int
qemuMonitorEmitPMSuspend(qemuMonitorPtr mon)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainPMSuspend, mon->vm);
return ret;
}
int
qemuMonitorEmitPMSuspendDisk(qemuMonitorPtr mon)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainPMSuspendDisk, mon->vm);
return ret;
}
int
qemuMonitorEmitBlockJob(qemuMonitorPtr mon,
const char *diskAlias,
int type,
int status,
const char *error)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainBlockJob, mon->vm,
diskAlias, type, status, error);
return ret;
}
Add domain events for graphics network clients This introduces a new event type VIR_DOMAIN_EVENT_ID_GRAPHICS The same event can be emitted in 3 scenarios typedef enum { VIR_DOMAIN_EVENT_GRAPHICS_CONNECT = 0, VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE, VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT, } virDomainEventGraphicsPhase; Connect/disconnect are triggered at socket accept/close. The initialize phase is immediately after the protocol setup and authentication has completed. ie when the client is authorized and about to start interacting with the graphical desktop This event comes with *a lot* of potential information - IP address, port & address family of client - IP address, port & address family of server - Authentication scheme (arbitrary string) - Authenticated subject identity. A subject may have multiple identities with some authentication schemes. For example, vencrypt+sasl results in a x509dname and saslUsername identities. This results in a very complicated callback :-( typedef enum { VIR_DOMAIN_EVENT_GRAPHICS_ADDRESS_IPV4, VIR_DOMAIN_EVENT_GRAPHICS_ADDRESS_IPV6, } virDomainEventGraphicsAddressType; struct _virDomainEventGraphicsAddress { int family; const char *node; const char *service; }; typedef struct _virDomainEventGraphicsAddress virDomainEventGraphicsAddress; typedef virDomainEventGraphicsAddress *virDomainEventGraphicsAddressPtr; struct _virDomainEventGraphicsSubject { int nidentity; struct { const char *type; const char *name; } *identities; }; typedef struct _virDomainEventGraphicsSubject virDomainEventGraphicsSubject; typedef virDomainEventGraphicsSubject *virDomainEventGraphicsSubjectPtr; typedef void (*virConnectDomainEventGraphicsCallback)(virConnectPtr conn, virDomainPtr dom, int phase, virDomainEventGraphicsAddressPtr local, virDomainEventGraphicsAddressPtr remote, const char *authScheme, virDomainEventGraphicsSubjectPtr subject, void *opaque); The wire protocol is similarly complex struct remote_domain_event_graphics_address { int family; remote_nonnull_string node; remote_nonnull_string service; }; const REMOTE_DOMAIN_EVENT_GRAPHICS_IDENTITY_MAX = 20; struct remote_domain_event_graphics_identity { remote_nonnull_string type; remote_nonnull_string name; }; struct remote_domain_event_graphics_msg { remote_nonnull_domain dom; int phase; remote_domain_event_graphics_address local; remote_domain_event_graphics_address remote; remote_nonnull_string authScheme; remote_domain_event_graphics_identity subject<REMOTE_DOMAIN_EVENT_GRAPHICS_IDENTITY_MAX>; }; This is currently implemented in QEMU for the VNC graphics protocol, but designed to be usable with SPICE graphics in the future too. * daemon/remote.c: Dispatch graphics events to client * examples/domain-events/events-c/event-test.c: Watch for graphics events * include/libvirt/libvirt.h.in: Define new graphics event ID and callback signature * src/conf/domain_event.c, src/conf/domain_event.h, src/libvirt_private.syms: Extend API to handle graphics events * src/qemu/qemu_driver.c: Connect to the QEMU monitor event for VNC events and emit a libvirt graphics event * src/remote/remote_driver.c: Receive and dispatch graphics events to application * src/remote/remote_protocol.x: Wire protocol definition for graphics events * src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h, src/qemu/qemu_monitor_json.c: Watch for VNC_CONNECTED, VNC_INITIALIZED & VNC_DISCONNETED events from QEMU monitor
2010-03-19 13:27:45 +00:00
int
qemuMonitorEmitBalloonChange(qemuMonitorPtr mon,
unsigned long long actual)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainBalloonChange, mon->vm, actual);
return ret;
}
Add domain events for graphics network clients This introduces a new event type VIR_DOMAIN_EVENT_ID_GRAPHICS The same event can be emitted in 3 scenarios typedef enum { VIR_DOMAIN_EVENT_GRAPHICS_CONNECT = 0, VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE, VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT, } virDomainEventGraphicsPhase; Connect/disconnect are triggered at socket accept/close. The initialize phase is immediately after the protocol setup and authentication has completed. ie when the client is authorized and about to start interacting with the graphical desktop This event comes with *a lot* of potential information - IP address, port & address family of client - IP address, port & address family of server - Authentication scheme (arbitrary string) - Authenticated subject identity. A subject may have multiple identities with some authentication schemes. For example, vencrypt+sasl results in a x509dname and saslUsername identities. This results in a very complicated callback :-( typedef enum { VIR_DOMAIN_EVENT_GRAPHICS_ADDRESS_IPV4, VIR_DOMAIN_EVENT_GRAPHICS_ADDRESS_IPV6, } virDomainEventGraphicsAddressType; struct _virDomainEventGraphicsAddress { int family; const char *node; const char *service; }; typedef struct _virDomainEventGraphicsAddress virDomainEventGraphicsAddress; typedef virDomainEventGraphicsAddress *virDomainEventGraphicsAddressPtr; struct _virDomainEventGraphicsSubject { int nidentity; struct { const char *type; const char *name; } *identities; }; typedef struct _virDomainEventGraphicsSubject virDomainEventGraphicsSubject; typedef virDomainEventGraphicsSubject *virDomainEventGraphicsSubjectPtr; typedef void (*virConnectDomainEventGraphicsCallback)(virConnectPtr conn, virDomainPtr dom, int phase, virDomainEventGraphicsAddressPtr local, virDomainEventGraphicsAddressPtr remote, const char *authScheme, virDomainEventGraphicsSubjectPtr subject, void *opaque); The wire protocol is similarly complex struct remote_domain_event_graphics_address { int family; remote_nonnull_string node; remote_nonnull_string service; }; const REMOTE_DOMAIN_EVENT_GRAPHICS_IDENTITY_MAX = 20; struct remote_domain_event_graphics_identity { remote_nonnull_string type; remote_nonnull_string name; }; struct remote_domain_event_graphics_msg { remote_nonnull_domain dom; int phase; remote_domain_event_graphics_address local; remote_domain_event_graphics_address remote; remote_nonnull_string authScheme; remote_domain_event_graphics_identity subject<REMOTE_DOMAIN_EVENT_GRAPHICS_IDENTITY_MAX>; }; This is currently implemented in QEMU for the VNC graphics protocol, but designed to be usable with SPICE graphics in the future too. * daemon/remote.c: Dispatch graphics events to client * examples/domain-events/events-c/event-test.c: Watch for graphics events * include/libvirt/libvirt.h.in: Define new graphics event ID and callback signature * src/conf/domain_event.c, src/conf/domain_event.h, src/libvirt_private.syms: Extend API to handle graphics events * src/qemu/qemu_driver.c: Connect to the QEMU monitor event for VNC events and emit a libvirt graphics event * src/remote/remote_driver.c: Receive and dispatch graphics events to application * src/remote/remote_protocol.x: Wire protocol definition for graphics events * src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h, src/qemu/qemu_monitor_json.c: Watch for VNC_CONNECTED, VNC_INITIALIZED & VNC_DISCONNETED events from QEMU monitor
2010-03-19 13:27:45 +00:00
int
qemuMonitorEmitDeviceDeleted(qemuMonitorPtr mon,
const char *devAlias)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainDeviceDeleted, mon->vm, devAlias);
return ret;
}
int
qemuMonitorEmitNicRxFilterChanged(qemuMonitorPtr mon,
const char *devAlias)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainNicRxFilterChanged, mon->vm, devAlias);
return ret;
}
int
qemuMonitorEmitSerialChange(qemuMonitorPtr mon,
const char *devAlias,
bool connected)
{
int ret = -1;
VIR_DEBUG("mon=%p, devAlias='%s', connected=%d", mon, devAlias, connected);
QEMU_MONITOR_CALLBACK(mon, ret, domainSerialChange, mon->vm, devAlias, connected);
return ret;
}
int
qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainSpiceMigrated, mon->vm);
return ret;
}
int
qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
int status)
{
int ret = -1;
VIR_DEBUG("mon=%p, status=%s",
mon, NULLSTR(qemuMonitorMigrationStatusTypeToString(status)));
QEMU_MONITOR_CALLBACK(mon, ret, domainMigrationStatus, mon->vm, status);
return ret;
}
int
qemuMonitorEmitMigrationPass(qemuMonitorPtr mon,
int pass)
{
int ret = -1;
VIR_DEBUG("mon=%p, pass=%d", mon, pass);
QEMU_MONITOR_CALLBACK(mon, ret, domainMigrationPass, mon->vm, pass);
return ret;
}
int
qemuMonitorEmitAcpiOstInfo(qemuMonitorPtr mon,
const char *alias,
const char *slotType,
const char *slot,
unsigned int source,
unsigned int status)
{
int ret = -1;
VIR_DEBUG("mon=%p, alias='%s', slotType='%s', slot='%s', source='%u' status=%u",
mon, NULLSTR(alias), slotType, slot, source, status);
QEMU_MONITOR_CALLBACK(mon, ret, domainAcpiOstInfo, mon->vm,
alias, slotType, slot, source, status);
return ret;
}
int
qemuMonitorEmitBlockThreshold(qemuMonitorPtr mon,
const char *nodename,
unsigned long long threshold,
unsigned long long excess)
{
int ret = -1;
VIR_DEBUG("mon=%p, node-name='%s', threshold='%llu', excess='%llu'",
mon, nodename, threshold, excess);
QEMU_MONITOR_CALLBACK(mon, ret, domainBlockThreshold, mon->vm,
nodename, threshold, excess);
return ret;
}
int
qemuMonitorEmitDumpCompleted(qemuMonitorPtr mon,
int status,
qemuMonitorDumpStatsPtr stats,
const char *error)
{
int ret = -1;
VIR_DEBUG("mon=%p", mon);
QEMU_MONITOR_CALLBACK(mon, ret, domainDumpCompleted, mon->vm,
status, stats, error);
return ret;
}
int
qemuMonitorEmitPRManagerStatusChanged(qemuMonitorPtr mon,
const char *prManager,
bool connected)
{
int ret = -1;
VIR_DEBUG("mon=%p, prManager='%s', connected=%d", mon, prManager, connected);
QEMU_MONITOR_CALLBACK(mon, ret, domainPRManagerStatusChanged,
mon->vm, prManager, connected);
return ret;
}
int
qemuMonitorEmitRdmaGidStatusChanged(qemuMonitorPtr mon,
const char *netdev,
bool gid_status,
unsigned long long subnet_prefix,
unsigned long long interface_id)
{
int ret = -1;
VIR_DEBUG("netdev=%s, gid_status=%d, subnet_prefix=0x%llx, interface_id=0x%llx",
netdev, gid_status, subnet_prefix, interface_id);
QEMU_MONITOR_CALLBACK(mon, ret, domainRdmaGidStatusChanged, mon->vm,
netdev, gid_status, subnet_prefix, interface_id);
return ret;
}
int
qemuMonitorSetCapabilities(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
if (!mon->json)
return 0;
return qemuMonitorJSONSetCapabilities(mon);
}
int
qemuMonitorStartCPUs(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONStartCPUs(mon);
}
int
qemuMonitorStopCPUs(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONStopCPUs(mon);
}
int
qemuMonitorCheck(qemuMonitorPtr mon)
{
bool running;
return qemuMonitorGetStatus(mon, &running, NULL);
}
int
qemuMonitorGetStatus(qemuMonitorPtr mon,
bool *running,
virDomainPausedReason *reason)
{
VIR_DEBUG("running=%p, reason=%p", running, reason);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetStatus(mon, running, reason);
}
int
qemuMonitorSystemPowerdown(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSystemPowerdown(mon);
}
int
qemuMonitorSystemReset(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSystemReset(mon);
}
static void
qemuMonitorCPUInfoClear(qemuMonitorCPUInfoPtr cpus,
size_t ncpus)
{
size_t i;
for (i = 0; i < ncpus; i++) {
cpus[i].id = 0;
cpus[i].qemu_id = -1;
cpus[i].socket_id = -1;
cpus[i].core_id = -1;
cpus[i].thread_id = -1;
cpus[i].node_id = -1;
cpus[i].vcpus = 0;
cpus[i].tid = 0;
cpus[i].halted = false;
VIR_FREE(cpus[i].qom_path);
VIR_FREE(cpus[i].alias);
VIR_FREE(cpus[i].type);
}
}
void
qemuMonitorCPUInfoFree(qemuMonitorCPUInfoPtr cpus,
size_t ncpus)
{
if (!cpus)
return;
qemuMonitorCPUInfoClear(cpus, ncpus);
VIR_FREE(cpus);
}
void
qemuMonitorQueryCpusFree(struct qemuMonitorQueryCpusEntry *entries,
size_t nentries)
{
size_t i;
if (!entries)
return;
for (i = 0; i < nentries; i++)
VIR_FREE(entries[i].qom_path);
VIR_FREE(entries);
}
/**
* Legacy approach doesn't allow out of order cpus, thus no complex matching
* algorithm is necessary */
static void
qemuMonitorGetCPUInfoLegacy(struct qemuMonitorQueryCpusEntry *cpuentries,
size_t ncpuentries,
qemuMonitorCPUInfoPtr vcpus,
size_t maxvcpus)
{
size_t i;
for (i = 0; i < maxvcpus; i++) {
if (i < ncpuentries) {
vcpus[i].tid = cpuentries[i].tid;
vcpus[i].halted = cpuentries[i].halted;
vcpus[i].qemu_id = cpuentries[i].qemu_id;
}
/* for legacy hotplug to work we need to fake the vcpu count added by
* enabling a given vcpu */
vcpus[i].vcpus = 1;
}
}
/**
* qemuMonitorGetCPUInfoHotplug:
*
* This function stitches together data retrieved via query-hotpluggable-cpus
* which returns entities on the hotpluggable level (which may describe more
* than one guest logical vcpu) with the output of query-cpus (or
* query-cpus-fast), having an entry per enabled guest logical vcpu.
*
* query-hotpluggable-cpus conveys following information:
* - topology information and number of logical vcpus this entry creates
* - device type name of the entry that needs to be used when hotplugging
* - qom path in qemu which can be used to map the entry against
* query-cpus[-fast]
*
* query-cpus[-fast] conveys following information:
* - thread id of a given guest logical vcpu
* - order in which the vcpus were inserted
* - qom path to allow mapping the two together
*
* The libvirt's internal structure has an entry for each possible (even
* disabled) guest vcpu. The purpose is to map the data together so that we are
* certain of the thread id mapping and the information required for vcpu
* hotplug.
*
* This function returns 0 on success and -1 on error, but does not report
* libvirt errors so that fallback approach can be used.
*/
static int
qemuMonitorGetCPUInfoHotplug(struct qemuMonitorQueryHotpluggableCpusEntry *hotplugvcpus,
size_t nhotplugvcpus,
struct qemuMonitorQueryCpusEntry *cpuentries,
size_t ncpuentries,
qemuMonitorCPUInfoPtr vcpus,
size_t maxvcpus)
{
char *tmp;
int order = 1;
size_t totalvcpus = 0;
size_t mastervcpu; /* this iterator is used for iterating hotpluggable entities */
size_t slavevcpu; /* this corresponds to subentries of a hotpluggable entry */
size_t anyvcpu; /* this iterator is used for any vcpu entry in the result */
size_t i;
size_t j;
/* ensure that the total vcpu count reported by query-hotpluggable-cpus equals
* to the libvirt maximum cpu count */
for (i = 0; i < nhotplugvcpus; i++)
totalvcpus += hotplugvcpus[i].vcpus;
/* trim '/thread...' suffix from the data returned by query-cpus[-fast] */
for (i = 0; i < ncpuentries; i++) {
if (cpuentries[i].qom_path &&
(tmp = strstr(cpuentries[i].qom_path, "/thread")))
*tmp = '\0';
}
if (totalvcpus != maxvcpus) {
VIR_DEBUG("expected '%zu' total vcpus got '%zu'", maxvcpus, totalvcpus);
return -1;
}
/* Note the order in which the hotpluggable entities are inserted by
* matching them to the query-cpus[-fast] entries */
for (i = 0; i < ncpuentries; i++) {
for (j = 0; j < nhotplugvcpus; j++) {
if (!cpuentries[i].qom_path ||
!hotplugvcpus[j].qom_path ||
STRNEQ(cpuentries[i].qom_path, hotplugvcpus[j].qom_path))
continue;
/* add ordering info for hotpluggable entries */
if (hotplugvcpus[j].enable_id == 0)
hotplugvcpus[j].enable_id = order++;
break;
}
}
/* transfer appropriate data from the hotpluggable list to corresponding
* entries. the entries returned by qemu may in fact describe multiple
* logical vcpus in the guest */
mastervcpu = 0;
for (i = 0; i < nhotplugvcpus; i++) {
vcpus[mastervcpu].online = !!hotplugvcpus[i].qom_path;
vcpus[mastervcpu].hotpluggable = !!hotplugvcpus[i].alias ||
!vcpus[mastervcpu].online;
vcpus[mastervcpu].socket_id = hotplugvcpus[i].socket_id;
vcpus[mastervcpu].core_id = hotplugvcpus[i].core_id;
vcpus[mastervcpu].thread_id = hotplugvcpus[i].thread_id;
vcpus[mastervcpu].node_id = hotplugvcpus[i].node_id;
vcpus[mastervcpu].vcpus = hotplugvcpus[i].vcpus;
VIR_STEAL_PTR(vcpus[mastervcpu].qom_path, hotplugvcpus[i].qom_path);
VIR_STEAL_PTR(vcpus[mastervcpu].alias, hotplugvcpus[i].alias);
VIR_STEAL_PTR(vcpus[mastervcpu].type, hotplugvcpus[i].type);
vcpus[mastervcpu].id = hotplugvcpus[i].enable_id;
/* copy state information to slave vcpus */
for (slavevcpu = mastervcpu + 1; slavevcpu < mastervcpu + hotplugvcpus[i].vcpus; slavevcpu++) {
vcpus[slavevcpu].online = vcpus[mastervcpu].online;
vcpus[slavevcpu].hotpluggable = vcpus[mastervcpu].hotpluggable;
}
/* calculate next master vcpu (hotpluggable unit) entry */
mastervcpu += hotplugvcpus[i].vcpus;
}
/* match entries from query cpus to the output array taking into account
* multi-vcpu objects */
for (j = 0; j < ncpuentries; j++) {
/* find the correct entry or beginning of group of entries */
for (anyvcpu = 0; anyvcpu < maxvcpus; anyvcpu++) {
if (cpuentries[j].qom_path && vcpus[anyvcpu].qom_path &&
STREQ(cpuentries[j].qom_path, vcpus[anyvcpu].qom_path))
break;
}
if (anyvcpu == maxvcpus) {
VIR_DEBUG("too many query-cpus[-fast] entries for a given "
"query-hotpluggable-cpus entry");
return -1;
}
if (vcpus[anyvcpu].vcpus != 1) {
/* find a possibly empty vcpu thread for core granularity systems */
for (; anyvcpu < maxvcpus; anyvcpu++) {
if (vcpus[anyvcpu].tid == 0)
break;
}
}
vcpus[anyvcpu].qemu_id = cpuentries[j].qemu_id;
vcpus[anyvcpu].tid = cpuentries[j].tid;
vcpus[anyvcpu].halted = cpuentries[j].halted;
}
return 0;
}
/**
* qemuMonitorGetCPUInfo:
* @mon: monitor
* @vcpus: pointer filled by array of qemuMonitorCPUInfo structures
* @maxvcpus: total possible number of vcpus
* @hotplug: query data relevant for hotplug support
* @fast: use QMP query-cpus-fast if supported
*
* Detects VCPU information. If qemu doesn't support or fails reporting
* information this function will return success as other parts of libvirt
* are able to cope with that.
*
* Returns 0 on success (including if qemu didn't report any data) and
* -1 on error (reports libvirt error).
*/
int
qemuMonitorGetCPUInfo(qemuMonitorPtr mon,
qemuMonitorCPUInfoPtr *vcpus,
size_t maxvcpus,
bool hotplug,
bool fast)
{
struct qemuMonitorQueryHotpluggableCpusEntry *hotplugcpus = NULL;
size_t nhotplugcpus = 0;
struct qemuMonitorQueryCpusEntry *cpuentries = NULL;
size_t ncpuentries = 0;
int ret = -1;
int rc;
qemuMonitorCPUInfoPtr info = NULL;
QEMU_CHECK_MONITOR(mon);
if (VIR_ALLOC_N(info, maxvcpus) < 0)
return -1;
if (!mon->json)
hotplug = false;
/* initialize a few non-zero defaults */
qemuMonitorCPUInfoClear(info, maxvcpus);
if (hotplug &&
(qemuMonitorJSONGetHotpluggableCPUs(mon, &hotplugcpus, &nhotplugcpus)) < 0)
goto cleanup;
rc = qemuMonitorJSONQueryCPUs(mon, &cpuentries, &ncpuentries, hotplug,
fast);
if (rc < 0) {
if (!hotplug && rc == -2) {
VIR_STEAL_PTR(*vcpus, info);
ret = 0;
}
goto cleanup;
}
if (!hotplugcpus ||
qemuMonitorGetCPUInfoHotplug(hotplugcpus, nhotplugcpus,
cpuentries, ncpuentries,
info, maxvcpus) < 0) {
/* Fallback to the legacy algorithm. Hotplug paths will make sure that
* the appropriate data is present */
qemuMonitorCPUInfoClear(info, maxvcpus);
qemuMonitorGetCPUInfoLegacy(cpuentries, ncpuentries, info, maxvcpus);
}
VIR_STEAL_PTR(*vcpus, info);
ret = 0;
cleanup:
qemuMonitorQueryHotpluggableCpusFree(hotplugcpus, nhotplugcpus);
qemuMonitorQueryCpusFree(cpuentries, ncpuentries);
qemuMonitorCPUInfoFree(info, maxvcpus);
return ret;
}
/**
* qemuMonitorGetCpuHalted:
*
* Returns a bitmap of vcpu id's that are halted. The id's correspond to the
* 'CPU' field as reported by query-cpus[-fast]'.
*/
virBitmapPtr
qemuMonitorGetCpuHalted(qemuMonitorPtr mon,
size_t maxvcpus,
bool fast)
{
struct qemuMonitorQueryCpusEntry *cpuentries = NULL;
size_t ncpuentries = 0;
size_t i;
int rc;
virBitmapPtr ret = NULL;
QEMU_CHECK_MONITOR_NULL(mon);
rc = qemuMonitorJSONQueryCPUs(mon, &cpuentries, &ncpuentries, false,
fast);
if (rc < 0)
goto cleanup;
if (!(ret = virBitmapNew(maxvcpus)))
goto cleanup;
for (i = 0; i < ncpuentries; i++) {
if (cpuentries[i].halted)
ignore_value(virBitmapSetBit(ret, cpuentries[i].qemu_id));
}
cleanup:
qemuMonitorQueryCpusFree(cpuentries, ncpuentries);
return ret;
}
int
qemuMonitorSetLink(qemuMonitorPtr mon,
const char *name,
virDomainNetInterfaceLinkState state)
{
VIR_DEBUG("name=%s, state=%u", name, state);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSetLink(mon, name, state);
}
int
qemuMonitorGetVirtType(qemuMonitorPtr mon,
virDomainVirtType *virtType)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetVirtType(mon, virtType);
}
/**
* Returns: 0 if balloon not supported, +1 if balloon query worked
* or -1 on failure
*/
int
qemuMonitorGetBalloonInfo(qemuMonitorPtr mon,
unsigned long long *currmem)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetBalloonInfo(mon, currmem);
}
int
qemuMonitorGetMemoryStats(qemuMonitorPtr mon,
virDomainMemballoonDefPtr balloon,
virDomainMemoryStatPtr stats,
unsigned int nr_stats)
{
VIR_DEBUG("stats=%p nstats=%u", stats, nr_stats);
QEMU_CHECK_MONITOR(mon);
qemuMonitorInitBalloonObjectPath(mon, balloon);
return qemuMonitorJSONGetMemoryStats(mon, mon->balloonpath,
stats, nr_stats);
}
/**
* qemuMonitorSetMemoryStatsPeriod:
*
* This function sets balloon stats update period.
*
* Returns 0 on success and -1 on error, but does *not* set an error.
*/
int
qemuMonitorSetMemoryStatsPeriod(qemuMonitorPtr mon,
virDomainMemballoonDefPtr balloon,
int period)
{
int ret = -1;
VIR_DEBUG("mon=%p period=%d", mon, period);
if (!mon)
return -1;
if (!mon->json)
return -1;
if (period < 0)
return -1;
qemuMonitorInitBalloonObjectPath(mon, balloon);
if (mon->balloonpath) {
ret = qemuMonitorJSONSetMemoryStatsPeriod(mon, mon->balloonpath,
period);
/*
* Most of the calls to this function are supposed to be
* non-fatal and the only one that should be fatal wants its
* own error message. More details for debugging will be in
* the log file.
*/
if (ret < 0)
virResetLastError();
}
return ret;
}
2012-01-19 17:58:58 +01:00
int
qemuMonitorBlockIOStatusToError(const char *status)
{
int st = qemuMonitorBlockIOStatusTypeFromString(status);
if (st < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("unknown block IO status: %s"), status);
2012-01-19 17:58:58 +01:00
return -1;
}
switch ((qemuMonitorBlockIOStatus) st) {
case QEMU_MONITOR_BLOCK_IO_STATUS_OK:
return VIR_DOMAIN_DISK_ERROR_NONE;
case QEMU_MONITOR_BLOCK_IO_STATUS_FAILED:
return VIR_DOMAIN_DISK_ERROR_UNSPEC;
case QEMU_MONITOR_BLOCK_IO_STATUS_NOSPACE:
return VIR_DOMAIN_DISK_ERROR_NO_SPACE;
/* unreachable */
case QEMU_MONITOR_BLOCK_IO_STATUS_LAST:
break;
}
return -1;
}
static void
qemuDomainDiskInfoFree(void *value, const void *name ATTRIBUTE_UNUSED)
{
struct qemuDomainDiskInfo *info = value;
VIR_FREE(info->nodename);
VIR_FREE(info);
}
virHashTablePtr
qemuMonitorGetBlockInfo(qemuMonitorPtr mon)
{
int ret;
virHashTablePtr table;
QEMU_CHECK_MONITOR_NULL(mon);
if (!(table = virHashCreate(32, qemuDomainDiskInfoFree)))
return NULL;
ret = qemuMonitorJSONGetBlockInfo(mon, table);
if (ret < 0) {
virHashFree(table);
return NULL;
}
return table;
}
/**
* qemuMonitorQueryBlockstats:
* @mon: monitor object
*
* Returns data from a call to 'query-blockstats'.
*/
virJSONValuePtr
qemuMonitorQueryBlockstats(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR_NULL(mon);
return qemuMonitorJSONQueryBlockstats(mon);
}
/**
* qemuMonitorGetAllBlockStatsInfo:
* @mon: monitor object
* @ret_stats: pointer that is filled with a hash table containing the stats
* @backingChain: recurse into the backing chain of devices
*
* Creates a hash table in @ret_stats with block stats of all devices. In case
* @backingChain is true @ret_stats will additionally contain stats for
* backing chain members of block devices.
*
* Returns < 0 on error, count of supported block stats fields on success.
*/
int
qemuMonitorGetAllBlockStatsInfo(qemuMonitorPtr mon,
virHashTablePtr *ret_stats,
bool backingChain)
{
int ret = -1;
VIR_DEBUG("ret_stats=%p, backing=%d", ret_stats, backingChain);
QEMU_CHECK_MONITOR(mon);
if (!(*ret_stats = virHashCreate(10, virHashValueFree)))
goto error;
ret = qemuMonitorJSONGetAllBlockStatsInfo(mon, *ret_stats,
backingChain);
if (ret < 0)
goto error;
return ret;
error:
virHashFree(*ret_stats);
*ret_stats = NULL;
return -1;
}
/* Updates "stats" to fill virtual and physical size of the image */
int
qemuMonitorBlockStatsUpdateCapacity(qemuMonitorPtr mon,
virHashTablePtr stats,
bool backingChain)
{
VIR_DEBUG("stats=%p, backing=%d", stats, backingChain);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockStatsUpdateCapacity(mon, stats, backingChain);
}
int
qemuMonitorBlockStatsUpdateCapacityBlockdev(qemuMonitorPtr mon,
virHashTablePtr stats)
{
VIR_DEBUG("stats=%p", stats);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockStatsUpdateCapacityBlockdev(mon, stats);
}
int
qemuMonitorBlockResize(qemuMonitorPtr mon,
const char *device,
const char *nodename,
unsigned long long size)
{
VIR_DEBUG("device=%s nodename=%s size=%llu",
NULLSTR(device), NULLSTR(nodename), size);
QEMU_CHECK_MONITOR(mon);
if ((!device && !nodename) || (device && nodename)) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("exactly one of 'device' and 'nodename' need to be specified"));
return -1;
}
return qemuMonitorJSONBlockResize(mon, device, nodename, size);
}
static const char *
qemuMonitorTypeToProtocol(int type)
{
switch (type) {
case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
return "vnc";
case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
return "spice";
default:
virReportError(VIR_ERR_INVALID_ARG,
_("unsupported protocol type %s"),
virDomainGraphicsTypeToString(type));
return NULL;
}
}
int
qemuMonitorSetPassword(qemuMonitorPtr mon,
int type,
const char *password,
const char *action_if_connected)
{
const char *protocol = qemuMonitorTypeToProtocol(type);
if (!protocol)
return -1;
VIR_DEBUG("protocol=%s, password=%p, action_if_connected=%s",
protocol, password, action_if_connected);
QEMU_CHECK_MONITOR(mon);
if (!password)
password = "";
if (!action_if_connected)
action_if_connected = "keep";
return qemuMonitorJSONSetPassword(mon, protocol, password, action_if_connected);
}
int
qemuMonitorExpirePassword(qemuMonitorPtr mon,
int type,
const char *expire_time)
{
const char *protocol = qemuMonitorTypeToProtocol(type);
if (!protocol)
return -1;
VIR_DEBUG("protocol=%s, expire_time=%s", protocol, expire_time);
QEMU_CHECK_MONITOR(mon);
if (!expire_time)
expire_time = "now";
return qemuMonitorJSONExpirePassword(mon, protocol, expire_time);
}
/*
* Returns: 0 if balloon not supported, +1 if balloon adjust worked
* or -1 on failure
*/
int
qemuMonitorSetBalloon(qemuMonitorPtr mon,
unsigned long long newmem)
{
VIR_DEBUG("newmem=%llu", newmem);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSetBalloon(mon, newmem);
}
/*
* Returns: 0 if CPU modification was successful or -1 on failure
*/
int
qemuMonitorSetCPU(qemuMonitorPtr mon, int cpu, bool online)
{
VIR_DEBUG("cpu=%d online=%d", cpu, online);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSetCPU(mon, cpu, online);
}
int
qemuMonitorEjectMedia(qemuMonitorPtr mon,
const char *dev_name,
bool force)
{
VIR_DEBUG("dev_name=%s force=%d", dev_name, force);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONEjectMedia(mon, dev_name, force);
}
int
qemuMonitorChangeMedia(qemuMonitorPtr mon,
const char *dev_name,
const char *newmedia,
const char *format)
{
VIR_DEBUG("dev_name=%s newmedia=%s format=%s", dev_name, newmedia, format);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONChangeMedia(mon, dev_name, newmedia, format);
}
int
qemuMonitorSaveVirtualMemory(qemuMonitorPtr mon,
unsigned long long offset,
size_t length,
const char *path)
{
VIR_DEBUG("offset=%llu length=%zu path=%s", offset, length, path);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSaveVirtualMemory(mon, offset, length, path);
}
int
qemuMonitorSavePhysicalMemory(qemuMonitorPtr mon,
unsigned long long offset,
size_t length,
const char *path)
{
VIR_DEBUG("offset=%llu length=%zu path=%s", offset, length, path);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSavePhysicalMemory(mon, offset, length, path);
}
int
qemuMonitorSetMigrationSpeed(qemuMonitorPtr mon,
unsigned long bandwidth)
{
VIR_DEBUG("bandwidth=%lu", bandwidth);
QEMU_CHECK_MONITOR(mon);
if (bandwidth > QEMU_DOMAIN_MIG_BANDWIDTH_MAX) {
virReportError(VIR_ERR_OVERFLOW,
_("bandwidth must be less than %llu"),
QEMU_DOMAIN_MIG_BANDWIDTH_MAX + 1ULL);
return -1;
}
return qemuMonitorJSONSetMigrationSpeed(mon, bandwidth);
}
int
qemuMonitorSetMigrationDowntime(qemuMonitorPtr mon,
unsigned long long downtime)
{
VIR_DEBUG("downtime=%llu", downtime);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSetMigrationDowntime(mon, downtime);
}
int
qemuMonitorGetMigrationCacheSize(qemuMonitorPtr mon,
unsigned long long *cacheSize)
{
VIR_DEBUG("cacheSize=%p", cacheSize);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetMigrationCacheSize(mon, cacheSize);
}
int
qemuMonitorSetMigrationCacheSize(qemuMonitorPtr mon,
unsigned long long cacheSize)
{
VIR_DEBUG("cacheSize=%llu", cacheSize);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSetMigrationCacheSize(mon, cacheSize);
}
/**
* qemuMonitorGetMigrationParams:
* @mon: Pointer to the monitor object.
* @params: Where to store migration parameters.
*
* If QEMU does not support querying migration parameters, the function will
* set @params to NULL and return 0 (success). The caller is responsible for
* freeing @params.
*
* Returns 0 on success, -1 on error.
*/
int
qemuMonitorGetMigrationParams(qemuMonitorPtr mon,
virJSONValuePtr *params)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetMigrationParams(mon, params);
}
/**
* qemuMonitorSetMigrationParams:
* @mon: Pointer to the monitor object.
* @params: Migration parameters.
*
* The @params object is consumed and should not be referenced by the caller
* after this function returns.
*
* Returns 0 on success, -1 on error.
*/
int
qemuMonitorSetMigrationParams(qemuMonitorPtr mon,
virJSONValuePtr params)
{
QEMU_CHECK_MONITOR_GOTO(mon, error);
return qemuMonitorJSONSetMigrationParams(mon, params);
error:
virJSONValueFree(params);
return -1;
}
int
qemuMonitorGetMigrationStats(qemuMonitorPtr mon,
qemuMonitorMigrationStatsPtr stats,
char **error)
{
QEMU_CHECK_MONITOR(mon);
if (error)
*error = NULL;
return qemuMonitorJSONGetMigrationStats(mon, stats, error);
}
int
qemuMonitorMigrateToFd(qemuMonitorPtr mon,
unsigned int flags,
int fd)
{
int ret;
VIR_DEBUG("fd=%d flags=0x%x", fd, flags);
QEMU_CHECK_MONITOR(mon);
if (qemuMonitorSendFileHandle(mon, "migrate", fd) < 0)
return -1;
ret = qemuMonitorJSONMigrate(mon, flags, "fd:migrate");
if (ret < 0) {
if (qemuMonitorCloseFileHandle(mon, "migrate") < 0)
VIR_WARN("failed to close migration handle");
}
return ret;
}
int
qemuMonitorMigrateToHost(qemuMonitorPtr mon,
unsigned int flags,
const char *protocol,
const char *hostname,
int port)
{
int ret;
char *uri = NULL;
VIR_DEBUG("hostname=%s port=%d flags=0x%x", hostname, port, flags);
QEMU_CHECK_MONITOR(mon);
if (strchr(hostname, ':')) {
if (virAsprintf(&uri, "%s:[%s]:%d", protocol, hostname, port) < 0)
return -1;
} else if (virAsprintf(&uri, "%s:%s:%d", protocol, hostname, port) < 0) {
return -1;
}
ret = qemuMonitorJSONMigrate(mon, flags, uri);
VIR_FREE(uri);
return ret;
}
int
qemuMonitorMigrateCancel(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONMigrateCancel(mon);
}
int
qemuMonitorQueryDump(qemuMonitorPtr mon,
qemuMonitorDumpStatsPtr stats)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONQueryDump(mon, stats);
}
/**
* Returns 1 if @capability is supported, 0 if it's not, or -1 on error.
*/
int
qemuMonitorGetDumpGuestMemoryCapability(qemuMonitorPtr mon,
const char *capability)
{
VIR_DEBUG("capability=%s", capability);
QEMU_CHECK_MONITOR(mon);
/* No capability is supported without JSON monitor */
if (!mon->json)
return 0;
return qemuMonitorJSONGetDumpGuestMemoryCapability(mon, capability);
}
int
qemuMonitorDumpToFd(qemuMonitorPtr mon,
int fd,
const char *dumpformat,
bool detach)
{
int ret;
VIR_DEBUG("fd=%d dumpformat=%s", fd, dumpformat);
QEMU_CHECK_MONITOR(mon);
if (qemuMonitorSendFileHandle(mon, "dump", fd) < 0)
return -1;
ret = qemuMonitorJSONDump(mon, "fd:dump", dumpformat, detach);
if (ret < 0) {
if (qemuMonitorCloseFileHandle(mon, "dump") < 0)
VIR_WARN("failed to close dumping handle");
}
return ret;
}
int
qemuMonitorGraphicsRelocate(qemuMonitorPtr mon,
int type,
const char *hostname,
int port,
int tlsPort,
const char *tlsSubject)
{
VIR_DEBUG("type=%d hostname=%s port=%d tlsPort=%d tlsSubject=%s",
type, hostname, port, tlsPort, NULLSTR(tlsSubject));
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGraphicsRelocate(mon,
type,
hostname,
port,
tlsPort,
tlsSubject);
}
int
qemuMonitorSendFileHandle(qemuMonitorPtr mon,
const char *fdname,
int fd)
{
VIR_DEBUG("fdname=%s fd=%d", fdname, fd);
QEMU_CHECK_MONITOR(mon);
if (fd < 0) {
virReportError(VIR_ERR_INVALID_ARG, "%s",
_("fd must be valid"));
return -1;
}
if (!mon->hasSendFD) {
virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
_("qemu is not using a unix socket monitor, "
"cannot send fd %s"), fdname);
return -1;
}
return qemuMonitorJSONSendFileHandle(mon, fdname, fd);
}
int
qemuMonitorCloseFileHandle(qemuMonitorPtr mon,
const char *fdname)
{
int ret = -1;
virErrorPtr error;
VIR_DEBUG("fdname=%s", fdname);
error = virSaveLastError();
QEMU_CHECK_MONITOR_GOTO(mon, cleanup);
ret = qemuMonitorJSONCloseFileHandle(mon, fdname);
cleanup:
if (error) {
virSetError(error);
virFreeError(error);
}
return ret;
}
/* Add the open file descriptor FD into the non-negative set FDSET.
* If NAME is present, it will be passed along for logging purposes.
* Returns the counterpart fd that qemu received, or -1 on error. */
int
qemuMonitorAddFd(qemuMonitorPtr mon, int fdset, int fd, const char *name)
{
VIR_DEBUG("fdset=%d, fd=%d, name=%s", fdset, fd, NULLSTR(name));
QEMU_CHECK_MONITOR(mon);
if (fd < 0 || fdset < 0) {
virReportError(VIR_ERR_INVALID_ARG, "%s",
_("fd and fdset must be valid"));
return -1;
}
if (!mon->hasSendFD) {
virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
_("qemu is not using a unix socket monitor, "
"cannot send fd %s"), NULLSTR(name));
return -1;
}
return qemuMonitorJSONAddFd(mon, fdset, fd, name);
}
/* Remove one of qemu's fds from the given FDSET, or if FD is
* negative, remove the entire set. Preserve any previous error on
* entry. Returns 0 on success, -1 on error. */
int
qemuMonitorRemoveFd(qemuMonitorPtr mon, int fdset, int fd)
{
int ret = -1;
virErrorPtr error;
VIR_DEBUG("fdset=%d, fd=%d", fdset, fd);
error = virSaveLastError();
QEMU_CHECK_MONITOR_GOTO(mon, cleanup);
ret = qemuMonitorJSONRemoveFd(mon, fdset, fd);
cleanup:
if (error) {
virSetError(error);
virFreeError(error);
}
return ret;
}
int
qemuMonitorAddNetdev(qemuMonitorPtr mon,
const char *netdevstr,
int *tapfd, char **tapfdName, int tapfdSize,
int *vhostfd, char **vhostfdName, int vhostfdSize)
{
int ret = -1;
size_t i = 0, j = 0;
VIR_DEBUG("netdevstr=%s tapfd=%p tapfdName=%p tapfdSize=%d"
"vhostfd=%p vhostfdName=%p vhostfdSize=%d",
netdevstr, tapfd, tapfdName, tapfdSize,
vhostfd, vhostfdName, vhostfdSize);
QEMU_CHECK_MONITOR(mon);
for (i = 0; i < tapfdSize; i++) {
if (qemuMonitorSendFileHandle(mon, tapfdName[i], tapfd[i]) < 0)
goto cleanup;
}
for (j = 0; j < vhostfdSize; j++) {
if (qemuMonitorSendFileHandle(mon, vhostfdName[j], vhostfd[j]) < 0)
goto cleanup;
}
ret = qemuMonitorJSONAddNetdev(mon, netdevstr);
cleanup:
if (ret < 0) {
while (i--) {
if (qemuMonitorCloseFileHandle(mon, tapfdName[i]) < 0)
VIR_WARN("failed to close device handle '%s'", tapfdName[i]);
}
while (j--) {
if (qemuMonitorCloseFileHandle(mon, vhostfdName[j]) < 0)
VIR_WARN("failed to close device handle '%s'", vhostfdName[j]);
}
}
return ret;
}
int
qemuMonitorRemoveNetdev(qemuMonitorPtr mon,
const char *alias)
{
VIR_DEBUG("alias=%s", alias);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONRemoveNetdev(mon, alias);
}
qemu: qemuMonitorQueryRxFilter - retrieve guest netdev rx-filter This function can be called at any time to get the current status of a guest's network device rx-filter. In particular it is useful to call after libvirt recieves a NIC_RX_FILTER_CHANGED event - this event only tells you that something has changed in the rx-filter, the details are retrieved with the query-rx-filter monitor command (only available in the json monitor). The command sent to the qemu monitor looks like this: {"execute":"query-rx-filter", "arguments": {"name":"net2"} }' and the results will look something like this: { "return": [ { "promiscuous": false, "name": "net2", "main-mac": "52:54:00:98:2d:e3", "unicast": "normal", "vlan": "normal", "vlan-table": [ 42, 0 ], "unicast-table": [ ], "multicast": "normal", "multicast-overflow": false, "unicast-overflow": false, "multicast-table": [ "33:33:ff:98:2d:e3", "01:80:c2:00:00:21", "01:00:5e:00:00:fb", "33:33:ff:98:2d:e2", "01:00:5e:00:00:01", "33:33:00:00:00:01" ], "broadcast-allowed": false } ], "id": "libvirt-14" } This is all parsed from JSON into a virNetDevRxFilter object for easier consumption. (unicast-table is usually empty, but is also an array of mac addresses similar to multicast-table). (NB: LIBNL_CFLAGS was added to tests/Makefile.am because virnetdev.h now includes util/virnetlink.h, which includes netlink/msg.h when appropriate. Without LIBNL_CFLAGS, gcc can't find that file (if libnl/netlink isn't available, LIBNL_CFLAGS will be empty and virnetlink.h won't try to include netlink/msg.h anyway).)
2014-09-22 12:19:41 -04:00
int
qemuMonitorQueryRxFilter(qemuMonitorPtr mon, const char *alias,
virNetDevRxFilterPtr *filter)
{
VIR_DEBUG("alias=%s filter=%p", alias, filter);
qemu: qemuMonitorQueryRxFilter - retrieve guest netdev rx-filter This function can be called at any time to get the current status of a guest's network device rx-filter. In particular it is useful to call after libvirt recieves a NIC_RX_FILTER_CHANGED event - this event only tells you that something has changed in the rx-filter, the details are retrieved with the query-rx-filter monitor command (only available in the json monitor). The command sent to the qemu monitor looks like this: {"execute":"query-rx-filter", "arguments": {"name":"net2"} }' and the results will look something like this: { "return": [ { "promiscuous": false, "name": "net2", "main-mac": "52:54:00:98:2d:e3", "unicast": "normal", "vlan": "normal", "vlan-table": [ 42, 0 ], "unicast-table": [ ], "multicast": "normal", "multicast-overflow": false, "unicast-overflow": false, "multicast-table": [ "33:33:ff:98:2d:e3", "01:80:c2:00:00:21", "01:00:5e:00:00:fb", "33:33:ff:98:2d:e2", "01:00:5e:00:00:01", "33:33:00:00:00:01" ], "broadcast-allowed": false } ], "id": "libvirt-14" } This is all parsed from JSON into a virNetDevRxFilter object for easier consumption. (unicast-table is usually empty, but is also an array of mac addresses similar to multicast-table). (NB: LIBNL_CFLAGS was added to tests/Makefile.am because virnetdev.h now includes util/virnetlink.h, which includes netlink/msg.h when appropriate. Without LIBNL_CFLAGS, gcc can't find that file (if libnl/netlink isn't available, LIBNL_CFLAGS will be empty and virnetlink.h won't try to include netlink/msg.h anyway).)
2014-09-22 12:19:41 -04:00
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONQueryRxFilter(mon, alias, filter);
qemu: qemuMonitorQueryRxFilter - retrieve guest netdev rx-filter This function can be called at any time to get the current status of a guest's network device rx-filter. In particular it is useful to call after libvirt recieves a NIC_RX_FILTER_CHANGED event - this event only tells you that something has changed in the rx-filter, the details are retrieved with the query-rx-filter monitor command (only available in the json monitor). The command sent to the qemu monitor looks like this: {"execute":"query-rx-filter", "arguments": {"name":"net2"} }' and the results will look something like this: { "return": [ { "promiscuous": false, "name": "net2", "main-mac": "52:54:00:98:2d:e3", "unicast": "normal", "vlan": "normal", "vlan-table": [ 42, 0 ], "unicast-table": [ ], "multicast": "normal", "multicast-overflow": false, "unicast-overflow": false, "multicast-table": [ "33:33:ff:98:2d:e3", "01:80:c2:00:00:21", "01:00:5e:00:00:fb", "33:33:ff:98:2d:e2", "01:00:5e:00:00:01", "33:33:00:00:00:01" ], "broadcast-allowed": false } ], "id": "libvirt-14" } This is all parsed from JSON into a virNetDevRxFilter object for easier consumption. (unicast-table is usually empty, but is also an array of mac addresses similar to multicast-table). (NB: LIBNL_CFLAGS was added to tests/Makefile.am because virnetdev.h now includes util/virnetlink.h, which includes netlink/msg.h when appropriate. Without LIBNL_CFLAGS, gcc can't find that file (if libnl/netlink isn't available, LIBNL_CFLAGS will be empty and virnetlink.h won't try to include netlink/msg.h anyway).)
2014-09-22 12:19:41 -04:00
}
void
qemuMonitorChardevInfoFree(void *data,
const void *name ATTRIBUTE_UNUSED)
{
qemuMonitorChardevInfoPtr info = data;
VIR_FREE(info->ptyPath);
VIR_FREE(info);
}
int
qemuMonitorGetChardevInfo(qemuMonitorPtr mon,
virHashTablePtr *retinfo)
{
int ret;
virHashTablePtr info = NULL;
VIR_DEBUG("retinfo=%p", retinfo);
QEMU_CHECK_MONITOR_GOTO(mon, error);
if (!(info = virHashCreate(10, qemuMonitorChardevInfoFree)))
goto error;
ret = qemuMonitorJSONGetChardevInfo(mon, info);
if (ret < 0)
goto error;
*retinfo = info;
return 0;
error:
virHashFree(info);
*retinfo = NULL;
return -1;
}
/**
* qemuMonitorDriveDel:
* @mon: monitor object
* @drivestr: identifier of drive to delete.
*
* Attempts to remove a host drive.
* Returns 1 if unsupported, 0 if ok, and -1 on other failure */
int
qemuMonitorDriveDel(qemuMonitorPtr mon,
const char *drivestr)
{
VIR_DEBUG("drivestr=%s", drivestr);
QEMU_CHECK_MONITOR(mon);
/* there won't be a direct replacement for drive_del in QMP */
return qemuMonitorTextDriveDel(mon, drivestr);
}
/**
* @mon: monitor object
* @devalias: alias of the device to detach
*
* Sends device detach request to qemu.
*
* Returns: 0 on success,
* -2 if DeviceNotFound error encountered (error NOT reported)
* -1 otherwise (error reported)
*/
int
qemuMonitorDelDevice(qemuMonitorPtr mon,
const char *devalias)
{
VIR_DEBUG("devalias=%s", devalias);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONDelDevice(mon, devalias);
}
int
qemuMonitorAddDeviceWithFd(qemuMonitorPtr mon,
const char *devicestr,
int fd,
const char *fdname)
{
VIR_DEBUG("device=%s fd=%d fdname=%s", devicestr, fd, NULLSTR(fdname));
int ret;
QEMU_CHECK_MONITOR(mon);
if (fd >= 0 && qemuMonitorSendFileHandle(mon, fdname, fd) < 0)
return -1;
ret = qemuMonitorJSONAddDevice(mon, devicestr);
if (ret < 0 && fd >= 0) {
if (qemuMonitorCloseFileHandle(mon, fdname) < 0)
VIR_WARN("failed to close device handle '%s'", fdname);
}
return ret;
}
int
qemuMonitorAddDevice(qemuMonitorPtr mon,
const char *devicestr)
{
return qemuMonitorAddDeviceWithFd(mon, devicestr, -1, NULL);
}
/**
* qemuMonitorAddDeviceArgs:
* @mon: monitor object
* @args: arguments for device add, consumed on success or failure
*
* Adds a device described by @args. Requires JSON monitor.
* Returns 0 on success -1 on error.
*/
int
qemuMonitorAddDeviceArgs(qemuMonitorPtr mon,
virJSONValuePtr args)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONAddDeviceArgs(mon, args);
}
virJSONValuePtr
qemuMonitorCreateObjectPropsWrap(const char *type,
const char *alias,
virJSONValuePtr *props)
{
virJSONValuePtr ret;
ignore_value(virJSONValueObjectCreate(&ret,
"s:qom-type", type,
"s:id", alias,
"A:props", props,
NULL));
return ret;
}
/**
* qemuMonitorCreateObjectProps:
* @propsret: returns full object properties
* @type: Type name of object to add
* @objalias: Alias of the new object
* @...: Optional arguments for the given object. See virJSONValueObjectAddVArgs.
*
* Returns a JSONValue containing everything on success and NULL on error.
*/
int
qemuMonitorCreateObjectProps(virJSONValuePtr *propsret,
const char *type,
const char *alias,
...)
{
virJSONValuePtr props = NULL;
int ret = -1;
va_list args;
*propsret = NULL;
va_start(args, alias);
if (virJSONValueObjectCreateVArgs(&props, args) < 0)
goto cleanup;
if (!(*propsret = qemuMonitorCreateObjectPropsWrap(type, alias, &props)))
goto cleanup;
ret = 0;
cleanup:
virJSONValueFree(props);
va_end(args);
return ret;
}
/**
* qemuMonitorAddObject:
* @mon: Pointer to monitor object
* @props: Pointer to a JSON object holding configuration of the object to add.
* The object must be non-null and contain at least the "qom-type" and
* "id" field. The object is consumed and the pointer is cleared.
* @alias: If not NULL, returns the alias of the added object if it was added
* successfully to qemu. Caller should free the returned pointer.
*
* Returns 0 on success -1 on error.
*/
int
qemuMonitorAddObject(qemuMonitorPtr mon,
virJSONValuePtr *props,
char **alias)
{
const char *type = NULL;
const char *id = NULL;
char *tmp = NULL;
int ret = -1;
if (!*props) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("object props can't be NULL"));
goto cleanup;
}
type = virJSONValueObjectGetString(*props, "qom-type");
id = virJSONValueObjectGetString(*props, "id");
VIR_DEBUG("type=%s id=%s", NULLSTR(type), NULLSTR(id));
QEMU_CHECK_MONITOR_GOTO(mon, cleanup);
if (!id || !type) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("missing alias or qom-type for qemu object '%s'"),
NULLSTR(type));
goto cleanup;
}
if (alias && VIR_STRDUP(tmp, id) < 0)
goto cleanup;
ret = qemuMonitorJSONAddObject(mon, *props);
*props = NULL;
if (alias)
VIR_STEAL_PTR(*alias, tmp);
cleanup:
VIR_FREE(tmp);
virJSONValueFree(*props);
*props = NULL;
return ret;
}
int
qemuMonitorDelObject(qemuMonitorPtr mon,
const char *objalias)
{
VIR_DEBUG("objalias=%s", objalias);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONDelObject(mon, objalias);
}
int
qemuMonitorAddDrive(qemuMonitorPtr mon,
const char *drivestr)
{
VIR_DEBUG("drive=%s", drivestr);
QEMU_CHECK_MONITOR(mon);
/* there won't ever be a direct QMP replacement for this function */
return qemuMonitorTextAddDrive(mon, drivestr);
}
int
qemuMonitorCreateSnapshot(qemuMonitorPtr mon, const char *name)
{
VIR_DEBUG("name=%s", name);
QEMU_CHECK_MONITOR(mon);
/* there won't ever be a direct QMP replacement for this function */
return qemuMonitorTextCreateSnapshot(mon, name);
}
int
qemuMonitorLoadSnapshot(qemuMonitorPtr mon, const char *name)
{
VIR_DEBUG("name=%s", name);
QEMU_CHECK_MONITOR(mon);
/* there won't ever be a direct QMP replacement for this function */
return qemuMonitorTextLoadSnapshot(mon, name);
}
int
qemuMonitorDeleteSnapshot(qemuMonitorPtr mon, const char *name)
{
VIR_DEBUG("name=%s", name);
QEMU_CHECK_MONITOR(mon);
/* there won't ever be a direct QMP replacement for this function */
return qemuMonitorTextDeleteSnapshot(mon, name);
}
blockjob: hoist bandwidth scaling out of monitor code qemu treats blockjob bandwidth as a 64-bit number, in the units of bytes/second. But we stupidly modeled block job bandwidth after migration bandwidth, which in turn was an 'unsigned long' and therefore subject to 32-bit vs. 64-bit interpretations, and with a scale of MiB/s. Our code already has to convert between the two scales, and report overflow as appropriate; although this conversion currently lives in the monitor code. In fact, our conversion code limited things to 63 bits, because we checked against LLONG_MAX and reject what would be negative bandwidth if treated as signed. On the bright side, our use of MiB/s means that even with a 32-bit unsigned long, we still have no problem representing a bandwidth of 2GiB/s, which is starting to be more feasible as 10-gigabit or even faster interfaces are used. And once you get past the physical speeds of existing interfaces, any larger bandwidth number behaves the same - effectively unlimited. But on the low side, the granularity of 1MiB/s tuning is rather coarse. So the new virDomainBlockJob API decided to go with a direct 64-bit bytes/sec number instead of the scaled number that prior blockjob APIs had used. But there is no point in rounding this number to MiB/s just to scale it back to bytes/s for handing to qemu. In order to make future code sharing possible between the old virDomainBlockRebase and the new virDomainBlockCopy, this patch moves the scaling and overflow detection into the driver code. Several of the block job calls that can set speed are fed through a common interface, so it was easier to adjust all block jobs at once, for consistency. This patch is just code motion; there should be no user-visible change in behavior. * src/qemu/qemu_monitor.h (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Change parameter type and scale. * src/qemu/qemu_monitor.c (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Move scaling and overflow detection... * src/qemu/qemu_driver.c (qemuDomainBlockJobImpl) (qemuDomainBlockRebase, qemuDomainBlockCommit): ...here. (qemuDomainBlockCopy): Use bytes/sec. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-08-29 13:58:45 -06:00
/* Start a drive-mirror block job. bandwidth is in bytes/sec. */
blockjob: add qemu capabilities related to block jobs Upstream qemu 1.3 is adding two new monitor commands, 'drive-mirror' and 'block-job-complete'[1], which can drive live block copy and storage migration. [Additionally, RHEL 6.3 had backported an earlier version of most of the same functionality, but under the names '__com.redhat_drive-mirror' and '__com.redhat_drive-reopen' and with slightly different JSON arguments, and has been using patches similar to these upstream patches for several months now.] The libvirt API virDomainBlockRebase as already committed for 0.9.12 is flexible enough to expose the basics of block copy, but some additional features in the 'drive-mirror' qemu command, such as setting error policy, setting granularity, or using a persistent bitmap, may later require a new libvirt API virDomainBlockCopy. I will wait to add that API until we know more about what qemu 1.3 will finally provide. This patch caters only to the upstream qemu 1.3 interface, although I have proven that the changes for RHEL 6.3 can be isolated to just qemu_monitor_json.c, and the rest of this series will gracefully handle either interface once the JSON differences are papered over in a downstream patch. For consistency with other block job commands, libvirt must handle the bandwidth argument as MiB/sec from the user, even though qemu exposes the speed argument as bytes/sec; then again, qemu rounds up to cluster size internally, so using MiB hides the worst effects of that rounding if you pass small numbers. [1]https://lists.gnu.org/archive/html/qemu-devel/2012-10/msg04123.html * src/qemu/qemu_capabilities.h (QEMU_CAPS_DRIVE_MIRROR) (QEMU_CAPS_DRIVE_REOPEN): New bits. * src/qemu/qemu_capabilities.c (qemuCaps): Name them. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONCheckCommands): Set them. (qemuMonitorJSONDriveMirror, qemuMonitorDrivePivot): New functions. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror) (qemuMonitorDrivePivot): Declare them. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): New passthroughs. * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): Declare them.
2012-09-28 17:29:53 -06:00
int
qemuMonitorDriveMirror(qemuMonitorPtr mon,
const char *device, const char *file,
blockjob: hoist bandwidth scaling out of monitor code qemu treats blockjob bandwidth as a 64-bit number, in the units of bytes/second. But we stupidly modeled block job bandwidth after migration bandwidth, which in turn was an 'unsigned long' and therefore subject to 32-bit vs. 64-bit interpretations, and with a scale of MiB/s. Our code already has to convert between the two scales, and report overflow as appropriate; although this conversion currently lives in the monitor code. In fact, our conversion code limited things to 63 bits, because we checked against LLONG_MAX and reject what would be negative bandwidth if treated as signed. On the bright side, our use of MiB/s means that even with a 32-bit unsigned long, we still have no problem representing a bandwidth of 2GiB/s, which is starting to be more feasible as 10-gigabit or even faster interfaces are used. And once you get past the physical speeds of existing interfaces, any larger bandwidth number behaves the same - effectively unlimited. But on the low side, the granularity of 1MiB/s tuning is rather coarse. So the new virDomainBlockJob API decided to go with a direct 64-bit bytes/sec number instead of the scaled number that prior blockjob APIs had used. But there is no point in rounding this number to MiB/s just to scale it back to bytes/s for handing to qemu. In order to make future code sharing possible between the old virDomainBlockRebase and the new virDomainBlockCopy, this patch moves the scaling and overflow detection into the driver code. Several of the block job calls that can set speed are fed through a common interface, so it was easier to adjust all block jobs at once, for consistency. This patch is just code motion; there should be no user-visible change in behavior. * src/qemu/qemu_monitor.h (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Change parameter type and scale. * src/qemu/qemu_monitor.c (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Move scaling and overflow detection... * src/qemu/qemu_driver.c (qemuDomainBlockJobImpl) (qemuDomainBlockRebase, qemuDomainBlockCommit): ...here. (qemuDomainBlockCopy): Use bytes/sec. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-08-29 13:58:45 -06:00
const char *format, unsigned long long bandwidth,
blockcopy: add qemu implementation of new tunables Upstream qemu 1.4 added some drive-mirror tunables not present when it was first introduced in 1.3. Management apps may want to set these in some cases (for example, without tuning granularity down to sector size, a copy may end up occupying more bytes than the original because an entire cluster is copied even when only a sector within the cluster is dirty, although tuning it down results in more CPU time to do the copy). I haven't personally needed to use the parameters, but since they exist, and since the new API supports virTypedParams, we might as well expose them. Since the tuning parameters aren't often used, and omitted from the QMP command when unspecified, I think it is safe to rely on qemu 1.3 to issue an error about them being unsupported, rather than trying to create a new capability bit in libvirt. Meanwhile, all versions of qemu from 1.4 to 2.1 have a bug where a bad granularity (such as non-power-of-2) gives a poor message: error: internal error: unable to execute QEMU command 'drive-mirror': Invalid parameter 'drive-virtio-disk0' because of abuse of QERR_INVALID_PARAMETER (which is supposed to name the parameter that was given a bad value, rather than the value passed to some other parameter). I don't see that a capability check will help, so we'll just live with it (and it has since been improved in upstream qemu). * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror): Add parameters. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror): Likewise. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror): Likewise. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONDriveMirror): Likewise. * src/qemu/qemu_driver.c (qemuDomainBlockCopyCommon): Likewise. (qemuDomainBlockRebase, qemuDomainBlockCopy): Adjust callers. * src/qemu/qemu_migration.c (qemuMigrationDriveMirror): Likewise. * tests/qemumonitorjsontest.c (qemuMonitorJSONDriveMirror): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-09-08 14:53:12 -06:00
unsigned int granularity, unsigned long long buf_size,
blockjob: add qemu capabilities related to block jobs Upstream qemu 1.3 is adding two new monitor commands, 'drive-mirror' and 'block-job-complete'[1], which can drive live block copy and storage migration. [Additionally, RHEL 6.3 had backported an earlier version of most of the same functionality, but under the names '__com.redhat_drive-mirror' and '__com.redhat_drive-reopen' and with slightly different JSON arguments, and has been using patches similar to these upstream patches for several months now.] The libvirt API virDomainBlockRebase as already committed for 0.9.12 is flexible enough to expose the basics of block copy, but some additional features in the 'drive-mirror' qemu command, such as setting error policy, setting granularity, or using a persistent bitmap, may later require a new libvirt API virDomainBlockCopy. I will wait to add that API until we know more about what qemu 1.3 will finally provide. This patch caters only to the upstream qemu 1.3 interface, although I have proven that the changes for RHEL 6.3 can be isolated to just qemu_monitor_json.c, and the rest of this series will gracefully handle either interface once the JSON differences are papered over in a downstream patch. For consistency with other block job commands, libvirt must handle the bandwidth argument as MiB/sec from the user, even though qemu exposes the speed argument as bytes/sec; then again, qemu rounds up to cluster size internally, so using MiB hides the worst effects of that rounding if you pass small numbers. [1]https://lists.gnu.org/archive/html/qemu-devel/2012-10/msg04123.html * src/qemu/qemu_capabilities.h (QEMU_CAPS_DRIVE_MIRROR) (QEMU_CAPS_DRIVE_REOPEN): New bits. * src/qemu/qemu_capabilities.c (qemuCaps): Name them. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONCheckCommands): Set them. (qemuMonitorJSONDriveMirror, qemuMonitorDrivePivot): New functions. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror) (qemuMonitorDrivePivot): Declare them. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): New passthroughs. * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): Declare them.
2012-09-28 17:29:53 -06:00
unsigned int flags)
{
VIR_DEBUG("device=%s, file=%s, format=%s, bandwidth=%lld, "
"granularity=%#x, buf_size=%lld, flags=0x%x",
device, file, NULLSTR(format), bandwidth, granularity,
blockcopy: add qemu implementation of new tunables Upstream qemu 1.4 added some drive-mirror tunables not present when it was first introduced in 1.3. Management apps may want to set these in some cases (for example, without tuning granularity down to sector size, a copy may end up occupying more bytes than the original because an entire cluster is copied even when only a sector within the cluster is dirty, although tuning it down results in more CPU time to do the copy). I haven't personally needed to use the parameters, but since they exist, and since the new API supports virTypedParams, we might as well expose them. Since the tuning parameters aren't often used, and omitted from the QMP command when unspecified, I think it is safe to rely on qemu 1.3 to issue an error about them being unsupported, rather than trying to create a new capability bit in libvirt. Meanwhile, all versions of qemu from 1.4 to 2.1 have a bug where a bad granularity (such as non-power-of-2) gives a poor message: error: internal error: unable to execute QEMU command 'drive-mirror': Invalid parameter 'drive-virtio-disk0' because of abuse of QERR_INVALID_PARAMETER (which is supposed to name the parameter that was given a bad value, rather than the value passed to some other parameter). I don't see that a capability check will help, so we'll just live with it (and it has since been improved in upstream qemu). * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror): Add parameters. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror): Likewise. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror): Likewise. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONDriveMirror): Likewise. * src/qemu/qemu_driver.c (qemuDomainBlockCopyCommon): Likewise. (qemuDomainBlockRebase, qemuDomainBlockCopy): Adjust callers. * src/qemu/qemu_migration.c (qemuMigrationDriveMirror): Likewise. * tests/qemumonitorjsontest.c (qemuMonitorJSONDriveMirror): Likewise. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-09-08 14:53:12 -06:00
buf_size, flags);
blockjob: add qemu capabilities related to block jobs Upstream qemu 1.3 is adding two new monitor commands, 'drive-mirror' and 'block-job-complete'[1], which can drive live block copy and storage migration. [Additionally, RHEL 6.3 had backported an earlier version of most of the same functionality, but under the names '__com.redhat_drive-mirror' and '__com.redhat_drive-reopen' and with slightly different JSON arguments, and has been using patches similar to these upstream patches for several months now.] The libvirt API virDomainBlockRebase as already committed for 0.9.12 is flexible enough to expose the basics of block copy, but some additional features in the 'drive-mirror' qemu command, such as setting error policy, setting granularity, or using a persistent bitmap, may later require a new libvirt API virDomainBlockCopy. I will wait to add that API until we know more about what qemu 1.3 will finally provide. This patch caters only to the upstream qemu 1.3 interface, although I have proven that the changes for RHEL 6.3 can be isolated to just qemu_monitor_json.c, and the rest of this series will gracefully handle either interface once the JSON differences are papered over in a downstream patch. For consistency with other block job commands, libvirt must handle the bandwidth argument as MiB/sec from the user, even though qemu exposes the speed argument as bytes/sec; then again, qemu rounds up to cluster size internally, so using MiB hides the worst effects of that rounding if you pass small numbers. [1]https://lists.gnu.org/archive/html/qemu-devel/2012-10/msg04123.html * src/qemu/qemu_capabilities.h (QEMU_CAPS_DRIVE_MIRROR) (QEMU_CAPS_DRIVE_REOPEN): New bits. * src/qemu/qemu_capabilities.c (qemuCaps): Name them. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONCheckCommands): Set them. (qemuMonitorJSONDriveMirror, qemuMonitorDrivePivot): New functions. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror) (qemuMonitorDrivePivot): Declare them. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): New passthroughs. * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): Declare them.
2012-09-28 17:29:53 -06:00
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONDriveMirror(mon, device, file, format, bandwidth,
granularity, buf_size, flags);
blockjob: add qemu capabilities related to block jobs Upstream qemu 1.3 is adding two new monitor commands, 'drive-mirror' and 'block-job-complete'[1], which can drive live block copy and storage migration. [Additionally, RHEL 6.3 had backported an earlier version of most of the same functionality, but under the names '__com.redhat_drive-mirror' and '__com.redhat_drive-reopen' and with slightly different JSON arguments, and has been using patches similar to these upstream patches for several months now.] The libvirt API virDomainBlockRebase as already committed for 0.9.12 is flexible enough to expose the basics of block copy, but some additional features in the 'drive-mirror' qemu command, such as setting error policy, setting granularity, or using a persistent bitmap, may later require a new libvirt API virDomainBlockCopy. I will wait to add that API until we know more about what qemu 1.3 will finally provide. This patch caters only to the upstream qemu 1.3 interface, although I have proven that the changes for RHEL 6.3 can be isolated to just qemu_monitor_json.c, and the rest of this series will gracefully handle either interface once the JSON differences are papered over in a downstream patch. For consistency with other block job commands, libvirt must handle the bandwidth argument as MiB/sec from the user, even though qemu exposes the speed argument as bytes/sec; then again, qemu rounds up to cluster size internally, so using MiB hides the worst effects of that rounding if you pass small numbers. [1]https://lists.gnu.org/archive/html/qemu-devel/2012-10/msg04123.html * src/qemu/qemu_capabilities.h (QEMU_CAPS_DRIVE_MIRROR) (QEMU_CAPS_DRIVE_REOPEN): New bits. * src/qemu/qemu_capabilities.c (qemuCaps): Name them. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONCheckCommands): Set them. (qemuMonitorJSONDriveMirror, qemuMonitorDrivePivot): New functions. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror) (qemuMonitorDrivePivot): Declare them. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): New passthroughs. * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): Declare them.
2012-09-28 17:29:53 -06:00
}
int
qemuMonitorBlockdevMirror(qemuMonitorPtr mon,
const char *jobname,
const char *device,
const char *target,
unsigned long long bandwidth,
unsigned int granularity,
unsigned long long buf_size,
unsigned int flags)
{
VIR_DEBUG("jobname=%s, device=%s, target=%s, bandwidth=%lld, "
"granularity=%#x, buf_size=%lld, flags=0x%x",
NULLSTR(jobname), device, target, bandwidth, granularity,
buf_size, flags);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockdevMirror(mon, jobname, device, target, bandwidth,
granularity, buf_size, flags);
}
snapshot: add support for qemu transaction command QEmu 1.1 is adding a 'transaction' command to the JSON monitor. Each element of a transaction corresponds to a top-level command, with the additional guarantee that the transaction flushes all pending I/O, then guarantees that all actions will be successful as a group or that failure will roll back the state to what it was before the monitor command. The difference between a top-level command: { "execute": "blockdev-snapshot-sync", "arguments": { "device": "virtio0", ... } } and a transaction: { "execute": "transaction", "arguments": { "actions": [ { "type": "blockdev-snapshot-sync", "data": { "device": "virtio0", ... } } ] } } is just a couple of changed key names and nesting the shorter command inside a JSON array to the longer command. This patch just adds the framework; the next patch will actually use a transaction. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONMakeCommand): Move guts... (qemuMonitorJSONMakeCommandRaw): ...into new helper. Add support for array element. (qemuMonitorJSONTransaction): New command. (qemuMonitorJSONDiskSnapshot): Support use in a transaction. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDiskSnapshot): Add argument. (qemuMonitorJSONTransaction): New declaration. * src/qemu/qemu_monitor.h (qemuMonitorTransaction): Likewise. (qemuMonitorDiskSnapshot): Add argument. * src/qemu/qemu_monitor.c (qemuMonitorTransaction): New wrapper. (qemuMonitorDiskSnapshot): Pass argument on. * src/qemu/qemu_driver.c (qemuDomainSnapshotCreateSingleDiskActive): Update caller.
2012-03-16 22:17:28 -06:00
/* Use the transaction QMP command to run atomic snapshot commands. */
int
qemuMonitorTransaction(qemuMonitorPtr mon, virJSONValuePtr *actions)
snapshot: add support for qemu transaction command QEmu 1.1 is adding a 'transaction' command to the JSON monitor. Each element of a transaction corresponds to a top-level command, with the additional guarantee that the transaction flushes all pending I/O, then guarantees that all actions will be successful as a group or that failure will roll back the state to what it was before the monitor command. The difference between a top-level command: { "execute": "blockdev-snapshot-sync", "arguments": { "device": "virtio0", ... } } and a transaction: { "execute": "transaction", "arguments": { "actions": [ { "type": "blockdev-snapshot-sync", "data": { "device": "virtio0", ... } } ] } } is just a couple of changed key names and nesting the shorter command inside a JSON array to the longer command. This patch just adds the framework; the next patch will actually use a transaction. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONMakeCommand): Move guts... (qemuMonitorJSONMakeCommandRaw): ...into new helper. Add support for array element. (qemuMonitorJSONTransaction): New command. (qemuMonitorJSONDiskSnapshot): Support use in a transaction. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDiskSnapshot): Add argument. (qemuMonitorJSONTransaction): New declaration. * src/qemu/qemu_monitor.h (qemuMonitorTransaction): Likewise. (qemuMonitorDiskSnapshot): Add argument. * src/qemu/qemu_monitor.c (qemuMonitorTransaction): New wrapper. (qemuMonitorDiskSnapshot): Pass argument on. * src/qemu/qemu_driver.c (qemuDomainSnapshotCreateSingleDiskActive): Update caller.
2012-03-16 22:17:28 -06:00
{
VIR_DEBUG("actions=%p", *actions);
snapshot: add support for qemu transaction command QEmu 1.1 is adding a 'transaction' command to the JSON monitor. Each element of a transaction corresponds to a top-level command, with the additional guarantee that the transaction flushes all pending I/O, then guarantees that all actions will be successful as a group or that failure will roll back the state to what it was before the monitor command. The difference between a top-level command: { "execute": "blockdev-snapshot-sync", "arguments": { "device": "virtio0", ... } } and a transaction: { "execute": "transaction", "arguments": { "actions": [ { "type": "blockdev-snapshot-sync", "data": { "device": "virtio0", ... } } ] } } is just a couple of changed key names and nesting the shorter command inside a JSON array to the longer command. This patch just adds the framework; the next patch will actually use a transaction. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONMakeCommand): Move guts... (qemuMonitorJSONMakeCommandRaw): ...into new helper. Add support for array element. (qemuMonitorJSONTransaction): New command. (qemuMonitorJSONDiskSnapshot): Support use in a transaction. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDiskSnapshot): Add argument. (qemuMonitorJSONTransaction): New declaration. * src/qemu/qemu_monitor.h (qemuMonitorTransaction): Likewise. (qemuMonitorDiskSnapshot): Add argument. * src/qemu/qemu_monitor.c (qemuMonitorTransaction): New wrapper. (qemuMonitorDiskSnapshot): Pass argument on. * src/qemu/qemu_driver.c (qemuDomainSnapshotCreateSingleDiskActive): Update caller.
2012-03-16 22:17:28 -06:00
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONTransaction(mon, actions);
}
blockjob: hoist bandwidth scaling out of monitor code qemu treats blockjob bandwidth as a 64-bit number, in the units of bytes/second. But we stupidly modeled block job bandwidth after migration bandwidth, which in turn was an 'unsigned long' and therefore subject to 32-bit vs. 64-bit interpretations, and with a scale of MiB/s. Our code already has to convert between the two scales, and report overflow as appropriate; although this conversion currently lives in the monitor code. In fact, our conversion code limited things to 63 bits, because we checked against LLONG_MAX and reject what would be negative bandwidth if treated as signed. On the bright side, our use of MiB/s means that even with a 32-bit unsigned long, we still have no problem representing a bandwidth of 2GiB/s, which is starting to be more feasible as 10-gigabit or even faster interfaces are used. And once you get past the physical speeds of existing interfaces, any larger bandwidth number behaves the same - effectively unlimited. But on the low side, the granularity of 1MiB/s tuning is rather coarse. So the new virDomainBlockJob API decided to go with a direct 64-bit bytes/sec number instead of the scaled number that prior blockjob APIs had used. But there is no point in rounding this number to MiB/s just to scale it back to bytes/s for handing to qemu. In order to make future code sharing possible between the old virDomainBlockRebase and the new virDomainBlockCopy, this patch moves the scaling and overflow detection into the driver code. Several of the block job calls that can set speed are fed through a common interface, so it was easier to adjust all block jobs at once, for consistency. This patch is just code motion; there should be no user-visible change in behavior. * src/qemu/qemu_monitor.h (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Change parameter type and scale. * src/qemu/qemu_monitor.c (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Move scaling and overflow detection... * src/qemu/qemu_driver.c (qemuDomainBlockJobImpl) (qemuDomainBlockRebase, qemuDomainBlockCommit): ...here. (qemuDomainBlockCopy): Use bytes/sec. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-08-29 13:58:45 -06:00
/* Start a block-commit block job. bandwidth is in bytes/sec. */
int
qemuMonitorBlockCommit(qemuMonitorPtr mon, const char *device,
const char *top, const char *base,
const char *backingName,
blockjob: hoist bandwidth scaling out of monitor code qemu treats blockjob bandwidth as a 64-bit number, in the units of bytes/second. But we stupidly modeled block job bandwidth after migration bandwidth, which in turn was an 'unsigned long' and therefore subject to 32-bit vs. 64-bit interpretations, and with a scale of MiB/s. Our code already has to convert between the two scales, and report overflow as appropriate; although this conversion currently lives in the monitor code. In fact, our conversion code limited things to 63 bits, because we checked against LLONG_MAX and reject what would be negative bandwidth if treated as signed. On the bright side, our use of MiB/s means that even with a 32-bit unsigned long, we still have no problem representing a bandwidth of 2GiB/s, which is starting to be more feasible as 10-gigabit or even faster interfaces are used. And once you get past the physical speeds of existing interfaces, any larger bandwidth number behaves the same - effectively unlimited. But on the low side, the granularity of 1MiB/s tuning is rather coarse. So the new virDomainBlockJob API decided to go with a direct 64-bit bytes/sec number instead of the scaled number that prior blockjob APIs had used. But there is no point in rounding this number to MiB/s just to scale it back to bytes/s for handing to qemu. In order to make future code sharing possible between the old virDomainBlockRebase and the new virDomainBlockCopy, this patch moves the scaling and overflow detection into the driver code. Several of the block job calls that can set speed are fed through a common interface, so it was easier to adjust all block jobs at once, for consistency. This patch is just code motion; there should be no user-visible change in behavior. * src/qemu/qemu_monitor.h (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Change parameter type and scale. * src/qemu/qemu_monitor.c (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Move scaling and overflow detection... * src/qemu/qemu_driver.c (qemuDomainBlockJobImpl) (qemuDomainBlockRebase, qemuDomainBlockCommit): ...here. (qemuDomainBlockCopy): Use bytes/sec. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-08-29 13:58:45 -06:00
unsigned long long bandwidth)
{
VIR_DEBUG("device=%s, top=%s, base=%s, backingName=%s, bandwidth=%llu",
device, top, base, NULLSTR(backingName), bandwidth);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockCommit(mon, device, top, base,
backingName, bandwidth);
}
blockjob: allow omitted arguments to QMP block-commit We are about to turn on support for active block commit. Although qemu 2.0 was the first version to mostly support it, that version mis-handles 0-length files, and doesn't have anything available for easy probing. But qemu 2.1 fixed bugs, and made life simpler by letting the 'top' argument be optional. Unless someone begs for active commit with qemu 2.0, for now we are just going to enable it only by probing for qemu 2.1 behavior (anyone backporting active commit can also backport the optional argument behavior). This requires qemu.git commit 7676e2c597000eff3a7233b40cca768b358f9bc9. Although all our actual uses of block-commit supply arguments for both base and top, we can omit both arguments and use a bogus device string to trigger an interesting behavior in qemu. All QMP commands first do argument validation, failing with GenericError if a mandatory argument is missing. Once that passes, the code in the specific command gets to do further checking, and the qemu developers made sure that if device is the only supplied argument, then the block-commit code will look up the device first, with a failure of DeviceNotFound, before attempting any further argument validation (most other validations fail with GenericError). Thus, the category of error class can reliably be used to decipher whether the top argument was optional, which in turn implies a working active commit. Since we expect our bogus device string to trigger an error either way, the code is written to return a distinct return value without spamming the logs. * src/qemu/qemu_monitor.h (qemuMonitorSupportsActiveCommit): New prototype. * src/qemu/qemu_monitor.c (qemuMonitorSupportsActiveCommit): Implement it. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONBlockCommit): Allow NULL for top and base, for probing purposes. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONBlockCommit): Likewise, implementing the probe. * tests/qemumonitorjsontest.c (mymain): Enable... (testQemuMonitorJSONqemuMonitorSupportsActiveCommit): ...a new test. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-06-16 21:42:49 -06:00
/* Probe whether active commits are supported by a given qemu binary. */
bool
qemuMonitorSupportsActiveCommit(qemuMonitorPtr mon)
{
if (!mon || !mon->json)
blockjob: allow omitted arguments to QMP block-commit We are about to turn on support for active block commit. Although qemu 2.0 was the first version to mostly support it, that version mis-handles 0-length files, and doesn't have anything available for easy probing. But qemu 2.1 fixed bugs, and made life simpler by letting the 'top' argument be optional. Unless someone begs for active commit with qemu 2.0, for now we are just going to enable it only by probing for qemu 2.1 behavior (anyone backporting active commit can also backport the optional argument behavior). This requires qemu.git commit 7676e2c597000eff3a7233b40cca768b358f9bc9. Although all our actual uses of block-commit supply arguments for both base and top, we can omit both arguments and use a bogus device string to trigger an interesting behavior in qemu. All QMP commands first do argument validation, failing with GenericError if a mandatory argument is missing. Once that passes, the code in the specific command gets to do further checking, and the qemu developers made sure that if device is the only supplied argument, then the block-commit code will look up the device first, with a failure of DeviceNotFound, before attempting any further argument validation (most other validations fail with GenericError). Thus, the category of error class can reliably be used to decipher whether the top argument was optional, which in turn implies a working active commit. Since we expect our bogus device string to trigger an error either way, the code is written to return a distinct return value without spamming the logs. * src/qemu/qemu_monitor.h (qemuMonitorSupportsActiveCommit): New prototype. * src/qemu/qemu_monitor.c (qemuMonitorSupportsActiveCommit): Implement it. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONBlockCommit): Allow NULL for top and base, for probing purposes. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONBlockCommit): Likewise, implementing the probe. * tests/qemumonitorjsontest.c (mymain): Enable... (testQemuMonitorJSONqemuMonitorSupportsActiveCommit): ...a new test. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-06-16 21:42:49 -06:00
return false;
return qemuMonitorJSONSupportsActiveCommit(mon);
blockjob: allow omitted arguments to QMP block-commit We are about to turn on support for active block commit. Although qemu 2.0 was the first version to mostly support it, that version mis-handles 0-length files, and doesn't have anything available for easy probing. But qemu 2.1 fixed bugs, and made life simpler by letting the 'top' argument be optional. Unless someone begs for active commit with qemu 2.0, for now we are just going to enable it only by probing for qemu 2.1 behavior (anyone backporting active commit can also backport the optional argument behavior). This requires qemu.git commit 7676e2c597000eff3a7233b40cca768b358f9bc9. Although all our actual uses of block-commit supply arguments for both base and top, we can omit both arguments and use a bogus device string to trigger an interesting behavior in qemu. All QMP commands first do argument validation, failing with GenericError if a mandatory argument is missing. Once that passes, the code in the specific command gets to do further checking, and the qemu developers made sure that if device is the only supplied argument, then the block-commit code will look up the device first, with a failure of DeviceNotFound, before attempting any further argument validation (most other validations fail with GenericError). Thus, the category of error class can reliably be used to decipher whether the top argument was optional, which in turn implies a working active commit. Since we expect our bogus device string to trigger an error either way, the code is written to return a distinct return value without spamming the logs. * src/qemu/qemu_monitor.h (qemuMonitorSupportsActiveCommit): New prototype. * src/qemu/qemu_monitor.c (qemuMonitorSupportsActiveCommit): Implement it. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONBlockCommit): Allow NULL for top and base, for probing purposes. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONBlockCommit): Likewise, implementing the probe. * tests/qemumonitorjsontest.c (mymain): Enable... (testQemuMonitorJSONqemuMonitorSupportsActiveCommit): ...a new test. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-06-16 21:42:49 -06:00
}
qemu: read backing chain names from qemu https://bugzilla.redhat.com/show_bug.cgi?id=1199182 documents that after a series of disk snapshots into existing destination images, followed by active commits of the top image, it is possible for qemu 2.2 and earlier to end up tracking a different name for the image than what it would have had when opening the chain afresh. That is, when starting with the chain 'a <- b <- c', the name associated with 'b' is how it was spelled in the metadata of 'c', but when starting with 'a', taking two snapshots into 'a <- b <- c', then committing 'c' back into 'b', the name associated with 'b' is now the name used when taking the first snapshot. Sadly, older qemu doesn't know how to treat different spellings of the same filename as identical files (it uses strcmp() instead of checking for the same inode), which means libvirt's attempt to commit an image using solely the names learned from qcow2 metadata fails with a cryptic: error: internal error: unable to execute QEMU command 'block-commit': Top image file /tmp/images/c/../b/b not found even though the file exists. Trying to teach libvirt the rules on which name qemu will expect is not worth the effort (besides, we'd have to remember it across libvirtd restarts, and track whether a file was opened via metadata or via snapshot creation for a given qemu process); it is easier to just always directly ask qemu what string it expects to see in the first place. As a safety valve, we validate that any name returned by qemu still maps to the same local file as we have tracked it, so that a compromised qemu cannot accidentally cause us to act on an incorrect file. * src/qemu/qemu_monitor.h (qemuMonitorDiskNameLookup): New prototype. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDiskNameLookup): Likewise. * src/qemu/qemu_monitor.c (qemuMonitorDiskNameLookup): New function. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONDiskNameLookup) (qemuMonitorJSONDiskNameLookupOne): Likewise. * src/qemu/qemu_driver.c (qemuDomainBlockCommit) (qemuDomainBlockJobImpl): Use it. Signed-off-by: Eric Blake <eblake@redhat.com>
2015-03-11 14:37:04 -06:00
/* Determine the name that qemu is using for tracking the backing
* element TARGET within the chain starting at TOP. */
char *
qemuMonitorDiskNameLookup(qemuMonitorPtr mon,
const char *device,
virStorageSourcePtr top,
virStorageSourcePtr target)
{
QEMU_CHECK_MONITOR_NULL(mon);
qemu: read backing chain names from qemu https://bugzilla.redhat.com/show_bug.cgi?id=1199182 documents that after a series of disk snapshots into existing destination images, followed by active commits of the top image, it is possible for qemu 2.2 and earlier to end up tracking a different name for the image than what it would have had when opening the chain afresh. That is, when starting with the chain 'a <- b <- c', the name associated with 'b' is how it was spelled in the metadata of 'c', but when starting with 'a', taking two snapshots into 'a <- b <- c', then committing 'c' back into 'b', the name associated with 'b' is now the name used when taking the first snapshot. Sadly, older qemu doesn't know how to treat different spellings of the same filename as identical files (it uses strcmp() instead of checking for the same inode), which means libvirt's attempt to commit an image using solely the names learned from qcow2 metadata fails with a cryptic: error: internal error: unable to execute QEMU command 'block-commit': Top image file /tmp/images/c/../b/b not found even though the file exists. Trying to teach libvirt the rules on which name qemu will expect is not worth the effort (besides, we'd have to remember it across libvirtd restarts, and track whether a file was opened via metadata or via snapshot creation for a given qemu process); it is easier to just always directly ask qemu what string it expects to see in the first place. As a safety valve, we validate that any name returned by qemu still maps to the same local file as we have tracked it, so that a compromised qemu cannot accidentally cause us to act on an incorrect file. * src/qemu/qemu_monitor.h (qemuMonitorDiskNameLookup): New prototype. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDiskNameLookup): Likewise. * src/qemu/qemu_monitor.c (qemuMonitorDiskNameLookup): New function. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONDiskNameLookup) (qemuMonitorJSONDiskNameLookupOne): Likewise. * src/qemu/qemu_driver.c (qemuDomainBlockCommit) (qemuDomainBlockJobImpl): Use it. Signed-off-by: Eric Blake <eblake@redhat.com>
2015-03-11 14:37:04 -06:00
return qemuMonitorJSONDiskNameLookup(mon, device, top, target);
}
/* Use the block-job-complete monitor command to pivot a block copy job. */
blockjob: add qemu capabilities related to block jobs Upstream qemu 1.3 is adding two new monitor commands, 'drive-mirror' and 'block-job-complete'[1], which can drive live block copy and storage migration. [Additionally, RHEL 6.3 had backported an earlier version of most of the same functionality, but under the names '__com.redhat_drive-mirror' and '__com.redhat_drive-reopen' and with slightly different JSON arguments, and has been using patches similar to these upstream patches for several months now.] The libvirt API virDomainBlockRebase as already committed for 0.9.12 is flexible enough to expose the basics of block copy, but some additional features in the 'drive-mirror' qemu command, such as setting error policy, setting granularity, or using a persistent bitmap, may later require a new libvirt API virDomainBlockCopy. I will wait to add that API until we know more about what qemu 1.3 will finally provide. This patch caters only to the upstream qemu 1.3 interface, although I have proven that the changes for RHEL 6.3 can be isolated to just qemu_monitor_json.c, and the rest of this series will gracefully handle either interface once the JSON differences are papered over in a downstream patch. For consistency with other block job commands, libvirt must handle the bandwidth argument as MiB/sec from the user, even though qemu exposes the speed argument as bytes/sec; then again, qemu rounds up to cluster size internally, so using MiB hides the worst effects of that rounding if you pass small numbers. [1]https://lists.gnu.org/archive/html/qemu-devel/2012-10/msg04123.html * src/qemu/qemu_capabilities.h (QEMU_CAPS_DRIVE_MIRROR) (QEMU_CAPS_DRIVE_REOPEN): New bits. * src/qemu/qemu_capabilities.c (qemuCaps): Name them. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONCheckCommands): Set them. (qemuMonitorJSONDriveMirror, qemuMonitorDrivePivot): New functions. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror) (qemuMonitorDrivePivot): Declare them. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): New passthroughs. * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): Declare them.
2012-09-28 17:29:53 -06:00
int
qemuMonitorDrivePivot(qemuMonitorPtr mon,
const char *jobname)
blockjob: add qemu capabilities related to block jobs Upstream qemu 1.3 is adding two new monitor commands, 'drive-mirror' and 'block-job-complete'[1], which can drive live block copy and storage migration. [Additionally, RHEL 6.3 had backported an earlier version of most of the same functionality, but under the names '__com.redhat_drive-mirror' and '__com.redhat_drive-reopen' and with slightly different JSON arguments, and has been using patches similar to these upstream patches for several months now.] The libvirt API virDomainBlockRebase as already committed for 0.9.12 is flexible enough to expose the basics of block copy, but some additional features in the 'drive-mirror' qemu command, such as setting error policy, setting granularity, or using a persistent bitmap, may later require a new libvirt API virDomainBlockCopy. I will wait to add that API until we know more about what qemu 1.3 will finally provide. This patch caters only to the upstream qemu 1.3 interface, although I have proven that the changes for RHEL 6.3 can be isolated to just qemu_monitor_json.c, and the rest of this series will gracefully handle either interface once the JSON differences are papered over in a downstream patch. For consistency with other block job commands, libvirt must handle the bandwidth argument as MiB/sec from the user, even though qemu exposes the speed argument as bytes/sec; then again, qemu rounds up to cluster size internally, so using MiB hides the worst effects of that rounding if you pass small numbers. [1]https://lists.gnu.org/archive/html/qemu-devel/2012-10/msg04123.html * src/qemu/qemu_capabilities.h (QEMU_CAPS_DRIVE_MIRROR) (QEMU_CAPS_DRIVE_REOPEN): New bits. * src/qemu/qemu_capabilities.c (qemuCaps): Name them. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONCheckCommands): Set them. (qemuMonitorJSONDriveMirror, qemuMonitorDrivePivot): New functions. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror) (qemuMonitorDrivePivot): Declare them. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): New passthroughs. * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): Declare them.
2012-09-28 17:29:53 -06:00
{
VIR_DEBUG("jobname=%s", jobname);
blockjob: add qemu capabilities related to block jobs Upstream qemu 1.3 is adding two new monitor commands, 'drive-mirror' and 'block-job-complete'[1], which can drive live block copy and storage migration. [Additionally, RHEL 6.3 had backported an earlier version of most of the same functionality, but under the names '__com.redhat_drive-mirror' and '__com.redhat_drive-reopen' and with slightly different JSON arguments, and has been using patches similar to these upstream patches for several months now.] The libvirt API virDomainBlockRebase as already committed for 0.9.12 is flexible enough to expose the basics of block copy, but some additional features in the 'drive-mirror' qemu command, such as setting error policy, setting granularity, or using a persistent bitmap, may later require a new libvirt API virDomainBlockCopy. I will wait to add that API until we know more about what qemu 1.3 will finally provide. This patch caters only to the upstream qemu 1.3 interface, although I have proven that the changes for RHEL 6.3 can be isolated to just qemu_monitor_json.c, and the rest of this series will gracefully handle either interface once the JSON differences are papered over in a downstream patch. For consistency with other block job commands, libvirt must handle the bandwidth argument as MiB/sec from the user, even though qemu exposes the speed argument as bytes/sec; then again, qemu rounds up to cluster size internally, so using MiB hides the worst effects of that rounding if you pass small numbers. [1]https://lists.gnu.org/archive/html/qemu-devel/2012-10/msg04123.html * src/qemu/qemu_capabilities.h (QEMU_CAPS_DRIVE_MIRROR) (QEMU_CAPS_DRIVE_REOPEN): New bits. * src/qemu/qemu_capabilities.c (qemuCaps): Name them. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONCheckCommands): Set them. (qemuMonitorJSONDriveMirror, qemuMonitorDrivePivot): New functions. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror) (qemuMonitorDrivePivot): Declare them. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): New passthroughs. * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): Declare them.
2012-09-28 17:29:53 -06:00
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONDrivePivot(mon, jobname);
blockjob: add qemu capabilities related to block jobs Upstream qemu 1.3 is adding two new monitor commands, 'drive-mirror' and 'block-job-complete'[1], which can drive live block copy and storage migration. [Additionally, RHEL 6.3 had backported an earlier version of most of the same functionality, but under the names '__com.redhat_drive-mirror' and '__com.redhat_drive-reopen' and with slightly different JSON arguments, and has been using patches similar to these upstream patches for several months now.] The libvirt API virDomainBlockRebase as already committed for 0.9.12 is flexible enough to expose the basics of block copy, but some additional features in the 'drive-mirror' qemu command, such as setting error policy, setting granularity, or using a persistent bitmap, may later require a new libvirt API virDomainBlockCopy. I will wait to add that API until we know more about what qemu 1.3 will finally provide. This patch caters only to the upstream qemu 1.3 interface, although I have proven that the changes for RHEL 6.3 can be isolated to just qemu_monitor_json.c, and the rest of this series will gracefully handle either interface once the JSON differences are papered over in a downstream patch. For consistency with other block job commands, libvirt must handle the bandwidth argument as MiB/sec from the user, even though qemu exposes the speed argument as bytes/sec; then again, qemu rounds up to cluster size internally, so using MiB hides the worst effects of that rounding if you pass small numbers. [1]https://lists.gnu.org/archive/html/qemu-devel/2012-10/msg04123.html * src/qemu/qemu_capabilities.h (QEMU_CAPS_DRIVE_MIRROR) (QEMU_CAPS_DRIVE_REOPEN): New bits. * src/qemu/qemu_capabilities.c (qemuCaps): Name them. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONCheckCommands): Set them. (qemuMonitorJSONDriveMirror, qemuMonitorDrivePivot): New functions. * src/qemu/qemu_monitor_json.h (qemuMonitorJSONDriveMirror) (qemuMonitorDrivePivot): Declare them. * src/qemu/qemu_monitor.c (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): New passthroughs. * src/qemu/qemu_monitor.h (qemuMonitorDriveMirror) (qemuMonitorDrivePivot): Declare them.
2012-09-28 17:29:53 -06:00
}
int
qemuMonitorArbitraryCommand(qemuMonitorPtr mon,
const char *cmd,
char **reply,
bool hmp)
{
VIR_DEBUG("cmd=%s, reply=%p, hmp=%d", cmd, reply, hmp);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONArbitraryCommand(mon, cmd, reply, hmp);
}
int
qemuMonitorInjectNMI(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONInjectNMI(mon);
}
int
qemuMonitorSendKey(qemuMonitorPtr mon,
unsigned int holdtime,
unsigned int *keycodes,
unsigned int nkeycodes)
{
VIR_DEBUG("holdtime=%u, nkeycodes=%u", holdtime, nkeycodes);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSendKey(mon, holdtime, keycodes, nkeycodes);
}
int
qemuMonitorScreendump(qemuMonitorPtr mon,
const char *device,
unsigned int head,
const char *file)
{
VIR_DEBUG("file=%s", file);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONScreendump(mon, device, head, file);
}
blockjob: hoist bandwidth scaling out of monitor code qemu treats blockjob bandwidth as a 64-bit number, in the units of bytes/second. But we stupidly modeled block job bandwidth after migration bandwidth, which in turn was an 'unsigned long' and therefore subject to 32-bit vs. 64-bit interpretations, and with a scale of MiB/s. Our code already has to convert between the two scales, and report overflow as appropriate; although this conversion currently lives in the monitor code. In fact, our conversion code limited things to 63 bits, because we checked against LLONG_MAX and reject what would be negative bandwidth if treated as signed. On the bright side, our use of MiB/s means that even with a 32-bit unsigned long, we still have no problem representing a bandwidth of 2GiB/s, which is starting to be more feasible as 10-gigabit or even faster interfaces are used. And once you get past the physical speeds of existing interfaces, any larger bandwidth number behaves the same - effectively unlimited. But on the low side, the granularity of 1MiB/s tuning is rather coarse. So the new virDomainBlockJob API decided to go with a direct 64-bit bytes/sec number instead of the scaled number that prior blockjob APIs had used. But there is no point in rounding this number to MiB/s just to scale it back to bytes/s for handing to qemu. In order to make future code sharing possible between the old virDomainBlockRebase and the new virDomainBlockCopy, this patch moves the scaling and overflow detection into the driver code. Several of the block job calls that can set speed are fed through a common interface, so it was easier to adjust all block jobs at once, for consistency. This patch is just code motion; there should be no user-visible change in behavior. * src/qemu/qemu_monitor.h (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Change parameter type and scale. * src/qemu/qemu_monitor.c (qemuMonitorBlockJob) (qemuMonitorBlockCommit, qemuMonitorDriveMirror): Move scaling and overflow detection... * src/qemu/qemu_driver.c (qemuDomainBlockJobImpl) (qemuDomainBlockRebase, qemuDomainBlockCommit): ...here. (qemuDomainBlockCopy): Use bytes/sec. Signed-off-by: Eric Blake <eblake@redhat.com>
2014-08-29 13:58:45 -06:00
/* bandwidth is in bytes/sec */
int
qemuMonitorBlockStream(qemuMonitorPtr mon,
const char *device,
const char *base,
const char *backingName,
unsigned long long bandwidth)
{
VIR_DEBUG("device=%s, base=%s, backingName=%s, bandwidth=%lluB",
device, NULLSTR(base), NULLSTR(backingName), bandwidth);
blockjob: fix block-stream bandwidth race With RHEL 6.2, virDomainBlockPull(dom, dev, bandwidth, 0) has a race with non-zero bandwidth: there is a window between the block_stream and block_job_set_speed monitor commands where an unlimited amount of data was let through, defeating the point of a throttle. This race was first identified in commit a9d3495e, and libvirt was able to reduce the size of the window for that race. In the meantime, the qemu developers decided to fix things properly; per this message: https://lists.gnu.org/archive/html/qemu-devel/2012-04/msg03793.html the fix will be in qemu 1.1, and changes block-job-set-speed to use a different parameter name, as well as adding a new optional parameter to block-stream, which eliminates the race altogether. Since our documentation already mentioned that we can refuse a non-zero bandwidth for some hypervisors, I think the best solution is to do just that for RHEL 6.2 qemu, so that the race is obvious to the user (anyone using stock RHEL 6.2 binaries won't have this patch, and anyone building their own libvirt with this patch for RHEL can also rebuild qemu to get the modern semantics, so it is no real loss in behavior). Meanwhile the code must be fixed to honor actual qemu 1.1 naming. Rename the parameter to 'modern', since the naming difference now covers more than just 'async' block-job-cancel. And while at it, fix an unchecked integer overflow. * src/qemu/qemu_monitor.h (enum BLOCK_JOB_CMD): Drop unused value, rename enum to match conventions. * src/qemu/qemu_monitor.c (qemuMonitorBlockJob): Reflect enum rename. * src/qemu_qemu_monitor_json.h (qemuMonitorJSONBlockJob): Likewise. * src/qemu/qemu_monitor_json.c (qemuMonitorJSONBlockJob): Likewise, and support difference between RHEL 6.2 and qemu 1.1 block pull. * src/qemu/qemu_driver.c (qemuDomainBlockJobImpl): Reject bandwidth during pull with too-old qemu. * src/libvirt.c (virDomainBlockPull, virDomainBlockRebase): Document this.
2012-04-25 16:49:44 -06:00
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockStream(mon, device, base, backingName, bandwidth);
}
int
qemuMonitorBlockJobCancel(qemuMonitorPtr mon,
const char *jobname)
{
VIR_DEBUG("jobname=%s", jobname);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockJobCancel(mon, jobname);
}
int
qemuMonitorBlockJobSetSpeed(qemuMonitorPtr mon,
const char *jobname,
unsigned long long bandwidth)
{
VIR_DEBUG("jobname=%s, bandwidth=%lluB", jobname, bandwidth);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockJobSetSpeed(mon, jobname, bandwidth);
}
virHashTablePtr
qemuMonitorGetAllBlockJobInfo(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR_NULL(mon);
return qemuMonitorJSONGetAllBlockJobInfo(mon);
}
/**
* qemuMonitorGetBlockJobInfo:
* Parse Block Job information, and populate info for the named device.
* Return 1 if info available, 0 if device has no block job, and -1 on error.
*/
int
qemuMonitorGetBlockJobInfo(qemuMonitorPtr mon,
const char *alias,
qemuMonitorBlockJobInfoPtr info)
{
virHashTablePtr all;
qemuMonitorBlockJobInfoPtr data;
int ret = 0;
VIR_DEBUG("alias=%s, info=%p", alias, info);
if (!(all = qemuMonitorGetAllBlockJobInfo(mon)))
return -1;
if ((data = virHashLookup(all, alias))) {
*info = *data;
ret = 1;
}
virHashFree(all);
return ret;
}
int
qemuMonitorSetBlockIoThrottle(qemuMonitorPtr mon,
const char *drivealias,
const char *qomid,
virDomainBlockIoTuneInfoPtr info,
bool supportMaxOptions,
bool supportGroupNameOption,
bool supportMaxLengthOptions)
{
VIR_DEBUG("drivealias=%s, qomid=%s, info=%p",
NULLSTR(drivealias), NULLSTR(qomid), info);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSetBlockIoThrottle(mon, drivealias, qomid, info,
supportMaxOptions,
supportGroupNameOption,
supportMaxLengthOptions);
}
int
qemuMonitorGetBlockIoThrottle(qemuMonitorPtr mon,
const char *drivealias,
const char *qdevid,
virDomainBlockIoTuneInfoPtr reply)
{
VIR_DEBUG("drivealias=%s, qdevid=%s, reply=%p",
NULLSTR(drivealias), NULLSTR(qdevid), reply);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetBlockIoThrottle(mon, drivealias, qdevid, reply);
}
int
qemuMonitorVMStatusToPausedReason(const char *status)
{
int st;
if (!status)
return VIR_DOMAIN_PAUSED_UNKNOWN;
if ((st = qemuMonitorVMStatusTypeFromString(status)) < 0) {
VIR_WARN("QEMU reported unknown VM status: '%s'", status);
return VIR_DOMAIN_PAUSED_UNKNOWN;
}
switch ((qemuMonitorVMStatus) st) {
case QEMU_MONITOR_VM_STATUS_DEBUG:
case QEMU_MONITOR_VM_STATUS_INTERNAL_ERROR:
case QEMU_MONITOR_VM_STATUS_RESTORE_VM:
return VIR_DOMAIN_PAUSED_UNKNOWN;
case QEMU_MONITOR_VM_STATUS_INMIGRATE:
case QEMU_MONITOR_VM_STATUS_POSTMIGRATE:
case QEMU_MONITOR_VM_STATUS_FINISH_MIGRATE:
return VIR_DOMAIN_PAUSED_MIGRATION;
case QEMU_MONITOR_VM_STATUS_IO_ERROR:
return VIR_DOMAIN_PAUSED_IOERROR;
case QEMU_MONITOR_VM_STATUS_PAUSED:
case QEMU_MONITOR_VM_STATUS_PRELAUNCH:
return VIR_DOMAIN_PAUSED_USER;
case QEMU_MONITOR_VM_STATUS_RUNNING:
VIR_WARN("QEMU reports the guest is paused but status is 'running'");
return VIR_DOMAIN_PAUSED_UNKNOWN;
case QEMU_MONITOR_VM_STATUS_SAVE_VM:
return VIR_DOMAIN_PAUSED_SAVE;
case QEMU_MONITOR_VM_STATUS_SHUTDOWN:
return VIR_DOMAIN_PAUSED_SHUTTING_DOWN;
case QEMU_MONITOR_VM_STATUS_WATCHDOG:
return VIR_DOMAIN_PAUSED_WATCHDOG;
case QEMU_MONITOR_VM_STATUS_GUEST_PANICKED:
return VIR_DOMAIN_PAUSED_CRASHED;
/* unreachable from this point on */
case QEMU_MONITOR_VM_STATUS_LAST:
;
}
return VIR_DOMAIN_PAUSED_UNKNOWN;
}
int
qemuMonitorOpenGraphics(qemuMonitorPtr mon,
const char *protocol,
int fd,
const char *fdname,
bool skipauth)
{
VIR_DEBUG("protocol=%s fd=%d fdname=%s skipauth=%d",
protocol, fd, NULLSTR(fdname), skipauth);
int ret;
QEMU_CHECK_MONITOR(mon);
if (qemuMonitorSendFileHandle(mon, fdname, fd) < 0)
return -1;
ret = qemuMonitorJSONOpenGraphics(mon, protocol, fdname, skipauth);
if (ret < 0) {
if (qemuMonitorCloseFileHandle(mon, fdname) < 0)
VIR_WARN("failed to close device handle '%s'", fdname);
}
return ret;
}
int
qemuMonitorSystemWakeup(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSystemWakeup(mon);
}
int
qemuMonitorGetVersion(qemuMonitorPtr mon,
int *major,
int *minor,
int *micro,
char **package)
{
VIR_DEBUG("major=%p minor=%p micro=%p package=%p",
major, minor, micro, package);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetVersion(mon, major, minor, micro, package);
}
int
qemuMonitorGetMachines(qemuMonitorPtr mon,
qemuMonitorMachineInfoPtr **machines)
{
VIR_DEBUG("machines=%p", machines);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetMachines(mon, machines);
}
void
qemuMonitorMachineInfoFree(qemuMonitorMachineInfoPtr machine)
{
if (!machine)
return;
VIR_FREE(machine->name);
VIR_FREE(machine->alias);
VIR_FREE(machine);
}
int
qemuMonitorGetCPUDefinitions(qemuMonitorPtr mon,
qemuMonitorCPUDefInfoPtr **cpus)
{
VIR_DEBUG("cpus=%p", cpus);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetCPUDefinitions(mon, cpus);
}
void
qemuMonitorCPUDefInfoFree(qemuMonitorCPUDefInfoPtr cpu)
{
if (!cpu)
return;
virStringListFree(cpu->blockers);
VIR_FREE(cpu->name);
VIR_FREE(cpu);
}
int
qemuMonitorGetCPUModelExpansion(qemuMonitorPtr mon,
qemuMonitorCPUModelExpansionType type,
const char *model_name,
bool migratable,
qemuMonitorCPUModelInfoPtr *model_info)
{
VIR_DEBUG("type=%d model_name=%s migratable=%d",
type, model_name, migratable);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetCPUModelExpansion(mon, type, model_name,
migratable, model_info);
}
void
qemuMonitorCPUModelInfoFree(qemuMonitorCPUModelInfoPtr model_info)
{
size_t i;
if (!model_info)
return;
for (i = 0; i < model_info->nprops; i++) {
VIR_FREE(model_info->props[i].name);
if (model_info->props[i].type == QEMU_MONITOR_CPU_PROPERTY_STRING)
VIR_FREE(model_info->props[i].value.string);
}
VIR_FREE(model_info->props);
VIR_FREE(model_info->name);
VIR_FREE(model_info);
}
qemuMonitorCPUModelInfoPtr
qemuMonitorCPUModelInfoCopy(const qemuMonitorCPUModelInfo *orig)
{
qemuMonitorCPUModelInfoPtr copy;
size_t i;
if (VIR_ALLOC(copy) < 0)
goto error;
if (VIR_ALLOC_N(copy->props, orig->nprops) < 0)
goto error;
if (VIR_STRDUP(copy->name, orig->name) < 0)
goto error;
copy->migratability = orig->migratability;
copy->nprops = orig->nprops;
for (i = 0; i < orig->nprops; i++) {
if (VIR_STRDUP(copy->props[i].name, orig->props[i].name) < 0)
goto error;
copy->props[i].migratable = orig->props[i].migratable;
copy->props[i].type = orig->props[i].type;
switch (orig->props[i].type) {
case QEMU_MONITOR_CPU_PROPERTY_BOOLEAN:
copy->props[i].value.boolean = orig->props[i].value.boolean;
break;
case QEMU_MONITOR_CPU_PROPERTY_STRING:
if (VIR_STRDUP(copy->props[i].value.string,
orig->props[i].value.string) < 0)
goto error;
break;
case QEMU_MONITOR_CPU_PROPERTY_NUMBER:
copy->props[i].value.number = orig->props[i].value.number;
break;
case QEMU_MONITOR_CPU_PROPERTY_LAST:
break;
}
}
return copy;
error:
qemuMonitorCPUModelInfoFree(copy);
return NULL;
}
int
qemuMonitorGetCommands(qemuMonitorPtr mon,
char ***commands)
{
VIR_DEBUG("commands=%p", commands);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetCommands(mon, commands);
}
int
qemuMonitorGetEvents(qemuMonitorPtr mon,
char ***events)
{
VIR_DEBUG("events=%p", events);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetEvents(mon, events);
}
/* Collect the parameters associated with a given command line option.
* Return count of known parameters or -1 on error. */
int
qemuMonitorGetCommandLineOptionParameters(qemuMonitorPtr mon,
const char *option,
char ***params,
bool *found)
{
VIR_DEBUG("option=%s params=%p", option, params);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetCommandLineOptionParameters(mon, option,
params, found);
}
int
qemuMonitorGetKVMState(qemuMonitorPtr mon,
bool *enabled,
bool *present)
{
VIR_DEBUG("enabled=%p present=%p", enabled, present);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetKVMState(mon, enabled, present);
}
int
qemuMonitorGetObjectTypes(qemuMonitorPtr mon,
char ***types)
{
VIR_DEBUG("types=%p", types);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetObjectTypes(mon, types);
}
int
qemuMonitorGetDeviceProps(qemuMonitorPtr mon,
const char *device,
char ***props)
{
VIR_DEBUG("device=%s props=%p", device, props);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetDeviceProps(mon, device, props);
}
int
qemuMonitorGetObjectProps(qemuMonitorPtr mon,
const char *object,
char ***props)
{
VIR_DEBUG("object=%s props=%p", object, props);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetObjectProps(mon, object, props);
}
char *
qemuMonitorGetTargetArch(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR_NULL(mon);
return qemuMonitorJSONGetTargetArch(mon);
}
2014-09-11 14:11:54 +02:00
int
qemuMonitorGetMigrationCapabilities(qemuMonitorPtr mon,
char ***capabilities)
{
QEMU_CHECK_MONITOR(mon);
2014-09-11 14:11:54 +02:00
/* No capability is supported without JSON monitor */
if (!mon->json)
return 0;
return qemuMonitorJSONGetMigrationCapabilities(mon, capabilities);
}
/**
* qemuMonitorSetMigrationCapabilities:
* @mon: Pointer to the monitor object.
* @caps: Migration capabilities.
*
* The @caps object is consumed and should not be referenced by the caller
* after this function returns.
*
* Returns 0 on success, -1 on error.
*/
int
qemuMonitorSetMigrationCapabilities(qemuMonitorPtr mon,
virJSONValuePtr caps)
{
QEMU_CHECK_MONITOR_GOTO(mon, error);
return qemuMonitorJSONSetMigrationCapabilities(mon, caps);
error:
virJSONValueFree(caps);
return -1;
}
/**
* qemuMonitorGetGICCapabilities:
* @mon: QEMU monitor
* @capabilities: where to store the GIC capabilities
*
* See qemuMonitorJSONGetGICCapabilities().
*/
int
qemuMonitorGetGICCapabilities(qemuMonitorPtr mon,
virGICCapability **capabilities)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetGICCapabilities(mon, capabilities);
}
int
qemuMonitorGetSEVCapabilities(qemuMonitorPtr mon,
virSEVCapability **capabilities)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetSEVCapabilities(mon, capabilities);
}
int
qemuMonitorNBDServerStart(qemuMonitorPtr mon,
const char *host,
unsigned int port,
const char *tls_alias)
{
VIR_DEBUG("host=%s port=%u tls_alias=%s", host, port, NULLSTR(tls_alias));
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONNBDServerStart(mon, host, port, tls_alias);
}
int
qemuMonitorNBDServerAdd(qemuMonitorPtr mon,
const char *deviceID,
bool writable)
{
VIR_DEBUG("deviceID=%s", deviceID);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONNBDServerAdd(mon, deviceID, writable);
}
int
qemuMonitorNBDServerStop(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONNBDServerStop(mon);
}
int
qemuMonitorGetTPMModels(qemuMonitorPtr mon,
char ***tpmmodels)
{
VIR_DEBUG("tpmmodels=%p", tpmmodels);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetTPMModels(mon, tpmmodels);
}
int
qemuMonitorGetTPMTypes(qemuMonitorPtr mon,
char ***tpmtypes)
{
VIR_DEBUG("tpmtypes=%p", tpmtypes);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetTPMTypes(mon, tpmtypes);
}
int
qemuMonitorAttachCharDev(qemuMonitorPtr mon,
const char *chrID,
virDomainChrSourceDefPtr chr)
{
VIR_DEBUG("chrID=%s chr=%p", chrID, chr);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONAttachCharDev(mon, chrID, chr);
}
int
qemuMonitorDetachCharDev(qemuMonitorPtr mon,
const char *chrID)
{
VIR_DEBUG("chrID=%s", chrID);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONDetachCharDev(mon, chrID);
}
int
qemuMonitorGetDeviceAliases(qemuMonitorPtr mon,
char ***aliases)
{
VIR_DEBUG("aliases=%p", aliases);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetDeviceAliases(mon, aliases);
}
/**
qemu: Fix two use-after-free situations There were multiple race conditions that could lead to segmentation faults. The first precondition for this is qemuProcessLaunch must fail sometime shortly after starting the new QEMU process. The second precondition for the segmentation faults is that the new QEMU process dies - or to be more precise the QEMU monitor has to be closed irregularly. If both happens during qemuProcessStart (starting a domain) there are race windows between the thread with the event loop (T1) and the thread that is starting the domain (T2). First segmentation fault scenario: If qemuProcessLaunch fails during qemuProcessStart the code branches to the 'stop' path where 'qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL)' will set the log function of the monitor to NULL (done in T2). In the meantime the event loop of T1 will wake up with an EOF event for the QEMU monitor because the QEMU process has died. The crash occurs if T1 has checked 'mon->logFunc != NULL' in qemuMonitorIO just before the logFunc was set to NULL by T2. If this situation occurs T1 will try to call mon->logFunc which leads to the segmentation fault. Solution: Require the monitor lock for setting the log function. Backtrace: 0 0x0000000000000000 in ?? () 1 0x000003ffe9e45316 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe08aa860) at ../../src/qemu/qemu_monitor.c:727 2 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 3 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 4 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 5 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 6 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Second segmentation fault scenario: If qemuProcessLaunch fails it will unref the log context and with invoking qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL) qemuDomainLogContextFree() will be invoked. qemuDomainLogContextFree() invokes virNetClientClose() to close the client and cleans everything up (including unref of _virLogManager.client) when virNetClientClose() returns. When T1 is now trying to report 'qemu unexpectedly closed the monitor' libvirtd will crash because the client has already been freed. Solution: As the critical section in qemuMonitorIO is protected with the monitor lock we can use the same solution as proposed for the first segmentation fault. Backtrace: 0 virClassIsDerivedFrom (klass=0x3100979797979797, parent=0x2aa000d92f0) at ../../src/util/virobject.c:169 1 0x000003fffda659e6 in virObjectIsClass (anyobj=<optimized out>, klass=<optimized out>) at ../../src/util/virobject.c:365 2 0x000003fffda65a24 in virObjectLock (anyobj=0x3ffe08c1db0) at ../../src/util/virobject.c:317 3 0x000003fffdba4688 in virNetClientIOEventLoop (client=client@entry=0x3ffe08c1db0, thiscall=thiscall@entry=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1668 4 0x000003fffdba4b4c in virNetClientIO (client=client@entry=0x3ffe08c1db0, thiscall=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1944 5 0x000003fffdba4d42 in virNetClientSendInternal (client=client@entry=0x3ffe08c1db0, msg=msg@entry=0x2aa000cc710, expectReply=expectReply@entry=true, nonBlock=nonBlock@entry=false) at ../../src/rpc/virnetclient.c:2116 6 0x000003fffdba6268 in virNetClientSendWithReply (client=0x3ffe08c1db0, msg=0x2aa000cc710) at ../../src/rpc/virnetclient.c:2144 7 0x000003fffdba6e8e in virNetClientProgramCall (prog=0x3ffe08c1120, client=<optimized out>, serial=<optimized out>, proc=<optimized out>, noutfds=<optimized out>, outfds=0x0, ninfds=0x0, infds=0x0, args_filter=0x3fffdb64440 <xdr_virLogManagerProtocolDomainReadLogFileArgs>, args=0x3ffffffe010, ret_filter=0x3fffdb644c0 <xdr_virLogManagerProtocolDomainReadLogFileRet>, ret=0x3ffffffe008) at ../../src/rpc/virnetclientprogram.c:329 8 0x000003fffdb64042 in virLogManagerDomainReadLogFile (mgr=<optimized out>, path=<optimized out>, inode=<optimized out>, offset=<optimized out>, maxlen=<optimized out>, flags=0) at ../../src/logging/log_manager.c:272 9 0x000003ffe9e0315c in qemuDomainLogContextRead (ctxt=0x3ffe08c2980, msg=0x3ffffffe1c0) at ../../src/qemu/qemu_domain.c:4422 10 0x000003ffe9e280a8 in qemuProcessReadLog (logCtxt=<optimized out>, msg=msg@entry=0x3ffffffe288) at ../../src/qemu/qemu_process.c:1800 11 0x000003ffe9e28206 in qemuProcessReportLogError (logCtxt=<optimized out>, msgprefix=0x3ffe9ec276a "qemu unexpectedly closed the monitor") at ../../src/qemu/qemu_process.c:1836 12 0x000003ffe9e28306 in qemuProcessMonitorReportLogError (mon=mon@entry=0x3ffe085cf10, msg=<optimized out>, opaque=<optimized out>) at ../../src/qemu/qemu_process.c:1856 13 0x000003ffe9e452b6 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe085cf10) at ../../src/qemu/qemu_monitor.c:726 14 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 15 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 16 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 17 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 18 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Other code parts where the same problem was possible to occur are fixed as well (qemuMigrationFinish, qemuProcessStart, and qemuDomainSaveImageStartVM). Signed-off-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com> Reported-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
2017-04-03 10:24:35 +02:00
* qemuMonitorSetDomainLogLocked:
* @mon: Locked monitor object to set the log file reading on
* @func: the callback to report errors
* @opaque: data to pass to @func
* @destroy: optional callback to free @opaque
qemu: Fix two use-after-free situations There were multiple race conditions that could lead to segmentation faults. The first precondition for this is qemuProcessLaunch must fail sometime shortly after starting the new QEMU process. The second precondition for the segmentation faults is that the new QEMU process dies - or to be more precise the QEMU monitor has to be closed irregularly. If both happens during qemuProcessStart (starting a domain) there are race windows between the thread with the event loop (T1) and the thread that is starting the domain (T2). First segmentation fault scenario: If qemuProcessLaunch fails during qemuProcessStart the code branches to the 'stop' path where 'qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL)' will set the log function of the monitor to NULL (done in T2). In the meantime the event loop of T1 will wake up with an EOF event for the QEMU monitor because the QEMU process has died. The crash occurs if T1 has checked 'mon->logFunc != NULL' in qemuMonitorIO just before the logFunc was set to NULL by T2. If this situation occurs T1 will try to call mon->logFunc which leads to the segmentation fault. Solution: Require the monitor lock for setting the log function. Backtrace: 0 0x0000000000000000 in ?? () 1 0x000003ffe9e45316 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe08aa860) at ../../src/qemu/qemu_monitor.c:727 2 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 3 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 4 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 5 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 6 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Second segmentation fault scenario: If qemuProcessLaunch fails it will unref the log context and with invoking qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL) qemuDomainLogContextFree() will be invoked. qemuDomainLogContextFree() invokes virNetClientClose() to close the client and cleans everything up (including unref of _virLogManager.client) when virNetClientClose() returns. When T1 is now trying to report 'qemu unexpectedly closed the monitor' libvirtd will crash because the client has already been freed. Solution: As the critical section in qemuMonitorIO is protected with the monitor lock we can use the same solution as proposed for the first segmentation fault. Backtrace: 0 virClassIsDerivedFrom (klass=0x3100979797979797, parent=0x2aa000d92f0) at ../../src/util/virobject.c:169 1 0x000003fffda659e6 in virObjectIsClass (anyobj=<optimized out>, klass=<optimized out>) at ../../src/util/virobject.c:365 2 0x000003fffda65a24 in virObjectLock (anyobj=0x3ffe08c1db0) at ../../src/util/virobject.c:317 3 0x000003fffdba4688 in virNetClientIOEventLoop (client=client@entry=0x3ffe08c1db0, thiscall=thiscall@entry=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1668 4 0x000003fffdba4b4c in virNetClientIO (client=client@entry=0x3ffe08c1db0, thiscall=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1944 5 0x000003fffdba4d42 in virNetClientSendInternal (client=client@entry=0x3ffe08c1db0, msg=msg@entry=0x2aa000cc710, expectReply=expectReply@entry=true, nonBlock=nonBlock@entry=false) at ../../src/rpc/virnetclient.c:2116 6 0x000003fffdba6268 in virNetClientSendWithReply (client=0x3ffe08c1db0, msg=0x2aa000cc710) at ../../src/rpc/virnetclient.c:2144 7 0x000003fffdba6e8e in virNetClientProgramCall (prog=0x3ffe08c1120, client=<optimized out>, serial=<optimized out>, proc=<optimized out>, noutfds=<optimized out>, outfds=0x0, ninfds=0x0, infds=0x0, args_filter=0x3fffdb64440 <xdr_virLogManagerProtocolDomainReadLogFileArgs>, args=0x3ffffffe010, ret_filter=0x3fffdb644c0 <xdr_virLogManagerProtocolDomainReadLogFileRet>, ret=0x3ffffffe008) at ../../src/rpc/virnetclientprogram.c:329 8 0x000003fffdb64042 in virLogManagerDomainReadLogFile (mgr=<optimized out>, path=<optimized out>, inode=<optimized out>, offset=<optimized out>, maxlen=<optimized out>, flags=0) at ../../src/logging/log_manager.c:272 9 0x000003ffe9e0315c in qemuDomainLogContextRead (ctxt=0x3ffe08c2980, msg=0x3ffffffe1c0) at ../../src/qemu/qemu_domain.c:4422 10 0x000003ffe9e280a8 in qemuProcessReadLog (logCtxt=<optimized out>, msg=msg@entry=0x3ffffffe288) at ../../src/qemu/qemu_process.c:1800 11 0x000003ffe9e28206 in qemuProcessReportLogError (logCtxt=<optimized out>, msgprefix=0x3ffe9ec276a "qemu unexpectedly closed the monitor") at ../../src/qemu/qemu_process.c:1836 12 0x000003ffe9e28306 in qemuProcessMonitorReportLogError (mon=mon@entry=0x3ffe085cf10, msg=<optimized out>, opaque=<optimized out>) at ../../src/qemu/qemu_process.c:1856 13 0x000003ffe9e452b6 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe085cf10) at ../../src/qemu/qemu_monitor.c:726 14 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 15 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 16 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 17 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 18 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Other code parts where the same problem was possible to occur are fixed as well (qemuMigrationFinish, qemuProcessStart, and qemuDomainSaveImageStartVM). Signed-off-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com> Reported-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
2017-04-03 10:24:35 +02:00
*
* Set the file descriptor of the open VM log file to report potential
* early startup errors of qemu. This function requires @mon to be
* locked already!
*/
void
qemu: Fix two use-after-free situations There were multiple race conditions that could lead to segmentation faults. The first precondition for this is qemuProcessLaunch must fail sometime shortly after starting the new QEMU process. The second precondition for the segmentation faults is that the new QEMU process dies - or to be more precise the QEMU monitor has to be closed irregularly. If both happens during qemuProcessStart (starting a domain) there are race windows between the thread with the event loop (T1) and the thread that is starting the domain (T2). First segmentation fault scenario: If qemuProcessLaunch fails during qemuProcessStart the code branches to the 'stop' path where 'qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL)' will set the log function of the monitor to NULL (done in T2). In the meantime the event loop of T1 will wake up with an EOF event for the QEMU monitor because the QEMU process has died. The crash occurs if T1 has checked 'mon->logFunc != NULL' in qemuMonitorIO just before the logFunc was set to NULL by T2. If this situation occurs T1 will try to call mon->logFunc which leads to the segmentation fault. Solution: Require the monitor lock for setting the log function. Backtrace: 0 0x0000000000000000 in ?? () 1 0x000003ffe9e45316 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe08aa860) at ../../src/qemu/qemu_monitor.c:727 2 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 3 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 4 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 5 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 6 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Second segmentation fault scenario: If qemuProcessLaunch fails it will unref the log context and with invoking qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL) qemuDomainLogContextFree() will be invoked. qemuDomainLogContextFree() invokes virNetClientClose() to close the client and cleans everything up (including unref of _virLogManager.client) when virNetClientClose() returns. When T1 is now trying to report 'qemu unexpectedly closed the monitor' libvirtd will crash because the client has already been freed. Solution: As the critical section in qemuMonitorIO is protected with the monitor lock we can use the same solution as proposed for the first segmentation fault. Backtrace: 0 virClassIsDerivedFrom (klass=0x3100979797979797, parent=0x2aa000d92f0) at ../../src/util/virobject.c:169 1 0x000003fffda659e6 in virObjectIsClass (anyobj=<optimized out>, klass=<optimized out>) at ../../src/util/virobject.c:365 2 0x000003fffda65a24 in virObjectLock (anyobj=0x3ffe08c1db0) at ../../src/util/virobject.c:317 3 0x000003fffdba4688 in virNetClientIOEventLoop (client=client@entry=0x3ffe08c1db0, thiscall=thiscall@entry=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1668 4 0x000003fffdba4b4c in virNetClientIO (client=client@entry=0x3ffe08c1db0, thiscall=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1944 5 0x000003fffdba4d42 in virNetClientSendInternal (client=client@entry=0x3ffe08c1db0, msg=msg@entry=0x2aa000cc710, expectReply=expectReply@entry=true, nonBlock=nonBlock@entry=false) at ../../src/rpc/virnetclient.c:2116 6 0x000003fffdba6268 in virNetClientSendWithReply (client=0x3ffe08c1db0, msg=0x2aa000cc710) at ../../src/rpc/virnetclient.c:2144 7 0x000003fffdba6e8e in virNetClientProgramCall (prog=0x3ffe08c1120, client=<optimized out>, serial=<optimized out>, proc=<optimized out>, noutfds=<optimized out>, outfds=0x0, ninfds=0x0, infds=0x0, args_filter=0x3fffdb64440 <xdr_virLogManagerProtocolDomainReadLogFileArgs>, args=0x3ffffffe010, ret_filter=0x3fffdb644c0 <xdr_virLogManagerProtocolDomainReadLogFileRet>, ret=0x3ffffffe008) at ../../src/rpc/virnetclientprogram.c:329 8 0x000003fffdb64042 in virLogManagerDomainReadLogFile (mgr=<optimized out>, path=<optimized out>, inode=<optimized out>, offset=<optimized out>, maxlen=<optimized out>, flags=0) at ../../src/logging/log_manager.c:272 9 0x000003ffe9e0315c in qemuDomainLogContextRead (ctxt=0x3ffe08c2980, msg=0x3ffffffe1c0) at ../../src/qemu/qemu_domain.c:4422 10 0x000003ffe9e280a8 in qemuProcessReadLog (logCtxt=<optimized out>, msg=msg@entry=0x3ffffffe288) at ../../src/qemu/qemu_process.c:1800 11 0x000003ffe9e28206 in qemuProcessReportLogError (logCtxt=<optimized out>, msgprefix=0x3ffe9ec276a "qemu unexpectedly closed the monitor") at ../../src/qemu/qemu_process.c:1836 12 0x000003ffe9e28306 in qemuProcessMonitorReportLogError (mon=mon@entry=0x3ffe085cf10, msg=<optimized out>, opaque=<optimized out>) at ../../src/qemu/qemu_process.c:1856 13 0x000003ffe9e452b6 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe085cf10) at ../../src/qemu/qemu_monitor.c:726 14 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 15 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 16 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 17 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 18 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Other code parts where the same problem was possible to occur are fixed as well (qemuMigrationFinish, qemuProcessStart, and qemuDomainSaveImageStartVM). Signed-off-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com> Reported-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
2017-04-03 10:24:35 +02:00
qemuMonitorSetDomainLogLocked(qemuMonitorPtr mon,
qemuMonitorReportDomainLogError func,
void *opaque,
virFreeCallback destroy)
{
if (mon->logDestroy && mon->logOpaque)
mon->logDestroy(mon->logOpaque);
mon->logFunc = func;
mon->logOpaque = opaque;
mon->logDestroy = destroy;
}
qemu: Fix two use-after-free situations There were multiple race conditions that could lead to segmentation faults. The first precondition for this is qemuProcessLaunch must fail sometime shortly after starting the new QEMU process. The second precondition for the segmentation faults is that the new QEMU process dies - or to be more precise the QEMU monitor has to be closed irregularly. If both happens during qemuProcessStart (starting a domain) there are race windows between the thread with the event loop (T1) and the thread that is starting the domain (T2). First segmentation fault scenario: If qemuProcessLaunch fails during qemuProcessStart the code branches to the 'stop' path where 'qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL)' will set the log function of the monitor to NULL (done in T2). In the meantime the event loop of T1 will wake up with an EOF event for the QEMU monitor because the QEMU process has died. The crash occurs if T1 has checked 'mon->logFunc != NULL' in qemuMonitorIO just before the logFunc was set to NULL by T2. If this situation occurs T1 will try to call mon->logFunc which leads to the segmentation fault. Solution: Require the monitor lock for setting the log function. Backtrace: 0 0x0000000000000000 in ?? () 1 0x000003ffe9e45316 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe08aa860) at ../../src/qemu/qemu_monitor.c:727 2 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 3 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 4 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 5 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 6 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Second segmentation fault scenario: If qemuProcessLaunch fails it will unref the log context and with invoking qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL) qemuDomainLogContextFree() will be invoked. qemuDomainLogContextFree() invokes virNetClientClose() to close the client and cleans everything up (including unref of _virLogManager.client) when virNetClientClose() returns. When T1 is now trying to report 'qemu unexpectedly closed the monitor' libvirtd will crash because the client has already been freed. Solution: As the critical section in qemuMonitorIO is protected with the monitor lock we can use the same solution as proposed for the first segmentation fault. Backtrace: 0 virClassIsDerivedFrom (klass=0x3100979797979797, parent=0x2aa000d92f0) at ../../src/util/virobject.c:169 1 0x000003fffda659e6 in virObjectIsClass (anyobj=<optimized out>, klass=<optimized out>) at ../../src/util/virobject.c:365 2 0x000003fffda65a24 in virObjectLock (anyobj=0x3ffe08c1db0) at ../../src/util/virobject.c:317 3 0x000003fffdba4688 in virNetClientIOEventLoop (client=client@entry=0x3ffe08c1db0, thiscall=thiscall@entry=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1668 4 0x000003fffdba4b4c in virNetClientIO (client=client@entry=0x3ffe08c1db0, thiscall=0x2aa000fbfa0) at ../../src/rpc/virnetclient.c:1944 5 0x000003fffdba4d42 in virNetClientSendInternal (client=client@entry=0x3ffe08c1db0, msg=msg@entry=0x2aa000cc710, expectReply=expectReply@entry=true, nonBlock=nonBlock@entry=false) at ../../src/rpc/virnetclient.c:2116 6 0x000003fffdba6268 in virNetClientSendWithReply (client=0x3ffe08c1db0, msg=0x2aa000cc710) at ../../src/rpc/virnetclient.c:2144 7 0x000003fffdba6e8e in virNetClientProgramCall (prog=0x3ffe08c1120, client=<optimized out>, serial=<optimized out>, proc=<optimized out>, noutfds=<optimized out>, outfds=0x0, ninfds=0x0, infds=0x0, args_filter=0x3fffdb64440 <xdr_virLogManagerProtocolDomainReadLogFileArgs>, args=0x3ffffffe010, ret_filter=0x3fffdb644c0 <xdr_virLogManagerProtocolDomainReadLogFileRet>, ret=0x3ffffffe008) at ../../src/rpc/virnetclientprogram.c:329 8 0x000003fffdb64042 in virLogManagerDomainReadLogFile (mgr=<optimized out>, path=<optimized out>, inode=<optimized out>, offset=<optimized out>, maxlen=<optimized out>, flags=0) at ../../src/logging/log_manager.c:272 9 0x000003ffe9e0315c in qemuDomainLogContextRead (ctxt=0x3ffe08c2980, msg=0x3ffffffe1c0) at ../../src/qemu/qemu_domain.c:4422 10 0x000003ffe9e280a8 in qemuProcessReadLog (logCtxt=<optimized out>, msg=msg@entry=0x3ffffffe288) at ../../src/qemu/qemu_process.c:1800 11 0x000003ffe9e28206 in qemuProcessReportLogError (logCtxt=<optimized out>, msgprefix=0x3ffe9ec276a "qemu unexpectedly closed the monitor") at ../../src/qemu/qemu_process.c:1836 12 0x000003ffe9e28306 in qemuProcessMonitorReportLogError (mon=mon@entry=0x3ffe085cf10, msg=<optimized out>, opaque=<optimized out>) at ../../src/qemu/qemu_process.c:1856 13 0x000003ffe9e452b6 in qemuMonitorIO (watch=<optimized out>, fd=<optimized out>, events=<optimized out>, opaque=0x3ffe085cf10) at ../../src/qemu/qemu_monitor.c:726 14 0x000003fffda2e1a4 in virEventPollDispatchHandles (nfds=<optimized out>, fds=0x2aa000fd980) at ../../src/util/vireventpoll.c:508 15 0x000003fffda2e398 in virEventPollRunOnce () at ../../src/util/vireventpoll.c:657 16 0x000003fffda2ca10 in virEventRunDefaultImpl () at ../../src/util/virevent.c:314 17 0x000003fffdba9366 in virNetDaemonRun (dmn=0x2aa000cc550) at ../../src/rpc/virnetdaemon.c:818 18 0x000002aa00024668 in main (argc=<optimized out>, argv=<optimized out>) at ../../daemon/libvirtd.c:1541 Other code parts where the same problem was possible to occur are fixed as well (qemuMigrationFinish, qemuProcessStart, and qemuDomainSaveImageStartVM). Signed-off-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com> Reported-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
2017-04-03 10:24:35 +02:00
/**
* qemuMonitorSetDomainLog:
* @mon: Unlocked monitor object to set the log file reading on
* @func: the callback to report errors
* @opaque: data to pass to @func
* @destroy: optional callback to free @opaque
*
* Set the file descriptor of the open VM log file to report potential
* early startup errors of qemu. This functions requires @mon to be
* unlocked.
*/
void
qemuMonitorSetDomainLog(qemuMonitorPtr mon,
qemuMonitorReportDomainLogError func,
void *opaque,
virFreeCallback destroy)
{
virObjectLock(mon);
qemuMonitorSetDomainLogLocked(mon, func, opaque, destroy);
virObjectUnlock(mon);
}
/**
* qemuMonitorJSONGetGuestCPU:
* @mon: Pointer to the monitor
* @arch: arch of the guest
* @data: returns the cpu data
* @disabled: returns the CPU data for features which were disabled by QEMU
*
* Retrieve the definition of the guest CPU from a running qemu instance.
*
* Returns 0 on success, -2 if the operation is not supported by the guest,
* -1 on other errors.
*/
int
qemuMonitorGetGuestCPU(qemuMonitorPtr mon,
virArch arch,
virCPUDataPtr *data,
virCPUDataPtr *disabled)
{
VIR_DEBUG("arch=%s data=%p disabled=%p",
virArchToString(arch), data, disabled);
QEMU_CHECK_MONITOR(mon);
*data = NULL;
if (disabled)
*disabled = NULL;
return qemuMonitorJSONGetGuestCPU(mon, arch, data, disabled);
}
/**
* qemuMonitorRTCResetReinjection:
* @mon: Pointer to the monitor
*
* Issue rtc-reset-reinjection command.
* This should be used in cases where guest time is restored via
* guest agent, so RTC injection is not needed (in fact it would
* confuse guest's RTC).
*
* Returns 0 on success
* -1 on error.
*/
int
qemuMonitorRTCResetReinjection(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONRTCResetReinjection(mon);
}
/**
* qemuMonitorGetIOThreads:
* @mon: Pointer to the monitor
* @iothreads: Location to return array of IOThreadInfo data
*
* Issue query-iothreads command.
* Retrieve the list of iothreads defined/running for the machine
*
* Returns count of IOThreadInfo structures on success
* -1 on error.
*/
int
qemuMonitorGetIOThreads(qemuMonitorPtr mon,
qemuMonitorIOThreadInfoPtr **iothreads)
{
VIR_DEBUG("iothreads=%p", iothreads);
QEMU_CHECK_MONITOR(mon);
/* Requires JSON to make the query */
if (!mon->json) {
*iothreads = NULL;
return 0;
}
return qemuMonitorJSONGetIOThreads(mon, iothreads);
}
/**
* qemuMonitorSetIOThread:
* @mon: Pointer to the monitor
* @iothreadInfo: filled IOThread info with data
*
* Alter the specified IOThread's IOThreadInfo values.
*/
int
qemuMonitorSetIOThread(qemuMonitorPtr mon,
qemuMonitorIOThreadInfoPtr iothreadInfo)
{
VIR_DEBUG("iothread=%p", iothreadInfo);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSetIOThread(mon, iothreadInfo);
}
/**
* qemuMonitorGetMemoryDeviceInfo:
* @mon: pointer to the monitor
* @info: Location to return the hash of qemuMonitorMemoryDeviceInfo
*
* Retrieve state and addresses of frontend memory devices present in
* the guest.
*
* Returns 0 on success and fills @info with a newly allocated struct; if the
* data can't be retrieved due to lack of support in qemu, returns -2. On
* other errors returns -1.
*/
int
qemuMonitorGetMemoryDeviceInfo(qemuMonitorPtr mon,
virHashTablePtr *info)
{
VIR_DEBUG("info=%p", info);
int ret;
*info = NULL;
QEMU_CHECK_MONITOR(mon);
if (!mon->json)
return -2;
if (!(*info = virHashCreate(10, virHashValueFree)))
return -1;
if ((ret = qemuMonitorJSONGetMemoryDeviceInfo(mon, *info)) < 0) {
virHashFree(*info);
*info = NULL;
}
return ret;
}
int
qemuMonitorMigrateIncoming(qemuMonitorPtr mon,
const char *uri)
{
VIR_DEBUG("uri=%s", uri);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONMigrateIncoming(mon, uri);
}
int
qemuMonitorMigrateStartPostCopy(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONMigrateStartPostCopy(mon);
}
int
qemuMonitorMigrateContinue(qemuMonitorPtr mon,
qemuMonitorMigrationStatus status)
{
VIR_DEBUG("status=%s", qemuMonitorMigrationStatusTypeToString(status));
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONMigrateContinue(mon, status);
}
int
qemuMonitorGetRTCTime(qemuMonitorPtr mon,
struct tm *tm)
{
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONGetRTCTime(mon, tm);
}
virJSONValuePtr
qemuMonitorQueryQMPSchema(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR_NULL(mon);
return qemuMonitorJSONQueryQMPSchema(mon);
}
int
qemuMonitorSetBlockThreshold(qemuMonitorPtr mon,
const char *nodename,
unsigned long long threshold)
{
VIR_DEBUG("node='%s', threshold=%llu", nodename, threshold);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSetBlockThreshold(mon, nodename, threshold);
}
virJSONValuePtr
qemuMonitorQueryNamedBlockNodes(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR_NULL(mon);
return qemuMonitorJSONQueryNamedBlockNodes(mon);
}
char *
qemuMonitorGuestPanicEventInfoFormatMsg(qemuMonitorEventPanicInfoPtr info)
{
char *ret = NULL;
switch (info->type) {
case QEMU_MONITOR_EVENT_PANIC_INFO_TYPE_HYPERV:
ignore_value(virAsprintf(&ret,
"hyper-v: arg1='0x%llx', arg2='0x%llx', "
"arg3='0x%llx', arg4='0x%llx', arg5='0x%llx'",
info->data.hyperv.arg1, info->data.hyperv.arg2,
info->data.hyperv.arg3, info->data.hyperv.arg4,
info->data.hyperv.arg5));
break;
case QEMU_MONITOR_EVENT_PANIC_INFO_TYPE_S390:
ignore_value(virAsprintf(&ret, "s390: core='%d' psw-mask='0x%016llx' "
"psw-addr='0x%016llx' reason='%s'",
info->data.s390.core,
info->data.s390.psw_mask,
info->data.s390.psw_addr,
info->data.s390.reason));
break;
case QEMU_MONITOR_EVENT_PANIC_INFO_TYPE_NONE:
case QEMU_MONITOR_EVENT_PANIC_INFO_TYPE_LAST:
break;
}
return ret;
}
void
qemuMonitorEventPanicInfoFree(qemuMonitorEventPanicInfoPtr info)
{
if (!info)
return;
switch (info->type) {
case QEMU_MONITOR_EVENT_PANIC_INFO_TYPE_S390:
VIR_FREE(info->data.s390.reason);
break;
case QEMU_MONITOR_EVENT_PANIC_INFO_TYPE_NONE:
case QEMU_MONITOR_EVENT_PANIC_INFO_TYPE_HYPERV:
case QEMU_MONITOR_EVENT_PANIC_INFO_TYPE_LAST:
break;
}
VIR_FREE(info);
}
void
qemuMonitorEventRdmaGidStatusFree(qemuMonitorRdmaGidStatusPtr info)
{
if (!info)
return;
VIR_FREE(info->netdev);
VIR_FREE(info);
}
int
qemuMonitorSetWatchdogAction(qemuMonitorPtr mon,
const char *action)
{
VIR_DEBUG("watchdogAction=%s", action);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONSetWatchdogAction(mon, action);
}
/**
* qemuMonitorBlockdevAdd:
* @mon: monitor object
* @props: JSON object describing the blockdev to add
*
* Adds a new block device (BDS) to qemu. Note that @props is always consumed
* by this function and should not be accessed after calling this function.
*/
int
qemuMonitorBlockdevAdd(qemuMonitorPtr mon,
virJSONValuePtr props)
{
VIR_DEBUG("props=%p (node-name=%s)", props,
NULLSTR(virJSONValueObjectGetString(props, "node-name")));
QEMU_CHECK_MONITOR_GOTO(mon, error);
return qemuMonitorJSONBlockdevAdd(mon, props);
error:
virJSONValueFree(props);
return -1;
}
int
qemuMonitorBlockdevDel(qemuMonitorPtr mon,
const char *nodename)
{
VIR_DEBUG("nodename=%s", nodename);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockdevDel(mon, nodename);
}
int
qemuMonitorBlockdevTrayOpen(qemuMonitorPtr mon,
const char *id,
bool force)
{
VIR_DEBUG("id=%s force=%d", id, force);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockdevTrayOpen(mon, id, force);
}
int
qemuMonitorBlockdevTrayClose(qemuMonitorPtr mon,
const char *id)
{
VIR_DEBUG("id=%s", id);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockdevTrayClose(mon, id);
}
int
qemuMonitorBlockdevMediumRemove(qemuMonitorPtr mon,
const char *id)
{
VIR_DEBUG("id=%s", id);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockdevMediumRemove(mon, id);
}
int
qemuMonitorBlockdevMediumInsert(qemuMonitorPtr mon,
const char *id,
const char *nodename)
{
VIR_DEBUG("id=%s nodename=%s", id, nodename);
QEMU_CHECK_MONITOR(mon);
return qemuMonitorJSONBlockdevMediumInsert(mon, id, nodename);
}
char *
qemuMonitorGetSEVMeasurement(qemuMonitorPtr mon)
{
QEMU_CHECK_MONITOR_NULL(mon);
return qemuMonitorJSONGetSEVMeasurement(mon);
}
int
qemuMonitorGetPRManagerInfo(qemuMonitorPtr mon,
virHashTablePtr *retinfo)
{
int ret = -1;
virHashTablePtr info = NULL;
*retinfo = NULL;
QEMU_CHECK_MONITOR(mon);
if (!(info = virHashCreate(10, virHashValueFree)))
goto cleanup;
if (qemuMonitorJSONGetPRManagerInfo(mon, info) < 0)
goto cleanup;
VIR_STEAL_PTR(*retinfo, info);
ret = 0;
cleanup:
virHashFree(info);
return ret;
}