Don't kill QEMU process when a monitor I/O parsing error occurs

Currently whenever there is any failure with parsing the monitor,
this is treated in the same was as end-of-file (ie QEMU quit).
The domain is terminated, if not already dead.

With this change, failures in parsing the monitor stream do not
result in the death of QEMU. The guest continues running unchanged,
but all further use of the monitor will be disabled.

The VMM_FAILURE event will be emitted, and the mgmt application
can decide when to kill/restart the guest to re-gain control

* src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h: Run a
  different callback for monitor EOF vs error conditions.
* src/qemu/qemu_process.c: Emit VMM_FAILURE event when monitor
  fails
This commit is contained in:
Daniel P. Berrange 2011-05-29 20:37:29 +08:00 committed by Daniel Veillard
parent a6135ec1e6
commit 4454a9efc7
3 changed files with 72 additions and 37 deletions

View File

@ -517,7 +517,8 @@ static void qemuMonitorUpdateWatch(qemuMonitorPtr mon)
static void
qemuMonitorIO(int watch, int fd, int events, void *opaque) {
qemuMonitorPtr mon = opaque;
int quit = 0, failed = 0;
bool error = false;
bool eof = false;
/* lock access to the monitor and protect fd */
qemuMonitorLock(mon);
@ -528,27 +529,27 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
if (mon->fd != fd || mon->watch != watch) {
VIR_ERROR(_("event from unexpected fd %d!=%d / watch %d!=%d"), mon->fd, fd, mon->watch, watch);
failed = 1;
error = true;
} else {
if (!mon->lastErrno &&
events & VIR_EVENT_HANDLE_WRITABLE) {
int done = qemuMonitorIOWrite(mon);
if (done < 0)
failed = 1;
error = 1;
events &= ~VIR_EVENT_HANDLE_WRITABLE;
}
if (!mon->lastErrno &&
events & VIR_EVENT_HANDLE_READABLE) {
int got = qemuMonitorIORead(mon);
if (got < 0)
failed = 1;
error = true;
/* Ignore hangup/error events if we read some data, to
* give time for that data to be consumed */
if (got > 0) {
events = 0;
if (qemuMonitorIOProcess(mon) < 0)
failed = 1;
error = true;
} else
events &= ~VIR_EVENT_HANDLE_READABLE;
}
@ -572,36 +573,44 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
mon->msg->lastErrno = EIO;
virCondSignal(&mon->notify);
}
quit = 1;
eof = 1;
} else if (events) {
VIR_ERROR(_("unhandled fd event %d for monitor fd %d"),
events, mon->fd);
failed = 1;
error = 1;
}
}
if (eof || error)
mon->lastErrno = EIO;
qemuMonitorUpdateWatch(mon);
/* We have to unlock to avoid deadlock against command thread,
* but is this safe ? I think it is, because the callback
* will try to acquire the virDomainObjPtr mutex next */
if (failed || quit) {
void (*eofNotify)(qemuMonitorPtr, virDomainObjPtr, int)
if (eof) {
void (*eofNotify)(qemuMonitorPtr, virDomainObjPtr)
= mon->cb->eofNotify;
virDomainObjPtr vm = mon->vm;
/* If qemu quited unexpectedly, and we may try to send monitor
* command later. But we have no chance to wake up it. So set
* mon->lastErrno to EIO, and check it before sending monitor
* command.
*/
if (!mon->lastErrno)
mon->lastErrno = EIO;
/* Make sure anyone waiting wakes up now */
virCondSignal(&mon->notify);
if (qemuMonitorUnref(mon) > 0)
qemuMonitorUnlock(mon);
VIR_DEBUG("Triggering EOF callback error? %d", failed);
(eofNotify)(mon, vm, failed);
VIR_DEBUG("Triggering EOF callback");
(eofNotify)(mon, vm);
} else if (error) {
void (*errorNotify)(qemuMonitorPtr, virDomainObjPtr)
= mon->cb->errorNotify;
virDomainObjPtr vm = mon->vm;
/* Make sure anyone waiting wakes up now */
virCondSignal(&mon->notify);
if (qemuMonitorUnref(mon) > 0)
qemuMonitorUnlock(mon);
VIR_DEBUG("Triggering error callback");
(errorNotify)(mon, vm);
} else {
if (qemuMonitorUnref(mon) > 0)
qemuMonitorUnlock(mon);

View File

@ -67,9 +67,10 @@ struct _qemuMonitorCallbacks {
virDomainObjPtr vm);
void (*eofNotify)(qemuMonitorPtr mon,
virDomainObjPtr vm,
int withError);
/* XXX we'd really like to avoid virCOnnectPtr here
virDomainObjPtr vm);
void (*errorNotify)(qemuMonitorPtr mon,
virDomainObjPtr vm);
/* XXX we'd really like to avoid virConnectPtr here
* It is required so the callback can find the active
* secret driver. Need to change this to work like the
* security drivers do, to avoid this

View File

@ -100,12 +100,14 @@ extern struct qemud_driver *qemu_driver;
*/
static void
qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
virDomainObjPtr vm,
int hasError)
virDomainObjPtr vm)
{
struct qemud_driver *driver = qemu_driver;
virDomainEventPtr event = NULL;
qemuDomainObjPrivatePtr priv;
int eventReason = VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN;
int stopReason = VIR_DOMAIN_SHUTOFF_SHUTDOWN;
const char *auditReason = "shutdown";
VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
@ -120,32 +122,54 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
}
priv = vm->privateData;
if (!hasError && priv->monJSON && !priv->gotShutdown) {
if (priv->monJSON && !priv->gotShutdown) {
VIR_DEBUG("Monitor connection to '%s' closed without SHUTDOWN event; "
"assuming the domain crashed", vm->def->name);
hasError = 1;
eventReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
stopReason = VIR_DOMAIN_SHUTOFF_FAILED;
auditReason = "failed";
}
event = virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
hasError ?
VIR_DOMAIN_EVENT_STOPPED_FAILED :
VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
qemuProcessStop(driver, vm, 0,
hasError ?
VIR_DOMAIN_SHUTOFF_CRASHED :
VIR_DOMAIN_SHUTOFF_SHUTDOWN);
qemuAuditDomainStop(vm, hasError ? "failed" : "shutdown");
eventReason);
qemuProcessStop(driver, vm, 0, stopReason);
qemuAuditDomainStop(vm, auditReason);
if (!vm->persistent)
virDomainRemoveInactive(&driver->domains, vm);
else
virDomainObjUnlock(vm);
if (event) {
if (event)
qemuDomainEventQueue(driver, event);
}
qemuDriverUnlock(driver);
}
/*
* This is invoked when there is some kind of error
* parsing data to/from the monitor. The VM can continue
* to run, but no further monitor commands will be
* allowed
*/
static void
qemuProcessHandleMonitorError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
virDomainObjPtr vm)
{
struct qemud_driver *driver = qemu_driver;
virDomainEventPtr event = NULL;
VIR_DEBUG("Received error on %p '%s'", vm, vm->def->name);
qemuDriverLock(driver);
virDomainObjLock(vm);
event = virDomainEventControlErrorNewFromObj(vm);
if (event)
qemuDomainEventQueue(driver, event);
virDomainObjUnlock(vm);
qemuDriverUnlock(driver);
}
@ -626,6 +650,7 @@ static void qemuProcessHandleMonitorDestroy(qemuMonitorPtr mon,
static qemuMonitorCallbacks monitorCallbacks = {
.destroy = qemuProcessHandleMonitorDestroy,
.eofNotify = qemuProcessHandleMonitorEOF,
.errorNotify = qemuProcessHandleMonitorError,
.diskSecretLookup = qemuProcessFindVolumeQcowPassphrase,
.domainShutdown = qemuProcessHandleShutdown,
.domainStop = qemuProcessHandleStop,