From 96fc4784177ecb70357518fa863442455e45ad0e Mon Sep 17 00:00:00 2001 From: Jiri Denemark Date: Tue, 13 Sep 2011 18:11:26 +0200 Subject: [PATCH] qemu: Prevent disk corruption on domain shutdown Ever since we introduced fake reboot, we call qemuProcessKill as a reaction to SHUTDOWN event. Unfortunately, qemu doesn't guarantee it flushed all internal buffers before sending SHUTDOWN, in which case killing the process forcibly may result in (virtual) disk corruption. By sending just SIGTERM without SIGKILL we give qemu time to flush all buffers and exit. Once qemu exits, we will see an EOF on monitor connection and tear down the domain. In case qemu ignores SIGTERM or just hangs there, the process stays running but that's not any different from a possible hang anytime during the shutdown process so I think it's just fine. Also qemu (since 0.14 until it's fixed) has a bug in SIGTERM processing which causes it not to exit but instead send new SHUTDOWN event and keep waiting. I think the best we can do is to ignore duplicate SHUTDOWN events to avoid a SHUTDOWN-SIGTERM loop and leave the domain in paused state. --- src/qemu/qemu_driver.c | 2 +- src/qemu/qemu_process.c | 25 ++++++++++++++++++------- src/qemu/qemu_process.h | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c73b2ec0c2..32f376ec53 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1870,7 +1870,7 @@ qemuDomainDestroyFlags(virDomainPtr dom, * can kill the process even if a job is active. 
Killing * it now means the job will be released */ - qemuProcessKill(vm); + qemuProcessKill(vm, false); if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0) goto cleanup; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 5f7f0e6f6a..909c3f7f57 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -419,7 +419,7 @@ endjob: cleanup: if (vm) { if (ret == -1) - qemuProcessKill(vm); + qemuProcessKill(vm, false); if (virDomainObjUnref(vm) > 0) virDomainObjUnlock(vm); } @@ -437,6 +437,12 @@ qemuProcessHandleShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DEBUG("vm=%p", vm); virDomainObjLock(vm); + if (priv->gotShutdown) { + VIR_DEBUG("Ignoring repeated SHUTDOWN event from domain %s", + vm->def->name); + goto cleanup; + } + priv->gotShutdown = true; if (priv->fakeReboot) { virDomainObjRef(vm); @@ -446,16 +452,17 @@ qemuProcessHandleShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED, qemuProcessFakeReboot, vm) < 0) { VIR_ERROR(_("Failed to create reboot thread, killing domain")); - qemuProcessKill(vm); + qemuProcessKill(vm, true); if (virDomainObjUnref(vm) == 0) vm = NULL; } } else { - qemuProcessKill(vm); + qemuProcessKill(vm, true); } + +cleanup: if (vm) virDomainObjUnlock(vm); - return 0; } @@ -3200,10 +3207,11 @@ cleanup: } -void qemuProcessKill(virDomainObjPtr vm) +void qemuProcessKill(virDomainObjPtr vm, bool gracefully) { int i; - VIR_DEBUG("vm=%s pid=%d", vm->def->name, vm->pid); + VIR_DEBUG("vm=%s pid=%d gracefully=%d", + vm->def->name, vm->pid, gracefully); if (!virDomainObjIsActive(vm)) { VIR_DEBUG("VM '%s' not active", vm->def->name); @@ -3233,6 +3241,9 @@ void qemuProcessKill(virDomainObjPtr vm) break; } + if (i == 0 && gracefully) + break; + usleep(200 * 1000); } } @@ -3317,7 +3328,7 @@ void qemuProcessStop(struct qemud_driver *driver, } /* shut it off for sure */ - qemuProcessKill(vm); + qemuProcessKill(vm, false); /* Stop autodestroy in case guest is restarted */ qemuProcessAutoDestroyRemove(driver, vm); diff 
--git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index 96ba3f34fc..ef422c490c 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -68,7 +68,7 @@ int qemuProcessAttach(virConnectPtr conn, virDomainChrSourceDefPtr monConfig, bool monJSON); -void qemuProcessKill(virDomainObjPtr vm); +void qemuProcessKill(virDomainObjPtr vm, bool gracefully); int qemuProcessAutoDestroyInit(struct qemud_driver *driver); void qemuProcessAutoDestroyRun(struct qemud_driver *driver,