qemu: Stop domain on failed restore

When restoring a domain from a save file, we read the domain XML
from the file, add it to our internal list of domains, start
the qemu process, let it load the incoming migration stream and
resume its vCPUs afterwards. If anything goes wrong, the domain
object is removed from the list of domains and an error is returned
to the caller. However, the qemu process might be left behind:
if resuming the vCPUs fails (e.g. because qemu is unable to acquire
a write lock on a disk), then due to a bug the qemu process is not
killed even though the domain object is removed from the list.
Fix this by stopping the domain in the cleanup path whenever the
qemu process was started but the restore did not succeed.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1718707

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
This commit is contained in:
Michal Privoznik 2020-01-13 11:07:53 +01:00
parent 3203ad6cfd
commit 4c581527d4

View File

@ -6800,7 +6800,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
{ {
qemuDomainObjPrivatePtr priv = vm->privateData; qemuDomainObjPrivatePtr priv = vm->privateData;
int ret = -1; int ret = -1;
bool restored = false; bool started = false;
virObjectEventPtr event; virObjectEventPtr event;
VIR_AUTOCLOSE intermediatefd = -1; VIR_AUTOCLOSE intermediatefd = -1;
g_autoptr(virCommand) cmd = NULL; g_autoptr(virCommand) cmd = NULL;
@ -6808,6 +6808,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
virQEMUSaveHeaderPtr header = &data->header; virQEMUSaveHeaderPtr header = &data->header;
g_autoptr(qemuDomainSaveCookie) cookie = NULL; g_autoptr(qemuDomainSaveCookie) cookie = NULL;
int rc = 0;
if (virSaveCookieParseString(data->cookie, (virObjectPtr *)&cookie, if (virSaveCookieParseString(data->cookie, (virObjectPtr *)&cookie,
virDomainXMLOptionGetSaveCookie(driver->xmlopt)) < 0) virDomainXMLOptionGetSaveCookie(driver->xmlopt)) < 0)
@ -6848,12 +6849,12 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
VIR_NETDEV_VPORT_PROFILE_OP_RESTORE, VIR_NETDEV_VPORT_PROFILE_OP_RESTORE,
VIR_QEMU_PROCESS_START_PAUSED | VIR_QEMU_PROCESS_START_PAUSED |
VIR_QEMU_PROCESS_START_GEN_VMID) == 0) VIR_QEMU_PROCESS_START_GEN_VMID) == 0)
restored = true; started = true;
if (intermediatefd != -1) { if (intermediatefd != -1) {
virErrorPtr orig_err = NULL; virErrorPtr orig_err = NULL;
if (!restored) { if (!started) {
/* if there was an error setting up qemu, the intermediate /* if there was an error setting up qemu, the intermediate
* process will wait forever to write to stdout, so we * process will wait forever to write to stdout, so we
* must manually kill it and ignore any error related to * must manually kill it and ignore any error related to
@ -6864,21 +6865,17 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
VIR_FORCE_CLOSE(*fd); VIR_FORCE_CLOSE(*fd);
} }
if (virCommandWait(cmd, NULL) < 0) { rc = virCommandWait(cmd, NULL);
qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, 0);
restored = false;
}
VIR_DEBUG("Decompression binary stderr: %s", NULLSTR(errbuf)); VIR_DEBUG("Decompression binary stderr: %s", NULLSTR(errbuf));
virErrorRestore(&orig_err); virErrorRestore(&orig_err);
} }
if (VIR_CLOSE(*fd) < 0) { if (VIR_CLOSE(*fd) < 0) {
virReportSystemError(errno, _("cannot close file: %s"), path); virReportSystemError(errno, _("cannot close file: %s"), path);
restored = false; rc = -1;
} }
virDomainAuditStart(vm, "restored", restored); virDomainAuditStart(vm, "restored", started);
if (!restored) if (!started || rc < 0)
goto cleanup; goto cleanup;
/* qemuProcessStart doesn't unset the qemu error reporting infrastructure /* qemuProcessStart doesn't unset the qemu error reporting infrastructure
@ -6918,6 +6915,10 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
ret = 0; ret = 0;
cleanup: cleanup:
if (ret < 0 && started) {
qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED,
asyncJob, VIR_QEMU_PROCESS_STOP_MIGRATED);
}
if (qemuSecurityRestoreSavedStateLabel(driver, vm, path) < 0) if (qemuSecurityRestoreSavedStateLabel(driver, vm, path) < 0)
VIR_WARN("failed to restore save state label on %s", path); VIR_WARN("failed to restore save state label on %s", path);
return ret; return ret;