1
0
mirror of https://gitlab.com/libvirt/libvirt.git synced 2025-03-07 17:28:15 +00:00

qemu: Keep domain running on dst on failed post-copy migration

There's no need to artificially pause a domain when post-copy fails
from our point of view, unless the QEMU connection is broken too, as
the migration may still be progressing well.

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
This commit is contained in:
Jiri Denemark 2022-05-10 15:20:25 +02:00
parent aab9d64d4d
commit 13b43c22b7
3 changed files with 42 additions and 23 deletions

View File

@ -1577,34 +1577,51 @@ qemuMigrationSrcIsSafe(virDomainDef *def,
void
qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver,
virDomainObj *vm)
qemuMigrationSrcPostcopyFailed(virDomainObj *vm)
{
virDomainState state;
int reason;
state = virDomainObjGetState(vm, &reason);
if (state != VIR_DOMAIN_PAUSED &&
state != VIR_DOMAIN_RUNNING)
return;
VIR_DEBUG("%s/%s",
virDomainStateTypeToString(state),
virDomainStateReasonToString(state, reason));
if (state == VIR_DOMAIN_PAUSED &&
if (state != VIR_DOMAIN_PAUSED ||
reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED)
return;
VIR_WARN("Migration of domain %s failed during post-copy; "
"leaving the domain paused", vm->def->name);
if (state == VIR_DOMAIN_RUNNING) {
if (qemuProcessStopCPUs(driver, vm,
VIR_DOMAIN_PAUSED_POSTCOPY_FAILED,
VIR_ASYNC_JOB_MIGRATION_IN) < 0)
VIR_WARN("Unable to pause guest CPUs for %s", vm->def->name);
} else {
virDomainObjSetState(vm, VIR_DOMAIN_PAUSED,
VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
}
virDomainObjSetState(vm, VIR_DOMAIN_PAUSED,
VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
}
/**
 * qemuMigrationDstPostcopyFailed:
 * @vm: domain object of the incoming (destination side) migration
 *
 * Marks a running domain as having failed post-copy migration without
 * pausing it: the state stays VIR_DOMAIN_RUNNING and only the reason is
 * changed to VIR_DOMAIN_RUNNING_POSTCOPY_FAILED.
 *
 * Nothing is changed when the domain is not in VIR_DOMAIN_RUNNING state
 * or when its reason already is VIR_DOMAIN_RUNNING_POSTCOPY_FAILED.
 */
void
qemuMigrationDstPostcopyFailed(virDomainObj *vm)
{
virDomainState state;
int reason;
state = virDomainObjGetState(vm, &reason);
/* Log the current state/reason pair before deciding what to do. */
VIR_DEBUG("%s/%s",
virDomainStateTypeToString(state),
virDomainStateReasonToString(state, reason));
/* Only a running domain that has not been flagged yet is updated;
 * every other state/reason combination is left untouched. */
if (state != VIR_DOMAIN_RUNNING ||
reason == VIR_DOMAIN_RUNNING_POSTCOPY_FAILED)
return;
VIR_WARN("Migration protocol failed during incoming migration of domain "
"%s, but QEMU keeps migrating; leaving the domain running, the "
"migration will be handled as unattended", vm->def->name);
/* Keep the domain running; only the reason records the failure. */
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
VIR_DOMAIN_RUNNING_POSTCOPY_FAILED);
}
@ -3453,7 +3470,7 @@ qemuMigrationSrcConfirmPhase(virQEMUDriver *driver,
if (virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
reason == VIR_DOMAIN_PAUSED_POSTCOPY)
qemuMigrationAnyPostcopyFailed(driver, vm);
qemuMigrationSrcPostcopyFailed(vm);
else
qemuMigrationSrcRestoreDomainState(driver, vm);
@ -5826,7 +5843,7 @@ qemuMigrationDstFinish(virQEMUDriver *driver,
VIR_DOMAIN_EVENT_STOPPED_FAILED);
virObjectEventStateQueue(driver->domainEventState, event);
} else {
qemuMigrationAnyPostcopyFailed(driver, vm);
qemuMigrationDstPostcopyFailed(vm);
}
}

View File

@ -251,8 +251,10 @@ qemuMigrationDstRun(virQEMUDriver *driver,
virDomainAsyncJob asyncJob);
void
qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver,
virDomainObj *vm);
qemuMigrationSrcPostcopyFailed(virDomainObj *vm);
void
qemuMigrationDstPostcopyFailed(virDomainObj *vm);
int
qemuMigrationSrcFetchMirrorStats(virQEMUDriver *driver,

View File

@ -3411,7 +3411,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
* confirm success or failure yet; killing it seems safest unless
* we already started guest CPUs or we were in post-copy mode */
if (postcopy) {
qemuMigrationAnyPostcopyFailed(driver, vm);
qemuMigrationDstPostcopyFailed(vm);
} else if (state != VIR_DOMAIN_RUNNING) {
VIR_DEBUG("Killing migrated domain %s", vm->def->name);
return -1;
@ -3462,7 +3462,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
* post-copy mode
*/
if (postcopy) {
qemuMigrationAnyPostcopyFailed(driver, vm);
qemuMigrationSrcPostcopyFailed(vm);
} else {
VIR_DEBUG("Cancelling unfinished migration of domain %s",
vm->def->name);
@ -3480,7 +3480,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
* post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this
*/
if (postcopy)
qemuMigrationAnyPostcopyFailed(driver, vm);
qemuMigrationSrcPostcopyFailed(vm);
break;
case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
@ -3489,7 +3489,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
* as broken in that case
*/
if (postcopy) {
qemuMigrationAnyPostcopyFailed(driver, vm);
qemuMigrationSrcPostcopyFailed(vm);
} else {
VIR_DEBUG("Resuming domain %s after failed migration",
vm->def->name);