qemu: Use QEMU_MIGRATION_PHASE_POSTCOPY_FAILED

This phase marks the migration protocol as broken in a post-copy phase.
While in this phase, libvirt is no longer actively watching the
migration because the migration API that started it has failed.

This may happen either when post-copy migration really fails (QEMU
enters the postcopy-paused migration state) or when the migration still
progresses between the two QEMU processes, but libvirt lost control of
it because the connection between libvirt daemons (in p2p migration) or
between a daemon and a client (non-p2p migration) was closed — for
example, when one of the daemons was restarted.

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
Reviewed-by: Pavel Hrdina <phrdina@redhat.com>
This commit is contained in:
Jiri Denemark 2022-05-18 14:02:13 +02:00
parent f1da6e9076
commit b3110bb8e4
2 changed files with 19 additions and 7 deletions

View File

@ -2341,6 +2341,7 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
vm->def->name);
if (virDomainObjIsPostcopy(vm, VIR_DOMAIN_JOB_OPERATION_MIGRATION_OUT)) {
ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuMigrationSrcPostcopyFailed(vm);
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
@ -2352,8 +2353,10 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
}
break;
case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuMigrationSrcPostcopyFailed(vm);
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
@ -2374,7 +2377,6 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
case QEMU_MIGRATION_PHASE_PERFORM2:
/* single phase outgoing migration; unreachable */
case QEMU_MIGRATION_PHASE_NONE:
case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_LAST:
/* unreachable */
;
@ -3744,6 +3746,7 @@ qemuMigrationSrcConfirm(virQEMUDriver *driver,
flags, cancelled);
if (virDomainObjIsFailedPostcopy(vm)) {
ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
} else {
@ -5572,6 +5575,7 @@ qemuMigrationSrcPerformJob(virQEMUDriver *driver,
virErrorPreserveLast(&orig_err);
if (virDomainObjIsFailedPostcopy(vm)) {
ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
} else {
@ -5664,6 +5668,8 @@ qemuMigrationSrcPerformPhase(virQEMUDriver *driver,
jobPriv->migParams, priv->job.apiFlags);
qemuMigrationJobFinish(vm);
} else {
if (ret < 0)
ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
}
@ -5903,7 +5909,7 @@ qemuMigrationDstComplete(virQEMUDriver *driver,
/* Guest is successfully running, so cancel previous auto destroy. There's
* nothing to remove when we are resuming post-copy migration.
*/
if (!virDomainObjIsFailedPostcopy(vm))
if (job->phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)
qemuProcessAutoDestroyRemove(driver, vm);
/* Remove completed stats for post-copy, everything but timing fields
@ -6170,6 +6176,7 @@ qemuMigrationDstFinishActive(virQEMUDriver *driver,
}
if (virDomainObjIsFailedPostcopy(vm)) {
ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuProcessAutoDestroyRemove(driver, vm);
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
*finishJob = false;
@ -6290,9 +6297,9 @@ qemuMigrationProcessUnattended(virQEMUDriver *driver,
vm->def->name);
if (job == VIR_ASYNC_JOB_MIGRATION_IN)
phase = QEMU_MIGRATION_PHASE_FINISH3;
phase = QEMU_MIGRATION_PHASE_FINISH_RESUME;
else
phase = QEMU_MIGRATION_PHASE_CONFIRM3;
phase = QEMU_MIGRATION_PHASE_CONFIRM_RESUME;
if (qemuMigrationJobStartPhase(vm, phase) < 0)
return;

View File

@ -1555,9 +1555,12 @@ qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
* watching it in any thread. Let's make sure the migration is properly
* finished in case we get a "completed" event.
*/
if (virDomainObjIsFailedPostcopy(vm) && priv->job.asyncOwner == 0)
if (virDomainObjIsPostcopy(vm, priv->job.current->operation) &&
priv->job.phase == QEMU_MIGRATION_PHASE_POSTCOPY_FAILED &&
priv->job.asyncOwner == 0) {
qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
priv->job.asyncJob, status, NULL);
}
break;
case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE:
@ -3507,7 +3510,6 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
case QEMU_MIGRATION_PHASE_CONFIRM3:
case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
@ -3545,6 +3547,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
}
break;
case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
case QEMU_MIGRATION_PHASE_FINISH_RESUME:
return 1;
@ -3581,7 +3584,6 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
case QEMU_MIGRATION_PHASE_PREPARE:
case QEMU_MIGRATION_PHASE_FINISH2:
case QEMU_MIGRATION_PHASE_FINISH3:
case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
case QEMU_MIGRATION_PHASE_FINISH_RESUME:
case QEMU_MIGRATION_PHASE_LAST:
@ -3643,6 +3645,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
}
return 1;
case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
return 1;
@ -3694,6 +3697,8 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
return -1;
if (rc > 0) {
job->phase = QEMU_MIGRATION_PHASE_POSTCOPY_FAILED;
if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
VIR_DEBUG("Post-copy migration of domain %s still running, it will be handled as unattended",
vm->def->name);