From 6908f857f89fa91eebf12953a814fcaa66343276 Mon Sep 17 00:00:00 2001 From: Jiri Denemark Date: Tue, 10 May 2022 15:20:25 +0200 Subject: [PATCH] qemu: Finish completed unattended migration So far migration could only be completed while a migration API was running and waiting for the migration to finish. In case such an API could not be called (the connection that initiated the migration is broken), the migration would just be aborted or left in a "don't know what to do" state. But this will change soon and we will be able to successfully complete such a migration once we get the corresponding event from QEMU. This is specific to post-copy migration when vCPUs are already running on the destination and we're only waiting for all memory pages to be transferred. Such a post-copy migration (which no one is actively watching) is called unattended migration. Signed-off-by: Jiri Denemark Reviewed-by: Peter Krempa Reviewed-by: Pavel Hrdina --- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 5 +++++ src/qemu/qemu_migration.c | 43 +++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_migration.h | 6 ++++++ src/qemu/qemu_process.c | 12 ++++++++++- 6 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 6ad337d4a8..941cf7e0e4 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -11114,6 +11114,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) qemuMonitorMemoryDeviceSizeChangeFree(event->data); break; case QEMU_PROCESS_EVENT_PR_DISCONNECT: + case QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION: case QEMU_PROCESS_EVENT_LAST: break; } diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index ce2dba499c..153dfe3a23 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -426,6 +426,7 @@ typedef enum { QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED, QEMU_PROCESS_EVENT_GUEST_CRASHLOADED, QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE, + 
QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, QEMU_PROCESS_EVENT_LAST } qemuProcessEventType; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 3ac9da37dd..bc8f5c34db 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4312,6 +4312,11 @@ static void qemuProcessEventHandler(void *data, void *opaque) case QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE: processMemoryDeviceSizeChange(driver, vm, processEvent->data); break; + case QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION: + qemuMigrationProcessUnattended(driver, vm, + processEvent->action, + processEvent->status); + break; case QEMU_PROCESS_EVENT_LAST: break; } diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index d0ce17b9e4..7839874f50 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -5811,8 +5811,11 @@ qemuMigrationDstComplete(virQEMUDriver *driver, qemuDomainSaveStatus(vm); - /* Guest is successfully running, so cancel previous auto destroy */ - qemuProcessAutoDestroyRemove(driver, vm); + /* Guest is successfully running, so cancel previous auto destroy. There's + * nothing to remove when we are resuming post-copy migration. + */ + if (!virDomainObjIsFailedPostcopy(vm)) + qemuProcessAutoDestroyRemove(driver, vm); /* Remove completed stats for post-copy, everything but timing fields * is obsolete anyway. @@ -6179,6 +6182,42 @@ qemuMigrationDstFinish(virQEMUDriver *driver, } +void +qemuMigrationProcessUnattended(virQEMUDriver *driver, + virDomainObj *vm, + virDomainAsyncJob job, + qemuMonitorMigrationStatus status) +{ + qemuDomainObjPrivate *priv = vm->privateData; + qemuMigrationJobPhase phase; + + if (!qemuMigrationJobIsActive(vm, job) || + status != QEMU_MONITOR_MIGRATION_STATUS_COMPLETED) + return; + + VIR_DEBUG("Unattended %s migration of domain %s successfully finished", + job == VIR_ASYNC_JOB_MIGRATION_IN ? 
"incoming" : "outgoing", + vm->def->name); + + if (job == VIR_ASYNC_JOB_MIGRATION_IN) + phase = QEMU_MIGRATION_PHASE_FINISH3; + else + phase = QEMU_MIGRATION_PHASE_CONFIRM3; + + qemuMigrationJobStartPhase(vm, phase); + + if (job == VIR_ASYNC_JOB_MIGRATION_IN) + qemuMigrationDstComplete(driver, vm, true, job, &priv->job); + else + qemuMigrationSrcComplete(driver, vm, job); + + qemuMigrationJobFinish(vm); + + if (!virDomainObjIsActive(vm)) + qemuDomainRemoveInactive(driver, vm); +} + + /* Helper function called while vm is active. */ int qemuMigrationSrcToFile(virQEMUDriver *driver, virDomainObj *vm, diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index c099cf99cf..eeb69a52bf 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -211,6 +211,12 @@ qemuMigrationSrcComplete(virQEMUDriver *driver, virDomainObj *vm, virDomainAsyncJob asyncJob); +void +qemuMigrationProcessUnattended(virQEMUDriver *driver, + virDomainObj *vm, + virDomainAsyncJob job, + qemuMonitorMigrationStatus status); + bool qemuMigrationSrcIsAllowed(virQEMUDriver *driver, virDomainObj *vm, diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index d3769de496..97d84893be 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1549,12 +1549,22 @@ qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED, } break; + case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: + /* A post-copy migration marked as failed when reconnecting to a domain + * with running migration may actually still be running, but we're not + * watching it in any thread. Let's make sure the migration is properly + * finished in case we get a "completed" event. 
+ */ + if (virDomainObjIsFailedPostcopy(vm) && priv->job.asyncOwner == 0) + qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, + priv->job.asyncJob, status, NULL); + break; + case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: case QEMU_MONITOR_MIGRATION_STATUS_SETUP: case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE: case QEMU_MONITOR_MIGRATION_STATUS_PRE_SWITCHOVER: case QEMU_MONITOR_MIGRATION_STATUS_DEVICE: - case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: case QEMU_MONITOR_MIGRATION_STATUS_ERROR: case QEMU_MONITOR_MIGRATION_STATUS_CANCELLING: case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED: