mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2025-01-22 20:45:18 +00:00
qemu: Remember failed post-copy migration in job
When post-copy migration fails, the domain stays running on the destination with a VIR_DOMAIN_RUNNING_POSTCOPY_FAILED reason. Both the state and the reason can later be rewritten in case the domain gets paused for other reasons (such as an I/O error). Thus we need a separate place to remember that the post-copy migration failed, so that the migration can still be resumed later.

https://bugzilla.redhat.com/show_bug.cgi?id=2111948

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
This commit is contained in:
parent
49a5754063
commit
7050dad5f9
@ -27922,8 +27922,13 @@ virDomainObjGetState(virDomainObj *dom, int *reason)
|
|||||||
|
|
||||||
bool
|
bool
|
||||||
virDomainObjIsFailedPostcopy(virDomainObj *dom,
|
virDomainObjIsFailedPostcopy(virDomainObj *dom,
|
||||||
virDomainJobObj *job G_GNUC_UNUSED)
|
virDomainJobObj *job)
|
||||||
{
|
{
|
||||||
|
if (job && job->asyncPaused &&
|
||||||
|
(job->asyncJob == VIR_ASYNC_JOB_MIGRATION_IN ||
|
||||||
|
job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT))
|
||||||
|
return true;
|
||||||
|
|
||||||
return ((dom->state.state == VIR_DOMAIN_PAUSED &&
|
return ((dom->state.state == VIR_DOMAIN_PAUSED &&
|
||||||
dom->state.reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED) ||
|
dom->state.reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED) ||
|
||||||
(dom->state.state == VIR_DOMAIN_RUNNING &&
|
(dom->state.state == VIR_DOMAIN_RUNNING &&
|
||||||
|
@ -174,6 +174,7 @@ virDomainObjResetAsyncJob(virDomainJobObj *job)
|
|||||||
job->asyncOwner = 0;
|
job->asyncOwner = 0;
|
||||||
g_clear_pointer(&job->asyncOwnerAPI, g_free);
|
g_clear_pointer(&job->asyncOwnerAPI, g_free);
|
||||||
job->asyncStarted = 0;
|
job->asyncStarted = 0;
|
||||||
|
job->asyncPaused = false;
|
||||||
job->phase = 0;
|
job->phase = 0;
|
||||||
job->mask = VIR_JOB_DEFAULT_MASK;
|
job->mask = VIR_JOB_DEFAULT_MASK;
|
||||||
job->abortJob = false;
|
job->abortJob = false;
|
||||||
|
@ -176,6 +176,7 @@ struct _virDomainJobObj {
|
|||||||
unsigned long long asyncOwner; /* Thread which set current async job */
|
unsigned long long asyncOwner; /* Thread which set current async job */
|
||||||
char *asyncOwnerAPI; /* The API which owns the async job */
|
char *asyncOwnerAPI; /* The API which owns the async job */
|
||||||
unsigned long long asyncStarted; /* When the current async job started */
|
unsigned long long asyncStarted; /* When the current async job started */
|
||||||
|
bool asyncPaused; /* The async job is paused */
|
||||||
int phase; /* Job phase (mainly for migrations) */
|
int phase; /* Job phase (mainly for migrations) */
|
||||||
unsigned long long mask; /* Jobs allowed during async job */
|
unsigned long long mask; /* Jobs allowed during async job */
|
||||||
virDomainJobData *current; /* async job progress data */
|
virDomainJobData *current; /* async job progress data */
|
||||||
|
@ -695,6 +695,8 @@ qemuDomainObjPrivateXMLFormatJob(virBuffer *buf,
|
|||||||
if (vm->job->asyncJob != VIR_ASYNC_JOB_NONE) {
|
if (vm->job->asyncJob != VIR_ASYNC_JOB_NONE) {
|
||||||
virBufferAsprintf(&attrBuf, " flags='0x%x'", vm->job->apiFlags);
|
virBufferAsprintf(&attrBuf, " flags='0x%x'", vm->job->apiFlags);
|
||||||
virBufferAsprintf(&attrBuf, " asyncStarted='%llu'", vm->job->asyncStarted);
|
virBufferAsprintf(&attrBuf, " asyncStarted='%llu'", vm->job->asyncStarted);
|
||||||
|
if (vm->job->asyncPaused)
|
||||||
|
virBufferAddLit(&attrBuf, " asyncPaused='yes'");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vm->job->cb &&
|
if (vm->job->cb &&
|
||||||
@ -732,6 +734,7 @@ qemuDomainObjPrivateXMLParseJob(virDomainObj *vm,
|
|||||||
|
|
||||||
if ((tmp = virXPathString("string(@async)", ctxt))) {
|
if ((tmp = virXPathString("string(@async)", ctxt))) {
|
||||||
int async;
|
int async;
|
||||||
|
virTristateBool paused;
|
||||||
|
|
||||||
if ((async = virDomainAsyncJobTypeFromString(tmp)) < 0) {
|
if ((async = virDomainAsyncJobTypeFromString(tmp)) < 0) {
|
||||||
virReportError(VIR_ERR_INTERNAL_ERROR,
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
||||||
@ -757,6 +760,12 @@ qemuDomainObjPrivateXMLParseJob(virDomainObj *vm,
|
|||||||
_("Invalid async job start"));
|
_("Invalid async job start"));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (virXMLPropTristateBool(ctxt->node, "asyncPaused", VIR_XML_PROP_NONE,
|
||||||
|
&paused) < 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
vm->job->asyncPaused = paused == VIR_TRISTATE_BOOL_YES;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (virXMLPropUInt(ctxt->node, "flags", 16, VIR_XML_PROP_NONE,
|
if (virXMLPropUInt(ctxt->node, "flags", 16, VIR_XML_PROP_NONE,
|
||||||
|
@ -1664,17 +1664,19 @@ qemuMigrationSrcPostcopyFailed(virDomainObj *vm)
|
|||||||
|
|
||||||
state = virDomainObjGetState(vm, &reason);
|
state = virDomainObjGetState(vm, &reason);
|
||||||
|
|
||||||
VIR_DEBUG("%s/%s",
|
VIR_DEBUG("%s/%s, asyncPaused=%u",
|
||||||
virDomainStateTypeToString(state),
|
virDomainStateTypeToString(state),
|
||||||
virDomainStateReasonToString(state, reason));
|
virDomainStateReasonToString(state, reason),
|
||||||
|
vm->job->asyncPaused);
|
||||||
|
|
||||||
if (state != VIR_DOMAIN_PAUSED ||
|
if (state != VIR_DOMAIN_PAUSED ||
|
||||||
reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED)
|
virDomainObjIsFailedPostcopy(vm, vm->job))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
VIR_WARN("Migration of domain %s failed during post-copy; "
|
VIR_WARN("Migration of domain %s failed during post-copy; "
|
||||||
"leaving the domain paused", vm->def->name);
|
"leaving the domain paused", vm->def->name);
|
||||||
|
|
||||||
|
vm->job->asyncPaused = true;
|
||||||
virDomainObjSetState(vm, VIR_DOMAIN_PAUSED,
|
virDomainObjSetState(vm, VIR_DOMAIN_PAUSED,
|
||||||
VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
|
VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
|
||||||
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED,
|
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED,
|
||||||
@ -1694,21 +1696,31 @@ qemuMigrationDstPostcopyFailed(virDomainObj *vm)
|
|||||||
|
|
||||||
state = virDomainObjGetState(vm, &reason);
|
state = virDomainObjGetState(vm, &reason);
|
||||||
|
|
||||||
VIR_DEBUG("%s/%s",
|
VIR_DEBUG("%s/%s, asyncPaused=%u",
|
||||||
virDomainStateTypeToString(state),
|
virDomainStateTypeToString(state),
|
||||||
virDomainStateReasonToString(state, reason));
|
virDomainStateReasonToString(state, reason),
|
||||||
|
vm->job->asyncPaused);
|
||||||
|
|
||||||
if (state != VIR_DOMAIN_RUNNING ||
|
if ((state != VIR_DOMAIN_RUNNING && state != VIR_DOMAIN_PAUSED) ||
|
||||||
reason == VIR_DOMAIN_RUNNING_POSTCOPY_FAILED)
|
virDomainObjIsFailedPostcopy(vm, vm->job))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
VIR_WARN("Incoming migration of domain '%s' failed during post-copy; "
|
VIR_WARN("Incoming migration of domain '%s' failed during post-copy; "
|
||||||
"leaving the domain running", vm->def->name);
|
"leaving the domain running", vm->def->name);
|
||||||
|
|
||||||
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
|
vm->job->asyncPaused = true;
|
||||||
VIR_DOMAIN_RUNNING_POSTCOPY_FAILED);
|
if (state == VIR_DOMAIN_RUNNING) {
|
||||||
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_RESUMED,
|
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
|
||||||
VIR_DOMAIN_EVENT_RESUMED_POSTCOPY_FAILED);
|
VIR_DOMAIN_RUNNING_POSTCOPY_FAILED);
|
||||||
|
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_RESUMED,
|
||||||
|
VIR_DOMAIN_EVENT_RESUMED_POSTCOPY_FAILED);
|
||||||
|
} else {
|
||||||
|
/* The domain was paused for other reasons (I/O error, ...) so we don't
|
||||||
|
* want to rewrite the original reason and just emit a postcopy-failed
|
||||||
|
* event. */
|
||||||
|
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED,
|
||||||
|
VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY_FAILED);
|
||||||
|
}
|
||||||
virObjectEventStateQueue(driver->domainEventState, event);
|
virObjectEventStateQueue(driver->domainEventState, event);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -712,6 +712,15 @@ qemuProcessHandleResume(qemuMonitor *mon G_GNUC_UNUSED,
|
|||||||
vm->def->name, virDomainRunningReasonTypeToString(reason),
|
vm->def->name, virDomainRunningReasonTypeToString(reason),
|
||||||
eventDetail);
|
eventDetail);
|
||||||
|
|
||||||
|
/* When a domain is running in (failed) post-copy migration on the
|
||||||
|
* destination host, we need to make sure to set the appropriate reason
|
||||||
|
* here. */
|
||||||
|
if (virDomainObjIsPostcopy(vm, vm->job)) {
|
||||||
|
if (virDomainObjIsFailedPostcopy(vm, vm->job))
|
||||||
|
reason = VIR_DOMAIN_RUNNING_POSTCOPY_FAILED;
|
||||||
|
else
|
||||||
|
reason = VIR_DOMAIN_RUNNING_POSTCOPY;
|
||||||
|
}
|
||||||
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
|
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
|
||||||
event = virDomainEventLifecycleNewFromObj(vm,
|
event = virDomainEventLifecycleNewFromObj(vm,
|
||||||
VIR_DOMAIN_EVENT_RESUMED,
|
VIR_DOMAIN_EVENT_RESUMED,
|
||||||
@ -1491,6 +1500,7 @@ qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
|
|||||||
vm->def->name,
|
vm->def->name,
|
||||||
virDomainStateTypeToString(state),
|
virDomainStateTypeToString(state),
|
||||||
NULLSTR(virDomainStateReasonToString(state, reason)));
|
NULLSTR(virDomainStateReasonToString(state, reason)));
|
||||||
|
vm->job->asyncPaused = false;
|
||||||
virDomainObjSetState(vm, state, reason);
|
virDomainObjSetState(vm, state, reason);
|
||||||
event = virDomainEventLifecycleNewFromObj(vm, eventType, eventDetail);
|
event = virDomainEventLifecycleNewFromObj(vm, eventType, eventDetail);
|
||||||
qemuDomainSaveStatus(vm);
|
qemuDomainSaveStatus(vm);
|
||||||
@ -3420,6 +3430,7 @@ qemuProcessRestoreMigrationJob(virDomainObj *vm,
|
|||||||
job->privateData = g_steal_pointer(&vm->job->privateData);
|
job->privateData = g_steal_pointer(&vm->job->privateData);
|
||||||
vm->job->privateData = jobPriv;
|
vm->job->privateData = jobPriv;
|
||||||
vm->job->apiFlags = job->apiFlags;
|
vm->job->apiFlags = job->apiFlags;
|
||||||
|
vm->job->asyncPaused = job->asyncPaused;
|
||||||
|
|
||||||
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
|
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
|
||||||
}
|
}
|
||||||
@ -3645,6 +3656,7 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
|
|||||||
if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
|
if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
|
||||||
VIR_DEBUG("Post-copy migration of domain %s still running, it will be handled as unattended",
|
VIR_DEBUG("Post-copy migration of domain %s still running, it will be handled as unattended",
|
||||||
vm->def->name);
|
vm->def->name);
|
||||||
|
vm->job->asyncPaused = false;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3653,6 +3665,9 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
|
|||||||
qemuMigrationSrcPostcopyFailed(vm);
|
qemuMigrationSrcPostcopyFailed(vm);
|
||||||
else
|
else
|
||||||
qemuMigrationDstPostcopyFailed(vm);
|
qemuMigrationDstPostcopyFailed(vm);
|
||||||
|
/* Set the asyncPaused flag in case we're reconnecting to a domain
|
||||||
|
* started by an older libvirt. */
|
||||||
|
vm->job->asyncPaused = true;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user