qemu: Recover from interrupted jobs

Detect and react to situations in which libvirtd was restarted or killed
while a job was active.
This commit is contained in:
Jiri Denemark 2011-07-04 23:33:39 +02:00
parent ff340a84b8
commit e6704af1fc
3 changed files with 96 additions and 0 deletions

View File

@ -142,6 +142,20 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
memset(&job->signalsData, 0, sizeof(job->signalsData));
}
/*
 * Hand the job identifiers recorded in the domain's private data over to
 * @job and clear the domain's own job tracking.
 *
 * @obj: domain object whose privateData holds the job state
 * @job: caller-provided structure; it is zeroed and then receives only the
 *       'active' and 'asyncJob' values found in the private data
 *
 * After the copy, both the regular and the async job state kept in the
 * private data are reset via qemuDomainObjResetJob() and
 * qemuDomainObjResetAsyncJob().
 */
void
qemuDomainObjRestoreJob(virDomainObjPtr obj,
                        struct qemuDomainJobObj *job)
{
    qemuDomainObjPrivatePtr priv = obj->privateData;

    /* Start from a clean slate so every field not copied below is zero. */
    memset(job, 0, sizeof *job);
    job->asyncJob = priv->job.asyncJob;
    job->active = priv->job.active;

    /* The values now live in @job; drop them from the domain itself. */
    qemuDomainObjResetJob(priv);
    qemuDomainObjResetAsyncJob(priv);
}
static void
qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv)
{

View File

@ -184,6 +184,8 @@ void qemuDomainObjEndNestedJob(struct qemud_driver *driver,
void qemuDomainObjSaveJob(struct qemud_driver *driver, virDomainObjPtr obj);
void qemuDomainObjSetAsyncJobMask(virDomainObjPtr obj,
unsigned long long allowedJobs);
/* Zeroes @job, copies the 'active' and 'asyncJob' values from the private
 * data of @obj into it, and then resets the job state kept in that private
 * data. */
void qemuDomainObjRestoreJob(virDomainObjPtr obj,
struct qemuDomainJobObj *job);
void qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver,
virDomainObjPtr obj);

View File

@ -2231,6 +2231,80 @@ qemuProcessUpdateState(struct qemud_driver *driver, virDomainObjPtr vm)
return 0;
}
/*
 * Decide what to do about a job that was recorded for domain @vm but did
 * not finish (see the commit description: libvirtd was restarted or killed
 * while the job was active).
 *
 * @driver: QEMU driver, passed through to qemuProcessStartCPUs()
 * @vm:     domain being recovered
 * @conn:   connection, passed through to qemuProcessStartCPUs()
 * @job:    snapshot of the interrupted job ('asyncJob' and 'active' are read)
 *
 * The async job is handled first, then the regular job.
 *
 * Returns 0 when the domain can be kept, -1 when the domain is no longer
 * active after async job recovery or when the interrupted job was
 * QEMU_JOB_DESTROY.
 */
static int
qemuProcessRecoverJob(struct qemud_driver *driver,
                      virDomainObjPtr vm,
                      virConnectPtr conn,
                      const struct qemuDomainJobObj *job)
{
    virDomainState state;
    int reason;

    state = virDomainObjGetState(vm, &reason);

    switch (job->asyncJob) {
    case QEMU_ASYNC_JOB_MIGRATION_OUT:
    case QEMU_ASYNC_JOB_MIGRATION_IN:
        /* we don't know what to do yet */
        break;

    case QEMU_ASYNC_JOB_SAVE:
    case QEMU_ASYNC_JOB_DUMP:
        /* TODO cancel possibly running migrate operation */
        /* resume the domain but only if it was paused as a result of
         * running save/dump operation (or if the pause reason is unknown) */
        if (state == VIR_DOMAIN_PAUSED &&
            ((job->asyncJob == QEMU_ASYNC_JOB_DUMP &&
              reason == VIR_DOMAIN_PAUSED_DUMP) ||
             (job->asyncJob == QEMU_ASYNC_JOB_SAVE &&
              reason == VIR_DOMAIN_PAUSED_SAVE) ||
             reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
            /* Failure to resume is only logged; recovery continues. */
            if (qemuProcessStartCPUs(driver, vm, conn,
                                     VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
                VIR_WARN("Could not resume domain %s after interrupted "
                         "save/dump job", vm->def->name);
            }
        }
        break;

    case QEMU_ASYNC_JOB_NONE:
    case QEMU_ASYNC_JOB_LAST:
        break;
    }

    /* Bail out if the domain is not active at this point. */
    if (!virDomainObjIsActive(vm))
        return -1;

    switch (job->active) {
    case QEMU_JOB_QUERY:
        /* harmless */
        break;

    case QEMU_JOB_DESTROY:
        VIR_DEBUG("Domain %s should have already been destroyed",
                  vm->def->name);
        return -1;

    case QEMU_JOB_SUSPEND:
        /* mostly harmless */
        break;

    case QEMU_JOB_MODIFY:
        /* XXX depending on the command we may be in an inconsistent state and
         * we should probably fall back to "monitor error" state and refuse
         * further operations on the domain.
         * NOTE(review): the original comment ended at "refuse to"; the
         * completion above is an assumption, confirm with the author. */
        break;

    case QEMU_JOB_ASYNC:
    case QEMU_JOB_ASYNC_NESTED:
        /* async job was already handled above */
        /* fallthrough */
    case QEMU_JOB_NONE:
    case QEMU_JOB_LAST:
        break;
    }

    return 0;
}
struct qemuProcessReconnectData {
virConnectPtr conn;
struct qemud_driver *driver;
@ -2247,9 +2321,12 @@ qemuProcessReconnect(void *payload, const void *name ATTRIBUTE_UNUSED, void *opa
struct qemud_driver *driver = data->driver;
qemuDomainObjPrivatePtr priv;
virConnectPtr conn = data->conn;
struct qemuDomainJobObj oldjob;
virDomainObjLock(obj);
qemuDomainObjRestoreJob(obj, &oldjob);
VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name);
priv = obj->privateData;
@ -2295,6 +2372,9 @@ qemuProcessReconnect(void *payload, const void *name ATTRIBUTE_UNUSED, void *opa
if (qemuProcessFiltersInstantiate(conn, obj->def))
goto error;
if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
goto error;
priv->job.active = QEMU_JOB_NONE;
/* update domain state XML with possibly updated state in virDomainObj */