blockjob: split out block info driver handling

The qemu implementation for virDomainGetBlockJobInfo() has a
minor bug: it grabs the qemu job with intent to QEMU_JOB_MODIFY,
which means it cannot be run in parallel with any other
domain-modifying command.  Among others, virDomainBlockJobAbort()
is such a modifying command, and it defaults to being
synchronous, and can wait as long as several seconds to ensure
that the job has actually finished.  Due to the job rules, this
means a user cannot obtain status about the job during that
timeframe, even though we know that some client management code
exists which is using a polling loop on status to see when a job
finishes.

This bug has been present ever since blockpull support was first
introduced (commit b976165, v0.9.4 in Jul 2011), all because we
stupidly tried to cram too much multiplexing through a single
helper routine, but was made worse in 97c59b9 (v1.2.7) when
BlockJobAbort was fixed to wait longer.  It's time to disentangle
some of the mess in qemuDomainBlockJobImpl, and in the process
relax block job query to use QEMU_JOB_QUERY, since it can safely
be used in parallel with any long running modify command.

Technically, there is one case where getting block job info can
modify domain XML - we do snooping to see if a 2-phase job has
transitioned into the second phase, for an optimization in the
case of old qemu that lacked an event for the transition.  I
claim this optimization is safe (the jobs are all about modifying
qemu state, not necessarily xml state); but if it proves to be
a problem, we could use the difference between the capabilities
QEMU_CAPS_BLOCKJOB_{ASYNC,SYNC} to determine whether we even
need snooping, and only request a modifying job in the case of
older qemu.

* src/qemu/qemu_driver.c (qemuDomainBlockJobImpl): Move info
handling...
(qemuDomainGetBlockJobInfo): ...here, and relax job type.
(qemuDomainBlockJobAbort, qemuDomainBlockJobSetSpeed)
(qemuDomainBlockRebase, qemuDomainBlockPull): Adjust callers.

Signed-off-by: Eric Blake <eblake@redhat.com>
This commit is contained in:
Eric Blake 2014-08-29 13:36:59 -06:00
parent 02d2bd7d91
commit cefe0ba3db

View File

@ -14994,7 +14994,7 @@ static int
qemuDomainBlockJobImpl(virDomainObjPtr vm,
virConnectPtr conn,
const char *path, const char *base,
unsigned long bandwidth, virDomainBlockJobInfoPtr info,
unsigned long bandwidth,
int mode, unsigned int flags)
{
virQEMUDriverPtr driver = conn->privateData;
@ -15125,25 +15125,14 @@ qemuDomainBlockJobImpl(virDomainObjPtr vm,
qemuDomainObjEnterMonitor(driver, vm);
ret = qemuMonitorBlockJob(priv->mon, device, basePath, backingPath,
bandwidth, info, mode, async);
bandwidth, NULL, mode, async);
qemuDomainObjExitMonitor(driver, vm);
if (info && info->type == VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT &&
disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT)
info->type = disk->mirrorJob;
if (ret < 0) {
if (mode == BLOCK_JOB_ABORT && disk->mirror)
disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_NONE;
goto endjob;
}
/* Snoop block copy operations, so future cancel operations can
* avoid checking if pivot is safe. */
if (mode == BLOCK_JOB_INFO && ret == 1 && disk->mirror &&
info->cur == info->end && !disk->mirrorState) {
disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY;
save = true;
}
waitjob:
/* If we have made changes to XML due to a copy job, make a best
* effort to save it now. But we can ignore failure, since there
@ -15239,15 +15228,22 @@ qemuDomainBlockJobAbort(virDomainPtr dom, const char *path, unsigned int flags)
return -1;
}
return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, 0, NULL, BLOCK_JOB_ABORT,
flags);
return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, 0,
BLOCK_JOB_ABORT, flags);
}
static int
qemuDomainGetBlockJobInfo(virDomainPtr dom, const char *path,
virDomainBlockJobInfoPtr info, unsigned int flags)
{
virQEMUDriverPtr driver = dom->conn->privateData;
qemuDomainObjPrivatePtr priv;
virDomainObjPtr vm;
char *device = NULL;
int idx;
virDomainDiskDefPtr disk;
int ret = -1;
virCheckFlags(0, -1);
if (!(vm = qemuDomObjFromDomain(dom)))
@ -15258,8 +15254,60 @@ qemuDomainGetBlockJobInfo(virDomainPtr dom, const char *path,
return -1;
}
return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, 0, info, BLOCK_JOB_INFO,
flags);
priv = vm->privateData;
if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKJOB_ASYNC) &&
!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKJOB_SYNC)) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("block jobs not supported with this QEMU binary"));
goto cleanup;
}
if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
goto cleanup;
if (!virDomainObjIsActive(vm)) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("domain is not running"));
goto endjob;
}
device = qemuDiskPathToAlias(vm, path, &idx);
if (!device)
goto endjob;
disk = vm->def->disks[idx];
qemuDomainObjEnterMonitor(driver, vm);
ret = qemuMonitorBlockJob(priv->mon, device, NULL, NULL, 0,
info, BLOCK_JOB_INFO, true);
qemuDomainObjExitMonitor(driver, vm);
if (ret < 0)
goto endjob;
if (info->type == VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT &&
disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT)
info->type = disk->mirrorJob;
/* Snoop block copy operations, so future cancel operations can
* avoid checking if pivot is safe. Save the change to XML, but
* we can ignore failure because it is only an optimization. We
* hold the vm lock, so modifying the in-memory representation is
* safe, even if we are a query rather than a modify job. */
if (ret == 1 && disk->mirror &&
info->cur == info->end && !disk->mirrorState) {
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY;
ignore_value(virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm));
virObjectUnref(cfg);
}
endjob:
if (!qemuDomainObjEndJob(driver, vm))
vm = NULL;
cleanup:
if (vm)
virObjectUnlock(vm);
return ret;
}
static int
@ -15277,7 +15325,7 @@ qemuDomainBlockJobSetSpeed(virDomainPtr dom, const char *path,
return -1;
}
return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth, NULL,
return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth,
BLOCK_JOB_SPEED, flags);
}
@ -15486,7 +15534,7 @@ qemuDomainBlockRebase(virDomainPtr dom, const char *path, const char *base,
* everything, including vm cleanup. */
if (!(flags & VIR_DOMAIN_BLOCK_REBASE_COPY))
return qemuDomainBlockJobImpl(vm, dom->conn, path, base, bandwidth,
NULL, BLOCK_JOB_PULL, flags);
BLOCK_JOB_PULL, flags);
/* If we got here, we are doing a block copy rebase. */
if (flags & VIR_DOMAIN_BLOCK_REBASE_COPY_RAW)
@ -15527,7 +15575,7 @@ qemuDomainBlockPull(virDomainPtr dom, const char *path, unsigned long bandwidth,
return -1;
}
return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth, NULL,
return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth,
BLOCK_JOB_PULL, flags);
}