blockjob: support pivot operation on cancel

This is the bare minimum to end a copy job (of course, until a
later patch adds the ability to start a copy job, this patch
doesn't do much in isolation; I've just split the patches to
ease the review).

This patch intentionally avoids SELinux, lock manager, and audit
actions.  Also, if libvirtd restarts at the exact moment that a
'block-job-complete' is in flight, the proposed proper way to
detect the outcome of that would be with a persistent bitmap and
some additional query commands when libvirtd restarts.  This
patch is enough to test the common case of success when used
correctly, while saving the subtleties of proper cleanup for
worst-case errors for later.

When a mirror job is started, cancelling the job safely reverts back
to the source disk, regardless of whether the destination is in
phase 1 (streaming, in which case the destination is worthless) or
phase 2 (mirroring, in which case the destination is synced up to
the source at the time of the cancel).  Our existing code does just
fine in either phase, other than some bookkeeping cleanup; this
implements live block copy.

Ideas for future enhancements via new flags:

Depending on when persistent bitmap support is added, it may be
worth adding a VIR_DOMAIN_REBASE_COPY_ATOMIC flag that fails up
front if we detect an older qemu with risky pivot operation.

Interesting side note: while snapshot-create --disk-only creates a
copy of the disk at a point in time by moving the domain on to a
new file (the copy is the file now in the just-extended backing
chain), blockjob --abort of a copy job creates a copy of the disk
while keeping the domain on the original file.  There may be
potential improvements to the snapshot code to exploit block copy
over multiple disks all at one point in time.  And, if
'block-job-cancel' were made part of 'transaction', you could
copy multiple disks at the same point in time without pausing
the domain.  This also implies we may want to add a --quiesce flag
to virDomainBlockJobAbort, so that when breaking a mirror (whether
by cancel or pivot), the side of the mirror that we are abandoning
is at least in a stable state with regards to guest I/O.

* src/qemu/qemu_driver.c (qemuDomainBlockJobAbort): Accept new flag.
(qemuDomainBlockPivot): New helper function.
(qemuDomainBlockJobImpl): Implement it.
This commit is contained in:
Eric Blake 2012-04-16 16:19:19 -06:00
parent edecd45c78
commit eaba79d22e

View File

@ -12548,6 +12548,81 @@ cleanup:
return ret;
}
/* Called while holding the VM job lock, to implement a block job
* abort with pivot; this updates the VM definition as appropriate, on
* either success or failure. */
static int
qemuDomainBlockPivot(struct qemud_driver *driver, virDomainObjPtr vm,
const char *device, virDomainDiskDefPtr disk)
{
int ret = -1;
qemuDomainObjPrivatePtr priv = vm->privateData;
virDomainBlockJobInfo info;
const char *format = virStorageFileFormatTypeToString(disk->mirrorFormat);
/* Probe the status, if needed. */
if (!disk->mirroring) {
qemuDomainObjEnterMonitorWithDriver(driver, vm);
ret = qemuMonitorBlockJob(priv->mon, device, NULL, 0, &info,
BLOCK_JOB_INFO, true);
qemuDomainObjExitMonitorWithDriver(driver, vm);
if (ret < 0)
goto cleanup;
if (!virDomainObjIsActive(vm)) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("domain is not running"));
goto cleanup;
}
if (ret == 1 && info.cur == info.end &&
info.type == VIR_DOMAIN_BLOCK_JOB_TYPE_COPY)
disk->mirroring = true;
}
if (!disk->mirroring) {
virReportError(VIR_ERR_BLOCK_COPY_ACTIVE,
_("disk '%s' not ready for pivot yet"),
disk->dst);
goto cleanup;
}
/* Attempt the pivot. */
qemuDomainObjEnterMonitorWithDriver(driver, vm);
ret = qemuMonitorDrivePivot(priv->mon, device, disk->mirror, format);
qemuDomainObjExitMonitorWithDriver(driver, vm);
if (ret == 0) {
/* XXX We want to revoke security labels and disk lease, as
* well as audit that revocation, before dropping the original
* source. But it gets tricky if both source and mirror share
* common backing files (we want to only revoke the non-shared
* portion of the chain, and is made more difficult by the
* fact that we aren't tracking the full chain ourselves; so
* for now, we leak the access to the original. */
VIR_FREE(disk->src);
disk->src = disk->mirror;
disk->format = disk->mirrorFormat;
disk->mirror = NULL;
disk->mirrorFormat = VIR_STORAGE_FILE_NONE;
disk->mirroring = false;
qemuDomainDetermineDiskChain(driver, disk, true);
} else {
/* On failure, qemu abandons the mirror, and reverts back to
* the source disk (RHEL 6.3 has a bug where the revert could
* cause catastrophic failure in qemu, but we don't need to
* worry about it here as it is not an upstream qemu problem. */
/* XXX should we be parsing the exact qemu error, or calling
* 'query-block', to see what state we really got left in
* before killing the mirroring job? And just as on the
* success case, there's security labeling to worry about. */
VIR_FREE(disk->mirror);
disk->mirrorFormat = VIR_STORAGE_FILE_NONE;
disk->mirroring = false;
}
cleanup:
return ret;
}
static int
qemuDomainBlockJobImpl(virDomainPtr dom, const char *path, const char *base,
unsigned long bandwidth, virDomainBlockJobInfoPtr info,
@ -12608,6 +12683,14 @@ qemuDomainBlockJobImpl(virDomainPtr dom, const char *path, const char *base,
disk->dst);
goto cleanup;
}
if (mode == BLOCK_JOB_ABORT &&
(flags & VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT) &&
!(async && disk->mirror)) {
virReportError(VIR_ERR_OPERATION_INVALID,
_("pivot of disk '%s' requires an active copy job"),
disk->dst);
goto cleanup;
}
if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_MODIFY) < 0)
goto cleanup;
@ -12618,6 +12701,12 @@ qemuDomainBlockJobImpl(virDomainPtr dom, const char *path, const char *base,
goto endjob;
}
if (disk->mirror && mode == BLOCK_JOB_ABORT &&
(flags & VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT)) {
ret = qemuDomainBlockPivot(driver, vm, device, disk);
goto endjob;
}
qemuDomainObjEnterMonitorWithDriver(driver, vm);
/* XXX - libvirt should really be tracking the backing file chain
* itself, and validating that base is on the chain, rather than
@ -12634,6 +12723,15 @@ qemuDomainBlockJobImpl(virDomainPtr dom, const char *path, const char *base,
info->cur == info->end && info->type == VIR_DOMAIN_BLOCK_JOB_TYPE_COPY)
disk->mirroring = true;
/* A successful block job cancelation stops any mirroring. */
if (mode == BLOCK_JOB_ABORT && disk->mirror) {
/* XXX We should also revoke security labels and disk lease on
* the mirror, and audit that fact, before dropping things. */
VIR_FREE(disk->mirror);
disk->mirrorFormat = VIR_STORAGE_FILE_NONE;
disk->mirroring = false;
}
/* With synchronous block cancel, we must synthesize an event, and
* we silently ignore the ABORT_ASYNC flag. With asynchronous
* block cancel, the event will come from qemu, but without the
@ -12698,7 +12796,8 @@ cleanup:
static int
qemuDomainBlockJobAbort(virDomainPtr dom, const char *path, unsigned int flags)
{
virCheckFlags(VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC, -1);
virCheckFlags(VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC |
VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT, -1);
return qemuDomainBlockJobImpl(dom, path, NULL, 0, NULL, BLOCK_JOB_ABORT,
flags);
}