From ef1e024df89cba592cf3c3aecd75ff319111946d Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 17 Sep 2012 11:56:27 -0600 Subject: [PATCH] blockjob: add virDomainBlockCommit A block commit moves data in the opposite direction of block pull. Block pull reduces the chain length by dropping backing files after data has been pulled into the top overlay, and is always safe; block commit reduces the chain length by dropping overlays after data has been committed into the backing file, and any files that depended on base but not on top are invalidated at any point where they have unallocated data that is now pointing to changed contents in base. Both directions are useful, however: a qcow2 layer that is more than 50% allocated will typically be faster with a pull operation, while a qcow2 layer with less than 50% allocation will be faster as a commit operation. Committing across multiple layers can be more efficient than repeatedly committing one layer at a time, but requires extra support from the hypervisor. This API matches Jeff Cody's proposed qemu command 'block-commit': https://lists.gnu.org/archive/html/qemu-devel/2012-09/msg02226.html Jeff's command is still in the works for qemu 1.3, and may gain further enhancements, such as the ability to control on-error handling (it will be comparable to the error handling Paolo is adding to 'drive-mirror', so a similar solution will be needed when I finally propose virDomainBlockCopy with more functionality than the basics supported by virDomainBlockRebase). However, even without qemu support, this API will be useful for _offline_ block commits, by wrapping qemu-img calls and turning them into a block job, so this API is worth committing now. For some examples of how this will be implemented, all starting with the chain: base <- snap1 <- snap2 <- active + These are equivalent: virDomainBlockCommit(dom, disk, NULL, NULL, 0, 0) virDomainBlockCommit(dom, disk, NULL, "active", 0, 0) virDomainBlockCommit(dom, disk, "base", NULL, 0, 0) virDomainBlockCommit(dom, disk, "base", "active", 0, 0) but cannot be implemented for online qemu with round 1 of Jeff's patches; and for offline images, it would require three back-to-back qemu-img invocations unless qemu-img is patched to allow more efficient multi-layer commits; the end result would be 'base' as the active disk with contents from all three other files, where 'snap1' and 'snap2' are invalid right away, and 'active' is invalid once any further changes to 'base' are made. + These are equivalent: virDomainBlockCommit(dom, disk, "snap2", NULL, 0, 0) virDomainBlockCommit(dom, disk, NULL, NULL, 0, _SHALLOW) they cannot be implemented for online qemu, but for offline, it is a matter of 'qemu-img commit active', so that 'snap2' is now the active disk with contents formerly in 'active'. + Similarly: virDomainBlockCommit(dom, disk, "snap2", NULL, 0, _DELETE) for an offline domain will merge 'active' into 'snap2', then delete 'active' to avoid leaving a potentially invalid file around. + This version: virDomainBlockCommit(dom, disk, NULL, "snap2", 0, _SHALLOW) can be implemented online with 'block-commit' passing a base of snap1 and a top of snap2; and can be implemented offline by 'qemu-img commit snap2' followed by 'qemu-img rebase -u -b snap1 active' * include/libvirt/libvirt.h.in (virDomainBlockCommit): New API. * src/libvirt.c (virDomainBlockCommit): Implement it. * src/libvirt_public.syms (LIBVIRT_0.10.2): Export it. * src/driver.h (virDrvDomainBlockCommit): New driver callback. * docs/apibuild.py (CParser.parseSignature): Add exception. --- docs/apibuild.py | 1 + include/libvirt/libvirt.h.in | 20 ++++++ src/driver.h | 5 ++ src/libvirt.c | 114 ++++++++++++++++++++++++++++++++++- src/libvirt_public.syms | 1 + 5 files changed, 139 insertions(+), 2 deletions(-) diff --git a/docs/apibuild.py b/docs/apibuild.py index 3f0ecd939d..ae84be974e 100755 --- a/docs/apibuild.py +++ b/docs/apibuild.py @@ -1759,6 +1759,7 @@ class CParser: "virDomainSetMaxMemory" : (False, ("memory")), "virDomainSetMemory" : (False, ("memory")), "virDomainSetMemoryFlags" : (False, ("memory")), + "virDomainBlockCommit" : (False, ("bandwidth")), "virDomainBlockJobSetSpeed" : (False, ("bandwidth")), "virDomainBlockPull" : (False, ("bandwidth")), "virDomainBlockRebase" : (False, ("bandwidth")), diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index b12f7e3e49..d9bfb6e76d 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -2062,11 +2062,14 @@ int virDomainUpdateDeviceFlags(virDomainPtr domain, * virDomainBlockRebase without flags), job ends on completion * VIR_DOMAIN_BLOCK_JOB_TYPE_COPY: Block Copy (virDomainBlockRebase with * flags), job exists as long as mirroring is active + * VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT: Block Commit (virDomainBlockCommit), + * job ends on completion */ typedef enum { VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN = 0, VIR_DOMAIN_BLOCK_JOB_TYPE_PULL = 1, VIR_DOMAIN_BLOCK_JOB_TYPE_COPY = 2, + VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT = 3, #ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_BLOCK_JOB_TYPE_LAST @@ -2131,6 +2134,23 @@ int virDomainBlockRebase(virDomainPtr dom, const char *disk, const char *base, unsigned long bandwidth, unsigned int flags); +/** + * virDomainBlockCommitFlags: + * + * Flags available for virDomainBlockCommit(). + */ +typedef enum { + VIR_DOMAIN_BLOCK_COMMIT_SHALLOW = 1 << 0, /* NULL base means next backing + file, not whole chain */ + VIR_DOMAIN_BLOCK_COMMIT_DELETE = 1 << 1, /* Delete any files that are now + invalid after their contents + have been committed */ +} virDomainBlockCommitFlags; + +int virDomainBlockCommit(virDomainPtr dom, const char *disk, const char *base, + const char *top, unsigned long bandwidth, + unsigned int flags); + /* Block I/O throttling support */ diff --git a/src/driver.h b/src/driver.h index bb470fea09..063bbbfaa0 100644 --- a/src/driver.h +++ b/src/driver.h @@ -832,6 +832,10 @@ typedef int (*virDrvDomainBlockRebase)(virDomainPtr dom, const char *path, const char *base, unsigned long bandwidth, unsigned int flags); +typedef int + (*virDrvDomainBlockCommit)(virDomainPtr dom, const char *disk, + const char *base, const char *top, + unsigned long bandwidth, unsigned int flags); typedef int (*virDrvSetKeepAlive)(virConnectPtr conn, @@ -1071,6 +1075,7 @@ struct _virDriver { virDrvDomainBlockJobSetSpeed domainBlockJobSetSpeed; virDrvDomainBlockPull domainBlockPull; virDrvDomainBlockRebase domainBlockRebase; + virDrvDomainBlockCommit domainBlockCommit; virDrvSetKeepAlive setKeepAlive; virDrvConnectIsAlive isAlive; virDrvNodeSuspendForDuration nodeSuspendForDuration; diff --git a/src/libvirt.c b/src/libvirt.c index dc8f4e481b..5438fe4cd1 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -19027,7 +19027,8 @@ error: * image. This function pulls data for the entire device in the background. * Progress of the operation can be checked with virDomainGetBlockJobInfo() and * the operation can be aborted with virDomainBlockJobAbort(). When finished, - * an asynchronous event is raised to indicate the final status. + * an asynchronous event is raised to indicate the final status. To move + * data in the opposite direction, see virDomainBlockCommit(). * * The @disk parameter is either an unambiguous source name of the * block device (the sub-element, such as @@ -19170,7 +19171,7 @@ int virDomainBlockRebase(virDomainPtr dom, const char *disk, { virConnectPtr conn; - VIR_DOMAIN_DEBUG(dom, "disk=%s, base=%s bandwidth=%lu, flags=%x", + VIR_DOMAIN_DEBUG(dom, "disk=%s, base=%s, bandwidth=%lu, flags=%x", disk, NULLSTR(base), bandwidth, flags); virResetLastError(); @@ -19217,6 +19218,115 @@ error: } +/** + * virDomainBlockCommit: + * @dom: pointer to domain object + * @disk: path to the block device, or device shorthand + * @base: path to backing file to merge into, or NULL for default + * @top: path to file within backing chain that contains data to be merged, + * or NULL to merge all possible data + * @bandwidth: (optional) specify commit bandwidth limit in MiB/s + * @flags: bitwise-OR of virDomainBlockCommitFlags + * + * Commit changes that were made to temporary top-level files within a disk + * image backing file chain into a lower-level base file. In other words, + * take all the difference between @base and @top, and update @base to contain + * that difference; after the commit, any portion of the chain that previously + * depended on @top will now depend on @base, and all files after @base up + * to and including @top will now be invalidated. A typical use of this + * command is to reduce the length of a backing file chain after taking an + * external disk snapshot. To move data in the opposite direction, see + * virDomainBlockPull(). + * + * This command starts a long-running commit block job, whose status may + * be tracked by virDomainBlockJobInfo() with a job type of + * VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT, and the operation can be aborted with + * virDomainBlockJobAbort(). When finished, an asynchronous event is + * raised to indicate the final status, and the job no longer exists. If + * the job is aborted, it is up to the hypervisor whether starting a new + * job will resume from the same point, or start over. + * + * Be aware that this command may invalidate files even if it is aborted; + * the user is cautioned against relying on the contents of invalidated + * intermediate files such as @top without manually rebasing those files + * to use a backing file of a read-only copy of @base prior to the point + * where the commit operation was started (although such a rebase cannot + * be safely done until the commit has successfully completed). However, + * the domain itself will not have any issues; the active layer remains + * valid throughout the entire commit operation. As a convenience, + * if @flags contains VIR_DOMAIN_BLOCK_COMMIT_DELETE, this command will + * unlink all files that were invalidated, after the commit successfully + * completes. + * + * By default, if @base is NULL, the commit target will be the bottom of + * the backing chain; if @flags contains VIR_DOMAIN_BLOCK_COMMIT_SHALLOW, + * then the immediate backing file of @top will be used instead. If @top + * is NULL, the active image at the top of the chain will be used. Some + * hypervisors place restrictions on how much can be committed, and might + * fail if @base is not the immediate backing file of @top, or if @top is + * the active layer in use by a running domain, or if @top is not the + * top-most file; restrictions may differ for online vs. offline domains. + * + * The @disk parameter is either an unambiguous source name of the + * block device (the sub-element, such as + * "/path/to/image"), or the device target shorthand (the + * sub-element, such as "xvda"). Valid names + * can be found by calling virDomainGetXMLDesc() and inspecting + * elements within //domain/devices/disk. + * + * The maximum bandwidth (in MiB/s) that will be used to do the commit can be + * specified with the bandwidth parameter. If set to 0, libvirt will choose a + * suitable default. Some hypervisors do not support this feature and will + * return an error if bandwidth is not 0; in this case, it might still be + * possible for a later call to virDomainBlockJobSetSpeed() to succeed. + * The actual speed can be determined with virDomainGetBlockJobInfo(). + * + * Returns 0 if the operation has started, -1 on failure. + */ +int virDomainBlockCommit(virDomainPtr dom, const char *disk, + const char *base, const char *top, + unsigned long bandwidth, unsigned int flags) +{ + virConnectPtr conn; + + VIR_DOMAIN_DEBUG(dom, "disk=%s, base=%s, top=%s, bandwidth=%lu, flags=%x", + disk, NULLSTR(base), NULLSTR(top), bandwidth, flags); + + virResetLastError(); + + if (!VIR_IS_CONNECTED_DOMAIN(dom)) { + virLibDomainError(VIR_ERR_INVALID_DOMAIN, __FUNCTION__); + virDispatchError(NULL); + return -1; + } + conn = dom->conn; + + if (dom->conn->flags & VIR_CONNECT_RO) { + virLibDomainError(VIR_ERR_OPERATION_DENIED, __FUNCTION__); + goto error; + } + + virCheckNonNullArgGoto(disk, error); + if (flags & VIR_DOMAIN_BLOCK_COMMIT_SHALLOW) + virCheckNullArgGoto(base, error); + + if (conn->driver->domainBlockCommit) { + int ret; + ret = conn->driver->domainBlockCommit(dom, disk, base, top, bandwidth, + flags); + if (ret < 0) + goto error; + return ret; + } + + virLibDomainError(VIR_ERR_NO_SUPPORT, __FUNCTION__); + +error: + virDispatchError(dom->conn); + return -1; +} + + /** * virDomainOpenGraphics: * @dom: pointer to domain object diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms index 28b92add3c..d965c7f2b8 100644 --- a/src/libvirt_public.syms +++ b/src/libvirt_public.syms @@ -562,6 +562,7 @@ LIBVIRT_0.10.2 { virConnectListAllNWFilters; virConnectListAllSecrets; virConnectListAllStoragePools; + virDomainBlockCommit; virNodeGetMemoryParameters; virNodeSetMemoryParameters; virStoragePoolListAllVolumes;