migration: Make erroring out on I/O error controllable by flag

Paolo Bonzini pointed out that it's actually possible to migrate a qemu
instance that was paused due to an I/O error, and that it will be able to
work on the destination if the storage is accessible there.

This patch introduces the flag VIR_MIGRATE_ABORT_ON_ERROR, which cancels
the migration if an I/O error happens while it is being performed. Without
this flag, the migration is allowed to proceed despite such an error. The
flag may also be used for other error conditions introduced in the future.
This commit is contained in:
Peter Krempa 2013-06-12 16:11:21 +02:00 committed by Jiri Denemark
parent ddf8ad82eb
commit cf6d56ac43
6 changed files with 32 additions and 15 deletions

View File

@ -1188,6 +1188,7 @@ typedef enum {
VIR_MIGRATE_UNSAFE = (1 << 9), /* force migration even if it is considered unsafe */ VIR_MIGRATE_UNSAFE = (1 << 9), /* force migration even if it is considered unsafe */
VIR_MIGRATE_OFFLINE = (1 << 10), /* offline migrate */ VIR_MIGRATE_OFFLINE = (1 << 10), /* offline migrate */
VIR_MIGRATE_COMPRESSED = (1 << 11), /* compress data during migration */ VIR_MIGRATE_COMPRESSED = (1 << 11), /* compress data during migration */
VIR_MIGRATE_ABORT_ON_ERROR = (1 << 12), /* abort migration on I/O errors happened during migration */
} virDomainMigrateFlags; } virDomainMigrateFlags;
/* Domain migration. */ /* Domain migration. */

View File

@ -2819,7 +2819,7 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom,
if (!(caps = virQEMUDriverGetCapabilities(driver, false))) if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
goto cleanup; goto cleanup;
if (!qemuMigrationIsAllowed(driver, vm, vm->def, false)) if (!qemuMigrationIsAllowed(driver, vm, vm->def, false, false))
goto cleanup; goto cleanup;
if (qemuDomainObjBeginAsyncJob(driver, vm, if (qemuDomainObjBeginAsyncJob(driver, vm,
@ -11670,7 +11670,7 @@ qemuDomainSnapshotCreateActiveExternal(virConnectPtr conn,
/* do the memory snapshot if necessary */ /* do the memory snapshot if necessary */
if (memory) { if (memory) {
/* check if migration is possible */ /* check if migration is possible */
if (!qemuMigrationIsAllowed(driver, vm, vm->def, false)) if (!qemuMigrationIsAllowed(driver, vm, vm->def, false, false))
goto endjob; goto endjob;
/* allow the migration job to be cancelled or the domain to be paused */ /* allow the migration job to be cancelled or the domain to be paused */

View File

@ -1420,7 +1420,7 @@ cleanup:
* the fact that older servers did not do checks on the source. */ * the fact that older servers did not do checks on the source. */
bool bool
qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm, qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm,
virDomainDefPtr def, bool remote) virDomainDefPtr def, bool remote, bool abort_on_error)
{ {
int nsnapshots; int nsnapshots;
int pauseReason; int pauseReason;
@ -1448,7 +1448,8 @@ qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm,
} }
/* cancel migration if disk I/O error is emitted while migrating */ /* cancel migration if disk I/O error is emitted while migrating */
if (virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED && if (abort_on_error &&
virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED &&
pauseReason == VIR_DOMAIN_PAUSED_IOERROR) { pauseReason == VIR_DOMAIN_PAUSED_IOERROR) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s", virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("cannot migrate domain with I/O error")); _("cannot migrate domain with I/O error"));
@ -1709,7 +1710,7 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver,
static int static int
qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm,
enum qemuDomainAsyncJob asyncJob, enum qemuDomainAsyncJob asyncJob,
virConnectPtr dconn) virConnectPtr dconn, bool abort_on_error)
{ {
qemuDomainObjPrivatePtr priv = vm->privateData; qemuDomainObjPrivatePtr priv = vm->privateData;
const char *job; const char *job;
@ -1736,7 +1737,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm,
struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull }; struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull };
/* cancel migration if disk I/O error is emitted while migrating */ /* cancel migration if disk I/O error is emitted while migrating */
if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT && if (abort_on_error &&
virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED && virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED &&
pauseReason == VIR_DOMAIN_PAUSED_IOERROR) pauseReason == VIR_DOMAIN_PAUSED_IOERROR)
goto cancel; goto cancel;
@ -1937,6 +1938,7 @@ char *qemuMigrationBegin(virQEMUDriverPtr driver,
qemuDomainObjPrivatePtr priv = vm->privateData; qemuDomainObjPrivatePtr priv = vm->privateData;
virCapsPtr caps = NULL; virCapsPtr caps = NULL;
unsigned int cookieFlags = QEMU_MIGRATION_COOKIE_LOCKSTATE; unsigned int cookieFlags = QEMU_MIGRATION_COOKIE_LOCKSTATE;
bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
VIR_DEBUG("driver=%p, vm=%p, xmlin=%s, dname=%s," VIR_DEBUG("driver=%p, vm=%p, xmlin=%s, dname=%s,"
" cookieout=%p, cookieoutlen=%p, flags=%lx", " cookieout=%p, cookieoutlen=%p, flags=%lx",
@ -1953,7 +1955,7 @@ char *qemuMigrationBegin(virQEMUDriverPtr driver,
if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT)
qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_BEGIN3); qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_BEGIN3);
if (!qemuMigrationIsAllowed(driver, vm, NULL, true)) if (!qemuMigrationIsAllowed(driver, vm, NULL, true, abort_on_error))
goto cleanup; goto cleanup;
if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def)) if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
@ -2069,6 +2071,7 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
virCapsPtr caps = NULL; virCapsPtr caps = NULL;
const char *listenAddr = NULL; const char *listenAddr = NULL;
char *migrateFrom = NULL; char *migrateFrom = NULL;
bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
if (virTimeMillisNow(&now) < 0) if (virTimeMillisNow(&now) < 0)
return -1; return -1;
@ -2098,7 +2101,7 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
if (!(caps = virQEMUDriverGetCapabilities(driver, false))) if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
goto cleanup; goto cleanup;
if (!qemuMigrationIsAllowed(driver, NULL, *def, true)) if (!qemuMigrationIsAllowed(driver, NULL, *def, true, abort_on_error))
goto cleanup; goto cleanup;
/* Let migration hook filter domain XML */ /* Let migration hook filter domain XML */
@ -2795,6 +2798,7 @@ qemuMigrationRun(virQEMUDriverPtr driver,
unsigned long migrate_speed = resource ? resource : priv->migMaxBandwidth; unsigned long migrate_speed = resource ? resource : priv->migMaxBandwidth;
virErrorPtr orig_err = NULL; virErrorPtr orig_err = NULL;
unsigned int cookieFlags = 0; unsigned int cookieFlags = 0;
bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
VIR_DEBUG("driver=%p, vm=%p, cookiein=%s, cookieinlen=%d, " VIR_DEBUG("driver=%p, vm=%p, cookiein=%s, cookieinlen=%d, "
"cookieout=%p, cookieoutlen=%p, flags=%lx, resource=%lu, " "cookieout=%p, cookieoutlen=%p, flags=%lx, resource=%lu, "
@ -2947,7 +2951,7 @@ qemuMigrationRun(virQEMUDriverPtr driver,
if (qemuMigrationWaitForCompletion(driver, vm, if (qemuMigrationWaitForCompletion(driver, vm,
QEMU_ASYNC_JOB_MIGRATION_OUT, QEMU_ASYNC_JOB_MIGRATION_OUT,
dconn) < 0) dconn, abort_on_error) < 0)
goto cleanup; goto cleanup;
/* When migration completed, QEMU will have paused the /* When migration completed, QEMU will have paused the
@ -3628,6 +3632,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
int resume = 0; int resume = 0;
virErrorPtr orig_err = NULL; virErrorPtr orig_err = NULL;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
goto cleanup; goto cleanup;
@ -3638,7 +3643,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
goto endjob; goto endjob;
} }
if (!qemuMigrationIsAllowed(driver, vm, NULL, true)) if (!qemuMigrationIsAllowed(driver, vm, NULL, true, abort_on_error))
goto endjob; goto endjob;
if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def)) if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
@ -4338,7 +4343,7 @@ qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm,
if (rc < 0) if (rc < 0)
goto cleanup; goto cleanup;
rc = qemuMigrationWaitForCompletion(driver, vm, asyncJob, NULL); rc = qemuMigrationWaitForCompletion(driver, vm, asyncJob, NULL, false);
if (rc < 0) if (rc < 0)
goto cleanup; goto cleanup;

View File

@ -38,7 +38,8 @@
VIR_MIGRATE_CHANGE_PROTECTION | \ VIR_MIGRATE_CHANGE_PROTECTION | \
VIR_MIGRATE_UNSAFE | \ VIR_MIGRATE_UNSAFE | \
VIR_MIGRATE_OFFLINE | \ VIR_MIGRATE_OFFLINE | \
VIR_MIGRATE_COMPRESSED) VIR_MIGRATE_COMPRESSED | \
VIR_MIGRATE_ABORT_ON_ERROR)
enum qemuMigrationJobPhase { enum qemuMigrationJobPhase {
QEMU_MIGRATION_PHASE_NONE = 0, QEMU_MIGRATION_PHASE_NONE = 0,
@ -147,7 +148,8 @@ int qemuMigrationConfirm(virQEMUDriverPtr driver,
int retcode); int retcode);
bool qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm, bool qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm,
virDomainDefPtr def, bool remote); virDomainDefPtr def, bool remote,
bool abort_on_error);
int qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm, int qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm,
int fd, off_t offset, const char *path, int fd, off_t offset, const char *path,

View File

@ -8306,6 +8306,10 @@ static const vshCmdOptDef opts_migrate[] = {
.type = VSH_OT_BOOL, .type = VSH_OT_BOOL,
.help = N_("compress repeated pages during live migration") .help = N_("compress repeated pages during live migration")
}, },
{.name = "abort-on-error",
.type = VSH_OT_BOOL,
.help = N_("abort on soft errors during migration")
},
{.name = "domain", {.name = "domain",
.type = VSH_OT_DATA, .type = VSH_OT_DATA,
.flags = VSH_OFLAG_REQ, .flags = VSH_OFLAG_REQ,
@ -8399,6 +8403,9 @@ doMigrate(void *opaque)
flags |= VIR_MIGRATE_OFFLINE; flags |= VIR_MIGRATE_OFFLINE;
} }
if (vshCommandOptBool(cmd, "abort-on-error"))
flags |= VIR_MIGRATE_ABORT_ON_ERROR;
if (xmlfile && if (xmlfile &&
virFileReadAll(xmlfile, 8192, &xml) < 0) { virFileReadAll(xmlfile, 8192, &xml) < 0) {
vshError(ctl, _("file '%s' doesn't exist"), xmlfile); vshError(ctl, _("file '%s' doesn't exist"), xmlfile);

View File

@ -1043,7 +1043,8 @@ stats.
=item B<migrate> [I<--live>] [I<--offline>] [I<--direct>] [I<--p2p> [I<--tunnelled>]] =item B<migrate> [I<--live>] [I<--offline>] [I<--direct>] [I<--p2p> [I<--tunnelled>]]
[I<--persistent>] [I<--undefinesource>] [I<--suspend>] [I<--copy-storage-all>] [I<--persistent>] [I<--undefinesource>] [I<--suspend>] [I<--copy-storage-all>]
[I<--copy-storage-inc>] [I<--change-protection>] [I<--unsafe>] [I<--verbose>] [I<--copy-storage-inc>] [I<--change-protection>] [I<--unsafe>] [I<--verbose>]
[I<--compressed>] I<domain> I<desturi> [I<migrateuri>] [I<dname>] [I<--compressed>] [I<--abort-on-error>]
I<domain> I<desturi> [I<migrateuri>] [I<dname>]
[I<--timeout> B<seconds>] [I<--xml> B<file>] [I<--timeout> B<seconds>] [I<--xml> B<file>]
Migrate domain to another host. Add I<--live> for live migration; I<--p2p> Migrate domain to another host. Add I<--live> for live migration; I<--p2p>
@ -1066,7 +1067,8 @@ is implicitly enabled when supported by the hypervisor, but can be explicitly
used to reject the migration if the hypervisor lacks change protection used to reject the migration if the hypervisor lacks change protection
support. I<--verbose> displays the progress of migration. I<--compressed> support. I<--verbose> displays the progress of migration. I<--compressed>
activates compression of memory pages that have to be transferred repeatedly activates compression of memory pages that have to be transferred repeatedly
during live migration. during live migration. I<--abort-on-error> cancels the migration if a soft
error (for example I/O error) happens during the migration.
B<Note>: Individual hypervisors usually do not support all possible types of B<Note>: Individual hypervisors usually do not support all possible types of
migration. For example, QEMU does not support direct migration. migration. For example, QEMU does not support direct migration.