From f15cad2916f0ac80a2f3659c3ba8294857283ed1 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Tue, 25 Jan 2011 18:14:45 +0800 Subject: [PATCH] Force guest suspend at timeout If the memory of the guest OS changes constantly, the live migration may never complete. We can use the command 'virsh migrate-setmaxdowntime' to control the live migration. But the value of maxdowntime is difficult to calculate because it depends on the transfer speed of the network and the constantly changing memory size. We need an easy way to control the live migration. This patch adds support for forcing the guest to suspend at timeout. With this patch, when we migrate the guest OS, we can specify a timeout. If the live migration times out, the guest OS is automatically suspended so that the migration can complete offline. --- tools/virsh.c | 38 ++++++++++++++++++++++++++++++++++++++ tools/virsh.pod | 5 ++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/tools/virsh.c b/tools/virsh.c index 8f3c59650d..cd54174eb5 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -3418,6 +3418,7 @@ static const vshCmdOptDef opts_migrate[] = { {"desturi", VSH_OT_DATA, VSH_OFLAG_REQ, N_("connection URI of the destination host as seen from the client(normal migration) or source(p2p migration)")}, {"migrateuri", VSH_OT_DATA, 0, N_("migration URI, usually can be omitted")}, {"dname", VSH_OT_DATA, 0, N_("rename to new name during migration (if supported)")}, + {"timeout", VSH_OT_INT, 0, N_("force guest to suspend if live migration exceeds timeout (in seconds)")}, {NULL, 0, 0, NULL} }; @@ -3548,12 +3549,16 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) int ret = -1; virThread workerThread; struct pollfd pollfd; + int found; char retchar; struct sigaction sig_action; struct sigaction old_sig_action; virDomainJobInfo jobinfo; bool verbose = false; sigset_t sigmask, oldsigmask; + int timeout; + struct timeval start, curr; + bool live_flag = false; vshCtrlData data; @@ -3563,6 +3568,29 @@ cmdMigrate (vshControl 
*ctl, const vshCmd *cmd) if (vshCommandOptBool (cmd, "verbose")) verbose = true; + if (vshCommandOptBool (cmd, "live")) + live_flag = TRUE; + timeout = vshCommandOptInt(cmd, "timeout", &found); + if (found) { + if (! live_flag) { + vshError(ctl, "%s", _("migrate: Unexpected timeout for offline migration")); + goto cleanup; + } + + if (timeout < 1) { + vshError(ctl, "%s", _("migrate: Invalid timeout")); + goto cleanup; + } + + /* Ensure that we can multiply by 1000 without overflowing. */ + if (timeout > INT_MAX / 1000) { + vshError(ctl, "%s", _("migrate: Timeout is too big")); + goto cleanup; + } + } else { + timeout = 0; + } + if (pipe(p) < 0) goto cleanup; @@ -3588,6 +3616,7 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) sigemptyset(&sigmask); sigaddset(&sigmask, SIGINT); + GETTIMEOFDAY(&start); while (1) { repoll: ret = poll(&pollfd, 1, 500); @@ -3618,6 +3647,15 @@ repoll: break; } + GETTIMEOFDAY(&curr); + if ( timeout && ((int)(curr.tv_sec - start.tv_sec) * 1000 + \ + (int)(curr.tv_usec - start.tv_usec) / 1000) > timeout * 1000 ) { + /* suspend the domain when migration timeouts. */ + vshDebug(ctl, 5, "suspend the domain when migration timeouts\n"); + virDomainSuspend(dom); + timeout = 0; + } + if (verbose) { pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask); ret = virDomainGetJobInfo(dom, &jobinfo); diff --git a/tools/virsh.pod b/tools/virsh.pod index 3128d01457..c00cc8aba2 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -490,7 +490,7 @@ type attribute for the element of XML. =item B optional I<--live> I<--p2p> I<--direct> I<--tunnelled> I<--persistent> I<--undefinesource> I<--suspend> I<--copy-storage-all> I<--copy-storage-inc> I<--verbose> I I I -I +I I<--timeout> Migrate domain to another host. Add I<--live> for live migration; I<--p2p> for peer-2-peer migration; I<--direct> for direct migration; or I<--tunnelled> @@ -507,6 +507,9 @@ I is the migration URI, which usually can be omitted. 
I is used for renaming the domain to new name during migration, which also usually can be omitted. +I<--timeout> forces guest to suspend when live migration exceeds timeout, and +then the migration will complete offline. It can only be used with I<--live>. + B: The I parameter for normal migration and peer2peer migration has different semantics: