From aa9f13959972c057a6028cfd4d58b6daaa6e1d8f Mon Sep 17 00:00:00 2001 From: Jiri Denemark Date: Thu, 23 Apr 2015 09:19:12 +0200 Subject: [PATCH] migration: Usable time statistics without requiring NTP virDomainGetJobStats is able to report statistics of a completed migration; however, to get usable downtime and total time statistics, both hosts have to keep their clocks synchronized. To provide at least some estimate of the times even when NTP daemons are not running on both hosts, we can just ignore the time needed to transfer a migration cookie to the destination host. The result will also be inaccurate, but a bit more predictable: the real total time and downtime will be at least what we report. https://bugzilla.redhat.com/show_bug.cgi?id=1213434 --- include/libvirt/libvirt-domain.h | 23 ++++++++++++++++++++++- src/qemu/qemu_domain.c | 15 +++++++++++++++ src/qemu/qemu_domain.h | 10 ++++++++++ src/qemu/qemu_migration.c | 30 +++++++++++++++++------------- tools/virsh-domain.c | 16 ++++++++++++++++ 5 files changed, 80 insertions(+), 14 deletions(-) diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 8a4fe531d2..5c0a3822ba 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -2547,6 +2547,16 @@ int virDomainAbortJob(virDomainPtr dom); */ # define VIR_DOMAIN_JOB_TIME_ELAPSED "time_elapsed" +/** + * VIR_DOMAIN_JOB_TIME_ELAPSED_NET: + * + * virDomainGetJobStats field: time (ms) since the beginning of the + * migration job NOT including the time required to transfer control + * flow from the source host to the destination host, + * as VIR_TYPED_PARAM_ULLONG. + */ +# define VIR_DOMAIN_JOB_TIME_ELAPSED_NET "time_elapsed_net" + /** * VIR_DOMAIN_JOB_TIME_REMAINING: * @@ -2561,10 +2571,21 @@ int virDomainAbortJob(virDomainPtr dom); * VIR_DOMAIN_JOB_DOWNTIME: * * virDomainGetJobStats field: downtime (ms) that is expected to happen - * during migration, as VIR_TYPED_PARAM_ULLONG. 
+ * during migration, as VIR_TYPED_PARAM_ULLONG. The real computed downtime + * between the time guest CPUs were paused and the time they were resumed + * is reported for completed migration. */ # define VIR_DOMAIN_JOB_DOWNTIME "downtime" +/** + * VIR_DOMAIN_JOB_DOWNTIME_NET: + * + * virDomainGetJobStats field: real measured downtime (ms) NOT including + * the time required to transfer control flow from the source host to the + * destination host, as VIR_TYPED_PARAM_ULLONG. + */ +# define VIR_DOMAIN_JOB_DOWNTIME_NET "downtime_net" + /** * VIR_DOMAIN_JOB_SETUP_TIME: * diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 616e635ab5..6c480c6daf 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -288,6 +288,13 @@ qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, jobInfo->timeElapsed) < 0) goto error; + if (jobInfo->timeDeltaSet && + jobInfo->timeElapsed > jobInfo->timeDelta && + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_TIME_ELAPSED_NET, + jobInfo->timeElapsed - jobInfo->timeDelta) < 0) + goto error; + if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED && virTypedParamsAddULLong(&par, &npar, &maxpar, VIR_DOMAIN_JOB_TIME_REMAINING, @@ -300,6 +307,14 @@ qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, status->downtime) < 0) goto error; + if (status->downtime_set && + jobInfo->timeDeltaSet && + status->downtime > jobInfo->timeDelta && + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DOWNTIME_NET, + status->downtime - jobInfo->timeDelta) < 0) + goto error; + if (status->setup_time_set && virTypedParamsAddULLong(&par, &npar, &maxpar, VIR_DOMAIN_JOB_SETUP_TIME, diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 382b5c9d2d..d550ae38fe 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -100,9 +100,19 @@ struct _qemuDomainJobInfo { virDomainJobType type; unsigned long long started; /* When the async job started */ unsigned long long stopped; /* When the domain's CPUs were 
stopped */ + unsigned long long sent; /* When the source sent status info to the + destination (only for migrations). */ + unsigned long long received; /* When the destination host received status + info from the source (migrations only). */ /* Computed values */ unsigned long long timeElapsed; unsigned long long timeRemaining; + long long timeDelta; /* delta = received - sent, i.e., the difference + between the source and the destination time plus + the time between the end of Perform phase on the + source and the beginning of Finish phase on the + destination. */ + bool timeDeltaSet; /* Raw values from QEMU */ qemuMonitorMigrationStatus status; }; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 42d27cb4fc..ec72cfac1f 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -688,6 +688,9 @@ qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf, virBufferAsprintf(buf, "<started>%llu</started>\n", jobInfo->started); virBufferAsprintf(buf, "<stopped>%llu</stopped>\n", jobInfo->stopped); + virBufferAsprintf(buf, "<sent>%llu</sent>\n", jobInfo->sent); + if (jobInfo->timeDeltaSet) + virBufferAsprintf(buf, "<delta>%lld</delta>\n", jobInfo->timeDelta); virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", VIR_DOMAIN_JOB_TIME_ELAPSED, @@ -1046,11 +1049,15 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt) virXPathULongLong("string(./started[1])", ctxt, &jobInfo->started); virXPathULongLong("string(./stopped[1])", ctxt, &jobInfo->stopped); + virXPathULongLong("string(./sent[1])", ctxt, &jobInfo->sent); + if (virXPathLongLong("string(./delta[1])", ctxt, &jobInfo->timeDelta) == 0) + jobInfo->timeDeltaSet = true; virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])", ctxt, &jobInfo->timeElapsed); virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])", ctxt, &jobInfo->timeRemaining); + if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_DOWNTIME "[1])", ctxt, &status->downtime) == 0) status->downtime_set = true; @@ -3438,18 +3445,9 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr 
driver, /* Update total times with the values sent by the destination daemon */ if (mig->jobInfo) { qemuDomainObjPrivatePtr priv = vm->privateData; - if (priv->job.completed) { - qemuDomainJobInfoPtr jobInfo = priv->job.completed; - if (mig->jobInfo->status.downtime_set) { - jobInfo->status.downtime = mig->jobInfo->status.downtime; - jobInfo->status.downtime_set = true; - } - if (mig->jobInfo->timeElapsed) - jobInfo->timeElapsed = mig->jobInfo->timeElapsed; - } else { - priv->job.completed = mig->jobInfo; - mig->jobInfo = NULL; - } + VIR_FREE(priv->job.completed); + priv->job.completed = mig->jobInfo; + mig->jobInfo = NULL; } if (flags & VIR_MIGRATE_OFFLINE) @@ -4041,6 +4039,7 @@ qemuMigrationRun(virQEMUDriverPtr driver, if (priv->job.completed) { qemuDomainJobInfoUpdateTime(priv->job.completed); qemuDomainJobInfoUpdateDowntime(priv->job.completed); + ignore_value(virTimeMillisNow(&priv->job.completed->sent)); } if (priv->job.current->type == VIR_DOMAIN_JOB_UNBOUNDED) @@ -5164,8 +5163,13 @@ qemuMigrationFinish(virQEMUDriverPtr driver, } if (mig->jobInfo) { - priv->job.completed = mig->jobInfo; + qemuDomainJobInfoPtr jobInfo = mig->jobInfo; + priv->job.completed = jobInfo; mig->jobInfo = NULL; + if (jobInfo->sent && virTimeMillisNow(&jobInfo->received) == 0) { + jobInfo->timeDelta = jobInfo->received - jobInfo->sent; + jobInfo->timeDeltaSet = true; + } } if (!(flags & VIR_MIGRATE_OFFLINE)) { diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index 4f58f07a6b..a1900508b6 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -5764,6 +5764,15 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) } vshPrint(ctl, "%-17s %-12llu ms\n", _("Time elapsed:"), info.timeElapsed); + if ((rc = virTypedParamsGetULLong(params, nparams, + VIR_DOMAIN_JOB_TIME_ELAPSED_NET, + &value)) < 0) { + goto save_error; + } else if (rc) { + vshPrint(ctl, "%-17s %-12llu ms\n", _("Time elapsed w/o network:"), + value); + } + if (info.type == VIR_DOMAIN_JOB_BOUNDED) vshPrint(ctl, 
"%-17s %-12llu ms\n", _("Time remaining:"), info.timeRemaining); @@ -5852,6 +5861,13 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) } } + if ((rc = virTypedParamsGetULLong(params, nparams, + VIR_DOMAIN_JOB_DOWNTIME_NET, + &value)) < 0) + goto save_error; + else if (rc) + vshPrint(ctl, "%-17s %-12llu ms\n", _("Downtime w/o network:"), value); + if ((rc = virTypedParamsGetULLong(params, nparams, VIR_DOMAIN_JOB_SETUP_TIME, &value)) < 0)