migration: Usable time statistics without requiring NTP

virDomainGetJobStats is able to report statistics of a completed
migration; however, to get usable downtime and total time statistics, both
hosts have to keep their clocks synchronized. To provide at least some
estimation of the times even when NTP daemons are not running on both
hosts, we can just ignore the time needed to transfer a migration cookie
to the destination host. The result will also be inaccurate, but a bit
more predictable: the real total time and downtime will be at least what
we report.

https://bugzilla.redhat.com/show_bug.cgi?id=1213434
This commit is contained in:
Jiri Denemark 2015-04-23 09:19:12 +02:00
parent da4d7c3069
commit aa9f139599
5 changed files with 80 additions and 14 deletions

View File

@ -2547,6 +2547,16 @@ int virDomainAbortJob(virDomainPtr dom);
*/ */
# define VIR_DOMAIN_JOB_TIME_ELAPSED "time_elapsed" # define VIR_DOMAIN_JOB_TIME_ELAPSED "time_elapsed"
/**
* VIR_DOMAIN_JOB_TIME_ELAPSED_NET:
*
* virDomainGetJobStats field: time (ms) since the beginning of the
* migration job NOT including the time required to transfer control
* flow from the source host to the destination host,
* as VIR_TYPED_PARAM_ULLONG.
*
* Unlike VIR_DOMAIN_JOB_TIME_ELAPSED, this value is usable even when the
* clocks of the source and destination hosts are not synchronized. The
* real elapsed time is at least the value reported here.
*/
# define VIR_DOMAIN_JOB_TIME_ELAPSED_NET "time_elapsed_net"
/** /**
* VIR_DOMAIN_JOB_TIME_REMAINING: * VIR_DOMAIN_JOB_TIME_REMAINING:
* *
@ -2561,10 +2571,21 @@ int virDomainAbortJob(virDomainPtr dom);
* VIR_DOMAIN_JOB_DOWNTIME: * VIR_DOMAIN_JOB_DOWNTIME:
* *
* virDomainGetJobStats field: downtime (ms) that is expected to happen * virDomainGetJobStats field: downtime (ms) that is expected to happen
* during migration, as VIR_TYPED_PARAM_ULLONG. * during migration, as VIR_TYPED_PARAM_ULLONG. The real computed downtime
* between the time guest CPUs were paused and the time they were resumed
* is reported for completed migration.
*/ */
# define VIR_DOMAIN_JOB_DOWNTIME "downtime" # define VIR_DOMAIN_JOB_DOWNTIME "downtime"
/**
* VIR_DOMAIN_JOB_DOWNTIME_NET:
*
* virDomainGetJobStats field: real measured downtime (ms) NOT including
* the time required to transfer control flow from the source host to the
* destination host, as VIR_TYPED_PARAM_ULLONG.
*
* Unlike VIR_DOMAIN_JOB_DOWNTIME, this value is usable even when the
* clocks of the source and destination hosts are not synchronized. The
* real downtime is at least the value reported here.
*/
# define VIR_DOMAIN_JOB_DOWNTIME_NET "downtime_net"
/** /**
* VIR_DOMAIN_JOB_SETUP_TIME: * VIR_DOMAIN_JOB_SETUP_TIME:
* *

View File

@ -288,6 +288,13 @@ qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo,
jobInfo->timeElapsed) < 0) jobInfo->timeElapsed) < 0)
goto error; goto error;
if (jobInfo->timeDeltaSet &&
jobInfo->timeElapsed > jobInfo->timeDelta &&
virTypedParamsAddULLong(&par, &npar, &maxpar,
VIR_DOMAIN_JOB_TIME_ELAPSED_NET,
jobInfo->timeElapsed - jobInfo->timeDelta) < 0)
goto error;
if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED && if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED &&
virTypedParamsAddULLong(&par, &npar, &maxpar, virTypedParamsAddULLong(&par, &npar, &maxpar,
VIR_DOMAIN_JOB_TIME_REMAINING, VIR_DOMAIN_JOB_TIME_REMAINING,
@ -300,6 +307,14 @@ qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo,
status->downtime) < 0) status->downtime) < 0)
goto error; goto error;
if (status->downtime_set &&
jobInfo->timeDeltaSet &&
status->downtime > jobInfo->timeDelta &&
virTypedParamsAddULLong(&par, &npar, &maxpar,
VIR_DOMAIN_JOB_DOWNTIME_NET,
status->downtime - jobInfo->timeDelta) < 0)
goto error;
if (status->setup_time_set && if (status->setup_time_set &&
virTypedParamsAddULLong(&par, &npar, &maxpar, virTypedParamsAddULLong(&par, &npar, &maxpar,
VIR_DOMAIN_JOB_SETUP_TIME, VIR_DOMAIN_JOB_SETUP_TIME,

View File

@ -100,9 +100,19 @@ struct _qemuDomainJobInfo {
virDomainJobType type; virDomainJobType type;
unsigned long long started; /* When the async job started */ unsigned long long started; /* When the async job started */
unsigned long long stopped; /* When the domain's CPUs were stopped */ unsigned long long stopped; /* When the domain's CPUs were stopped */
unsigned long long sent; /* When the source sent status info to the
destination (only for migrations). */
unsigned long long received; /* When the destination host received status
info from the source (migrations only). */
/* Computed values */ /* Computed values */
unsigned long long timeElapsed; unsigned long long timeElapsed;
unsigned long long timeRemaining; unsigned long long timeRemaining;
long long timeDelta; /* delta = received - sent, i.e., the difference
between the source and the destination time plus
the time between the end of Perform phase on the
source and the beginning of Finish phase on the
destination. */
bool timeDeltaSet;
/* Raw values from QEMU */ /* Raw values from QEMU */
qemuMonitorMigrationStatus status; qemuMonitorMigrationStatus status;
}; };

View File

@ -688,6 +688,9 @@ qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf,
virBufferAsprintf(buf, "<started>%llu</started>\n", jobInfo->started); virBufferAsprintf(buf, "<started>%llu</started>\n", jobInfo->started);
virBufferAsprintf(buf, "<stopped>%llu</stopped>\n", jobInfo->stopped); virBufferAsprintf(buf, "<stopped>%llu</stopped>\n", jobInfo->stopped);
virBufferAsprintf(buf, "<sent>%llu</sent>\n", jobInfo->sent);
if (jobInfo->timeDeltaSet)
virBufferAsprintf(buf, "<delta>%lld</delta>\n", jobInfo->timeDelta);
virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n",
VIR_DOMAIN_JOB_TIME_ELAPSED, VIR_DOMAIN_JOB_TIME_ELAPSED,
@ -1046,11 +1049,15 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt)
virXPathULongLong("string(./started[1])", ctxt, &jobInfo->started); virXPathULongLong("string(./started[1])", ctxt, &jobInfo->started);
virXPathULongLong("string(./stopped[1])", ctxt, &jobInfo->stopped); virXPathULongLong("string(./stopped[1])", ctxt, &jobInfo->stopped);
virXPathULongLong("string(./sent[1])", ctxt, &jobInfo->sent);
if (virXPathLongLong("string(./delta[1])", ctxt, &jobInfo->timeDelta) == 0)
jobInfo->timeDeltaSet = true;
virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])", virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])",
ctxt, &jobInfo->timeElapsed); ctxt, &jobInfo->timeElapsed);
virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])", virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])",
ctxt, &jobInfo->timeRemaining); ctxt, &jobInfo->timeRemaining);
if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_DOWNTIME "[1])", if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_DOWNTIME "[1])",
ctxt, &status->downtime) == 0) ctxt, &status->downtime) == 0)
status->downtime_set = true; status->downtime_set = true;
@ -3438,19 +3445,10 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
/* Update total times with the values sent by the destination daemon */ /* Update total times with the values sent by the destination daemon */
if (mig->jobInfo) { if (mig->jobInfo) {
qemuDomainObjPrivatePtr priv = vm->privateData; qemuDomainObjPrivatePtr priv = vm->privateData;
if (priv->job.completed) { VIR_FREE(priv->job.completed);
qemuDomainJobInfoPtr jobInfo = priv->job.completed;
if (mig->jobInfo->status.downtime_set) {
jobInfo->status.downtime = mig->jobInfo->status.downtime;
jobInfo->status.downtime_set = true;
}
if (mig->jobInfo->timeElapsed)
jobInfo->timeElapsed = mig->jobInfo->timeElapsed;
} else {
priv->job.completed = mig->jobInfo; priv->job.completed = mig->jobInfo;
mig->jobInfo = NULL; mig->jobInfo = NULL;
} }
}
if (flags & VIR_MIGRATE_OFFLINE) if (flags & VIR_MIGRATE_OFFLINE)
goto done; goto done;
@ -4041,6 +4039,7 @@ qemuMigrationRun(virQEMUDriverPtr driver,
if (priv->job.completed) { if (priv->job.completed) {
qemuDomainJobInfoUpdateTime(priv->job.completed); qemuDomainJobInfoUpdateTime(priv->job.completed);
qemuDomainJobInfoUpdateDowntime(priv->job.completed); qemuDomainJobInfoUpdateDowntime(priv->job.completed);
ignore_value(virTimeMillisNow(&priv->job.completed->sent));
} }
if (priv->job.current->type == VIR_DOMAIN_JOB_UNBOUNDED) if (priv->job.current->type == VIR_DOMAIN_JOB_UNBOUNDED)
@ -5164,8 +5163,13 @@ qemuMigrationFinish(virQEMUDriverPtr driver,
} }
if (mig->jobInfo) { if (mig->jobInfo) {
priv->job.completed = mig->jobInfo; qemuDomainJobInfoPtr jobInfo = mig->jobInfo;
priv->job.completed = jobInfo;
mig->jobInfo = NULL; mig->jobInfo = NULL;
if (jobInfo->sent && virTimeMillisNow(&jobInfo->received) == 0) {
jobInfo->timeDelta = jobInfo->received - jobInfo->sent;
jobInfo->timeDeltaSet = true;
}
} }
if (!(flags & VIR_MIGRATE_OFFLINE)) { if (!(flags & VIR_MIGRATE_OFFLINE)) {

View File

@ -5764,6 +5764,15 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd)
} }
vshPrint(ctl, "%-17s %-12llu ms\n", _("Time elapsed:"), info.timeElapsed); vshPrint(ctl, "%-17s %-12llu ms\n", _("Time elapsed:"), info.timeElapsed);
if ((rc = virTypedParamsGetULLong(params, nparams,
VIR_DOMAIN_JOB_TIME_ELAPSED_NET,
&value)) < 0) {
goto save_error;
} else if (rc) {
vshPrint(ctl, "%-17s %-12llu ms\n", _("Time elapsed w/o network:"),
value);
}
if (info.type == VIR_DOMAIN_JOB_BOUNDED) if (info.type == VIR_DOMAIN_JOB_BOUNDED)
vshPrint(ctl, "%-17s %-12llu ms\n", _("Time remaining:"), vshPrint(ctl, "%-17s %-12llu ms\n", _("Time remaining:"),
info.timeRemaining); info.timeRemaining);
@ -5852,6 +5861,13 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd)
} }
} }
if ((rc = virTypedParamsGetULLong(params, nparams,
VIR_DOMAIN_JOB_DOWNTIME_NET,
&value)) < 0)
goto save_error;
else if (rc)
vshPrint(ctl, "%-17s %-12llu ms\n", _("Downtime w/o network:"), value);
if ((rc = virTypedParamsGetULLong(params, nparams, if ((rc = virTypedParamsGetULLong(params, nparams,
VIR_DOMAIN_JOB_SETUP_TIME, VIR_DOMAIN_JOB_SETUP_TIME,
&value)) < 0) &value)) < 0)