From 31a55c7cb4bc37d5b4028466631a4230a4b4a33c Mon Sep 17 00:00:00 2001 From: Peter Krempa Date: Thu, 19 Feb 2015 11:53:42 +0100 Subject: [PATCH] qemu: Properly report error state in qemuDomainGetControlInfo() Previously when a domain would get stuck in a domain job due to a programming mistake we'd report the following control state: $ virsh domcontrol domain occupied (1424343406.150s) The timestamp is invalid as the monitor was not entered for that domain. We can use that to detect that the domain has an active job and report a better error instead: $ virsh domcontrol domain error: internal (locking) error --- include/libvirt/libvirt-domain.h | 27 +++++++++++++++++++++++++-- src/qemu/qemu_driver.c | 15 +++++++++++++-- tools/virsh-domain-monitor.c | 19 +++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 4dbd7f51a2..a9d3efdd6a 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -182,13 +182,35 @@ typedef enum { monitored by virDomainGetJobInfo); only limited set of commands may be allowed */ VIR_DOMAIN_CONTROL_OCCUPIED = 2, /* occupied by a running command */ - VIR_DOMAIN_CONTROL_ERROR = 3, /* unusable, domain cannot be fully operated */ + VIR_DOMAIN_CONTROL_ERROR = 3, /* unusable, domain cannot be fully + operated, possible reason is provided + in the details field */ # ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_CONTROL_LAST # endif } virDomainControlState; +/** + * virDomainControlErrorReason: + * + * Reason for the error state. 
+ */ +typedef enum { + VIR_DOMAIN_CONTROL_ERROR_REASON_NONE = 0, /* server didn't provide a + reason */ + VIR_DOMAIN_CONTROL_ERROR_REASON_UNKNOWN = 1, /* unknown reason for the + error */ + VIR_DOMAIN_CONTROL_ERROR_REASON_MONITOR = 2, /* monitor connection is + broken */ + VIR_DOMAIN_CONTROL_ERROR_REASON_INTERNAL = 3, /* error caused by an + internal failure in libvirt + */ +# ifdef VIR_ENUM_SENTINELS + VIR_DOMAIN_CONTROL_ERROR_REASON_LAST +# endif +} virDomainControlErrorReason; + /** * virDomainControlInfo: * @@ -198,7 +220,8 @@ typedef enum { typedef struct _virDomainControlInfo virDomainControlInfo; struct _virDomainControlInfo { unsigned int state; /* control state, one of virDomainControlState */ - unsigned int details; /* state details, currently 0 */ + unsigned int details; /* state details, currently 0 except for ERROR + state (one of virDomainControlErrorReason) */ unsigned long long stateTime; /* for how long (in msec) control interface has been in current state (except for OK and ERROR states) */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 84aa0994b2..0e933e8f01 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2701,6 +2701,7 @@ qemuDomainGetControlInfo(virDomainPtr dom, if (priv->monError) { info->state = VIR_DOMAIN_CONTROL_ERROR; + info->details = VIR_DOMAIN_CONTROL_ERROR_REASON_MONITOR; } else if (priv->job.active) { if (virTimeMillisNow(&info->stateTime) < 0) goto cleanup; @@ -2708,8 +2709,18 @@ qemuDomainGetControlInfo(virDomainPtr dom, info->state = VIR_DOMAIN_CONTROL_JOB; info->stateTime -= priv->job.current->started; } else { - info->state = VIR_DOMAIN_CONTROL_OCCUPIED; - info->stateTime -= priv->monStart; + if (priv->monStart > 0) { + info->state = VIR_DOMAIN_CONTROL_OCCUPIED; + info->stateTime -= priv->monStart; + } else { + /* At this point the domain has an active job, but monitor was + * not entered and the domain object lock is not held, thus we + * are stuck in the job forever due to a programming 
error. + */ + info->state = VIR_DOMAIN_CONTROL_ERROR; + info->details = VIR_DOMAIN_CONTROL_ERROR_REASON_INTERNAL; + info->stateTime = 0; + } } } else { info->state = VIR_DOMAIN_CONTROL_OK; diff --git a/tools/virsh-domain-monitor.c b/tools/virsh-domain-monitor.c index 3c16b3eab9..fbafa7763f 100644 --- a/tools/virsh-domain-monitor.c +++ b/tools/virsh-domain-monitor.c @@ -128,6 +128,21 @@ vshDomainControlStateToString(int state) return str ? _(str) : _("unknown"); } +VIR_ENUM_DECL(vshDomainControlErrorReason) +VIR_ENUM_IMPL(vshDomainControlErrorReason, + VIR_DOMAIN_CONTROL_ERROR_REASON_LAST, + "", + N_("unknown"), + N_("monitor failure"), + N_("internal (locking) error")) + +static const char * +vshDomainControlErrorReasonToString(int reason) +{ + const char *ret = vshDomainControlErrorReasonTypeToString(reason); + return ret ? _(ret) : _("unknown"); +} + VIR_ENUM_DECL(vshDomainState) VIR_ENUM_IMPL(vshDomainState, VIR_DOMAIN_LAST, @@ -815,6 +830,10 @@ cmdDomControl(vshControl *ctl, const vshCmd *cmd) vshPrint(ctl, "%s (%0.3fs)\n", vshDomainControlStateToString(info.state), info.stateTime / 1000.0); + } else if (info.state == VIR_DOMAIN_CONTROL_ERROR && info.details > 0) { + vshPrint(ctl, "%s: %s\n", + vshDomainControlStateToString(info.state), + vshDomainControlErrorReasonToString(info.details)); } else { vshPrint(ctl, "%s\n", vshDomainControlStateToString(info.state));