mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2025-01-11 15:27:47 +00:00
qemu: Track job owner for better debugging
In case an API fails with "cannot acquire state change lock", searching for the API that possibly forgot to end its job is not always easy. Let's keep track of the job owner and print it out for easier identification.
This commit is contained in:
parent
31796e2c1c
commit
08ec1d787f
@ -147,6 +147,7 @@ qemuDomainObjResetJob(qemuDomainObjPrivatePtr priv)
|
|||||||
struct qemuDomainJobObj *job = &priv->job;
|
struct qemuDomainJobObj *job = &priv->job;
|
||||||
|
|
||||||
job->active = QEMU_JOB_NONE;
|
job->active = QEMU_JOB_NONE;
|
||||||
|
job->owner = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -155,6 +156,7 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
|
|||||||
struct qemuDomainJobObj *job = &priv->job;
|
struct qemuDomainJobObj *job = &priv->job;
|
||||||
|
|
||||||
job->asyncJob = QEMU_ASYNC_JOB_NONE;
|
job->asyncJob = QEMU_ASYNC_JOB_NONE;
|
||||||
|
job->asyncOwner = 0;
|
||||||
job->phase = 0;
|
job->phase = 0;
|
||||||
job->mask = DEFAULT_JOB_MASK;
|
job->mask = DEFAULT_JOB_MASK;
|
||||||
job->start = 0;
|
job->start = 0;
|
||||||
@ -169,13 +171,25 @@ qemuDomainObjRestoreJob(virDomainObjPtr obj,
|
|||||||
|
|
||||||
memset(job, 0, sizeof(*job));
|
memset(job, 0, sizeof(*job));
|
||||||
job->active = priv->job.active;
|
job->active = priv->job.active;
|
||||||
|
job->owner = priv->job.owner;
|
||||||
job->asyncJob = priv->job.asyncJob;
|
job->asyncJob = priv->job.asyncJob;
|
||||||
|
job->asyncOwner = priv->job.asyncOwner;
|
||||||
job->phase = priv->job.phase;
|
job->phase = priv->job.phase;
|
||||||
|
|
||||||
qemuDomainObjResetJob(priv);
|
qemuDomainObjResetJob(priv);
|
||||||
qemuDomainObjResetAsyncJob(priv);
|
qemuDomainObjResetAsyncJob(priv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
qemuDomainObjTransferJob(virDomainObjPtr obj)
|
||||||
|
{
|
||||||
|
qemuDomainObjPrivatePtr priv = obj->privateData;
|
||||||
|
|
||||||
|
VIR_DEBUG("Changing job owner from %d to %d",
|
||||||
|
priv->job.owner, virThreadSelfID());
|
||||||
|
priv->job.owner = virThreadSelfID();
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv)
|
qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv)
|
||||||
{
|
{
|
||||||
@ -664,11 +678,23 @@ qemuDomainObjSetJobPhase(struct qemud_driver *driver,
|
|||||||
int phase)
|
int phase)
|
||||||
{
|
{
|
||||||
qemuDomainObjPrivatePtr priv = obj->privateData;
|
qemuDomainObjPrivatePtr priv = obj->privateData;
|
||||||
|
int me = virThreadSelfID();
|
||||||
|
|
||||||
if (!priv->job.asyncJob)
|
if (!priv->job.asyncJob)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
VIR_DEBUG("Setting '%s' phase to '%s'",
|
||||||
|
qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
|
||||||
|
qemuDomainAsyncJobPhaseToString(priv->job.asyncJob, phase));
|
||||||
|
|
||||||
|
if (priv->job.asyncOwner && me != priv->job.asyncOwner) {
|
||||||
|
VIR_WARN("'%s' async job is owned by thread %d",
|
||||||
|
qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
|
||||||
|
priv->job.asyncOwner);
|
||||||
|
}
|
||||||
|
|
||||||
priv->job.phase = phase;
|
priv->job.phase = phase;
|
||||||
|
priv->job.asyncOwner = me;
|
||||||
qemuDomainObjSaveJob(driver, obj);
|
qemuDomainObjSaveJob(driver, obj);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -695,6 +721,22 @@ qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver, virDomainObjPtr obj)
|
|||||||
qemuDomainObjSaveJob(driver, obj);
|
qemuDomainObjSaveJob(driver, obj);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
qemuDomainObjReleaseAsyncJob(virDomainObjPtr obj)
|
||||||
|
{
|
||||||
|
qemuDomainObjPrivatePtr priv = obj->privateData;
|
||||||
|
|
||||||
|
VIR_DEBUG("Releasing ownership of '%s' async job",
|
||||||
|
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
|
||||||
|
|
||||||
|
if (priv->job.asyncOwner != virThreadSelfID()) {
|
||||||
|
VIR_WARN("'%s' async job is owned by thread %d",
|
||||||
|
qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
|
||||||
|
priv->job.asyncOwner);
|
||||||
|
}
|
||||||
|
priv->job.asyncOwner = 0;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
qemuDomainNestedJobAllowed(qemuDomainObjPrivatePtr priv, enum qemuDomainJob job)
|
qemuDomainNestedJobAllowed(qemuDomainObjPrivatePtr priv, enum qemuDomainJob job)
|
||||||
{
|
{
|
||||||
@ -764,11 +806,13 @@ retry:
|
|||||||
qemuDomainJobTypeToString(job),
|
qemuDomainJobTypeToString(job),
|
||||||
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
|
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
|
||||||
priv->job.active = job;
|
priv->job.active = job;
|
||||||
|
priv->job.owner = virThreadSelfID();
|
||||||
} else {
|
} else {
|
||||||
VIR_DEBUG("Starting async job: %s",
|
VIR_DEBUG("Starting async job: %s",
|
||||||
qemuDomainAsyncJobTypeToString(asyncJob));
|
qemuDomainAsyncJobTypeToString(asyncJob));
|
||||||
qemuDomainObjResetAsyncJob(priv);
|
qemuDomainObjResetAsyncJob(priv);
|
||||||
priv->job.asyncJob = asyncJob;
|
priv->job.asyncJob = asyncJob;
|
||||||
|
priv->job.asyncOwner = virThreadSelfID();
|
||||||
priv->job.start = now;
|
priv->job.start = now;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -784,6 +828,15 @@ retry:
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
|
VIR_WARN("Cannot start job (%s, %s) for domain %s;"
|
||||||
|
" current job is (%s, %s) owned by (%d, %d)",
|
||||||
|
qemuDomainJobTypeToString(job),
|
||||||
|
qemuDomainAsyncJobTypeToString(asyncJob),
|
||||||
|
obj->def->name,
|
||||||
|
qemuDomainJobTypeToString(priv->job.active),
|
||||||
|
qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
|
||||||
|
priv->job.owner, priv->job.asyncOwner);
|
||||||
|
|
||||||
if (errno == ETIMEDOUT)
|
if (errno == ETIMEDOUT)
|
||||||
qemuReportError(VIR_ERR_OPERATION_TIMEOUT,
|
qemuReportError(VIR_ERR_OPERATION_TIMEOUT,
|
||||||
"%s", _("cannot acquire state change lock"));
|
"%s", _("cannot acquire state change lock"));
|
||||||
|
@ -96,9 +96,11 @@ VIR_ENUM_DECL(qemuDomainAsyncJob)
|
|||||||
struct qemuDomainJobObj {
|
struct qemuDomainJobObj {
|
||||||
virCond cond; /* Use to coordinate jobs */
|
virCond cond; /* Use to coordinate jobs */
|
||||||
enum qemuDomainJob active; /* Currently running job */
|
enum qemuDomainJob active; /* Currently running job */
|
||||||
|
int owner; /* Thread which set current job */
|
||||||
|
|
||||||
virCond asyncCond; /* Use to coordinate with async jobs */
|
virCond asyncCond; /* Use to coordinate with async jobs */
|
||||||
enum qemuDomainAsyncJob asyncJob; /* Currently active async job */
|
enum qemuDomainAsyncJob asyncJob; /* Currently active async job */
|
||||||
|
int asyncOwner; /* Thread which set current async job */
|
||||||
int phase; /* Job phase (mainly for migrations) */
|
int phase; /* Job phase (mainly for migrations) */
|
||||||
unsigned long long mask; /* Jobs allowed during async job */
|
unsigned long long mask; /* Jobs allowed during async job */
|
||||||
unsigned long long start; /* When the async job started */
|
unsigned long long start; /* When the async job started */
|
||||||
@ -203,8 +205,10 @@ void qemuDomainObjSetAsyncJobMask(virDomainObjPtr obj,
|
|||||||
unsigned long long allowedJobs);
|
unsigned long long allowedJobs);
|
||||||
void qemuDomainObjRestoreJob(virDomainObjPtr obj,
|
void qemuDomainObjRestoreJob(virDomainObjPtr obj,
|
||||||
struct qemuDomainJobObj *job);
|
struct qemuDomainJobObj *job);
|
||||||
|
void qemuDomainObjTransferJob(virDomainObjPtr obj);
|
||||||
void qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver,
|
void qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver,
|
||||||
virDomainObjPtr obj);
|
virDomainObjPtr obj);
|
||||||
|
void qemuDomainObjReleaseAsyncJob(virDomainObjPtr obj);
|
||||||
|
|
||||||
void qemuDomainObjEnterMonitor(struct qemud_driver *driver,
|
void qemuDomainObjEnterMonitor(struct qemud_driver *driver,
|
||||||
virDomainObjPtr obj)
|
virDomainObjPtr obj)
|
||||||
|
@ -3242,6 +3242,7 @@ qemuMigrationJobStartPhase(struct qemud_driver *driver,
|
|||||||
int
|
int
|
||||||
qemuMigrationJobContinue(virDomainObjPtr vm)
|
qemuMigrationJobContinue(virDomainObjPtr vm)
|
||||||
{
|
{
|
||||||
|
qemuDomainObjReleaseAsyncJob(vm);
|
||||||
return virDomainObjUnref(vm);
|
return virDomainObjUnref(vm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3038,8 +3038,8 @@ qemuProcessReconnect(void *opaque)
|
|||||||
|
|
||||||
priv = obj->privateData;
|
priv = obj->privateData;
|
||||||
|
|
||||||
/* Set fake job so that EnterMonitor* doesn't want to start a new one */
|
/* Job was started by the caller for us */
|
||||||
priv->job.active = QEMU_JOB_MODIFY;
|
qemuDomainObjTransferJob(obj);
|
||||||
|
|
||||||
/* Hold an extra reference because we can't allow 'vm' to be
|
/* Hold an extra reference because we can't allow 'vm' to be
|
||||||
* deleted if qemuConnectMonitor() failed */
|
* deleted if qemuConnectMonitor() failed */
|
||||||
@ -3119,8 +3119,6 @@ qemuProcessReconnect(void *opaque)
|
|||||||
if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
|
if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
priv->job.active = QEMU_JOB_NONE;
|
|
||||||
|
|
||||||
/* update domain state XML with possibly updated state in virDomainObj */
|
/* update domain state XML with possibly updated state in virDomainObj */
|
||||||
if (virDomainSaveStatus(driver->caps, driver->stateDir, obj) < 0)
|
if (virDomainSaveStatus(driver->caps, driver->stateDir, obj) < 0)
|
||||||
goto error;
|
goto error;
|
||||||
|
Loading…
Reference in New Issue
Block a user