security_manager: Ensure top lock is acquired before nested locks

Fix libvirtd hang since fork() was called while another thread had
security manager locked.

We have the stack security driver, which internally manages other security drivers,
just call them "top" and "nested".

We call virSecurityStackPreFork() to lock the top one, and it also locks
and then unlocks the nested drivers prior to fork. Then in qemuSecurityPostFork(),
it unlocks the top one, but not the nested ones. Thus, if one of the nested
drivers ("dac" or "selinux") is still locked, it will cause a deadlock. If we always
surround nested locks with top lock, it is always secure. Because we have got top lock
before fork child libvirtd.

However, it is not always the case in the current code, We discovered this case:
the nested list obtained through the qemuSecurityGetNested() will be locked directly
for subsequent use, such as in virQEMUDriverCreateCapabilities(), where the nested list
is locked using qemuSecurityGetDOI, but the top one is not locked beforehand.

The problem stack is as follows:

libvirtd thread1          libvirtd thread2          child libvirtd
        |                           |                       |
        |                           |                       |
virsh capabilities      qemuProcessLanuch                   |
        |                           |                       |
        |                       lock top                    |
        |                           |                       |
    lock nested                     |                       |
        |                           |                       |
        |                           fork------------------->|(nested lock held by thread1)
        |                           |                       |
        |                           |                       |
    unlock nested               unlock top              unlock top
                                                            |
                                                            |
                                                qemuSecuritySetSocketLabel
                                                            |
                                                            |
                                                    lock nested (deadlock)

In this commit, we ensure that the top lock is acquired before the nested lock,
so during fork, it's not possible for another task to acquire the nested lock.

Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1303031

Signed-off-by: hongmianquan <hongmianquan@bytedance.com>
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
This commit is contained in:
hongmianquan 2024-07-05 16:01:57 +08:00 committed by Michal Privoznik
parent 8515a178f8
commit 790b4d8067
6 changed files with 50 additions and 4 deletions

View File

@ -1806,6 +1806,8 @@ virSecurityManagerSetSocketLabel;
virSecurityManagerSetTapFDLabel;
virSecurityManagerSetTPMLabels;
virSecurityManagerStackAddNested;
virSecurityManagerStackLock;
virSecurityManagerStackUnlock;
virSecurityManagerTransactionAbort;
virSecurityManagerTransactionCommit;
virSecurityManagerTransactionStart;

View File

@ -1380,9 +1380,14 @@ virCaps *virQEMUDriverCreateCapabilities(virQEMUDriver *driver)
return NULL;
}
/* Ensure top lock is acquired before nested locks */
qemuSecurityStackLock(driver->securityManager);
/* access sec drivers and create a sec model for each one */
if (!(sec_managers = qemuSecurityGetNested(driver->securityManager)))
if (!(sec_managers = qemuSecurityGetNested(driver->securityManager))) {
qemuSecurityStackUnlock(driver->securityManager);
return NULL;
}
/* calculate length */
for (i = 0; sec_managers[i]; i++)
@ -1402,14 +1407,18 @@ virCaps *virQEMUDriverCreateCapabilities(virQEMUDriver *driver)
lbl = qemuSecurityGetBaseLabel(sec_managers[i], virtTypes[j]);
type = virDomainVirtTypeToString(virtTypes[j]);
if (lbl &&
virCapabilitiesHostSecModelAddBaseLabel(sm, type, lbl) < 0)
virCapabilitiesHostSecModelAddBaseLabel(sm, type, lbl) < 0) {
qemuSecurityStackUnlock(driver->securityManager);
return NULL;
}
}
VIR_DEBUG("Initialized caps for security driver \"%s\" with "
"DOI \"%s\"", model, doi);
}
qemuSecurityStackUnlock(driver->securityManager);
caps->host.numa = virCapabilitiesHostNUMANewHost();
caps->host.cpu = virQEMUDriverGetHostCPU(driver);
return g_steal_pointer(&caps);

View File

@ -5654,9 +5654,16 @@ static int qemuDomainGetSecurityLabelList(virDomainPtr dom,
ret = 0;
} else {
int len = 0;
virSecurityManager ** mgrs = qemuSecurityGetNested(driver->securityManager);
if (!mgrs)
virSecurityManager ** mgrs = NULL;
/* Ensure top lock is acquired before nested locks */
qemuSecurityStackLock(driver->securityManager);
mgrs = qemuSecurityGetNested(driver->securityManager);
if (!mgrs) {
qemuSecurityStackUnlock(driver->securityManager);
goto cleanup;
}
/* Allocate seclabels array */
for (i = 0; mgrs[i]; i++)
@ -5669,11 +5676,13 @@ static int qemuDomainGetSecurityLabelList(virDomainPtr dom,
for (i = 0; i < len; i++) {
if (qemuSecurityGetProcessLabel(mgrs[i], vm->def, vm->pid,
&(*seclabels)[i]) < 0) {
qemuSecurityStackUnlock(driver->securityManager);
VIR_FREE(mgrs);
VIR_FREE(*seclabels);
goto cleanup;
}
}
qemuSecurityStackUnlock(driver->securityManager);
ret = len;
VIR_FREE(mgrs);
}

View File

@ -151,3 +151,5 @@ int qemuSecurityCommandRun(virQEMUDriver *driver,
#define qemuSecuritySetTapFDLabel virSecurityManagerSetTapFDLabel
#define qemuSecurityStackAddNested virSecurityManagerStackAddNested
#define qemuSecurityVerify virSecurityManagerVerify
#define qemuSecurityStackLock virSecurityManagerStackLock
#define qemuSecurityStackUnlock virSecurityManagerStackUnlock

View File

@ -989,6 +989,28 @@ virSecurityManagerGetNested(virSecurityManager *mgr)
return list;
}
/*
* Usually called before virSecurityManagerGetNested().
* We need to ensure locking the stack security manager before
* locking the nested security manager to maintain the correct
* synchronization state.
* It must be followed by a call virSecurityManagerStackUnlock().
*/
void
virSecurityManagerStackLock(virSecurityManager *mgr)
{
if (STREQ("stack", mgr->drv->name))
virObjectLock(mgr);
}
void
virSecurityManagerStackUnlock(virSecurityManager *mgr)
{
if (STREQ("stack", mgr->drv->name))
virObjectUnlock(mgr);
}
/**
* virSecurityManagerDomainSetPathLabel:

View File

@ -158,6 +158,8 @@ int virSecurityManagerSetTapFDLabel(virSecurityManager *mgr,
char *virSecurityManagerGetMountOptions(virSecurityManager *mgr,
virDomainDef *vm);
virSecurityManager ** virSecurityManagerGetNested(virSecurityManager *mgr);
void virSecurityManagerStackLock(virSecurityManager *mgr);
void virSecurityManagerStackUnlock(virSecurityManager *mgr);
typedef enum {
VIR_SECURITY_DOMAIN_IMAGE_LABEL_BACKING_CHAIN = 1 << 0,