mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2025-01-07 05:25:25 +00:00
cgroup: be robust against cgroup movement races
https://bugzilla.redhat.com/show_bug.cgi?id=965169 documents a
problem starting domains when cgroups are enabled; I was able
to reliably reproduce the race about 5% of the time when I added
hooks to delay domain startup by 3 seconds (as that seemed to be about
the length of time that qemu created and then closed a temporary
thread, probably related to aio handling of initially opening
a disk image). The problem has existed since we introduced
virCgroupMoveTask in commit 9102829
(v0.10.0).
There are some inherent TOCTTOU races when moving tasks between
kernel cgroups, precisely because threads can be created or
completed in the window between when we read a thread id from the
source and when we write to the destination. As the goal of
virCgroupMoveTask is merely to move ALL tasks into the new
cgroup, it is sufficient to iterate until no more threads are
being created in the old group, and to ignore any threads that
die before we can move them.
It would be nicer to start the threads in the right cgroup to
begin with, but by default, all child threads are created in
the same cgroup as their parent, and we don't want vcpu child
threads in the emulator cgroup, so I don't see any good way
of avoiding the move. It would also be nice if the kernel were
to implement something like rename() as a way to atomically move
a group of threads from one cgroup to another, instead of forcing
a window where we have to read and parse the source, then format
and write back into the destination.
* src/util/vircgroup.c (virCgroupAddTaskStrController): Ignore
ESRCH, because a thread ended between read and write attempts.
(virCgroupMoveTask): Loop until all threads have moved.
Signed-off-by: Eric Blake <eblake@redhat.com>
This commit is contained in:
parent
63411259bb
commit
83e4c77547
@ -1037,7 +1037,11 @@ static int virCgroupAddTaskStrController(virCgroupPtr group,
|
|||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
|
||||||
rc = virCgroupAddTaskController(group, p, controller);
|
rc = virCgroupAddTaskController(group, p, controller);
|
||||||
if (rc != 0)
|
/* A thread that exits between when we first read the source
|
||||||
|
* tasks and now is not fatal. */
|
||||||
|
if (rc == -ESRCH)
|
||||||
|
rc = 0;
|
||||||
|
else if (rc != 0)
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
|
||||||
next = strchr(cur, '\n');
|
next = strchr(cur, '\n');
|
||||||
@ -1074,9 +1078,16 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
|
|||||||
!dest_group->controllers[i].mountPoint)
|
!dest_group->controllers[i].mountPoint)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/* New threads are created in the same group as their parent;
|
||||||
|
* but if a thread is created after we first read we aren't
|
||||||
|
* aware that it needs to move. Therefore, we must iterate
|
||||||
|
* until content is empty. */
|
||||||
|
while (1) {
|
||||||
rc = virCgroupGetValueStr(src_group, i, "tasks", &content);
|
rc = virCgroupGetValueStr(src_group, i, "tasks", &content);
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
return rc;
|
return rc;
|
||||||
|
if (!*content)
|
||||||
|
break;
|
||||||
|
|
||||||
rc = virCgroupAddTaskStrController(dest_group, content, i);
|
rc = virCgroupAddTaskStrController(dest_group, content, i);
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
@ -1084,6 +1095,7 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
|
|||||||
|
|
||||||
VIR_FREE(content);
|
VIR_FREE(content);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
VIR_FREE(content);
|
VIR_FREE(content);
|
||||||
|
Loading…
Reference in New Issue
Block a user