From 44f79a0bd075b036091244aade14b73dd2859634 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Thu, 5 Jan 2017 15:26:00 +0000 Subject: [PATCH] lxc: ensure libvirt_lxc and qemu-nbd move into systemd machine slice Currently when spawning containers with systemd, the container PID 1 will get moved into the systemd machine slice. Libvirt then manually moves the libvirt_lxc and qemu-nbd processes into the cgroups associated with the slice, but skips the systemd controller cgroup. This means that from systemd's POV, libvirt_lxc and qemu-nbd are still part of the libvirtd.service unit. On systemctl daemon-reload, it will notice that libvirt_lxc & qemu-nbd are in the libvirtd.service unit for the systemd controller, but in the machine cgroups for resources. Systemd will thus move them back into the libvirtd.service resource cgroups next time libvirtd is restarted. This causes libvirtd to kill off the container due to incorrect cgroup placement. The solution is to ensure that when moving libvirt_lxc & qemu-nbd, we also move the systemd cgroup controller placement. Normally this is not something we ever want todo, but this is a special case as we are intentionally wanting to move them to a different systemd unit. Signed-off-by: Daniel P. Berrange --- src/libvirt_private.syms | 1 + src/lxc/lxc_controller.c | 4 ++-- src/util/vircgroup.c | 52 ++++++++++++++++++++++++++++++---------- src/util/vircgroup.h | 1 + 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index c5b9cee45e..f07a219a82 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1290,6 +1290,7 @@ virBufferVasprintf; # util/vircgroup.h +virCgroupAddMachineTask; virCgroupAddTask; virCgroupAddTaskController; virCgroupAllowAllDevices; diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 2170b0ae2f..88ba8aaa21 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -869,12 +869,12 @@ static int virLXCControllerSetupCgroupLimits(virLXCControllerPtr ctrl) ctrl->nicindexes))) goto cleanup; - if (virCgroupAddTask(ctrl->cgroup, getpid()) < 0) + if (virCgroupAddMachineTask(ctrl->cgroup, getpid()) < 0) goto cleanup; /* Add all qemu-nbd tasks to the cgroup */ for (i = 0; i < ctrl->nnbdpids; i++) { - if (virCgroupAddTask(ctrl->cgroup, ctrl->nbdpids[i]) < 0) + if (virCgroupAddMachineTask(ctrl->cgroup, ctrl->nbdpids[i]) < 0) goto cleanup; } diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 80ce43c1ff..ddf19e9023 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1184,16 +1184,8 @@ virCgroupNew(pid_t pid, } -/** - * virCgroupAddTask: - * - * @group: The cgroup to add a task to - * @pid: The pid of the task to add - * - * Returns: 0 on success, -1 on error - */ -int -virCgroupAddTask(virCgroupPtr group, pid_t pid) +static int +virCgroupAddTaskInternal(virCgroupPtr group, pid_t pid, bool withSystemd) { int ret = -1; size_t i; @@ -1203,8 +1195,10 @@ virCgroupAddTask(virCgroupPtr group, pid_t pid) if (!group->controllers[i].mountPoint) continue; - /* We must never add tasks in systemd's hierarchy */ - if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + /* We must never add tasks in systemd's hierarchy + * unless we're intentionally trying to move a + * task into a systemd machine scope */ + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD && !withSystemd) continue; if (virCgroupAddTaskController(group, pid, i) < 0) @@ -1216,6 +1210,40 @@ virCgroupAddTask(virCgroupPtr group, pid_t pid) return ret; } +/** + * virCgroupAddTask: + * + * @group: The cgroup to add a task to + * @pid: The pid of the task to add + * + * Will add the task to all controllers, except the + * systemd unit controller. + * + * Returns: 0 on success, -1 on error + */ +int +virCgroupAddTask(virCgroupPtr group, pid_t pid) +{ + return virCgroupAddTaskInternal(group, pid, false); +} + +/** + * virCgroupAddMachineTask: + * + * @group: The cgroup to add a task to + * @pid: The pid of the task to add + * + * Will add the task to all controllers, including the + * systemd unit controller. + * + * Returns: 0 on success, -1 on error + */ +int +virCgroupAddMachineTask(virCgroupPtr group, pid_t pid) +{ + return virCgroupAddTaskInternal(group, pid, true); +} + /** * virCgroupAddTaskController: diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 4b8f3ff86f..2de1bf2de4 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -131,6 +131,7 @@ int virCgroupPathOfController(virCgroupPtr group, char **path); int virCgroupAddTask(virCgroupPtr group, pid_t pid); +int virCgroupAddMachineTask(virCgroupPtr group, pid_t pid); int virCgroupAddTaskController(virCgroupPtr group, pid_t pid,