From 4acb01e43ec126c3ca6b7c7cdef5e40af23e1a93 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Fri, 16 Jan 2015 16:23:45 +0000 Subject: [PATCH] lxc: delay setup of cgroup until we have the init pid Don't create the cgroups ahead of launching the container since there is no need for the limits to apply during initial bootstrap. Create the cgroup after the container PID is known and tell systemd the initpid is the leader, instead of the controller pid. --- src/lxc/lxc_cgroup.c | 11 +++-------- src/lxc/lxc_cgroup.h | 3 ++- src/lxc/lxc_controller.c | 42 +++++++++++++++++++++++++++++++++++----- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 728e8e500d..0987050bf7 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -462,7 +462,8 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def, } -virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def) +virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, + pid_t initpid) { virCgroupPtr cgroup = NULL; @@ -473,18 +474,12 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def) goto cleanup; } - /* - * XXX - * We should pass the PID of the LXC init process - * not ourselves, but this requires some more - * refactoring. 
We should also pass the root dir - */ if (virCgroupNewMachine(def->name, "lxc", true, def->uuid, NULL, - getpid(), + initpid, true, 0, NULL, def->resource->partition, diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index 0e78126e1b..31d6800dc7 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -27,7 +27,8 @@ # include "lxc_fuse.h" # include "virusb.h" -virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def); +virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, + pid_t initpid); virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def); int virLXCCgroupSetup(virDomainDefPtr def, virCgroupPtr cgroup, diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index f2c0b57853..00d0e23628 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -680,8 +680,9 @@ static int virLXCControllerGetNumadAdvice(virLXCControllerPtr ctrl, * virLXCControllerSetupResourceLimits * @ctrl: the controller state * - * Creates a cgroup for the container, moves the task inside, - * and sets resource limits + * Sets up the non-cgroup based resource limits that need + * to be inherited by the child process across clone()/exec(). 
+ * The cgroup limits are set up later * * Returns 0 on success or -1 in case of error */ @@ -704,6 +705,37 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) if (virLXCControllerSetupCpuAffinity(ctrl) < 0) goto cleanup; + ret = 0; + cleanup: + virBitmapFree(auto_nodeset); + return ret; +} + + +/* + * Creates the cgroup and sets up the various limits associated + * with it + */ +static int virLXCControllerSetupCgroupLimits(virLXCControllerPtr ctrl) +{ + virBitmapPtr auto_nodeset = NULL; + int ret = -1; + virBitmapPtr nodeset = NULL; + + VIR_DEBUG("Setting up cgroup resource limits"); + + if (virLXCControllerGetNumadAdvice(ctrl, &auto_nodeset) < 0) + goto cleanup; + + nodeset = virDomainNumatuneGetNodeset(ctrl->def->numatune, auto_nodeset, -1); + + if (!(ctrl->cgroup = virLXCCgroupCreate(ctrl->def, + ctrl->initpid))) + goto cleanup; + + if (virCgroupAddTask(ctrl->cgroup, getpid()) < 0) + goto cleanup; + if (virLXCCgroupSetup(ctrl->def, ctrl->cgroup, nodeset) < 0) goto cleanup; @@ -2224,6 +2256,9 @@ virLXCControllerRun(virLXCControllerPtr ctrl) for (i = 0; i < ctrl->npassFDs; i++) VIR_FORCE_CLOSE(ctrl->passFDs[i]); + if (virLXCControllerSetupCgroupLimits(ctrl) < 0) + goto cleanup; + if (virLXCControllerSetupUserns(ctrl) < 0) goto cleanup; @@ -2454,9 +2489,6 @@ int main(int argc, char *argv[]) if (virLXCControllerValidateConsoles(ctrl) < 0) goto cleanup; - if (!(ctrl->cgroup = virLXCCgroupCreate(ctrl->def))) - goto cleanup; - if (virLXCControllerSetupServer(ctrl) < 0) goto cleanup;