Add support for systemd cgroup mount

Systemd uses a named cgroup mount for tracking processes. Add
it as another type of controller, albeit one which we have to
special case in a number of places. In particular we must
never create/delete directories there, nor add tasks. Essentially
the systemd mount is to be considered read-only for libvirt.

With this change both the virCgroupDetectPlacement and
virCgroupCopyPlacement methods must be invoked. The copy
placement method will copy setup for resource controllers
only. The detect placement method will probe for any
named controllers, or resource controllers not already
setup.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
This commit is contained in:
Daniel P. Berrange 2013-07-25 19:13:44 +01:00
parent 4574b475df
commit aedd46e7e3
3 changed files with 63 additions and 15 deletions

View File

@ -57,7 +57,8 @@
VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
"cpu", "cpuacct", "cpuset", "memory", "devices",
"freezer", "blkio", "net_cls", "perf_event");
"freezer", "blkio", "net_cls", "perf_event",
"name=systemd");
typedef enum {
VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */
@ -117,6 +118,9 @@ virCgroupValidateMachineGroup(virCgroupPtr group,
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
char *tmp;
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
if (!group->controllers[i].placement)
continue;
@ -331,6 +335,9 @@ static int virCgroupCopyPlacement(virCgroupPtr group,
if (!group->controllers[i].mountPoint)
continue;
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
if (path[0] == '/') {
if (VIR_STRDUP(group->controllers[i].placement, path) < 0)
return -1;
@ -386,6 +393,8 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
int ret = -1;
char *procfile;
VIR_DEBUG("Detecting placement for pid %lld path %s",
(unsigned long long)pid, path);
if (pid == -1) {
if (VIR_STRDUP(procfile, "/proc/self/cgroup") < 0)
goto cleanup;
@ -422,6 +431,7 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
const char *typestr = virCgroupControllerTypeToString(i);
int typelen = strlen(typestr);
char *tmp = controllers;
while (tmp) {
char *next = strchr(tmp, ',');
int len;
@ -438,13 +448,20 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
* selfpath=="/libvirt.service" + path="foo" -> "/libvirt.service/foo"
*/
if (typelen == len && STREQLEN(typestr, tmp, len) &&
group->controllers[i].mountPoint != NULL) {
if (virAsprintf(&group->controllers[i].placement,
"%s%s%s", selfpath,
(STREQ(selfpath, "/") ||
STREQ(path, "") ? "" : "/"),
path) < 0)
goto cleanup;
group->controllers[i].mountPoint != NULL &&
group->controllers[i].placement == NULL) {
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
if (VIR_STRDUP(group->controllers[i].placement,
selfpath) < 0)
goto cleanup;
} else {
if (virAsprintf(&group->controllers[i].placement,
"%s%s%s", selfpath,
(STREQ(selfpath, "/") ||
STREQ(path, "") ? "" : "/"),
path) < 0)
goto cleanup;
}
}
tmp = next;
@ -535,13 +552,16 @@ static int virCgroupDetect(virCgroupPtr group,
return -1;
}
if (parent || path[0] == '/') {
if (virCgroupCopyPlacement(group, path, parent) < 0)
return -1;
} else {
if (virCgroupDetectPlacement(group, pid, path) < 0)
return -1;
}
/* In some cases we can copy part of the placement info
* based on the parent cgroup...
*/
if ((parent || path[0] == '/') &&
virCgroupCopyPlacement(group, path, parent) < 0)
return -1;
/* ... but use /proc/cgroups to fill in the rest */
if (virCgroupDetectPlacement(group, pid, path) < 0)
return -1;
/* Check that for every mounted controller, we found our placement */
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
@ -833,6 +853,12 @@ static int virCgroupMakeGroup(virCgroupPtr parent,
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
char *path = NULL;
/* We must never mkdir() in systemd's hierarchy */
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
VIR_DEBUG("Not creating systemd controller group");
continue;
}
/* Skip over controllers that aren't mounted */
if (!group->controllers[i].mountPoint) {
VIR_DEBUG("Skipping unmounted controller %s",
@ -1037,6 +1063,10 @@ int virCgroupRemove(virCgroupPtr group)
if (!group->controllers[i].mountPoint)
continue;
/* We must never rmdir() in systemd's hierarchy */
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
/* Don't delete the root group, if we accidentally
ended up in it for some reason */
if (STREQ(group->controllers[i].placement, "/"))
@ -1076,6 +1106,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid)
if (!group->controllers[i].mountPoint)
continue;
/* We must never add tasks in systemd's hierarchy */
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
if (virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid) < 0)
goto cleanup;
}
@ -1177,6 +1211,10 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
!dest_group->controllers[i].mountPoint)
continue;
/* We must never move tasks in systemd's hierarchy */
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
/* New threads are created in the same group as their parent;
* but if a thread is created after we first read we aren't
* aware that it needs to move. Therefore, we must iterate

View File

@ -40,6 +40,7 @@ enum {
VIR_CGROUP_CONTROLLER_BLKIO,
VIR_CGROUP_CONTROLLER_NET_CLS,
VIR_CGROUP_CONTROLLER_PERF_EVENT,
VIR_CGROUP_CONTROLLER_SYSTEMD,
VIR_CGROUP_CONTROLLER_LAST
};

View File

@ -87,6 +87,7 @@ const char *mountsSmall[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
[VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
[VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
};
const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu,cpuacct",
@ -96,6 +97,7 @@ const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/not/really/sys/fs/cgroup/freezer",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup/blkio",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd",
};
const char *links[VIR_CGROUP_CONTROLLER_LAST] = {
@ -121,6 +123,7 @@ static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if (virCgroupNewSelf(&cgroup) < 0) {
@ -161,6 +164,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
[VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
[VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
};
const char *placementFull[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_CPU] = "/virtualmachines.partition",
@ -170,6 +174,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/virtualmachines.partition",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/virtualmachines.partition",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/virtualmachines", false, -1, &cgroup)) != -1) {
@ -233,6 +238,7 @@ static int testCgroupNewForPartitionNested(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/deployment.partition/production.partition",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/deployment.partition/production.partition",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/deployment/production", false, -1, &cgroup)) != -1) {
@ -281,6 +287,7 @@ static int testCgroupNewForPartitionNestedDeep(const void *args ATTRIBUTE_UNUSED
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/user/berrange.user/production.partition",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/user/berrange.user/production.partition",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/user/berrange.user/production", false, -1, &cgroup)) != -1) {
@ -336,6 +343,7 @@ static int testCgroupNewForPartitionDomain(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/production.partition/foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/production.partition/foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/production", true, -1, &partitioncgroup)) != 0) {
@ -372,6 +380,7 @@ static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNU
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) {