Do proper escaping of cgroup resource partitions

If a user cgroup name begins with "cgroup.", "_" or with any of
the controllers from /proc/cgroups followed by a dot, then they
need to be prefixed with a single underscore. eg if there is
an object "cpu.service", then this would end up as "_cpu.service"
in the cgroup filesystem tree, however, "waldo.service" would
stay "waldo.service", at least as long as nobody comes up with
a cgroup controller called "waldo".

Since we require a '.XXXX' suffix on all partitions, there is
no scope for clashing with the kernel 'tasks' and 'release_agent'
files.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
This commit is contained in:
Daniel P. Berrange 2013-04-26 10:50:24 +01:00
parent 9ddfe7eea6
commit f3662737b1
3 changed files with 160 additions and 4 deletions

View File

@ -1092,6 +1092,81 @@ cleanup:
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
static int virCgroupPartitionNeedsEscaping(const char *path)
{
FILE *fp = NULL;
int ret = 0;
char *line = NULL;
size_t len;
/* If it starts with 'cgroup.' or a '_' of any
* of the controller names from /proc/cgroups,
* then we must prefix a '_'
*/
if (STRPREFIX(path, "cgroup."))
return 1;
if (path[0] == '_')
return 1;
if (!(fp = fopen("/proc/cgroups", "r")))
return -errno;
/*
* Data looks like this:
* #subsys_name hierarchy num_cgroups enabled
* cpuset 2 4 1
* cpu 3 48 1
* cpuacct 3 48 1
* memory 4 4 1
* devices 5 4 1
* freezer 6 4 1
* net_cls 7 1 1
*/
while (getline(&line, &len, fp) > 0) {
if (STRPREFIX(line, "#subsys_name")) {
VIR_FREE(line);
continue;
}
char *tmp = strchr(line, ' ');
if (tmp)
*tmp = '\0';
len = tmp - line;
if (STRPREFIX(path, line) &&
path[len] == '.') {
ret = 1;
VIR_FREE(line);
goto cleanup;
}
VIR_FREE(line);
}
if (ferror(fp)) {
ret = -EIO;
goto cleanup;
}
cleanup:
VIR_FORCE_FCLOSE(fp);
return ret;
}
static int virCgroupPartitionEscape(char **path)
{
size_t len = strlen(*path) + 1;
int rc;
char escape = '_';
if ((rc = virCgroupPartitionNeedsEscaping(*path)) <= 0)
return rc;
if (VIR_INSERT_ELEMENT(*path, 0, len, escape) < 0)
return -ENOMEM;
return 0;
}
static char *virCgroupSetPartitionSuffix(const char *path)
{
char **tokens = virStringSplit(path, "/", 0);
@ -1123,6 +1198,11 @@ static char *virCgroupSetPartitionSuffix(const char *path)
}
strcat(tokens[i], ".partition");
}
if (virCgroupPartitionEscape(&(tokens[i])) < 0) {
virReportOOMError();
goto cleanup;
}
}
if (!(ret = virStringJoin((const char **)tokens, "/")))
@ -1352,6 +1432,9 @@ int virCgroupNewDomainPartition(virCgroupPtr partition,
name, driver) < 0)
return -ENOMEM;
if ((rc = virCgroupPartitionEscape(&grpname)) < 0)
return rc;
rc = virCgroupNew(grpname, partition, -1, group);
if (rc == 0) {

View File

@ -66,7 +66,7 @@ static char *fakesysfsdir;
* Co-mounting cpu & cpuacct controllers
* An anonymous controller for systemd
*/
const char *mounts =
const char *procmounts =
"rootfs / rootfs rw 0 0\n"
"tmpfs /run tmpfs rw,seclabel,nosuid,nodev,mode=755 0 0\n"
"tmpfs /not/really/sys/fs/cgroup tmpfs rw,seclabel,nosuid,nodev,noexec,mode=755 0 0\n"
@ -79,7 +79,7 @@ const char *mounts =
"/dev/sda1 /boot ext4 rw,seclabel,relatime,data=ordered 0 0\n"
"tmpfs /tmp tmpfs rw,seclabel,relatime,size=1024000k 0 0\n";
const char *cgroups =
const char *procselfcgroups =
"115:memory:/\n"
"8:blkio:/\n"
"6:freezer:/\n"
@ -87,6 +87,17 @@ const char *cgroups =
"2:cpuset:/\n"
"1:name=systemd:/user/berrange/123\n";
const char *proccgroups =
"#subsys_name hierarchy num_cgroups enabled\n"
"cpuset 2 4 1\n"
"cpu 3 48 1\n"
"cpuacct 3 48 1\n"
"memory 4 4 1\n"
"devices 5 4 1\n"
"freezer 6 4 1\n"
"blkio 8 4 1\n";
static int make_file(const char *path,
const char *name,
const char *value)
@ -366,7 +377,15 @@ FILE *fopen(const char *path, const char *mode)
if (STREQ(path, "/proc/mounts")) {
if (STREQ(mode, "r")) {
return fmemopen((void *)mounts, strlen(mounts), mode);
return fmemopen((void *)procmounts, strlen(procmounts), mode);
} else {
errno = EACCES;
return NULL;
}
}
if (STREQ(path, "/proc/cgroups")) {
if (STREQ(mode, "r")) {
return fmemopen((void *)proccgroups, strlen(proccgroups), mode);
} else {
errno = EACCES;
return NULL;
@ -374,7 +393,7 @@ FILE *fopen(const char *path, const char *mode)
}
if (STREQ(path, "/proc/self/cgroup")) {
if (STREQ(mode, "r")) {
return fmemopen((void *)cgroups, strlen(cgroups), mode);
return fmemopen((void *)procselfcgroups, strlen(procselfcgroups), mode);
} else {
errno = EACCES;
return NULL;

View File

@ -441,6 +441,58 @@ cleanup:
return ret;
}
static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNUSED)
{
virCgroupPtr partitioncgroup1 = NULL;
virCgroupPtr partitioncgroup2 = NULL;
virCgroupPtr partitioncgroup3 = NULL;
virCgroupPtr domaincgroup = NULL;
int ret = -1;
int rv;
const char *placement[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_CPU] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_CPUACCT] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_CPUSET] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_MEMORY] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
};
if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) {
fprintf(stderr, "Failed to create /cgroup.evil cgroup: %d\n", -rv);
goto cleanup;
}
if ((rv = virCgroupNewPartition("/cgroup.evil/net_cls.evil", true, -1, &partitioncgroup2)) != 0) {
fprintf(stderr, "Failed to create /cgroup.evil/cpu.evil cgroup: %d\n", -rv);
goto cleanup;
}
if ((rv = virCgroupNewPartition("/cgroup.evil/net_cls.evil/_evil.evil", true, -1, &partitioncgroup3)) != 0) {
fprintf(stderr, "Failed to create /cgroup.evil cgroup: %d\n", -rv);
goto cleanup;
}
if ((rv = virCgroupNewDomainPartition(partitioncgroup3, "lxc", "cpu.foo", true, &domaincgroup)) != 0) {
fprintf(stderr, "Cannot create LXC cgroup: %d\n", -rv);
goto cleanup;
}
/* NB we're not expecting 'net_cls.evil' to be escaped,
* since our fake /proc/cgroups pretends this controller
* isn't compiled into the kernel
*/
ret = validateCgroup(domaincgroup, "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", mountsFull, links, placement);
cleanup:
virCgroupFree(&partitioncgroup3);
virCgroupFree(&partitioncgroup2);
virCgroupFree(&partitioncgroup1);
virCgroupFree(&domaincgroup);
return ret;
}
# define FAKESYSFSDIRTEMPLATE abs_builddir "/fakesysfsdir-XXXXXX"
static int
@ -482,6 +534,8 @@ mymain(void)
if (virtTestRun("New cgroup for domain partition", 1, testCgroupNewForPartitionDomain, NULL) < 0)
ret = -1;
if (virtTestRun("New cgroup for domain partition escaped", 1, testCgroupNewForPartitionDomainEscaped, NULL) < 0)
ret = -1;
if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL)
virFileDeleteTree(fakesysfsdir);