mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2024-11-02 11:21:12 +00:00
fe0aac0503
This involves setting the cpuacct cgroup to a per-vcpu granularity, as well as summing the each vcpu accounting into a common array. Now that we are reading more than one cgroup file, we double-check that cpus weren't hot-plugged between reads to invalidate our summing. Signed-off-by: Eric Blake <eblake@redhat.com>
1865 lines
49 KiB
C
1865 lines
49 KiB
C
/*
|
|
* cgroup.c: Tools for managing cgroups
|
|
*
|
|
* Copyright (C) 2010-2012 Red Hat, Inc.
|
|
* Copyright IBM Corp. 2008
|
|
*
|
|
* See COPYING.LIB for the License of this software
|
|
*
|
|
* Authors:
|
|
* Dan Smith <danms@us.ibm.com>
|
|
*/
|
|
#include <config.h>
|
|
|
|
#include <stdio.h>
|
|
#include <stdint.h>
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
|
# include <mntent.h>
|
|
#endif
|
|
#include <fcntl.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include <stdlib.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <signal.h>
|
|
#include <libgen.h>
|
|
#include <dirent.h>
|
|
|
|
#include "internal.h"
|
|
#include "util.h"
|
|
#include "memory.h"
|
|
#include "cgroup.h"
|
|
#include "logging.h"
|
|
#include "virfile.h"
|
|
#include "virhash.h"
|
|
#include "virhashcode.h"
|
|
|
|
#define CGROUP_MAX_VAL 512
|
|
|
|
VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
|
|
"cpu", "cpuacct", "cpuset", "memory", "devices",
|
|
"freezer", "blkio");
|
|
|
|
struct virCgroupController {
|
|
int type;
|
|
char *mountPoint;
|
|
char *placement;
|
|
};
|
|
|
|
struct virCgroup {
|
|
char *path;
|
|
|
|
struct virCgroupController controllers[VIR_CGROUP_CONTROLLER_LAST];
|
|
};
|
|
|
|
typedef enum {
|
|
VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */
|
|
VIR_CGROUP_MEM_HIERACHY = 1 << 0, /* call virCgroupSetMemoryUseHierarchy
|
|
* before creating subcgroups and
|
|
* attaching tasks
|
|
*/
|
|
VIR_CGROUP_VCPU = 1 << 1, /* create subdir only under the cgroup cpu,
|
|
* cpuacct and cpuset if possible. */
|
|
} virCgroupFlags;
|
|
|
|
/**
|
|
* virCgroupFree:
|
|
*
|
|
* @group: The group structure to free
|
|
*/
|
|
void virCgroupFree(virCgroupPtr *group)
|
|
{
|
|
int i;
|
|
|
|
if (*group == NULL)
|
|
return;
|
|
|
|
for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
|
|
VIR_FREE((*group)->controllers[i].mountPoint);
|
|
VIR_FREE((*group)->controllers[i].placement);
|
|
}
|
|
|
|
VIR_FREE((*group)->path);
|
|
VIR_FREE(*group);
|
|
}
|
|
|
|
/**
|
|
* virCgroupMounted: query whether a cgroup subsystem is mounted or not
|
|
*
|
|
* @cgroup: The group structure to be queried
|
|
* @controller: cgroup subsystem id
|
|
*
|
|
* Returns true if a cgroup is subsystem is mounted.
|
|
*/
|
|
bool virCgroupMounted(virCgroupPtr cgroup, int controller)
|
|
{
|
|
return cgroup->controllers[controller].mountPoint != NULL;
|
|
}
|
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
|
/*
|
|
* Process /proc/mounts figuring out what controllers are
|
|
* mounted and where
|
|
*/
|
|
static int virCgroupDetectMounts(virCgroupPtr group)
|
|
{
|
|
int i;
|
|
FILE *mounts = NULL;
|
|
struct mntent entry;
|
|
char buf[CGROUP_MAX_VAL];
|
|
|
|
mounts = fopen("/proc/mounts", "r");
|
|
if (mounts == NULL) {
|
|
VIR_ERROR(_("Unable to open /proc/mounts"));
|
|
return -ENOENT;
|
|
}
|
|
|
|
while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) {
|
|
if (STRNEQ(entry.mnt_type, "cgroup"))
|
|
continue;
|
|
|
|
for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
|
|
const char *typestr = virCgroupControllerTypeToString(i);
|
|
int typelen = strlen(typestr);
|
|
char *tmp = entry.mnt_opts;
|
|
while (tmp) {
|
|
char *next = strchr(tmp, ',');
|
|
int len;
|
|
if (next) {
|
|
len = next-tmp;
|
|
next++;
|
|
} else {
|
|
len = strlen(tmp);
|
|
}
|
|
/* NB, the same controller can appear >1 time in mount list
|
|
* due to bind mounts from one location to another. Pick the
|
|
* first entry only
|
|
*/
|
|
if (typelen == len && STREQLEN(typestr, tmp, len) &&
|
|
!group->controllers[i].mountPoint &&
|
|
!(group->controllers[i].mountPoint = strdup(entry.mnt_dir)))
|
|
goto no_memory;
|
|
tmp = next;
|
|
}
|
|
}
|
|
}
|
|
|
|
VIR_FORCE_FCLOSE(mounts);
|
|
|
|
return 0;
|
|
|
|
no_memory:
|
|
VIR_FORCE_FCLOSE(mounts);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
|
|
/*
|
|
* Process /proc/self/cgroup figuring out what cgroup
|
|
* sub-path the current process is assigned to. ie not
|
|
* neccessarily in the root
|
|
*/
|
|
static int virCgroupDetectPlacement(virCgroupPtr group)
|
|
{
|
|
int i;
|
|
FILE *mapping = NULL;
|
|
char line[1024];
|
|
|
|
mapping = fopen("/proc/self/cgroup", "r");
|
|
if (mapping == NULL) {
|
|
VIR_ERROR(_("Unable to open /proc/self/cgroup"));
|
|
return -ENOENT;
|
|
}
|
|
|
|
while (fgets(line, sizeof(line), mapping) != NULL) {
|
|
char *controllers = strchr(line, ':');
|
|
char *path = controllers ? strchr(controllers+1, ':') : NULL;
|
|
char *nl = path ? strchr(path, '\n') : NULL;
|
|
|
|
if (!controllers || !path)
|
|
continue;
|
|
|
|
if (nl)
|
|
*nl = '\0';
|
|
|
|
*path = '\0';
|
|
controllers++;
|
|
path++;
|
|
|
|
for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
|
|
const char *typestr = virCgroupControllerTypeToString(i);
|
|
int typelen = strlen(typestr);
|
|
char *tmp = controllers;
|
|
while (tmp) {
|
|
char *next = strchr(tmp, ',');
|
|
int len;
|
|
if (next) {
|
|
len = next-tmp;
|
|
next++;
|
|
} else {
|
|
len = strlen(tmp);
|
|
}
|
|
if (typelen == len && STREQLEN(typestr, tmp, len) &&
|
|
!(group->controllers[i].placement = strdup(STREQ(path, "/") ? "" : path)))
|
|
goto no_memory;
|
|
|
|
tmp = next;
|
|
}
|
|
}
|
|
}
|
|
|
|
VIR_FORCE_FCLOSE(mapping);
|
|
|
|
return 0;
|
|
|
|
no_memory:
|
|
VIR_FORCE_FCLOSE(mapping);
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
static int virCgroupDetect(virCgroupPtr group)
|
|
{
|
|
int any = 0;
|
|
int rc;
|
|
int i;
|
|
|
|
rc = virCgroupDetectMounts(group);
|
|
if (rc < 0) {
|
|
VIR_ERROR(_("Failed to detect mounts for %s"), group->path);
|
|
return rc;
|
|
}
|
|
|
|
/* Check that at least 1 controller is available */
|
|
for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
|
|
if (group->controllers[i].mountPoint != NULL)
|
|
any = 1;
|
|
}
|
|
if (!any)
|
|
return -ENXIO;
|
|
|
|
|
|
rc = virCgroupDetectPlacement(group);
|
|
|
|
if (rc == 0) {
|
|
/* Check that for every mounted controller, we found our placement */
|
|
for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
|
|
if (!group->controllers[i].mountPoint)
|
|
continue;
|
|
|
|
if (!group->controllers[i].placement) {
|
|
VIR_ERROR(_("Could not find placement for controller %s at %s"),
|
|
virCgroupControllerTypeToString(i),
|
|
group->controllers[i].placement);
|
|
rc = -ENOENT;
|
|
break;
|
|
}
|
|
|
|
VIR_DEBUG("Detected mount/mapping %i:%s at %s in %s", i,
|
|
virCgroupControllerTypeToString(i),
|
|
group->controllers[i].mountPoint,
|
|
group->controllers[i].placement);
|
|
}
|
|
} else {
|
|
VIR_ERROR(_("Failed to detect mapping for %s"), group->path);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
#endif
|
|
|
|
|
|
int virCgroupPathOfController(virCgroupPtr group,
|
|
int controller,
|
|
const char *key,
|
|
char **path)
|
|
{
|
|
if (controller == -1) {
|
|
int i;
|
|
for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
|
|
if (group->controllers[i].mountPoint &&
|
|
group->controllers[i].placement) {
|
|
controller = i;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (controller == -1)
|
|
return -ENOSYS;
|
|
|
|
if (group->controllers[controller].mountPoint == NULL)
|
|
return -ENOENT;
|
|
|
|
if (group->controllers[controller].placement == NULL)
|
|
return -ENOENT;
|
|
|
|
if (virAsprintf(path, "%s%s%s/%s",
|
|
group->controllers[controller].mountPoint,
|
|
group->controllers[controller].placement,
|
|
STREQ(group->path, "/") ? "" : group->path,
|
|
key ? key : "") == -1)
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int virCgroupSetValueStr(virCgroupPtr group,
|
|
int controller,
|
|
const char *key,
|
|
const char *value)
|
|
{
|
|
int rc = 0;
|
|
char *keypath = NULL;
|
|
|
|
rc = virCgroupPathOfController(group, controller, key, &keypath);
|
|
if (rc != 0)
|
|
return rc;
|
|
|
|
VIR_DEBUG("Set value '%s' to '%s'", keypath, value);
|
|
rc = virFileWriteStr(keypath, value, 0);
|
|
if (rc < 0) {
|
|
rc = -errno;
|
|
VIR_DEBUG("Failed to write value '%s': %m", value);
|
|
} else {
|
|
rc = 0;
|
|
}
|
|
|
|
VIR_FREE(keypath);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int virCgroupGetValueStr(virCgroupPtr group,
|
|
int controller,
|
|
const char *key,
|
|
char **value)
|
|
{
|
|
int rc;
|
|
char *keypath = NULL;
|
|
|
|
*value = NULL;
|
|
|
|
rc = virCgroupPathOfController(group, controller, key, &keypath);
|
|
if (rc != 0) {
|
|
VIR_DEBUG("No path of %s, %s", group->path, key);
|
|
return rc;
|
|
}
|
|
|
|
VIR_DEBUG("Get value %s", keypath);
|
|
|
|
rc = virFileReadAll(keypath, 1024, value);
|
|
if (rc < 0) {
|
|
rc = -errno;
|
|
VIR_DEBUG("Failed to read %s: %m\n", keypath);
|
|
} else {
|
|
/* Terminated with '\n' has sometimes harmful effects to the caller */
|
|
if ((*value)[rc - 1] == '\n')
|
|
(*value)[rc - 1] = '\0';
|
|
|
|
rc = 0;
|
|
}
|
|
|
|
VIR_FREE(keypath);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int virCgroupSetValueU64(virCgroupPtr group,
|
|
int controller,
|
|
const char *key,
|
|
unsigned long long int value)
|
|
{
|
|
char *strval = NULL;
|
|
int rc;
|
|
|
|
if (virAsprintf(&strval, "%llu", value) == -1)
|
|
return -ENOMEM;
|
|
|
|
rc = virCgroupSetValueStr(group, controller, key, strval);
|
|
|
|
VIR_FREE(strval);
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
|
|
static int virCgroupSetValueI64(virCgroupPtr group,
|
|
int controller,
|
|
const char *key,
|
|
long long int value)
|
|
{
|
|
char *strval = NULL;
|
|
int rc;
|
|
|
|
if (virAsprintf(&strval, "%lld", value) == -1)
|
|
return -ENOMEM;
|
|
|
|
rc = virCgroupSetValueStr(group, controller, key, strval);
|
|
|
|
VIR_FREE(strval);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int virCgroupGetValueI64(virCgroupPtr group,
|
|
int controller,
|
|
const char *key,
|
|
long long int *value)
|
|
{
|
|
char *strval = NULL;
|
|
int rc = 0;
|
|
|
|
rc = virCgroupGetValueStr(group, controller, key, &strval);
|
|
if (rc != 0)
|
|
goto out;
|
|
|
|
if (virStrToLong_ll(strval, NULL, 10, value) < 0)
|
|
rc = -EINVAL;
|
|
out:
|
|
VIR_FREE(strval);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int virCgroupGetValueU64(virCgroupPtr group,
|
|
int controller,
|
|
const char *key,
|
|
unsigned long long int *value)
|
|
{
|
|
char *strval = NULL;
|
|
int rc = 0;
|
|
|
|
rc = virCgroupGetValueStr(group, controller, key, &strval);
|
|
if (rc != 0)
|
|
goto out;
|
|
|
|
if (virStrToLong_ull(strval, NULL, 10, value) < 0)
|
|
rc = -EINVAL;
|
|
out:
|
|
VIR_FREE(strval);
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
|
static int virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group)
|
|
{
|
|
int i;
|
|
int rc = 0;
|
|
const char *inherit_values[] = {
|
|
"cpuset.cpus",
|
|
"cpuset.mems",
|
|
};
|
|
|
|
VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path);
|
|
for (i = 0; i < ARRAY_CARDINALITY(inherit_values) ; i++) {
|
|
char *value;
|
|
|
|
rc = virCgroupGetValueStr(parent,
|
|
VIR_CGROUP_CONTROLLER_CPUSET,
|
|
inherit_values[i],
|
|
&value);
|
|
if (rc != 0) {
|
|
VIR_ERROR(_("Failed to get %s %d"), inherit_values[i], rc);
|
|
break;
|
|
}
|
|
|
|
VIR_DEBUG("Inherit %s = %s", inherit_values[i], value);
|
|
|
|
rc = virCgroupSetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_CPUSET,
|
|
inherit_values[i],
|
|
value);
|
|
VIR_FREE(value);
|
|
|
|
if (rc != 0) {
|
|
VIR_ERROR(_("Failed to set %s %d"), inherit_values[i], rc);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int virCgroupSetMemoryUseHierarchy(virCgroupPtr group)
|
|
{
|
|
int rc = 0;
|
|
unsigned long long value;
|
|
const char *filename = "memory.use_hierarchy";
|
|
|
|
rc = virCgroupGetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
filename, &value);
|
|
if (rc != 0) {
|
|
VIR_ERROR(_("Failed to read %s/%s (%d)"), group->path, filename, rc);
|
|
return rc;
|
|
}
|
|
|
|
/* Setting twice causes error, so if already enabled, skip setting */
|
|
if (value == 1)
|
|
return 0;
|
|
|
|
VIR_DEBUG("Setting up %s/%s", group->path, filename);
|
|
rc = virCgroupSetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
filename, 1);
|
|
|
|
if (rc != 0) {
|
|
VIR_ERROR(_("Failed to set %s/%s (%d)"), group->path, filename, rc);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group,
|
|
int create, unsigned int flags)
|
|
{
|
|
int i;
|
|
int rc = 0;
|
|
|
|
VIR_DEBUG("Make group %s", group->path);
|
|
for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
|
|
char *path = NULL;
|
|
|
|
/* Skip over controllers that aren't mounted */
|
|
if (!group->controllers[i].mountPoint)
|
|
continue;
|
|
|
|
/* We need to control cpu bandwidth for each vcpu now */
|
|
if ((flags & VIR_CGROUP_VCPU) &&
|
|
(i != VIR_CGROUP_CONTROLLER_CPU &&
|
|
i != VIR_CGROUP_CONTROLLER_CPUACCT)) {
|
|
/* treat it as unmounted and we can use virCgroupAddTask */
|
|
VIR_FREE(group->controllers[i].mountPoint);
|
|
continue;
|
|
}
|
|
|
|
rc = virCgroupPathOfController(group, i, "", &path);
|
|
if (rc < 0)
|
|
return rc;
|
|
/* As of Feb 2011, clang can't see that the above function
|
|
* call did not modify group. */
|
|
sa_assert(group->controllers[i].mountPoint);
|
|
|
|
VIR_DEBUG("Make controller %s", path);
|
|
if (access(path, F_OK) != 0) {
|
|
if (!create ||
|
|
mkdir(path, 0755) < 0) {
|
|
/* With a kernel that doesn't support multi-level directory
|
|
* for blkio controller, libvirt will fail and disable all
|
|
* other controllers even though they are available. So
|
|
* treat blkio as unmounted if mkdir fails. */
|
|
if (i == VIR_CGROUP_CONTROLLER_BLKIO) {
|
|
rc = 0;
|
|
VIR_FREE(group->controllers[i].mountPoint);
|
|
VIR_FREE(path);
|
|
continue;
|
|
} else {
|
|
rc = -errno;
|
|
VIR_FREE(path);
|
|
break;
|
|
}
|
|
}
|
|
if (group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint != NULL &&
|
|
(i == VIR_CGROUP_CONTROLLER_CPUSET ||
|
|
STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint))) {
|
|
rc = virCgroupCpuSetInherit(parent, group);
|
|
if (rc != 0) {
|
|
VIR_FREE(path);
|
|
break;
|
|
}
|
|
}
|
|
/*
|
|
* Note that virCgroupSetMemoryUseHierarchy should always be
|
|
* called prior to creating subcgroups and attaching tasks.
|
|
*/
|
|
if ((flags & VIR_CGROUP_MEM_HIERACHY) &&
|
|
(group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint != NULL) &&
|
|
(i == VIR_CGROUP_CONTROLLER_MEMORY ||
|
|
STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint))) {
|
|
rc = virCgroupSetMemoryUseHierarchy(group);
|
|
if (rc != 0) {
|
|
VIR_FREE(path);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
VIR_FREE(path);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
static int virCgroupNew(const char *path,
|
|
virCgroupPtr *group)
|
|
{
|
|
int rc = 0;
|
|
char *typpath = NULL;
|
|
|
|
VIR_DEBUG("New group %s", path);
|
|
*group = NULL;
|
|
|
|
if (VIR_ALLOC((*group)) != 0) {
|
|
rc = -ENOMEM;
|
|
goto err;
|
|
}
|
|
|
|
if (!((*group)->path = strdup(path))) {
|
|
rc = -ENOMEM;
|
|
goto err;
|
|
}
|
|
|
|
rc = virCgroupDetect(*group);
|
|
if (rc < 0)
|
|
goto err;
|
|
|
|
return rc;
|
|
err:
|
|
virCgroupFree(group);
|
|
*group = NULL;
|
|
|
|
VIR_FREE(typpath);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int virCgroupAppRoot(int privileged,
|
|
virCgroupPtr *group,
|
|
int create)
|
|
{
|
|
virCgroupPtr rootgrp = NULL;
|
|
int rc;
|
|
|
|
rc = virCgroupNew("/", &rootgrp);
|
|
if (rc != 0)
|
|
return rc;
|
|
|
|
if (privileged) {
|
|
rc = virCgroupNew("/libvirt", group);
|
|
} else {
|
|
char *rootname;
|
|
char *username;
|
|
username = virGetUserName(getuid());
|
|
if (!username) {
|
|
rc = -ENOMEM;
|
|
goto cleanup;
|
|
}
|
|
rc = virAsprintf(&rootname, "/libvirt-%s", username);
|
|
VIR_FREE(username);
|
|
if (rc < 0) {
|
|
rc = -ENOMEM;
|
|
goto cleanup;
|
|
}
|
|
|
|
rc = virCgroupNew(rootname, group);
|
|
VIR_FREE(rootname);
|
|
}
|
|
if (rc != 0)
|
|
goto cleanup;
|
|
|
|
rc = virCgroupMakeGroup(rootgrp, *group, create, VIR_CGROUP_NONE);
|
|
|
|
cleanup:
|
|
virCgroupFree(&rootgrp);
|
|
return rc;
|
|
}
|
|
#endif
|
|
|
|
#if defined _DIRENT_HAVE_D_TYPE
|
|
static int virCgroupRemoveRecursively(char *grppath)
|
|
{
|
|
DIR *grpdir;
|
|
struct dirent *ent;
|
|
int rc = 0;
|
|
|
|
grpdir = opendir(grppath);
|
|
if (grpdir == NULL) {
|
|
if (errno == ENOENT)
|
|
return 0;
|
|
rc = -errno;
|
|
VIR_ERROR(_("Unable to open %s (%d)"), grppath, errno);
|
|
return rc;
|
|
}
|
|
|
|
for (;;) {
|
|
char *path;
|
|
|
|
errno = 0;
|
|
ent = readdir(grpdir);
|
|
if (ent == NULL) {
|
|
if ((rc = -errno))
|
|
VIR_ERROR(_("Failed to readdir for %s (%d)"), grppath, errno);
|
|
break;
|
|
}
|
|
|
|
if (ent->d_name[0] == '.') continue;
|
|
if (ent->d_type != DT_DIR) continue;
|
|
|
|
if (virAsprintf(&path, "%s/%s", grppath, ent->d_name) == -1) {
|
|
rc = -ENOMEM;
|
|
break;
|
|
}
|
|
rc = virCgroupRemoveRecursively(path);
|
|
VIR_FREE(path);
|
|
if (rc != 0)
|
|
break;
|
|
}
|
|
closedir(grpdir);
|
|
|
|
VIR_DEBUG("Removing cgroup %s", grppath);
|
|
if (rmdir(grppath) != 0 && errno != ENOENT) {
|
|
rc = -errno;
|
|
VIR_ERROR(_("Unable to remove %s (%d)"), grppath, errno);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
#else
|
|
static int virCgroupRemoveRecursively(char *grppath ATTRIBUTE_UNUSED)
|
|
{
|
|
/* Claim no support */
|
|
return -ENXIO;
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* virCgroupRemove:
|
|
*
|
|
* @group: The group to be removed
|
|
*
|
|
* It first removes all child groups recursively
|
|
* in depth first order and then removes @group
|
|
* because the presence of the child groups
|
|
* prevents removing @group.
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupRemove(virCgroupPtr group)
|
|
{
|
|
int rc = 0;
|
|
int i;
|
|
char *grppath = NULL;
|
|
|
|
for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
|
|
/* Skip over controllers not mounted */
|
|
if (!group->controllers[i].mountPoint)
|
|
continue;
|
|
|
|
if (virCgroupPathOfController(group,
|
|
i,
|
|
NULL,
|
|
&grppath) != 0)
|
|
continue;
|
|
|
|
VIR_DEBUG("Removing cgroup %s and all child cgroups", grppath);
|
|
rc = virCgroupRemoveRecursively(grppath);
|
|
VIR_FREE(grppath);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/**
|
|
* virCgroupAddTask:
|
|
*
|
|
* @group: The cgroup to add a task to
|
|
* @pid: The pid of the task to add
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupAddTask(virCgroupPtr group, pid_t pid)
|
|
{
|
|
int rc = 0;
|
|
int i;
|
|
|
|
for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
|
|
/* Skip over controllers not mounted */
|
|
if (!group->controllers[i].mountPoint)
|
|
continue;
|
|
|
|
rc = virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid);
|
|
if (rc != 0)
|
|
break;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
/**
|
|
* virCgroupForDriver:
|
|
*
|
|
* @name: name of this driver (e.g., xen, qemu, lxc)
|
|
* @group: Pointer to returned virCgroupPtr
|
|
*
|
|
* Returns 0 on success
|
|
*/
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
|
int virCgroupForDriver(const char *name,
|
|
virCgroupPtr *group,
|
|
int privileged,
|
|
int create)
|
|
{
|
|
int rc;
|
|
char *path = NULL;
|
|
virCgroupPtr rootgrp = NULL;
|
|
|
|
rc = virCgroupAppRoot(privileged, &rootgrp, create);
|
|
if (rc != 0)
|
|
goto out;
|
|
|
|
if (virAsprintf(&path, "%s/%s", rootgrp->path, name) < 0) {
|
|
rc = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
rc = virCgroupNew(path, group);
|
|
VIR_FREE(path);
|
|
|
|
if (rc == 0) {
|
|
rc = virCgroupMakeGroup(rootgrp, *group, create, VIR_CGROUP_NONE);
|
|
if (rc != 0)
|
|
virCgroupFree(group);
|
|
}
|
|
|
|
out:
|
|
virCgroupFree(&rootgrp);
|
|
|
|
return rc;
|
|
}
|
|
#else
|
|
int virCgroupForDriver(const char *name ATTRIBUTE_UNUSED,
|
|
virCgroupPtr *group ATTRIBUTE_UNUSED,
|
|
int privileged ATTRIBUTE_UNUSED,
|
|
int create ATTRIBUTE_UNUSED)
|
|
{
|
|
/* Claim no support */
|
|
return -ENXIO;
|
|
}
|
|
#endif
|
|
|
|
|
|
/**
|
|
* virCgroupForDomain:
|
|
*
|
|
* @driver: group for driver owning the domain
|
|
* @name: name of the domain
|
|
* @group: Pointer to returned virCgroupPtr
|
|
*
|
|
* Returns 0 on success
|
|
*/
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
|
int virCgroupForDomain(virCgroupPtr driver,
|
|
const char *name,
|
|
virCgroupPtr *group,
|
|
int create)
|
|
{
|
|
int rc;
|
|
char *path;
|
|
|
|
if (driver == NULL)
|
|
return -EINVAL;
|
|
|
|
if (virAsprintf(&path, "%s/%s", driver->path, name) < 0)
|
|
return -ENOMEM;
|
|
|
|
rc = virCgroupNew(path, group);
|
|
VIR_FREE(path);
|
|
|
|
if (rc == 0) {
|
|
/*
|
|
* Create a cgroup with memory.use_hierarchy enabled to
|
|
* surely account memory usage of lxc with ns subsystem
|
|
* enabled. (To be exact, memory and ns subsystems are
|
|
* enabled at the same time.)
|
|
*
|
|
* The reason why doing it here, not a upper group, say
|
|
* a group for driver, is to avoid overhead to track
|
|
* cumulative usage that we don't need.
|
|
*/
|
|
rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_MEM_HIERACHY);
|
|
if (rc != 0)
|
|
virCgroupFree(group);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
#else
|
|
int virCgroupForDomain(virCgroupPtr driver ATTRIBUTE_UNUSED,
|
|
const char *name ATTRIBUTE_UNUSED,
|
|
virCgroupPtr *group ATTRIBUTE_UNUSED,
|
|
int create ATTRIBUTE_UNUSED)
|
|
{
|
|
return -ENXIO;
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* virCgroupForVcpu:
|
|
*
|
|
* @driver: group for the domain
|
|
* @vcpuid: id of the vcpu
|
|
* @group: Pointer to returned virCgroupPtr
|
|
*
|
|
* Returns 0 on success
|
|
*/
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
|
int virCgroupForVcpu(virCgroupPtr driver,
|
|
int vcpuid,
|
|
virCgroupPtr *group,
|
|
int create)
|
|
{
|
|
int rc;
|
|
char *path;
|
|
|
|
if (driver == NULL)
|
|
return -EINVAL;
|
|
|
|
if (virAsprintf(&path, "%s/vcpu%d", driver->path, vcpuid) < 0)
|
|
return -ENOMEM;
|
|
|
|
rc = virCgroupNew(path, group);
|
|
VIR_FREE(path);
|
|
|
|
if (rc == 0) {
|
|
rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU);
|
|
if (rc != 0)
|
|
virCgroupFree(group);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
#else
|
|
int virCgroupForVcpu(virCgroupPtr driver ATTRIBUTE_UNUSED,
|
|
int vcpuid ATTRIBUTE_UNUSED,
|
|
virCgroupPtr *group ATTRIBUTE_UNUSED,
|
|
int create ATTRIBUTE_UNUSED)
|
|
{
|
|
return -ENXIO;
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* virCgroupSetBlkioWeight:
|
|
*
|
|
* @group: The cgroup to change io weight for
|
|
* @weight: The Weight for this cgroup
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight)
|
|
{
|
|
if (weight > 1000 || weight < 100)
|
|
return -EINVAL;
|
|
|
|
return virCgroupSetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_BLKIO,
|
|
"blkio.weight",
|
|
weight);
|
|
}
|
|
|
|
/**
|
|
* virCgroupGetBlkioWeight:
|
|
*
|
|
* @group: The cgroup to get weight for
|
|
* @Weight: Pointer to returned weight
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight)
|
|
{
|
|
unsigned long long tmp;
|
|
int ret;
|
|
ret = virCgroupGetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_BLKIO,
|
|
"blkio.weight", &tmp);
|
|
if (ret == 0)
|
|
*weight = tmp;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* virCgroupSetBlkioDeviceWeight:
|
|
*
|
|
* @group: The cgroup to change io device weight device for
|
|
* @path: The device with a weight to alter
|
|
* @weight: The new device weight (100-1000), or 0 to clear
|
|
*
|
|
* device_weight is treated as a write-only parameter, so
|
|
* there isn't a getter counterpart.
|
|
*
|
|
* Returns: 0 on success, -errno on failure
|
|
*/
|
|
#if defined(major) && defined(minor)
|
|
int virCgroupSetBlkioDeviceWeight(virCgroupPtr group,
|
|
const char *path,
|
|
unsigned int weight)
|
|
{
|
|
char *str;
|
|
struct stat sb;
|
|
int ret;
|
|
|
|
if (weight && (weight > 1000 || weight < 100))
|
|
return -EINVAL;
|
|
|
|
if (stat(path, &sb) < 0)
|
|
return -errno;
|
|
|
|
if (!S_ISBLK(sb.st_mode))
|
|
return -EINVAL;
|
|
|
|
if (virAsprintf(&str, "%d:%d %d", major(sb.st_rdev), minor(sb.st_rdev),
|
|
weight) < 0)
|
|
return -errno;
|
|
|
|
ret = virCgroupSetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_BLKIO,
|
|
"blkio.weight_device",
|
|
str);
|
|
VIR_FREE(str);
|
|
return ret;
|
|
}
|
|
#else
|
|
int
|
|
virCgroupSetBlkioDeviceWeight(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
const char *path ATTRIBUTE_UNUSED,
|
|
unsigned int weight ATTRIBUTE_UNUSED)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* virCgroupSetMemory:
|
|
*
|
|
* @group: The cgroup to change memory for
|
|
* @kb: The memory amount in kilobytes
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupSetMemory(virCgroupPtr group, unsigned long long kb)
|
|
{
|
|
unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
|
|
|
|
if (kb > maxkb)
|
|
return -EINVAL;
|
|
else if (kb == maxkb)
|
|
return virCgroupSetValueI64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.limit_in_bytes",
|
|
-1);
|
|
else
|
|
return virCgroupSetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.limit_in_bytes",
|
|
kb << 10);
|
|
}
|
|
|
|
/**
|
|
* virCgroupGetMemoryUsage:
|
|
*
|
|
* @group: The cgroup to change memory for
|
|
* @kb: Pointer to returned used memory in kilobytes
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb)
|
|
{
|
|
long long unsigned int usage_in_bytes;
|
|
int ret;
|
|
ret = virCgroupGetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.usage_in_bytes", &usage_in_bytes);
|
|
if (ret == 0)
|
|
*kb = (unsigned long) usage_in_bytes >> 10;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* virCgroupSetMemoryHardLimit:
|
|
*
|
|
* @group: The cgroup to change memory hard limit for
|
|
* @kb: The memory amount in kilobytes
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb)
|
|
{
|
|
return virCgroupSetMemory(group, kb);
|
|
}
|
|
|
|
/**
|
|
* virCgroupGetMemoryHardLimit:
|
|
*
|
|
* @group: The cgroup to get the memory hard limit for
|
|
* @kb: The memory amount in kilobytes
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb)
|
|
{
|
|
long long unsigned int limit_in_bytes;
|
|
int ret;
|
|
ret = virCgroupGetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.limit_in_bytes", &limit_in_bytes);
|
|
if (ret == 0)
|
|
*kb = limit_in_bytes >> 10;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* virCgroupSetMemorySoftLimit:
|
|
*
|
|
* @group: The cgroup to change memory soft limit for
|
|
* @kb: The memory amount in kilobytes
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb)
|
|
{
|
|
unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
|
|
|
|
if (kb > maxkb)
|
|
return -EINVAL;
|
|
else if (kb == maxkb)
|
|
return virCgroupSetValueI64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.soft_limit_in_bytes",
|
|
-1);
|
|
else
|
|
return virCgroupSetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.soft_limit_in_bytes",
|
|
kb << 10);
|
|
}
|
|
|
|
|
|
/**
|
|
* virCgroupGetMemorySoftLimit:
|
|
*
|
|
* @group: The cgroup to get the memory soft limit for
|
|
* @kb: The memory amount in kilobytes
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb)
|
|
{
|
|
long long unsigned int limit_in_bytes;
|
|
int ret;
|
|
ret = virCgroupGetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.soft_limit_in_bytes", &limit_in_bytes);
|
|
if (ret == 0)
|
|
*kb = limit_in_bytes >> 10;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* virCgroupSetMemSwapHardLimit:
|
|
*
|
|
* @group: The cgroup to change mem+swap hard limit for
|
|
* @kb: The mem+swap amount in kilobytes
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupSetMemSwapHardLimit(virCgroupPtr group, unsigned long long kb)
|
|
{
|
|
unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
|
|
|
|
if (kb > maxkb)
|
|
return -EINVAL;
|
|
else if (kb == maxkb)
|
|
return virCgroupSetValueI64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.memsw.limit_in_bytes",
|
|
-1);
|
|
else
|
|
return virCgroupSetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.memsw.limit_in_bytes",
|
|
kb << 10);
|
|
}
|
|
|
|
/**
|
|
* virCgroupGetMemSwapHardLimit:
|
|
*
|
|
* @group: The cgroup to get mem+swap hard limit for
|
|
* @kb: The mem+swap amount in kilobytes
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb)
|
|
{
|
|
long long unsigned int limit_in_bytes;
|
|
int ret;
|
|
ret = virCgroupGetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
"memory.memsw.limit_in_bytes", &limit_in_bytes);
|
|
if (ret == 0)
|
|
*kb = limit_in_bytes >> 10;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* virCgroupSetCpusetMems:
|
|
*
|
|
* @group: The cgroup to set cpuset.mems for
|
|
* @mems: the numa nodes to set
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems)
|
|
{
|
|
return virCgroupSetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_CPUSET,
|
|
"cpuset.mems",
|
|
mems);
|
|
}
|
|
|
|
/**
|
|
* virCgroupGetCpusetMems:
|
|
*
|
|
* @group: The cgroup to get cpuset.mems for
|
|
* @mems: the numa nodes to get
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupGetCpusetMems(virCgroupPtr group, char **mems)
|
|
{
|
|
return virCgroupGetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_CPUSET,
|
|
"cpuset.mems",
|
|
mems);
|
|
}
|
|
|
|
/**
|
|
* virCgroupDenyAllDevices:
|
|
*
|
|
* @group: The cgroup to deny all permissions, for all devices
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupDenyAllDevices(virCgroupPtr group)
|
|
{
|
|
return virCgroupSetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
|
"devices.deny",
|
|
"a");
|
|
}
|
|
|
|
/**
|
|
* virCgroupAllowDevice:
|
|
*
|
|
* @group: The cgroup to allow a device for
|
|
* @type: The device type (i.e., 'c' or 'b')
|
|
* @major: The major number of the device
|
|
* @minor: The minor number of the device
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor,
|
|
int perms)
|
|
{
|
|
int rc;
|
|
char *devstr = NULL;
|
|
|
|
if (virAsprintf(&devstr, "%c %i:%i %s%s%s", type, major, minor,
|
|
perms & VIR_CGROUP_DEVICE_READ ? "r" : "",
|
|
perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "",
|
|
perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) {
|
|
rc = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
rc = virCgroupSetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
|
"devices.allow",
|
|
devstr);
|
|
out:
|
|
VIR_FREE(devstr);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/**
|
|
* virCgroupAllowDeviceMajor:
|
|
*
|
|
* @group: The cgroup to allow an entire device major type for
|
|
* @type: The device type (i.e., 'c' or 'b')
|
|
* @major: The major number of the device type
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupAllowDeviceMajor(virCgroupPtr group, char type, int major,
|
|
int perms)
|
|
{
|
|
int rc;
|
|
char *devstr = NULL;
|
|
|
|
if (virAsprintf(&devstr, "%c %i:* %s%s%s", type, major,
|
|
perms & VIR_CGROUP_DEVICE_READ ? "r" : "",
|
|
perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "",
|
|
perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) {
|
|
rc = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
rc = virCgroupSetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
|
"devices.allow",
|
|
devstr);
|
|
out:
|
|
VIR_FREE(devstr);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/**
|
|
* virCgroupAllowDevicePath:
|
|
*
|
|
* @group: The cgroup to allow the device for
|
|
* @path: the device to allow
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
|
|
*
|
|
* Queries the type of device and its major/minor number, and
|
|
* adds that to the cgroup ACL
|
|
*
|
|
* Returns: 0 on success, 1 if path exists but is not a device, or
|
|
* negative errno value on failure
|
|
*/
|
|
#if defined(major) && defined(minor)
|
|
int virCgroupAllowDevicePath(virCgroupPtr group, const char *path, int perms)
|
|
{
|
|
struct stat sb;
|
|
|
|
if (stat(path, &sb) < 0)
|
|
return -errno;
|
|
|
|
if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode))
|
|
return 1;
|
|
|
|
return virCgroupAllowDevice(group,
|
|
S_ISCHR(sb.st_mode) ? 'c' : 'b',
|
|
major(sb.st_rdev),
|
|
minor(sb.st_rdev),
|
|
perms);
|
|
}
|
|
#else
|
|
int virCgroupAllowDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
const char *path ATTRIBUTE_UNUSED,
|
|
int perms ATTRIBUTE_UNUSED)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
#endif
|
|
|
|
|
|
/**
|
|
* virCgroupDenyDevice:
|
|
*
|
|
* @group: The cgroup to deny a device for
|
|
* @type: The device type (i.e., 'c' or 'b')
|
|
* @major: The major number of the device
|
|
* @minor: The minor number of the device
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to deny
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupDenyDevice(virCgroupPtr group, char type, int major, int minor,
|
|
int perms)
|
|
{
|
|
int rc;
|
|
char *devstr = NULL;
|
|
|
|
if (virAsprintf(&devstr, "%c %i:%i %s%s%s", type, major, minor,
|
|
perms & VIR_CGROUP_DEVICE_READ ? "r" : "",
|
|
perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "",
|
|
perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) {
|
|
rc = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
rc = virCgroupSetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
|
"devices.deny",
|
|
devstr);
|
|
out:
|
|
VIR_FREE(devstr);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/**
|
|
* virCgroupDenyDeviceMajor:
|
|
*
|
|
* @group: The cgroup to deny an entire device major type for
|
|
* @type: The device type (i.e., 'c' or 'b')
|
|
* @major: The major number of the device type
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to deny
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupDenyDeviceMajor(virCgroupPtr group, char type, int major,
|
|
int perms)
|
|
{
|
|
int rc;
|
|
char *devstr = NULL;
|
|
|
|
if (virAsprintf(&devstr, "%c %i:* %s%s%s", type, major,
|
|
perms & VIR_CGROUP_DEVICE_READ ? "r" : "",
|
|
perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "",
|
|
perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) {
|
|
rc = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
rc = virCgroupSetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
|
"devices.deny",
|
|
devstr);
|
|
out:
|
|
VIR_FREE(devstr);
|
|
|
|
return rc;
|
|
}
|
|
|
|
#if defined(major) && defined(minor)
|
|
int virCgroupDenyDevicePath(virCgroupPtr group, const char *path, int perms)
|
|
{
|
|
struct stat sb;
|
|
|
|
if (stat(path, &sb) < 0)
|
|
return -errno;
|
|
|
|
if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode))
|
|
return 1;
|
|
|
|
return virCgroupDenyDevice(group,
|
|
S_ISCHR(sb.st_mode) ? 'c' : 'b',
|
|
major(sb.st_rdev),
|
|
minor(sb.st_rdev),
|
|
perms);
|
|
}
|
|
#else
|
|
int virCgroupDenyDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
const char *path ATTRIBUTE_UNUSED,
|
|
int perms ATTRIBUTE_UNUSED)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
#endif
|
|
|
|
int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares)
|
|
{
|
|
return virCgroupSetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
"cpu.shares", shares);
|
|
}
|
|
|
|
int virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares)
|
|
{
|
|
return virCgroupGetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
"cpu.shares", shares);
|
|
}
|
|
|
|
/**
|
|
* virCgroupSetCpuCfsPeriod:
|
|
*
|
|
* @group: The cgroup to change cpu.cfs_period_us for
|
|
* @cfs_period: The bandwidth period in usecs
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupSetCpuCfsPeriod(virCgroupPtr group, unsigned long long cfs_period)
|
|
{
|
|
/* The cfs_period shoule be greater or equal than 1ms, and less or equal
|
|
* than 1s.
|
|
*/
|
|
if (cfs_period < 1000 || cfs_period > 1000000)
|
|
return -EINVAL;
|
|
|
|
return virCgroupSetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
"cpu.cfs_period_us", cfs_period);
|
|
}
|
|
|
|
/**
|
|
* virCgroupGetCpuCfsPeriod:
|
|
*
|
|
* @group: The cgroup to get cpu.cfs_period_us for
|
|
* @cfs_period: Pointer to the returned bandwidth period in usecs
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupGetCpuCfsPeriod(virCgroupPtr group, unsigned long long *cfs_period)
|
|
{
|
|
return virCgroupGetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
"cpu.cfs_period_us", cfs_period);
|
|
}
|
|
|
|
/**
|
|
* virCgroupSetCpuCfsQuota:
|
|
*
|
|
* @group: The cgroup to change cpu.cfs_quota_us for
|
|
* @cfs_quota: the cpu bandwidth (in usecs) that this tg will be allowed to
|
|
* consume over period
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupSetCpuCfsQuota(virCgroupPtr group, long long cfs_quota)
|
|
{
|
|
if (cfs_quota >= 0) {
|
|
/* The cfs_quota shoule be greater or equal than 1ms */
|
|
if (cfs_quota < 1000)
|
|
return -EINVAL;
|
|
|
|
/* check overflow */
|
|
if (cfs_quota > ULLONG_MAX / 1000)
|
|
return -EINVAL;
|
|
}
|
|
|
|
return virCgroupSetValueI64(group,
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
"cpu.cfs_quota_us", cfs_quota);
|
|
}
|
|
|
|
/**
|
|
* virCgroupGetCpuCfsQuota:
|
|
*
|
|
* @group: The cgroup to get cpu.cfs_quota_us for
|
|
* @cfs_quota: Pointer to the returned cpu bandwidth (in usecs) that this tg
|
|
* will be allowed to consume over period
|
|
*
|
|
* Returns: 0 on success
|
|
*/
|
|
int virCgroupGetCpuCfsQuota(virCgroupPtr group, long long *cfs_quota)
|
|
{
|
|
return virCgroupGetValueI64(group,
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
"cpu.cfs_quota_us", cfs_quota);
|
|
}
|
|
|
|
int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage)
|
|
{
|
|
return virCgroupGetValueU64(group,
|
|
VIR_CGROUP_CONTROLLER_CPUACCT,
|
|
"cpuacct.usage", usage);
|
|
}
|
|
|
|
int virCgroupGetCpuacctPercpuUsage(virCgroupPtr group, char **usage)
|
|
{
|
|
return virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT,
|
|
"cpuacct.usage_percpu", usage);
|
|
}
|
|
|
|
#ifdef _SC_CLK_TCK
|
|
int virCgroupGetCpuacctStat(virCgroupPtr group, unsigned long long *user,
|
|
unsigned long long *sys)
|
|
{
|
|
char *str;
|
|
char *p;
|
|
int ret;
|
|
static double scale = -1.0;
|
|
|
|
if ((ret = virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT,
|
|
"cpuacct.stat", &str)) < 0)
|
|
return ret;
|
|
if (!(p = STRSKIP(str, "user ")) ||
|
|
virStrToLong_ull(p, &p, 10, user) < 0 ||
|
|
!(p = STRSKIP(p, "\nsystem ")) ||
|
|
virStrToLong_ull(p, NULL, 10, sys) < 0) {
|
|
ret = -EINVAL;
|
|
goto cleanup;
|
|
}
|
|
/* times reported are in system ticks (generally 100 Hz), but that
|
|
* rate can theoretically vary between machines. Scale things
|
|
* into approximate nanoseconds. */
|
|
if (scale < 0) {
|
|
long ticks_per_sec = sysconf(_SC_CLK_TCK);
|
|
if (ticks_per_sec == -1) {
|
|
ret = -errno;
|
|
goto cleanup;
|
|
}
|
|
scale = 1000000000.0 / ticks_per_sec;
|
|
}
|
|
*user *= scale;
|
|
*sys *= scale;
|
|
|
|
ret = 0;
|
|
cleanup:
|
|
VIR_FREE(str);
|
|
return ret;
|
|
}
|
|
#else
|
|
int virCgroupGetCpuacctStat(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
unsigned long long *user ATTRIBUTE_UNUSED,
|
|
unsigned long long *sys ATTRIBUTE_UNUSED)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
#endif
|
|
|
|
int virCgroupSetFreezerState(virCgroupPtr group, const char *state)
|
|
{
|
|
return virCgroupSetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_FREEZER,
|
|
"freezer.state", state);
|
|
}
|
|
|
|
int virCgroupGetFreezerState(virCgroupPtr group, char **state)
|
|
{
|
|
return virCgroupGetValueStr(group,
|
|
VIR_CGROUP_CONTROLLER_FREEZER,
|
|
"freezer.state", state);
|
|
}
|
|
|
|
|
|
#if defined HAVE_KILL && defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
|
static int virCgroupKillInternal(virCgroupPtr group, int signum, virHashTablePtr pids)
|
|
{
|
|
int rc;
|
|
int killedAny = 0;
|
|
char *keypath = NULL;
|
|
bool done = false;
|
|
FILE *fp = NULL;
|
|
VIR_DEBUG("group=%p path=%s signum=%d pids=%p",
|
|
group, group->path, signum, pids);
|
|
|
|
rc = virCgroupPathOfController(group, -1, "tasks", &keypath);
|
|
if (rc != 0) {
|
|
VIR_DEBUG("No path of %s, tasks", group->path);
|
|
return rc;
|
|
}
|
|
|
|
/* PIDs may be forking as we kill them, so loop
|
|
* until there are no new PIDs found
|
|
*/
|
|
while (!done) {
|
|
done = true;
|
|
if (!(fp = fopen(keypath, "r"))) {
|
|
rc = -errno;
|
|
VIR_DEBUG("Failed to read %s: %m\n", keypath);
|
|
goto cleanup;
|
|
} else {
|
|
while (!feof(fp)) {
|
|
unsigned long pid_value;
|
|
if (fscanf(fp, "%lu", &pid_value) != 1) {
|
|
if (feof(fp))
|
|
break;
|
|
rc = -errno;
|
|
VIR_DEBUG("Failed to read %s: %m\n", keypath);
|
|
goto cleanup;
|
|
}
|
|
if (virHashLookup(pids, (void*)pid_value))
|
|
continue;
|
|
|
|
VIR_DEBUG("pid=%lu", pid_value);
|
|
/* Cgroups is a Linux concept, so this cast is safe. */
|
|
if (kill((pid_t)pid_value, signum) < 0) {
|
|
if (errno != ESRCH) {
|
|
rc = -errno;
|
|
goto cleanup;
|
|
}
|
|
/* Leave RC == 0 since we didn't kill one */
|
|
} else {
|
|
killedAny = 1;
|
|
done = false;
|
|
}
|
|
|
|
ignore_value(virHashAddEntry(pids, (void*)pid_value, (void*)1));
|
|
}
|
|
VIR_FORCE_FCLOSE(fp);
|
|
}
|
|
}
|
|
|
|
rc = killedAny ? 1 : 0;
|
|
|
|
cleanup:
|
|
VIR_FREE(keypath);
|
|
VIR_FORCE_FCLOSE(fp);
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
static uint32_t virCgroupPidCode(const void *name, uint32_t seed)
|
|
{
|
|
unsigned long pid_value = (unsigned long)(intptr_t)name;
|
|
return virHashCodeGen(&pid_value, sizeof(pid_value), seed);
|
|
}
|
|
static bool virCgroupPidEqual(const void *namea, const void *nameb)
|
|
{
|
|
return namea == nameb;
|
|
}
|
|
static void *virCgroupPidCopy(const void *name)
|
|
{
|
|
return (void*)name;
|
|
}
|
|
|
|
/*
|
|
* Returns
|
|
* < 0 : errno that occurred
|
|
* 0 : no PIDs killed
|
|
* 1 : at least one PID killed
|
|
*/
|
|
int virCgroupKill(virCgroupPtr group, int signum)
|
|
{
|
|
VIR_DEBUG("group=%p path=%s signum=%d", group, group->path, signum);
|
|
int rc;
|
|
/* The 'tasks' file in cgroups can contain duplicated
|
|
* pids, so we use a hash to track which we've already
|
|
* killed.
|
|
*/
|
|
virHashTablePtr pids = virHashCreateFull(100,
|
|
NULL,
|
|
virCgroupPidCode,
|
|
virCgroupPidEqual,
|
|
virCgroupPidCopy,
|
|
NULL);
|
|
|
|
rc = virCgroupKillInternal(group, signum, pids);
|
|
|
|
virHashFree(pids);
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
static int virCgroupKillRecursiveInternal(virCgroupPtr group, int signum, virHashTablePtr pids, bool dormdir)
|
|
{
|
|
int rc;
|
|
int killedAny = 0;
|
|
char *keypath = NULL;
|
|
DIR *dp;
|
|
virCgroupPtr subgroup = NULL;
|
|
struct dirent *ent;
|
|
VIR_DEBUG("group=%p path=%s signum=%d pids=%p", group, group->path, signum, pids);
|
|
|
|
rc = virCgroupPathOfController(group, -1, "", &keypath);
|
|
if (rc != 0) {
|
|
VIR_DEBUG("No path of %s, tasks", group->path);
|
|
return rc;
|
|
}
|
|
|
|
if ((rc = virCgroupKillInternal(group, signum, pids)) != 0)
|
|
return rc;
|
|
|
|
VIR_DEBUG("Iterate over children of %s", keypath);
|
|
if (!(dp = opendir(keypath))) {
|
|
rc = -errno;
|
|
return rc;
|
|
}
|
|
|
|
while ((ent = readdir(dp))) {
|
|
char *subpath;
|
|
|
|
if (STREQ(ent->d_name, "."))
|
|
continue;
|
|
if (STREQ(ent->d_name, ".."))
|
|
continue;
|
|
if (ent->d_type != DT_DIR)
|
|
continue;
|
|
|
|
VIR_DEBUG("Process subdir %s", ent->d_name);
|
|
if (virAsprintf(&subpath, "%s/%s", group->path, ent->d_name) < 0) {
|
|
rc = -ENOMEM;
|
|
goto cleanup;
|
|
}
|
|
|
|
if ((rc = virCgroupNew(subpath, &subgroup)) != 0)
|
|
goto cleanup;
|
|
|
|
if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids, true)) < 0)
|
|
goto cleanup;
|
|
if (rc == 1)
|
|
killedAny = 1;
|
|
|
|
if (dormdir)
|
|
virCgroupRemove(subgroup);
|
|
|
|
virCgroupFree(&subgroup);
|
|
}
|
|
|
|
rc = killedAny;
|
|
|
|
cleanup:
|
|
virCgroupFree(&subgroup);
|
|
closedir(dp);
|
|
|
|
return rc;
|
|
}
|
|
|
|
int virCgroupKillRecursive(virCgroupPtr group, int signum)
|
|
{
|
|
int rc;
|
|
VIR_DEBUG("group=%p path=%s signum=%d", group, group->path, signum);
|
|
virHashTablePtr pids = virHashCreateFull(100,
|
|
NULL,
|
|
virCgroupPidCode,
|
|
virCgroupPidEqual,
|
|
virCgroupPidCopy,
|
|
NULL);
|
|
|
|
rc = virCgroupKillRecursiveInternal(group, signum, pids, false);
|
|
|
|
virHashFree(pids);
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
int virCgroupKillPainfully(virCgroupPtr group)
|
|
{
|
|
int i;
|
|
int rc;
|
|
VIR_DEBUG("cgroup=%p path=%s", group, group->path);
|
|
for (i = 0 ; i < 15 ; i++) {
|
|
int signum;
|
|
if (i == 0)
|
|
signum = SIGTERM;
|
|
else if (i == 8)
|
|
signum = SIGKILL;
|
|
else
|
|
signum = 0; /* Just check for existance */
|
|
|
|
rc = virCgroupKillRecursive(group, signum);
|
|
VIR_DEBUG("Iteration %d rc=%d", i, rc);
|
|
/* If rc == -1 we hit error, if 0 we ran out of PIDs */
|
|
if (rc <= 0)
|
|
break;
|
|
|
|
usleep(200 * 1000);
|
|
}
|
|
VIR_DEBUG("Complete %d", rc);
|
|
return rc;
|
|
}
|
|
|
|
#else /* !(HAVE_KILL, HAVE_MNTENT_H, HAVE_GETMNTENT_R) */
|
|
int virCgroupKill(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
int signum ATTRIBUTE_UNUSED)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
int virCgroupKillRecursive(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
int signum ATTRIBUTE_UNUSED)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
|
|
int virCgroupKillPainfully(virCgroupPtr group ATTRIBUTE_UNUSED)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
#endif /* HAVE_KILL, HAVE_MNTENT_H, HAVE_GETMNTENT_R */
|