2008-10-03 17:58:02 +00:00
|
|
|
/*
|
2012-12-03 15:03:47 +00:00
|
|
|
* vircgroup.c: methods for managing control cgroups
|
2008-10-03 17:58:02 +00:00
|
|
|
*
|
2013-04-25 20:24:42 +00:00
|
|
|
* Copyright (C) 2010-2013 Red Hat, Inc.
|
2008-10-03 17:58:02 +00:00
|
|
|
* Copyright IBM Corp. 2008
|
|
|
|
*
|
2012-07-27 09:39:53 +00:00
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
2012-09-20 22:30:55 +00:00
|
|
|
* License along with this library. If not, see
|
2012-07-27 09:39:53 +00:00
|
|
|
* <http://www.gnu.org/licenses/>.
|
2008-10-03 17:58:02 +00:00
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Dan Smith <danms@us.ibm.com>
|
|
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
|
|
|
|
#include <stdio.h>
|
2010-04-23 09:34:17 +00:00
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2010-03-09 18:22:22 +00:00
|
|
|
# include <mntent.h>
|
2009-10-07 10:18:31 +00:00
|
|
|
#endif
|
2013-04-05 11:48:47 +00:00
|
|
|
#if defined HAVE_SYS_MOUNT_H
|
|
|
|
# include <sys/mount.h>
|
|
|
|
#endif
|
2008-10-03 17:58:02 +00:00
|
|
|
#include <fcntl.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/types.h>
|
2011-02-22 17:33:59 +00:00
|
|
|
#include <signal.h>
|
2010-06-23 16:00:55 +00:00
|
|
|
#include <dirent.h>
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2013-03-28 14:32:23 +00:00
|
|
|
#define __VIR_CGROUP_ALLOW_INCLUDE_PRIV_H__
|
|
|
|
#include "vircgrouppriv.h"
|
|
|
|
|
2012-12-13 17:44:57 +00:00
|
|
|
#include "virutil.h"
|
2012-12-12 18:06:53 +00:00
|
|
|
#include "viralloc.h"
|
2013-04-05 11:48:47 +00:00
|
|
|
#include "virerror.h"
|
2012-12-12 17:59:27 +00:00
|
|
|
#include "virlog.h"
|
2011-07-19 18:32:58 +00:00
|
|
|
#include "virfile.h"
|
2012-01-25 16:13:59 +00:00
|
|
|
#include "virhash.h"
|
2012-01-18 16:10:43 +00:00
|
|
|
#include "virhashcode.h"
|
2013-04-26 09:23:51 +00:00
|
|
|
#include "virstring.h"
|
2008-10-03 17:58:02 +00:00
|
|
|
|
|
|
|
/* Byte size used (at least) for the stack buffer that
 * virCgroupDetectMounts() hands to getmntent_r() for each mount entry. */
#define CGROUP_MAX_VAL 512

/* Error-domain tag for this file. NOTE(review): presumably consumed by
 * the error reporting macros from virerror.h -- confirm. */
#define VIR_FROM_THIS VIR_FROM_CGROUP
|
|
|
|
|
2009-07-09 13:09:38 +00:00
|
|
|
VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
|
2009-09-21 14:31:22 +00:00
|
|
|
"cpu", "cpuacct", "cpuset", "memory", "devices",
|
2013-04-15 14:17:33 +00:00
|
|
|
"freezer", "blkio", "net_cls", "perf_event");
|
2009-07-09 13:10:41 +00:00
|
|
|
|
2011-07-21 07:12:55 +00:00
|
|
|
/* Flag bits examined by virCgroupMakeGroup(). */
typedef enum {
    /* No special handling: create subdir under each cgroup if possible. */
    VIR_CGROUP_NONE = 0,

    /* Call virCgroupSetMemoryUseHierarchy before creating subcgroups
     * and attaching tasks. */
    VIR_CGROUP_MEM_HIERACHY = 1 << 0,
} virCgroupFlags;
|
|
|
|
|
2008-10-03 17:58:02 +00:00
|
|
|
/**
|
|
|
|
* virCgroupFree:
|
|
|
|
*
|
|
|
|
* @group: The group structure to free
|
|
|
|
*/
|
|
|
|
void virCgroupFree(virCgroupPtr *group)
|
|
|
|
{
|
2009-07-09 13:10:41 +00:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (*group == NULL)
|
|
|
|
return;
|
|
|
|
|
2013-05-21 07:58:16 +00:00
|
|
|
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
|
2009-07-09 13:10:41 +00:00
|
|
|
VIR_FREE((*group)->controllers[i].mountPoint);
|
2013-04-05 10:28:04 +00:00
|
|
|
VIR_FREE((*group)->controllers[i].linkPoint);
|
2009-07-09 13:10:41 +00:00
|
|
|
VIR_FREE((*group)->controllers[i].placement);
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
2009-07-09 13:10:41 +00:00
|
|
|
|
|
|
|
VIR_FREE((*group)->path);
|
|
|
|
VIR_FREE(*group);
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
|
|
|
|
2010-10-29 09:32:16 +00:00
|
|
|
/**
|
2013-03-21 13:38:31 +00:00
|
|
|
* virCgroupHasController: query whether a cgroup controller is present
|
2010-10-29 09:32:16 +00:00
|
|
|
*
|
2013-03-21 13:38:31 +00:00
|
|
|
* @cgroup: The group structure to be queried, or NULL
|
2010-10-29 09:32:16 +00:00
|
|
|
* @controller: cgroup subsystem id
|
|
|
|
*
|
2013-03-21 13:38:31 +00:00
|
|
|
* Returns true if a cgroup controller is mounted and is associated
|
|
|
|
* with this cgroup object.
|
2010-10-29 09:32:16 +00:00
|
|
|
*/
|
2013-03-21 13:38:31 +00:00
|
|
|
bool virCgroupHasController(virCgroupPtr cgroup, int controller)
|
2010-10-29 09:32:16 +00:00
|
|
|
{
|
2013-03-21 13:38:31 +00:00
|
|
|
if (!cgroup)
|
|
|
|
return false;
|
|
|
|
if (controller < 0 || controller >= VIR_CGROUP_CONTROLLER_LAST)
|
|
|
|
return false;
|
2010-10-29 09:32:16 +00:00
|
|
|
return cgroup->controllers[controller].mountPoint != NULL;
|
|
|
|
}
|
|
|
|
|
2010-04-23 09:34:17 +00:00
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2013-03-22 11:11:34 +00:00
|
|
|
/*
 * virCgroupCopyMounts:
 * @group: group receiving the copies
 * @parent: group whose mount information is duplicated
 *
 * For every controller that has a mount point in @parent, duplicate
 * the mount point (and the link point, when there is one) into @group.
 *
 * Returns 0 on success, -ENOMEM if a string could not be duplicated.
 * Strings copied before a failure are left in @group for the caller
 * to release.
 */
static int virCgroupCopyMounts(virCgroupPtr group,
                               virCgroupPtr parent)
{
    int idx;

    for (idx = 0; idx < VIR_CGROUP_CONTROLLER_LAST; idx++) {
        const char *srcMount = parent->controllers[idx].mountPoint;
        const char *srcLink = parent->controllers[idx].linkPoint;

        /* Controller not mounted in the parent: nothing to inherit */
        if (srcMount == NULL)
            continue;

        if (!(group->controllers[idx].mountPoint = strdup(srcMount)))
            return -ENOMEM;

        if (srcLink == NULL)
            continue;

        if (!(group->controllers[idx].linkPoint = strdup(srcLink)))
            return -ENOMEM;
    }

    return 0;
}
|
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
/*
|
|
|
|
* Process /proc/mounts figuring out what controllers are
|
|
|
|
* mounted and where
|
|
|
|
*/
|
|
|
|
static int virCgroupDetectMounts(virCgroupPtr group)
|
2008-10-03 17:58:02 +00:00
|
|
|
{
|
2009-07-09 13:10:41 +00:00
|
|
|
int i;
|
2009-03-16 10:41:37 +00:00
|
|
|
FILE *mounts = NULL;
|
2008-10-03 17:58:02 +00:00
|
|
|
struct mntent entry;
|
|
|
|
char buf[CGROUP_MAX_VAL];
|
|
|
|
|
|
|
|
mounts = fopen("/proc/mounts", "r");
|
|
|
|
if (mounts == NULL) {
|
2011-05-09 09:24:09 +00:00
|
|
|
VIR_ERROR(_("Unable to open /proc/mounts"));
|
2009-07-09 13:10:41 +00:00
|
|
|
return -ENOENT;
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) {
|
2009-07-09 13:10:41 +00:00
|
|
|
if (STRNEQ(entry.mnt_type, "cgroup"))
|
|
|
|
continue;
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2013-05-21 07:58:16 +00:00
|
|
|
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
|
2009-07-09 13:10:41 +00:00
|
|
|
const char *typestr = virCgroupControllerTypeToString(i);
|
|
|
|
int typelen = strlen(typestr);
|
|
|
|
char *tmp = entry.mnt_opts;
|
|
|
|
while (tmp) {
|
|
|
|
char *next = strchr(tmp, ',');
|
|
|
|
int len;
|
|
|
|
if (next) {
|
|
|
|
len = next-tmp;
|
|
|
|
next++;
|
|
|
|
} else {
|
|
|
|
len = strlen(tmp);
|
|
|
|
}
|
2011-08-31 15:46:13 +00:00
|
|
|
/* NB, the same controller can appear >1 time in mount list
|
|
|
|
* due to bind mounts from one location to another. Pick the
|
|
|
|
* first entry only
|
|
|
|
*/
|
2009-07-09 13:10:41 +00:00
|
|
|
if (typelen == len && STREQLEN(typestr, tmp, len) &&
|
2013-04-05 10:28:04 +00:00
|
|
|
!group->controllers[i].mountPoint) {
|
|
|
|
char *linksrc;
|
|
|
|
struct stat sb;
|
|
|
|
char *tmp2;
|
|
|
|
|
|
|
|
if (!(group->controllers[i].mountPoint = strdup(entry.mnt_dir)))
|
|
|
|
goto no_memory;
|
|
|
|
|
|
|
|
tmp2 = strrchr(entry.mnt_dir, '/');
|
|
|
|
if (!tmp2) {
|
|
|
|
errno = EINVAL;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
*tmp2 = '\0';
|
|
|
|
/* If it is a co-mount it has a filename like "cpu,cpuacct"
|
|
|
|
* and we must identify the symlink path */
|
|
|
|
if (strchr(tmp2 + 1, ',')) {
|
|
|
|
if (virAsprintf(&linksrc, "%s/%s",
|
|
|
|
entry.mnt_dir, typestr) < 0)
|
|
|
|
goto no_memory;
|
|
|
|
*tmp2 = '/';
|
|
|
|
|
|
|
|
if (lstat(linksrc, &sb) < 0) {
|
|
|
|
if (errno == ENOENT) {
|
|
|
|
VIR_WARN("Controller %s co-mounted at %s is missing symlink at %s",
|
|
|
|
typestr, entry.mnt_dir, linksrc);
|
|
|
|
VIR_FREE(linksrc);
|
|
|
|
} else {
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (!S_ISLNK(sb.st_mode)) {
|
|
|
|
VIR_WARN("Expecting a symlink at %s for controller %s",
|
|
|
|
linksrc, typestr);
|
|
|
|
} else {
|
|
|
|
group->controllers[i].linkPoint = linksrc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-07-09 13:10:41 +00:00
|
|
|
tmp = next;
|
|
|
|
}
|
|
|
|
}
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
|
|
|
|
2010-11-17 02:13:29 +00:00
|
|
|
VIR_FORCE_FCLOSE(mounts);
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
return 0;
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
no_memory:
|
2013-04-05 10:28:04 +00:00
|
|
|
errno = ENOMEM;
|
|
|
|
error:
|
2010-11-17 02:13:29 +00:00
|
|
|
VIR_FORCE_FCLOSE(mounts);
|
2013-04-05 10:28:04 +00:00
|
|
|
return -errno;
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
/*
 * virCgroupCopyPlacement:
 * @group: group whose controllers[].placement gets filled in
 * @path: absolute path (leading '/') or path relative to @parent
 * @parent: group supplying the base placement for relative paths
 *
 * Sets the placement of every controller that has a mount point in
 * @group. An absolute @path is used verbatim; otherwise the result
 * is the parent's placement joined with @path:
 *
 *   parent=="/" + path="" => "/"
 *   parent=="/libvirt.service" + path=="" => "/libvirt.service"
 *   parent=="/libvirt.service" + path=="foo" => "/libvirt.service/foo"
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int virCgroupCopyPlacement(virCgroupPtr group,
                                  const char *path,
                                  virCgroupPtr parent)
{
    int idx;

    for (idx = 0; idx < VIR_CGROUP_CONTROLLER_LAST; idx++) {
        const char *sep;

        if (!group->controllers[idx].mountPoint)
            continue;

        if (path[0] == '/') {
            group->controllers[idx].placement = strdup(path);
            if (group->controllers[idx].placement == NULL)
                return -ENOMEM;
            continue;
        }

        /* No separator when the parent is the root or there is
         * nothing to append */
        if (STREQ(parent->controllers[idx].placement, "/") ||
            STREQ(path, ""))
            sep = "";
        else
            sep = "/";

        if (virAsprintf(&group->controllers[idx].placement,
                        "%s%s%s",
                        parent->controllers[idx].placement,
                        sep,
                        path) < 0)
            return -ENOMEM;
    }

    return 0;
}
|
|
|
|
|
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
/*
|
2013-03-22 11:11:34 +00:00
|
|
|
* virCgroupDetectPlacement:
|
|
|
|
* @group: the group to process
|
|
|
|
* @path: the relative path to append, not starting with '/'
|
|
|
|
*
|
2009-07-09 13:10:41 +00:00
|
|
|
* Process /proc/self/cgroup figuring out what cgroup
|
|
|
|
* sub-path the current process is assigned to. ie not
|
2013-03-22 11:11:34 +00:00
|
|
|
* necessarily in the root. The contents of this file
|
|
|
|
* looks like
|
|
|
|
*
|
|
|
|
* 9:perf_event:/
|
|
|
|
* 8:blkio:/
|
|
|
|
* 7:net_cls:/
|
|
|
|
* 6:freezer:/
|
|
|
|
* 5:devices:/
|
|
|
|
* 4:memory:/
|
|
|
|
* 3:cpuacct,cpu:/
|
|
|
|
* 2:cpuset:/
|
|
|
|
* 1:name=systemd:/user/berrange/2
|
|
|
|
*
|
|
|
|
* It then appends @path to each detected path.
|
2008-10-03 17:58:02 +00:00
|
|
|
*/
|
2013-03-22 11:11:34 +00:00
|
|
|
static int virCgroupDetectPlacement(virCgroupPtr group,
|
|
|
|
const char *path)
|
2008-10-03 17:58:02 +00:00
|
|
|
{
|
|
|
|
int i;
|
2009-07-09 13:10:41 +00:00
|
|
|
FILE *mapping = NULL;
|
|
|
|
char line[1024];
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
mapping = fopen("/proc/self/cgroup", "r");
|
|
|
|
if (mapping == NULL) {
|
2011-05-09 09:24:09 +00:00
|
|
|
VIR_ERROR(_("Unable to open /proc/self/cgroup"));
|
2009-07-09 13:10:41 +00:00
|
|
|
return -ENOENT;
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
while (fgets(line, sizeof(line), mapping) != NULL) {
|
|
|
|
char *controllers = strchr(line, ':');
|
2013-03-22 11:11:34 +00:00
|
|
|
char *selfpath = controllers ? strchr(controllers + 1, ':') : NULL;
|
|
|
|
char *nl = selfpath ? strchr(selfpath, '\n') : NULL;
|
2009-07-09 13:10:41 +00:00
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
if (!controllers || !selfpath)
|
2009-07-09 13:10:41 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (nl)
|
|
|
|
*nl = '\0';
|
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
*selfpath = '\0';
|
2009-07-09 13:10:41 +00:00
|
|
|
controllers++;
|
2013-03-22 11:11:34 +00:00
|
|
|
selfpath++;
|
2009-07-09 13:10:41 +00:00
|
|
|
|
2013-05-21 07:58:16 +00:00
|
|
|
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
|
2009-07-09 13:10:41 +00:00
|
|
|
const char *typestr = virCgroupControllerTypeToString(i);
|
|
|
|
int typelen = strlen(typestr);
|
|
|
|
char *tmp = controllers;
|
|
|
|
while (tmp) {
|
|
|
|
char *next = strchr(tmp, ',');
|
|
|
|
int len;
|
|
|
|
if (next) {
|
2013-03-22 11:11:34 +00:00
|
|
|
len = next - tmp;
|
2009-07-09 13:10:41 +00:00
|
|
|
next++;
|
|
|
|
} else {
|
|
|
|
len = strlen(tmp);
|
|
|
|
}
|
2013-03-22 11:11:34 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* selfpath=="/" + path="" -> "/"
|
|
|
|
* selfpath=="/libvirt.service" + path="" -> "/libvirt.service"
|
|
|
|
* selfpath=="/libvirt.service" + path="foo" -> "/libvirt.service/foo"
|
|
|
|
*/
|
|
|
|
if (typelen == len && STREQLEN(typestr, tmp, len)) {
|
|
|
|
if (virAsprintf(&group->controllers[i].placement,
|
|
|
|
"%s%s%s", selfpath,
|
|
|
|
(STREQ(selfpath, "/") ||
|
|
|
|
STREQ(path, "") ? "" : "/"),
|
|
|
|
path) < 0)
|
|
|
|
goto no_memory;
|
|
|
|
}
|
2009-07-09 13:10:41 +00:00
|
|
|
|
|
|
|
tmp = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-11-17 02:13:29 +00:00
|
|
|
VIR_FORCE_FCLOSE(mapping);
|
2009-07-09 13:10:41 +00:00
|
|
|
|
2008-10-03 17:58:02 +00:00
|
|
|
return 0;
|
2009-07-09 13:10:41 +00:00
|
|
|
|
|
|
|
no_memory:
|
2010-11-17 02:13:29 +00:00
|
|
|
VIR_FORCE_FCLOSE(mapping);
|
2009-07-09 13:10:41 +00:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
|
|
|
|
2013-03-21 13:27:13 +00:00
|
|
|
/*
 * virCgroupDetect:
 * @group: group to fill in (mount points and placements)
 * @controllers: bitmask with bit (1 << id) set for every wanted
 *               controller, or a negative value to accept every
 *               controller that is mounted
 * @path: group path; absolute when it starts with '/'
 * @parent: group to inherit mounts/placement from, or NULL
 *
 * Steps:
 *  1. obtain mount points, copied from @parent when given, otherwise
 *     detected from the system;
 *  2. validate @controllers against what is mounted, dropping the mount
 *     point of unwanted controllers (or auto-select when negative);
 *  3. obtain placements, copied when @parent is set or @path is
 *     absolute, otherwise detected for the current process;
 *  4. verify every remaining mounted controller has a placement.
 *
 * Returns 0 on success or a negative errno value:
 *  -ENOENT  a requested controller is not mounted, or a mounted
 *           controller has no placement
 *  -EINVAL  an unwanted controller shares its mount point with a
 *           wanted one
 *  -ENXIO   no controller is selected at all
 *  or whatever the mount/placement helpers returned.
 */
static int virCgroupDetect(virCgroupPtr group,
                           int controllers,
                           const char *path,
                           virCgroupPtr parent)
{
    int rc;
    int i;
    int j;

    VIR_DEBUG("group=%p controllers=%d path=%s parent=%p",
              group, controllers, path, parent);

    if (parent)
        rc = virCgroupCopyMounts(group, parent);
    else
        rc = virCgroupDetectMounts(group);
    if (rc < 0) {
        VIR_ERROR(_("Failed to detect mounts for %s"), group->path);
        return rc;
    }

    if (controllers >= 0) {
        VIR_DEBUG("Validating controllers %d", controllers);
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            VIR_DEBUG("Controller '%s' wanted=%s, mount='%s'",
                      virCgroupControllerTypeToString(i),
                      (1 << i) & controllers ? "yes" : "no",
                      NULLSTR(group->controllers[i].mountPoint));
            if (((1 << i) & controllers)) {
                /* Ensure requested controller is present */
                if (!group->controllers[i].mountPoint) {
                    VIR_DEBUG("Requested controlled '%s' not mounted",
                              virCgroupControllerTypeToString(i));
                    return -ENOENT;
                }
            } else {
                /* Check whether a request to disable a controller
                 * clashes with co-mounting of controllers */
                for (j = 0; j < VIR_CGROUP_CONTROLLER_LAST; j++) {
                    if (j == i)
                        continue;
                    if (!((1 << j) & controllers))
                        continue;

                    if (STREQ_NULLABLE(group->controllers[i].mountPoint,
                                       group->controllers[j].mountPoint)) {
                        VIR_DEBUG("Controller '%s' is not wanted, but '%s' is co-mounted",
                                  virCgroupControllerTypeToString(i),
                                  virCgroupControllerTypeToString(j));
                        return -EINVAL;
                    }
                }
                /* Unwanted and not co-mounted: treat as unmounted */
                VIR_FREE(group->controllers[i].mountPoint);
            }
        }
    } else {
        VIR_DEBUG("Auto-detecting controllers");
        controllers = 0;
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            VIR_DEBUG("Controller '%s' present=%s",
                      virCgroupControllerTypeToString(i),
                      group->controllers[i].mountPoint ? "yes" : "no");
            if (group->controllers[i].mountPoint == NULL)
                continue;
            controllers |= (1 << i);
        }
    }

    /* Check that at least 1 controller is available */
    if (!controllers) {
        VIR_DEBUG("No controllers set");
        return -ENXIO;
    }

    if (parent || path[0] == '/')
        rc = virCgroupCopyPlacement(group, path, parent);
    else
        rc = virCgroupDetectPlacement(group, path);

    if (rc == 0) {
        /* Check that for every mounted controller, we found our placement */
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            if (!group->controllers[i].mountPoint)
                continue;

            if (!group->controllers[i].placement) {
                /* placement is NULL in this branch, so report the
                 * (non-NULL) mount point as the "at" location */
                VIR_ERROR(_("Could not find placement for controller %s at %s"),
                          virCgroupControllerTypeToString(i),
                          group->controllers[i].mountPoint);
                rc = -ENOENT;
                break;
            }

            VIR_DEBUG("Detected mount/mapping %i:%s at %s in %s", i,
                      virCgroupControllerTypeToString(i),
                      group->controllers[i].mountPoint,
                      group->controllers[i].placement);
        }
    } else {
        VIR_ERROR(_("Failed to detect mapping for %s"), group->path);
    }

    return rc;
}
|
2009-10-07 10:18:31 +00:00
|
|
|
#endif
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
|
2011-03-07 23:41:40 +00:00
|
|
|
/*
 * virCgroupPathOfController:
 * @group: group to build the path for
 * @controller: controller id, or -1 to use the first controller that
 *              has both a mount point and a placement
 * @key: file name to append below the group directory, or NULL
 * @path: receives the newly allocated "<mount><placement>/<key>" string
 *
 * Returns 0 on success or a negative errno value:
 *  -ENOSYS  @controller was -1 and no usable controller exists
 *  -EINVAL  @controller is outside the valid id range
 *  -ENOENT  the controller has no mount point or no placement
 *  -ENOMEM  the path could not be allocated
 */
int virCgroupPathOfController(virCgroupPtr group,
                              int controller,
                              const char *key,
                              char **path)
{
    if (controller == -1) {
        int i;
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            if (group->controllers[i].mountPoint &&
                group->controllers[i].placement) {
                controller = i;
                break;
            }
        }
    }
    if (controller == -1)
        return -ENOSYS;

    /* Same range check as virCgroupHasController(); guards the array
     * accesses below against an out-of-range id */
    if (controller < 0 || controller >= VIR_CGROUP_CONTROLLER_LAST)
        return -EINVAL;

    if (group->controllers[controller].mountPoint == NULL)
        return -ENOENT;

    if (group->controllers[controller].placement == NULL)
        return -ENOENT;

    if (virAsprintf(path, "%s%s/%s",
                    group->controllers[controller].mountPoint,
                    group->controllers[controller].placement,
                    key ? key : "") == -1)
        return -ENOMEM;

    return 0;
}
|
|
|
|
|
|
|
|
|
2008-10-03 17:58:02 +00:00
|
|
|
/*
 * virCgroupSetValueStr:
 * @group: group owning the setting
 * @controller: controller id (passed on to virCgroupPathOfController)
 * @key: name of the file below the group directory
 * @value: string to write
 *
 * Resolves the file path for @key and writes @value to it.
 *
 * Returns 0 on success, the path lookup error, or -errno from the write.
 */
static int virCgroupSetValueStr(virCgroupPtr group,
                                int controller,
                                const char *key,
                                const char *value)
{
    char *keypath = NULL;
    int rc = virCgroupPathOfController(group, controller, key, &keypath);

    if (rc != 0)
        return rc;

    VIR_DEBUG("Set value '%s' to '%s'", keypath, value);
    if (virFileWriteStr(keypath, value, 0) < 0) {
        /* Record errno before the debug call below */
        rc = -errno;
        VIR_DEBUG("Failed to write value '%s': %m", value);
    } else {
        rc = 0;
    }

    VIR_FREE(keypath);

    return rc;
}
|
|
|
|
|
|
|
|
/*
 * virCgroupGetValueStr:
 * @group: group owning the setting
 * @controller: controller id (passed on to virCgroupPathOfController)
 * @key: name of the file below the group directory
 * @value: receives the file contents; set to NULL first
 *
 * Resolves the file path for @key and reads up to 1 MiB from it.
 * A single trailing newline is stripped from the result.
 *
 * Returns 0 on success, the path lookup error, or -errno from the read.
 */
static int virCgroupGetValueStr(virCgroupPtr group,
                                int controller,
                                const char *key,
                                char **value)
{
    int rc;
    char *keypath = NULL;

    *value = NULL;

    rc = virCgroupPathOfController(group, controller, key, &keypath);
    if (rc != 0) {
        VIR_DEBUG("No path of %s, %s", group->path, key);
        return rc;
    }

    VIR_DEBUG("Get value %s", keypath);

    rc = virFileReadAll(keypath, 1024*1024, value);
    if (rc < 0) {
        rc = -errno;
        VIR_DEBUG("Failed to read %s: %m\n", keypath);
    } else {
        /* Terminated with '\n' has sometimes harmful effects to the caller.
         * rc is treated as the data length; an empty file (rc == 0) has
         * no last byte, so it must not be inspected. */
        if (rc > 0 && (*value)[rc - 1] == '\n')
            (*value)[rc - 1] = '\0';

        rc = 0;
    }

    VIR_FREE(keypath);

    return rc;
}
|
|
|
|
|
2009-07-09 13:10:06 +00:00
|
|
|
/*
 * virCgroupSetValueU64:
 *
 * Formats @value as an unsigned decimal string and stores it through
 * virCgroupSetValueStr().
 *
 * Returns that call's result, or -ENOMEM if formatting failed.
 */
static int virCgroupSetValueU64(virCgroupPtr group,
                                int controller,
                                const char *key,
                                unsigned long long int value)
{
    char *text = NULL;
    int ret = -ENOMEM;

    if (virAsprintf(&text, "%llu", value) != -1) {
        ret = virCgroupSetValueStr(group, controller, key, text);
        VIR_FREE(text);
    }

    return ret;
}
|
|
|
|
|
|
|
|
|
2008-10-08 16:28:48 +00:00
|
|
|
|
|
|
|
/*
 * virCgroupSetValueI64:
 *
 * Formats @value as a signed decimal string and stores it through
 * virCgroupSetValueStr().
 *
 * Returns that call's result, or -ENOMEM if formatting failed.
 */
static int virCgroupSetValueI64(virCgroupPtr group,
                                int controller,
                                const char *key,
                                long long int value)
{
    char *text = NULL;
    int ret = -ENOMEM;

    if (virAsprintf(&text, "%lld", value) != -1) {
        ret = virCgroupSetValueStr(group, controller, key, text);
        VIR_FREE(text);
    }

    return ret;
}
|
|
|
|
|
|
|
|
/*
 * virCgroupGetValueI64:
 *
 * Reads the setting @key as a string and parses it as a base-10
 * signed integer into @value.
 *
 * Returns 0 on success, the read error, or -EINVAL if parsing failed.
 */
static int virCgroupGetValueI64(virCgroupPtr group,
                                int controller,
                                const char *key,
                                long long int *value)
{
    char *text = NULL;
    int ret = virCgroupGetValueStr(group, controller, key, &text);

    if (ret == 0 && virStrToLong_ll(text, NULL, 10, value) < 0)
        ret = -EINVAL;

    VIR_FREE(text);

    return ret;
}
|
|
|
|
|
2008-10-08 16:28:48 +00:00
|
|
|
/*
 * virCgroupGetValueU64:
 *
 * Reads the setting @key as a string and parses it as a base-10
 * unsigned integer into @value.
 *
 * Returns 0 on success, the read error, or -EINVAL if parsing failed.
 */
static int virCgroupGetValueU64(virCgroupPtr group,
                                int controller,
                                const char *key,
                                unsigned long long int *value)
{
    char *text = NULL;
    int ret = virCgroupGetValueStr(group, controller, key, &text);

    if (ret == 0 && virStrToLong_ull(text, NULL, 10, value) < 0)
        ret = -EINVAL;

    VIR_FREE(text);

    return ret;
}
|
|
|
|
|
|
|
|
|
2010-04-23 09:34:17 +00:00
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2009-07-09 13:10:41 +00:00
|
|
|
/*
 * virCgroupCpuSetInherit:
 * @parent: group the cpuset values are read from
 * @group: group the values are written to
 *
 * Copies "cpuset.cpus" and "cpuset.mems" from @parent to @group,
 * stopping at the first failure.
 *
 * Returns 0 on success, or the error of the failing read/write.
 */
static int virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group)
{
    const char *keys[] = {
        "cpuset.cpus",
        "cpuset.mems",
    };
    int status = 0;
    int k;

    VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path);
    for (k = 0; k < ARRAY_CARDINALITY(keys); k++) {
        char *inherited;

        status = virCgroupGetValueStr(parent,
                                      VIR_CGROUP_CONTROLLER_CPUSET,
                                      keys[k],
                                      &inherited);
        if (status != 0) {
            VIR_ERROR(_("Failed to get %s %d"), keys[k], status);
            return status;
        }

        VIR_DEBUG("Inherit %s = %s", keys[k], inherited);

        status = virCgroupSetValueStr(group,
                                      VIR_CGROUP_CONTROLLER_CPUSET,
                                      keys[k],
                                      inherited);
        /* The copy is no longer needed whether or not the write worked */
        VIR_FREE(inherited);

        if (status != 0) {
            VIR_ERROR(_("Failed to set %s %d"), keys[k], status);
            return status;
        }
    }

    return status;
}
|
|
|
|
|
cgroup: Enable memory.use_hierarchy of cgroup for domain
Through conversation with Kumar L Srikanth-B22348, I found
that the function of getting memory usage (e.g., virsh dominfo)
doesn't work for lxc with ns subsystem of cgroup enabled.
This is because of features of ns and memory subsystems.
Ns creates child cgroup on every process fork and as a result
processes in a container are not assigned in a cgroup for
domain (e.g., libvirt/lxc/test1/). For example, libvirt_lxc
and init (or somewhat specified in XML) are assigned into
libvirt/lxc/test1/8839/ and libvirt/lxc/test1/8839/8849/,
respectively. On the other hand, memory subsystem accounts
memory usage within a group of processes by default, i.e.,
it does not take any child (and descendant) groups into
account. With the two features, virsh dominfo which just
checks memory usage of a cgroup for domain always returns
zero because the cgroup has no process.
Setting memory.use_hierarchy of a group allows to account
(and limit) memory usage of every descendant groups of the group.
By setting it of a cgroup for domain, we can get proper memory
usage of lxc with ns subsystem enabled. (To be exact, the
setting is required only when memory and ns subsystems are
enabled at the same time, e.g., mount -t cgroup none /cgroup.)
2010-06-23 16:00:56 +00:00
|
|
|
/*
 * virCgroupSetMemoryUseHierarchy:
 * @group: group whose memory controller is configured
 *
 * Makes sure "memory.use_hierarchy" reads 1 for @group: the current
 * value is read first and the write is skipped when it is already 1.
 *
 * Returns 0 on success or the error of the failing read/write.
 */
static int virCgroupSetMemoryUseHierarchy(virCgroupPtr group)
{
    const char *knob = "memory.use_hierarchy";
    unsigned long long current;
    int ret;

    ret = virCgroupGetValueU64(group,
                               VIR_CGROUP_CONTROLLER_MEMORY,
                               knob, &current);
    if (ret != 0) {
        VIR_ERROR(_("Failed to read %s/%s (%d)"), group->path, knob, ret);
        return ret;
    }

    /* Setting twice causes error, so if already enabled, skip setting */
    if (current == 1)
        return 0;

    VIR_DEBUG("Setting up %s/%s", group->path, knob);
    ret = virCgroupSetValueU64(group,
                               VIR_CGROUP_CONTROLLER_MEMORY,
                               knob, 1);
    if (ret != 0) {
        VIR_ERROR(_("Failed to set %s/%s (%d)"), group->path, knob, ret);
    }

    return ret;
}
|
|
|
|
|
2012-10-31 19:03:54 +00:00
|
|
|
/*
 * virCgroupMakeGroup:
 * @parent: group passed to virCgroupCpuSetInherit() as the source of
 *          cpuset values for a group directory that did not exist yet
 * @group: group whose per-controller directories are checked/created
 * @create: when true, missing directories are created with mkdir();
 *          when false, a missing directory is an error
 * @flags: virCgroupFlags bits; VIR_CGROUP_MEM_HIERACHY requests
 *         virCgroupSetMemoryUseHierarchy() on new directories
 *
 * Walks every mounted controller of @group. For a directory that did
 * not exist beforehand, cpuset values are inherited from @parent and,
 * if requested, memory.use_hierarchy is enabled. A failure for the
 * blkio controller is tolerated by treating blkio as unmounted.
 *
 * Returns 0 on success or a negative errno value.
 */
static int virCgroupMakeGroup(virCgroupPtr parent,
                              virCgroupPtr group,
                              bool create,
                              unsigned int flags)
{
    int i;
    int rc = 0;

    VIR_DEBUG("Make group %s", group->path);
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        char *path = NULL;

        /* Skip over controllers that aren't mounted */
        if (!group->controllers[i].mountPoint) {
            VIR_DEBUG("Skipping unmounted controller %s",
                      virCgroupControllerTypeToString(i));
            continue;
        }

        rc = virCgroupPathOfController(group, i, "", &path);
        if (rc < 0) {
            VIR_DEBUG("Failed to find path of controller %s",
                      virCgroupControllerTypeToString(i));
            return rc;
        }
        /* As of Feb 2011, clang can't see that the above function
         * call did not modify group. */
        sa_assert(group->controllers[i].mountPoint);

        VIR_DEBUG("Make controller %s", path);
        if (access(path, F_OK) != 0) {
            if (!create ||
                mkdir(path, 0755) < 0) {
                /* Capture the access()/mkdir() error before any logging
                 * call gets a chance to overwrite errno */
                int saved_errno = errno;

                /* With a kernel that doesn't support multi-level directory
                 * for blkio controller, libvirt will fail and disable all
                 * other controllers even though they are available. So
                 * treat blkio as unmounted if mkdir fails. */
                if (i == VIR_CGROUP_CONTROLLER_BLKIO) {
                    VIR_DEBUG("Ignoring mkdir failure with blkio controller. Kernel probably too old");
                    rc = 0;
                    VIR_FREE(group->controllers[i].mountPoint);
                    VIR_FREE(path);
                    continue;
                } else {
                    VIR_DEBUG("Failed to create controller %s for group",
                              virCgroupControllerTypeToString(i));
                    rc = -saved_errno;
                    VIR_FREE(path);
                    break;
                }
            }

            /* Applies to cpuset itself and to any controller sharing
             * cpuset's mount point */
            if (group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint != NULL &&
                (i == VIR_CGROUP_CONTROLLER_CPUSET ||
                 STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint))) {
                rc = virCgroupCpuSetInherit(parent, group);
                if (rc != 0) {
                    VIR_FREE(path);
                    break;
                }
            }
            /*
             * Note that virCgroupSetMemoryUseHierarchy should always be
             * called prior to creating subcgroups and attaching tasks.
             */
            if ((flags & VIR_CGROUP_MEM_HIERACHY) &&
                (group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint != NULL) &&
                (i == VIR_CGROUP_CONTROLLER_MEMORY ||
                 STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint))) {
                rc = virCgroupSetMemoryUseHierarchy(group);
                if (rc != 0) {
                    VIR_FREE(path);
                    break;
                }
            }
        }

        VIR_FREE(path);
    }

    VIR_DEBUG("Done making controllers for group");
    return rc;
}
|
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
/**
|
|
|
|
* virCgroupNew:
|
|
|
|
* @path: path for the new group
|
|
|
|
* @parent: parent group, or NULL
|
|
|
|
* @controllers: bitmask of controllers to activate
|
|
|
|
*
|
|
|
|
* Create a new cgroup storing it in @group.
|
|
|
|
*
|
|
|
|
* If @path starts with a '/' it is treated as an
|
|
|
|
* absolute path, and @parent is ignored. Otherwise
|
|
|
|
* it is treated as being relative to @parent. If
|
|
|
|
* @parent is NULL, then the placement of the current
|
|
|
|
* process is used.
|
|
|
|
*
|
|
|
|
*/
|
2009-07-09 13:10:41 +00:00
|
|
|
static int virCgroupNew(const char *path,
|
2013-03-22 11:11:34 +00:00
|
|
|
virCgroupPtr parent,
|
2013-03-21 13:27:13 +00:00
|
|
|
int controllers,
|
2009-07-09 13:10:41 +00:00
|
|
|
virCgroupPtr *group)
|
2008-10-03 17:58:02 +00:00
|
|
|
{
|
|
|
|
int rc = 0;
|
|
|
|
char *typpath = NULL;
|
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
VIR_DEBUG("parent=%p path=%s controllers=%d",
|
|
|
|
parent, path, controllers);
|
2009-07-09 13:10:41 +00:00
|
|
|
*group = NULL;
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
if (VIR_ALLOC((*group)) != 0) {
|
2008-10-03 17:58:02 +00:00
|
|
|
rc = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
if (path[0] == '/' || !parent) {
|
|
|
|
if (!((*group)->path = strdup(path))) {
|
|
|
|
rc = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (virAsprintf(&(*group)->path, "%s%s%s",
|
|
|
|
parent->path,
|
|
|
|
STREQ(parent->path, "") ? "" : "/",
|
|
|
|
path) < 0) {
|
|
|
|
rc = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
rc = virCgroupDetect(*group, controllers, path, parent);
|
2009-07-09 13:10:41 +00:00
|
|
|
if (rc < 0)
|
|
|
|
goto err;
|
2008-10-03 17:58:02 +00:00
|
|
|
|
|
|
|
return rc;
|
|
|
|
err:
|
2009-07-09 13:10:41 +00:00
|
|
|
virCgroupFree(group);
|
|
|
|
*group = NULL;
|
2008-10-03 17:58:02 +00:00
|
|
|
|
|
|
|
VIR_FREE(typpath);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2013-04-04 11:10:55 +00:00
|
|
|
/*
 * Build the "libvirt" group located directly below the group of the
 * current process and store it in @group.  The group of the current
 * process is only needed temporarily and is released before returning.
 *
 * Returns 0 on success, otherwise the non-zero result of whichever
 * step failed.
 */
static int virCgroupAppRoot(virCgroupPtr *group,
                            bool create,
                            int controllers)
{
    virCgroupPtr self = NULL;
    int ret = virCgroupNewSelf(&self);

    if (ret != 0)
        return ret;

    ret = virCgroupNew("libvirt", self, controllers, group);
    if (ret == 0)
        ret = virCgroupMakeGroup(self, *group, create, VIR_CGROUP_NONE);

    virCgroupFree(&self);
    return ret;
}
|
2009-10-07 10:18:31 +00:00
|
|
|
#endif
|
2009-07-10 10:40:04 +00:00
|
|
|
|
2010-06-30 11:49:28 +00:00
|
|
|
#if defined _DIRENT_HAVE_D_TYPE
|
2013-03-20 08:14:23 +00:00
|
|
|
int virCgroupRemoveRecursively(char *grppath)
|
2010-06-23 16:00:55 +00:00
|
|
|
{
|
|
|
|
DIR *grpdir;
|
|
|
|
struct dirent *ent;
|
|
|
|
int rc = 0;
|
|
|
|
|
|
|
|
grpdir = opendir(grppath);
|
|
|
|
if (grpdir == NULL) {
|
2010-06-26 17:21:28 +00:00
|
|
|
if (errno == ENOENT)
|
|
|
|
return 0;
|
2010-06-23 16:00:55 +00:00
|
|
|
rc = -errno;
|
2011-02-15 23:50:34 +00:00
|
|
|
VIR_ERROR(_("Unable to open %s (%d)"), grppath, errno);
|
2010-06-23 16:00:55 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
char *path;
|
|
|
|
|
|
|
|
errno = 0;
|
|
|
|
ent = readdir(grpdir);
|
|
|
|
if (ent == NULL) {
|
|
|
|
if ((rc = -errno))
|
|
|
|
VIR_ERROR(_("Failed to readdir for %s (%d)"), grppath, errno);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ent->d_name[0] == '.') continue;
|
|
|
|
if (ent->d_type != DT_DIR) continue;
|
|
|
|
|
|
|
|
if (virAsprintf(&path, "%s/%s", grppath, ent->d_name) == -1) {
|
|
|
|
rc = -ENOMEM;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
rc = virCgroupRemoveRecursively(path);
|
|
|
|
VIR_FREE(path);
|
|
|
|
if (rc != 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
closedir(grpdir);
|
|
|
|
|
2011-02-15 23:50:34 +00:00
|
|
|
VIR_DEBUG("Removing cgroup %s", grppath);
|
2010-06-23 16:00:55 +00:00
|
|
|
if (rmdir(grppath) != 0 && errno != ENOENT) {
|
|
|
|
rc = -errno;
|
|
|
|
VIR_ERROR(_("Unable to remove %s (%d)"), grppath, errno);
|
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
2010-06-30 11:49:28 +00:00
|
|
|
#else
|
2013-03-20 08:14:23 +00:00
|
|
|
int virCgroupRemoveRecursively(char *grppath ATTRIBUTE_UNUSED)
|
2010-06-30 11:49:28 +00:00
|
|
|
{
|
|
|
|
/* Claim no support */
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
#endif
|
2010-06-23 16:00:55 +00:00
|
|
|
|
2008-10-03 17:58:02 +00:00
|
|
|
/**
|
|
|
|
* virCgroupRemove:
|
|
|
|
*
|
|
|
|
* @group: The group to be removed
|
|
|
|
*
|
2010-06-23 16:00:55 +00:00
|
|
|
* It first removes all child groups recursively
|
|
|
|
* in depth first order and then removes @group
|
|
|
|
* because the presence of the child groups
|
|
|
|
* prevents removing @group.
|
|
|
|
*
|
2008-10-03 17:58:02 +00:00
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupRemove(virCgroupPtr group)
|
|
|
|
{
|
|
|
|
int rc = 0;
|
|
|
|
int i;
|
|
|
|
char *grppath = NULL;
|
|
|
|
|
2013-04-03 17:06:41 +00:00
|
|
|
VIR_DEBUG("Removing cgroup %s", group->path);
|
2013-05-21 07:58:16 +00:00
|
|
|
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
|
2009-07-09 13:10:41 +00:00
|
|
|
/* Skip over controllers not mounted */
|
|
|
|
if (!group->controllers[i].mountPoint)
|
2008-10-03 17:58:02 +00:00
|
|
|
continue;
|
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
if (virCgroupPathOfController(group,
|
|
|
|
i,
|
|
|
|
NULL,
|
|
|
|
&grppath) != 0)
|
|
|
|
continue;
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2011-02-15 23:50:34 +00:00
|
|
|
VIR_DEBUG("Removing cgroup %s and all child cgroups", grppath);
|
2010-06-23 16:00:55 +00:00
|
|
|
rc = virCgroupRemoveRecursively(grppath);
|
2008-10-03 17:58:02 +00:00
|
|
|
VIR_FREE(grppath);
|
|
|
|
}
|
2013-04-03 17:06:41 +00:00
|
|
|
VIR_DEBUG("Done removing cgroup %s", group->path);
|
2008-10-03 17:58:02 +00:00
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2013-03-21 13:27:13 +00:00
|
|
|
|
2008-10-03 17:58:02 +00:00
|
|
|
/**
|
|
|
|
* virCgroupAddTask:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to add a task to
|
|
|
|
* @pid: The pid of the task to add
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupAddTask(virCgroupPtr group, pid_t pid)
|
|
|
|
{
|
|
|
|
int rc = 0;
|
|
|
|
int i;
|
|
|
|
|
2013-05-21 07:58:16 +00:00
|
|
|
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
|
2009-07-09 13:10:41 +00:00
|
|
|
/* Skip over controllers not mounted */
|
|
|
|
if (!group->controllers[i].mountPoint)
|
|
|
|
continue;
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2009-07-09 13:10:41 +00:00
|
|
|
rc = virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid);
|
2008-10-03 17:58:02 +00:00
|
|
|
if (rc != 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2012-08-21 09:18:25 +00:00
|
|
|
/**
|
|
|
|
* virCgroupAddTaskController:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to add a task to
|
|
|
|
* @pid: The pid of the task to add
|
|
|
|
* @controller: The cgroup controller to be operated on
|
|
|
|
*
|
|
|
|
* Returns: 0 on success or -errno on failure
|
|
|
|
*/
|
|
|
|
int virCgroupAddTaskController(virCgroupPtr group, pid_t pid, int controller)
|
|
|
|
{
|
2012-11-28 13:34:47 +00:00
|
|
|
if (controller < 0 || controller >= VIR_CGROUP_CONTROLLER_LAST)
|
2012-08-21 09:18:25 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (!group->controllers[controller].mountPoint)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return virCgroupSetValueU64(group, controller, "tasks",
|
|
|
|
(unsigned long long)pid);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * virCgroupAddTaskStrController:
 *
 * @group: The cgroup to add the tasks to
 * @pidstr: newline separated list of decimal task ids
 * @controller: The cgroup controller to be operated on
 *
 * Adds every task id listed in @pidstr to @controller of @group.
 * A task that has already exited (-ESRCH) is not treated as an error.
 *
 * Returns: 0 on success or -errno on failure (-ENOMEM if the list
 * cannot be copied, -EINVAL if an entry is not a number, otherwise
 * the error from virCgroupAddTaskController()).
 */
static int virCgroupAddTaskStrController(virCgroupPtr group,
                                        const char *pidstr,
                                        int controller)
{
    char *str = NULL, *cur = NULL, *next = NULL;
    unsigned long long p = 0;
    int rc = 0;
    char *endp;

    /* Work on a private copy since the list is modified while parsing */
    if (!(str = strdup(pidstr)))
        return -ENOMEM;

    cur = str;
    while (*cur != '\0') {
        /* A bare -1 would be read as -EPERM by callers expecting
         * -errno, so report malformed input explicitly */
        if (virStrToLong_ull(cur, &endp, 10, &p) != 0) {
            rc = -EINVAL;
            goto cleanup;
        }

        rc = virCgroupAddTaskController(group, p, controller);
        /* A thread that exits between when we first read the source
         * tasks and now is not fatal.  */
        if (rc == -ESRCH)
            rc = 0;
        else if (rc != 0)
            goto cleanup;

        next = strchr(cur, '\n');
        if (next) {
            cur = next + 1;
            *next = '\0';
        } else {
            break;
        }
    }

cleanup:
    VIR_FREE(str);
    return rc;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupMoveTask:
|
|
|
|
*
|
|
|
|
* @src_group: The source cgroup where all tasks are removed from
|
|
|
|
* @dest_group: The destination where all tasks are added to
|
|
|
|
* @controller: The cgroup controller to be operated on
|
|
|
|
*
|
|
|
|
* Returns: 0 on success or -errno on failure
|
|
|
|
*/
|
2013-03-21 13:27:13 +00:00
|
|
|
int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
|
2012-08-21 09:18:25 +00:00
|
|
|
{
|
2013-03-21 13:27:13 +00:00
|
|
|
int rc = 0;
|
2012-08-21 09:18:25 +00:00
|
|
|
char *content = NULL;
|
2013-03-21 13:27:13 +00:00
|
|
|
int i;
|
2012-08-21 09:18:25 +00:00
|
|
|
|
2013-05-21 07:58:16 +00:00
|
|
|
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
|
2013-03-21 13:27:13 +00:00
|
|
|
if (!src_group->controllers[i].mountPoint ||
|
|
|
|
!dest_group->controllers[i].mountPoint)
|
|
|
|
continue;
|
2012-08-21 09:18:25 +00:00
|
|
|
|
cgroup: be robust against cgroup movement races
https://bugzilla.redhat.com/show_bug.cgi?id=965169 documents a
problem starting domains when cgroups are enabled; I was able
to reliably reproduce the race about 5% of the time when I added
hooks to domain startup by 3 seconds (as that seemed to be about
the length of time that qemu created and then closed a temporary
thread, probably related to aio handling of initially opening
a disk image). The problem has existed since we introduced
virCgroupMoveTask in commit 9102829 (v0.10.0).
There are some inherent TOCTTOU races when moving tasks between
kernel cgroups, precisely because threads can be created or
completed in the window between when we read a thread id from the
source and when we write to the destination. As the goal of
virCgroupMoveTask is merely to move ALL tasks into the new
cgroup, it is sufficient to iterate until no more threads are
being created in the old group, and ignoring any threads that
die before we can move them.
It would be nicer to start the threads in the right cgroup to
begin with, but by default, all child threads are created in
the same cgroup as their parent, and we don't want vcpu child
threads in the emulator cgroup, so I don't see any good way
of avoiding the move. It would also be nice if the kernel were
to implement something like rename() as a way to atomically move
a group of threads from one cgroup to another, instead of forcing
a window where we have to read and parse the source, then format
and write back into the destination.
* src/util/vircgroup.c (virCgroupAddTaskStrController): Ignore
ESRCH, because a thread ended between read and write attempts.
(virCgroupMoveTask): Loop until all threads have moved.
Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-21 02:30:30 +00:00
|
|
|
/* New threads are created in the same group as their parent;
|
|
|
|
* but if a thread is created after we first read we aren't
|
|
|
|
* aware that it needs to move. Therefore, we must iterate
|
|
|
|
* until content is empty. */
|
|
|
|
while (1) {
|
|
|
|
rc = virCgroupGetValueStr(src_group, i, "tasks", &content);
|
|
|
|
if (rc != 0)
|
|
|
|
return rc;
|
|
|
|
if (!*content)
|
|
|
|
break;
|
2012-08-21 09:18:25 +00:00
|
|
|
|
cgroup: be robust against cgroup movement races
https://bugzilla.redhat.com/show_bug.cgi?id=965169 documents a
problem starting domains when cgroups are enabled; I was able
to reliably reproduce the race about 5% of the time when I added
hooks to domain startup by 3 seconds (as that seemed to be about
the length of time that qemu created and then closed a temporary
thread, probably related to aio handling of initially opening
a disk image). The problem has existed since we introduced
virCgroupMoveTask in commit 9102829 (v0.10.0).
There are some inherent TOCTTOU races when moving tasks between
kernel cgroups, precisely because threads can be created or
completed in the window between when we read a thread id from the
source and when we write to the destination. As the goal of
virCgroupMoveTask is merely to move ALL tasks into the new
cgroup, it is sufficient to iterate until no more threads are
being created in the old group, and ignoring any threads that
die before we can move them.
It would be nicer to start the threads in the right cgroup to
begin with, but by default, all child threads are created in
the same cgroup as their parent, and we don't want vcpu child
threads in the emulator cgroup, so I don't see any good way
of avoiding the move. It would also be nice if the kernel were
to implement something like rename() as a way to atomically move
a group of threads from one cgroup to another, instead of forcing
a window where we have to read and parse the source, then format
and write back into the destination.
* src/util/vircgroup.c (virCgroupAddTaskStrController): Ignore
ESRCH, because a thread ended between read and write attempts.
(virCgroupMoveTask): Loop until all threads have moved.
Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-21 02:30:30 +00:00
|
|
|
rc = virCgroupAddTaskStrController(dest_group, content, i);
|
|
|
|
if (rc != 0)
|
|
|
|
goto cleanup;
|
2012-08-21 09:18:25 +00:00
|
|
|
|
cgroup: be robust against cgroup movement races
https://bugzilla.redhat.com/show_bug.cgi?id=965169 documents a
problem starting domains when cgroups are enabled; I was able
to reliably reproduce the race about 5% of the time when I added
hooks to domain startup by 3 seconds (as that seemed to be about
the length of time that qemu created and then closed a temporary
thread, probably related to aio handling of initially opening
a disk image). The problem has existed since we introduced
virCgroupMoveTask in commit 9102829 (v0.10.0).
There are some inherent TOCTTOU races when moving tasks between
kernel cgroups, precisely because threads can be created or
completed in the window between when we read a thread id from the
source and when we write to the destination. As the goal of
virCgroupMoveTask is merely to move ALL tasks into the new
cgroup, it is sufficient to iterate until no more threads are
being created in the old group, and ignoring any threads that
die before we can move them.
It would be nicer to start the threads in the right cgroup to
begin with, but by default, all child threads are created in
the same cgroup as their parent, and we don't want vcpu child
threads in the emulator cgroup, so I don't see any good way
of avoiding the move. It would also be nice if the kernel were
to implement something like rename() as a way to atomically move
a group of threads from one cgroup to another, instead of forcing
a window where we have to read and parse the source, then format
and write back into the destination.
* src/util/vircgroup.c (virCgroupAddTaskStrController): Ignore
ESRCH, because a thread ended between read and write attempts.
(virCgroupMoveTask): Loop until all threads have moved.
Signed-off-by: Eric Blake <eblake@redhat.com>
2013-05-21 02:30:30 +00:00
|
|
|
VIR_FREE(content);
|
|
|
|
}
|
2013-03-21 13:27:13 +00:00
|
|
|
}
|
2012-08-21 09:18:25 +00:00
|
|
|
|
|
|
|
cleanup:
|
|
|
|
VIR_FREE(content);
|
|
|
|
return rc;
|
|
|
|
}
|
2009-07-10 10:40:04 +00:00
|
|
|
|
2013-03-28 18:08:39 +00:00
|
|
|
|
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2013-04-26 09:50:24 +00:00
|
|
|
/*
 * Decide whether the partition name @path must be prefixed with
 * a '_' before it is used as a cgroup directory name.
 *
 * Returns 1 if escaping is needed, 0 if not, -errno on failure
 * (-ENXIO when /proc/cgroups does not exist).
 */
static int virCgroupPartitionNeedsEscaping(const char *path)
{
    FILE *fp = NULL;
    int ret = 0;
    char *line = NULL;
    size_t buflen = 0;

    /* If it starts with 'cgroup.', with a '_' or '.', or with
     * any of the controller names from /proc/cgroups followed
     * by a '.', then we must prefix a '_'
     */
    if (STRPREFIX(path, "cgroup."))
        return 1;

    if (path[0] == '_' ||
        path[0] == '.')
        return 1;

    if (!(fp = fopen("/proc/cgroups", "r"))) {
        /* The API contract is that we return ENXIO
         * if cgroups are not available on a host */
        if (errno == ENOENT)
            errno = ENXIO;
        return -errno;
    }

    /*
     * Data looks like this:
     * #subsys_name hierarchy num_cgroups enabled
     * cpuset  2 4  1
     * cpu     3 48 1
     * cpuacct 3 48 1
     * memory  4 4  1
     * devices 5 4  1
     * freezer 6 4  1
     * net_cls 7 1  1
     */
    while (getline(&line, &buflen, fp) > 0) {
        size_t len;

        if (STRPREFIX(line, "#subsys_name"))
            continue;

        /* The controller name is the first field.  Accept either a
         * space or a tab as separator, and cope with a line that
         * has no separator at all. */
        len = strcspn(line, " \t\n");
        line[len] = '\0';

        if (STRPREFIX(path, line) &&
            path[len] == '.') {
            ret = 1;
            goto cleanup;
        }
    }

    if (ferror(fp))
        ret = -EIO;

cleanup:
    /* getline() may leave a buffer allocated even when it fails */
    VIR_FREE(line);
    VIR_FORCE_FCLOSE(fp);
    return ret;
}
|
|
|
|
|
|
|
|
/*
 * Prefix *@path in place with a '_' when
 * virCgroupPartitionNeedsEscaping() says it is required.
 *
 * Returns 0 on success (escaped or not needed), -errno on failure.
 */
static int virCgroupPartitionEscape(char **path)
{
    /* Element count of the string including its trailing NUL */
    size_t nbytes = strlen(*path) + 1;
    char prefix = '_';
    int needed = virCgroupPartitionNeedsEscaping(*path);

    /* 0: nothing to do; negative: pass the error on */
    if (needed <= 0)
        return needed;

    return VIR_INSERT_ELEMENT(*path, 0, nbytes, prefix) < 0 ? -ENOMEM : 0;
}
|
|
|
|
|
2013-05-10 18:05:00 +00:00
|
|
|
/*
 * Rewrite the '/' separated partition path @path so that every
 * component carries a suffix and is escaped where necessary, and
 * store the re-joined path in *@res.
 *
 * Returns 0 on success, -errno on failure.
 */
static int virCgroupSetPartitionSuffix(const char *path, char **res)
{
    char **tokens = virStringSplit(path, "/", 0);
    size_t i;
    int ret = -ENOMEM;

    /* The caller treats the result as -errno; a bare -1 would be
     * indistinguishable from -EPERM */
    if (!tokens)
        return ret;

    for (i = 0; tokens[i] != NULL; i++) {
        /* Whitelist the 3 top level fixed dirs
         * NB i == 0 is "", since we have leading '/'
         */
        if (i == 1 &&
            (STREQ(tokens[i], "machine") ||
             STREQ(tokens[i], "system") ||
             STREQ(tokens[i], "user"))) {
            continue;
        }
        /* If there is no suffix set already, then
         * add ".partition"
         */
        if (STRNEQ(tokens[i], "") &&
            !strchr(tokens[i], '.')) {
            if (VIR_REALLOC_N(tokens[i],
                              strlen(tokens[i]) + strlen(".partition") + 1) < 0) {
                ret = -ENOMEM;
                virReportOOMError();
                goto cleanup;
            }
            strcat(tokens[i], ".partition");
        }

        ret = virCgroupPartitionEscape(&(tokens[i]));
        if (ret < 0) {
            if (ret == -ENOMEM)
                virReportOOMError();
            goto cleanup;
        }
    }

    if (!(*res = virStringJoin((const char **)tokens, "/"))) {
        ret = -ENOMEM;
        goto cleanup;
    }

    ret = 0;

cleanup:
    virStringFreeList(tokens);
    return ret;
}
|
|
|
|
|
2013-03-28 18:08:39 +00:00
|
|
|
/**
|
|
|
|
* virCgroupNewPartition:
|
|
|
|
* @path: path for the partition
|
|
|
|
* @create: true to create the cgroup tree
|
|
|
|
* @controllers: mask of controllers to create
|
|
|
|
*
|
|
|
|
* Creates a new cgroup to represent the resource
|
|
|
|
* partition path identified by @name.
|
|
|
|
*
|
|
|
|
* Returns 0 on success, -errno on failure
|
|
|
|
*/
|
|
|
|
int virCgroupNewPartition(const char *path,
|
|
|
|
bool create,
|
|
|
|
int controllers,
|
|
|
|
virCgroupPtr *group)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
char *parentPath = NULL;
|
|
|
|
virCgroupPtr parent = NULL;
|
2013-04-26 09:23:51 +00:00
|
|
|
char *newpath;
|
2013-03-28 18:08:39 +00:00
|
|
|
VIR_DEBUG("path=%s create=%d controllers=%x",
|
|
|
|
path, create, controllers);
|
|
|
|
|
|
|
|
if (path[0] != '/')
|
|
|
|
return -EINVAL;
|
|
|
|
|
2013-04-26 09:23:51 +00:00
|
|
|
/* XXX convert all cgroups APIs to use error report
|
|
|
|
* APIs instead of returning errno */
|
2013-05-10 18:05:00 +00:00
|
|
|
rc = virCgroupSetPartitionSuffix(path, &newpath);
|
|
|
|
if (rc < 0) {
|
2013-04-26 09:23:51 +00:00
|
|
|
virResetLastError();
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = virCgroupNew(newpath, NULL, controllers, group);
|
2013-03-28 18:08:39 +00:00
|
|
|
if (rc != 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
2013-04-26 09:23:51 +00:00
|
|
|
if (STRNEQ(newpath, "/")) {
|
2013-03-28 18:08:39 +00:00
|
|
|
char *tmp;
|
2013-04-26 09:23:51 +00:00
|
|
|
if (!(parentPath = strdup(newpath))) {
|
2013-03-28 18:08:39 +00:00
|
|
|
rc = -ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
tmp = strrchr(parentPath, '/');
|
|
|
|
tmp++;
|
|
|
|
*tmp = '\0';
|
|
|
|
|
|
|
|
rc = virCgroupNew(parentPath, NULL, controllers, &parent);
|
|
|
|
if (rc != 0)
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
rc = virCgroupMakeGroup(parent, *group, create, VIR_CGROUP_NONE);
|
|
|
|
if (rc != 0) {
|
|
|
|
virCgroupRemove(*group);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
if (rc != 0)
|
|
|
|
virCgroupFree(group);
|
|
|
|
virCgroupFree(&parent);
|
|
|
|
VIR_FREE(parentPath);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
/* Variant used when the mount table helpers are unavailable:
 * partitions cannot be created, so always fail with -ENXIO. */
int
virCgroupNewPartition(const char *path ATTRIBUTE_UNUSED,
                      bool create ATTRIBUTE_UNUSED,
                      int controllers ATTRIBUTE_UNUSED,
                      virCgroupPtr *group ATTRIBUTE_UNUSED)
{
    /* Claim no support */
    return -ENXIO;
}
|
|
|
|
#endif
|
|
|
|
|
2008-10-03 17:58:02 +00:00
|
|
|
/**
|
2013-03-28 16:33:22 +00:00
|
|
|
* virCgroupNewDriver:
|
2008-10-03 17:58:02 +00:00
|
|
|
*
|
2009-07-10 10:40:04 +00:00
|
|
|
* @name: name of this driver (e.g., xen, qemu, lxc)
|
2008-10-03 17:58:02 +00:00
|
|
|
* @group: Pointer to returned virCgroupPtr
|
|
|
|
*
|
|
|
|
* Returns 0 on success
|
|
|
|
*/
|
2010-04-23 09:34:17 +00:00
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2013-03-28 16:33:22 +00:00
|
|
|
int virCgroupNewDriver(const char *name,
|
2013-03-21 13:27:13 +00:00
|
|
|
bool create,
|
2013-03-28 16:33:22 +00:00
|
|
|
int controllers,
|
|
|
|
virCgroupPtr *group)
|
2008-10-03 17:58:02 +00:00
|
|
|
{
|
|
|
|
int rc;
|
2009-07-09 13:10:41 +00:00
|
|
|
virCgroupPtr rootgrp = NULL;
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2013-04-04 11:10:55 +00:00
|
|
|
rc = virCgroupAppRoot(&rootgrp,
|
2013-03-21 13:27:13 +00:00
|
|
|
create, controllers);
|
2009-07-09 13:10:41 +00:00
|
|
|
if (rc != 0)
|
2008-10-03 17:58:02 +00:00
|
|
|
goto out;
|
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
rc = virCgroupNew(name, rootgrp, -1, group);
|
2009-11-12 14:11:43 +00:00
|
|
|
if (rc == 0) {
|
2011-07-21 07:12:55 +00:00
|
|
|
rc = virCgroupMakeGroup(rootgrp, *group, create, VIR_CGROUP_NONE);
|
2013-04-03 17:08:41 +00:00
|
|
|
if (rc != 0) {
|
|
|
|
virCgroupRemove(*group);
|
2009-07-10 10:40:04 +00:00
|
|
|
virCgroupFree(group);
|
2013-04-03 17:08:41 +00:00
|
|
|
}
|
2009-07-10 10:40:04 +00:00
|
|
|
}
|
2008-10-03 17:58:02 +00:00
|
|
|
out:
|
2009-07-09 13:10:41 +00:00
|
|
|
virCgroupFree(&rootgrp);
|
2008-10-03 17:58:02 +00:00
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
2009-10-07 10:18:31 +00:00
|
|
|
#else
|
2013-03-28 16:33:22 +00:00
|
|
|
int virCgroupNewDriver(const char *name ATTRIBUTE_UNUSED,
|
2013-04-15 16:06:19 +00:00
|
|
|
bool create ATTRIBUTE_UNUSED,
|
2013-03-28 16:33:22 +00:00
|
|
|
int controllers ATTRIBUTE_UNUSED,
|
|
|
|
virCgroupPtr *group ATTRIBUTE_UNUSED)
|
2009-10-07 10:18:31 +00:00
|
|
|
{
|
|
|
|
/* Claim no support */
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
#endif
|
2008-10-03 17:58:02 +00:00
|
|
|
|
2012-11-12 07:02:26 +00:00
|
|
|
/**
|
2013-03-28 16:33:22 +00:00
|
|
|
* virCgroupNewSelf:
|
2012-11-12 07:02:26 +00:00
|
|
|
*
|
|
|
|
* @group: Pointer to returned virCgroupPtr
|
|
|
|
*
|
2013-03-21 11:53:14 +00:00
|
|
|
* Obtain a cgroup representing the config of the
|
|
|
|
* current process
|
|
|
|
*
|
2012-11-12 07:02:26 +00:00
|
|
|
* Returns 0 on success
|
|
|
|
*/
|
2012-12-13 13:10:19 +00:00
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2013-03-28 16:33:22 +00:00
|
|
|
int virCgroupNewSelf(virCgroupPtr *group)
|
2012-11-12 07:02:26 +00:00
|
|
|
{
|
2013-03-22 11:11:34 +00:00
|
|
|
return virCgroupNew("", NULL, -1, group);
|
2012-12-13 13:10:19 +00:00
|
|
|
}
|
2012-11-12 07:02:26 +00:00
|
|
|
#else
|
2013-03-28 16:33:22 +00:00
|
|
|
int virCgroupNewSelf(virCgroupPtr *group ATTRIBUTE_UNUSED)
|
2012-12-13 13:10:19 +00:00
|
|
|
{
|
2012-11-12 07:02:26 +00:00
|
|
|
return -ENXIO;
|
|
|
|
}
|
2012-12-13 13:10:19 +00:00
|
|
|
#endif
|
2009-07-10 10:40:04 +00:00
|
|
|
|
|
|
|
/**
|
2013-03-28 18:08:39 +00:00
|
|
|
* virCgroupNewDomainDriver:
|
2009-07-10 10:40:04 +00:00
|
|
|
*
|
|
|
|
* @driver: group for driver owning the domain
|
|
|
|
* @name: name of the domain
|
|
|
|
* @group: Pointer to returned virCgroupPtr
|
|
|
|
*
|
|
|
|
* Returns 0 on success
|
|
|
|
*/
|
2010-04-23 09:34:17 +00:00
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2013-03-28 18:08:39 +00:00
|
|
|
int virCgroupNewDomainDriver(virCgroupPtr driver,
|
|
|
|
const char *name,
|
|
|
|
bool create,
|
|
|
|
virCgroupPtr *group)
|
2009-07-10 10:40:04 +00:00
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
rc = virCgroupNew(name, driver, -1, group);
|
2009-07-10 10:40:04 +00:00
|
|
|
|
2009-11-12 14:11:43 +00:00
|
|
|
if (rc == 0) {
|
cgroup: Enable memory.use_hierarchy of cgroup for domain
Through conversation with Kumar L Srikanth-B22348, I found
that the function of getting memory usage (e.g., virsh dominfo)
doesn't work for lxc with ns subsystem of cgroup enabled.
This is because of features of ns and memory subsystems.
Ns creates child cgroup on every process fork and as a result
processes in a container are not assigned in a cgroup for
domain (e.g., libvirt/lxc/test1/). For example, libvirt_lxc
and init (or somewhat specified in XML) are assigned into
libvirt/lxc/test1/8839/ and libvirt/lxc/test1/8839/8849/,
respectively. On the other hand, memory subsystem accounts
memory usage within a group of processes by default, i.e.,
it does not take any child (and descendant) groups into
account. With the two features, virsh dominfo which just
checks memory usage of a cgroup for domain always returns
zero because the cgroup has no process.
Setting memory.use_hierarchy of a group allows to account
(and limit) memory usage of every descendant groups of the group.
By setting it of a cgroup for domain, we can get proper memory
usage of lxc with ns subsystem enabled. (To be exact, the
setting is required only when memory and ns subsystems are
enabled at the same time, e.g., mount -t cgroup none /cgroup.)
2010-06-23 16:00:56 +00:00
|
|
|
/*
|
|
|
|
* Create a cgroup with memory.use_hierarchy enabled to
|
|
|
|
* surely account memory usage of lxc with ns subsystem
|
|
|
|
* enabled. (To be exact, memory and ns subsystems are
|
|
|
|
* enabled at the same time.)
|
|
|
|
*
|
|
|
|
* The reason why doing it here, not a upper group, say
|
|
|
|
* a group for driver, is to avoid overhead to track
|
|
|
|
* cumulative usage that we don't need.
|
|
|
|
*/
|
2011-07-21 07:12:55 +00:00
|
|
|
rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_MEM_HIERACHY);
|
2013-04-03 17:08:41 +00:00
|
|
|
if (rc != 0) {
|
|
|
|
virCgroupRemove(*group);
|
2009-07-10 10:40:04 +00:00
|
|
|
virCgroupFree(group);
|
2013-04-03 17:08:41 +00:00
|
|
|
}
|
2009-07-10 10:40:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
2009-10-07 10:18:31 +00:00
|
|
|
#else
|
2013-03-28 18:08:39 +00:00
|
|
|
int virCgroupNewDomainDriver(virCgroupPtr driver ATTRIBUTE_UNUSED,
|
|
|
|
const char *name ATTRIBUTE_UNUSED,
|
|
|
|
bool create ATTRIBUTE_UNUSED,
|
|
|
|
virCgroupPtr *group ATTRIBUTE_UNUSED)
|
|
|
|
{
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupNewDomainPartition:
|
|
|
|
*
|
|
|
|
* @partition: partition holding the domain
|
|
|
|
* @driver: name of the driver
|
|
|
|
* @name: name of the domain
|
|
|
|
* @group: Pointer to returned virCgroupPtr
|
|
|
|
*
|
|
|
|
* Returns 0 on success
|
|
|
|
*/
|
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
|
|
|
int virCgroupNewDomainPartition(virCgroupPtr partition,
|
|
|
|
const char *driver,
|
|
|
|
const char *name,
|
|
|
|
bool create,
|
|
|
|
virCgroupPtr *group)
|
|
|
|
{
|
|
|
|
int rc;
|
2013-04-16 12:16:37 +00:00
|
|
|
char *grpname = NULL;
|
2013-03-28 18:08:39 +00:00
|
|
|
|
2013-04-26 08:52:47 +00:00
|
|
|
if (virAsprintf(&grpname, "%s.libvirt-%s",
|
2013-03-28 18:08:39 +00:00
|
|
|
name, driver) < 0)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2013-04-26 09:50:24 +00:00
|
|
|
if ((rc = virCgroupPartitionEscape(&grpname)) < 0)
|
|
|
|
return rc;
|
|
|
|
|
2013-04-16 12:16:37 +00:00
|
|
|
rc = virCgroupNew(grpname, partition, -1, group);
|
2013-03-28 18:08:39 +00:00
|
|
|
|
|
|
|
if (rc == 0) {
|
|
|
|
/*
|
|
|
|
* Create a cgroup with memory.use_hierarchy enabled to
|
|
|
|
* surely account memory usage of lxc with ns subsystem
|
|
|
|
* enabled. (To be exact, memory and ns subsystems are
|
|
|
|
* enabled at the same time.)
|
|
|
|
*
|
|
|
|
* The reason why doing it here, not a upper group, say
|
|
|
|
* a group for driver, is to avoid overhead to track
|
|
|
|
* cumulative usage that we don't need.
|
|
|
|
*/
|
|
|
|
rc = virCgroupMakeGroup(partition, *group, create, VIR_CGROUP_MEM_HIERACHY);
|
|
|
|
if (rc != 0) {
|
|
|
|
virCgroupRemove(*group);
|
|
|
|
virCgroupFree(group);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-04-16 12:16:37 +00:00
|
|
|
VIR_FREE(grpname);
|
2013-03-28 18:08:39 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
int virCgroupNewDomainPartition(virCgroupPtr partition ATTRIBUTE_UNUSED,
|
|
|
|
const char *driver ATTRIBUTE_UNUSED,
|
|
|
|
const char *name ATTRIBUTE_UNUSED,
|
|
|
|
bool create ATTRIBUTE_UNUSED,
|
|
|
|
virCgroupPtr *group ATTRIBUTE_UNUSED)
|
2009-10-07 10:18:31 +00:00
|
|
|
{
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
#endif
|
2009-07-10 10:40:04 +00:00
|
|
|
|
2011-07-21 07:12:55 +00:00
|
|
|
/**
|
2013-03-28 16:33:22 +00:00
|
|
|
* virCgroupNewVcpu:
|
2011-07-21 07:12:55 +00:00
|
|
|
*
|
2013-03-28 16:33:22 +00:00
|
|
|
* @domain: group for the domain
|
2011-07-21 07:12:55 +00:00
|
|
|
* @vcpuid: id of the vcpu
|
2013-03-28 16:33:22 +00:00
|
|
|
* @create: true to create if not already existing
|
2011-07-21 07:12:55 +00:00
|
|
|
* @group: Pointer to returned virCgroupPtr
|
|
|
|
*
|
|
|
|
* Returns 0 on success
|
|
|
|
*/
|
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2013-03-28 16:33:22 +00:00
|
|
|
int virCgroupNewVcpu(virCgroupPtr domain,
|
2011-07-21 07:12:55 +00:00
|
|
|
int vcpuid,
|
2013-03-28 16:33:22 +00:00
|
|
|
bool create,
|
|
|
|
virCgroupPtr *group)
|
2011-07-21 07:12:55 +00:00
|
|
|
{
|
|
|
|
int rc;
|
2013-03-22 11:11:34 +00:00
|
|
|
char *name;
|
2013-03-21 13:27:13 +00:00
|
|
|
int controllers;
|
2011-07-21 07:12:55 +00:00
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
if (virAsprintf(&name, "vcpu%d", vcpuid) < 0)
|
2011-07-21 07:12:55 +00:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2013-03-21 13:27:13 +00:00
|
|
|
controllers = ((1 << VIR_CGROUP_CONTROLLER_CPU) |
|
|
|
|
(1 << VIR_CGROUP_CONTROLLER_CPUACCT) |
|
|
|
|
(1 << VIR_CGROUP_CONTROLLER_CPUSET));
|
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
rc = virCgroupNew(name, domain, controllers, group);
|
|
|
|
VIR_FREE(name);
|
2011-07-21 07:12:55 +00:00
|
|
|
|
|
|
|
if (rc == 0) {
|
2013-03-28 16:33:22 +00:00
|
|
|
rc = virCgroupMakeGroup(domain, *group, create, VIR_CGROUP_NONE);
|
2013-04-03 17:08:41 +00:00
|
|
|
if (rc != 0) {
|
|
|
|
virCgroupRemove(*group);
|
2011-07-21 07:12:55 +00:00
|
|
|
virCgroupFree(group);
|
2013-04-03 17:08:41 +00:00
|
|
|
}
|
2011-07-21 07:12:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
#else
|
2013-03-28 16:33:22 +00:00
|
|
|
int virCgroupNewVcpu(virCgroupPtr domain ATTRIBUTE_UNUSED,
|
2011-07-21 07:12:55 +00:00
|
|
|
int vcpuid ATTRIBUTE_UNUSED,
|
2013-03-28 16:33:22 +00:00
|
|
|
bool create ATTRIBUTE_UNUSED,
|
|
|
|
virCgroupPtr *group ATTRIBUTE_UNUSED)
|
2011-07-21 07:12:55 +00:00
|
|
|
{
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2012-08-21 09:18:24 +00:00
|
|
|
/**
|
2013-03-28 16:33:22 +00:00
|
|
|
* virCgroupNewEmulator:
|
2012-08-21 09:18:24 +00:00
|
|
|
*
|
2013-03-28 16:33:22 +00:00
|
|
|
* @domain: group for the domain
|
|
|
|
* @create: true to create if not already existing
|
2012-08-21 09:18:24 +00:00
|
|
|
* @group: Pointer to returned virCgroupPtr
|
|
|
|
*
|
|
|
|
* Returns: 0 on success or -errno on failure
|
|
|
|
*/
|
|
|
|
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2013-03-28 16:33:22 +00:00
|
|
|
int virCgroupNewEmulator(virCgroupPtr domain,
|
|
|
|
bool create,
|
|
|
|
virCgroupPtr *group)
|
2012-08-21 09:18:24 +00:00
|
|
|
{
|
|
|
|
int rc;
|
2013-03-21 13:27:13 +00:00
|
|
|
int controllers;
|
2012-08-21 09:18:24 +00:00
|
|
|
|
2013-03-21 13:27:13 +00:00
|
|
|
controllers = ((1 << VIR_CGROUP_CONTROLLER_CPU) |
|
|
|
|
(1 << VIR_CGROUP_CONTROLLER_CPUACCT) |
|
|
|
|
(1 << VIR_CGROUP_CONTROLLER_CPUSET));
|
|
|
|
|
2013-03-22 11:11:34 +00:00
|
|
|
rc = virCgroupNew("emulator", domain, controllers, group);
|
2012-08-21 09:18:24 +00:00
|
|
|
|
|
|
|
if (rc == 0) {
|
2013-03-28 16:33:22 +00:00
|
|
|
rc = virCgroupMakeGroup(domain, *group, create, VIR_CGROUP_NONE);
|
2013-04-03 17:08:41 +00:00
|
|
|
if (rc != 0) {
|
|
|
|
virCgroupRemove(*group);
|
2012-08-21 09:18:24 +00:00
|
|
|
virCgroupFree(group);
|
2013-04-03 17:08:41 +00:00
|
|
|
}
|
2012-08-21 09:18:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
#else
|
2013-03-28 16:33:22 +00:00
|
|
|
int virCgroupNewEmulator(virCgroupPtr domain ATTRIBUTE_UNUSED,
|
|
|
|
bool create ATTRIBUTE_UNUSED,
|
|
|
|
virCgroupPtr *group ATTRIBUTE_UNUSED)
|
2012-08-21 09:18:24 +00:00
|
|
|
{
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
2011-02-08 06:56:39 +00:00
|
|
|
/**
|
|
|
|
* virCgroupSetBlkioWeight:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to change io weight for
|
|
|
|
* @weight: The Weight for this cgroup
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight)
|
|
|
|
{
|
|
|
|
if (weight > 1000 || weight < 100)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return virCgroupSetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_BLKIO,
|
|
|
|
"blkio.weight",
|
|
|
|
weight);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupGetBlkioWeight:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to get weight for
|
|
|
|
* @Weight: Pointer to returned weight
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight)
|
|
|
|
{
|
|
|
|
unsigned long long tmp;
|
|
|
|
int ret;
|
|
|
|
ret = virCgroupGetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_BLKIO,
|
|
|
|
"blkio.weight", &tmp);
|
|
|
|
if (ret == 0)
|
|
|
|
*weight = tmp;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-11-08 11:00:34 +00:00
|
|
|
/**
|
|
|
|
* virCgroupSetBlkioDeviceWeight:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to change io device weight device for
|
|
|
|
* @path: The device with a weight to alter
|
|
|
|
* @weight: The new device weight (100-1000), or 0 to clear
|
|
|
|
*
|
|
|
|
* device_weight is treated as a write-only parameter, so
|
|
|
|
* there isn't a getter counterpart.
|
|
|
|
*
|
|
|
|
* Returns: 0 on success, -errno on failure
|
|
|
|
*/
|
|
|
|
#if defined(major) && defined(minor)
|
|
|
|
int virCgroupSetBlkioDeviceWeight(virCgroupPtr group,
|
|
|
|
const char *path,
|
|
|
|
unsigned int weight)
|
|
|
|
{
|
|
|
|
char *str;
|
|
|
|
struct stat sb;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (weight && (weight > 1000 || weight < 100))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (stat(path, &sb) < 0)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
if (!S_ISBLK(sb.st_mode))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (virAsprintf(&str, "%d:%d %d", major(sb.st_rdev), minor(sb.st_rdev),
|
|
|
|
weight) < 0)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
ret = virCgroupSetValueStr(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_BLKIO,
|
|
|
|
"blkio.weight_device",
|
|
|
|
str);
|
|
|
|
VIR_FREE(str);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
#else
|
2012-11-25 15:21:58 +00:00
|
|
|
int
|
|
|
|
virCgroupSetBlkioDeviceWeight(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
|
|
const char *path ATTRIBUTE_UNUSED,
|
|
|
|
unsigned int weight ATTRIBUTE_UNUSED)
|
2011-11-08 11:00:34 +00:00
|
|
|
{
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2008-10-03 17:58:02 +00:00
|
|
|
/**
|
|
|
|
* virCgroupSetMemory:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to change memory for
|
|
|
|
* @kb: The memory amount in kilobytes
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-01-13 09:18:11 +00:00
|
|
|
int virCgroupSetMemory(virCgroupPtr group, unsigned long long kb)
|
2008-10-03 17:58:02 +00:00
|
|
|
{
|
2011-01-13 09:18:11 +00:00
|
|
|
unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
|
|
|
|
|
|
|
|
if (kb > maxkb)
|
|
|
|
return -EINVAL;
|
|
|
|
else if (kb == maxkb)
|
|
|
|
return virCgroupSetValueI64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.limit_in_bytes",
|
|
|
|
-1);
|
|
|
|
else
|
|
|
|
return virCgroupSetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.limit_in_bytes",
|
|
|
|
kb << 10);
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
|
|
|
|
2009-10-07 13:26:23 +00:00
|
|
|
/**
|
|
|
|
* virCgroupGetMemoryUsage:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to change memory for
|
|
|
|
* @kb: Pointer to returned used memory in kilobytes
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb)
|
|
|
|
{
|
2009-10-08 14:26:42 +00:00
|
|
|
long long unsigned int usage_in_bytes;
|
2009-10-07 13:26:23 +00:00
|
|
|
int ret;
|
|
|
|
ret = virCgroupGetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.usage_in_bytes", &usage_in_bytes);
|
|
|
|
if (ret == 0)
|
|
|
|
*kb = (unsigned long) usage_in_bytes >> 10;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2010-10-12 14:50:53 +00:00
|
|
|
/**
|
|
|
|
* virCgroupSetMemoryHardLimit:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to change memory hard limit for
|
|
|
|
* @kb: The memory amount in kilobytes
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-01-13 09:18:11 +00:00
|
|
|
int virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb)
|
2010-10-12 14:50:53 +00:00
|
|
|
{
|
|
|
|
return virCgroupSetMemory(group, kb);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupGetMemoryHardLimit:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to get the memory hard limit for
|
|
|
|
* @kb: The memory amount in kilobytes
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-01-13 09:18:11 +00:00
|
|
|
int virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb)
|
2010-10-12 14:50:53 +00:00
|
|
|
{
|
|
|
|
long long unsigned int limit_in_bytes;
|
|
|
|
int ret;
|
|
|
|
ret = virCgroupGetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.limit_in_bytes", &limit_in_bytes);
|
|
|
|
if (ret == 0)
|
2011-01-13 09:18:11 +00:00
|
|
|
*kb = limit_in_bytes >> 10;
|
2010-10-12 14:50:53 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupSetMemorySoftLimit:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to change memory soft limit for
|
|
|
|
* @kb: The memory amount in kilobytes
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-01-13 09:18:11 +00:00
|
|
|
int virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb)
|
2010-10-12 14:50:53 +00:00
|
|
|
{
|
2011-01-13 09:18:11 +00:00
|
|
|
unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
|
|
|
|
|
|
|
|
if (kb > maxkb)
|
|
|
|
return -EINVAL;
|
|
|
|
else if (kb == maxkb)
|
|
|
|
return virCgroupSetValueI64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.soft_limit_in_bytes",
|
|
|
|
-1);
|
|
|
|
else
|
|
|
|
return virCgroupSetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.soft_limit_in_bytes",
|
|
|
|
kb << 10);
|
2010-10-12 14:50:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupGetMemorySoftLimit:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to get the memory soft limit for
|
|
|
|
* @kb: The memory amount in kilobytes
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-01-13 09:18:11 +00:00
|
|
|
int virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb)
|
2010-10-12 14:50:53 +00:00
|
|
|
{
|
|
|
|
long long unsigned int limit_in_bytes;
|
|
|
|
int ret;
|
|
|
|
ret = virCgroupGetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.soft_limit_in_bytes", &limit_in_bytes);
|
|
|
|
if (ret == 0)
|
2011-01-13 09:18:11 +00:00
|
|
|
*kb = limit_in_bytes >> 10;
|
2010-10-12 14:50:53 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2011-03-16 05:07:12 +00:00
|
|
|
* virCgroupSetMemSwapHardLimit:
|
2010-10-12 14:50:53 +00:00
|
|
|
*
|
2011-03-16 05:07:12 +00:00
|
|
|
* @group: The cgroup to change mem+swap hard limit for
|
|
|
|
* @kb: The mem+swap amount in kilobytes
|
2010-10-12 14:50:53 +00:00
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-03-16 05:07:12 +00:00
|
|
|
int virCgroupSetMemSwapHardLimit(virCgroupPtr group, unsigned long long kb)
|
2010-10-12 14:50:53 +00:00
|
|
|
{
|
2011-01-13 09:18:11 +00:00
|
|
|
unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
|
|
|
|
|
|
|
|
if (kb > maxkb)
|
|
|
|
return -EINVAL;
|
|
|
|
else if (kb == maxkb)
|
|
|
|
return virCgroupSetValueI64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.memsw.limit_in_bytes",
|
|
|
|
-1);
|
|
|
|
else
|
|
|
|
return virCgroupSetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.memsw.limit_in_bytes",
|
|
|
|
kb << 10);
|
2010-10-12 14:50:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2011-03-16 05:07:12 +00:00
|
|
|
* virCgroupGetMemSwapHardLimit:
|
2010-10-12 14:50:53 +00:00
|
|
|
*
|
2011-03-16 05:07:12 +00:00
|
|
|
* @group: The cgroup to get mem+swap hard limit for
|
|
|
|
* @kb: The mem+swap amount in kilobytes
|
2010-10-12 14:50:53 +00:00
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-03-16 05:07:12 +00:00
|
|
|
int virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb)
|
2010-10-12 14:50:53 +00:00
|
|
|
{
|
|
|
|
long long unsigned int limit_in_bytes;
|
|
|
|
int ret;
|
|
|
|
ret = virCgroupGetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.memsw.limit_in_bytes", &limit_in_bytes);
|
|
|
|
if (ret == 0)
|
2011-01-13 09:18:11 +00:00
|
|
|
*kb = limit_in_bytes >> 10;
|
2010-10-12 14:50:53 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-11-12 07:02:25 +00:00
|
|
|
/**
|
|
|
|
* virCgroupGetMemSwapUsage:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to get mem+swap usage for
|
|
|
|
* @kb: The mem+swap amount in kilobytes
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb)
|
|
|
|
{
|
|
|
|
long long unsigned int usage_in_bytes;
|
|
|
|
int ret;
|
|
|
|
ret = virCgroupGetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_MEMORY,
|
|
|
|
"memory.memsw.usage_in_bytes", &usage_in_bytes);
|
|
|
|
if (ret == 0)
|
|
|
|
*kb = usage_in_bytes >> 10;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-12-20 08:34:58 +00:00
|
|
|
/**
|
|
|
|
* virCgroupSetCpusetMems:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to set cpuset.mems for
|
|
|
|
* @mems: the numa nodes to set
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems)
|
|
|
|
{
|
|
|
|
return virCgroupSetValueStr(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_CPUSET,
|
|
|
|
"cpuset.mems",
|
|
|
|
mems);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupGetCpusetMems:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to get cpuset.mems for
|
|
|
|
* @mems: the numa nodes to get
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupGetCpusetMems(virCgroupPtr group, char **mems)
|
|
|
|
{
|
|
|
|
return virCgroupGetValueStr(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_CPUSET,
|
|
|
|
"cpuset.mems",
|
|
|
|
mems);
|
|
|
|
}
|
|
|
|
|
2012-08-21 09:18:30 +00:00
|
|
|
/**
|
|
|
|
* virCgroupSetCpusetCpus:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to set cpuset.cpus for
|
|
|
|
* @cpus: the cpus to set
|
|
|
|
*
|
|
|
|
* Retuens: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus)
|
|
|
|
{
|
|
|
|
return virCgroupSetValueStr(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_CPUSET,
|
|
|
|
"cpuset.cpus",
|
|
|
|
cpus);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupGetCpusetCpus:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to get cpuset.cpus for
|
|
|
|
* @cpus: the cpus to get
|
|
|
|
*
|
|
|
|
* Retuens: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus)
|
|
|
|
{
|
|
|
|
return virCgroupGetValueStr(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_CPUSET,
|
|
|
|
"cpuset.cpus",
|
|
|
|
cpus);
|
|
|
|
}
|
|
|
|
|
2008-10-03 17:58:02 +00:00
|
|
|
/**
|
|
|
|
* virCgroupDenyAllDevices:
|
|
|
|
*
|
2011-03-09 03:13:18 +00:00
|
|
|
* @group: The cgroup to deny all permissions, for all devices
|
2008-10-03 17:58:02 +00:00
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupDenyAllDevices(virCgroupPtr group)
|
|
|
|
{
|
|
|
|
return virCgroupSetValueStr(group,
|
2009-07-09 13:10:41 +00:00
|
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
|
|
|
"devices.deny",
|
|
|
|
"a");
|
2008-10-03 17:58:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupAllowDevice:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to allow a device for
|
|
|
|
* @type: The device type (i.e., 'c' or 'b')
|
|
|
|
* @major: The major number of the device
|
|
|
|
* @minor: The minor number of the device
|
2011-03-09 03:13:18 +00:00
|
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
|
2008-10-03 17:58:02 +00:00
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-03-09 03:13:18 +00:00
|
|
|
int virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor,
|
|
|
|
int perms)
|
2008-10-03 17:58:02 +00:00
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
char *devstr = NULL;
|
|
|
|
|
2011-03-09 03:13:18 +00:00
|
|
|
if (virAsprintf(&devstr, "%c %i:%i %s%s%s", type, major, minor,
|
|
|
|
perms & VIR_CGROUP_DEVICE_READ ? "r" : "",
|
|
|
|
perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "",
|
|
|
|
perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) {
|
2008-10-03 17:58:02 +00:00
|
|
|
rc = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = virCgroupSetValueStr(group,
|
2009-07-09 13:10:41 +00:00
|
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
2008-10-03 17:58:02 +00:00
|
|
|
"devices.allow",
|
|
|
|
devstr);
|
|
|
|
out:
|
|
|
|
VIR_FREE(devstr);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
2008-10-08 16:28:48 +00:00
|
|
|
|
2008-10-21 16:46:47 +00:00
|
|
|
/**
|
|
|
|
* virCgroupAllowDeviceMajor:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to allow an entire device major type for
|
|
|
|
* @type: The device type (i.e., 'c' or 'b')
|
|
|
|
* @major: The major number of the device type
|
2011-03-09 03:13:18 +00:00
|
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
|
2008-10-21 16:46:47 +00:00
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-03-09 03:13:18 +00:00
|
|
|
int virCgroupAllowDeviceMajor(virCgroupPtr group, char type, int major,
|
|
|
|
int perms)
|
2008-10-21 16:46:47 +00:00
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
char *devstr = NULL;
|
|
|
|
|
2011-03-09 03:13:18 +00:00
|
|
|
if (virAsprintf(&devstr, "%c %i:* %s%s%s", type, major,
|
|
|
|
perms & VIR_CGROUP_DEVICE_READ ? "r" : "",
|
|
|
|
perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "",
|
|
|
|
perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) {
|
2008-10-21 16:46:47 +00:00
|
|
|
rc = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = virCgroupSetValueStr(group,
|
2009-07-09 13:10:41 +00:00
|
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
2008-10-21 16:46:47 +00:00
|
|
|
"devices.allow",
|
|
|
|
devstr);
|
|
|
|
out:
|
|
|
|
VIR_FREE(devstr);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2009-07-09 13:11:49 +00:00
|
|
|
/**
|
|
|
|
* virCgroupAllowDevicePath:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to allow the device for
|
|
|
|
* @path: the device to allow
|
2011-03-09 03:13:18 +00:00
|
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
|
2009-07-09 13:11:49 +00:00
|
|
|
*
|
|
|
|
* Queries the type of device and its major/minor number, and
|
|
|
|
* adds that to the cgroup ACL
|
|
|
|
*
|
2011-02-17 00:05:54 +00:00
|
|
|
* Returns: 0 on success, 1 if path exists but is not a device, or
|
|
|
|
* negative errno value on failure
|
2009-07-09 13:11:49 +00:00
|
|
|
*/
|
2009-10-07 10:18:31 +00:00
|
|
|
#if defined(major) && defined(minor)
|
2011-03-09 03:13:18 +00:00
|
|
|
int virCgroupAllowDevicePath(virCgroupPtr group, const char *path, int perms)
|
2009-07-09 13:11:49 +00:00
|
|
|
{
|
|
|
|
struct stat sb;
|
|
|
|
|
|
|
|
if (stat(path, &sb) < 0)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode))
|
2011-02-17 00:05:54 +00:00
|
|
|
return 1;
|
2009-07-09 13:11:49 +00:00
|
|
|
|
|
|
|
return virCgroupAllowDevice(group,
|
|
|
|
S_ISCHR(sb.st_mode) ? 'c' : 'b',
|
|
|
|
major(sb.st_rdev),
|
2011-03-09 03:13:18 +00:00
|
|
|
minor(sb.st_rdev),
|
|
|
|
perms);
|
2009-07-09 13:11:49 +00:00
|
|
|
}
|
2009-10-07 10:18:31 +00:00
|
|
|
#else
|
|
|
|
int virCgroupAllowDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED,
|
2011-03-09 03:13:18 +00:00
|
|
|
const char *path ATTRIBUTE_UNUSED,
|
|
|
|
int perms ATTRIBUTE_UNUSED)
|
2009-10-07 10:18:31 +00:00
|
|
|
{
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2009-07-09 13:11:49 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupDenyDevice:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to deny a device for
|
|
|
|
* @type: The device type (i.e., 'c' or 'b')
|
|
|
|
* @major: The major number of the device
|
|
|
|
* @minor: The minor number of the device
|
2011-03-09 03:13:18 +00:00
|
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to deny
|
2009-07-09 13:11:49 +00:00
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-03-09 03:13:18 +00:00
|
|
|
int virCgroupDenyDevice(virCgroupPtr group, char type, int major, int minor,
|
|
|
|
int perms)
|
2009-07-09 13:11:49 +00:00
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
char *devstr = NULL;
|
|
|
|
|
2011-03-09 03:13:18 +00:00
|
|
|
if (virAsprintf(&devstr, "%c %i:%i %s%s%s", type, major, minor,
|
|
|
|
perms & VIR_CGROUP_DEVICE_READ ? "r" : "",
|
|
|
|
perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "",
|
|
|
|
perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) {
|
2009-07-09 13:11:49 +00:00
|
|
|
rc = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = virCgroupSetValueStr(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
|
|
|
"devices.deny",
|
|
|
|
devstr);
|
|
|
|
out:
|
|
|
|
VIR_FREE(devstr);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupDenyDeviceMajor:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to deny an entire device major type for
|
|
|
|
* @type: The device type (i.e., 'c' or 'b')
|
|
|
|
* @major: The major number of the device type
|
2011-03-09 03:13:18 +00:00
|
|
|
* @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to deny
|
2009-07-09 13:11:49 +00:00
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
2011-03-09 03:13:18 +00:00
|
|
|
int virCgroupDenyDeviceMajor(virCgroupPtr group, char type, int major,
|
|
|
|
int perms)
|
2009-07-09 13:11:49 +00:00
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
char *devstr = NULL;
|
|
|
|
|
2011-03-09 03:13:18 +00:00
|
|
|
if (virAsprintf(&devstr, "%c %i:* %s%s%s", type, major,
|
|
|
|
perms & VIR_CGROUP_DEVICE_READ ? "r" : "",
|
|
|
|
perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "",
|
|
|
|
perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) {
|
2009-07-09 13:11:49 +00:00
|
|
|
rc = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = virCgroupSetValueStr(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_DEVICES,
|
|
|
|
"devices.deny",
|
|
|
|
devstr);
|
|
|
|
out:
|
|
|
|
VIR_FREE(devstr);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2009-10-07 10:18:31 +00:00
|
|
|
#if defined(major) && defined(minor)
|
2011-03-09 03:13:18 +00:00
|
|
|
int virCgroupDenyDevicePath(virCgroupPtr group, const char *path, int perms)
|
2009-07-09 13:11:49 +00:00
|
|
|
{
|
|
|
|
struct stat sb;
|
|
|
|
|
|
|
|
if (stat(path, &sb) < 0)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode))
|
2011-02-17 00:05:54 +00:00
|
|
|
return 1;
|
2009-07-09 13:11:49 +00:00
|
|
|
|
|
|
|
return virCgroupDenyDevice(group,
|
|
|
|
S_ISCHR(sb.st_mode) ? 'c' : 'b',
|
|
|
|
major(sb.st_rdev),
|
2011-03-09 03:13:18 +00:00
|
|
|
minor(sb.st_rdev),
|
|
|
|
perms);
|
2009-07-09 13:11:49 +00:00
|
|
|
}
|
2009-10-07 10:18:31 +00:00
|
|
|
#else
|
|
|
|
int virCgroupDenyDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED,
|
2011-03-09 03:13:18 +00:00
|
|
|
const char *path ATTRIBUTE_UNUSED,
|
|
|
|
int perms ATTRIBUTE_UNUSED)
|
2009-10-07 10:18:31 +00:00
|
|
|
{
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
#endif
|
2009-07-09 13:11:49 +00:00
|
|
|
|
2009-07-09 13:11:21 +00:00
|
|
|
/* Write @shares to cpu.shares of the cpu controller; the result of
 * virCgroupSetValueU64() is returned unchanged. */
int
virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares)
{
    int rc = virCgroupSetValueU64(group,
                                  VIR_CGROUP_CONTROLLER_CPU,
                                  "cpu.shares", shares);

    return rc;
}
|
|
|
|
|
2009-07-09 13:11:21 +00:00
|
|
|
/* Read cpu.shares of the cpu controller into @shares; the result of
 * virCgroupGetValueU64() is returned unchanged. */
int
virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares)
{
    int rc = virCgroupGetValueU64(group,
                                  VIR_CGROUP_CONTROLLER_CPU,
                                  "cpu.shares", shares);

    return rc;
}
|
2009-03-06 14:44:04 +00:00
|
|
|
|
2011-07-21 07:21:05 +00:00
|
|
|
/**
|
|
|
|
* virCgroupSetCpuCfsPeriod:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to change cpu.cfs_period_us for
|
|
|
|
* @cfs_period: The bandwidth period in usecs
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupSetCpuCfsPeriod(virCgroupPtr group, unsigned long long cfs_period)
|
|
|
|
{
|
|
|
|
/* The cfs_period shoule be greater or equal than 1ms, and less or equal
|
|
|
|
* than 1s.
|
|
|
|
*/
|
|
|
|
if (cfs_period < 1000 || cfs_period > 1000000)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return virCgroupSetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
|
|
"cpu.cfs_period_us", cfs_period);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupGetCpuCfsPeriod:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to get cpu.cfs_period_us for
|
|
|
|
* @cfs_period: Pointer to the returned bandwidth period in usecs
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupGetCpuCfsPeriod(virCgroupPtr group, unsigned long long *cfs_period)
|
|
|
|
{
|
|
|
|
return virCgroupGetValueU64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
|
|
"cpu.cfs_period_us", cfs_period);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupSetCpuCfsQuota:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to change cpu.cfs_quota_us for
|
|
|
|
* @cfs_quota: the cpu bandwidth (in usecs) that this tg will be allowed to
|
|
|
|
* consume over period
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupSetCpuCfsQuota(virCgroupPtr group, long long cfs_quota)
|
|
|
|
{
|
|
|
|
if (cfs_quota >= 0) {
|
|
|
|
/* The cfs_quota shoule be greater or equal than 1ms */
|
|
|
|
if (cfs_quota < 1000)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* check overflow */
|
|
|
|
if (cfs_quota > ULLONG_MAX / 1000)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return virCgroupSetValueI64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
|
|
"cpu.cfs_quota_us", cfs_quota);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virCgroupGetCpuCfsQuota:
|
|
|
|
*
|
|
|
|
* @group: The cgroup to get cpu.cfs_quota_us for
|
|
|
|
* @cfs_quota: Pointer to the returned cpu bandwidth (in usecs) that this tg
|
|
|
|
* will be allowed to consume over period
|
|
|
|
*
|
|
|
|
* Returns: 0 on success
|
|
|
|
*/
|
|
|
|
int virCgroupGetCpuCfsQuota(virCgroupPtr group, long long *cfs_quota)
|
|
|
|
{
|
|
|
|
return virCgroupGetValueI64(group,
|
|
|
|
VIR_CGROUP_CONTROLLER_CPU,
|
|
|
|
"cpu.cfs_quota_us", cfs_quota);
|
|
|
|
}
|
|
|
|
|
2009-03-06 14:44:04 +00:00
|
|
|
/* Read cpuacct.usage of the cpuacct controller into @usage; the
 * result of virCgroupGetValueU64() is returned unchanged. */
int
virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage)
{
    int rc = virCgroupGetValueU64(group,
                                  VIR_CGROUP_CONTROLLER_CPUACCT,
                                  "cpuacct.usage", usage);

    return rc;
}
|
2009-09-21 14:31:22 +00:00
|
|
|
|
2012-03-02 02:54:23 +00:00
|
|
|
/* Read cpuacct.usage_percpu of the cpuacct controller as a string
 * into @usage; the result of virCgroupGetValueStr() is returned
 * unchanged. */
int
virCgroupGetCpuacctPercpuUsage(virCgroupPtr group, char **usage)
{
    int rc = virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT,
                                  "cpuacct.usage_percpu", usage);

    return rc;
}
|
|
|
|
|
2012-03-09 00:35:59 +00:00
|
|
|
#ifdef _SC_CLK_TCK
|
|
|
|
int virCgroupGetCpuacctStat(virCgroupPtr group, unsigned long long *user,
|
|
|
|
unsigned long long *sys)
|
|
|
|
{
|
|
|
|
char *str;
|
|
|
|
char *p;
|
|
|
|
int ret;
|
|
|
|
static double scale = -1.0;
|
|
|
|
|
|
|
|
if ((ret = virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT,
|
|
|
|
"cpuacct.stat", &str)) < 0)
|
|
|
|
return ret;
|
|
|
|
if (!(p = STRSKIP(str, "user ")) ||
|
|
|
|
virStrToLong_ull(p, &p, 10, user) < 0 ||
|
|
|
|
!(p = STRSKIP(p, "\nsystem ")) ||
|
|
|
|
virStrToLong_ull(p, NULL, 10, sys) < 0) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
/* times reported are in system ticks (generally 100 Hz), but that
|
|
|
|
* rate can theoretically vary between machines. Scale things
|
|
|
|
* into approximate nanoseconds. */
|
|
|
|
if (scale < 0) {
|
|
|
|
long ticks_per_sec = sysconf(_SC_CLK_TCK);
|
|
|
|
if (ticks_per_sec == -1) {
|
|
|
|
ret = -errno;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
scale = 1000000000.0 / ticks_per_sec;
|
|
|
|
}
|
|
|
|
*user *= scale;
|
|
|
|
*sys *= scale;
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
cleanup:
|
|
|
|
VIR_FREE(str);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
int virCgroupGetCpuacctStat(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
|
|
unsigned long long *user ATTRIBUTE_UNUSED,
|
|
|
|
unsigned long long *sys ATTRIBUTE_UNUSED)
|
|
|
|
{
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2009-09-21 14:31:22 +00:00
|
|
|
/* Write @state to freezer.state of the freezer controller; the result
 * of virCgroupSetValueStr() is returned unchanged. */
int
virCgroupSetFreezerState(virCgroupPtr group, const char *state)
{
    int rc = virCgroupSetValueStr(group,
                                  VIR_CGROUP_CONTROLLER_FREEZER,
                                  "freezer.state", state);

    return rc;
}
|
|
|
|
|
|
|
|
/* Read freezer.state of the freezer controller as a string into
 * @state; the result of virCgroupGetValueStr() is returned unchanged. */
int
virCgroupGetFreezerState(virCgroupPtr group, char **state)
{
    int rc = virCgroupGetValueStr(group,
                                  VIR_CGROUP_CONTROLLER_FREEZER,
                                  "freezer.state", state);

    return rc;
}
|
2011-02-22 17:33:59 +00:00
|
|
|
|
2011-02-28 14:13:58 +00:00
|
|
|
|
2011-03-09 04:43:26 +00:00
|
|
|
#if defined HAVE_KILL && defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
|
2011-02-22 17:33:59 +00:00
|
|
|
static int virCgroupKillInternal(virCgroupPtr group, int signum, virHashTablePtr pids)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
int killedAny = 0;
|
|
|
|
char *keypath = NULL;
|
|
|
|
bool done = false;
|
cgroup: avoid leaking a file
Clang detected a dead store to rc. It turns out that in fixing this,
I also found a FILE* leak.
This is a subtle change in behavior, although unlikely to hit. The
pidfile is a kernel file, so we've probably got more serious problems
under foot if we fail to parse one. However, the previous behavior
was that even if one pid file failed to parse, we tried others,
whereas now we give up on the first failure. Either way, though,
the function returns -1, so the caller will know that something is
going wrong, and that not all pids were necessarily reaped. Besides,
there were other instances already in the code where failure in the
inner loop aborted the outer loop.
* src/util/cgroup.c (virCgroupKillInternal): Abort rather than
resuming loop on fscanf failure, and cleanup file on error.
2011-05-03 21:46:06 +00:00
|
|
|
FILE *fp = NULL;
|
|
|
|
VIR_DEBUG("group=%p path=%s signum=%d pids=%p",
|
|
|
|
group, group->path, signum, pids);
|
2011-02-22 17:33:59 +00:00
|
|
|
|
|
|
|
rc = virCgroupPathOfController(group, -1, "tasks", &keypath);
|
|
|
|
if (rc != 0) {
|
|
|
|
VIR_DEBUG("No path of %s, tasks", group->path);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* PIDs may be forking as we kill them, so loop
|
|
|
|
* until there are no new PIDs found
|
|
|
|
*/
|
|
|
|
while (!done) {
|
|
|
|
done = true;
|
|
|
|
if (!(fp = fopen(keypath, "r"))) {
|
|
|
|
rc = -errno;
|
|
|
|
VIR_DEBUG("Failed to read %s: %m\n", keypath);
|
|
|
|
goto cleanup;
|
|
|
|
} else {
|
|
|
|
while (!feof(fp)) {
|
build: use correct type for pid and similar types
No thanks to 64-bit windows, with 64-bit pid_t, we have to avoid
constructs like 'int pid'. Our API in libvirt-qemu cannot be
changed without breaking ABI; but then again, libvirt-qemu can
only be used on systems that support UNIX sockets, which rules
out Windows (even if qemu could be compiled there) - so for all
points on the call chain that interact with this API decision,
we require a different variable name to make it clear that we
audited the use for safety.
Adding a syntax-check rule only solves half the battle; anywhere
that uses printf on a pid_t still needs to be converted, but that
will be a separate patch.
* cfg.mk (sc_correct_id_types): New syntax check.
* src/libvirt-qemu.c (virDomainQemuAttach): Document why we didn't
use pid_t for pid, and validate for overflow.
* include/libvirt/libvirt-qemu.h (virDomainQemuAttach): Tweak name
for syntax check.
* src/vmware/vmware_conf.c (vmwareExtractPid): Likewise.
* src/driver.h (virDrvDomainQemuAttach): Likewise.
* tools/virsh.c (cmdQemuAttach): Likewise.
* src/remote/qemu_protocol.x (qemu_domain_attach_args): Likewise.
* src/qemu_protocol-structs (qemu_domain_attach_args): Likewise.
* src/util/cgroup.c (virCgroupPidCode, virCgroupKillInternal):
Likewise.
* src/qemu/qemu_command.c(qemuParseProcFileStrings): Likewise.
(qemuParseCommandLinePid): Use pid_t for pid.
* daemon/libvirtd.c (daemonForkIntoBackground): Likewise.
* src/conf/domain_conf.h (_virDomainObj): Likewise.
* src/probes.d (rpc_socket_new): Likewise.
* src/qemu/qemu_command.h (qemuParseCommandLinePid): Likewise.
* src/qemu/qemu_driver.c (qemudGetProcessInfo, qemuDomainAttach):
Likewise.
* src/qemu/qemu_process.c (qemuProcessAttach): Likewise.
* src/qemu/qemu_process.h (qemuProcessAttach): Likewise.
* src/uml/uml_driver.c (umlGetProcessInfo): Likewise.
* src/util/virnetdev.h (virNetDevSetNamespace): Likewise.
* src/util/virnetdev.c (virNetDevSetNamespace): Likewise.
* tests/testutils.c (virtTestCaptureProgramOutput): Likewise.
* src/conf/storage_conf.h (_virStoragePerms): Use mode_t, uid_t,
and gid_t rather than int.
* src/security/security_dac.c (virSecurityDACSetOwnership): Likewise.
* src/conf/storage_conf.c (virStorageDefParsePerms): Avoid
compiler warning.
2012-02-10 23:08:11 +00:00
|
|
|
unsigned long pid_value;
|
|
|
|
if (fscanf(fp, "%lu", &pid_value) != 1) {
|
2011-02-22 17:33:59 +00:00
|
|
|
if (feof(fp))
|
|
|
|
break;
|
|
|
|
rc = -errno;
|
cgroup: avoid leaking a file
Clang detected a dead store to rc. It turns out that in fixing this,
I also found a FILE* leak.
This is a subtle change in behavior, although unlikely to hit. The
pidfile is a kernel file, so we've probably got more serious problems
under foot if we fail to parse one. However, the previous behavior
was that even if one pid file failed to parse, we tried others,
whereas now we give up on the first failure. Either way, though,
the function returns -1, so the caller will know that something is
going wrong, and that not all pids were necessarily reaped. Besides,
there were other instances already in the code where failure in the
inner loop aborted the outer loop.
* src/util/cgroup.c (virCgroupKillInternal): Abort rather than
resuming loop on fscanf failure, and cleanup file on error.
2011-05-03 21:46:06 +00:00
|
|
|
VIR_DEBUG("Failed to read %s: %m\n", keypath);
|
|
|
|
goto cleanup;
|
2011-02-22 17:33:59 +00:00
|
|
|
}
|
build: use correct type for pid and similar types
No thanks to 64-bit windows, with 64-bit pid_t, we have to avoid
constructs like 'int pid'. Our API in libvirt-qemu cannot be
changed without breaking ABI; but then again, libvirt-qemu can
only be used on systems that support UNIX sockets, which rules
out Windows (even if qemu could be compiled there) - so for all
points on the call chain that interact with this API decision,
we require a different variable name to make it clear that we
audited the use for safety.
Adding a syntax-check rule only solves half the battle; anywhere
that uses printf on a pid_t still needs to be converted, but that
will be a separate patch.
* cfg.mk (sc_correct_id_types): New syntax check.
* src/libvirt-qemu.c (virDomainQemuAttach): Document why we didn't
use pid_t for pid, and validate for overflow.
* include/libvirt/libvirt-qemu.h (virDomainQemuAttach): Tweak name
for syntax check.
* src/vmware/vmware_conf.c (vmwareExtractPid): Likewise.
* src/driver.h (virDrvDomainQemuAttach): Likewise.
* tools/virsh.c (cmdQemuAttach): Likewise.
* src/remote/qemu_protocol.x (qemu_domain_attach_args): Likewise.
* src/qemu_protocol-structs (qemu_domain_attach_args): Likewise.
* src/util/cgroup.c (virCgroupPidCode, virCgroupKillInternal):
Likewise.
* src/qemu/qemu_command.c(qemuParseProcFileStrings): Likewise.
(qemuParseCommandLinePid): Use pid_t for pid.
* daemon/libvirtd.c (daemonForkIntoBackground): Likewise.
* src/conf/domain_conf.h (_virDomainObj): Likewise.
* src/probes.d (rpc_socket_new): Likewise.
* src/qemu/qemu_command.h (qemuParseCommandLinePid): Likewise.
* src/qemu/qemu_driver.c (qemudGetProcessInfo, qemuDomainAttach):
Likewise.
* src/qemu/qemu_process.c (qemuProcessAttach): Likewise.
* src/qemu/qemu_process.h (qemuProcessAttach): Likewise.
* src/uml/uml_driver.c (umlGetProcessInfo): Likewise.
* src/util/virnetdev.h (virNetDevSetNamespace): Likewise.
* src/util/virnetdev.c (virNetDevSetNamespace): Likewise.
* tests/testutils.c (virtTestCaptureProgramOutput): Likewise.
* src/conf/storage_conf.h (_virStoragePerms): Use mode_t, uid_t,
and gid_t rather than int.
* src/security/security_dac.c (virSecurityDACSetOwnership): Likewise.
* src/conf/storage_conf.c (virStorageDefParsePerms): Avoid
compiler warning.
2012-02-10 23:08:11 +00:00
|
|
|
if (virHashLookup(pids, (void*)pid_value))
|
2011-02-22 17:33:59 +00:00
|
|
|
continue;
|
|
|
|
|
build: use correct type for pid and similar types
No thanks to 64-bit windows, with 64-bit pid_t, we have to avoid
constructs like 'int pid'. Our API in libvirt-qemu cannot be
changed without breaking ABI; but then again, libvirt-qemu can
only be used on systems that support UNIX sockets, which rules
out Windows (even if qemu could be compiled there) - so for all
points on the call chain that interact with this API decision,
we require a different variable name to make it clear that we
audited the use for safety.
Adding a syntax-check rule only solves half the battle; anywhere
that uses printf on a pid_t still needs to be converted, but that
will be a separate patch.
* cfg.mk (sc_correct_id_types): New syntax check.
* src/libvirt-qemu.c (virDomainQemuAttach): Document why we didn't
use pid_t for pid, and validate for overflow.
* include/libvirt/libvirt-qemu.h (virDomainQemuAttach): Tweak name
for syntax check.
* src/vmware/vmware_conf.c (vmwareExtractPid): Likewise.
* src/driver.h (virDrvDomainQemuAttach): Likewise.
* tools/virsh.c (cmdQemuAttach): Likewise.
* src/remote/qemu_protocol.x (qemu_domain_attach_args): Likewise.
* src/qemu_protocol-structs (qemu_domain_attach_args): Likewise.
* src/util/cgroup.c (virCgroupPidCode, virCgroupKillInternal):
Likewise.
* src/qemu/qemu_command.c(qemuParseProcFileStrings): Likewise.
(qemuParseCommandLinePid): Use pid_t for pid.
* daemon/libvirtd.c (daemonForkIntoBackground): Likewise.
* src/conf/domain_conf.h (_virDomainObj): Likewise.
* src/probes.d (rpc_socket_new): Likewise.
* src/qemu/qemu_command.h (qemuParseCommandLinePid): Likewise.
* src/qemu/qemu_driver.c (qemudGetProcessInfo, qemuDomainAttach):
Likewise.
* src/qemu/qemu_process.c (qemuProcessAttach): Likewise.
* src/qemu/qemu_process.h (qemuProcessAttach): Likewise.
* src/uml/uml_driver.c (umlGetProcessInfo): Likewise.
* src/util/virnetdev.h (virNetDevSetNamespace): Likewise.
* src/util/virnetdev.c (virNetDevSetNamespace): Likewise.
* tests/testutils.c (virtTestCaptureProgramOutput): Likewise.
* src/conf/storage_conf.h (_virStoragePerms): Use mode_t, uid_t,
and gid_t rather than int.
* src/security/security_dac.c (virSecurityDACSetOwnership): Likewise.
* src/conf/storage_conf.c (virStorageDefParsePerms): Avoid
compiler warning.
2012-02-10 23:08:11 +00:00
|
|
|
VIR_DEBUG("pid=%lu", pid_value);
|
|
|
|
/* Cgroups is a Linux concept, so this cast is safe. */
|
|
|
|
if (kill((pid_t)pid_value, signum) < 0) {
|
2011-02-22 17:33:59 +00:00
|
|
|
if (errno != ESRCH) {
|
|
|
|
rc = -errno;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
/* Leave RC == 0 since we didn't kill one */
|
|
|
|
} else {
|
|
|
|
killedAny = 1;
|
|
|
|
done = false;
|
|
|
|
}
|
|
|
|
|
build: use correct type for pid and similar types
No thanks to 64-bit windows, with 64-bit pid_t, we have to avoid
constructs like 'int pid'. Our API in libvirt-qemu cannot be
changed without breaking ABI; but then again, libvirt-qemu can
only be used on systems that support UNIX sockets, which rules
out Windows (even if qemu could be compiled there) - so for all
points on the call chain that interact with this API decision,
we require a different variable name to make it clear that we
audited the use for safety.
Adding a syntax-check rule only solves half the battle; anywhere
that uses printf on a pid_t still needs to be converted, but that
will be a separate patch.
* cfg.mk (sc_correct_id_types): New syntax check.
* src/libvirt-qemu.c (virDomainQemuAttach): Document why we didn't
use pid_t for pid, and validate for overflow.
* include/libvirt/libvirt-qemu.h (virDomainQemuAttach): Tweak name
for syntax check.
* src/vmware/vmware_conf.c (vmwareExtractPid): Likewise.
* src/driver.h (virDrvDomainQemuAttach): Likewise.
* tools/virsh.c (cmdQemuAttach): Likewise.
* src/remote/qemu_protocol.x (qemu_domain_attach_args): Likewise.
* src/qemu_protocol-structs (qemu_domain_attach_args): Likewise.
* src/util/cgroup.c (virCgroupPidCode, virCgroupKillInternal):
Likewise.
* src/qemu/qemu_command.c(qemuParseProcFileStrings): Likewise.
(qemuParseCommandLinePid): Use pid_t for pid.
* daemon/libvirtd.c (daemonForkIntoBackground): Likewise.
* src/conf/domain_conf.h (_virDomainObj): Likewise.
* src/probes.d (rpc_socket_new): Likewise.
* src/qemu/qemu_command.h (qemuParseCommandLinePid): Likewise.
* src/qemu/qemu_driver.c (qemudGetProcessInfo, qemuDomainAttach):
Likewise.
* src/qemu/qemu_process.c (qemuProcessAttach): Likewise.
* src/qemu/qemu_process.h (qemuProcessAttach): Likewise.
* src/uml/uml_driver.c (umlGetProcessInfo): Likewise.
* src/util/virnetdev.h (virNetDevSetNamespace): Likewise.
* src/util/virnetdev.c (virNetDevSetNamespace): Likewise.
* tests/testutils.c (virtTestCaptureProgramOutput): Likewise.
* src/conf/storage_conf.h (_virStoragePerms): Use mode_t, uid_t,
and gid_t rather than int.
* src/security/security_dac.c (virSecurityDACSetOwnership): Likewise.
* src/conf/storage_conf.c (virStorageDefParsePerms): Avoid
compiler warning.
2012-02-10 23:08:11 +00:00
|
|
|
ignore_value(virHashAddEntry(pids, (void*)pid_value, (void*)1));
|
2011-02-22 17:33:59 +00:00
|
|
|
}
|
|
|
|
VIR_FORCE_FCLOSE(fp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = killedAny ? 1 : 0;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
VIR_FREE(keypath);
|
cgroup: avoid leaking a file
Clang detected a dead store to rc. It turns out that in fixing this,
I also found a FILE* leak.
This is a subtle change in behavior, although unlikely to hit. The
pidfile is a kernel file, so we've probably got more serious problems
under foot if we fail to parse one. However, the previous behavior
was that even if one pid file failed to parse, we tried others,
whereas now we give up on the first failure. Either way, though,
the function returns -1, so the caller will know that something is
going wrong, and that not all pids were necessarily reaped. Besides,
there were other instances already in the code where failure in the
inner loop aborted the outer loop.
* src/util/cgroup.c (virCgroupKillInternal): Abort rather than
resuming loop on fscanf failure, and cleanup file on error.
2011-05-03 21:46:06 +00:00
|
|
|
VIR_FORCE_FCLOSE(fp);
|
2011-02-22 17:33:59 +00:00
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-01-18 16:10:43 +00:00
|
|
|
/*
 * Hash callback for the PID-tracking table.
 *
 * The key pointer is not dereferenced: its integer value is what gets
 * hashed, together with @seed.
 */
static uint32_t virCgroupPidCode(const void *name, uint32_t seed)
{
    unsigned long pid_key;

    pid_key = (unsigned long)(intptr_t)name;

    return virHashCodeGen(&pid_key, sizeof(pid_key), seed);
}
|
|
|
|
/*
 * Key-equality callback for the PID-tracking table: two keys match
 * only when the pointer values themselves are identical.
 */
static bool virCgroupPidEqual(const void *namea, const void *nameb)
{
    if (namea != nameb)
        return false;

    return true;
}
|
|
|
|
/*
 * Key-copy callback for the PID-tracking table: nothing is allocated,
 * the same pointer value is handed back with the const qualifier
 * dropped.
 */
static void *virCgroupPidCopy(const void *name)
{
    void *key_copy = (void *)name;

    return key_copy;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns
|
|
|
|
* < 0 : errno that occurred
|
|
|
|
* 0 : no PIDs killed
|
|
|
|
* 1 : at least one PID killed
|
|
|
|
*/
|
|
|
|
int virCgroupKill(virCgroupPtr group, int signum)
|
|
|
|
{
|
|
|
|
VIR_DEBUG("group=%p path=%s signum=%d", group, group->path, signum);
|
|
|
|
int rc;
|
|
|
|
/* The 'tasks' file in cgroups can contain duplicated
|
|
|
|
* pids, so we use a hash to track which we've already
|
|
|
|
* killed.
|
|
|
|
*/
|
|
|
|
virHashTablePtr pids = virHashCreateFull(100,
|
|
|
|
NULL,
|
|
|
|
virCgroupPidCode,
|
|
|
|
virCgroupPidEqual,
|
|
|
|
virCgroupPidCopy,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
rc = virCgroupKillInternal(group, signum, pids);
|
|
|
|
|
|
|
|
virHashFree(pids);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Signal the tasks of @group and then, recursively, those of every
 * child group found as a sub-directory of the group's path.
 *
 * @pids is the shared table of already-handled PIDs.  When @dormdir is
 * true, virCgroupRemove() is called on each child group once it has
 * been processed.
 *
 * Returns a negative value on error, 1 if at least one PID was killed
 * in this group or any descendant, 0 otherwise.
 */
static int virCgroupKillRecursiveInternal(virCgroupPtr group, int signum, virHashTablePtr pids, bool dormdir)
{
    int rc;
    int killedAny = 0;
    char *keypath = NULL;
    DIR *dp = NULL;
    virCgroupPtr subgroup = NULL;
    struct dirent *ent;

    VIR_DEBUG("group=%p path=%s signum=%d pids=%p", group, group->path, signum, pids);

    rc = virCgroupPathOfController(group, -1, "", &keypath);
    if (rc != 0) {
        VIR_DEBUG("No path of %s, tasks", group->path);
        goto cleanup;
    }

    /* Only a negative result is an error.  A result of 1 merely means
     * tasks were signalled in this group, and the children still have
     * to be visited.
     */
    if ((rc = virCgroupKillInternal(group, signum, pids)) < 0)
        goto cleanup;
    if (rc == 1)
        killedAny = 1;

    VIR_DEBUG("Iterate over children of %s", keypath);
    if (!(dp = opendir(keypath))) {
        rc = -errno;
        goto cleanup;
    }

    while ((ent = readdir(dp))) {
        if (STREQ(ent->d_name, "."))
            continue;
        if (STREQ(ent->d_name, ".."))
            continue;
        if (ent->d_type != DT_DIR)
            continue;

        VIR_DEBUG("Process subdir %s", ent->d_name);

        if ((rc = virCgroupNew(ent->d_name, group, -1, &subgroup)) != 0)
            goto cleanup;

        if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids, true)) < 0)
            goto cleanup;
        if (rc == 1)
            killedAny = 1;

        if (dormdir)
            virCgroupRemove(subgroup);

        virCgroupFree(&subgroup);
    }

    rc = killedAny;

cleanup:
    virCgroupFree(&subgroup);
    /* dp stays NULL when we bail out before opendir() succeeded */
    if (dp)
        closedir(dp);
    VIR_FREE(keypath);

    return rc;
}
|
|
|
|
|
|
|
|
/*
 * Deliver @signum to the tasks of @group and of all its child groups
 * via virCgroupKillRecursiveInternal(), using a freshly created hash
 * table to remember which PIDs have already been handled.
 *
 * Returns a negative value on error, 1 if at least one PID was killed,
 * 0 otherwise.
 */
int virCgroupKillRecursive(virCgroupPtr group, int signum)
{
    int rc;
    virHashTablePtr pids;

    VIR_DEBUG("group=%p path=%s signum=%d", group, group->path, signum);

    pids = virHashCreateFull(100,
                             NULL,
                             virCgroupPidCode,
                             virCgroupPidEqual,
                             virCgroupPidCopy,
                             NULL);

    /* Without a table the duplicate tracking cannot work, so refuse to
     * continue.  NOTE(review): assumes virHashCreateFull() returns NULL
     * on failure -- confirm against virhash.
     */
    if (!pids)
        return -ENOMEM;

    rc = virCgroupKillRecursiveInternal(group, signum, pids, false);

    virHashFree(pids);

    return rc;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Repeatedly signal every task in @group and its child groups until
 * nothing is left to signal, an error occurs, or the attempts run out.
 *
 * Up to 15 rounds are made with a 200ms pause after each one that
 * still found tasks: round 0 sends SIGTERM, round 8 sends SIGKILL and
 * every other round uses signal 0 as a pure existence probe.
 *
 * Returns the result of the last virCgroupKillRecursive() call.
 */
int virCgroupKillPainfully(virCgroupPtr group)
{
    int attempt;
    int rc;

    VIR_DEBUG("cgroup=%p path=%s", group, group->path);

    for (attempt = 0; attempt < 15; attempt++) {
        int signum;

        switch (attempt) {
        case 0:
            signum = SIGTERM;
            break;
        case 8:
            signum = SIGKILL;
            break;
        default:
            signum = 0; /* Just check for existence */
            break;
        }

        rc = virCgroupKillRecursive(group, signum);
        VIR_DEBUG("Iteration %d rc=%d", attempt, rc);

        /* Negative rc: we hit an error; zero: we ran out of PIDs */
        if (rc <= 0)
            break;

        usleep(200 * 1000);
    }

    VIR_DEBUG("Complete %d", rc);
    return rc;
}
|
2011-02-28 14:13:58 +00:00
|
|
|
|
2011-03-09 04:43:26 +00:00
|
|
|
#else /* !(HAVE_KILL, HAVE_MNTENT_H, HAVE_GETMNTENT_R) */
|
2011-02-28 14:13:58 +00:00
|
|
|
int virCgroupKill(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
|
|
int signum ATTRIBUTE_UNUSED)
|
|
|
|
{
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
int virCgroupKillRecursive(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
|
|
int signum ATTRIBUTE_UNUSED)
|
|
|
|
{
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Fallback used when the real implementation is compiled out:
 * the argument is ignored and -ENOSYS is reported. */
int
virCgroupKillPainfully(virCgroupPtr group ATTRIBUTE_UNUSED)
{
    return -ENOSYS;
}
|
2011-03-09 04:43:26 +00:00
|
|
|
#endif /* HAVE_KILL, HAVE_MNTENT_H, HAVE_GETMNTENT_R */
|
2013-04-05 11:48:47 +00:00
|
|
|
|
|
|
|
#ifdef __linux__
|
|
|
|
/*
 * Work out the directory that contains the controller mount points.
 *
 * The first controller of @group that has a mount point is used: its
 * mount point with the last path component removed is duplicated and
 * returned.  The mount point string is cut at its final '/' only for
 * the duration of the copy and is restored afterwards.
 *
 * Returns a newly allocated string owned by the caller, or NULL after
 * reporting an error.
 */
static char *virCgroupIdentifyRoot(virCgroupPtr group)
{
    size_t idx;
    char *sep;
    char *root;

    /* Locate the first controller that is actually mounted */
    for (idx = 0; idx < VIR_CGROUP_CONTROLLER_LAST; idx++) {
        if (group->controllers[idx].mountPoint)
            break;
    }

    if (idx == VIR_CGROUP_CONTROLLER_LAST) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Could not find any mounted controllers"));
        return NULL;
    }

    sep = strrchr(group->controllers[idx].mountPoint, '/');
    if (!sep) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Could not find directory separator in %s"),
                       group->controllers[idx].mountPoint);
        return NULL;
    }

    /* Temporarily terminate the string at the separator to copy the prefix */
    *sep = '\0';
    root = strdup(group->controllers[idx].mountPoint);
    *sep = '/';

    if (!root)
        virReportOOMError();

    return root;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Set up a fresh set of cgroup controller mounts for @group.
 *
 * A tmpfs is mounted on the directory returned by
 * virCgroupIdentifyRoot().  Then, for every controller that has a
 * mount point:
 *   - if the mount point does not exist, it is created and the path
 *     "@oldroot + mountPoint + placement" is bind-mounted onto it;
 *   - if the controller has a linkPoint, a symlink named linkPoint is
 *     created pointing at the mount point.
 *
 * @mountopts is appended verbatim to the tmpfs option string.
 * NOTE(review): presumably it is either empty or starts with ',' --
 * confirm against the callers.
 *
 * Returns 0 on success, -1 on failure after reporting an error.
 */
int virCgroupIsolateMount(virCgroupPtr group, const char *oldroot,
                          const char *mountopts)
{
    int ret = -1;
    size_t i;
    char *opts = NULL;
    char *root = NULL;

    if (!(root = virCgroupIdentifyRoot(group)))
        return -1;

    VIR_DEBUG("Mounting cgroups at '%s'", root);

    if (virFileMakePath(root) < 0) {
        virReportSystemError(errno,
                             _("Unable to create directory %s"),
                             root);
        goto cleanup;
    }

    if (virAsprintf(&opts,
                    "mode=755,size=65536%s", mountopts) < 0) {
        virReportOOMError();
        goto cleanup;
    }

    if (mount("tmpfs", root, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC, opts) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount %s on %s type %s"),
                             "tmpfs", root, "tmpfs");
        goto cleanup;
    }

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        if (!group->controllers[i].mountPoint)
            continue;

        if (!virFileExists(group->controllers[i].mountPoint)) {
            char *src;
            if (virAsprintf(&src, "%s%s%s",
                            oldroot,
                            group->controllers[i].mountPoint,
                            group->controllers[i].placement) < 0) {
                virReportOOMError();
                goto cleanup;
            }

            VIR_DEBUG("Create mount point '%s'", group->controllers[i].mountPoint);
            if (virFileMakePath(group->controllers[i].mountPoint) < 0) {
                virReportSystemError(errno,
                                     _("Unable to create directory %s"),
                                     group->controllers[i].mountPoint);
                VIR_FREE(src);
                goto cleanup;
            }

            if (mount(src, group->controllers[i].mountPoint, NULL, MS_BIND, NULL) < 0) {
                virReportSystemError(errno,
                                     _("Failed to bind cgroup '%s' on '%s'"),
                                     src, group->controllers[i].mountPoint);
                VIR_FREE(src);
                goto cleanup;
            }

            VIR_FREE(src);
        }

        if (group->controllers[i].linkPoint) {
            VIR_DEBUG("Link mount point '%s' to '%s'",
                      group->controllers[i].mountPoint,
                      group->controllers[i].linkPoint);
            if (symlink(group->controllers[i].mountPoint,
                        group->controllers[i].linkPoint) < 0) {
                virReportSystemError(errno,
                                     _("Unable to symlink directory %s to %s"),
                                     group->controllers[i].mountPoint,
                                     group->controllers[i].linkPoint);
                /* Leave through cleanup so root and opts are released */
                goto cleanup;
            }
        }
    }
    ret = 0;

cleanup:
    VIR_FREE(root);
    VIR_FREE(opts);
    return ret;
}
|
|
|
|
#else /* __linux__ */
|
|
|
|
int virCgroupIsolateMount(virCgroupPtr group ATTRIBUTE_UNUSED,
|
|
|
|
const char *oldroot ATTRIBUTE_UNUSED,
|
|
|
|
const char *mountopts ATTRIBUTE_UNUSED)
|
|
|
|
{
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
#endif /* __linux__ */
|