/*
 * vircgroupv1.c: methods for cgroups v1 backend
 *
 * Copyright (C) 2010-2015,2018 Red Hat, Inc.
 * Copyright IBM Corp. 2008
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */
#include <config.h>

#include <unistd.h>
#ifdef __linux__
# include <mntent.h>
# include <sys/stat.h>
# include <sys/mount.h>
#endif /* __linux__ */

#include "internal.h"

#define LIBVIRT_VIRCGROUPPRIV_H_ALLOW
#include "vircgrouppriv.h"

#include "vircgroup.h"
#include "vircgroupbackend.h"
#include "vircgroupv1.h"
#include "virfile.h"
#include "virlog.h"
#include "virstring.h"
#include "virsystemd.h"
#include "virerror.h"
#include "viralloc.h"

VIR_LOG_INIT("util.cgroup");

#define VIR_FROM_THIS VIR_FROM_CGROUP


VIR_ENUM_DECL(virCgroupV1Controller);
VIR_ENUM_IMPL(virCgroupV1Controller,
              VIR_CGROUP_CONTROLLER_LAST,
              "cpu", "cpuacct", "cpuset", "memory", "devices",
              "freezer", "blkio", "net_cls", "perf_event",
              "name=systemd",
);


#ifdef __linux__

/* We're looking for at least one 'cgroup' fs mount,
 * which is *not* a named mount. */
static bool
virCgroupV1Available(void)
{
    bool ret = false;
    FILE *mounts = NULL;
    struct mntent entry;
    char buf[CGROUP_MAX_VAL];

    if (!virFileExists("/proc/cgroups"))
        return false;

    if (!(mounts = fopen("/proc/mounts", "r")))
        return false;

    while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) {
        if (STREQ(entry.mnt_type, "cgroup") && !strstr(entry.mnt_opts, "name=")) {
            ret = true;
            break;
        }
    }

    VIR_FORCE_FCLOSE(mounts);
    return ret;
}


static bool
virCgroupV1ValidateMachineGroup(virCgroupPtr group,
                                const char *name,
                                const char *drivername,
                                const char *machinename)
{
    size_t i;
    g_autofree char *partname = NULL;
    g_autofree char *scopename_old = NULL;
    g_autofree char *scopename_new = NULL;
    g_autofree char *partmachinename = NULL;

    partname = g_strdup_printf("%s.libvirt-%s", name, drivername);

    if (virCgroupPartitionEscape(&partname) < 0)
        return false;

    partmachinename = g_strdup_printf("%s.libvirt-%s",
                                      machinename, drivername);

    if (virCgroupPartitionEscape(&partmachinename) < 0)
        return false;

    if (!(scopename_old = virSystemdMakeScopeName(name, drivername, true)))
        return false;

    if (!(scopename_new = virSystemdMakeScopeName(machinename,
                                                  drivername, false)))
        return false;

    if (virCgroupPartitionEscape(&scopename_old) < 0)
        return false;

    if (virCgroupPartitionEscape(&scopename_new) < 0)
        return false;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        char *tmp;

        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
            continue;

        if (!group->legacy[i].placement)
            continue;

        tmp = strrchr(group->legacy[i].placement, '/');
        if (!tmp)
            return false;

        if (i == VIR_CGROUP_CONTROLLER_CPU ||
            i == VIR_CGROUP_CONTROLLER_CPUACCT ||
            i == VIR_CGROUP_CONTROLLER_CPUSET) {
            if (STREQ(tmp, "/emulator"))
                *tmp = '\0';
            tmp = strrchr(group->legacy[i].placement, '/');
            if (!tmp)
                return false;
        }

        tmp++;

        if (STRNEQ(tmp, name) &&
            STRNEQ(tmp, machinename) &&
            STRNEQ(tmp, partname) &&
            STRNEQ(tmp, partmachinename) &&
            STRNEQ(tmp, scopename_old) &&
            STRNEQ(tmp, scopename_new)) {
            VIR_DEBUG("Name '%s' for controller '%s' does not match "
                      "'%s', '%s', '%s', '%s' or '%s'",
                      tmp, virCgroupV1ControllerTypeToString(i),
                      name, machinename, partname,
                      scopename_old, scopename_new);
            return false;
        }
    }

    return true;
}


static int
virCgroupV1CopyMounts(virCgroupPtr group,
                      virCgroupPtr parent)
{
    size_t i;
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        if (!parent->legacy[i].mountPoint)
            continue;

        group->legacy[i].mountPoint = g_strdup(parent->legacy[i].mountPoint);

        group->legacy[i].linkPoint = g_strdup(parent->legacy[i].linkPoint);
    }
    return 0;
}


static int
virCgroupV1CopyPlacement(virCgroupPtr group,
                         const char *path,
                         virCgroupPtr parent)
{
    size_t i;
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        if (!group->legacy[i].mountPoint)
            continue;

        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
            continue;

        if (path[0] == '/') {
            group->legacy[i].placement = g_strdup(path);
        } else {
            bool delim = STREQ(parent->legacy[i].placement, "/") || STREQ(path, "");
            /*
             * parent == "/" + path="" => "/"
             * parent == "/libvirt.service" + path == "" => "/libvirt.service"
             * parent == "/libvirt.service" + path == "foo" => "/libvirt.service/foo"
             */
            group->legacy[i].placement = g_strdup_printf("%s%s%s",
                                                         parent->legacy[i].placement,
                                                         delim ? "" : "/",
                                                         path);
        }
    }

    return 0;
}


static int
virCgroupV1ResolveMountLink(const char *mntDir,
                            const char *typeStr,
                            virCgroupV1ControllerPtr controller)
{
    g_autofree char *linkSrc = NULL;
    g_autofree char *tmp = NULL;
    char *dirName;
    GStatBuf sb;

    tmp = g_strdup(mntDir);

    dirName = strrchr(tmp, '/');
    if (!dirName) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Missing '/' separator in cgroup mount '%s'"), tmp);
        return -1;
    }

    if (!strchr(dirName + 1, ','))
        return 0;

    *dirName = '\0';
    linkSrc = g_strdup_printf("%s/%s", tmp, typeStr);
    *dirName = '/';

    if (g_lstat(linkSrc, &sb) < 0) {
        if (errno == ENOENT) {
            VIR_WARN("Controller %s co-mounted at %s is missing symlink at %s",
                     typeStr, tmp, linkSrc);
        } else {
            virReportSystemError(errno, _("Cannot stat %s"), linkSrc);
            return -1;
        }
    } else {
        if (!S_ISLNK(sb.st_mode)) {
            VIR_WARN("Expecting a symlink at %s for controller %s",
                     linkSrc, typeStr);
        } else {
            controller->linkPoint = g_steal_pointer(&linkSrc);
        }
    }

    return 0;
}


static bool
virCgroupV1MountOptsMatchController(const char *mntOpts,
                                    const char *typeStr)
{
    const char *tmp = mntOpts;
    int typeLen = strlen(typeStr);

    while (tmp) {
        const char *next = strchr(tmp, ',');
        int len;
        if (next) {
            len = next - tmp;
            next++;
        } else {
            len = strlen(tmp);
        }

        if (typeLen == len && STREQLEN(typeStr, tmp, len))
            return true;

        tmp = next;
    }

    return false;
}


static int
virCgroupV1DetectMounts(virCgroupPtr group,
                        const char *mntType,
                        const char *mntOpts,
                        const char *mntDir)
{
    size_t i;

    if (STRNEQ(mntType, "cgroup"))
        return 0;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        const char *typestr = virCgroupV1ControllerTypeToString(i);

        if (virCgroupV1MountOptsMatchController(mntOpts, typestr)) {
            /* Note that the lines in /proc/mounts have the same
             * order than the mount operations, and that there may
             * be duplicates due to bind mounts. This means
             * that the same mount point may be processed more than
             * once. We need to save the results of the last one,
             * and we need to be careful to release the memory used
             * by previous processing. */
            virCgroupV1ControllerPtr controller = &group->legacy[i];

            VIR_FREE(controller->mountPoint);
            VIR_FREE(controller->linkPoint);
            controller->mountPoint = g_strdup(mntDir);

            /* If it is a co-mount it has a filename like "cpu,cpuacct"
             * and we must identify the symlink path */
            if (virCgroupV1ResolveMountLink(mntDir, typestr, controller) < 0)
                return -1;
        }
    }

    return 0;
}


static int
virCgroupV1DetectPlacement(virCgroupPtr group,
                           const char *path,
                           const char *controllers,
                           const char *selfpath)
{
    size_t i;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        const char *typestr = virCgroupV1ControllerTypeToString(i);

        if (virCgroupV1MountOptsMatchController(controllers, typestr) &&
            group->legacy[i].mountPoint != NULL &&
            group->legacy[i].placement == NULL) {
            /*
             * selfpath == "/" + path="" -> "/"
             * selfpath == "/libvirt.service" + path == "" -> "/libvirt.service"
             * selfpath == "/libvirt.service" + path == "foo" -> "/libvirt.service/foo"
             */
            if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
                group->legacy[i].placement = g_strdup(selfpath);
            } else {
                bool delim = STREQ(selfpath, "/") || STREQ(path, "");

                group->legacy[i].placement = g_strdup_printf("%s%s%s", selfpath,
                                                             delim ? "" : "/",
                                                             path);
            }
        }
    }

    return 0;
}


static int
virCgroupV1ValidatePlacement(virCgroupPtr group,
                             pid_t pid)
{
    size_t i;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        if (!group->legacy[i].mountPoint)
            continue;

        if (!group->legacy[i].placement) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Could not find placement for v1 controller %s at %s"),
                           virCgroupV1ControllerTypeToString(i),
                           group->legacy[i].placement);
            return -1;
        }

        VIR_DEBUG("Detected mount/mapping %zu:%s at %s in %s for pid %lld",
                  i,
                  virCgroupV1ControllerTypeToString(i),
                  group->legacy[i].mountPoint,
                  group->legacy[i].placement,
                  (long long) pid);
    }

    return 0;
}


static char *
virCgroupV1StealPlacement(virCgroupPtr group)
{
    return g_steal_pointer(&group->legacy[VIR_CGROUP_CONTROLLER_SYSTEMD].placement);
}


static int
virCgroupV1DetectControllers(virCgroupPtr group,
                             int controllers,
                             virCgroupPtr parent G_GNUC_UNUSED,
                             int detected)
{
    size_t i;
    size_t j;

    if (controllers >= 0) {
        VIR_DEBUG("Filtering controllers %d", controllers);
        /* First mark requested but non-existing controllers to be ignored */
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            if (((1 << i) & controllers)) {
                int type = 1 << i;
                if (type & detected) {
                    VIR_FREE(group->legacy[i].mountPoint);
                    VIR_FREE(group->legacy[i].placement);
                }
                /* Remove non-existent controllers  */
                if (!group->legacy[i].mountPoint) {
                    VIR_DEBUG("Requested controller '%s' not mounted, ignoring",
                              virCgroupV1ControllerTypeToString(i));
                    controllers &= ~(1 << i);
                }
            }
        }
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            VIR_DEBUG("Controller '%s' wanted=%s, mount='%s'",
                      virCgroupV1ControllerTypeToString(i),
                      (1 << i) & controllers ? "yes" : "no",
                      NULLSTR(group->legacy[i].mountPoint));
            if (!((1 << i) & controllers) &&
                group->legacy[i].mountPoint) {
                /* Check whether a request to disable a controller
                 * clashes with co-mounting of controllers */
                for (j = 0; j < VIR_CGROUP_CONTROLLER_LAST; j++) {
                    if (j == i)
                        continue;
                    if (!((1 << j) & controllers))
                        continue;

                    if (STREQ_NULLABLE(group->legacy[i].mountPoint,
                                       group->legacy[j].mountPoint)) {
                        virReportSystemError(EINVAL,
                                             _("V1 controller '%s' is not wanted, but '%s' is co-mounted"),
                                             virCgroupV1ControllerTypeToString(i),
                                             virCgroupV1ControllerTypeToString(j));
                        return -1;
                    }
                }
                VIR_FREE(group->legacy[i].mountPoint);
                VIR_FREE(group->legacy[i].placement);
            }
        }
    } else {
        VIR_DEBUG("Auto-detecting controllers");
        controllers = 0;
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            int type = 1 << i;
            if (type & detected) {
                VIR_FREE(group->legacy[i].mountPoint);
                VIR_FREE(group->legacy[i].placement);
            }
            VIR_DEBUG("Controller '%s' present=%s",
                      virCgroupV1ControllerTypeToString(i),
                      group->legacy[i].mountPoint ? "yes" : "no");
            if (group->legacy[i].mountPoint == NULL)
                continue;
            controllers |= (1 << i);
        }
    }

    return controllers;
}


static bool
virCgroupV1HasController(virCgroupPtr group,
                         int controller)
{
    return group->legacy[controller].mountPoint != NULL;
}


static int
virCgroupV1GetAnyController(virCgroupPtr group)
{
    size_t i;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        /* Reject any controller with a placement
         * of '/' to avoid doing bad stuff to the root
         * cgroup
         */
        if (group->legacy[i].mountPoint &&
            group->legacy[i].placement &&
            STRNEQ(group->legacy[i].placement, "/")) {
            return i;
        }
    }

    return -1;
}


static int
virCgroupV1PathOfController(virCgroupPtr group,
                            int controller,
                            const char *key,
                            char **path)
{
    if (group->legacy[controller].mountPoint == NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("v1 controller '%s' is not mounted"),
                       virCgroupV1ControllerTypeToString(controller));
        return -1;
    }

    if (group->legacy[controller].placement == NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("v1 controller '%s' is not enabled for group"),
                       virCgroupV1ControllerTypeToString(controller));
        return -1;
    }

    *path = g_strdup_printf("%s%s/%s", group->legacy[controller].mountPoint,
                            group->legacy[controller].placement, NULLSTR_EMPTY(key));

    return 0;
}


static int
virCgroupV1CpuSetInherit(virCgroupPtr parent,
                         virCgroupPtr group)
{
    size_t i;
    const char *inherit_values[] = {
        "cpuset.cpus",
        "cpuset.mems",
        "cpuset.memory_migrate",
    };

    VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path);
    for (i = 0; i < G_N_ELEMENTS(inherit_values); i++) {
        g_autofree char *value = NULL;

        if (virCgroupGetValueStr(parent,
                                 VIR_CGROUP_CONTROLLER_CPUSET,
                                 inherit_values[i],
                                 &value) < 0)
            return -1;

        VIR_DEBUG("Inherit %s = %s", inherit_values[i], value);

        if (virCgroupSetValueStr(group,
                                 VIR_CGROUP_CONTROLLER_CPUSET,
                                 inherit_values[i],
                                 value) < 0)
            return -1;
    }

    return 0;
}


static int
virCgroupV1SetMemoryUseHierarchy(virCgroupPtr group)
{
    unsigned long long value;
    const char *filename = "memory.use_hierarchy";

    if (virCgroupGetValueU64(group,
                             VIR_CGROUP_CONTROLLER_MEMORY,
                             filename, &value) < 0)
        return -1;

    /* Setting twice causes error, so if already enabled, skip setting */
    if (value == 1)
        return 0;

    VIR_DEBUG("Setting up %s/%s", group->path, filename);
    if (virCgroupSetValueU64(group,
                             VIR_CGROUP_CONTROLLER_MEMORY,
                             filename, 1) < 0)
        return -1;

    return 0;
}


static int
virCgroupV1MakeGroup(virCgroupPtr parent,
                     virCgroupPtr group,
                     bool create,
                     unsigned int flags)
{
    size_t i;

    VIR_DEBUG("Make group %s", group->path);
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        g_autofree char *path = NULL;

        /* We must never mkdir() in systemd's hierarchy */
        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
            VIR_DEBUG("Not creating systemd controller group");
            continue;
        }

        /* Skip over controllers that aren't mounted */
        if (!group->legacy[i].mountPoint) {
            VIR_DEBUG("Skipping unmounted controller %s",
                      virCgroupV1ControllerTypeToString(i));
            continue;
        }

        if (virCgroupV1PathOfController(group, i, "", &path) < 0)
            return -1;

        VIR_DEBUG("Make controller %s", path);
        if (!virFileExists(path)) {
            if (!create ||
                mkdir(path, 0755) < 0) {
                if (errno == EEXIST)
                    continue;
                /* With a kernel that doesn't support multi-level directory
                 * for blkio controller, libvirt will fail and disable all
                 * other controllers even though they are available. So
                 * treat blkio as unmounted if mkdir fails. */
                if (i == VIR_CGROUP_CONTROLLER_BLKIO) {
                    VIR_DEBUG("Ignoring mkdir failure with blkio controller. Kernel probably too old");
                    VIR_FREE(group->legacy[i].mountPoint);
                    continue;
                } else {
                    virReportSystemError(errno,
                                         _("Failed to create v1 controller %s for group"),
                                         virCgroupV1ControllerTypeToString(i));
                    return -1;
                }
            }
            if (i == VIR_CGROUP_CONTROLLER_CPUSET &&
                group->legacy[i].mountPoint != NULL &&
                virCgroupV1CpuSetInherit(parent, group) < 0) {
                return -1;
            }
            /*
             * Note that virCgroupV1SetMemoryUseHierarchy should always be
             * called prior to creating subcgroups and attaching tasks.
             */
            if ((flags & VIR_CGROUP_MEM_HIERACHY) &&
                i == VIR_CGROUP_CONTROLLER_MEMORY &&
                group->legacy[i].mountPoint != NULL &&
                virCgroupV1SetMemoryUseHierarchy(group) < 0) {
                return -1;
            }
        }
    }

    VIR_DEBUG("Done making controllers for group");
    return 0;
}


static int
virCgroupV1Remove(virCgroupPtr group)
{
    int rc = 0;
    size_t i;

    VIR_DEBUG("Removing cgroup %s", group->path);
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        g_autofree char *grppath = NULL;

        /* Skip over controllers not mounted */
        if (!group->legacy[i].mountPoint)
            continue;

        /* We must never rmdir() in systemd's hierarchy */
        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
            continue;

        /* Don't delete the root group, if we accidentally
           ended up in it for some reason */
        if (STREQ(group->legacy[i].placement, "/"))
            continue;

        if (virCgroupV1PathOfController(group,
                                        i,
                                        NULL,
                                        &grppath) != 0)
            continue;

        VIR_DEBUG("Removing cgroup %s and all child cgroups", grppath);
        rc = virCgroupRemoveRecursively(grppath);
    }
    VIR_DEBUG("Done removing cgroup %s", group->path);

    return rc;
}


static int
virCgroupV1AddTask(virCgroupPtr group,
                   pid_t pid,
                   unsigned int flags)
{
    size_t i;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        /* Skip over controllers not mounted */
        if (!group->legacy[i].mountPoint)
            continue;

        /* We must never add tasks in systemd's hierarchy
         * unless we're intentionally trying to move a
         * task into a systemd machine scope */
        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD &&
            !(flags & VIR_CGROUP_TASK_SYSTEMD))
            continue;

        if (virCgroupSetValueI64(group, i, "tasks", pid) < 0)
            return -1;
    }

    return 0;
}


static int
virCgroupV1HasEmptyTasks(virCgroupPtr cgroup,
                         int controller)
{
    int ret = -1;
    g_autofree char *content = NULL;

    if (!cgroup)
        return -1;

    ret = virCgroupGetValueStr(cgroup, controller, "tasks", &content);

    if (ret == 0 && content[0] == '\0')
        ret = 1;

    return ret;
}


static int
virCgroupV1KillRecursive(virCgroupPtr group,
                         int signum,
                         virHashTablePtr pids)
{
    int controller = virCgroupV1GetAnyController(group);

    if (controller < 0)
        return -1;

    return virCgroupKillRecursiveInternal(group, signum, pids, controller,
                                          "tasks", false);
}


static char *
virCgroupV1IdentifyRoot(virCgroupPtr group)
{
    char *ret = NULL;
    size_t i;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        char *tmp;
        if (!group->legacy[i].mountPoint)
            continue;
        if (!(tmp = strrchr(group->legacy[i].mountPoint, '/'))) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Could not find directory separator in %s"),
                           group->legacy[i].mountPoint);
            return NULL;
        }

        ret = g_strndup(group->legacy[i].mountPoint,
                        tmp - group->legacy[i].mountPoint);
        return ret;
    }

    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                   _("Could not find any mounted v1 controllers"));
    return NULL;
}


static int
virCgroupV1BindMount(virCgroupPtr group,
                     const char *oldroot,
                     const char *mountopts)
{
    size_t i;
    g_autofree char *opts = NULL;
    g_autofree char *root = NULL;

    if (!(root = virCgroupV1IdentifyRoot(group)))
        return -1;

    VIR_DEBUG("Mounting cgroups at '%s'", root);

    if (virFileMakePath(root) < 0) {
        virReportSystemError(errno,
                             _("Unable to create directory %s"),
                             root);
        return -1;
    }

    opts = g_strdup_printf("mode=755,size=65536%s", mountopts);

    if (mount("tmpfs", root, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC, opts) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount %s on %s type %s"),
                             "tmpfs", root, "tmpfs");
        return -1;
    }

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        if (!group->legacy[i].mountPoint)
            continue;

        if (!virFileExists(group->legacy[i].mountPoint)) {
            g_autofree char *src = NULL;
            src = g_strdup_printf("%s%s", oldroot, group->legacy[i].mountPoint);

            VIR_DEBUG("Create mount point '%s'",
                      group->legacy[i].mountPoint);
            if (virFileMakePath(group->legacy[i].mountPoint) < 0) {
                virReportSystemError(errno,
                                     _("Unable to create directory %s"),
                                     group->legacy[i].mountPoint);
                return -1;
            }

            if (mount(src, group->legacy[i].mountPoint, "none", MS_BIND,
                      NULL) < 0) {
                virReportSystemError(errno,
                                     _("Failed to bind cgroup '%s' on '%s'"),
                                     src, group->legacy[i].mountPoint);
                return -1;
            }
        }

        if (group->legacy[i].linkPoint) {
            VIR_DEBUG("Link mount point '%s' to '%s'",
                      group->legacy[i].mountPoint,
                      group->legacy[i].linkPoint);
            if (symlink(group->legacy[i].mountPoint,
                        group->legacy[i].linkPoint) < 0) {
                virReportSystemError(errno,
                                     _("Unable to symlink directory %s to %s"),
                                     group->legacy[i].mountPoint,
                                     group->legacy[i].linkPoint);
                return -1;
            }
        }
    }

    return 0;
}


static int
virCgroupV1SetOwner(virCgroupPtr cgroup,
                    uid_t uid,
                    gid_t gid,
                    int controllers)
{
    int ret = -1;
    size_t i;
    DIR *dh = NULL;
    int direrr;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        g_autofree char *base = NULL;
        struct dirent *de;

        if (!((1 << i) & controllers))
            continue;

        if (!cgroup->legacy[i].mountPoint)
            continue;

        base = g_strdup_printf("%s%s", cgroup->legacy[i].mountPoint,
                               cgroup->legacy[i].placement);

        if (virDirOpen(&dh, base) < 0)
            goto cleanup;

        while ((direrr = virDirRead(dh, &de, base)) > 0) {
            g_autofree char *entry = NULL;

            entry = g_strdup_printf("%s/%s", base, de->d_name);

            if (chown(entry, uid, gid) < 0) {
                virReportSystemError(errno,
                                     _("cannot chown '%s' to (%u, %u)"),
                                     entry, uid, gid);
                goto cleanup;
            }
        }
        if (direrr < 0)
            goto cleanup;

        if (chown(base, uid, gid) < 0) {
            virReportSystemError(errno,
                                 _("cannot chown '%s' to (%u, %u)"),
                                 base, uid, gid);
            goto cleanup;
        }

        VIR_DIR_CLOSE(dh);
    }

    ret = 0;

 cleanup:
    VIR_DIR_CLOSE(dh);
    return ret;
}


static int
virCgroupV1SetBlkioWeight(virCgroupPtr group,
                          unsigned int weight)
{
    g_autofree char *path = NULL;
    g_autofree char *value = NULL;

    if (virCgroupV1PathOfController(group, VIR_CGROUP_CONTROLLER_BLKIO,
                                    "blkio.bfq.weight", &path) < 0) {
        return -1;
    }

    if (!virFileExists(path)) {
        VIR_FREE(path);

        if (virCgroupV1PathOfController(group, VIR_CGROUP_CONTROLLER_BLKIO,
                                        "blkio.weight", &path) < 0) {
            return -1;
        }
    }

    if (!virFileExists(path)) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("blkio device weight is valid only for bfq or cfq scheduler"));
        return -1;
    }

    value = g_strdup_printf("%u", weight);

    return virCgroupSetValueRaw(path, value);
}


static int
virCgroupV1GetBlkioWeight(virCgroupPtr group,
                          unsigned int *weight)
{
    g_autofree char *path = NULL;
    g_autofree char *value = NULL;

    if (virCgroupV1PathOfController(group, VIR_CGROUP_CONTROLLER_BLKIO,
                                    "blkio.bfq.weight", &path) < 0) {
        return -1;
    }

    if (!virFileExists(path)) {
        VIR_FREE(path);

        if (virCgroupV1PathOfController(group, VIR_CGROUP_CONTROLLER_BLKIO,
                                        "blkio.weight", &path) < 0) {
            return -1;
        }
    }

    if (!virFileExists(path)) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("blkio device weight is valid only for bfq or cfq scheduler"));
        return -1;
    }

    if (virCgroupGetValueRaw(path, &value) < 0)
        return -1;

    if (virStrToLong_ui(value, NULL, 10, weight) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       value);
        return -1;
    }

    return 0;
}


static int
virCgroupV1GetBlkioIoServiced(virCgroupPtr group,
                              long long *bytes_read,
                              long long *bytes_write,
                              long long *requests_read,
                              long long *requests_write)
{
    long long stats_val;
    g_autofree char *str1 = NULL;
    g_autofree char *str2 = NULL;
    char *p1 = NULL;
    char *p2 = NULL;
    size_t i;

    const char *value_names[] = {
        "Read ",
        "Write "
    };
    long long *bytes_ptrs[] = {
        bytes_read,
        bytes_write
    };
    long long *requests_ptrs[] = {
        requests_read,
        requests_write
    };

    *bytes_read = 0;
    *bytes_write = 0;
    *requests_read = 0;
    *requests_write = 0;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.io_service_bytes", &str1) < 0)
        return -1;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.io_serviced", &str2) < 0)
        return -1;

    /* sum up all entries of the same kind, from all devices */
    for (i = 0; i < G_N_ELEMENTS(value_names); i++) {
        p1 = str1;
        p2 = str2;

        while ((p1 = strstr(p1, value_names[i]))) {
            p1 += strlen(value_names[i]);
            if (virStrToLong_ll(p1, &p1, 10, &stats_val) < 0) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Cannot parse byte %sstat '%s'"),
                               value_names[i],
                               p1);
                return -1;
            }

            if (stats_val < 0 ||
                (stats_val > 0 && *bytes_ptrs[i] > (LLONG_MAX - stats_val)))
            {
                virReportError(VIR_ERR_OVERFLOW,
                               _("Sum of byte %sstat overflows"),
                               value_names[i]);
                return -1;
            }
            *bytes_ptrs[i] += stats_val;
        }

        while ((p2 = strstr(p2, value_names[i]))) {
            p2 += strlen(value_names[i]);
            if (virStrToLong_ll(p2, &p2, 10, &stats_val) < 0) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Cannot parse %srequest stat '%s'"),
                               value_names[i],
                               p2);
                return -1;
            }

            if (stats_val < 0 ||
                (stats_val > 0 && *requests_ptrs[i] > (LLONG_MAX - stats_val)))
            {
                virReportError(VIR_ERR_OVERFLOW,
                               _("Sum of %srequest stat overflows"),
                               value_names[i]);
                return -1;
            }
            *requests_ptrs[i] += stats_val;
        }
    }

    return 0;
}


static int
virCgroupV1GetBlkioIoDeviceServiced(virCgroupPtr group,
                                    const char *path,
                                    long long *bytes_read,
                                    long long *bytes_write,
                                    long long *requests_read,
                                    long long *requests_write)
{
    g_autofree char *str1 = NULL;
    g_autofree char *str2 = NULL;
    g_autofree char *str3 = NULL;
    char *p1 = NULL;
    char *p2 = NULL;
    size_t i;

    const char *value_names[] = {
        "Read ",
        "Write "
    };
    long long *bytes_ptrs[] = {
        bytes_read,
        bytes_write
    };
    long long *requests_ptrs[] = {
        requests_read,
        requests_write
    };

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.io_service_bytes", &str1) < 0)
        return -1;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.io_serviced", &str2) < 0)
        return -1;

    if (!(str3 = virCgroupGetBlockDevString(path)))
        return -1;

    if (!(p1 = strstr(str1, str3))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cannot find byte stats for block device '%s'"),
                       str3);
        return -1;
    }

    if (!(p2 = strstr(str2, str3))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cannot find request stats for block device '%s'"),
                       str3);
        return -1;
    }

    for (i = 0; i < G_N_ELEMENTS(value_names); i++) {
        if (!(p1 = strstr(p1, value_names[i]))) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Cannot find byte %sstats for block device '%s'"),
                           value_names[i], str3);
            return -1;
        }

        if (virStrToLong_ll(p1 + strlen(value_names[i]), &p1, 10, bytes_ptrs[i]) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Cannot parse %sstat '%s'"),
                           value_names[i], p1 + strlen(value_names[i]));
            return -1;
        }

        if (!(p2 = strstr(p2, value_names[i]))) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Cannot find request %sstats for block device '%s'"),
                           value_names[i], str3);
            return -1;
        }

        if (virStrToLong_ll(p2 + strlen(value_names[i]), &p2, 10, requests_ptrs[i]) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Cannot parse %sstat '%s'"),
                           value_names[i], p2 + strlen(value_names[i]));
            return -1;
        }
    }

    return 0;
}


static int
virCgroupV1SetBlkioDeviceWeight(virCgroupPtr group,
                                const char *devPath,
                                unsigned int weight)
{
    g_autofree char *str = NULL;
    g_autofree char *blkstr = NULL;
    g_autofree char *path = NULL;

    if (!(blkstr = virCgroupGetBlockDevString(devPath)))
        return -1;

    str = g_strdup_printf("%s%d", blkstr, weight);

    if (virCgroupV1PathOfController(group, VIR_CGROUP_CONTROLLER_BLKIO,
                                    "blkio.weight_device", &path) < 0) {
        return -1;
    }

    if (!virFileExists(path)) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("blkio device weight is valid only for cfq scheduler"));
        return -1;
    }

    return virCgroupSetValueRaw(path, str);
}


static int
virCgroupV1GetBlkioDeviceWeight(virCgroupPtr group,
                                const char *devPath,
                                unsigned int *weight)
{
    g_autofree char *str = NULL;
    g_autofree char *value = NULL;
    g_autofree char *path = NULL;

    if (virCgroupV1PathOfController(group, VIR_CGROUP_CONTROLLER_BLKIO,
                                    "blkio.weight_device", &path) < 0) {
        return -1;
    }

    if (!virFileExists(path)) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("blkio device weight is valid only for cfq scheduler"));
        return -1;
    }

    if (virCgroupGetValueRaw(path, &value) < 0)
        return -1;

    if (virCgroupGetValueForBlkDev(value, devPath, &str) < 0)
        return -1;

    if (!str) {
        *weight = 0;
    } else if (virStrToLong_ui(str, NULL, 10, weight) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       str);
        return -1;
    }

    return 0;
}


static int
virCgroupV1SetBlkioDeviceReadIops(virCgroupPtr group,
                                  const char *path,
                                  unsigned int riops)
{
    g_autofree char *str = NULL;
    g_autofree char *blkstr = NULL;

    if (!(blkstr = virCgroupGetBlockDevString(path)))
        return -1;

    str = g_strdup_printf("%s%u", blkstr, riops);

    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.throttle.read_iops_device",
                                str);
}


static int
virCgroupV1GetBlkioDeviceReadIops(virCgroupPtr group,
                                  const char *path,
                                  unsigned int *riops)
{
    g_autofree char *str = NULL;
    g_autofree char *value = NULL;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.read_iops_device",
                             &value) < 0) {
        return -1;
    }

    if (virCgroupGetValueForBlkDev(value, path, &str) < 0)
        return -1;

    if (!str) {
        *riops = 0;
    } else if (virStrToLong_ui(str, NULL, 10, riops) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       str);
        return -1;
    }

    return 0;
}


static int
virCgroupV1SetBlkioDeviceWriteIops(virCgroupPtr group,
                                   const char *path,
                                   unsigned int wiops)
{
    g_autofree char *str = NULL;
    g_autofree char *blkstr = NULL;

    if (!(blkstr = virCgroupGetBlockDevString(path)))
        return -1;

    str = g_strdup_printf("%s%u", blkstr, wiops);

    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.throttle.write_iops_device",
                                str);
}


static int
virCgroupV1GetBlkioDeviceWriteIops(virCgroupPtr group,
                                   const char *path,
                                   unsigned int *wiops)
{
    g_autofree char *str = NULL;
    g_autofree char *value = NULL;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.write_iops_device",
                             &value) < 0) {
        return -1;
    }

    if (virCgroupGetValueForBlkDev(value, path, &str) < 0)
        return -1;

    if (!str) {
        *wiops = 0;
    } else if (virStrToLong_ui(str, NULL, 10, wiops) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       str);
        return -1;
    }

    return 0;
}


static int
virCgroupV1SetBlkioDeviceReadBps(virCgroupPtr group,
                                 const char *path,
                                 unsigned long long rbps)
{
    g_autofree char *str = NULL;
    g_autofree char *blkstr = NULL;

    if (!(blkstr = virCgroupGetBlockDevString(path)))
        return -1;

    str = g_strdup_printf("%s%llu", blkstr, rbps);

    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.throttle.read_bps_device",
                                str);
}


static int
virCgroupV1GetBlkioDeviceReadBps(virCgroupPtr group,
                                 const char *path,
                                 unsigned long long *rbps)
{
    g_autofree char *str = NULL;
    g_autofree char *value = NULL;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.read_bps_device",
                             &value) < 0) {
        return -1;
    }

    if (virCgroupGetValueForBlkDev(value, path, &str) < 0)
        return -1;

    if (!str) {
        *rbps = 0;
    } else if (virStrToLong_ull(str, NULL, 10, rbps) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       str);
        return -1;
    }

    return 0;
}


static int
virCgroupV1SetBlkioDeviceWriteBps(virCgroupPtr group,
                                  const char *path,
                                  unsigned long long wbps)
{
    g_autofree char *str = NULL;
    g_autofree char *blkstr = NULL;

    if (!(blkstr = virCgroupGetBlockDevString(path)))
        return -1;

    str = g_strdup_printf("%s%llu", blkstr, wbps);

    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.throttle.write_bps_device",
                                str);
}


static int
virCgroupV1GetBlkioDeviceWriteBps(virCgroupPtr group,
                                  const char *path,
                                  unsigned long long *wbps)
{
    g_autofree char *str = NULL;
    g_autofree char *value = NULL;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.write_bps_device",
                             &value) < 0) {
        return -1;
    }

    if (virCgroupGetValueForBlkDev(value, path, &str) < 0)
        return -1;

    if (!str) {
        *wbps = 0;
    } else if (virStrToLong_ull(str, NULL, 10, wbps) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       str);
        return -1;
    }

    return 0;
}


/*
 * Retrieve the "memory.limit_in_bytes" value from the memory controller
 * root dir. This value cannot be modified by userspace and therefore
 * is the maximum limit value supported by cgroups on the local system.
 * Returns this value scaled to KB or falls back to the original
 * VIR_DOMAIN_MEMORY_PARAM_UNLIMITED. Either way, remember the return
 * value to avoid unnecessary cgroup filesystem access.
 */
static unsigned long long int virCgroupV1MemoryUnlimitedKB;
static virOnceControl virCgroupV1MemoryOnce = VIR_ONCE_CONTROL_INITIALIZER;

static void
virCgroupV1MemoryOnceInit(void)
{
    virCgroupPtr group;
    unsigned long long int mem_unlimited = 0ULL;

    if (virCgroupNew(-1, "/", NULL, -1, &group) < 0)
        goto cleanup;

    if (!virCgroupV1HasController(group, VIR_CGROUP_CONTROLLER_MEMORY))
        goto cleanup;

    ignore_value(virCgroupGetValueU64(group,
                                      VIR_CGROUP_CONTROLLER_MEMORY,
                                      "memory.limit_in_bytes",
                                      &mem_unlimited));
 cleanup:
    virCgroupFree(&group);
    virCgroupV1MemoryUnlimitedKB = mem_unlimited >> 10;
}

static unsigned long long int
virCgroupV1GetMemoryUnlimitedKB(void)
{
    if (virOnce(&virCgroupV1MemoryOnce, virCgroupV1MemoryOnceInit) < 0)
        VIR_DEBUG("Init failed, will fall back to defaults.");

    if (virCgroupV1MemoryUnlimitedKB)
        return virCgroupV1MemoryUnlimitedKB;
    else
        return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
}


static int
virCgroupV1SetMemory(virCgroupPtr group,
                     unsigned long long kb)
{
    unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

    if (kb > maxkb) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("Memory '%llu' must be less than %llu"),
                       kb, maxkb);
        return -1;
    }

    if (kb == maxkb)
        return virCgroupSetValueI64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.limit_in_bytes",
                                    -1);
    else
        return virCgroupSetValueU64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.limit_in_bytes",
                                    kb << 10);
}


static int
virCgroupV1GetMemoryStat(virCgroupPtr group,
                         unsigned long long *cache,
                         unsigned long long *activeAnon,
                         unsigned long long *inactiveAnon,
                         unsigned long long *activeFile,
                         unsigned long long *inactiveFile,
                         unsigned long long *unevictable)
{
    int ret = -1;
    char *stat = NULL;
    char *line = NULL;
    unsigned long long cacheVal = 0;
    unsigned long long activeAnonVal = 0;
    unsigned long long inactiveAnonVal = 0;
    unsigned long long activeFileVal = 0;
    unsigned long long inactiveFileVal = 0;
    unsigned long long unevictableVal = 0;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_MEMORY,
                             "memory.stat",
                             &stat) < 0) {
        return -1;
    }

    line = stat;

    while (*line) {
        char *newLine = strchr(line, '\n');
        char *valueStr = strchr(line, ' ');
        unsigned long long value;

        if (newLine)
            *newLine = '\0';

        if (!valueStr) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("Cannot parse 'memory.stat' cgroup file."));
            goto cleanup;
        }
        *valueStr = '\0';

        if (virStrToLong_ull(valueStr + 1, NULL, 10, &value) < 0)
            goto cleanup;

        if (STREQ(line, "cache"))
            cacheVal = value >> 10;
        else if (STREQ(line, "active_anon"))
            activeAnonVal = value >> 10;
        else if (STREQ(line, "inactive_anon"))
            inactiveAnonVal = value >> 10;
        else if (STREQ(line, "active_file"))
            activeFileVal = value >> 10;
        else if (STREQ(line, "inactive_file"))
            inactiveFileVal = value >> 10;
        else if (STREQ(line, "unevictable"))
            unevictableVal = value >> 10;

        if (newLine)
            line = newLine + 1;
        else
            break;
    }

    *cache = cacheVal;
    *activeAnon = activeAnonVal;
    *inactiveAnon = inactiveAnonVal;
    *activeFile = activeFileVal;
    *inactiveFile = inactiveFileVal;
    *unevictable = unevictableVal;

    ret = 0;

 cleanup:
    VIR_FREE(stat);
    return ret;
}


static int
virCgroupV1GetMemoryUsage(virCgroupPtr group,
                          unsigned long *kb)
{
    long long unsigned int usage_in_bytes;
    int ret;
    ret = virCgroupGetValueU64(group,
                               VIR_CGROUP_CONTROLLER_MEMORY,
                               "memory.usage_in_bytes", &usage_in_bytes);
    if (ret == 0)
        *kb = (unsigned long) usage_in_bytes >> 10;
    return ret;
}


static int
virCgroupV1SetMemoryHardLimit(virCgroupPtr group,
                              unsigned long long kb)
{
    return virCgroupV1SetMemory(group, kb);
}


static int
virCgroupV1GetMemoryHardLimit(virCgroupPtr group,
                              unsigned long long *kb)
{
    long long unsigned int limit_in_bytes;

    if (virCgroupGetValueU64(group,
                             VIR_CGROUP_CONTROLLER_MEMORY,
                             "memory.limit_in_bytes", &limit_in_bytes) < 0)
        return -1;

    *kb = limit_in_bytes >> 10;
    if (*kb >= virCgroupV1GetMemoryUnlimitedKB())
        *kb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

    return 0;
}


static int
virCgroupV1SetMemorySoftLimit(virCgroupPtr group,
                              unsigned long long kb)
{
    unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

    if (kb > maxkb) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("Memory '%llu' must be less than %llu"),
                       kb, maxkb);
        return -1;
    }

    if (kb == maxkb)
        return virCgroupSetValueI64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.soft_limit_in_bytes",
                                    -1);
    else
        return virCgroupSetValueU64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.soft_limit_in_bytes",
                                    kb << 10);
}


static int
virCgroupV1GetMemorySoftLimit(virCgroupPtr group,
                              unsigned long long *kb)
{
    long long unsigned int limit_in_bytes;

    if (virCgroupGetValueU64(group,
                             VIR_CGROUP_CONTROLLER_MEMORY,
                             "memory.soft_limit_in_bytes", &limit_in_bytes) < 0)
        return -1;

    *kb = limit_in_bytes >> 10;
    if (*kb >= virCgroupV1GetMemoryUnlimitedKB())
        *kb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

    return 0;
}


static int
virCgroupV1SetMemSwapHardLimit(virCgroupPtr group,
                               unsigned long long kb)
{
    unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

    if (kb > maxkb) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("Memory '%llu' must be less than %llu"),
                       kb, maxkb);
        return -1;
    }

    if (kb == maxkb)
        return virCgroupSetValueI64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.memsw.limit_in_bytes",
                                    -1);
    else
        return virCgroupSetValueU64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.memsw.limit_in_bytes",
                                    kb << 10);
}


static int
virCgroupV1GetMemSwapHardLimit(virCgroupPtr group,
                               unsigned long long *kb)
{
    long long unsigned int limit_in_bytes;

    if (virCgroupGetValueU64(group,
                             VIR_CGROUP_CONTROLLER_MEMORY,
                             "memory.memsw.limit_in_bytes", &limit_in_bytes) < 0)
        return -1;

    *kb = limit_in_bytes >> 10;
    if (*kb >= virCgroupV1GetMemoryUnlimitedKB())
        *kb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

    return 0;
}


static int
virCgroupV1GetMemSwapUsage(virCgroupPtr group,
                           unsigned long long *kb)
{
    long long unsigned int usage_in_bytes;
    int ret;
    ret = virCgroupGetValueU64(group,
                               VIR_CGROUP_CONTROLLER_MEMORY,
                               "memory.memsw.usage_in_bytes", &usage_in_bytes);
    if (ret == 0)
        *kb = usage_in_bytes >> 10;
    return ret;
}


static int
virCgroupV1AllowDevice(virCgroupPtr group,
                       char type,
                       int major,
                       int minor,
                       int perms)
{
    g_autofree char *devstr = NULL;
    g_autofree char *majorstr = NULL;
    g_autofree char *minorstr = NULL;

    if (major < 0)
        majorstr = g_strdup("*");
    else
        majorstr = g_strdup_printf("%i", major);

    if (minor < 0)
        minorstr = g_strdup("*");
    else
        minorstr = g_strdup_printf("%i", minor);

    devstr = g_strdup_printf("%c %s:%s %s", type, majorstr, minorstr,
                             virCgroupGetDevicePermsString(perms));

    if (virCgroupSetValueStr(group,
                             VIR_CGROUP_CONTROLLER_DEVICES,
                             "devices.allow",
                             devstr) < 0)
        return -1;

    return 0;
}


static int
virCgroupV1DenyDevice(virCgroupPtr group,
                      char type,
                      int major,
                      int minor,
                      int perms)
{
    g_autofree char *devstr = NULL;
    g_autofree char *majorstr = NULL;
    g_autofree char *minorstr = NULL;

    if (major < 0)
        majorstr = g_strdup("*");
    else
        majorstr = g_strdup_printf("%i", major);

    if (minor < 0)
        minorstr = g_strdup("*");
    else
        minorstr = g_strdup_printf("%i", minor);

    devstr = g_strdup_printf("%c %s:%s %s", type, majorstr, minorstr,
                             virCgroupGetDevicePermsString(perms));

    if (virCgroupSetValueStr(group,
                             VIR_CGROUP_CONTROLLER_DEVICES,
                             "devices.deny",
                             devstr) < 0)
        return -1;

    return 0;
}


static int
virCgroupV1AllowAllDevices(virCgroupPtr group,
                           int perms)
{
    if (virCgroupV1AllowDevice(group, 'b', -1, -1, perms) < 0)
        return -1;

    if (virCgroupV1AllowDevice(group, 'c', -1, -1, perms) < 0)
        return -1;

    return 0;
}


static int
virCgroupV1DenyAllDevices(virCgroupPtr group)
{
    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_DEVICES,
                                "devices.deny",
                                "a");
}


static int
virCgroupV1SetCpuShares(virCgroupPtr group,
                        unsigned long long shares)
{
    return virCgroupSetValueU64(group,
                                VIR_CGROUP_CONTROLLER_CPU,
                                "cpu.shares", shares);
}


static int
virCgroupV1GetCpuShares(virCgroupPtr group,
                        unsigned long long *shares)
{
    return virCgroupGetValueU64(group,
                                VIR_CGROUP_CONTROLLER_CPU,
                                "cpu.shares", shares);
}


static int
virCgroupV1SetCpuCfsPeriod(virCgroupPtr group,
                           unsigned long long cfs_period)
{
    /* The cfs_period should be greater or equal than 1ms, and less or equal
     * than 1s.
     */
    if (cfs_period < 1000 || cfs_period > 1000000) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("cfs_period '%llu' must be in range (1000, 1000000)"),
                       cfs_period);
        return -1;
    }

    return virCgroupSetValueU64(group,
                                VIR_CGROUP_CONTROLLER_CPU,
                                "cpu.cfs_period_us", cfs_period);
}


static int
virCgroupV1GetCpuCfsPeriod(virCgroupPtr group,
                           unsigned long long *cfs_period)
{
    return virCgroupGetValueU64(group,
                                VIR_CGROUP_CONTROLLER_CPU,
                                "cpu.cfs_period_us", cfs_period);
}


static int
virCgroupV1SetCpuCfsQuota(virCgroupPtr group,
                          long long cfs_quota)
{
    /* The cfs_quota should be greater or equal than 1ms */
    if (cfs_quota >= 0 &&
        (cfs_quota < 1000 ||
         cfs_quota > ULLONG_MAX / 1000)) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("cfs_quota '%lld' must be in range (1000, %llu)"),
                       cfs_quota, ULLONG_MAX / 1000);
        return -1;
    }

    return virCgroupSetValueI64(group,
                                VIR_CGROUP_CONTROLLER_CPU,
                                "cpu.cfs_quota_us", cfs_quota);
}


static int
virCgroupV1GetCpuCfsQuota(virCgroupPtr group,
                          long long *cfs_quota)
{
    return virCgroupGetValueI64(group,
                                VIR_CGROUP_CONTROLLER_CPU,
                                "cpu.cfs_quota_us", cfs_quota);
}


static bool
virCgroupV1SupportsCpuBW(virCgroupPtr cgroup)
{
    g_autofree char *path = NULL;

    if (!cgroup)
        return false;

    if (virCgroupV1PathOfController(cgroup, VIR_CGROUP_CONTROLLER_CPU,
                                    "cpu.cfs_period_us", &path) < 0) {
        virResetLastError();
        return false;
    }

    return virFileExists(path);
}


static int
virCgroupV1GetCpuacctUsage(virCgroupPtr group,
                           unsigned long long *usage)
{
    return virCgroupGetValueU64(group,
                                VIR_CGROUP_CONTROLLER_CPUACCT,
                                "cpuacct.usage", usage);
}


static int
virCgroupV1GetCpuacctPercpuUsage(virCgroupPtr group,
                                 char **usage)
{
    return virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT,
                                "cpuacct.usage_percpu", usage);
}


static int
virCgroupV1GetCpuacctStat(virCgroupPtr group,
                          unsigned long long *user,
                          unsigned long long *sys)
{
    g_autofree char *str = NULL;
    char *p;
    static double scale = -1.0;

    if (virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT,
                             "cpuacct.stat", &str) < 0)
        return -1;

    if (!(p = STRSKIP(str, "user ")) ||
        virStrToLong_ull(p, &p, 10, user) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cannot parse user stat '%s'"),
                       p);
        return -1;
    }
    if (!(p = STRSKIP(p, "\nsystem ")) ||
        virStrToLong_ull(p, NULL, 10, sys) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cannot parse sys stat '%s'"),
                       p);
        return -1;
    }
    /* times reported are in system ticks (generally 100 Hz), but that
     * rate can theoretically vary between machines.  Scale things
     * into approximate nanoseconds.  */
    if (scale < 0) {
        long ticks_per_sec = sysconf(_SC_CLK_TCK);
        if (ticks_per_sec == -1) {
            virReportSystemError(errno, "%s",
                                 _("Cannot determine system clock HZ"));
            return -1;
        }
        scale = 1000000000.0 / ticks_per_sec;
    }
    *user *= scale;
    *sys *= scale;

    return 0;
}


static int
virCgroupV1SetFreezerState(virCgroupPtr group,
                           const char *state)
{
    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_FREEZER,
                                "freezer.state", state);
}


static int
virCgroupV1GetFreezerState(virCgroupPtr group,
                           char **state)
{
    return virCgroupGetValueStr(group,
                                VIR_CGROUP_CONTROLLER_FREEZER,
                                "freezer.state", state);
}


static int
virCgroupV1SetCpusetMems(virCgroupPtr group,
                         const char *mems)
{
    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.mems",
                                mems);
}


static int
virCgroupV1GetCpusetMems(virCgroupPtr group,
                         char **mems)
{
    return virCgroupGetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.mems",
                                mems);
}


static int
virCgroupV1SetCpusetMemoryMigrate(virCgroupPtr group,
                                  bool migrate)
{
    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.memory_migrate",
                                migrate ? "1" : "0");
}


static int
virCgroupV1GetCpusetMemoryMigrate(virCgroupPtr group,
                                  bool *migrate)
{
    unsigned long long value = 0;
    int ret = virCgroupGetValueU64(group,
                                   VIR_CGROUP_CONTROLLER_CPUSET,
                                   "cpuset.memory_migrate",
                                   &value);
    *migrate = !!value;
    return ret;
}


static int
virCgroupV1SetCpusetCpus(virCgroupPtr group,
                         const char *cpus)
{
    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.cpus",
                                cpus);
}


static int
virCgroupV1GetCpusetCpus(virCgroupPtr group,
                         char **cpus)
{
    return virCgroupGetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.cpus",
                                cpus);
}


virCgroupBackend virCgroupV1Backend = {
    .type = VIR_CGROUP_BACKEND_TYPE_V1,

    .available = virCgroupV1Available,
    .validateMachineGroup = virCgroupV1ValidateMachineGroup,
    .copyMounts = virCgroupV1CopyMounts,
    .copyPlacement = virCgroupV1CopyPlacement,
    .detectMounts = virCgroupV1DetectMounts,
    .detectPlacement = virCgroupV1DetectPlacement,
    .validatePlacement = virCgroupV1ValidatePlacement,
    .stealPlacement = virCgroupV1StealPlacement,
    .detectControllers = virCgroupV1DetectControllers,
    .hasController = virCgroupV1HasController,
    .getAnyController = virCgroupV1GetAnyController,
    .pathOfController = virCgroupV1PathOfController,
    .makeGroup = virCgroupV1MakeGroup,
    .remove = virCgroupV1Remove,
    .addTask = virCgroupV1AddTask,
    .hasEmptyTasks = virCgroupV1HasEmptyTasks,
    .killRecursive = virCgroupV1KillRecursive,
    .bindMount = virCgroupV1BindMount,
    .setOwner = virCgroupV1SetOwner,

    .setBlkioWeight = virCgroupV1SetBlkioWeight,
    .getBlkioWeight = virCgroupV1GetBlkioWeight,
    .getBlkioIoServiced = virCgroupV1GetBlkioIoServiced,
    .getBlkioIoDeviceServiced = virCgroupV1GetBlkioIoDeviceServiced,
    .setBlkioDeviceWeight = virCgroupV1SetBlkioDeviceWeight,
    .getBlkioDeviceWeight = virCgroupV1GetBlkioDeviceWeight,
    .setBlkioDeviceReadIops = virCgroupV1SetBlkioDeviceReadIops,
    .getBlkioDeviceReadIops = virCgroupV1GetBlkioDeviceReadIops,
    .setBlkioDeviceWriteIops = virCgroupV1SetBlkioDeviceWriteIops,
    .getBlkioDeviceWriteIops = virCgroupV1GetBlkioDeviceWriteIops,
    .setBlkioDeviceReadBps = virCgroupV1SetBlkioDeviceReadBps,
    .getBlkioDeviceReadBps = virCgroupV1GetBlkioDeviceReadBps,
    .setBlkioDeviceWriteBps = virCgroupV1SetBlkioDeviceWriteBps,
    .getBlkioDeviceWriteBps = virCgroupV1GetBlkioDeviceWriteBps,

    .setMemory = virCgroupV1SetMemory,
    .getMemoryStat = virCgroupV1GetMemoryStat,
    .getMemoryUsage = virCgroupV1GetMemoryUsage,
    .setMemoryHardLimit = virCgroupV1SetMemoryHardLimit,
    .getMemoryHardLimit = virCgroupV1GetMemoryHardLimit,
    .setMemorySoftLimit = virCgroupV1SetMemorySoftLimit,
    .getMemorySoftLimit = virCgroupV1GetMemorySoftLimit,
    .setMemSwapHardLimit = virCgroupV1SetMemSwapHardLimit,
    .getMemSwapHardLimit = virCgroupV1GetMemSwapHardLimit,
    .getMemSwapUsage = virCgroupV1GetMemSwapUsage,

    .allowDevice = virCgroupV1AllowDevice,
    .denyDevice = virCgroupV1DenyDevice,
    .allowAllDevices = virCgroupV1AllowAllDevices,
    .denyAllDevices = virCgroupV1DenyAllDevices,

    .setCpuShares = virCgroupV1SetCpuShares,
    .getCpuShares = virCgroupV1GetCpuShares,
    .setCpuCfsPeriod = virCgroupV1SetCpuCfsPeriod,
    .getCpuCfsPeriod = virCgroupV1GetCpuCfsPeriod,
    .setCpuCfsQuota = virCgroupV1SetCpuCfsQuota,
    .getCpuCfsQuota = virCgroupV1GetCpuCfsQuota,
    .supportsCpuBW = virCgroupV1SupportsCpuBW,

    .getCpuacctUsage = virCgroupV1GetCpuacctUsage,
    .getCpuacctPercpuUsage = virCgroupV1GetCpuacctPercpuUsage,
    .getCpuacctStat = virCgroupV1GetCpuacctStat,

    .setFreezerState = virCgroupV1SetFreezerState,
    .getFreezerState = virCgroupV1GetFreezerState,

    .setCpusetMems = virCgroupV1SetCpusetMems,
    .getCpusetMems = virCgroupV1GetCpusetMems,
    .setCpusetMemoryMigrate = virCgroupV1SetCpusetMemoryMigrate,
    .getCpusetMemoryMigrate = virCgroupV1GetCpusetMemoryMigrate,
    .setCpusetCpus = virCgroupV1SetCpusetCpus,
    .getCpusetCpus = virCgroupV1GetCpusetCpus,
};


void
virCgroupV1Register(void)
{
    virCgroupBackendRegister(&virCgroupV1Backend);
}

#else /* !__linux__ */

void
virCgroupV1Register(void)
{
    VIR_INFO("Control groups not supported on this platform");
}

#endif /* !__linux__ */