libvirt/src/util/virresctrl.c
Peter Krempa e88a08e80b util: bitmap: Use VIR_SHRINK_N in virBitmapShrink
The function only reduces the size of the bitmap thus we can use the
appropriate shrinking function which also does not have any return
value.

Since virBitmapShrink now does not return any value callers need to be
fixed as well.
2018-02-05 16:08:57 +01:00

1655 lines
44 KiB
C

/*
* virresctrl.c:
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "virresctrlpriv.h"
#include "c-ctype.h"
#include "count-one-bits.h"
#include "viralloc.h"
#include "virfile.h"
#include "virlog.h"
#include "virobject.h"
#include "virstring.h"
#define VIR_FROM_THIS VIR_FROM_RESCTRL
VIR_LOG_INIT("util.virresctrl")
/* Common definitions */
#define SYSFS_RESCTRL_PATH "/sys/fs/resctrl"
/* Resctrl is short for Resource Control. It might be implemented for various
* resources, but at the time of this writing this is only supported for cache
* allocation technology (aka CAT). Hence the reson for leaving 'Cache' out of
* all the structure and function names for now (can be added later if needed.
*/
/* Our naming for cache types and scopes */
VIR_ENUM_IMPL(virCache, VIR_CACHE_TYPE_LAST,
"both",
"code",
"data")
/*
* This is the same enum, but for the resctrl naming
* of the type (L<level><type>)
*/
VIR_ENUM_DECL(virResctrl)
VIR_ENUM_IMPL(virResctrl, VIR_CACHE_TYPE_LAST,
"",
"CODE",
"DATA")
/* Info-related definitions and InfoClass-related functions */
typedef struct _virResctrlInfoPerType virResctrlInfoPerType;
typedef virResctrlInfoPerType *virResctrlInfoPerTypePtr;
struct _virResctrlInfoPerType {
/* Kernel-provided information */
char *cbm_mask;
unsigned int min_cbm_bits;
/* Our computed information from the above */
unsigned int bits;
unsigned int max_cache_id;
/* In order to be self-sufficient we need size information per cache.
* Funnily enough, one of the outcomes of the resctrl design is that it
* does not account for different sizes per cache on the same level. So
* for the sake of easiness, let's copy that, for now. */
unsigned long long size;
/* Information that we will return upon request (this is public struct) as
* until now all the above is internal to this module */
virResctrlInfoPerCache control;
};
typedef struct _virResctrlInfoPerLevel virResctrlInfoPerLevel;
typedef virResctrlInfoPerLevel *virResctrlInfoPerLevelPtr;
struct _virResctrlInfoPerLevel {
virResctrlInfoPerTypePtr *types;
};
struct _virResctrlInfo {
virObject parent;
virResctrlInfoPerLevelPtr *levels;
size_t nlevels;
};
static virClassPtr virResctrlInfoClass;
static void
virResctrlInfoDispose(void *obj)
{
size_t i = 0;
size_t j = 0;
virResctrlInfoPtr resctrl = obj;
for (i = 0; i < resctrl->nlevels; i++) {
virResctrlInfoPerLevelPtr level = resctrl->levels[i];
if (!level)
continue;
if (level->types) {
for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
if (level->types[j])
VIR_FREE(level->types[j]->cbm_mask);
VIR_FREE(level->types[j]);
}
}
VIR_FREE(level->types);
VIR_FREE(level);
}
VIR_FREE(resctrl->levels);
}
static int
virResctrlInfoOnceInit(void)
{
if (!(virResctrlInfoClass = virClassNew(virClassForObject(),
"virResctrlInfo",
sizeof(virResctrlInfo),
virResctrlInfoDispose)))
return -1;
return 0;
}
VIR_ONCE_GLOBAL_INIT(virResctrlInfo)
virResctrlInfoPtr
virResctrlInfoNew(void)
{
if (virResctrlInfoInitialize() < 0)
return NULL;
return virObjectNew(virResctrlInfoClass);
}
/* Alloc-related definitions and AllocClass-related functions */
/*
* virResctrlAlloc represents one allocation (in XML under cputune/cachetune and
* consequently a directory under /sys/fs/resctrl). Since it can have multiple
* parts of multiple caches allocated it is represented as bunch of nested
* sparse arrays (by sparse I mean array of pointers so that each might be NULL
* in case there is no allocation for that particular one (level, cache, ...)).
*
* Since one allocation can be made for caches on different levels, the first
* nested sparse array is of types virResctrlAllocPerLevel. For example if you
* have allocation for level 3 cache, there will be three NULL pointers and then
* allocated pointer to virResctrlAllocPerLevel. That way you can access it by
* `alloc[level]` as O(1) is desired instead of crawling through normal arrays
* or lists in three nested loops. The code uses a lot of direct accesses.
*
* Each virResctrlAllocPerLevel can have allocations for different cache
* allocation types. You can allocate instruction cache (VIR_CACHE_TYPE_CODE),
* data cache (VIR_CACHE_TYPE_DATA) or unified cache (VIR_CACHE_TYPE_BOTH).
* Those allocations are kept in sparse array of virResctrlAllocPerType pointers.
*
* For each virResctrlAllocPerType users can request some size of the cache to
* be allocated. That's what the sparse array `sizes` is for. Non-NULL
* pointers represent requested size allocations. The array is indexed by host
* cache id (gotten from `/sys/devices/system/cpu/cpuX/cache/indexY/id`). Users
* can see this information e.g. in the output of `virsh capabilities` (for that
* information there's the other struct, namely `virResctrlInfo`).
*
* When allocation is being created we need to find unused part of the cache for
* all of them. While doing that we store the bitmask in a sparse array of
* virBitmaps named `masks` indexed the same way as `sizes`. The upper bounds
* of the sparse arrays are stored in nmasks or nsizes, respectively.
*/
typedef struct _virResctrlAllocPerType virResctrlAllocPerType;
typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
struct _virResctrlAllocPerType {
/* There could be bool saying whether this is set or not, but since everything
* in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
* have to have one more level of allocation anyway, so this stays faithful to
* the concept */
unsigned long long **sizes;
size_t nsizes;
/* Mask for each cache */
virBitmapPtr *masks;
size_t nmasks;
};
typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
struct _virResctrlAllocPerLevel {
virResctrlAllocPerTypePtr *types; /* Indexed with enum virCacheType */
/* There is no `ntypes` member variable as it is always allocated for
* VIR_CACHE_TYPE_LAST number of items */
};
struct _virResctrlAlloc {
virObject parent;
virResctrlAllocPerLevelPtr *levels;
size_t nlevels;
/* The identifier (any unique string for now) */
char *id;
/* libvirt-generated path in /sys/fs/resctrl for this particular
* allocation */
char *path;
};
static virClassPtr virResctrlAllocClass;
static void
virResctrlAllocDispose(void *obj)
{
size_t i = 0;
size_t j = 0;
size_t k = 0;
virResctrlAllocPtr resctrl = obj;
for (i = 0; i < resctrl->nlevels; i++) {
virResctrlAllocPerLevelPtr level = resctrl->levels[i];
if (!level)
continue;
for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
virResctrlAllocPerTypePtr type = level->types[j];
if (!type)
continue;
for (k = 0; k < type->nsizes; k++)
VIR_FREE(type->sizes[k]);
for (k = 0; k < type->nmasks; k++)
virBitmapFree(type->masks[k]);
VIR_FREE(type->sizes);
VIR_FREE(type->masks);
VIR_FREE(type);
}
VIR_FREE(level->types);
VIR_FREE(level);
}
VIR_FREE(resctrl->id);
VIR_FREE(resctrl->path);
VIR_FREE(resctrl->levels);
}
static int
virResctrlAllocOnceInit(void)
{
if (!(virResctrlAllocClass = virClassNew(virClassForObject(),
"virResctrlAlloc",
sizeof(virResctrlAlloc),
virResctrlAllocDispose)))
return -1;
return 0;
}
VIR_ONCE_GLOBAL_INIT(virResctrlAlloc)
virResctrlAllocPtr
virResctrlAllocNew(void)
{
if (virResctrlAllocInitialize() < 0)
return NULL;
return virObjectNew(virResctrlAllocClass);
}
/* Common functions */
#ifdef __linux__
static int
virResctrlLockInternal(int op)
{
int fd = open(SYSFS_RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC);
if (fd < 0) {
virReportSystemError(errno, "%s", _("Cannot open resctrl"));
return -1;
}
if (flock(fd, op) < 0) {
virReportSystemError(errno, "%s", _("Cannot lock resctrl"));
VIR_FORCE_CLOSE(fd);
return -1;
}
return fd;
}
static inline int
virResctrlLockWrite(void)
{
return virResctrlLockInternal(LOCK_EX);
}
#else
static inline int
virResctrlLockWrite(void)
{
virReportSystemError(ENOSYS, "%s",
_("resctrl not supported on this platform"));
return -1;
}
#endif
static int
virResctrlUnlock(int fd)
{
if (fd == -1)
return 0;
#ifdef __linux__
/* The lock gets unlocked by closing the fd, which we need to do anyway in
* order to clean up properly */
if (VIR_CLOSE(fd) < 0) {
virReportSystemError(errno, "%s", _("Cannot close resctrl"));
/* Trying to save the already broken */
if (flock(fd, LOCK_UN) < 0)
virReportSystemError(errno, "%s", _("Cannot unlock resctrl"));
return -1;
}
#endif /* ! __linux__ */
return 0;
}
/* Info-related functions */
static bool
virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
{
size_t i = 0;
size_t j = 0;
if (!resctrl)
return true;
for (i = 0; i < resctrl->nlevels; i++) {
virResctrlInfoPerLevelPtr i_level = resctrl->levels[i];
if (!i_level)
continue;
for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
if (i_level->types[j])
return false;
}
}
return true;
}
#ifdef __linux__
int
virResctrlGetInfo(virResctrlInfoPtr resctrl)
{
DIR *dirp = NULL;
char *endptr = NULL;
char *tmp_str = NULL;
int ret = -1;
int rv = -1;
int type = 0;
struct dirent *ent = NULL;
unsigned int level = 0;
virResctrlInfoPerLevelPtr i_level = NULL;
virResctrlInfoPerTypePtr i_type = NULL;
rv = virDirOpenIfExists(&dirp, SYSFS_RESCTRL_PATH "/info");
if (rv <= 0) {
ret = rv;
goto cleanup;
}
while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH "/info")) > 0) {
VIR_DEBUG("Parsing info type '%s'", ent->d_name);
if (ent->d_name[0] != 'L')
continue;
if (virStrToLong_uip(ent->d_name + 1, &endptr, 10, &level) < 0) {
VIR_DEBUG("Cannot parse resctrl cache info level '%s'", ent->d_name + 1);
continue;
}
type = virResctrlTypeFromString(endptr);
if (type < 0) {
VIR_DEBUG("Cannot parse resctrl cache info type '%s'", endptr);
continue;
}
if (VIR_ALLOC(i_type) < 0)
goto cleanup;
i_type->control.scope = type;
rv = virFileReadValueUint(&i_type->control.max_allocation,
SYSFS_RESCTRL_PATH "/info/%s/num_closids",
ent->d_name);
if (rv == -2) {
/* The file doesn't exist, so it's unusable for us,
* but we can scan further */
VIR_WARN("The path '" SYSFS_RESCTRL_PATH "/info/%s/num_closids' "
"does not exist",
ent->d_name);
} else if (rv < 0) {
/* Other failures are fatal, so just quit */
goto cleanup;
}
rv = virFileReadValueString(&i_type->cbm_mask,
SYSFS_RESCTRL_PATH
"/info/%s/cbm_mask",
ent->d_name);
if (rv == -2) {
/* If the previous file exists, so should this one. Hence -2 is
* fatal in this case as well (errors out in next condition) - the
* kernel interface might've changed too much or something else is
* wrong. */
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Cannot get cbm_mask from resctrl cache info"));
}
if (rv < 0)
goto cleanup;
virStringTrimOptionalNewline(i_type->cbm_mask);
rv = virFileReadValueUint(&i_type->min_cbm_bits,
SYSFS_RESCTRL_PATH "/info/%s/min_cbm_bits",
ent->d_name);
if (rv == -2)
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Cannot get min_cbm_bits from resctrl cache info"));
if (rv < 0)
goto cleanup;
if (resctrl->nlevels <= level &&
VIR_EXPAND_N(resctrl->levels, resctrl->nlevels,
level - resctrl->nlevels + 1) < 0)
goto cleanup;
if (!resctrl->levels[level]) {
virResctrlInfoPerTypePtr *types = NULL;
if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0)
goto cleanup;
if (VIR_ALLOC(resctrl->levels[level]) < 0) {
VIR_FREE(types);
goto cleanup;
}
resctrl->levels[level]->types = types;
}
i_level = resctrl->levels[level];
if (i_level->types[type]) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Duplicate cache type in resctrl for level %u"),
level);
goto cleanup;
}
for (tmp_str = i_type->cbm_mask; *tmp_str != '\0'; tmp_str++) {
if (!c_isxdigit(*tmp_str)) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Cannot parse cbm_mask from resctrl cache info"));
goto cleanup;
}
i_type->bits += count_one_bits(virHexToBin(*tmp_str));
}
VIR_STEAL_PTR(i_level->types[type], i_type);
}
ret = 0;
cleanup:
VIR_DIR_CLOSE(dirp);
if (i_type)
VIR_FREE(i_type->cbm_mask);
VIR_FREE(i_type);
return ret;
}
#else /* ! __linux__ */
int
virResctrlGetInfo(virResctrlInfoPtr resctrl ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("Cache tune not supported on this platform"));
return -1;
}
#endif /* ! __linux__ */
int
virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
unsigned int level,
unsigned long long size,
size_t *ncontrols,
virResctrlInfoPerCachePtr **controls)
{
virResctrlInfoPerLevelPtr i_level = NULL;
virResctrlInfoPerTypePtr i_type = NULL;
size_t i = 0;
int ret = -1;
if (virResctrlInfoIsEmpty(resctrl))
return 0;
if (level >= resctrl->nlevels)
return 0;
i_level = resctrl->levels[level];
if (!i_level)
return 0;
for (i = 0; i < VIR_CACHE_TYPE_LAST; i++) {
i_type = i_level->types[i];
if (!i_type)
continue;
/* Let's take the opportunity to update our internal information about
* the cache size */
if (!i_type->size) {
i_type->size = size;
i_type->control.granularity = size / i_type->bits;
if (i_type->min_cbm_bits != 1)
i_type->control.min = i_type->min_cbm_bits * i_type->control.granularity;
} else {
if (i_type->size != size) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("level %u cache size %llu does not match "
"expected size %llu"),
level, i_type->size, size);
goto error;
}
i_type->max_cache_id++;
}
if (VIR_EXPAND_N(*controls, *ncontrols, 1) < 0)
goto error;
if (VIR_ALLOC((*controls)[*ncontrols - 1]) < 0)
goto error;
memcpy((*controls)[*ncontrols - 1], &i_type->control, sizeof(i_type->control));
}
ret = 0;
cleanup:
return ret;
error:
while (*ncontrols)
VIR_FREE((*controls)[--*ncontrols]);
VIR_FREE(*controls);
goto cleanup;
}
/* Alloc-related functions */
bool
virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl)
{
size_t i = 0;
size_t j = 0;
size_t k = 0;
if (!resctrl)
return true;
for (i = 0; i < resctrl->nlevels; i++) {
virResctrlAllocPerLevelPtr a_level = resctrl->levels[i];
if (!a_level)
continue;
for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
virResctrlAllocPerTypePtr a_type = a_level->types[j];
if (!a_type)
continue;
for (k = 0; k < a_type->nsizes; k++) {
if (a_type->sizes[k])
return false;
}
for (k = 0; k < a_type->nmasks; k++) {
if (a_type->masks[k])
return false;
}
}
}
return true;
}
static virResctrlAllocPerTypePtr
virResctrlAllocGetType(virResctrlAllocPtr resctrl,
unsigned int level,
virCacheType type)
{
virResctrlAllocPerLevelPtr a_level = NULL;
if (resctrl->nlevels <= level &&
VIR_EXPAND_N(resctrl->levels, resctrl->nlevels, level - resctrl->nlevels + 1) < 0)
return NULL;
if (!resctrl->levels[level]) {
virResctrlAllocPerTypePtr *types = NULL;
if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0)
return NULL;
if (VIR_ALLOC(resctrl->levels[level]) < 0) {
VIR_FREE(types);
return NULL;
}
resctrl->levels[level]->types = types;
}
a_level = resctrl->levels[level];
if (!a_level->types[type] && VIR_ALLOC(a_level->types[type]) < 0)
return NULL;
return a_level->types[type];
}
static int
virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
unsigned int level,
virCacheType type,
unsigned int cache,
virBitmapPtr mask)
{
virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
if (!a_type)
return -1;
if (a_type->nmasks <= cache &&
VIR_EXPAND_N(a_type->masks, a_type->nmasks,
cache - a_type->nmasks + 1) < 0)
return -1;
if (!a_type->masks[cache]) {
a_type->masks[cache] = virBitmapNew(virBitmapSize(mask));
if (!a_type->masks[cache])
return -1;
}
return virBitmapCopy(a_type->masks[cache], mask);
}
static int
virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
unsigned int level,
virCacheType type,
unsigned int cache,
unsigned long long size)
{
virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
if (!a_type)
return -1;
if (a_type->nsizes <= cache &&
VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
cache - a_type->nsizes + 1) < 0)
return -1;
if (!a_type->sizes[cache] && VIR_ALLOC(a_type->sizes[cache]) < 0)
return -1;
*(a_type->sizes[cache]) = size;
return 0;
}
/*
* Check if there is an allocation for this level/type/cache already. Called
* before updating the structure. VIR_CACHE_TYPE_BOTH collides with any type,
* the other types collide with itself. This code basically checks if either:
* `alloc[level]->types[type]->sizes[cache]`
* or
* `alloc[level]->types[VIR_CACHE_TYPE_BOTH]->sizes[cache]`
* is non-NULL. All the fuzz around it is checking for NULL pointers along
* the way.
*/
static bool
virResctrlAllocCheckCollision(virResctrlAllocPtr alloc,
unsigned int level,
virCacheType type,
unsigned int cache)
{
virResctrlAllocPerLevelPtr a_level = NULL;
virResctrlAllocPerTypePtr a_type = NULL;
if (!alloc)
return false;
if (alloc->nlevels <= level)
return false;
a_level = alloc->levels[level];
if (!a_level)
return false;
a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
/* If there is an allocation for type 'both', there can be no other
* allocation for the same cache */
if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
return true;
if (type == VIR_CACHE_TYPE_BOTH) {
a_type = a_level->types[VIR_CACHE_TYPE_CODE];
if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
return true;
a_type = a_level->types[VIR_CACHE_TYPE_DATA];
if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
return true;
} else {
a_type = a_level->types[type];
if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
return true;
}
return false;
}
int
virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
unsigned int level,
virCacheType type,
unsigned int cache,
unsigned long long size)
{
if (virResctrlAllocCheckCollision(resctrl, level, type, cache)) {
virReportError(VIR_ERR_XML_ERROR,
_("Colliding cache allocations for cache "
"level '%u' id '%u', type '%s'"),
level, cache, virCacheTypeToString(type));
return -1;
}
return virResctrlAllocUpdateSize(resctrl, level, type, cache, size);
}
int
virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
virResctrlAllocForeachSizeCallback cb,
void *opaque)
{
int ret = 0;
unsigned int level = 0;
unsigned int type = 0;
unsigned int cache = 0;
if (!resctrl)
return 0;
for (level = 0; level < resctrl->nlevels; level++) {
virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
if (!a_level)
continue;
for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
virResctrlAllocPerTypePtr a_type = a_level->types[type];
if (!a_type)
continue;
for (cache = 0; cache < a_type->nsizes; cache++) {
unsigned long long *size = a_type->sizes[cache];
if (!size)
continue;
ret = cb(level, type, cache, *size, opaque);
if (ret < 0)
return ret;
}
}
}
return 0;
}
int
virResctrlAllocSetID(virResctrlAllocPtr alloc,
const char *id)
{
if (!id) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Resctrl allocation 'id' cannot be NULL"));
return -1;
}
return VIR_STRDUP(alloc->id, id);
}
const char *
virResctrlAllocGetID(virResctrlAllocPtr alloc)
{
return alloc->id;
}
char *
virResctrlAllocFormat(virResctrlAllocPtr resctrl)
{
virBuffer buf = VIR_BUFFER_INITIALIZER;
unsigned int level = 0;
unsigned int type = 0;
unsigned int cache = 0;
if (!resctrl)
return NULL;
for (level = 0; level < resctrl->nlevels; level++) {
virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
if (!a_level)
continue;
for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
virResctrlAllocPerTypePtr a_type = a_level->types[type];
if (!a_type)
continue;
virBufferAsprintf(&buf, "L%u%s:", level, virResctrlTypeToString(type));
for (cache = 0; cache < a_type->nmasks; cache++) {
virBitmapPtr mask = a_type->masks[cache];
char *mask_str = NULL;
if (!mask)
continue;
mask_str = virBitmapToString(mask, false, true);
if (!mask_str) {
virBufferFreeAndReset(&buf);
return NULL;
}
virBufferAsprintf(&buf, "%u=%s;", cache, mask_str);
VIR_FREE(mask_str);
}
virBufferTrim(&buf, ";", 1);
virBufferAddChar(&buf, '\n');
}
}
virBufferCheckError(&buf);
return virBufferContentAndReset(&buf);
}
static int
virResctrlAllocParseProcessCache(virResctrlInfoPtr resctrl,
virResctrlAllocPtr alloc,
unsigned int level,
virCacheType type,
char *cache)
{
char *tmp = strchr(cache, '=');
unsigned int cache_id = 0;
virBitmapPtr mask = NULL;
int ret = -1;
if (!tmp)
return 0;
*tmp = '\0';
tmp++;
if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Invalid cache id '%s'"), cache);
return -1;
}
mask = virBitmapNewString(tmp);
if (!mask)
return -1;
if (!resctrl ||
level >= resctrl->nlevels ||
!resctrl->levels[level] ||
!resctrl->levels[level]->types[type]) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Missing or inconsistent resctrl info for "
"level '%ud' type '%s'"),
level, virCacheTypeToString(type));
goto cleanup;
}
virBitmapShrink(mask, resctrl->levels[level]->types[type]->bits);
if (virResctrlAllocUpdateMask(alloc, level, type, cache_id, mask) < 0)
goto cleanup;
ret = 0;
cleanup:
virBitmapFree(mask);
return ret;
}
static int
virResctrlAllocParseProcessLine(virResctrlInfoPtr resctrl,
virResctrlAllocPtr alloc,
char *line)
{
char **caches = NULL;
char *tmp = NULL;
unsigned int level = 0;
int type = -1;
size_t ncaches = 0;
size_t i = 0;
int ret = -1;
/* For no reason there can be spaces */
virSkipSpaces((const char **) &line);
/* Skip lines that don't concern caches, e.g. MB: etc. */
if (line[0] != 'L')
return 0;
/* And lines that we can't parse too */
tmp = strchr(line, ':');
if (!tmp)
return 0;
*tmp = '\0';
tmp++;
if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Cannot parse resctrl schema level '%s'"),
line + 1);
return -1;
}
type = virResctrlTypeFromString(line);
if (type < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Cannot parse resctrl schema level '%s'"),
line + 1);
return -1;
}
caches = virStringSplitCount(tmp, ";", 0, &ncaches);
if (!caches)
return 0;
for (i = 0; i < ncaches; i++) {
if (virResctrlAllocParseProcessCache(resctrl, alloc, level, type, caches[i]) < 0)
goto cleanup;
}
ret = 0;
cleanup:
virStringListFree(caches);
return ret;
}
static int
virResctrlAllocParse(virResctrlInfoPtr resctrl,
virResctrlAllocPtr alloc,
const char *schemata)
{
char **lines = NULL;
size_t nlines = 0;
size_t i = 0;
int ret = -1;
lines = virStringSplitCount(schemata, "\n", 0, &nlines);
for (i = 0; i < nlines; i++) {
if (virResctrlAllocParseProcessLine(resctrl, alloc, lines[i]) < 0)
goto cleanup;
}
ret = 0;
cleanup:
virStringListFree(lines);
return ret;
}
static int
virResctrlAllocGetGroup(virResctrlInfoPtr resctrl,
const char *groupname,
virResctrlAllocPtr *alloc)
{
char *schemata = NULL;
int rv = virFileReadValueString(&schemata,
SYSFS_RESCTRL_PATH
"/%s/schemata",
groupname);
*alloc = NULL;
if (rv < 0)
return rv;
*alloc = virResctrlAllocNew();
if (!*alloc)
goto error;
if (virResctrlAllocParse(resctrl, *alloc, schemata) < 0)
goto error;
VIR_FREE(schemata);
return 0;
error:
VIR_FREE(schemata);
virObjectUnref(*alloc);
*alloc = NULL;
return -1;
}
static virResctrlAllocPtr
virResctrlAllocGetDefault(virResctrlInfoPtr resctrl)
{
virResctrlAllocPtr ret = NULL;
int rv = virResctrlAllocGetGroup(resctrl, ".", &ret);
if (rv == -2) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Could not read schemata file for the default group"));
}
return ret;
}
#ifdef __linux__
static void
virResctrlAllocSubtractPerType(virResctrlAllocPerTypePtr dst,
virResctrlAllocPerTypePtr src)
{
size_t i = 0;
if (!dst || !src)
return;
for (i = 0; i < dst->nmasks && i < src->nmasks; i++) {
if (dst->masks[i] && src->masks[i])
virBitmapSubtract(dst->masks[i], src->masks[i]);
}
}
static void
virResctrlAllocSubtract(virResctrlAllocPtr dst,
virResctrlAllocPtr src)
{
size_t i = 0;
size_t j = 0;
if (!src)
return;
for (i = 0; i < dst->nlevels && i < src->nlevels; i++) {
if (dst->levels[i] && src->levels[i]) {
for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
virResctrlAllocSubtractPerType(dst->levels[i]->types[j],
src->levels[i]->types[j]);
}
}
}
}
static virResctrlAllocPtr
virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
{
size_t i = 0;
size_t j = 0;
size_t k = 0;
virResctrlAllocPtr ret = virResctrlAllocNew();
virBitmapPtr mask = NULL;
if (!ret)
return NULL;
for (i = 0; i < info->nlevels; i++) {
virResctrlInfoPerLevelPtr i_level = info->levels[i];
if (!i_level)
continue;
for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
virResctrlInfoPerTypePtr i_type = i_level->types[j];
if (!i_type)
continue;
virBitmapFree(mask);
mask = virBitmapNew(i_type->bits);
if (!mask)
goto error;
virBitmapSetAll(mask);
for (k = 0; k <= i_type->max_cache_id; k++) {
if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
goto error;
}
}
}
cleanup:
virBitmapFree(mask);
return ret;
error:
virObjectUnref(ret);
ret = NULL;
goto cleanup;
}
/*
* This function creates an allocation that represents all unused parts of all
* caches in the system. It uses virResctrlInfo for creating a new full
* allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
* scans for all allocations under /sys/fs/resctrl and subtracts each one of
* them from it. That way it can then return an allocation with only bit set
* being those that are not mentioned in any other allocation. It is used for
* two things, a) calculating the masks when creating allocations and b) from
* tests.
*/
virResctrlAllocPtr
virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
{
virResctrlAllocPtr ret = NULL;
virResctrlAllocPtr alloc = NULL;
struct dirent *ent = NULL;
DIR *dirp = NULL;
int rv = -1;
if (virResctrlInfoIsEmpty(resctrl)) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Resource control is not supported on this host"));
return NULL;
}
ret = virResctrlAllocNewFromInfo(resctrl);
if (!ret)
return NULL;
alloc = virResctrlAllocGetDefault(resctrl);
if (!alloc)
goto error;
virResctrlAllocSubtract(ret, alloc);
virObjectUnref(alloc);
if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
goto error;
while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
if (STREQ(ent->d_name, "info"))
continue;
rv = virResctrlAllocGetGroup(resctrl, ent->d_name, &alloc);
if (rv == -2)
continue;
if (rv < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Could not read schemata file for group %s"),
ent->d_name);
goto error;
}
virResctrlAllocSubtract(ret, alloc);
virObjectUnref(alloc);
alloc = NULL;
}
if (rv < 0)
goto error;
cleanup:
virObjectUnref(alloc);
VIR_DIR_CLOSE(dirp);
return ret;
error:
virObjectUnref(ret);
ret = NULL;
goto cleanup;
}
#else /* ! __linux__ */
virResctrlAllocPtr
virResctrlAllocGetUnused(virResctrlInfoPtr resctrl ATTRIBUTE_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("Cache tune not supported on this platform"));
return NULL;
}
#endif /* ! __linux__ */
/*
* Given the information about requested allocation type `a_type`, the host
* cache for a particular type `i_type` and unused bits in the system `f_type`
* this function tries to find the smallest free space in which the allocation
* for cache id `cache` would fit. We're looking for the smallest place in
* order to minimize fragmentation and maximize the possibility of succeeding.
*
* Per-cache allocation for the @level, @type and @cache must already be
* allocated for @alloc (does not have to exist though).
*/
static int
virResctrlAllocFindUnused(virResctrlAllocPtr alloc,
virResctrlInfoPerTypePtr i_type,
virResctrlAllocPerTypePtr f_type,
unsigned int level,
unsigned int type,
unsigned int cache)
{
unsigned long long *size = alloc->levels[level]->types[type]->sizes[cache];
virBitmapPtr a_mask = NULL;
virBitmapPtr f_mask = NULL;
unsigned long long need_bits;
size_t i = 0;
ssize_t pos = -1;
ssize_t last_bits = 0;
ssize_t last_pos = -1;
int ret = -1;
if (!size)
return 0;
if (cache >= f_type->nmasks) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Cache with id %u does not exists for level %d"),
cache, level);
return -1;
}
f_mask = f_type->masks[cache];
if (!f_mask) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Cache level %d id %u does not support tuning for "
"scope type '%s'"),
level, cache, virCacheTypeToString(type));
return -1;
}
if (*size == i_type->size) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Cache allocation for the whole cache is not "
"possible, specify size smaller than %llu"),
i_type->size);
return -1;
}
need_bits = *size / i_type->control.granularity;
if (*size % i_type->control.granularity) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Cache allocation of size %llu is not "
"divisible by granularity %llu"),
*size, i_type->control.granularity);
return -1;
}
if (need_bits < i_type->min_cbm_bits) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Cache allocation of size %llu is smaller "
"than the minimum allowed allocation %llu"),
*size,
i_type->control.granularity * i_type->min_cbm_bits);
return -1;
}
while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
ssize_t bits;
if (pos_clear < 0)
pos_clear = virBitmapSize(f_mask);
bits = pos_clear - pos;
/* Not enough bits, move on and skip all of them */
if (bits < need_bits) {
pos = pos_clear;
continue;
}
/* This fits perfectly */
if (bits == need_bits) {
last_pos = pos;
break;
}
/* Remember the smaller region if we already found on before */
if (last_pos < 0 || (last_bits && bits < last_bits)) {
last_bits = bits;
last_pos = pos;
}
pos = pos_clear;
}
if (last_pos < 0) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Not enough room for allocation of "
"%llu bytes for level %u cache %u "
"scope type '%s'"),
*size, level, cache,
virCacheTypeToString(type));
return -1;
}
a_mask = virBitmapNew(i_type->bits);
if (!a_mask)
return -1;
for (i = last_pos; i < last_pos + need_bits; i++)
ignore_value(virBitmapSetBit(a_mask, i));
if (virResctrlAllocUpdateMask(alloc, level, type, cache, a_mask) < 0)
goto cleanup;
ret = 0;
cleanup:
virBitmapFree(a_mask);
return ret;
}
static int
virResctrlAllocCopyMasks(virResctrlAllocPtr dst,
virResctrlAllocPtr src)
{
unsigned int level = 0;
for (level = 0; level < src->nlevels; level++) {
virResctrlAllocPerLevelPtr s_level = src->levels[level];
unsigned int type = 0;
if (!s_level)
continue;
for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
virResctrlAllocPerTypePtr s_type = s_level->types[type];
virResctrlAllocPerTypePtr d_type = NULL;
unsigned int cache = 0;
if (!s_type)
continue;
d_type = virResctrlAllocGetType(dst, level, type);
if (!d_type)
return -1;
for (cache = 0; cache < s_type->nmasks; cache++) {
virBitmapPtr mask = s_type->masks[cache];
if (mask && virResctrlAllocUpdateMask(dst, level, type, cache, mask) < 0)
return -1;
}
}
}
return 0;
}
/*
* This function is called when creating an allocation in the system. What it
* does is that it gets all the unused bits using virResctrlAllocGetUnused() and
* then tries to find a proper space for every requested allocation effectively
* transforming `sizes` into `masks`.
*/
static int
virResctrlAllocMasksAssign(virResctrlInfoPtr resctrl,
virResctrlAllocPtr alloc)
{
int ret = -1;
unsigned int level = 0;
virResctrlAllocPtr alloc_free = NULL;
virResctrlAllocPtr alloc_default = NULL;
alloc_free = virResctrlAllocGetUnused(resctrl);
if (!alloc_free)
return -1;
alloc_default = virResctrlAllocGetDefault(resctrl);
if (!alloc_default)
goto cleanup;
if (virResctrlAllocCopyMasks(alloc, alloc_default) < 0)
goto cleanup;
for (level = 0; level < alloc->nlevels; level++) {
virResctrlAllocPerLevelPtr a_level = alloc->levels[level];
virResctrlAllocPerLevelPtr f_level = NULL;
unsigned int type = 0;
if (!a_level)
continue;
if (level < alloc_free->nlevels)
f_level = alloc_free->levels[level];
if (!f_level) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Cache level %d does not support tuning"),
level);
goto cleanup;
}
for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
virResctrlAllocPerTypePtr a_type = a_level->types[type];
virResctrlAllocPerTypePtr f_type = f_level->types[type];
unsigned int cache = 0;
if (!a_type)
continue;
if (!f_type) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Cache level %d does not support tuning for "
"scope type '%s'"),
level, virCacheTypeToString(type));
goto cleanup;
}
for (cache = 0; cache < a_type->nsizes; cache++) {
virResctrlInfoPerLevelPtr i_level = resctrl->levels[level];
virResctrlInfoPerTypePtr i_type = i_level->types[type];
if (virResctrlAllocFindUnused(alloc, i_type, f_type, level, type, cache) < 0)
goto cleanup;
}
}
}
ret = 0;
cleanup:
virObjectUnref(alloc_free);
virObjectUnref(alloc_default);
return ret;
}
int
virResctrlAllocDeterminePath(virResctrlAllocPtr alloc,
const char *machinename)
{
if (!alloc->id) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Resctrl Allocation ID must be set before creation"));
return -1;
}
if (!alloc->path &&
virAsprintf(&alloc->path, "%s/%s-%s",
SYSFS_RESCTRL_PATH, machinename, alloc->id) < 0)
return -1;
return 0;
}
/* This checks if the directory for the alloc exists. If not it tries to create
* it and apply appropriate alloc settings. */
int
virResctrlAllocCreate(virResctrlInfoPtr resctrl,
virResctrlAllocPtr alloc,
const char *machinename)
{
char *schemata_path = NULL;
char *alloc_str = NULL;
int ret = -1;
int lockfd = -1;
if (!alloc)
return 0;
if (virResctrlInfoIsEmpty(resctrl)) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Resource control is not supported on this host"));
return -1;
}
if (virResctrlAllocDeterminePath(alloc, machinename) < 0)
return -1;
if (virFileExists(alloc->path)) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Path '%s' for resctrl allocation exists"),
alloc->path);
goto cleanup;
}
lockfd = virResctrlLockWrite();
if (lockfd < 0)
goto cleanup;
if (virResctrlAllocMasksAssign(resctrl, alloc) < 0)
goto cleanup;
alloc_str = virResctrlAllocFormat(alloc);
if (!alloc_str)
goto cleanup;
if (virAsprintf(&schemata_path, "%s/schemata", alloc->path) < 0)
goto cleanup;
if (virFileMakePath(alloc->path) < 0) {
virReportSystemError(errno,
_("Cannot create resctrl directory '%s'"),
alloc->path);
goto cleanup;
}
VIR_DEBUG("Writing resctrl schemata '%s' into '%s'", alloc_str, schemata_path);
if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
rmdir(alloc->path);
virReportSystemError(errno,
_("Cannot write into schemata file '%s'"),
schemata_path);
goto cleanup;
}
ret = 0;
cleanup:
virResctrlUnlock(lockfd);
VIR_FREE(alloc_str);
VIR_FREE(schemata_path);
return ret;
}
int
virResctrlAllocAddPID(virResctrlAllocPtr alloc,
pid_t pid)
{
char *tasks = NULL;
char *pidstr = NULL;
int ret = 0;
if (!alloc->path) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Cannot add pid to non-existing resctrl allocation"));
return -1;
}
if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0)
return -1;
if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0)
goto cleanup;
if (virFileWriteStr(tasks, pidstr, 0) < 0) {
virReportSystemError(errno,
_("Cannot write pid in tasks file '%s'"),
tasks);
goto cleanup;
}
ret = 0;
cleanup:
VIR_FREE(tasks);
VIR_FREE(pidstr);
return ret;
}
int
virResctrlAllocRemove(virResctrlAllocPtr alloc)
{
int ret = 0;
if (!alloc->path)
return 0;
VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
if (rmdir(alloc->path) != 0 && errno != ENOENT) {
ret = -errno;
VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
}
return ret;
}