libvirt/src/conf/capabilities.c
Peter Krempa 98f6f2081d util: alloc: Reimplement VIR_APPEND_ELEMENT using virAppendElement
Use virAppendElement instead of virInsertElementsN to implement
VIR_APPEND_ELEMENT which allows us to remove error handling as the
only relevant errors were removed when switching to aborting memory
allocation functions.

Signed-off-by: Peter Krempa <pkrempa@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
2021-08-06 08:53:25 +02:00

2290 lines
68 KiB
C

/*
* capabilities.c: hypervisor capabilities
*
* Copyright (C) 2006-2015 Red Hat, Inc.
* Copyright (C) 2006-2008 Daniel P. Berrange
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <unistd.h>
#include "capabilities.h"
#include "cpu_conf.h"
#include "domain_conf.h"
#include "storage_conf.h"
#include "viralloc.h"
#include "virarch.h"
#include "virbuffer.h"
#include "virerror.h"
#include "virfile.h"
#include "virhostcpu.h"
#include "virhostmem.h"
#include "virlog.h"
#include "virnuma.h"
#include "virstring.h"
#include "virtypedparam.h"
#include "viruuid.h"
#include "virenum.h"
#include "virutil.h"
#define VIR_FROM_THIS VIR_FROM_CAPABILITIES
#define SYSFS_SYSTEM_PATH "/sys/devices/system"
VIR_LOG_INIT("conf.capabilities");
/* String names for the host power-management suspend targets.  The
 * host XML formatter in this file looks a name up by bit index and
 * emits it as an element inside <power_management>. */
VIR_ENUM_DECL(virCapsHostPMTarget);
VIR_ENUM_IMPL(virCapsHostPMTarget,
VIR_NODE_SUSPEND_TARGET_LAST,
"suspend_mem", "suspend_disk", "suspend_hybrid",
);
/* Class handle passed to virObjectNew() when creating virCaps objects;
 * presumably assigned by VIR_CLASS_NEW() below (macro not visible here). */
static virClass *virCapsClass;
static void virCapsDispose(void *obj);

/* One-time initializer: registers the virCaps class on top of the
 * generic object class.  Returns 0 on success, -1 on failure. */
static int virCapabilitiesOnceInit(void)
{
    if (VIR_CLASS_NEW(virCaps, virClassForObject()))
        return 0;

    return -1;
}

VIR_ONCE_GLOBAL_INIT(virCapabilities);
/**
 * virCapabilitiesNew:
 * @hostarch: host machine architecture
 * @offlineMigrate: true if offline migration is available
 * @liveMigrate: true if live migration is available
 *
 * Creates a fresh capabilities object and records the host architecture
 * and migration flags in it.
 *
 * Returns the new object, or NULL if class initialization or object
 * allocation failed.
 */
virCaps *
virCapabilitiesNew(virArch hostarch,
                   bool offlineMigrate,
                   bool liveMigrate)
{
    virCaps *obj = NULL;

    if (virCapabilitiesInitialize() < 0)
        return NULL;

    obj = virObjectNew(virCapsClass);
    if (obj == NULL)
        return NULL;

    obj->host.liveMigrate = liveMigrate;
    obj->host.offlineMigrate = offlineMigrate;
    obj->host.arch = hostarch;

    return obj;
}
/* Releases the sibling bitmap of each of the @ncpus entries in @cpus and
 * resets the pointer to NULL.  The array itself is not freed.  A NULL
 * @cpus is accepted and ignored. */
void
virCapabilitiesClearHostNUMACellCPUTopology(virCapsHostNUMACellCPU *cpus,
                                            size_t ncpus)
{
    size_t idx = 0;

    if (cpus == NULL)
        return;

    while (idx < ncpus) {
        virBitmapFree(cpus[idx].siblings);
        cpus[idx].siblings = NULL;
        idx++;
    }
}
/* Frees a host NUMA cell together with everything it owns: per-CPU
 * sibling bitmaps, the CPU / distance / page-info arrays and the
 * (optional) cache array reference.  NULL is a no-op. */
static void
virCapabilitiesFreeHostNUMACell(virCapsHostNUMACell *cell)
{
    if (!cell)
        return;

    /* sibling bitmaps must go before the array that holds them */
    virCapabilitiesClearHostNUMACellCPUTopology(cell->cpus, cell->ncpus);
    g_free(cell->cpus);

    g_free(cell->distances);
    g_free(cell->pageinfo);

    if (cell->caches != NULL)
        g_array_unref(cell->caches);

    g_free(cell);
}
/* Frees a guest machine description, including its name and canonical
 * name strings.  NULL is a no-op. */
static void
virCapabilitiesFreeGuestMachine(virCapsGuestMachine *machine)
{
    if (!machine)
        return;

    g_free(machine->canonical);
    g_free(machine->name);
    g_free(machine);
}
/* Frees a guest domain entry: its emulator and loader strings, every
 * machine in its machine table, the table itself and finally the
 * entry.  NULL is a no-op. */
static void
virCapabilitiesFreeGuestDomain(virCapsGuestDomain *dom)
{
    size_t idx = 0;

    if (!dom)
        return;

    g_free(dom->info.emulator);
    g_free(dom->info.loader);

    while (idx < dom->info.nmachines) {
        virCapabilitiesFreeGuestMachine(dom->info.machines[idx]);
        idx++;
    }
    g_free(dom->info.machines);

    g_free(dom);
}
/* Frees a guest entry: the default emulator/loader strings, the default
 * machine table, every registered domain and the guest itself.
 * NULL is a no-op. */
void
virCapabilitiesFreeGuest(virCapsGuest *guest)
{
    size_t m;
    size_t d;

    if (!guest)
        return;

    g_free(guest->arch.defaultInfo.emulator);
    g_free(guest->arch.defaultInfo.loader);

    for (m = 0; m < guest->arch.defaultInfo.nmachines; m++)
        virCapabilitiesFreeGuestMachine(guest->arch.defaultInfo.machines[m]);
    g_free(guest->arch.defaultInfo.machines);

    for (d = 0; d < guest->arch.ndomains; d++)
        virCapabilitiesFreeGuestDomain(guest->arch.domains[d]);
    g_free(guest->arch.domains);

    g_free(guest);
}
/* Frees a storage pool capability entry.  NULL is a no-op because
 * g_free() already ignores NULL. */
static void
virCapabilitiesFreeStoragePool(virCapsStoragePool *pool)
{
    g_free(pool);
}
/* Drops one reference on @caps.  When the count reaches zero the cell
 * array, the optional interconnect array and the object itself are
 * released.  NULL is a no-op. */
void
virCapabilitiesHostNUMAUnref(virCapsHostNUMA *caps)
{
    if (caps == NULL || !g_atomic_int_dec_and_test(&caps->refs))
        return;

    g_ptr_array_unref(caps->cells);

    if (caps->interconnects != NULL)
        g_array_unref(caps->interconnects);

    g_free(caps);
}
/**
 * virCapabilitiesHostNUMARef:
 * @caps: host NUMA capabilities; dereferenced unconditionally, so it
 *        must not be NULL (unlike virCapabilitiesHostNUMAUnref)
 *
 * Atomically increments the reference count of @caps.
 */
void
virCapabilitiesHostNUMARef(virCapsHostNUMA *caps)
{
g_atomic_int_inc(&caps->refs);
}
/* Frees a memory-bandwidth node entry along with its CPU bitmap.
 * NULL is a no-op. */
static void
virCapsHostMemBWNodeFree(virCapsHostMemBWNode *ptr)
{
    if (ptr == NULL)
        return;

    virBitmapFree(ptr->cpus);
    g_free(ptr);
}
/* Releases everything owned by @secmodel (each base label's type and
 * label strings, the label array, the model and DOI strings) without
 * freeing @secmodel itself. */
static void
virCapabilitiesClearSecModel(virCapsHostSecModel *secmodel)
{
    size_t idx = 0;

    while (idx < secmodel->nlabels) {
        VIR_FREE(secmodel->labels[idx].type);
        VIR_FREE(secmodel->labels[idx].label);
        idx++;
    }

    VIR_FREE(secmodel->labels);
    VIR_FREE(secmodel->model);
    VIR_FREE(secmodel->doi);
}
static void
virCapsDispose(void *object)
{
virCaps *caps = object;
size_t i;
for (i = 0; i < caps->npools; i++)
virCapabilitiesFreeStoragePool(caps->pools[i]);
g_free(caps->pools);
for (i = 0; i < caps->nguests; i++)
virCapabilitiesFreeGuest(caps->guests[i]);
g_free(caps->guests);
for (i = 0; i < caps->host.nfeatures; i++)
g_free(caps->host.features[i]);
g_free(caps->host.features);
if (caps->host.numa)
virCapabilitiesHostNUMAUnref(caps->host.numa);
for (i = 0; i < caps->host.nmigrateTrans; i++)
g_free(caps->host.migrateTrans[i]);
g_free(caps->host.migrateTrans);
for (i = 0; i < caps->host.nsecModels; i++)
virCapabilitiesClearSecModel(&caps->host.secModels[i]);
g_free(caps->host.secModels);
for (i = 0; i < caps->host.cache.nbanks; i++)
virCapsHostCacheBankFree(caps->host.cache.banks[i]);
virResctrlInfoMonFree(caps->host.cache.monitor);
g_free(caps->host.cache.banks);
for (i = 0; i < caps->host.memBW.nnodes; i++)
virCapsHostMemBWNodeFree(caps->host.memBW.nodes[i]);
virResctrlInfoMonFree(caps->host.memBW.monitor);
g_free(caps->host.memBW.nodes);
g_free(caps->host.netprefix);
g_free(caps->host.pagesSize);
virCPUDefFree(caps->host.cpu);
virObjectUnref(caps->host.resctrl);
}
/**
 * virCapabilitiesAddHostFeature:
 * @caps: capabilities to extend
 * @name: name of new feature
 *
 * Appends a copy of @name (eg 'pae', or 'vmx') to the list of host CPU
 * features.
 *
 * Always returns 0.
 */
int
virCapabilitiesAddHostFeature(virCaps *caps,
                              const char *name)
{
    char *copy = NULL;

    VIR_RESIZE_N(caps->host.features, caps->host.nfeatures_max,
                 caps->host.nfeatures, 1);

    copy = g_strdup(name);
    caps->host.features[caps->host.nfeatures++] = copy;

    return 0;
}
/**
 * virCapabilitiesAddHostMigrateTransport:
 * @caps: capabilities to extend
 * @name: name of migration transport
 *
 * Appends a copy of @name to the list of domain migration transport
 * URIs of the host.
 *
 * Always returns 0.
 */
int
virCapabilitiesAddHostMigrateTransport(virCaps *caps,
                                       const char *name)
{
    char *copy = NULL;

    VIR_RESIZE_N(caps->host.migrateTrans, caps->host.nmigrateTrans_max,
                 caps->host.nmigrateTrans, 1);

    copy = g_strdup(name);
    caps->host.migrateTrans[caps->host.nmigrateTrans++] = copy;

    return 0;
}
/**
 * virCapabilitiesSetNetPrefix:
 * @caps: capabilities to extend
 * @prefix: prefix for host generated network interfaces (may be NULL)
 *
 * Registers the prefix that is used for generated network interfaces.
 * A copy of @prefix is stored; any previously registered prefix is
 * released, so calling this more than once does not leak.
 *
 * Always returns 0.
 */
int
virCapabilitiesSetNetPrefix(virCaps *caps,
                            const char *prefix)
{
    /* Duplicate before freeing so that passing the currently stored
     * string back in remains safe. */
    char *copy = g_strdup(prefix);

    g_free(caps->host.netprefix);
    caps->host.netprefix = copy;

    return 0;
}
/**
 * virCapabilitiesHostNUMAAddCell:
 * @caps: capabilities to extend
 * @num: ID number of NUMA cell
 * @mem: Total size of memory in the NUMA node (in KiB)
 * @ncpus: number of CPUs in cell
 * @cpus: array of CPU definition structures
 * @ndistances: number of sibling NUMA nodes
 * @distances: NUMA distances to other nodes
 * @npageinfo: number of pages at node @num
 * @pageinfo: info on each single memory page
 * @caches: info on memory side caches
 *
 * Creates a new NUMA cell and appends it to the cell list of @caps.
 * Each of @cpus, @distances, @pageinfo and @caches is the address of
 * the caller's pointer and may itself be NULL, in which case that piece
 * of information (and its count) is left zeroed in the cell.
 *
 * All pointers are stolen: the caller's variables are set to NULL.
 */
void
virCapabilitiesHostNUMAAddCell(virCapsHostNUMA *caps,
                               int num,
                               unsigned long long mem,
                               int ncpus,
                               virCapsHostNUMACellCPU **cpus,
                               int ndistances,
                               virNumaDistance **distances,
                               int npageinfo,
                               virCapsHostNUMACellPageInfo **pageinfo,
                               GArray **caches)
{
    virCapsHostNUMACell *entry = g_new0(virCapsHostNUMACell, 1);

    entry->num = num;
    entry->mem = mem;

    if (cpus != NULL) {
        entry->cpus = g_steal_pointer(cpus);
        entry->ncpus = ncpus;
    }

    if (distances != NULL) {
        entry->distances = g_steal_pointer(distances);
        entry->ndistances = ndistances;
    }

    if (pageinfo != NULL) {
        entry->pageinfo = g_steal_pointer(pageinfo);
        entry->npageinfo = npageinfo;
    }

    if (caches != NULL)
        entry->caches = g_steal_pointer(caches);

    g_ptr_array_add(caps->cells, entry);
}
/**
* virCapabilitiesAllocMachines:
* @machines: machine variants for emulator ('pc', or 'isapc', etc)
* @nmachines: number of machine variants for emulator
*
* Allocate a table of virCapsGuestMachine *from the supplied table
* of machine names.
*/
virCapsGuestMachine **
virCapabilitiesAllocMachines(const char *const *names, int nnames)
{
virCapsGuestMachine **machines;
size_t i;
machines = g_new0(virCapsGuestMachine *, nnames);
for (i = 0; i < nnames; i++) {
machines[i] = g_new0(virCapsGuestMachine, 1);
machines[i]->name = g_strdup(names[i]);
}
return machines;
}
/**
 * virCapabilitiesFreeMachines:
 * @machines: table of virCapsGuestMachine pointers (may be NULL)
 * @nmachines: number of slots in @machines
 *
 * Frees the machines in the table followed by the table itself.
 * Iteration stops at the first NULL slot, so entries after a NULL slot
 * are not freed.
 */
void
virCapabilitiesFreeMachines(virCapsGuestMachine **machines,
                            int nmachines)
{
    size_t idx = 0;

    if (machines == NULL)
        return;

    while (idx < nmachines && machines[idx] != NULL) {
        virCapabilitiesFreeGuestMachine(machines[idx]);
        machines[idx] = NULL;
        idx++;
    }

    g_free(machines);
}
/**
 * virCapabilitiesAddGuest:
 * @caps: capabilities to extend
 * @ostype: guest operating system type, of enum VIR_DOMAIN_OSTYPE
 * @arch: guest CPU architecture
 * @emulator: path to default device emulator for arch/ostype
 * @loader: path to default BIOS loader for arch/ostype
 * @nmachines: number of machine variants for emulator
 * @machines: machine variants for emulator ('pc', or 'isapc', etc)
 *
 * Registers a new guest operating system. This should be
 * followed by registration of at least one domain for
 * running the guest.
 *
 * The word size is derived from @arch. @emulator and @loader are
 * copied. When @nmachines is non-zero the @machines table pointer is
 * stored directly in the guest (not copied); when it is zero @machines
 * is ignored.
 *
 * Returns the newly registered guest.
 */
virCapsGuest *
virCapabilitiesAddGuest(virCaps *caps,
                        int ostype,
                        virArch arch,
                        const char *emulator,
                        const char *loader,
                        int nmachines,
                        virCapsGuestMachine **machines)
{
    virCapsGuest *entry = g_new0(virCapsGuest, 1);

    entry->ostype = ostype;
    entry->arch.id = arch;
    entry->arch.wordsize = virArchGetWordSize(arch);
    entry->arch.defaultInfo.emulator = g_strdup(emulator);
    entry->arch.defaultInfo.loader = g_strdup(loader);

    if (nmachines != 0) {
        entry->arch.defaultInfo.machines = machines;
        entry->arch.defaultInfo.nmachines = nmachines;
    }

    VIR_RESIZE_N(caps->guests, caps->nguests_max, caps->nguests, 1);
    caps->guests[caps->nguests] = entry;
    caps->nguests++;

    return entry;
}
/**
 * virCapabilitiesAddGuestDomain:
 * @guest: guest to support
 * @hvtype: hypervisor type ('xen', 'qemu', 'kvm')
 * @emulator: specialized device emulator for domain
 * @loader: specialized BIOS loader for domain
 * @nmachines: number of machine variants for emulator
 * @machines: specialized machine variants for emulator
 *
 * Registers a virtual domain capable of running a
 * guest operating system.
 *
 * @emulator and @loader are copied. When @nmachines is non-zero the
 * @machines table pointer is stored directly in the domain (not
 * copied); when it is zero @machines is ignored.
 *
 * Returns the newly registered domain.
 */
virCapsGuestDomain *
virCapabilitiesAddGuestDomain(virCapsGuest *guest,
                              int hvtype,
                              const char *emulator,
                              const char *loader,
                              int nmachines,
                              virCapsGuestMachine **machines)
{
    virCapsGuestDomain *entry = g_new0(virCapsGuestDomain, 1);

    entry->type = hvtype;
    entry->info.emulator = g_strdup(emulator);
    entry->info.loader = g_strdup(loader);

    if (nmachines != 0) {
        entry->info.machines = machines;
        entry->info.nmachines = nmachines;
    }

    VIR_RESIZE_N(guest->arch.domains, guest->arch.ndomains_max,
                 guest->arch.ndomains, 1);
    guest->arch.domains[guest->arch.ndomains++] = entry;

    return entry;
}
/* Static description of one guest feature type. */
struct virCapsGuestFeatureInfo {
const char *name; /* element name used when formatting the <features> XML */
bool togglesRequired; /* true if default/toggle state is recorded for the feature */
};
/* Table indexed by virCapsGuestFeatureType giving each feature's XML
 * name and whether its default/toggle attributes are tracked. */
static const struct virCapsGuestFeatureInfo virCapsGuestFeatureInfos[VIR_CAPS_GUEST_FEATURE_TYPE_LAST] = {
[VIR_CAPS_GUEST_FEATURE_TYPE_PAE] = { "pae", false },
[VIR_CAPS_GUEST_FEATURE_TYPE_NONPAE] = { "nonpae", false },
[VIR_CAPS_GUEST_FEATURE_TYPE_IA64_BE] = { "ia64_be", false },
[VIR_CAPS_GUEST_FEATURE_TYPE_ACPI] = { "acpi", true },
[VIR_CAPS_GUEST_FEATURE_TYPE_APIC] = { "apic", true },
[VIR_CAPS_GUEST_FEATURE_TYPE_CPUSELECTION] = { "cpuselection", false },
[VIR_CAPS_GUEST_FEATURE_TYPE_DEVICEBOOT] = { "deviceboot", false },
[VIR_CAPS_GUEST_FEATURE_TYPE_DISKSNAPSHOT] = { "disksnapshot", true },
[VIR_CAPS_GUEST_FEATURE_TYPE_HAP] = { "hap", true },
};
/* Marks @feature as present for @guest.  The @defaultOn and @toggle
 * values are recorded only for features whose table entry has
 * togglesRequired set; for all other features they are ignored. */
static void
virCapabilitiesAddGuestFeatureInternal(virCapsGuest *guest,
                                       virCapsGuestFeatureType feature,
                                       bool defaultOn,
                                       bool toggle)
{
    guest->features[feature].present = true;

    if (!virCapsGuestFeatureInfos[feature].togglesRequired)
        return;

    guest->features[feature].defaultOn = virTristateSwitchFromBool(defaultOn);
    guest->features[feature].toggle = virTristateBoolFromBool(toggle);
}
/**
 * virCapabilitiesAddGuestFeature:
 * @guest: guest to associate feature with
 * @feature: feature to add
 *
 * Registers a feature for a guest domain. Both the default state and
 * the toggle flag are passed as false to the internal helper, which
 * only stores them for features that track toggles.
 */
void
virCapabilitiesAddGuestFeature(virCapsGuest *guest,
virCapsGuestFeatureType feature)
{
virCapabilitiesAddGuestFeatureInternal(guest, feature, false, false);
}
/**
 * virCapabilitiesAddGuestFeatureWithToggle:
 * @guest: guest to associate feature with
 * @feature: feature to add
 * @defaultOn: true if it defaults to on
 * @toggle: true if its state can be toggled
 *
 * Registers a feature with toggles for a guest domain. @defaultOn and
 * @toggle are forwarded unchanged to the internal helper, which only
 * stores them for features that track toggles.
 */
void
virCapabilitiesAddGuestFeatureWithToggle(virCapsGuest *guest,
virCapsGuestFeatureType feature,
bool defaultOn,
bool toggle)
{
virCapabilitiesAddGuestFeatureInternal(guest, feature, defaultOn, toggle);
}
/**
 * virCapabilitiesHostSecModelAddBaseLabel
 * @secmodel: Security model to add a base label for
 * @type: virtualization type
 * @label: base label
 *
 * Appends a new base label holding copies of @type and @label to
 * @secmodel.
 *
 * Returns 0 on success, -1 if @type or @label is NULL.
 */
extern int
virCapabilitiesHostSecModelAddBaseLabel(virCapsHostSecModel *secmodel,
                                        const char *type,
                                        const char *label)
{
    size_t slot;

    if (!type || !label)
        return -1;

    VIR_EXPAND_N(secmodel->labels, secmodel->nlabels, 1);

    /* the freshly added element is the last one */
    slot = secmodel->nlabels - 1;
    secmodel->labels[slot].type = g_strdup(type);
    secmodel->labels[slot].label = g_strdup(label);

    return 0;
}
/**
 * virCapabilitiesDomainDataLookupInternal:
 * @caps: capabilities to search
 * @ostype: guest OS type to match, or -1 to accept any
 * @arch: architecture to match, or VIR_ARCH_NONE to accept any
 * @domaintype: virt type to match, or VIR_DOMAIN_VIRT_NONE to accept any
 * @emulator: emulator path to match, or NULL to accept any
 * @machinetype: machine name or canonical name to match, or NULL to
 *               accept any
 *
 * Walks guests, their domains and the applicable machine list (the
 * domain's own list if it has one, otherwise the guest default list)
 * and stops at the first combination that satisfies every requested
 * criterion. A domain whose machine list is non-empty but contains no
 * matching machine is skipped.
 *
 * Returns a newly allocated virCapsDomainData describing the match, or
 * NULL (with an error reported) if nothing matched. The string members
 * of the result point into @caps and are not copies.
 */
static virCapsDomainData *
virCapabilitiesDomainDataLookupInternal(virCaps *caps,
                                        int ostype,
                                        virArch arch,
                                        virDomainVirtType domaintype,
                                        const char *emulator,
                                        const char *machinetype)
{
    virCapsGuest *foundguest = NULL;
    virCapsGuestDomain *founddomain = NULL;
    virCapsGuestMachine *foundmachine = NULL;
    virCapsDomainData *ret = NULL;
    size_t i, j, k;

    VIR_DEBUG("Lookup ostype=%d arch=%d domaintype=%d emulator=%s machine=%s",
              ostype, arch, domaintype, NULLSTR(emulator), NULLSTR(machinetype));

    for (i = 0; i < caps->nguests; i++) {
        virCapsGuest *guest = caps->guests[i];

        if (ostype != -1 && guest->ostype != ostype) {
            VIR_DEBUG("Skip os type want=%d vs got=%d", ostype, guest->ostype);
            continue;
        }
        VIR_DEBUG("Match os type %d", ostype);

        if ((arch != VIR_ARCH_NONE) && (guest->arch.id != arch)) {
            VIR_DEBUG("Skip arch want=%d vs got=%d", arch, guest->arch.id);
            continue;
        }
        VIR_DEBUG("Match arch %d", arch);

        for (j = 0; j < guest->arch.ndomains; j++) {
            virCapsGuestDomain *domain = guest->arch.domains[j];
            virCapsGuestMachine **machinelist;
            int nmachines;
            const char *check_emulator = NULL;

            if (domaintype != VIR_DOMAIN_VIRT_NONE &&
                (domain->type != domaintype)) {
                VIR_DEBUG("Skip domain type want=%d vs got=%d", domaintype, domain->type);
                continue;
            }
            VIR_DEBUG("Match domain type %d", domaintype);

            /* a domain without its own emulator inherits the guest default */
            check_emulator = domain->info.emulator;
            if (!check_emulator)
                check_emulator = guest->arch.defaultInfo.emulator;

            if (emulator && STRNEQ_NULLABLE(check_emulator, emulator)) {
                /* labels now agree with the arguments: @emulator is what
                 * the caller wants, check_emulator is what was found */
                VIR_DEBUG("Skip emulator want=%s vs got=%s",
                          emulator, NULLSTR(check_emulator));
                continue;
            }
            VIR_DEBUG("Match emulator %s", NULLSTR(emulator));

            /* likewise the machine list falls back to the guest default */
            if (domain->info.nmachines) {
                nmachines = domain->info.nmachines;
                machinelist = domain->info.machines;
            } else {
                nmachines = guest->arch.defaultInfo.nmachines;
                machinelist = guest->arch.defaultInfo.machines;
            }

            for (k = 0; k < nmachines; k++) {
                virCapsGuestMachine *machine = machinelist[k];

                if (machinetype &&
                    STRNEQ(machine->name, machinetype) &&
                    STRNEQ_NULLABLE(machine->canonical, machinetype)) {
                    VIR_DEBUG("Skip machine type want=%s vs got=%s got=%s",
                              machinetype, machine->name, NULLSTR(machine->canonical));
                    continue;
                }
                VIR_DEBUG("Match machine type machine %s", NULLSTR(machinetype));

                foundmachine = machine;
                break;
            }

            /* machines exist but none matched: try the next domain */
            if (!foundmachine && nmachines)
                continue;

            founddomain = domain;
            break;
        }

        if (!founddomain)
            continue;

        foundguest = guest;
        break;
    }

    /* XXX check default_emulator, see how it uses this */
    if (!foundguest) {
        g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;

        /* -1 is the "any OS type" wildcard used by the matching loop
         * above; testing for non-zero would format the wildcard and
         * silently drop an OS type whose numeric value is 0. */
        if (ostype != -1)
            virBufferAsprintf(&buf, "ostype=%s ",
                              NULLSTR(virDomainOSTypeToString(ostype)));
        if (arch)
            virBufferAsprintf(&buf, "arch=%s ", virArchToString(arch));
        if (domaintype > VIR_DOMAIN_VIRT_NONE)
            virBufferAsprintf(&buf, "domaintype=%s ",
                              virDomainVirtTypeToString(domaintype));
        if (emulator)
            virBufferEscapeString(&buf, "emulator=%s ", emulator);
        if (machinetype)
            virBufferEscapeString(&buf, "machine=%s ", machinetype);
        if (virBufferCurrentContent(&buf) &&
            !virBufferCurrentContent(&buf)[0])
            virBufferAsprintf(&buf, "%s", _("any configuration"));

        virReportError(VIR_ERR_INVALID_ARG,
                       _("could not find capabilities for %s"),
                       virBufferCurrentContent(&buf));
        return ret;
    }

    ret = g_new0(virCapsDomainData, 1);

    ret->ostype = foundguest->ostype;
    ret->arch = foundguest->arch.id;
    if (founddomain) {
        ret->domaintype = founddomain->type;
        ret->emulator = founddomain->info.emulator;
    }
    if (!ret->emulator)
        ret->emulator = foundguest->arch.defaultInfo.emulator;
    if (foundmachine)
        ret->machinetype = foundmachine->name;

    return ret;
}
/**
 * virCapabilitiesDomainDataLookup:
 * @caps: capabilities to query
 * @ostype: guest operating system type, of enum VIR_DOMAIN_OSTYPE
 * @arch: Architecture to search for
 * @domaintype: domain type to search for, of enum virDomainVirtType
 * @emulator: Emulator path to search for
 * @machinetype: Machine type to search for
 *
 * Search capabilities for the passed values, and if found return
 * a virCapsDomainData filled in with the default values. When @arch
 * is VIR_ARCH_NONE the host architecture is tried first, and the
 * unrestricted search is used only if that yields nothing.
 *
 * Returns the matching data, or NULL if nothing matched.
 */
virCapsDomainData *
virCapabilitiesDomainDataLookup(virCaps *caps,
                                int ostype,
                                virArch arch,
                                int domaintype,
                                const char *emulator,
                                const char *machinetype)
{
    if (arch == VIR_ARCH_NONE) {
        /* Prefer host arch if it is available */
        virCapsDomainData *hostMatch;

        hostMatch = virCapabilitiesDomainDataLookupInternal(caps, ostype,
                                                            caps->host.arch,
                                                            domaintype,
                                                            emulator,
                                                            machinetype);
        if (hostMatch != NULL)
            return hostMatch;
    }

    return virCapabilitiesDomainDataLookupInternal(caps, ostype,
                                                   arch, domaintype,
                                                   emulator, machinetype);
}
/* Reports whether @caps contains any guest/domain combination matching
 * @ostype, @arch and @virttype (emulator and machine type are not
 * constrained).  The looked-up data is freed before returning. */
bool
virCapabilitiesDomainSupported(virCaps *caps,
                               int ostype,
                               virArch arch,
                               int virttype)
{
    g_autofree virCapsDomainData *match = NULL;

    match = virCapabilitiesDomainDataLookup(caps, ostype, arch, virttype,
                                            NULL, NULL);
    if (match == NULL)
        return false;

    return true;
}
/* Appends a storage pool capability entry of type @poolType to @caps.
 * Always returns 0. */
int
virCapabilitiesAddStoragePool(virCaps *caps,
                              int poolType)
{
    virCapsStoragePool *entry = g_new0(virCapsStoragePool, 1);

    entry->type = poolType;

    VIR_RESIZE_N(caps->pools, caps->npools_max, caps->npools, 1);
    caps->pools[caps->npools] = entry;
    caps->npools++;

    return 0;
}
/* Formats the <cpus num='N'> element of a NUMA cell into @buf, with one
 * <cpu/> child per entry of @cpus.  Socket, die, core and sibling
 * attributes are emitted only for CPUs that have a siblings bitmap.
 *
 * Returns 0 on success, -1 if a siblings bitmap could not be formatted
 * (in which case nothing is added to @buf). */
static int
virCapsHostNUMACellCPUFormat(virBuffer *buf,
                             const virCapsHostNUMACellCPU *cpus,
                             int ncpus)
{
    g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
    g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
    size_t idx;

    virBufferAsprintf(&attrBuf, " num='%d'", ncpus);

    for (idx = 0; idx < ncpus; idx++) {
        const virCapsHostNUMACellCPU *cpu = &cpus[idx];

        virBufferAsprintf(&childBuf, "<cpu id='%d'", cpu->id);

        if (cpu->siblings != NULL) {
            g_autofree char *siblings = virBitmapFormat(cpu->siblings);

            if (siblings == NULL)
                return -1;

            virBufferAsprintf(&childBuf,
                              " socket_id='%d' die_id='%d' core_id='%d' siblings='%s'",
                              cpu->socket_id,
                              cpu->die_id,
                              cpu->core_id,
                              siblings);
        }

        virBufferAddLit(&childBuf, "/>\n");
    }

    virXMLFormatElement(buf, "cpus", &attrBuf, &childBuf);
    return 0;
}
/* Formats the host NUMA <topology> element into @buf: a <cells> list
 * with one <cell> per NUMA node (memory total, page info, distances,
 * caches, CPUs) followed by the interconnect information, if any.
 *
 * A NULL @caps produces no output and counts as success.
 *
 * Returns 0 on success, -1 if formatting the CPUs of a cell failed. */
static int
virCapabilitiesHostNUMAFormat(virBuffer *buf,
                              virCapsHostNUMA *caps)
{
    size_t i;

    if (!caps)
        return 0;

    virBufferAddLit(buf, "<topology>\n");
    virBufferAdjustIndent(buf, 2);
    /* GPtrArray's len is an unsigned int (guint), hence %u */
    virBufferAsprintf(buf, "<cells num='%u'>\n", caps->cells->len);
    virBufferAdjustIndent(buf, 2);

    for (i = 0; i < caps->cells->len; i++) {
        virCapsHostNUMACell *cell = g_ptr_array_index(caps->cells, i);
        size_t j;

        virBufferAsprintf(buf, "<cell id='%d'>\n", cell->num);
        virBufferAdjustIndent(buf, 2);

        /* Print out the numacell memory total if it is available */
        if (cell->mem)
            virBufferAsprintf(buf, "<memory unit='KiB'>%llu</memory>\n",
                              cell->mem);

        for (j = 0; j < cell->npageinfo; j++) {
            virBufferAsprintf(buf, "<pages unit='KiB' size='%u'>%llu</pages>\n",
                              cell->pageinfo[j].size,
                              cell->pageinfo[j].avail);
        }

        virNumaDistanceFormat(buf, cell->distances, cell->ndistances);

        if (cell->caches) {
            virNumaCache *caches = &g_array_index(cell->caches, virNumaCache, 0);
            virNumaCacheFormat(buf, caches, cell->caches->len);
        }

        if (virCapsHostNUMACellCPUFormat(buf, cell->cpus, cell->ncpus) < 0)
            return -1;

        virBufferAdjustIndent(buf, -2);
        virBufferAddLit(buf, "</cell>\n");
    }

    virBufferAdjustIndent(buf, -2);
    virBufferAddLit(buf, "</cells>\n");

    if (caps->interconnects) {
        const virNumaInterconnect *interconnects;

        interconnects = &g_array_index(caps->interconnects, virNumaInterconnect, 0);
        virNumaInterconnectFormat(buf, interconnects, caps->interconnects->len);
    }

    virBufferAdjustIndent(buf, -2);
    virBufferAddLit(buf, "</topology>\n");
    return 0;
}
/* Formats a resctrl <monitor> element with one <feature/> child per
 * monitor feature.  Nothing is emitted when @monitor is NULL or lists
 * no features.  Cache-specific attributes (level, reuseThreshold) are
 * added only for cache-type monitors.
 *
 * Always returns 0. */
static int
virCapabilitiesFormatResctrlMonitor(virBuffer *buf,
                                    virResctrlInfoMon *monitor)
{
    size_t idx = 0;
    g_auto(virBuffer) featuresBuf = VIR_BUFFER_INIT_CHILD(buf);

    /* no monitor, or a monitor without features, means no capability */
    if (monitor == NULL || monitor->nfeatures == 0)
        return 0;

    virBufferAddLit(buf, "<monitor ");

    /* CMT might not be enabled; if enabled show related attributes. */
    if (monitor->type == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
        virBufferAsprintf(buf,
                          "level='%u' reuseThreshold='%u' ",
                          monitor->cache_level,
                          monitor->cache_reuse_threshold);
    }

    virBufferAsprintf(buf,
                      "maxMonitors='%u'>\n",
                      monitor->max_monitor);

    while (idx < monitor->nfeatures) {
        virBufferAsprintf(&featuresBuf,
                          "<feature name='%s'/>\n",
                          monitor->features[idx]);
        idx++;
    }

    virBufferAddBuffer(buf, &featuresBuf);
    virBufferAddLit(buf, "</monitor>\n");

    return 0;
}
/* Formats the host <cache> element into @buf: one <bank> per cache bank
 * (with a <control> child per allocation control of that bank) followed
 * by the cache monitor, if any.  Nothing is emitted when there are
 * neither banks nor a monitor.
 *
 * Returns 0 on success, -1 if a bank's CPU bitmap could not be
 * formatted or the monitor formatter failed. */
static int
virCapabilitiesFormatCaches(virBuffer *buf,
virCapsHostCache *cache)
{
size_t i = 0;
size_t j = 0;
if (!cache->nbanks && !cache->monitor)
return 0;
virBufferAddLit(buf, "<cache>\n");
virBufferAdjustIndent(buf, 2);
for (i = 0; i < cache->nbanks; i++) {
g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
g_auto(virBuffer) childrenBuf = VIR_BUFFER_INIT_CHILD(buf);
virCapsHostCacheBank *bank = cache->banks[i];
g_autofree char *cpus_str = virBitmapFormat(bank->cpus);
const char *unit = NULL;
/* presumably scales the size and reports the matching unit through
 * @unit -- confirm against virFormatIntPretty */
unsigned long long short_size = virFormatIntPretty(bank->size, &unit);
if (!cpus_str)
return -1;
/*
 * Let's just *hope* the size is aligned to KiBs so that it does not
 * bite us back in the future
 */
virBufferAsprintf(&attrBuf,
" id='%u' level='%u' type='%s' "
"size='%llu' unit='%s' cpus='%s'",
bank->id, bank->level,
virCacheTypeToString(bank->type),
short_size, unit, cpus_str);
/* From here on @unit is reused for the controls; the bank's own unit
 * has already been written into attrBuf above. */
for (j = 0; j < bank->ncontrols; j++) {
const char *min_unit;
virResctrlInfoPerCache *controls = bank->controls[j];
unsigned long long gran_short_size = controls->granularity;
unsigned long long min_short_size = controls->min;
gran_short_size = virFormatIntPretty(gran_short_size, &unit);
min_short_size = virFormatIntPretty(min_short_size, &min_unit);
/* Only use the smaller unit if they are different */
if (min_short_size) {
unsigned long long gran_div;
unsigned long long min_div;
/* NOTE(review): divides by gran_short_size, which would be zero if
 * controls->granularity were 0 while controls->min is not -- confirm
 * that callers never produce that combination. */
gran_div = controls->granularity / gran_short_size;
min_div = controls->min / min_short_size;
/* The divisors are the unit multipliers chosen for each value;
 * rescale whichever value got the larger unit so that both are
 * expressed in the smaller one. */
if (min_div > gran_div) {
min_short_size *= min_div / gran_div;
} else if (min_div < gran_div) {
unit = min_unit;
gran_short_size *= gran_div / min_div;
}
}
virBufferAsprintf(&childrenBuf,
"<control granularity='%llu'",
gran_short_size);
/* the min attribute is omitted when the minimum is zero */
if (min_short_size)
virBufferAsprintf(&childrenBuf, " min='%llu'", min_short_size);
virBufferAsprintf(&childrenBuf,
" unit='%s' type='%s' maxAllocs='%u'/>\n",
unit,
virCacheTypeToString(controls->scope),
controls->max_allocation);
}
virXMLFormatElement(buf, "bank", &attrBuf, &childrenBuf);
}
if (virCapabilitiesFormatResctrlMonitor(buf, cache->monitor) < 0)
return -1;
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</cache>\n");
return 0;
}
/* Formats the host <memory_bandwidth> element into @buf: one <node> per
 * memory bandwidth node (each with a single <control/> child) followed
 * by the bandwidth monitor, if any.  Nothing is emitted when there are
 * neither nodes nor a monitor.
 *
 * Returns 0 on success, -1 if a node's CPU bitmap could not be
 * formatted or the monitor formatter failed. */
static int
virCapabilitiesFormatMemoryBandwidth(virBuffer *buf,
                                     virCapsHostMemBW *memBW)
{
    size_t idx;

    if (memBW->nnodes == 0 && memBW->monitor == NULL)
        return 0;

    virBufferAddLit(buf, "<memory_bandwidth>\n");
    virBufferAdjustIndent(buf, 2);

    for (idx = 0; idx < memBW->nnodes; idx++) {
        g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
        g_auto(virBuffer) childrenBuf = VIR_BUFFER_INIT_CHILD(buf);
        virCapsHostMemBWNode *node = memBW->nodes[idx];
        g_autofree char *cpus_str = virBitmapFormat(node->cpus);

        if (cpus_str == NULL)
            return -1;

        virBufferAsprintf(&attrBuf,
                          " id='%u' cpus='%s'",
                          node->id, cpus_str);

        virBufferAsprintf(&childrenBuf,
                          "<control granularity='%u' min='%u' "
                          "maxAllocs='%u'/>\n",
                          node->control.granularity,
                          node->control.min,
                          node->control.max_allocation);

        virXMLFormatElement(buf, "node", &attrBuf, &childrenBuf);
    }

    if (virCapabilitiesFormatResctrlMonitor(buf, memBW->monitor) < 0)
        return -1;

    virBufferAdjustIndent(buf, -2);
    virBufferAddLit(buf, "</memory_bandwidth>\n");

    return 0;
}
/* Formats the <host> element of the capabilities XML into @buf.  In
 * order it emits: the host UUID (if valid), the <cpu> block (arch,
 * feature names, CPU definition, supported page sizes), power
 * management targets, IOMMU support, migration features (only when
 * offline migration is available), the network prefix, NUMA topology,
 * cache and memory bandwidth information, and the security models.
 *
 * Nothing is emitted when the host has no valid UUID, no arch, no power
 * management bits and no IOMMU.
 *
 * NOTE(review): feature names, migration transports, the network
 * prefix and the security model strings are inserted with plain
 * virBufferAsprintf(), i.e. without XML escaping -- confirm these
 * values can never contain markup characters.
 *
 * Returns 0 on success, -1 if the NUMA, cache or memory bandwidth
 * formatter failed. */
static int
virCapabilitiesFormatHostXML(virCapsHost *host,
virBuffer *buf)
{
size_t i, j;
char host_uuid[VIR_UUID_STRING_BUFLEN];
/* The lack of some data means we have nothing
* minimally to format, so just return. */
if (!virUUIDIsValid(host->host_uuid) &&
!host->arch && !host->powerMgmt && !host->iommu)
return 0;
virBufferAddLit(buf, "<host>\n");
virBufferAdjustIndent(buf, 2);
if (virUUIDIsValid(host->host_uuid)) {
virUUIDFormat(host->host_uuid, host_uuid);
virBufferAsprintf(buf, "<uuid>%s</uuid>\n", host_uuid);
}
virBufferAddLit(buf, "<cpu>\n");
virBufferAdjustIndent(buf, 2);
if (host->arch)
virBufferAsprintf(buf, "<arch>%s</arch>\n",
virArchToString(host->arch));
if (host->nfeatures) {
virBufferAddLit(buf, "<features>\n");
virBufferAdjustIndent(buf, 2);
/* each feature name becomes an empty element of that name */
for (i = 0; i < host->nfeatures; i++) {
virBufferAsprintf(buf, "<%s/>\n",
host->features[i]);
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</features>\n");
}
virCPUDefFormatBuf(buf, host->cpu);
for (i = 0; i < host->nPagesSize; i++) {
virBufferAsprintf(buf, "<pages unit='KiB' size='%u'/>\n",
host->pagesSize[i]);
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</cpu>\n");
/* The PM query was successful. */
if (host->powerMgmt) {
/* The host supports some PM features. */
unsigned int pm = host->powerMgmt;
virBufferAddLit(buf, "<power_management>\n");
virBufferAdjustIndent(buf, 2);
/* powerMgmt is a bitmask: peel off the lowest set bit each round and
 * emit the target name registered for that bit index */
while (pm) {
int bit = __builtin_ffs(pm) - 1;
virBufferAsprintf(buf, "<%s/>\n",
virCapsHostPMTargetTypeToString(bit));
pm &= ~(1U << bit);
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</power_management>\n");
} else {
/* The host does not support any PM feature. */
virBufferAddLit(buf, "<power_management/>\n");
}
virBufferAsprintf(buf, "<iommu support='%s'/>\n",
host->iommu ? "yes" : "no");
/* live migration and URI transports are only reported as children of
 * <migration_features>, i.e. only when offline migration is set */
if (host->offlineMigrate) {
virBufferAddLit(buf, "<migration_features>\n");
virBufferAdjustIndent(buf, 2);
if (host->liveMigrate)
virBufferAddLit(buf, "<live/>\n");
if (host->nmigrateTrans) {
virBufferAddLit(buf, "<uri_transports>\n");
virBufferAdjustIndent(buf, 2);
for (i = 0; i < host->nmigrateTrans; i++) {
virBufferAsprintf(buf, "<uri_transport>%s</uri_transport>\n",
host->migrateTrans[i]);
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</uri_transports>\n");
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</migration_features>\n");
}
if (host->netprefix)
virBufferAsprintf(buf, "<netprefix>%s</netprefix>\n",
host->netprefix);
if (virCapabilitiesHostNUMAFormat(buf, host->numa) < 0)
return -1;
if (virCapabilitiesFormatCaches(buf, &host->cache) < 0)
return -1;
if (virCapabilitiesFormatMemoryBandwidth(buf, &host->memBW) < 0)
return -1;
/* one <secmodel> per registered security model, with its base labels */
for (i = 0; i < host->nsecModels; i++) {
virBufferAddLit(buf, "<secmodel>\n");
virBufferAdjustIndent(buf, 2);
virBufferAsprintf(buf, "<model>%s</model>\n",
host->secModels[i].model);
virBufferAsprintf(buf, "<doi>%s</doi>\n",
host->secModels[i].doi);
for (j = 0; j < host->secModels[i].nlabels; j++) {
virBufferAsprintf(buf, "<baselabel type='%s'>%s</baselabel>\n",
host->secModels[i].labels[j].type,
host->secModels[i].labels[j].label);
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</secmodel>\n");
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</host>\n\n");
return 0;
}
/* Formats the <features> element of a guest into @buf, with one empty
 * child element per feature marked present.  The default and toggle
 * attributes are added only when the stored tristate value is
 * non-zero. */
static void
virCapabilitiesFormatGuestFeatures(virCapsGuest *guest,
                                   virBuffer *buf)
{
    g_auto(virBuffer) featuresBuf = VIR_BUFFER_INIT_CHILD(buf);
    size_t type;

    for (type = 0; type < VIR_CAPS_GUEST_FEATURE_TYPE_LAST; type++) {
        virCapsGuestFeature *entry = &guest->features[type];

        if (entry->present) {
            virBufferAsprintf(&featuresBuf, "<%s",
                              virCapsGuestFeatureInfos[type].name);

            if (entry->defaultOn != 0) {
                virBufferAsprintf(&featuresBuf, " default='%s'",
                                  virTristateSwitchTypeToString(entry->defaultOn));
            }

            if (entry->toggle != 0) {
                virBufferAsprintf(&featuresBuf, " toggle='%s'",
                                  virTristateBoolTypeToString(entry->toggle));
            }

            virBufferAddLit(&featuresBuf, "/>\n");
        }
    }

    virXMLFormatElement(buf, "features", NULL, &featuresBuf);
}
static void
virCapabilitiesFormatGuestXML(virCapsGuest **guests,
size_t nguests,
virBuffer *buf)
{
size_t i, j, k;
for (i = 0; i < nguests; i++) {
virBufferAddLit(buf, "<guest>\n");
virBufferAdjustIndent(buf, 2);
virBufferAsprintf(buf, "<os_type>%s</os_type>\n",
virDomainOSTypeToString(guests[i]->ostype));
if (guests[i]->arch.id)
virBufferAsprintf(buf, "<arch name='%s'>\n",
virArchToString(guests[i]->arch.id));
virBufferAdjustIndent(buf, 2);
virBufferAsprintf(buf, "<wordsize>%d</wordsize>\n",
guests[i]->arch.wordsize);
if (guests[i]->arch.defaultInfo.emulator)
virBufferAsprintf(buf, "<emulator>%s</emulator>\n",
guests[i]->arch.defaultInfo.emulator);
if (guests[i]->arch.defaultInfo.loader)
virBufferAsprintf(buf, "<loader>%s</loader>\n",
guests[i]->arch.defaultInfo.loader);
for (j = 0; j < guests[i]->arch.defaultInfo.nmachines; j++) {
virCapsGuestMachine *machine = guests[i]->arch.defaultInfo.machines[j];
virBufferAddLit(buf, "<machine");
if (machine->canonical)
virBufferAsprintf(buf, " canonical='%s'", machine->canonical);
if (machine->maxCpus > 0)
virBufferAsprintf(buf, " maxCpus='%d'", machine->maxCpus);
if (machine->deprecated)
virBufferAddLit(buf, " deprecated='yes'");
virBufferAsprintf(buf, ">%s</machine>\n", machine->name);
}
for (j = 0; j < guests[i]->arch.ndomains; j++) {
virBufferAsprintf(buf, "<domain type='%s'",
virDomainVirtTypeToString(guests[i]->arch.domains[j]->type));
if (!guests[i]->arch.domains[j]->info.emulator &&
!guests[i]->arch.domains[j]->info.loader &&
!guests[i]->arch.domains[j]->info.nmachines) {
virBufferAddLit(buf, "/>\n");
continue;
}
virBufferAddLit(buf, ">\n");
virBufferAdjustIndent(buf, 2);
if (guests[i]->arch.domains[j]->info.emulator)
virBufferAsprintf(buf, "<emulator>%s</emulator>\n",
guests[i]->arch.domains[j]->info.emulator);
if (guests[i]->arch.domains[j]->info.loader)
virBufferAsprintf(buf, "<loader>%s</loader>\n",
guests[i]->arch.domains[j]->info.loader);
for (k = 0; k < guests[i]->arch.domains[j]->info.nmachines; k++) {
virCapsGuestMachine *machine = guests[i]->arch.domains[j]->info.machines[k];
virBufferAddLit(buf, "<machine");
if (machine->canonical)
virBufferAsprintf(buf, " canonical='%s'", machine->canonical);
if (machine->maxCpus > 0)
virBufferAsprintf(buf, " maxCpus='%d'", machine->maxCpus);
virBufferAsprintf(buf, ">%s</machine>\n", machine->name);
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</domain>\n");
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</arch>\n");
virCapabilitiesFormatGuestFeatures(guests[i], buf);
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</guest>\n\n");
}
}
/**
 * virCapabilitiesFormatStoragePoolXML:
 * @pools: array of storage pool capability entries
 * @npools: number of entries in @pools
 * @buf: buffer the XML is appended to
 *
 * Appends a <pool> element containing an <enum name='type'> list with
 * one <value> per entry of @pools. Nothing is written when @npools is 0.
 */
static void
virCapabilitiesFormatStoragePoolXML(virCapsStoragePool **pools,
                                    size_t npools,
                                    virBuffer *buf)
{
    size_t idx = 0;

    if (!npools)
        return;

    virBufferAddLit(buf, "<pool>\n");
    virBufferAdjustIndent(buf, 2);

    virBufferAddLit(buf, "<enum name='type'>\n");
    virBufferAdjustIndent(buf, 2);

    while (idx < npools) {
        virBufferAsprintf(buf, "<value>%s</value>\n",
                          virStoragePoolTypeToString(pools[idx]->type));
        idx++;
    }

    virBufferAdjustIndent(buf, -2);
    virBufferAddLit(buf, "</enum>\n");

    virBufferAdjustIndent(buf, -2);
    virBufferAddLit(buf, "</pool>\n\n");
}
/**
 * virCapabilitiesFormatXML:
 * @caps: capabilities to format
 *
 * Serializes @caps into a <capabilities> XML document consisting of the
 * host section, the guest sections and the storage pool section, in that
 * order.
 *
 * Returns the XML document as a string, or NULL if formatting the host
 * section fails.
 */
char *
virCapabilitiesFormatXML(virCaps *caps)
{
    g_auto(virBuffer) xml = VIR_BUFFER_INITIALIZER;
    virBuffer *out = &xml;

    virBufferAddLit(out, "<capabilities>\n\n");
    virBufferAdjustIndent(out, 2);

    /* the host section is the only formatter reporting failure */
    if (virCapabilitiesFormatHostXML(&caps->host, out) < 0)
        return NULL;

    virCapabilitiesFormatGuestXML(caps->guests, caps->nguests, out);
    virCapabilitiesFormatStoragePoolXML(caps->pools, caps->npools, out);

    virBufferAdjustIndent(out, -2);
    virBufferAddLit(out, "</capabilities>\n");

    return virBufferContentAndReset(out);
}
/*
 * Returns the highest CPU id found in any cell of @caps, or 0 when no
 * cell lists a CPU.
 */
static unsigned int
virCapabilitiesHostNUMAGetMaxcpu(virCapsHostNUMA *caps)
{
    unsigned int highest = 0;
    size_t n;

    for (n = 0; n < caps->cells->len; n++) {
        virCapsHostNUMACell *cell = g_ptr_array_index(caps->cells, n);
        size_t c = 0;

        while (c < cell->ncpus) {
            if (highest < cell->cpus[c].id)
                highest = cell->cpus[c].id;
            c++;
        }
    }

    return highest;
}
/*
 * Sets the bit of every CPU belonging to NUMA node @node in @cpumask.
 * A @node that matches no cell of @caps leaves @cpumask untouched.
 *
 * Returns 0 on success, -1 (with an error reported) when a CPU id does
 * not fit into @cpumask.
 */
static int
virCapabilitiesHostNUMAGetCellCpus(virCapsHostNUMA *caps,
                                   size_t node,
                                   virBitmap *cpumask)
{
    virCapsHostNUMACell *match = NULL;
    size_t idx;
    size_t c;

    /* The numa node numbers can be non-contiguous (e.g. 0,1,16,17), so
     * the cell has to be looked up by its number, not by its position. */
    for (idx = 0; idx < caps->cells->len && !match; idx++) {
        virCapsHostNUMACell *candidate = g_ptr_array_index(caps->cells, idx);

        if (candidate->num == node)
            match = candidate;
    }

    if (!match)
        return 0;

    for (c = 0; c < match->ncpus; c++) {
        if (virBitmapSetBit(cpumask, match->cpus[c].id) >= 0)
            continue;

        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cpu '%u' in node '%zu' is out of range "
                         "of the provided bitmap"),
                       match->cpus[c].id, node);
        return -1;
    }

    return 0;
}
/**
 * virCapabilitiesHostNUMAGetCpus:
 * @caps: host NUMA capabilities
 * @nodemask: bitmap of NUMA node numbers to collect CPUs for
 *
 * Builds a bitmap, sized to hold the highest CPU id known to @caps,
 * with the bits of all CPUs belonging to the nodes set in @nodemask.
 *
 * Returns the new bitmap, or NULL if collecting a node's CPUs failed.
 */
virBitmap *
virCapabilitiesHostNUMAGetCpus(virCapsHostNUMA *caps,
                               virBitmap *nodemask)
{
    unsigned int maxcpu = virCapabilitiesHostNUMAGetMaxcpu(caps);
    virBitmap *cpumask = virBitmapNew(maxcpu + 1);
    ssize_t nodeid = -1;

    for (;;) {
        nodeid = virBitmapNextSetBit(nodemask, nodeid);
        if (nodeid < 0)
            break;

        if (virCapabilitiesHostNUMAGetCellCpus(caps, nodeid, cpumask) < 0) {
            virBitmapFree(cpumask);
            return NULL;
        }
    }

    return cpumask;
}
/**
 * virCapabilitiesGetNodeInfo:
 * @nodeinfo: structure to fill; it is zeroed first
 *
 * Fills @nodeinfo with the host architecture name as model, the value
 * reported by virHostMemGetInfo() divided by 1024 as memory, and the CPU
 * counts, frequency and topology obtained from virHostCPUGetInfo().
 *
 * Returns 0 on success, -1 on failure.
 */
int
virCapabilitiesGetNodeInfo(virNodeInfoPtr nodeinfo)
{
    unsigned long long membytes;
    virArch arch = virArchFromHost();

    memset(nodeinfo, 0, sizeof(*nodeinfo));

    if (virStrcpyStatic(nodeinfo->model, virArchToString(arch)) < 0 ||
        virHostMemGetInfo(&membytes, NULL) < 0)
        return -1;

    nodeinfo->memory = membytes / 1024;

    if (virHostCPUGetInfo(arch,
                          &nodeinfo->cpus, &nodeinfo->mhz,
                          &nodeinfo->nodes, &nodeinfo->sockets,
                          &nodeinfo->cores, &nodeinfo->threads) >= 0)
        return 0;

    return -1;
}
/*
 * Fills @cpu with the topology of host CPU @cpu_id: its id, its socket,
 * die and core ids and the list of its sibling CPUs.
 *
 * Returns 0 on success and -1 on failure. On platforms other than Linux
 * the function always fails with VIR_ERR_NO_SUPPORT.
 */
static int
virCapabilitiesFillCPUInfo(int cpu_id G_GNUC_UNUSED,
                           virCapsHostNUMACellCPU *cpu G_GNUC_UNUSED)
{
#ifdef __linux__
    cpu->id = cpu_id;

    if (virHostCPUGetSocket(cpu_id, &cpu->socket_id) < 0 ||
        virHostCPUGetDie(cpu_id, &cpu->die_id) < 0 ||
        virHostCPUGetCore(cpu_id, &cpu->core_id) < 0)
        return -1;

    if (!(cpu->siblings = virHostCPUGetSiblingsList(cpu_id)))
        return -1;

    return 0;
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node cpu info not implemented on this platform"));
    return -1;
#endif
}
/*
 * Queries the distances of NUMA node @node via virNumaGetDistances() and
 * stores the non-zero ones in a newly allocated array returned through
 * @distancesRet, with its length in @ndistancesRet. Each entry records
 * the index of the distance as its cell id. When no distances are
 * available, @distancesRet is set to NULL and @ndistancesRet to 0.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virCapabilitiesGetNUMADistances(int node,
                                virNumaDistance **distancesRet,
                                int *ndistancesRet)
{
    int *raw = NULL;
    int nraw = 0;
    virNumaDistance *filtered = NULL;
    int nfiltered = 0;
    size_t pos;

    if (virNumaGetDistances(node, &raw, &nraw) < 0) {
        VIR_FREE(raw);
        return -1;
    }

    if (!raw) {
        *distancesRet = NULL;
        *ndistancesRet = 0;
        return 0;
    }

    filtered = g_new0(virNumaDistance, nraw);

    for (pos = 0; pos < nraw; pos++) {
        if (raw[pos]) {
            filtered[nfiltered].cellid = pos;
            filtered[nfiltered].value = raw[pos];
            nfiltered++;
        }
    }

    /* shrink the array to the number of entries actually used */
    VIR_REALLOC_N(filtered, nfiltered);

    *ndistancesRet = nfiltered;
    *distancesRet = g_steal_pointer(&filtered);

    VIR_FREE(raw);
    return 0;
}
/*
 * Queries page sizes and page availability of NUMA node @node via
 * virNumaGetPages() and returns them as a newly allocated array in
 * @pageinfo, with the number of entries in @npageinfo.
 *
 * Returns 0 on success, -1 on failure (outputs are left untouched).
 */
static int
virCapabilitiesGetNUMAPagesInfo(int node,
                                virCapsHostNUMACellPageInfo **pageinfo,
                                int *npageinfo)
{
    unsigned int *sizes = NULL;
    unsigned long long *avail = NULL;
    size_t count;
    size_t idx;
    int rc = -1;

    if (virNumaGetPages(node, &sizes, &avail, NULL, &count) >= 0) {
        virCapsHostNUMACellPageInfo *info = g_new0(virCapsHostNUMACellPageInfo, count);

        for (idx = 0; idx < count; idx++) {
            info[idx].size = sizes[idx];
            info[idx].avail = avail[idx];
        }

        *pageinfo = info;
        *npageinfo = count;
        rc = 0;
    }

    VIR_FREE(avail);
    VIR_FREE(sizes);
    return rc;
}
/*
 * Reads an unsigned integer from the file @prefix/@dir/@file into @value.
 *
 * Returns 0 on success, -1 on failure. When virFileReadValueUint()
 * returns -2 a "does not exist" error is reported here; for any other
 * negative value no additional error is raised by this function.
 */
static int
virCapabilitiesGetNodeCacheReadFile(const char *prefix,
                                    const char *dir,
                                    const char *file,
                                    unsigned int *value)
{
    g_autofree char *path = g_build_filename(prefix, dir, file, NULL);
    int rv = virFileReadValueUint(value, "%s", path);

    if (rv >= 0)
        return 0;

    if (rv == -2) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("File '%s' does not exist"),
                       path);
    }

    return -1;
}
/*
 * Comparison callback ordering virNumaCache entries by ascending level.
 *
 * The levels are compared explicitly rather than subtracted: the level
 * is parsed as an unsigned value, so a subtraction narrowed to int could
 * yield the wrong sign for large differences.
 *
 * Returns a negative value, zero or a positive value if @a sorts before,
 * equal to or after @b.
 */
static int
virCapsHostNUMACellCacheComparator(const void *a,
                                   const void *b)
{
    const virNumaCache *aa = a;
    const virNumaCache *bb = b;

    if (aa->level < bb->level)
        return -1;
    if (aa->level > bb->level)
        return 1;
    return 0;
}
/*
 * virCapabilitiesGetNodeCache:
 * @node: NUMA node number used to build the sysfs path
 * @cachesRet: set to the array of detected caches, or to NULL if none
 *
 * Walks the "memory_side_cache" sysfs directory of @node and builds one
 * virNumaCache entry from every "index..." subdirectory (level, size,
 * line size, associativity and write policy). The resulting array is
 * sorted with virCapsHostNUMACellCacheComparator.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virCapabilitiesGetNodeCache(int node,
GArray **cachesRet)
{
g_autoptr(DIR) dir = NULL;
int direrr = 0;
struct dirent *entry;
g_autofree char *path = NULL;
g_autoptr(GArray) caches = g_array_new(FALSE, FALSE, sizeof(virNumaCache));
path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/node%d/memory_side_cache", node);
if (virDirOpenIfExists(&dir, path) < 0)
return -1;
/* The loop is skipped when @dir is still NULL after a successful
 * virDirOpenIfExists() (presumably: the directory does not exist). */
while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) {
/* dname points past the "index" prefix; entries for which STRSKIP
 * yields NULL are ignored below */
const char *dname = STRSKIP(entry->d_name, "index");
virNumaCache cache = { 0 };
unsigned int indexing;
unsigned int write_policy;
if (!dname)
continue;
/* the remainder of the directory name is the cache level */
if (virStrToLong_ui(dname, NULL, 10, &cache.level) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("unable to parse %s"),
entry->d_name);
return -1;
}
if (virCapabilitiesGetNodeCacheReadFile(path, entry->d_name,
"size", &cache.size) < 0)
return -1;
cache.size >>= 10; /* read in bytes but stored in kibibytes */
if (virCapabilitiesGetNodeCacheReadFile(path, entry->d_name,
"line_size", &cache.line) < 0)
return -1;
if (virCapabilitiesGetNodeCacheReadFile(path, entry->d_name,
"indexing", &indexing) < 0)
return -1;
/* see enum cache_indexing in kernel */
switch (indexing) {
case 0: cache.associativity = VIR_NUMA_CACHE_ASSOCIATIVITY_DIRECT; break;
case 1: cache.associativity = VIR_NUMA_CACHE_ASSOCIATIVITY_FULL; break;
case 2: cache.associativity = VIR_NUMA_CACHE_ASSOCIATIVITY_NONE; break;
default:
virReportError(VIR_ERR_INTERNAL_ERROR,
_("unknown indexing value '%u'"),
indexing);
return -1;
}
if (virCapabilitiesGetNodeCacheReadFile(path, entry->d_name,
"write_policy", &write_policy) < 0)
return -1;
/* see enum cache_write_policy in kernel */
switch (write_policy) {
case 0: cache.policy = VIR_NUMA_CACHE_POLICY_WRITEBACK; break;
case 1: cache.policy = VIR_NUMA_CACHE_POLICY_WRITETHROUGH; break;
case 2: cache.policy = VIR_NUMA_CACHE_POLICY_NONE; break;
default:
virReportError(VIR_ERR_INTERNAL_ERROR,
_("unknown write_policy value '%u'"),
write_policy);
return -1;
}
g_array_append_val(caches, cache);
}
/* a negative result of the last virDirRead() call is a failure */
if (direrr < 0)
return -1;
/* hand the array to the caller only when it is non-empty; otherwise
 * report "no caches" as NULL and let the autoptr drop the empty array */
if (caches->len > 0) {
g_array_sort(caches, virCapsHostNUMACellCacheComparator);
*cachesRet = g_steal_pointer(&caches);
} else {
*cachesRet = NULL;
}
return 0;
}
/*
 * Populates @caps with a fabricated NUMA topology derived from the
 * values reported by virCapabilitiesGetNodeInfo().
 *
 * For every node reported in the node info, sockets * cores * threads
 * CPU slots are allocated; CPU ids are assigned sequentially and only
 * CPUs reported online by virHostCPUGetOnline() are recorded. All threads
 * of one core share the same sibling bitmap. Every cell is added with
 * number 0 and the total host memory.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virCapabilitiesHostNUMAInitFake(virCapsHostNUMA *caps)
{
    virNodeInfo nodeinfo;
    virCapsHostNUMACellCPU *cpus = NULL;
    int ncpus;
    int n, s, c, t;
    int id;
    int cid = 0;
    bool tmp;

    if (virCapabilitiesGetNodeInfo(&nodeinfo) < 0)
        return -1;

    ncpus = VIR_NODEINFO_MAXCPUS(nodeinfo);

    id = 0;
    for (n = 0; n < nodeinfo.nodes; n++) {
        int nodecpus = nodeinfo.sockets * nodeinfo.cores * nodeinfo.threads;

        cid = 0;
        cpus = g_new0(virCapsHostNUMACellCPU, nodecpus);

        for (s = 0; s < nodeinfo.sockets; s++) {
            for (c = 0; c < nodeinfo.cores; c++) {
                g_autoptr(virBitmap) siblings = virBitmapNew(ncpus);

                /* all threads of this core are siblings of each other */
                for (t = 0; t < nodeinfo.threads; t++)
                    ignore_value(virBitmapSetBit(siblings, id + t));

                for (t = 0; t < nodeinfo.threads; t++) {
                    if (virHostCPUGetOnline(id, &tmp) < 0)
                        goto error;

                    if (tmp) {
                        cpus[cid].id = id;
                        cpus[cid].die_id = 0;
                        cpus[cid].socket_id = s;
                        cpus[cid].core_id = c;
                        cpus[cid].siblings = virBitmapNewCopy(siblings);
                        cid++;
                    }

                    /* the id advances for offline CPUs as well */
                    id++;
                }
            }
        }

        virCapabilitiesHostNUMAAddCell(caps, 0,
                                       nodeinfo.memory,
                                       cid, &cpus,
                                       0, NULL,
                                       0, NULL,
                                       NULL);
    }

    return 0;

 error:
    /* cpus[cid] itself was never filled; it is zero-initialized, so
     * starting the walk there is harmless */
    for (; cid >= 0; cid--)
        virBitmapFree(cpus[cid].siblings);
    VIR_FREE(cpus);
    return -1;
}
/*
 * Appends four entries describing the link between @initiator and
 * @target to @interconnects, in this order: read bandwidth, write
 * bandwidth, read latency, write latency. The fourth member of every
 * entry (positionally initialized) is set to 0.
 */
static void
virCapabilitiesHostInsertHMAT(GArray *interconnects,
                              unsigned int initiator,
                              unsigned int target,
                              unsigned int read_bandwidth,
                              unsigned int write_bandwidth,
                              unsigned int read_latency,
                              unsigned int write_latency)
{
    virNumaInterconnect entries[] = {
        { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
          initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_bandwidth},
        { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
          initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_bandwidth},
        { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
          initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_latency},
        { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
          initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_latency},
    };
    size_t k;

    for (k = 0; k < sizeof(entries) / sizeof(entries[0]); k++)
        g_array_append_val(interconnects, entries[k]);
}
/*
 * Reads the bandwidth and latency attributes from the "access1/initiators"
 * sysfs directory of NUMA node @node and appends, for every "node<N>"
 * entry found in that directory, the corresponding entries to
 * @interconnects with <N> as initiator and @node as target.
 *
 * A node without an "access1" directory is silently skipped.
 *
 * Returns 0 on success, -1 on failure (including a failure to read the
 * initiators directory).
 */
static int
virCapabilitiesHostNUMAInitInterconnectsNode(GArray *interconnects,
                                             unsigned int node)
{
    g_autofree char *path = NULL;
    g_autofree char *initPath = NULL;
    g_autoptr(DIR) dir = NULL;
    int direrr = 0;
    struct dirent *entry;
    unsigned int read_bandwidth;
    unsigned int write_bandwidth;
    unsigned int read_latency;
    unsigned int write_latency;

    /* Unfortunately, the kernel does not expose the full HMAT table. It
     * does, in its binary form under /sys/firmware/acpi/tables/HMAT, but
     * we don't want to parse that. Some important info is still exposed
     * under the "access0" and "access1" directories. The former contains
     * the best interconnect to the given node including CPUs and devices
     * that might do I/O (such as GPUs and NICs). The latter contains the
     * best interconnect to the given node but only CPUs are considered.
     * Stick with access1 until sysfs exposes the full table in a sensible
     * way.
     * NB on most systems access0 and access1 contain the same values. */
    path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/node%u/access1", node);

    if (!virFileExists(path))
        return 0;

    if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
                                            "read_bandwidth",
                                            &read_bandwidth) < 0)
        return -1;

    if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
                                            "write_bandwidth",
                                            &write_bandwidth) < 0)
        return -1;

    /* Bandwidths are read in MiB but stored in KiB */
    read_bandwidth <<= 10;
    write_bandwidth <<= 10;

    if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
                                            "read_latency",
                                            &read_latency) < 0)
        return -1;

    if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
                                            "write_latency",
                                            &write_latency) < 0)
        return -1;

    initPath = g_strdup_printf("%s/initiators", path);

    if (virDirOpen(&dir, initPath) < 0)
        return -1;

    /* name the directory that is actually being read (initPath, not its
     * parent) so that a read failure refers to the right location */
    while ((direrr = virDirRead(dir, &entry, initPath)) > 0) {
        const char *dname = STRSKIP(entry->d_name, "node");
        unsigned int initNode;

        if (!dname)
            continue;

        if (virStrToLong_ui(dname, NULL, 10, &initNode) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("unable to parse %s"),
                           entry->d_name);
            return -1;
        }

        virCapabilitiesHostInsertHMAT(interconnects,
                                      initNode, node,
                                      read_bandwidth,
                                      write_bandwidth,
                                      read_latency,
                                      write_latency);
    }

    /* do not report success after a failed directory read */
    if (direrr < 0)
        return -1;

    return 0;
}
/*
 * Comparison callback ordering virNumaInterconnect entries by type,
 * initiator, target, cache, access type and finally value.
 *
 * Every key is compared explicitly instead of being subtracted, because
 * a difference narrowed to the int return type can truncate or wrap and
 * then carry the wrong sign (or be zero for unequal values).
 *
 * Returns a negative value, zero or a positive value if @a sorts before,
 * equal to or after @b.
 */
static int
virCapsHostNUMAInterconnectComparator(const void *a,
                                      const void *b)
{
    const virNumaInterconnect *aa = a;
    const virNumaInterconnect *bb = b;

    if (aa->type != bb->type)
        return aa->type < bb->type ? -1 : 1;

    if (aa->initiator != bb->initiator)
        return aa->initiator < bb->initiator ? -1 : 1;

    if (aa->target != bb->target)
        return aa->target < bb->target ? -1 : 1;

    if (aa->cache != bb->cache)
        return aa->cache < bb->cache ? -1 : 1;

    if (aa->accessType != bb->accessType)
        return aa->accessType < bb->accessType ? -1 : 1;

    if (aa->value != bb->value)
        return aa->value < bb->value ? -1 : 1;

    return 0;
}
/*
 * Collects interconnect information for every "node<N>" directory found
 * under the sysfs node directory and, if any entries were gathered,
 * stores them sorted in @caps->interconnects.
 *
 * Returns 0 on success, -1 on failure (including a failure to read the
 * node directory).
 */
static int
virCapabilitiesHostNUMAInitInterconnects(virCapsHostNUMA *caps)
{
    g_autoptr(DIR) dir = NULL;
    int direrr = 0;
    struct dirent *entry;
    const char *path = SYSFS_SYSTEM_PATH "/node/";
    g_autoptr(GArray) interconnects = g_array_new(FALSE, FALSE, sizeof(virNumaInterconnect));

    if (virDirOpenIfExists(&dir, path) < 0)
        return -1;

    /* the loop is skipped when @dir was left NULL by virDirOpenIfExists() */
    while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) {
        const char *dname = STRSKIP(entry->d_name, "node");
        unsigned int node;

        if (!dname)
            continue;

        if (virStrToLong_ui(dname, NULL, 10, &node) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("unable to parse %s"),
                           entry->d_name);
            return -1;
        }

        if (virCapabilitiesHostNUMAInitInterconnectsNode(interconnects, node) < 0)
            return -1;
    }

    /* do not report success after a failed directory read */
    if (direrr < 0)
        return -1;

    if (interconnects->len > 0) {
        g_array_sort(interconnects, virCapsHostNUMAInterconnectComparator);
        caps->interconnects = g_steal_pointer(&interconnects);
    }

    return 0;
}
/*
 * Populates @caps with the NUMA topology detected on the host: for every
 * node number up to virNumaGetMaxNode() it gathers the node's CPUs,
 * distances, page info, caches and memory and adds a cell, then collects
 * the interconnect information.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virCapabilitiesHostNUMAInitReal(virCapsHostNUMA *caps)
{
    int n;
    virCapsHostNUMACellCPU *cpus = NULL;
    int ret = -1;
    int ncpus = 0;
    int max_node;

    if ((max_node = virNumaGetMaxNode()) < 0)
        goto cleanup;

    for (n = 0; n <= max_node; n++) {
        g_autoptr(virBitmap) cpumap = NULL;
        g_autofree virNumaDistance *distances = NULL;
        int ndistances = 0;
        g_autofree virCapsHostNUMACellPageInfo *pageinfo = NULL;
        int npageinfo = 0;
        /* zero-initialized: the result of virNumaGetNodeMemory() below is
         * not checked, so never shift an indeterminate value */
        unsigned long long memory = 0;
        g_autoptr(GArray) caches = NULL;
        int cpu;
        size_t i;

        if ((ncpus = virNumaGetNodeCPUs(n, &cpumap)) < 0) {
            int skip = ncpus == -2;

            /* never let a negative error code be used as the element
             * count passed to the cleanup code */
            ncpus = 0;

            /* -2 means this node number is skipped rather than treated
             * as a failure */
            if (skip)
                continue;

            goto cleanup;
        }

        cpus = g_new0(virCapsHostNUMACellCPU, ncpus);
        cpu = 0;

        for (i = 0; i < virBitmapSize(cpumap); i++) {
            if (virBitmapIsBitSet(cpumap, i)) {
                if (virCapabilitiesFillCPUInfo(i, cpus + cpu++) < 0)
                    goto cleanup;
            }
        }

        if (virCapabilitiesGetNUMADistances(n, &distances, &ndistances) < 0)
            goto cleanup;

        if (virCapabilitiesGetNUMAPagesInfo(n, &pageinfo, &npageinfo) < 0)
            goto cleanup;

        if (virCapabilitiesGetNodeCache(n, &caches) < 0)
            goto cleanup;

        /* Detect the amount of memory in the numa cell in KiB */
        virNumaGetNodeMemory(n, &memory, NULL);
        memory >>= 10;

        /* NOTE(review): the arrays are passed by address, presumably so
         * that the new cell takes ownership of them - confirm */
        virCapabilitiesHostNUMAAddCell(caps, n, memory,
                                       ncpus, &cpus,
                                       ndistances, &distances,
                                       npageinfo, &pageinfo,
                                       &caches);
    }

    if (virCapabilitiesHostNUMAInitInterconnects(caps) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    virCapabilitiesClearHostNUMACellCPUTopology(cpus, ncpus);
    VIR_FREE(cpus);
    return ret;
}
/**
 * virCapabilitiesHostNUMANew:
 *
 * Allocates an empty host NUMA capabilities object with a reference
 * count of 1 and an empty cell array whose elements are released with
 * virCapabilitiesFreeHostNUMACell().
 *
 * Returns the new object.
 */
virCapsHostNUMA *
virCapabilitiesHostNUMANew(void)
{
    virCapsHostNUMA *numa = g_new0(virCapsHostNUMA, 1);

    numa->cells = g_ptr_array_new_with_free_func(
        (GDestroyNotify)virCapabilitiesFreeHostNUMACell);
    numa->refs = 1;

    return numa;
}
/**
 * virCapabilitiesHostNUMANewHost:
 *
 * Creates a host NUMA capabilities object. When NUMA is available the
 * real topology is probed first; if that fails, a warning is logged and
 * a fresh object is filled with a fabricated topology instead. The
 * fabricated topology is also used when NUMA is not available.
 *
 * Returns the new object, or NULL if the fabricated topology could not
 * be built either.
 */
virCapsHostNUMA *
virCapabilitiesHostNUMANewHost(void)
{
    virCapsHostNUMA *numa = virCapabilitiesHostNUMANew();

    if (virNumaIsAvailable()) {
        if (virCapabilitiesHostNUMAInitReal(numa) == 0)
            return numa;

        /* start over with a pristine object for the fallback */
        virCapabilitiesHostNUMAUnref(numa);
        numa = virCapabilitiesHostNUMANew();
        VIR_WARN("Failed to query host NUMA topology, faking single NUMA node");
    }

    if (virCapabilitiesHostNUMAInitFake(numa) >= 0)
        return numa;

    virCapabilitiesHostNUMAUnref(numa);
    return NULL;
}
/**
 * virCapabilitiesInitPages:
 * @caps: capabilities to update
 *
 * Queries the page sizes via virNumaGetPages() and stores the resulting
 * array and its length in @caps->host.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virCapabilitiesInitPages(virCaps *caps)
{
    unsigned int *sizes = NULL;
    size_t nsizes;

    if (virNumaGetPages(-1 /* Magic constant for overall info */,
                        &sizes, NULL, NULL, &nsizes) < 0) {
        VIR_FREE(sizes);
        return -1;
    }

    caps->host.nPagesSize = nsizes;
    caps->host.pagesSize = g_steal_pointer(&sizes);
    return 0;
}
/**
 * virCapsHostCacheBankEquals:
 * @a: first cache bank
 * @b: second cache bank
 *
 * Returns true when both banks have the same id, level, type and size
 * and their CPU bitmaps compare equal with virBitmapEqual().
 */
bool
virCapsHostCacheBankEquals(virCapsHostCacheBank *a,
                           virCapsHostCacheBank *b)
{
    if (a->id != b->id ||
        a->level != b->level ||
        a->type != b->type ||
        a->size != b->size)
        return false;

    return virBitmapEqual(a->cpus, b->cpus);
}
/**
 * virCapsHostCacheBankFree:
 * @ptr: cache bank to release, may be NULL
 *
 * Frees the bank together with its CPU bitmap, every control entry and
 * the control array itself.
 */
void
virCapsHostCacheBankFree(virCapsHostCacheBank *ptr)
{
    size_t n = 0;

    if (ptr == NULL)
        return;

    while (n < ptr->ncontrols) {
        g_free(ptr->controls[n]);
        n++;
    }
    g_free(ptr->controls);

    virBitmapFree(ptr->cpus);
    g_free(ptr);
}
/*
 * qsort() callback for an array of virCapsHostCacheBank pointers: banks
 * are ordered by ascending level, and by ascending id within a level.
 *
 * The id is compared explicitly, like the level, rather than subtracted:
 * it is read as an unsigned value, so a subtraction narrowed to int could
 * yield the wrong sign for large differences.
 */
static int
virCapsHostCacheBankSorter(const void *a,
                           const void *b)
{
    virCapsHostCacheBank *ca = *(virCapsHostCacheBank **)a;
    virCapsHostCacheBank *cb = *(virCapsHostCacheBank **)b;

    if (ca->level < cb->level)
        return -1;
    if (ca->level > cb->level)
        return 1;

    if (ca->id < cb->id)
        return -1;
    if (ca->id > cb->id)
        return 1;

    return 0;
}
/*
 * Makes sure @caps->host.resctrl is set, creating it with
 * virResctrlInfoNew() when it is not there yet.
 *
 * Returns 0 when the object exists afterwards, -1 otherwise.
 */
static int
virCapabilitiesInitResctrl(virCaps *caps)
{
    if (!caps->host.resctrl)
        caps->host.resctrl = virResctrlInfoNew();

    return caps->host.resctrl ? 0 : -1;
}
/*
 * For every cache bank already recorded in @caps, asks resctrl for
 * memory bandwidth information at the bank's level. Whenever the query
 * returns a positive value, a memory bandwidth node carrying the bank's
 * id and a copy of its CPU bitmap is appended to @caps->host.memBW.
 * Finally the monitor prefix for memory bandwidth monitoring is fetched.
 *
 * Returns 0 on success, -1 if fetching the monitor prefix fails.
 */
static int
virCapabilitiesInitResctrlMemory(virCaps *caps)
{
    const virResctrlMonitorType montype = VIR_RESCTRL_MONITOR_TYPE_MEMBW;
    const char *prefix = virResctrlMonitorPrefixTypeToString(montype);
    size_t idx;

    for (idx = 0; idx < caps->host.cache.nbanks; idx++) {
        virCapsHostCacheBank *bank = caps->host.cache.banks[idx];
        virCapsHostMemBWNode *node = g_new0(virCapsHostMemBWNode, 1);

        if (virResctrlInfoGetMemoryBandwidth(caps->host.resctrl,
                                             bank->level, &node->control) > 0) {
            node->id = bank->id;
            node->cpus = virBitmapNewCopy(bank->cpus);

            VIR_APPEND_ELEMENT(caps->host.memBW.nodes, caps->host.memBW.nnodes, node);
        }

        /* releases the node when it was not appended above */
        virCapsHostMemBWNodeFree(node);
    }

    if (virResctrlInfoGetMonitorPrefix(caps->host.resctrl, prefix,
                                       &caps->host.memBW.monitor) < 0)
        return -1;

    return 0;
}
/*
 * virCapabilitiesInitCaches:
 * @caps: capabilities to update
 *
 * Scans the sysfs "cache" directory of every online CPU and records each
 * distinct cache of level cache_min_level or higher as a bank in
 * @caps->host.cache, together with its resctrl control information. The
 * banks are then sorted, the memory bandwidth data is initialized and
 * the cache monitor prefix is fetched.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virCapabilitiesInitCaches(virCaps *caps)
{
size_t i = 0;
virBitmap *cpus = NULL;
ssize_t pos = -1;
int ret = -1;
char *path = NULL;
char *type = NULL;
struct dirent *ent = NULL;
virCapsHostCacheBank *bank = NULL;
const virResctrlMonitorType montype = VIR_RESCTRL_MONITOR_TYPE_CACHE;
const char *prefix = virResctrlMonitorPrefixTypeToString(montype);
/* Minimum level to expose in capabilities. Can be lowered or removed (with
* the appropriate code below), but should not be increased, because we'd
* lose information. */
const int cache_min_level = 3;
if (virCapabilitiesInitResctrl(caps) < 0)
return -1;
/* offline CPUs don't provide cache info */
if (virFileReadValueBitmap(&cpus, "%s/cpu/online", SYSFS_SYSTEM_PATH) < 0)
return -1;
/* outer loop: one iteration per CPU set in the "online" bitmap */
while ((pos = virBitmapNextSetBit(cpus, pos)) >= 0) {
int rv = -1;
g_autoptr(DIR) dirp = NULL;
VIR_FREE(path);
path = g_strdup_printf("%s/cpu/cpu%zd/cache/", SYSFS_SYSTEM_PATH, pos);
rv = virDirOpenIfExists(&dirp, path);
if (rv < 0)
goto cleanup;
/* no directory handle although the open succeeded: skip this CPU */
if (!dirp)
continue;
/* inner loop: one iteration per entry of the CPU's cache directory */
while ((rv = virDirRead(dirp, &ent, path)) > 0) {
int kernel_type;
unsigned int level;
if (!STRPREFIX(ent->d_name, "index"))
continue;
/* the level is read first so that caches below the minimum level
 * are skipped before a bank is allocated for them */
if (virFileReadValueUint(&level,
"%s/cpu/cpu%zd/cache/%s/level",
SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
goto cleanup;
if (level < cache_min_level)
continue;
bank = g_new0(virCapsHostCacheBank, 1);
bank->level = level;
if (virFileReadValueUint(&bank->id,
"%s/cpu/cpu%zd/cache/%s/id",
SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
goto cleanup;
/* NOTE(review): the same "level" file is read a second time here,
 * although bank->level was already assigned above */
if (virFileReadValueUint(&bank->level,
"%s/cpu/cpu%zd/cache/%s/level",
SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
goto cleanup;
if (virFileReadValueString(&type,
"%s/cpu/cpu%zd/cache/%s/type",
SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
goto cleanup;
if (virFileReadValueScaledInt(&bank->size,
"%s/cpu/cpu%zd/cache/%s/size",
SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
goto cleanup;
if (virFileReadValueBitmap(&bank->cpus,
"%s/cpu/cpu%zd/cache/%s/shared_cpu_list",
SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
goto cleanup;
kernel_type = virCacheKernelTypeFromString(type);
if (kernel_type < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unknown cache type '%s'"), type);
goto cleanup;
}
bank->type = kernel_type;
VIR_FREE(type);
/* look for an already recorded bank equal to this one; i ends up
 * equal to nbanks when there is none */
for (i = 0; i < caps->host.cache.nbanks; i++) {
if (virCapsHostCacheBankEquals(bank, caps->host.cache.banks[i]))
break;
}
if (i == caps->host.cache.nbanks) {
/* If it is a new cache, then update its resctrl information. */
if (virResctrlInfoGetCache(caps->host.resctrl,
bank->level,
bank->size,
&bank->ncontrols,
&bank->controls) < 0)
goto cleanup;
VIR_APPEND_ELEMENT(caps->host.cache.banks, caps->host.cache.nbanks, bank);
}
/* frees the bank when it duplicated an existing one; presumably
 * VIR_APPEND_ELEMENT cleared @bank when it was appended, making this
 * a no-op in that case - TODO confirm */
virCapsHostCacheBankFree(bank);
bank = NULL;
}
/* a negative result of the last virDirRead() call is a failure */
if (rv < 0)
goto cleanup;
}
/* Sort the array in order for the tests to be predictable. This way we can
* still traverse the directory instead of guessing names (in case there is
* 'index1' and 'index3' but no 'index2'). */
if (caps->host.cache.banks) {
qsort(caps->host.cache.banks, caps->host.cache.nbanks,
sizeof(*caps->host.cache.banks), virCapsHostCacheBankSorter);
}
if (virCapabilitiesInitResctrlMemory(caps) < 0)
goto cleanup;
if (virResctrlInfoGetMonitorPrefix(caps->host.resctrl, prefix,
&caps->host.cache.monitor) < 0)
goto cleanup;
ret = 0;
cleanup:
/* releases whatever was still held when the function stopped,
 * including a bank that was being filled in when an error occurred */
VIR_FREE(type);
VIR_FREE(path);
virCapsHostCacheBankFree(bank);
virBitmapFree(cpus);
return ret;
}
/**
 * virCapabilitiesHostInitIOMMU:
 * @caps: capabilities to update
 *
 * Stores the result of virHostHasIOMMU() in @caps->host.iommu.
 */
void
virCapabilitiesHostInitIOMMU(virCaps *caps)
{
    /* single probe; the answer is kept in the host capabilities */
    (*caps).host.iommu = virHostHasIOMMU();
}