qemu, hypervisor: refactor some cgroup mgmt methods

Refactor some cgroup management methods from qemu into hypervisor.
These methods will be shared with ch driver for cgroup management.

Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
This commit is contained in:
Praveen K Paladugu 2022-01-25 16:19:53 +00:00 committed by Michal Privoznik
parent 50a40c8d52
commit 788e2b58cb
8 changed files with 568 additions and 445 deletions

View File

@ -22,11 +22,12 @@
#include "domain_cgroup.h"
#include "domain_driver.h"
#include "util/virnuma.h"
#include "virlog.h"
#include "virutil.h"
#define VIR_FROM_THIS VIR_FROM_DOMAIN
VIR_LOG_INIT("domain.cgroup");
int
virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio)
@ -269,3 +270,447 @@ virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
return 0;
}
/**
 * virDomainCgroupSetupBlkioCgroup:
 * @vm: domain object whose definition carries the blkio tuning
 * @cgroup: cgroup the tuning is applied to
 *
 * Applies the block I/O tuning of @vm to @cgroup. When @cgroup has no
 * blkio controller the call succeeds only if the definition requests
 * neither a blkio weight nor any per-device blkio entries.
 *
 * Returns the result of virDomainCgroupSetupBlkio() when the controller
 * is present, 0 when there is nothing to apply, -1 (with an error
 * reported) when tuning was requested but cannot be honoured.
 */
int
virDomainCgroupSetupBlkioCgroup(virDomainObj *vm,
                                virCgroup *cgroup)
{
    if (virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_BLKIO))
        return virDomainCgroupSetupBlkio(cgroup, vm->def->blkio);

    /* No controller: only a problem if the user asked for tuning. */
    if (!vm->def->blkio.weight && !vm->def->blkio.ndevices)
        return 0;

    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                   _("Block I/O tuning is not available on this host"));
    return -1;
}
/**
 * virDomainCgroupSetupMemoryCgroup:
 * @vm: domain object whose definition carries the memory tuning
 * @cgroup: cgroup the tuning is applied to
 *
 * Applies the memory tuning of @vm to @cgroup. When @cgroup has no
 * memory controller the call succeeds only if none of hard_limit,
 * soft_limit and swap_hard_limit is set in the definition.
 *
 * Returns the result of virDomainCgroupSetupMemtune() when the
 * controller is present, 0 when there is nothing to apply, -1 (with an
 * error reported) when a limit was requested but cannot be honoured.
 */
int
virDomainCgroupSetupMemoryCgroup(virDomainObj *vm,
                                 virCgroup *cgroup)
{
    if (virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY))
        return virDomainCgroupSetupMemtune(cgroup, vm->def->mem);

    /* No controller: only a problem if some limit was configured. */
    if (!virMemoryLimitIsSet(vm->def->mem.hard_limit) &&
        !virMemoryLimitIsSet(vm->def->mem.soft_limit) &&
        !virMemoryLimitIsSet(vm->def->mem.swap_hard_limit))
        return 0;

    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                   _("Memory cgroup is not available on this host"));
    return -1;
}
/**
 * virDomainCgroupSetupCpusetCgroup:
 * @cgroup: cgroup to configure
 *
 * Turns on cpuset memory migration for @cgroup when it has a cpuset
 * controller; otherwise does nothing.
 *
 * Returns 0 on success (or when there is no cpuset controller),
 * -1 when enabling memory migration failed.
 */
int
virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup)
{
    if (virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) &&
        virCgroupSetCpusetMemoryMigrate(cgroup, true) < 0)
        return -1;

    return 0;
}
/**
 * virDomainCgroupSetupCpuCgroup:
 * @vm: domain object whose definition carries the cputune settings
 * @cgroup: cgroup the CPU shares are applied to
 *
 * Applies cputune.shares of @vm to @cgroup when shares were specified.
 * When @cgroup has no cpu controller the call succeeds only if no shares
 * were specified.
 *
 * Returns 0 on success or when there is nothing to apply, -1 when
 * shares were requested but the controller is missing (error reported)
 * or setting the shares failed.
 */
int
virDomainCgroupSetupCpuCgroup(virDomainObj *vm,
                              virCgroup *cgroup)
{
    if (virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        if (vm->def->cputune.sharesSpecified &&
            virCgroupSetCpuShares(cgroup, vm->def->cputune.shares) < 0)
            return -1;

        return 0;
    }

    /* No controller: only a problem if shares were configured. */
    if (!vm->def->cputune.sharesSpecified)
        return 0;

    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                   _("CPU tuning is not available on this host"));
    return -1;
}
/**
 * virDomainCgroupInitCgroup:
 * @prefix: driver prefix passed to virCgroupNewMachine()
 * @vm: domain object; its resource partition is filled in if missing
 * @nnicindexes: number of entries in @nicindexes
 * @nicindexes: NIC indexes passed to virCgroupNewMachine()
 * @cgroup: location of the cgroup pointer; any previous value is freed
 * @cgroupControllers: controller mask passed to virCgroupNewMachine()
 * @maxThreadsPerProc: thread limit passed to virCgroupNewMachine()
 * @privileged: whether the driver runs privileged
 * @machineName: machine name passed to virCgroupNewMachine()
 *
 * Creates the machine cgroup for @vm. Nothing is done (and 0 is
 * returned) for unprivileged drivers or when cgroups are unavailable.
 * A missing resource definition or partition defaults to "/machine".
 *
 * Returns 0 on success, or when the creation failure is one that
 * virCgroupNewIgnoreError() says may be ignored; -1 otherwise.
 */
int
virDomainCgroupInitCgroup(const char *prefix,
                          virDomainObj *vm,
                          size_t nnicindexes,
                          int *nicindexes,
                          virCgroup **cgroup,
                          int cgroupControllers,
                          unsigned int maxThreadsPerProc,
                          bool privileged,
                          char *machineName)
{
    virDomainResourceDef *res = NULL;

    if (!privileged || !virCgroupAvailable())
        return 0;

    /* Drop any stale handle before creating a new one. */
    g_clear_pointer(cgroup, virCgroupFree);

    if (!vm->def->resource)
        vm->def->resource = g_new0(virDomainResourceDef, 1);
    res = vm->def->resource;

    if (!res->partition)
        res->partition = g_strdup("/machine");

    if (!g_path_is_absolute(res->partition)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Resource partition '%s' must start with '/'"),
                       res->partition);
        return -1;
    }

    if (virCgroupNewMachine(machineName,
                            prefix,
                            vm->def->uuid,
                            NULL,
                            vm->pid,
                            false,
                            nnicindexes, nicindexes,
                            res->partition,
                            cgroupControllers,
                            maxThreadsPerProc,
                            cgroup) >= 0)
        return 0;

    /* Creation failed; some failures are deliberately tolerated. */
    return virCgroupNewIgnoreError() ? 0 : -1;
}
/**
 * virDomainCgroupRestoreCgroupState:
 * @vm: domain object whose vCPU and iothread lists are walked
 * @cgroup: domain cgroup
 *
 * Best-effort restoration of cpuset state: sets cpuset.mems of @cgroup
 * to the full host memory nodeset and then refreshes every online vCPU,
 * every iothread and the emulator thread cgroup via
 * virDomainCgroupRestoreCgroupThread().
 *
 * Nothing is done when NUMA is unavailable or @cgroup has no cpuset
 * controller. Failures in the top-level steps clear the last error and
 * are only logged at debug level; a failure while refreshing a thread
 * cgroup stops the walk silently.
 */
void
virDomainCgroupRestoreCgroupState(virDomainObj *vm,
                                  virCgroup *cgroup)
{
    g_autofree char *hostNodesStr = NULL;
    g_autoptr(virBitmap) hostNodes = NULL;
    size_t idx = 0;

    if (!virNumaIsAvailable())
        return;

    if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return;

    if (!(hostNodes = virNumaGetHostMemoryNodeset()) ||
        !(hostNodesStr = virBitmapFormat(hostNodes)) ||
        virCgroupHasEmptyTasks(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) <= 0 ||
        virCgroupSetCpusetMems(cgroup, hostNodesStr) < 0) {
        virResetLastError();
        VIR_DEBUG("Couldn't restore cgroups to meaningful state");
        return;
    }

    for (idx = 0; idx < virDomainDefGetVcpusMax(vm->def); idx++) {
        virDomainVcpuDef *vcpuDef = virDomainDefGetVcpu(vm->def, idx);

        /* Offline vCPUs are skipped. */
        if (vcpuDef->online &&
            virDomainCgroupRestoreCgroupThread(cgroup,
                                               VIR_CGROUP_THREAD_VCPU,
                                               idx) < 0)
            return;
    }

    for (idx = 0; idx < vm->def->niothreadids; idx++) {
        if (virDomainCgroupRestoreCgroupThread(cgroup,
                                               VIR_CGROUP_THREAD_IOTHREAD,
                                               vm->def->iothreadids[idx]->iothread_id) < 0)
            return;
    }

    if (virDomainCgroupRestoreCgroupThread(cgroup,
                                           VIR_CGROUP_THREAD_EMULATOR,
                                           0) < 0)
        return;
}
/**
 * virDomainCgroupRestoreCgroupThread:
 * @cgroup: domain cgroup
 * @thread: kind of thread sub-cgroup (vCPU, iothread, emulator)
 * @id: identifier of the thread within its kind
 *
 * Opens the thread sub-cgroup without creating it, enables cpuset
 * memory migration on it and writes its current cpuset.mems value back.
 *
 * Returns 0 on success, -1 as soon as any step fails.
 */
int
virDomainCgroupRestoreCgroupThread(virCgroup *cgroup,
                                   virCgroupThreadName thread,
                                   int id)
{
    g_autoptr(virCgroup) threadCgroup = NULL;
    g_autofree char *currentMems = NULL;

    if (virCgroupNewThread(cgroup, thread, id, false, &threadCgroup) < 0 ||
        virCgroupSetCpusetMemoryMigrate(threadCgroup, true) < 0 ||
        virCgroupGetCpusetMems(threadCgroup, &currentMems) < 0 ||
        virCgroupSetCpusetMems(threadCgroup, currentMems) < 0)
        return -1;

    return 0;
}
/**
 * virDomainCgroupConnectCgroup:
 * @prefix: driver prefix passed to virCgroupNewDetectMachine() (e.g. "qemu")
 * @vm: domain object of the already running domain
 * @cgroup: location of the cgroup pointer; any previous value is freed
 * @cgroupControllers: controller mask passed to virCgroupNewDetectMachine()
 * @privileged: whether the driver runs privileged
 * @machineName: machine name passed to virCgroupNewDetectMachine()
 *
 * Detects the cgroup of the running domain @vm, stores it in @cgroup and
 * then restores its cpuset state via virDomainCgroupRestoreCgroupState().
 *
 * Nothing is done (and 0 is returned) for unprivileged drivers or when
 * cgroups are unavailable.
 *
 * Returns 0 on success or when skipped, -1 when detection failed.
 */
int
virDomainCgroupConnectCgroup(const char *prefix,
                             virDomainObj *vm,
                             virCgroup **cgroup,
                             int cgroupControllers,
                             bool privileged,
                             char *machineName)
{
    /* Same guard as virDomainCgroupInitCgroup(): only privileged drivers
     * manage cgroups, so unprivileged ones have nothing to connect to. */
    if (!privileged)
        return 0;

    if (!virCgroupAvailable())
        return 0;

    /* Drop any stale handle before detecting the current one. */
    g_clear_pointer(cgroup, virCgroupFree);

    if (virCgroupNewDetectMachine(vm->def->name,
                                  prefix,
                                  vm->pid,
                                  cgroupControllers,
                                  machineName,
                                  cgroup) < 0)
        return -1;

    virDomainCgroupRestoreCgroupState(vm, *cgroup);
    return 0;
}
/**
 * virDomainCgroupSetupCgroup:
 * @prefix: driver prefix forwarded to virDomainCgroupInitCgroup()
 * @vm: domain object; its process must already be started (pid set)
 * @nnicindexes: number of entries in @nicindexes
 * @nicindexes: NIC indexes forwarded to virDomainCgroupInitCgroup()
 * @cgroup: location of the cgroup pointer to create and configure
 * @cgroupControllers: controller mask forwarded to the init step
 * @maxThreadsPerProc: thread limit forwarded to the init step
 * @privileged: whether the driver runs privileged
 * @machineName: machine name forwarded to the init step
 *
 * Creates the domain cgroup and applies blkio, memory, cpu and cpuset
 * settings to it, in that order. If the init step succeeds without
 * producing a cgroup, the remaining steps are skipped.
 *
 * Returns 0 on success, -1 on the first failing step.
 */
int
virDomainCgroupSetupCgroup(const char *prefix,
                           virDomainObj *vm,
                           size_t nnicindexes,
                           int *nicindexes,
                           virCgroup **cgroup,
                           int cgroupControllers,
                           unsigned int maxThreadsPerProc,
                           bool privileged,
                           char *machineName)
{
    if (!vm->pid) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot setup cgroups until process is started"));
        return -1;
    }

    if (virDomainCgroupInitCgroup(prefix, vm,
                                  nnicindexes, nicindexes,
                                  cgroup,
                                  cgroupControllers,
                                  maxThreadsPerProc,
                                  privileged,
                                  machineName) < 0)
        return -1;

    /* Init may legitimately leave us without a cgroup. */
    if (!*cgroup)
        return 0;

    if (virDomainCgroupSetupBlkioCgroup(vm, *cgroup) < 0 ||
        virDomainCgroupSetupMemoryCgroup(vm, *cgroup) < 0 ||
        virDomainCgroupSetupCpuCgroup(vm, *cgroup) < 0 ||
        virDomainCgroupSetupCpusetCgroup(*cgroup) < 0)
        return -1;

    return 0;
}
/**
 * virDomainCgroupSetupVcpuBW:
 * @cgroup: cgroup to configure
 * @period: CPU bandwidth period
 * @quota: CPU bandwidth quota
 *
 * Thin wrapper that forwards to virCgroupSetupCpuPeriodQuota().
 *
 * Returns whatever virCgroupSetupCpuPeriodQuota() returns.
 */
int
virDomainCgroupSetupVcpuBW(virCgroup *cgroup,
                           unsigned long long period,
                           long long quota)
{
    int rc = virCgroupSetupCpuPeriodQuota(cgroup, period, quota);

    return rc;
}
/**
 * virDomainCgroupSetupCpusetCpus:
 * @cgroup: cgroup to configure
 * @cpumask: CPU bitmap to apply
 *
 * Thin wrapper that forwards to virCgroupSetupCpusetCpus().
 *
 * Returns whatever virCgroupSetupCpusetCpus() returns.
 */
int
virDomainCgroupSetupCpusetCpus(virCgroup *cgroup,
                               virBitmap *cpumask)
{
    int rc = virCgroupSetupCpusetCpus(cgroup, cpumask);

    return rc;
}
/**
 * virDomainCgroupSetupGlobalCpuCgroup:
 * @vm: domain object carrying cputune.global_period/global_quota
 * @cgroup: domain cgroup
 * @autoNodeset: automatic nodeset passed to the numatune formatter
 *
 * Applies the domain-wide CPU period/quota to @cgroup. A period or
 * quota without a cpu controller is an error. With strict numatune
 * mode the memory nodeset is formatted as well; the resulting string is
 * not used further here, only a formatting failure is propagated.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure.
 */
int
virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm,
                                    virCgroup *cgroup,
                                    virBitmap *autoNodeset)
{
    const unsigned long long period = vm->def->cputune.global_period;
    const long long quota = vm->def->cputune.global_quota;
    const int bwRequested = period || quota;
    virDomainNumatuneMemMode mem_mode;
    g_autofree char *mem_mask = NULL;

    if (bwRequested &&
        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    /*
     * Without the CPU controller neither period nor quota is needed,
     * and without CPUSET as well there is nothing left to do.
     */
    if (!(virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU) ||
          virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)))
        return 0;

    if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
        mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
        if (virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
                                                autoNodeset,
                                                &mem_mask, -1) < 0)
            return -1;
    }

    if (bwRequested &&
        virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
        return -1;

    return 0;
}
/**
 * virDomainCgroupRemoveCgroup:
 * @vm: domain object (used only for the debug message)
 * @cgroup: domain cgroup, may be NULL
 * @machineName: machine name passed to virCgroupTerminateMachine()
 *
 * Terminates the machine and removes @cgroup. A NULL @cgroup is treated
 * as "not supported" and reported as success. A failure to terminate
 * the machine is only logged at debug level (unless it is one of the
 * ignorable errors) and does not prevent the removal attempt.
 *
 * Returns 0 when @cgroup is NULL, otherwise the result of
 * virCgroupRemove().
 */
int
virDomainCgroupRemoveCgroup(virDomainObj *vm,
                            virCgroup *cgroup,
                            char *machineName)
{
    if (!cgroup)
        return 0; /* Not supported, so claim success */

    if (virCgroupTerminateMachine(machineName) < 0 &&
        !virCgroupNewIgnoreError())
        VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name);

    return virCgroupRemove(cgroup);
}
/**
 * virDomainCgroupEmulatorAllNodesDataFree:
 * @data: rollback data to release, may be NULL
 *
 * Frees the emulator cgroup handle, the saved memory mask string and
 * the structure itself. A NULL @data is a no-op.
 */
void
virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data)
{
    if (data) {
        virCgroupFree(data->emulatorCgroup);
        g_free(data->emulatorMemMask);
        g_free(data);
    }
}
/**
 * virDomainCgroupEmulatorAllNodesAllow:
 * @cgroup: domain cgroup pointer
 * @retData: filled with the structure needed to roll the operation back
 *
 * Temporarily allows all host NUMA memory nodes for the emulator thread
 * cgroup. This is necessary when hotplugging cpus since it requires
 * memory allocated in the DMA region. The previous cpuset.mems value is
 * saved in @retData so that virDomainCgroupEmulatorAllNodesRestore()
 * can revert the change.
 *
 * When NUMA is unavailable or @cgroup has no cpuset controller nothing
 * is done and @retData is left untouched.
 *
 * Returns 0 on success, -1 on error.
 */
int
virDomainCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
                                     virCgroupEmulatorAllNodesData **retData)
{
    g_autoptr(virBitmap) hostNodes = NULL;
    g_autofree char *hostNodesStr = NULL;
    virCgroupEmulatorAllNodesData *rollback = NULL;

    if (!virNumaIsAvailable() ||
        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    if (!(hostNodes = virNumaGetHostMemoryNodeset()))
        return -1;

    if (!(hostNodesStr = virBitmapFormat(hostNodes)))
        return -1;

    rollback = g_new0(virCgroupEmulatorAllNodesData, 1);

    /* Remember the current mask before widening it to all nodes. */
    if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
                           false, &rollback->emulatorCgroup) < 0 ||
        virCgroupGetCpusetMems(rollback->emulatorCgroup,
                               &rollback->emulatorMemMask) < 0 ||
        virCgroupSetCpusetMems(rollback->emulatorCgroup, hostNodesStr) < 0) {
        virDomainCgroupEmulatorAllNodesDataFree(rollback);
        return -1;
    }

    /* Ownership of the rollback data moves to the caller. */
    *retData = rollback;
    return 0;
}
/**
 * virDomainCgroupEmulatorAllNodesRestore:
 * @data: rollback data produced by virDomainCgroupEmulatorAllNodesAllow,
 *        may be NULL
 *
 * Writes the saved cpuset.mems value back to the emulator thread cgroup
 * and frees @data. The last reported error is preserved across the
 * call, and a failure to write the mask is ignored. A NULL @data is a
 * no-op.
 */
void
virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data)
{
    virError *savedErr;

    if (data == NULL)
        return;

    virErrorPreserveLast(&savedErr);
    virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
    virErrorRestore(&savedErr);

    virDomainCgroupEmulatorAllNodesDataFree(data);
}

View File

@ -23,6 +23,11 @@
#include "vircgroup.h"
#include "domain_conf.h"
/* Rollback state produced by virDomainCgroupEmulatorAllNodesAllow() and
 * consumed by virDomainCgroupEmulatorAllNodesRestore(). */
typedef struct _virCgroupEmulatorAllNodesData virCgroupEmulatorAllNodesData;
struct _virCgroupEmulatorAllNodesData {
virCgroup *emulatorCgroup; /* emulator thread cgroup that was modified */
char *emulatorMemMask; /* cpuset.mems value saved before the change */
};
int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio);
int virDomainCgroupSetupMemtune(virCgroup *cgroup, virDomainMemtune mem);
@ -36,3 +41,70 @@ int virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
virDomainDef *persistentDef,
virTypedParameterPtr params,
int nparams);
/* Apply blkio tuning from the domain definition to @cgroup. */
int
virDomainCgroupSetupBlkioCgroup(virDomainObj *vm,
virCgroup *cgroup);
/* Apply memory tuning from the domain definition to @cgroup. */
int
virDomainCgroupSetupMemoryCgroup(virDomainObj *vm,
virCgroup *cgroup);
/* Enable cpuset memory migration on @cgroup if it has a cpuset controller. */
int
virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup);
/* Apply cputune shares from the domain definition to @cgroup. */
int
virDomainCgroupSetupCpuCgroup(virDomainObj *vm,
virCgroup *cgroup);
/* Create the machine cgroup for @vm and store it in @cgroup. */
int
virDomainCgroupInitCgroup(const char *prefix,
virDomainObj *vm,
size_t nnicindexes,
int *nicindexes,
virCgroup **cgroup,
int cgroupControllers,
unsigned int maxThreadsPerProc,
bool privileged,
char *machineName);
/* Best-effort restoration of cpuset state for @cgroup and its thread cgroups. */
void
virDomainCgroupRestoreCgroupState(virDomainObj *vm,
virCgroup *cgroup);
/* Detect the cgroup of a running domain and restore its cpuset state. */
int
virDomainCgroupConnectCgroup(const char *prefix,
virDomainObj *vm,
virCgroup **cgroup,
int cgroupControllers,
bool privileged,
char *machineName);
/* Create the domain cgroup and apply blkio, memory, cpu and cpuset settings. */
int
virDomainCgroupSetupCgroup(const char *prefix,
virDomainObj *vm,
size_t nnicindexes,
int *nicindexes,
virCgroup **cgroup,
int cgroupControllers,
unsigned int maxThreadsPerProc,
bool privileged,
char *machineName);
/* Free rollback data; NULL is accepted. */
void
virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data);
/* Temporarily allow all host memory nodes for the emulator thread cgroup. */
int
virDomainCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
virCgroupEmulatorAllNodesData **retData);
/* Revert virDomainCgroupEmulatorAllNodesAllow() and free @data. */
void
virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data);
/* Wrapper around virCgroupSetupCpuPeriodQuota(). */
int
virDomainCgroupSetupVcpuBW(virCgroup *cgroup,
unsigned long long period,
long long quota);
/* Wrapper around virCgroupSetupCpusetCpus(). */
int
virDomainCgroupSetupCpusetCpus(virCgroup *cgroup,
virBitmap *cpumask);
/* Apply the domain-wide CPU period/quota to @cgroup. */
int
virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm,
virCgroup *cgroup,
virBitmap *autoNodeset);
/* Terminate the machine and remove @cgroup; a NULL @cgroup is success. */
int
virDomainCgroupRemoveCgroup(virDomainObj *vm,
virCgroup *cgroup,
char *machineName);
/* Refresh cpuset memory settings of one thread sub-cgroup. */
int
virDomainCgroupRestoreCgroupThread(virCgroup *cgroup,
virCgroupThreadName thread,
int id);

View File

@ -1544,11 +1544,23 @@ virSetConnectStorage;
# hypervisor/domain_cgroup.h
virDomainCgroupConnectCgroup;
virDomainCgroupEmulatorAllNodesAllow;
virDomainCgroupEmulatorAllNodesRestore;
virDomainCgroupInitCgroup;
virDomainCgroupRemoveCgroup;
virDomainCgroupSetMemoryLimitParameters;
virDomainCgroupSetupBlkio;
virDomainCgroupSetupBlkioCgroup;
virDomainCgroupSetupCgroup;
virDomainCgroupSetupCpuCgroup;
virDomainCgroupSetupCpusetCgroup;
virDomainCgroupSetupCpusetCpus;
virDomainCgroupSetupDomainBlkioParameters;
virDomainCgroupSetupGlobalCpuCgroup;
virDomainCgroupSetupMemoryCgroup;
virDomainCgroupSetupMemtune;
virDomainCgroupSetupVcpuBW;
# hypervisor/domain_driver.h
virDomainDriverAddIOThreadCheck;

View File

@ -593,46 +593,6 @@ qemuSetupVideoCgroup(virDomainObj *vm,
return ret;
}
/* Applies the domain's blkio tuning to the cgroup kept in the domain's
 * private data. Without a blkio controller it fails only when the
 * definition sets a blkio weight or per-device entries.
 * Returns 0 on success, -1 on error. */
static int
qemuSetupBlkioCgroup(virDomainObj *vm)
{
qemuDomainObjPrivate *priv = vm->privateData;
if (!virCgroupHasController(priv->cgroup,
VIR_CGROUP_CONTROLLER_BLKIO)) {
if (vm->def->blkio.weight || vm->def->blkio.ndevices) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Block I/O tuning is not available on this host"));
return -1;
}
return 0;
}
return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio);
}
/* Applies the domain's memory tuning to the cgroup kept in the domain's
 * private data. Without a memory controller it fails only when one of
 * hard_limit, soft_limit or swap_hard_limit is set.
 * Returns 0 on success, -1 on error. */
static int
qemuSetupMemoryCgroup(virDomainObj *vm)
{
qemuDomainObjPrivate *priv = vm->privateData;
if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) {
if (virMemoryLimitIsSet(vm->def->mem.hard_limit) ||
virMemoryLimitIsSet(vm->def->mem.soft_limit) ||
virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Memory cgroup is not available on this host"));
return -1;
}
return 0;
}
return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem);
}
static int
qemuSetupFirmwareCgroup(virDomainObj *vm)
{
@ -861,44 +821,6 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
}
/* Enables cpuset memory migration on the domain cgroup when it has a
 * cpuset controller; otherwise does nothing.
 * Returns 0 on success, -1 on error. */
static int
qemuSetupCpusetCgroup(virDomainObj *vm)
{
qemuDomainObjPrivate *priv = vm->privateData;
if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
return 0;
if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0)
return -1;
return 0;
}
/* Applies cputune.shares to the domain cgroup when shares were
 * specified. Without a cpu controller it fails only when shares were
 * specified. Returns 0 on success, -1 on error. */
static int
qemuSetupCpuCgroup(virDomainObj *vm)
{
qemuDomainObjPrivate *priv = vm->privateData;
if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
if (vm->def->cputune.sharesSpecified) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("CPU tuning is not available on this host"));
return -1;
}
return 0;
}
if (vm->def->cputune.sharesSpecified) {
if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0)
return -1;
}
return 0;
}
static int
qemuSetupCgroupAppid(virDomainObj *vm)
{
@ -927,174 +849,24 @@ qemuSetupCgroupAppid(virDomainObj *vm)
}
/* Creates the machine cgroup for @vm and stores it in the domain's
 * private data. Skipped (returning 0) for unprivileged drivers or when
 * cgroups are unavailable. A missing resource partition defaults to
 * "/machine". Returns 0 on success or ignorable failure, -1 otherwise. */
static int
qemuInitCgroup(virDomainObj *vm,
size_t nnicindexes,
int *nicindexes)
{
qemuDomainObjPrivate *priv = vm->privateData;
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
if (!priv->driver->privileged)
return 0;
if (!virCgroupAvailable())
return 0;
/* Drop any previously held cgroup handle. */
virCgroupFree(priv->cgroup);
priv->cgroup = NULL;
if (!vm->def->resource)
vm->def->resource = g_new0(virDomainResourceDef, 1);
if (!vm->def->resource->partition)
vm->def->resource->partition = g_strdup("/machine");
if (!g_path_is_absolute(vm->def->resource->partition)) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Resource partition '%s' must start with '/'"),
vm->def->resource->partition);
return -1;
}
if (virCgroupNewMachine(priv->machineName,
"qemu",
vm->def->uuid,
NULL,
vm->pid,
false,
nnicindexes, nicindexes,
vm->def->resource->partition,
cfg->cgroupControllers,
cfg->maxThreadsPerProc,
&priv->cgroup) < 0) {
/* Some creation failures are deliberately tolerated. */
if (virCgroupNewIgnoreError())
return 0;
return -1;
}
return 0;
}
/* Opens the thread sub-cgroup identified by @thread/@id without creating
 * it, enables cpuset memory migration on it and writes its current
 * cpuset.mems value back. Returns 0 on success, -1 on error. */
static int
qemuRestoreCgroupThread(virCgroup *cgroup,
virCgroupThreadName thread,
int id)
{
g_autoptr(virCgroup) cgroup_temp = NULL;
g_autofree char *nodeset = NULL;
if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0)
return -1;
if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0)
return -1;
if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0)
return -1;
if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
return -1;
return 0;
}
/* Best-effort restoration of cpuset state: sets cpuset.mems of the
 * domain cgroup to the full host memory nodeset, then refreshes every
 * online vCPU, every iothread and the emulator thread cgroup. Failures
 * in the top-level steps clear the last error and are only logged at
 * debug level; a failure in a thread cgroup stops the walk silently. */
static void
qemuRestoreCgroupState(virDomainObj *vm)
{
g_autofree char *mem_mask = NULL;
qemuDomainObjPrivate *priv = vm->privateData;
size_t i = 0;
g_autoptr(virBitmap) all_nodes = NULL;
if (!virNumaIsAvailable() ||
!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
return;
if (!(all_nodes = virNumaGetHostMemoryNodeset()))
goto error;
if (!(mem_mask = virBitmapFormat(all_nodes)))
goto error;
if (virCgroupHasEmptyTasks(priv->cgroup,
VIR_CGROUP_CONTROLLER_CPUSET) <= 0)
goto error;
if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
goto error;
for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i);
/* Offline vCPUs are skipped. */
if (!vcpu->online)
continue;
if (qemuRestoreCgroupThread(priv->cgroup,
VIR_CGROUP_THREAD_VCPU, i) < 0)
return;
}
for (i = 0; i < vm->def->niothreadids; i++) {
if (qemuRestoreCgroupThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
vm->def->iothreadids[i]->iothread_id) < 0)
return;
}
if (qemuRestoreCgroupThread(priv->cgroup,
VIR_CGROUP_THREAD_EMULATOR, 0) < 0)
return;
return;
error:
virResetLastError();
VIR_DEBUG("Couldn't restore cgroups to meaningful state");
return;
}
/* Detects the cgroup of the running domain @vm, stores it in the
 * domain's private data and restores its cpuset state. Skipped
 * (returning 0) for unprivileged drivers or when cgroups are
 * unavailable. Returns 0 on success, -1 when detection failed. */
int
qemuConnectCgroup(virDomainObj *vm)
{
qemuDomainObjPrivate *priv = vm->privateData;
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
if (!priv->driver->privileged)
return 0;
if (!virCgroupAvailable())
return 0;
/* Drop any previously held cgroup handle. */
virCgroupFree(priv->cgroup);
priv->cgroup = NULL;
if (virCgroupNewDetectMachine(vm->def->name,
"qemu",
vm->pid,
cfg->cgroupControllers,
priv->machineName,
&priv->cgroup) < 0)
return -1;
qemuRestoreCgroupState(vm);
return 0;
}
int
qemuSetupCgroup(virDomainObj *vm,
size_t nnicindexes,
int *nicindexes)
{
qemuDomainObjPrivate *priv = vm->privateData;
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
if (!vm->pid) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Cannot setup cgroups until process is started"));
return -1;
}
if (virDomainCgroupSetupCgroup("qemu",
vm,
nnicindexes,
nicindexes,
&priv->cgroup,
cfg->cgroupControllers,
cfg->maxThreadsPerProc,
priv->driver->privileged,
priv->machineName) < 0)
if (qemuInitCgroup(vm, nnicindexes, nicindexes) < 0)
return -1;
if (!priv->cgroup)
@ -1103,41 +875,12 @@ qemuSetupCgroup(virDomainObj *vm,
if (qemuSetupDevicesCgroup(vm) < 0)
return -1;
if (qemuSetupBlkioCgroup(vm) < 0)
return -1;
if (qemuSetupMemoryCgroup(vm) < 0)
return -1;
if (qemuSetupCpuCgroup(vm) < 0)
return -1;
if (qemuSetupCpusetCgroup(vm) < 0)
return -1;
if (qemuSetupCgroupAppid(vm) < 0)
return -1;
return 0;
}
/* Thin wrapper forwarding @period and @quota for @cgroup to
 * virCgroupSetupCpuPeriodQuota(); returns its result. */
int
qemuSetupCgroupVcpuBW(virCgroup *cgroup,
unsigned long long period,
long long quota)
{
return virCgroupSetupCpuPeriodQuota(cgroup, period, quota);
}
/* Thin wrapper forwarding @cpumask for @cgroup to
 * virCgroupSetupCpusetCpus(); returns its result. */
int
qemuSetupCgroupCpusetCpus(virCgroup *cgroup,
virBitmap *cpumask)
{
return virCgroupSetupCpusetCpus(cgroup, cpumask);
}
int
qemuSetupCgroupForExtDevices(virDomainObj *vm,
virQEMUDriver *driver)
@ -1164,148 +907,3 @@ qemuSetupCgroupForExtDevices(virDomainObj *vm,
return qemuExtDevicesSetupCgroup(driver, vm, cgroup_temp);
}
/* Applies the domain-wide CPU period/quota (cputune.global_period and
 * global_quota) to the domain cgroup. A period or quota without a cpu
 * controller is an error. Returns 0 on success or when there is nothing
 * to do, -1 on error. */
int
qemuSetupGlobalCpuCgroup(virDomainObj *vm)
{
qemuDomainObjPrivate *priv = vm->privateData;
unsigned long long period = vm->def->cputune.global_period;
long long quota = vm->def->cputune.global_quota;
g_autofree char *mem_mask = NULL;
virDomainNumatuneMemMode mem_mode;
if ((period || quota) &&
!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("cgroup cpu is required for scheduler tuning"));
return -1;
}
/*
* If CPU cgroup controller is not initialized here, then we need
* neither period nor quota settings. And if CPUSET controller is
* not initialized either, then there's nothing to do anyway.
*/
if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
return 0;
/* With strict numatune mode the nodeset is formatted into mem_mask;
 * the string is not used further in this function, only a formatting
 * failure is propagated. */
if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
priv->autoNodeset,
&mem_mask, -1) < 0)
return -1;
if (period || quota) {
if (qemuSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0)
return -1;
}
return 0;
}
/* Terminates the machine and removes the domain cgroup. A NULL cgroup
 * is reported as success. A failure to terminate the machine is only
 * logged at debug level (unless ignorable) and does not prevent the
 * removal attempt. Returns the result of virCgroupRemove(). */
int
qemuRemoveCgroup(virDomainObj *vm)
{
qemuDomainObjPrivate *priv = vm->privateData;
if (priv->cgroup == NULL)
return 0; /* Not supported, so claim success */
if (virCgroupTerminateMachine(priv->machineName) < 0) {
if (!virCgroupNewIgnoreError())
VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name);
}
return virCgroupRemove(priv->cgroup);
}
/* Frees the emulator cgroup handle, the saved memory mask string and
 * the structure itself. A NULL @data is a no-op. */
static void
qemuCgroupEmulatorAllNodesDataFree(qemuCgroupEmulatorAllNodesData *data)
{
if (!data)
return;
virCgroupFree(data->emulatorCgroup);
g_free(data->emulatorMemMask);
g_free(data);
}
/**
* qemuCgroupEmulatorAllNodesAllow:
* @cgroup: domain cgroup pointer
* @retData: filled with structure used to roll back the operation
*
* Allows all NUMA nodes for the qemu emulator thread temporarily. This is
* necessary when hotplugging cpus since it requires memory allocated in the
* DMA region. Afterwards the operation can be reverted by
* qemuCgroupEmulatorAllNodesRestore.
*
* When NUMA is unavailable or @cgroup has no cpuset controller, nothing is
* done and @retData is left untouched.
*
* Returns 0 on success, -1 on error
*/
int
qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
qemuCgroupEmulatorAllNodesData **retData)
{
qemuCgroupEmulatorAllNodesData *data = NULL;
g_autofree char *all_nodes_str = NULL;
g_autoptr(virBitmap) all_nodes = NULL;
int ret = -1;
if (!virNumaIsAvailable() ||
!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
return 0;
if (!(all_nodes = virNumaGetHostMemoryNodeset()))
goto cleanup;
if (!(all_nodes_str = virBitmapFormat(all_nodes)))
goto cleanup;
data = g_new0(qemuCgroupEmulatorAllNodesData, 1);
if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
false, &data->emulatorCgroup) < 0)
goto cleanup;
/* Save the current mask before widening it to all host nodes. */
if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 ||
virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
goto cleanup;
/* Ownership moves to the caller; data becomes NULL so the cleanup
 * below frees nothing on success. */
*retData = g_steal_pointer(&data);
ret = 0;
cleanup:
qemuCgroupEmulatorAllNodesDataFree(data);
return ret;
}
/**
* qemuCgroupEmulatorAllNodesRestore:
* @data: data structure created by qemuCgroupEmulatorAllNodesAllow
*
* Rolls back the setting done by qemuCgroupEmulatorAllNodesAllow and frees the
* associated data. A NULL @data is a no-op.
*/
void
qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data)
{
virErrorPtr err;
if (!data)
return;
/* Keep the caller's last error intact; the result of writing the mask
 * back is ignored. */
virErrorPreserveLast(&err);
virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
virErrorRestore(&err);
qemuCgroupEmulatorAllNodesDataFree(data);
}

View File

@ -56,18 +56,11 @@ int qemuSetupChardevCgroup(virDomainObj *vm,
virDomainChrDef *dev);
int qemuTeardownChardevCgroup(virDomainObj *vm,
virDomainChrDef *dev);
int qemuConnectCgroup(virDomainObj *vm);
int qemuSetupCgroup(virDomainObj *vm,
size_t nnicindexes,
int *nicindexes);
int qemuSetupCgroupVcpuBW(virCgroup *cgroup,
unsigned long long period,
long long quota);
int qemuSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask);
int qemuSetupGlobalCpuCgroup(virDomainObj *vm);
int qemuSetupCgroupForExtDevices(virDomainObj *vm,
virQEMUDriver *driver);
int qemuRemoveCgroup(virDomainObj *vm);
typedef struct _qemuCgroupEmulatorAllNodesData qemuCgroupEmulatorAllNodesData;
struct _qemuCgroupEmulatorAllNodesData {
@ -75,8 +68,4 @@ struct _qemuCgroupEmulatorAllNodesData {
char *emulatorMemMask;
};
int qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
qemuCgroupEmulatorAllNodesData **data);
void qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data);
extern const char *const defaultDeviceACL[];

View File

@ -4419,7 +4419,7 @@ qemuDomainPinVcpuLive(virDomainObj *vm,
if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu,
false, &cgroup_vcpu) < 0)
goto cleanup;
if (qemuSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0)
if (virDomainCgroupSetupCpusetCpus(cgroup_vcpu, cpumap) < 0)
goto cleanup;
}
@ -4628,7 +4628,7 @@ qemuDomainPinEmulator(virDomainPtr dom,
0, false, &cgroup_emulator) < 0)
goto endjob;
if (qemuSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
if (virDomainCgroupSetupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("failed to set cpuset.cpus in cgroup"
" for emulator threads"));
@ -5025,7 +5025,7 @@ qemuDomainPinIOThread(virDomainPtr dom,
if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
iothread_id, false, &cgroup_iothread) < 0)
goto endjob;
if (qemuSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
if (virDomainCgroupSetupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
virReportError(VIR_ERR_OPERATION_INVALID,
_("failed to set cpuset.cpus in cgroup"
" for iothread %d"), iothread_id);
@ -8925,7 +8925,7 @@ qemuSetGlobalBWLive(virCgroup *cgroup, unsigned long long period,
if (period == 0 && quota == 0)
return 0;
if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
return -1;
return 0;
@ -9120,7 +9120,7 @@ qemuSetVcpusBWLive(virDomainObj *vm, virCgroup *cgroup,
false, &cgroup_vcpu) < 0)
return -1;
if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
if (virDomainCgroupSetupVcpuBW(cgroup_vcpu, period, quota) < 0)
return -1;
}
@ -9141,7 +9141,7 @@ qemuSetEmulatorBandwidthLive(virCgroup *cgroup,
false, &cgroup_emulator) < 0)
return -1;
if (qemuSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0)
if (virDomainCgroupSetupVcpuBW(cgroup_emulator, period, quota) < 0)
return -1;
return 0;
@ -9168,7 +9168,7 @@ qemuSetIOThreadsBWLive(virDomainObj *vm, virCgroup *cgroup,
false, &cgroup_iothread) < 0)
return -1;
if (qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0)
if (virDomainCgroupSetupVcpuBW(cgroup_iothread, period, quota) < 0)
return -1;
}

View File

@ -37,6 +37,7 @@
#include "qemu_snapshot.h"
#include "qemu_virtiofs.h"
#include "domain_audit.h"
#include "domain_cgroup.h"
#include "netdev_bandwidth_conf.h"
#include "domain_nwfilter.h"
#include "virlog.h"
@ -6538,11 +6539,11 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
bool enable)
{
qemuDomainObjPrivate *priv = vm->privateData;
qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
ssize_t nextvcpu = -1;
int ret = -1;
if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
goto cleanup;
if (enable) {
@ -6563,7 +6564,7 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
ret = 0;
cleanup:
qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup);
return ret;
}

View File

@ -73,6 +73,7 @@
#include "virpidfile.h"
#include "virhostcpu.h"
#include "domain_audit.h"
#include "domain_cgroup.h"
#include "domain_nwfilter.h"
#include "domain_validate.h"
#include "locking/domain_lock.h"
@ -2685,7 +2686,7 @@ qemuProcessSetupPid(virDomainObj *vm,
if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
if (use_cpumask &&
qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
virDomainCgroupSetupCpusetCpus(cgroup, use_cpumask) < 0)
goto cleanup;
if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
@ -2694,7 +2695,7 @@ qemuProcessSetupPid(virDomainObj *vm,
}
if ((period || quota) &&
qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
goto cleanup;
/* Move the thread to the sub dir */
@ -5951,7 +5952,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
{
unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
qemuDomainObjPrivate *priv = vm->privateData;
qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
virDomainVcpuDef *vcpu;
qemuDomainVcpuPrivate *vcpupriv;
size_t i;
@ -5979,7 +5980,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug),
qemuProcessVcpusSortOrder);
if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
goto cleanup;
for (i = 0; i < nbootHotplug; i++) {
@ -6003,7 +6004,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
ret = 0;
cleanup:
qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup);
return ret;
}
@ -6993,7 +6994,7 @@ qemuProcessPrepareHost(virQEMUDriver *driver,
/* Ensure no historical cgroup for this VM is lying around bogus
* settings */
VIR_DEBUG("Ensuring no historical cgroup is lying around");
qemuRemoveCgroup(vm);
virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName);
if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) {
virReportSystemError(errno,
@ -7602,7 +7603,7 @@ qemuProcessLaunch(virConnectPtr conn,
goto cleanup;
VIR_DEBUG("Setting global CPU cgroup (if required)");
if (qemuSetupGlobalCpuCgroup(vm) < 0)
if (virDomainCgroupSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0)
goto cleanup;
VIR_DEBUG("Setting vCPU tuning/settings");
@ -8201,7 +8202,7 @@ void qemuProcessStop(virQEMUDriver *driver,
}
retry:
if ((ret = qemuRemoveCgroup(vm)) < 0) {
if ((ret = virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName)) < 0) {
if (ret == -EBUSY && (retries++ < 5)) {
g_usleep(200*1000);
goto retry;
@ -8760,7 +8761,12 @@ qemuProcessReconnect(void *opaque)
if (!priv->machineName)
goto error;
if (qemuConnectCgroup(obj) < 0)
if (virDomainCgroupConnectCgroup("qemu",
obj,
&priv->cgroup,
cfg->cgroupControllers,
priv->driver->privileged,
priv->machineName) < 0)
goto error;
if (qemuDomainPerfRestart(obj) < 0)