qemu_domain.c: NUMA CPUs auto-fill for incomplete topologies

Libvirt allows the user to define an incomplete NUMA topology, in which
the sum of CPUs across all cells is less than the total number of vCPUs.
What ends up happening is that QEMU allocates the non-enumerated CPUs
in the first NUMA node. This behavior has been flagged as 'to be
deprecated' at least since QEMU commit ec78f8114bc4 ("numa: use
possible_cpus for not mapped CPUs check").
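
As an illustration, consider this hypothetical domain XML fragment
(all values invented): the guest defines eight vCPUs, but the two NUMA
cells enumerate only four of them, leaving cpus 2-3 and 6-7 unassigned:

    <vcpu placement='static'>8</vcpu>
    <cpu>
      <numa>
        <!-- cpus 2-3 and 6-7 appear in no cell; QEMU silently
             places them in the first NUMA node -->
        <cell id='0' cpus='0-1' memory='1048576' unit='KiB'/>
        <cell id='1' cpus='4-5' memory='1048576' unit='KiB'/>
      </numa>
    </cpu>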

In [1], Maxiwell suggested that we forbid users from defining such
topologies. In his review [2], Peter Krempa pointed out that we can't
break existing guests, and suggested that Libvirt emulate the QEMU
behavior of putting the remaining vCPUs in the first NUMA node in
these cases.

This patch implements Peter Krempa's suggestion. Since the auto-fill
will most likely leave node 0 with a disjointed CPU range, we make the
auto-fill dependent on QEMU_CAPS_NUMA, which tells us whether QEMU can
handle such ranges.
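
With the hypothetical topology sketched above, the auto-fill moves the
unassigned cpus 2-3 and 6-7 into cell 0, so the effective definition
becomes:

    <!-- after auto-fill: cell 0 carries a disjointed CPU range -->
    <cell id='0' cpus='0-3,6-7' memory='1048576' unit='KiB'/>
    <cell id='1' cpus='4-5' memory='1048576' unit='KiB'/>

The 'cpus=0-3,6-7' range of cell 0 is exactly the disjointed setup
that only QEMU binaries with QEMU_CAPS_NUMA are known to handle.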

A follow-up patch will update the documentation, both to describe the
auto-fill mechanism for incomplete NUMA topologies and to discourage
users from creating such topologies in the future. This approach also
makes Libvirt independent of whether QEMU changes its current
behavior: either we auto-fill the CPUs into node 0 ourselves, or the
user is (hopefully) aware that incomplete topologies, although
supported in Libvirt, are to be avoided.

[1] https://www.redhat.com/archives/libvir-list/2019-June/msg00224.html
[2] https://www.redhat.com/archives/libvir-list/2019-June/msg00263.html

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Author:    Daniel Henrique Barboza  2020-06-10 15:35:51 -03:00
Committer: Michal Privoznik
Commit:    baca59a538 (parent: 711a868861)
3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -4963,6 +4963,50 @@ qemuDomainDefTsegPostParse(virDomainDefPtr def,
 }
 
 
+/**
+ * qemuDomainDefNumaCPUsRectify:
+ * @def: domain definition
+ * @qemuCaps: QEMU capabilities object
+ *
+ * This function emulates the (to be deprecated) behavior of filling
+ * up node0 with the remaining CPUs in case of an incomplete NUMA
+ * setup, up to virDomainDefGetVcpusMax(def).
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+qemuDomainDefNumaCPUsRectify(virDomainDefPtr def, virQEMUCapsPtr qemuCaps)
+{
+    unsigned int vcpusMax, numacpus;
+
+    /* QEMU_CAPS_NUMA tells us if QEMU is able to handle disjointed
+     * NUMA CPU ranges. The filling process will create a disjointed
+     * setup in node0 most of the time. Do not proceed if QEMU
+     * can't handle it. */
+    if (virDomainNumaGetNodeCount(def->numa) == 0 ||
+        !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA))
+        return 0;
+
+    vcpusMax = virDomainDefGetVcpusMax(def);
+    numacpus = virDomainNumaGetCPUCountTotal(def->numa);
+
+    if (numacpus < vcpusMax) {
+        if (virDomainNumaFillCPUsInNode(def->numa, 0, vcpusMax) < 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+
+static int
+qemuDomainDefNumaCPUsPostParse(virDomainDefPtr def,
+                               virQEMUCapsPtr qemuCaps)
+{
+    return qemuDomainDefNumaCPUsRectify(def, qemuCaps);
+}
+
+
 static int
 qemuDomainDefPostParseBasic(virDomainDefPtr def,
                             void *opaque G_GNUC_UNUSED)
@@ -5049,6 +5093,9 @@ qemuDomainDefPostParse(virDomainDefPtr def,
     if (qemuDomainDefTsegPostParse(def, qemuCaps) < 0)
         return -1;
 
+    if (qemuDomainDefNumaCPUsPostParse(def, qemuCaps) < 0)
+        return -1;
+
     return 0;
 }
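
Because the rectification runs as a domain post-parse callback, the
auto-filled CPUs become part of the parsed definition itself: dumping
the domain XML afterwards shows exactly which CPUs ended up in node 0,
instead of QEMU placing them there silently.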

diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -1297,3 +1297,7 @@ qemuDomainInitializePflashStorageSource(virDomainObjPtr vm);
 bool
 qemuDomainDiskBlockJobIsSupported(virDomainObjPtr vm,
                                   virDomainDiskDefPtr disk);
+
+int
+qemuDomainDefNumaCPUsRectify(virDomainDefPtr def,
+                             virQEMUCapsPtr qemuCaps);

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4994,11 +4994,13 @@ qemuDomainSetVcpusAgent(virDomainObjPtr vm,
 
 static int
 qemuDomainSetVcpusMax(virQEMUDriverPtr driver,
+                      virDomainObjPtr vm,
                       virDomainDefPtr def,
                       virDomainDefPtr persistentDef,
                       unsigned int nvcpus)
 {
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
+    qemuDomainObjPrivatePtr priv = vm->privateData;
     unsigned int topologycpus;
 
     if (def) {
@@ -5029,6 +5031,9 @@ qemuDomainSetVcpusMax(virQEMUDriverPtr driver,
         if (virDomainDefSetVcpusMax(persistentDef, nvcpus, driver->xmlopt) < 0)
             return -1;
 
+        if (qemuDomainDefNumaCPUsRectify(persistentDef, priv->qemuCaps) < 0)
+            return -1;
+
         if (virDomainDefSave(persistentDef, driver->xmlopt, cfg->configDir) < 0)
             return -1;
 
@@ -5076,7 +5081,7 @@ qemuDomainSetVcpusFlags(virDomainPtr dom,
     if (useAgent)
         ret = qemuDomainSetVcpusAgent(vm, nvcpus);
     else if (flags & VIR_DOMAIN_VCPU_MAXIMUM)
-        ret = qemuDomainSetVcpusMax(driver, def, persistentDef, nvcpus);
+        ret = qemuDomainSetVcpusMax(driver, vm, def, persistentDef, nvcpus);
     else
         ret = qemuDomainSetVcpusInternal(driver, vm, def, persistentDef,
                                          nvcpus, hotpluggable);
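
The qemu_driver.c change covers the other path that can reopen a gap
between the NUMA topology and the vCPU count: raising the maximum vCPU
count of a persistent definition (e.g. via virsh setvcpus --maximum
--config). qemuDomainSetVcpusMax now receives the domain object as
well, so it can reach priv->qemuCaps and rectify the persistent
definition before it is saved.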