From a39f69d2bb5494d661be917956baa437d01a4d13 Mon Sep 17 00:00:00 2001 From: Osier Yang Date: Fri, 24 May 2013 17:08:28 +0800 Subject: [PATCH] qemu: Set cpuset.cpus for domain process When either "cpuset" of is specified, or the "placement" of is "auto", only setting the cpuset.mems might cause the guest starting to fail. E.g. ("placement" of both and is "auto"): 1) Related XMLs 4 2) Host NUMA topology % numactl --hardware available: 8 nodes (0-7) node 0 cpus: 0 4 8 12 16 20 24 28 node 0 size: 16374 MB node 0 free: 11899 MB node 1 cpus: 32 36 40 44 48 52 56 60 node 1 size: 16384 MB node 1 free: 15318 MB node 2 cpus: 2 6 10 14 18 22 26 30 node 2 size: 16384 MB node 2 free: 15766 MB node 3 cpus: 34 38 42 46 50 54 58 62 node 3 size: 16384 MB node 3 free: 15347 MB node 4 cpus: 3 7 11 15 19 23 27 31 node 4 size: 16384 MB node 4 free: 15041 MB node 5 cpus: 35 39 43 47 51 55 59 63 node 5 size: 16384 MB node 5 free: 15202 MB node 6 cpus: 1 5 9 13 17 21 25 29 node 6 size: 16384 MB node 6 free: 15197 MB node 7 cpus: 33 37 41 45 49 53 57 61 node 7 size: 16368 MB node 7 free: 15669 MB 4) cpuset.cpus will be set as: (from debug log) 2013-05-09 16:50:17.296+0000: 417: debug : virCgroupSetValueStr:331 : Set value '/sys/fs/cgroup/cpuset/libvirt/qemu/toy/cpuset.cpus' to '0-63' 5) The advisory nodeset got from querying numad (from debug log) 2013-05-09 16:50:17.295+0000: 417: debug : qemuProcessStart:3614 : Nodeset returned from numad: 1 6) cpuset.mems will be set as: (from debug log) 2013-05-09 16:50:17.296+0000: 417: debug : virCgroupSetValueStr:331 : Set value '/sys/fs/cgroup/cpuset/libvirt/qemu/toy/cpuset.mems' to '0-7' I.E, the domain process's memory is restricted on the first NUMA node, however, it can use all of the CPUs, which will likely cause the domain process to fail to start because of the kernel fails to allocate memory with the the memory policy as "strict". % tail -n 20 /var/log/libvirt/qemu/toy.log ... 2013-05-09 05:53:32.972+0000: 7318: debug : virCommandHandshakeChild:377 : Handshake with parent is done char device redirected to /dev/pts/2 (label charserial0) kvm_init_vcpu failed: Cannot allocate memory ... Signed-off-by: Peter Krempa --- src/qemu/qemu_cgroup.c | 53 +++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index fbe7e6f457..4bebd314a1 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -615,10 +615,12 @@ cleanup: static int qemuSetupCpusetCgroup(virDomainObjPtr vm, - virBitmapPtr nodemask) + virBitmapPtr nodemask, + virCapsPtr caps) { qemuDomainObjPrivatePtr priv = vm->privateData; - char *mask = NULL; + char *mem_mask = NULL; + char *cpu_mask = NULL; int rc; int ret = -1; @@ -632,17 +634,17 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, if (vm->def->numatune.memory.placement_mode == VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) - mask = virBitmapFormat(nodemask); + mem_mask = virBitmapFormat(nodemask); else - mask = virBitmapFormat(vm->def->numatune.memory.nodemask); + mem_mask = virBitmapFormat(vm->def->numatune.memory.nodemask); - if (!mask) { + if (!mem_mask) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to convert memory nodemask")); goto cleanup; } - rc = virCgroupSetCpusetMems(priv->cgroup, mask); + rc = virCgroupSetCpusetMems(priv->cgroup, mem_mask); if (rc != 0) { virReportSystemError(-rc, @@ -652,9 +654,39 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, } } + if (vm->def->cpumask || + (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) { + + if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) { + virBitmapPtr cpumap; + if (!(cpumap = virCapabilitiesGetCpusForNodemask(caps, nodemask))) + goto cleanup; + cpu_mask = virBitmapFormat(cpumap); + virBitmapFree(cpumap); + } else { + cpu_mask = virBitmapFormat(vm->def->cpumask); + } + + if (!cpu_mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert cpu mask")); + goto cleanup; + } + + rc = virCgroupSetCpusetCpus(priv->cgroup, cpu_mask); + + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set cpuset.cpus for domain %s"), + vm->def->name); + goto cleanup; + } + } + ret = 0; cleanup: - VIR_FREE(mask); + VIR_FREE(mem_mask); + VIR_FREE(cpu_mask); return ret; } @@ -801,6 +833,7 @@ int qemuSetupCgroup(virQEMUDriverPtr driver, virBitmapPtr nodemask) { qemuDomainObjPrivatePtr priv = vm->privateData; + virCapsPtr caps = NULL; int ret = -1; if (qemuInitCgroup(driver, vm, true) < 0) @@ -809,6 +842,9 @@ int qemuSetupCgroup(virQEMUDriverPtr driver, if (!priv->cgroup) return 0; + if (!(caps = virQEMUDriverGetCapabilities(driver, false))) + goto cleanup; + if (qemuSetupDevicesCgroup(driver, vm) < 0) goto cleanup; @@ -821,11 +857,12 @@ int qemuSetupCgroup(virQEMUDriverPtr driver, if (qemuSetupCpuCgroup(vm) < 0) goto cleanup; - if (qemuSetupCpusetCgroup(vm, nodemask) < 0) + if (qemuSetupCpusetCgroup(vm, nodemask, caps) < 0) goto cleanup; ret = 0; cleanup: + virObjectUnref(caps); return ret; }