From 3791f29b085c514b171f9d8fc702975f9df9733c Mon Sep 17 00:00:00 2001 From: Martin Kletzander Date: Fri, 4 Sep 2020 14:17:30 +0200 Subject: [PATCH] qemu: Do not error out when setting affinity failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consider a host with 8 CPUs. There are the following possible scenarios 1. Bare metal; libvirtd has affinity of 8 CPUs; QEMU should get 8 CPUs 2. Bare metal; libvirtd has affinity of 2 CPUs; QEMU should get 8 CPUs 3. Container has affinity of 8 CPUs; libvirtd has affinity of 8 CPus; QEMU should get 8 CPUs 4. Container has affinity of 8 CPUs; libvirtd has affinity of 2 CPus; QEMU should get 8 CPUs 5. Container has affinity of 4 CPUs; libvirtd has affinity of 4 CPus; QEMU should get 4 CPUs 6. Container has affinity of 4 CPUs; libvirtd has affinity of 2 CPus; QEMU should get 4 CPUs Scenarios 1 & 2 always work unless systemd restricted libvirtd privs. Scenario 3 works because libvirt checks current affinity first and skips the sched_setaffinity call, avoiding the SYS_NICE issue Scenario 4 works only if CAP_SYS_NICE is availalbe Scenarios 5 & 6 works only if CAP_SYS_NICE is present *AND* the cgroups cpuset is not set on the container. If libvirt blindly ignores the sched_setaffinity failure, then scenarios 4, 5 and 6 should all work, but with caveat in case 4 and 6, that QEMU will only get 2 CPUs instead of the possible 8 and 4 respectively. This is still better than failing. Therefore libvirt can blindly ignore the setaffinity failure, but *ONLY* ignore it when there was no affinity specified in the XML config. If user specified affinity explicitly, libvirt must report an error if it can't be honoured. Resolves: https://bugzilla.redhat.com/1819801 Suggested-by: Daniel P. Berrangé Signed-off-by: Martin Kletzander Reviewed-by: Daniel P. Berrangé --- src/qemu/qemu_process.c | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index cfe09d6326..17d083d192 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2528,6 +2528,7 @@ qemuProcessGetAllCpuAffinity(virBitmapPtr *cpumapRet) static int qemuProcessInitCpuAffinity(virDomainObjPtr vm) { + bool settingAll = false; g_autoptr(virBitmap) cpumapToSet = NULL; virDomainNumatuneMemMode mem_mode; qemuDomainObjPrivatePtr priv = vm->privateData; @@ -2566,13 +2567,29 @@ qemuProcessInitCpuAffinity(virDomainObjPtr vm) if (!(cpumapToSet = virBitmapNewCopy(vm->def->cputune.emulatorpin))) return -1; } else { + settingAll = true; if (qemuProcessGetAllCpuAffinity(&cpumapToSet) < 0) return -1; } if (cpumapToSet && virProcessSetAffinity(vm->pid, cpumapToSet) < 0) { - return -1; + /* + * We only want to error out if we failed to set the affinity to + * user-requested mapping. If we are just trying to reset the affinity + * to all CPUs and this fails it can only be an issue if: + * 1) libvirtd does not have CAP_SYS_NICE + * 2) libvirtd does not run on all CPUs + * + * This scenario can easily occurr when libvirtd is run inside a + * container with restrictive permissions and CPU pinning. + * + * See also: https://bugzilla.redhat.com/1819801#c2 + */ + if (settingAll) + virResetLastError(); + else + return -1; } return 0; @@ -2726,8 +2743,25 @@ qemuProcessSetupPid(virDomainObjPtr vm, affinity_cpumask = use_cpumask; /* Setup legacy affinity. */ - if (affinity_cpumask && virProcessSetAffinity(pid, affinity_cpumask) < 0) - goto cleanup; + if (affinity_cpumask && virProcessSetAffinity(pid, affinity_cpumask) < 0) { + /* + * We only want to error out if we failed to set the affinity to + * user-requested mapping. If we are just trying to reset the affinity + * to all CPUs and this fails it can only be an issue if: + * 1) libvirtd does not have CAP_SYS_NICE + * 2) libvirtd does not run on all CPUs + * + * However since this scenario is very improbable, we rather skip + * reporting the error because it helps running libvirtd in a a scenario + * where pinning is handled by someone else. + * + * See also: https://bugzilla.redhat.com/1819801#c2 + */ + if (affinity_cpumask == hostcpumap) + virResetLastError(); + else + goto cleanup; + } /* Set scheduler type and priority, but not for the main thread. */ if (sched &&