From 5670c50ffb3cd999f0bee779bbaa8f7dc7b6e0e0 Mon Sep 17 00:00:00 2001
From: Michal Privoznik
Date: Wed, 12 Apr 2023 17:15:08 +0200
Subject: [PATCH] qemu_domain: Increase memlock limit for NVMe disks

When starting QEMU, or when hotplugging a PCI device, QEMU might lock
some memory. How much? Well, that's an undecidable problem. But
despite that, we try to guess. And it more or less works, until
there's a counter example. This time, it's a guest with both
<hostdev/> and an NVMe <disk/>. I've started a simple guest with
4GiB of memory:

  # virsh dominfo fedora
  Max memory:     4194304 KiB
  Used memory:    4194304 KiB

And here are the amounts of memory that QEMU tried to lock, obtained
via:

  grep VmLck /proc/$(pgrep qemu-kvm)/status

  1) with just one <hostdev/>
     VmLck:   4194308 kB

  2) with just one NVMe <disk/>
     VmLck:   4328544 kB

  3) with one <hostdev/> and one NVMe <disk/>
     VmLck:   8522852 kB

Now, what's surprising is case 2), where the locked memory exceeds the
VM memory. It almost resembles VDPA. Therefore, treat it as such.
Unfortunately, I don't have a box with two or more spare NVMe-s, so I
can't tell for sure. But setting the limit too tight means QEMU
refuses to start.

Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2014030

Signed-off-by: Michal Privoznik
Reviewed-by: Martin Kletzander
---
 src/qemu/qemu_domain.c                   | 35 +++++++++++++------
 .../qemumemlock-pc-hostdev-nvme.xml      | 24 +++++++++++++
 tests/qemumemlocktest.c                  |  1 +
 3 files changed, 50 insertions(+), 10 deletions(-)
 create mode 100644 tests/qemumemlockdata/qemumemlock-pc-hostdev-nvme.xml

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 63b13b6875..41db98880c 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -9532,7 +9532,7 @@ getPPC64MemLockLimitBytes(virDomainDef *def,
 
 
 static int
-qemuDomainGetNumVFIODevices(const virDomainDef *def)
+qemuDomainGetNumVFIOHostdevs(const virDomainDef *def)
 {
     size_t i;
     int n = 0;
@@ -9542,10 +9542,22 @@ qemuDomainGetNumVFIODevices(const virDomainDef *def)
             virHostdevIsMdevDevice(def->hostdevs[i]))
             n++;
     }
+
+    return n;
+}
+
+
+static int
+qemuDomainGetNumNVMeDisks(const virDomainDef *def)
+{
+    size_t i;
+    int n = 0;
+
     for (i = 0; i < def->ndisks; i++) {
         if (virStorageSourceChainHasNVMe(def->disks[i]->src))
             n++;
     }
+
     return n;
 }
 
@@ -9585,6 +9597,7 @@ qemuDomainGetMemLockLimitBytes(virDomainDef *def,
 {
     unsigned long long memKB = 0;
     int nvfio;
+    int nnvme;
     int nvdpa;
 
     /* prefer the hard limit */
@@ -9604,7 +9617,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDef *def,
     if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
         return getPPC64MemLockLimitBytes(def, forceVFIO);
 
-    nvfio = qemuDomainGetNumVFIODevices(def);
+    nvfio = qemuDomainGetNumVFIOHostdevs(def);
+    nnvme = qemuDomainGetNumNVMeDisks(def);
     nvdpa = qemuDomainGetNumVDPANetDevices(def);
     /* For device passthrough using VFIO the guest memory and MMIO memory
      * regions need to be locked persistent in order to allow DMA.
@@ -9624,16 +9638,17 @@ qemuDomainGetMemLockLimitBytes(virDomainDef *def,
      *
      * Note that this may not be valid for all platforms.
      */
-    if (forceVFIO || nvfio || nvdpa) {
+    if (forceVFIO || nvfio || nnvme || nvdpa) {
         /* At present, the full memory needs to be locked for each VFIO / VDPA
-         * device. For VFIO devices, this only applies when there is a vIOMMU
-         * present. Yes, this may result in a memory limit that is greater than
-         * the host physical memory, which is not ideal. The long-term solution
-         * is a new userspace iommu interface (iommufd) which should eliminate
-         * this duplicate memory accounting. But for now this is the only way
-         * to enable configurations with e.g. multiple vdpa devices.
+         * NVMe device. For VFIO devices, this only applies when there is a
+         * vIOMMU present. Yes, this may result in a memory limit that is
+         * greater than the host physical memory, which is not ideal. The
+         * long-term solution is a new userspace iommu interface (iommufd)
+         * which should eliminate this duplicate memory accounting. But for now
+         * this is the only way to enable configurations with e.g. multiple
+         * VDPA/NVMe devices.
          */
-        int factor = nvdpa;
+        int factor = nvdpa + nnvme;
 
         if (nvfio || forceVFIO) {
             if (nvfio && def->iommu)
diff --git a/tests/qemumemlockdata/qemumemlock-pc-hostdev-nvme.xml b/tests/qemumemlockdata/qemumemlock-pc-hostdev-nvme.xml
new file mode 100644
index 0000000000..06f1496970
--- /dev/null
+++ b/tests/qemumemlockdata/qemumemlock-pc-hostdev-nvme.xml
@@ -0,0 +1,24 @@
+
+  guest
+  1048576
+  1
+
+    hvm
+
+
+    /usr/bin/qemu-system-x86_64
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/qemumemlocktest.c b/tests/qemumemlocktest.c
index 61b73e1d79..c53905a7dd 100644
--- a/tests/qemumemlocktest.c
+++ b/tests/qemumemlocktest.c
@@ -97,6 +97,7 @@ mymain(void)
     DO_TEST("pc-hardlimit", 2147483648);
     DO_TEST("pc-locked", VIR_DOMAIN_MEMORY_PARAM_UNLIMITED);
     DO_TEST("pc-hostdev", 2147483648);
+    DO_TEST("pc-hostdev-nvme", 3221225472);
     DO_TEST("pc-hardlimit+locked", 2147483648);
     DO_TEST("pc-hardlimit+hostdev", 2147483648);
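For reference, the arithmetic behind the new DO_TEST("pc-hostdev-nvme",
3221225472) expectation can be checked with a small standalone program.
This is only a sketch, not libvirt code: the helper name
memlockLimitBytes is made up, and the final "factor * guest memory +
1 GiB" step and the vIOMMU branch are inferred from the existing
qemuDomainGetMemLockLimitBytes() behaviour and from the test values,
since the hunk above ends before those lines.

  #include <stdbool.h>
  #include <stdio.h>

  /* Hypothetical helper, not part of libvirt: mirrors what
   * qemuDomainGetMemLockLimitBytes() appears to compute after this patch.
   * memKiB is the guest memory in KiB; the return value is in bytes. */
  static unsigned long long
  memlockLimitBytes(unsigned long long memKiB,
                    int nvfio, int nnvme, int nvdpa,
                    bool hasIOMMU, bool forceVFIO)
  {
      unsigned long long limitKiB = 0;

      if (forceVFIO || nvfio || nnvme || nvdpa) {
          /* Each VDPA device and (with this patch) each NVMe disk pins
           * the whole guest memory once. */
          int factor = nvdpa + nnvme;

          /* VFIO adds one more copy, plus one per hostdev when a vIOMMU
           * is present (inferred; the hunk above stops before this). */
          if (nvfio || forceVFIO) {
              if (nvfio && hasIOMMU)
                  factor += nvfio;
              factor += 1;
          }

          /* 1 GiB of headroom on top of the pinned guest memory. */
          limitKiB = factor * memKiB + 1024 * 1024;
      }

      return limitKiB * 1024;
  }

  int
  main(void)
  {
      /* qemumemlock-pc-hostdev-nvme.xml: 1 GiB guest, one <hostdev/>,
       * one NVMe <disk/>, no vIOMMU. */
      printf("%llu\n", memlockLimitBytes(1048576, 1, 1, 0, false, false));
      return 0;
  }

For that test guest the factor comes out as 2 (one NVMe disk plus one
copy for VFIO), so the limit is 2 GiB + 1 GiB = 3221225472 bytes,
matching the value added to qemumemlocktest.c above.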