mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2024-10-01 20:05:46 +00:00
qemu_domain: Increase memlock limit for NVMe disks
When starting QEMU, or when hotplugging a PCI device QEMU might lock some memory. How much? Well, that's an undecidable problem. But despite that, we try to guess. And it more or less works, until there's a counter example. This time, it's a guest with both <hostdev/> and an NVMe <disk/>. I've started a simple guest with 4GiB of memory: # virsh dominfo fedora Max memory: 4194304 KiB Used memory: 4194304 KiB And here are the amounts of memory that QEMU tried to lock, obtained via: grep VmLck /proc/$(pgrep qemu-kvm)/status 1) with just one <hostdev/> VmLck: 4194308 kB 2) with just one NVMe <disk/> VmLck: 4328544 kB 3) with one <hostdev/> and one NVMe <disk/> VmLck: 8522852 kB Now, what's surprising is case 2) where the locked memory exceeds the VM memory. It almost resembles VDPA. Therefore, treat it as such. Unfortunately, I don't have a box with two or more spare NVMe-s so I can't tell for sure. But setting the limit too tight means QEMU refuses to start. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2014030 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> Reviewed-by: Martin Kletzander <mkletzan@redhat.com>
This commit is contained in:
parent
0d0604a51a
commit
5670c50ffb
@@ -9532,7 +9532,7 @@ getPPC64MemLockLimitBytes(virDomainDef *def,
|
|||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
qemuDomainGetNumVFIODevices(const virDomainDef *def)
|
qemuDomainGetNumVFIOHostdevs(const virDomainDef *def)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
@@ -9542,10 +9542,22 @@ qemuDomainGetNumVFIODevices(const virDomainDef *def)
|
|||||||
virHostdevIsMdevDevice(def->hostdevs[i]))
|
virHostdevIsMdevDevice(def->hostdevs[i]))
|
||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int
|
||||||
|
qemuDomainGetNumNVMeDisks(const virDomainDef *def)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
int n = 0;
|
||||||
|
|
||||||
for (i = 0; i < def->ndisks; i++) {
|
for (i = 0; i < def->ndisks; i++) {
|
||||||
if (virStorageSourceChainHasNVMe(def->disks[i]->src))
|
if (virStorageSourceChainHasNVMe(def->disks[i]->src))
|
||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -9585,6 +9597,7 @@ qemuDomainGetMemLockLimitBytes(virDomainDef *def,
|
|||||||
{
|
{
|
||||||
unsigned long long memKB = 0;
|
unsigned long long memKB = 0;
|
||||||
int nvfio;
|
int nvfio;
|
||||||
|
int nnvme;
|
||||||
int nvdpa;
|
int nvdpa;
|
||||||
|
|
||||||
/* prefer the hard limit */
|
/* prefer the hard limit */
|
||||||
@@ -9604,7 +9617,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDef *def,
|
|||||||
if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
|
if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
|
||||||
return getPPC64MemLockLimitBytes(def, forceVFIO);
|
return getPPC64MemLockLimitBytes(def, forceVFIO);
|
||||||
|
|
||||||
nvfio = qemuDomainGetNumVFIODevices(def);
|
nvfio = qemuDomainGetNumVFIOHostdevs(def);
|
||||||
|
nnvme = qemuDomainGetNumNVMeDisks(def);
|
||||||
nvdpa = qemuDomainGetNumVDPANetDevices(def);
|
nvdpa = qemuDomainGetNumVDPANetDevices(def);
|
||||||
/* For device passthrough using VFIO the guest memory and MMIO memory
|
/* For device passthrough using VFIO the guest memory and MMIO memory
|
||||||
* regions need to be locked persistent in order to allow DMA.
|
* regions need to be locked persistent in order to allow DMA.
|
||||||
@@ -9624,16 +9638,17 @@ qemuDomainGetMemLockLimitBytes(virDomainDef *def,
|
|||||||
*
|
*
|
||||||
* Note that this may not be valid for all platforms.
|
* Note that this may not be valid for all platforms.
|
||||||
*/
|
*/
|
||||||
if (forceVFIO || nvfio || nvdpa) {
|
if (forceVFIO || nvfio || nnvme || nvdpa) {
|
||||||
/* At present, the full memory needs to be locked for each VFIO / VDPA
|
/* At present, the full memory needs to be locked for each VFIO / VDPA
|
||||||
* device. For VFIO devices, this only applies when there is a vIOMMU
|
* NVMe device. For VFIO devices, this only applies when there is a
|
||||||
* present. Yes, this may result in a memory limit that is greater than
|
* vIOMMU present. Yes, this may result in a memory limit that is
|
||||||
* the host physical memory, which is not ideal. The long-term solution
|
* greater than the host physical memory, which is not ideal. The
|
||||||
* is a new userspace iommu interface (iommufd) which should eliminate
|
* long-term solution is a new userspace iommu interface (iommufd)
|
||||||
* this duplicate memory accounting. But for now this is the only way
|
* which should eliminate this duplicate memory accounting. But for now
|
||||||
* to enable configurations with e.g. multiple vdpa devices.
|
* this is the only way to enable configurations with e.g. multiple
|
||||||
|
* VDPA/NVMe devices.
|
||||||
*/
|
*/
|
||||||
int factor = nvdpa;
|
int factor = nvdpa + nnvme;
|
||||||
|
|
||||||
if (nvfio || forceVFIO) {
|
if (nvfio || forceVFIO) {
|
||||||
if (nvfio && def->iommu)
|
if (nvfio && def->iommu)
|
||||||
|
24
tests/qemumemlockdata/qemumemlock-pc-hostdev-nvme.xml
Normal file
24
tests/qemumemlockdata/qemumemlock-pc-hostdev-nvme.xml
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
<domain type='kvm'>
|
||||||
|
<name>guest</name>
|
||||||
|
<memory unit='KiB'>1048576</memory>
|
||||||
|
<vcpu placement='static'>1</vcpu>
|
||||||
|
<os>
|
||||||
|
<type arch='x86_64' machine='pc'>hvm</type>
|
||||||
|
</os>
|
||||||
|
<devices>
|
||||||
|
<emulator>/usr/bin/qemu-system-x86_64</emulator>
|
||||||
|
<disk type='nvme' device='disk'>
|
||||||
|
<driver name='qemu' type='raw'/>
|
||||||
|
<source type='pci' managed='yes' namespace='1'>
|
||||||
|
<address domain='0x0003' bus='0x02' slot='0x00' function='0x0'/>
|
||||||
|
</source>
|
||||||
|
<target dev='vda' bus='virtio'/>
|
||||||
|
</disk>
|
||||||
|
<hostdev mode='subsystem' type='pci' managed='yes'>
|
||||||
|
<driver name='vfio'/>
|
||||||
|
<source>
|
||||||
|
<address domain='0x0001' bus='0x01' slot='0x00' function='0x0'/>
|
||||||
|
</source>
|
||||||
|
</hostdev>
|
||||||
|
</devices>
|
||||||
|
</domain>
|
@@ -97,6 +97,7 @@ mymain(void)
|
|||||||
DO_TEST("pc-hardlimit", 2147483648);
|
DO_TEST("pc-hardlimit", 2147483648);
|
||||||
DO_TEST("pc-locked", VIR_DOMAIN_MEMORY_PARAM_UNLIMITED);
|
DO_TEST("pc-locked", VIR_DOMAIN_MEMORY_PARAM_UNLIMITED);
|
||||||
DO_TEST("pc-hostdev", 2147483648);
|
DO_TEST("pc-hostdev", 2147483648);
|
||||||
|
DO_TEST("pc-hostdev-nvme", 3221225472);
|
||||||
|
|
||||||
DO_TEST("pc-hardlimit+locked", 2147483648);
|
DO_TEST("pc-hardlimit+locked", 2147483648);
|
||||||
DO_TEST("pc-hardlimit+hostdev", 2147483648);
|
DO_TEST("pc-hardlimit+hostdev", 2147483648);
|
||||||
|
Loading…
Reference in New Issue
Block a user