From 66643931e7db08681239c2f74e9ce300d6e291e1 Mon Sep 17 00:00:00 2001 From: Jiri Denemark Date: Thu, 8 Feb 2024 15:56:38 +0100 Subject: [PATCH] qemu: Add support for /dev/userfaultfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /dev/userfaultfd device is preferred over userfaultfd syscall for post-copy migrations. Unless qemu driver is configured to disable mount namespace or to forbid access to /dev/userfaultfd in cgroup_device_acl, we will copy it to the limited /dev filesystem QEMU will have access to and label it appropriately. So in the default configuration post-copy migration will be allowed even without enabling vm.unprivileged_userfaultfd sysctl. Signed-off-by: Jiri Denemark Reviewed-by: Ján Tomko --- src/qemu/qemu.conf.in | 3 +- src/qemu/qemu_cgroup.c | 1 + src/qemu/qemu_process.c | 38 +++++++++++++++++++++++++ src/qemu/qemu_security.c | 45 ++++++++++++++++++++++++++++++ src/qemu/qemu_security.h | 5 ++++ src/qemu/test_libvirtd_qemu.aug.in | 1 + 6 files changed, 92 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in index 34025a02ef..f406df8749 100644 --- a/src/qemu/qemu.conf.in +++ b/src/qemu/qemu.conf.in @@ -565,7 +565,8 @@ #cgroup_device_acl = [ # "/dev/null", "/dev/full", "/dev/zero", # "/dev/random", "/dev/urandom", -# "/dev/ptmx", "/dev/kvm" +# "/dev/ptmx", "/dev/kvm", +# "/dev/userfaultfd" #] # # RDMA migration requires the following extra files to be added to the list: diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 47402b3750..5a5ba763a0 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -41,6 +41,7 @@ const char *const defaultDeviceACL[] = { "/dev/null", "/dev/full", "/dev/zero", "/dev/random", "/dev/urandom", "/dev/ptmx", "/dev/kvm", + "/dev/userfaultfd", NULL, }; #define DEVICE_PTY_MAJOR 136 diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 0a6c18a671..6e51d6586b 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2882,6 +2882,40 @@ qemuProcessStartManagedPRDaemon(virDomainObj *vm) } +static int +qemuProcessAllowPostCopyMigration(virDomainObj *vm) +{ + qemuDomainObjPrivate *priv = vm->privateData; + virQEMUDriver *driver = priv->driver; + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + const char *const *devices = (const char *const *) cfg->cgroupDeviceACL; + const char *uffd = "/dev/userfaultfd"; + int rc; + + if (!virFileExists(uffd)) { + VIR_DEBUG("%s is not supported by the host", uffd); + return 0; + } + + if (!devices) + devices = defaultDeviceACL; + + if (!g_strv_contains(devices, uffd)) { + VIR_DEBUG("%s is not allowed by device ACL", uffd); + return 0; + } + + VIR_DEBUG("Labeling %s in mount namespace", uffd); + if ((rc = qemuSecurityDomainSetMountNSPathLabel(driver, vm, uffd)) < 0) + return -1; + + if (rc == 1) + VIR_DEBUG("Mount namespace is not enabled, leaving %s as is", uffd); + + return 0; +} + + static int qemuProcessInitPasswords(virQEMUDriver *driver, virDomainObj *vm, @@ -7802,6 +7836,10 @@ qemuProcessLaunch(virConnectPtr conn, qemuProcessStartManagedPRDaemon(vm) < 0) goto cleanup; + VIR_DEBUG("Setting up permissions to allow post-copy migration"); + if (qemuProcessAllowPostCopyMigration(vm) < 0) + goto cleanup; + VIR_DEBUG("Setting domain security labels"); if (qemuSecuritySetAllLabel(driver, vm, diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index 8bcef14d08..4aaa863ae9 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -615,6 +615,51 @@ qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver, } +/** + * qemuSecurityDomainSetMountNSPathLabel: + * + * Label given path in mount namespace. If mount namespace is not enabled, + * nothing is labeled at all. + * + * Because the label is only applied in mount namespace, there's no need to + * restore it. + * + * Returns 0 on success, + * 1 when mount namespace is not enabled, + * -1 on error. + */ +int +qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver, + virDomainObj *vm, + const char *path) +{ + int ret = -1; + + if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) { + VIR_DEBUG("Not labeling '%s': mount namespace disabled for domain '%s'", + path, vm->def->name); + return 1; + } + + if (virSecurityManagerTransactionStart(driver->securityManager) < 0) + goto cleanup; + + if (virSecurityManagerDomainSetPathLabel(driver->securityManager, + vm->def, path, false) < 0) + goto cleanup; + + if (virSecurityManagerTransactionCommit(driver->securityManager, + vm->pid, false) < 0) + goto cleanup; + + ret = 0; + + cleanup: + virSecurityManagerTransactionAbort(driver->securityManager); + return ret; +} + + /** * qemuSecurityCommandRun: * @driver: the QEMU driver diff --git a/src/qemu/qemu_security.h b/src/qemu/qemu_security.h index 10f11771b4..41da33debc 100644 --- a/src/qemu/qemu_security.h +++ b/src/qemu/qemu_security.h @@ -110,6 +110,11 @@ int qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver, virDomainObj *vm, const char *path); +int +qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver, + virDomainObj *vm, + const char *path); + int qemuSecurityCommandRun(virQEMUDriver *driver, virDomainObj *vm, virCommand *cmd, diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in index e4cfde6cc7..b97e6de11e 100644 --- a/src/qemu/test_libvirtd_qemu.aug.in +++ b/src/qemu/test_libvirtd_qemu.aug.in @@ -67,6 +67,7 @@ module Test_libvirtd_qemu = { "5" = "/dev/urandom" } { "6" = "/dev/ptmx" } { "7" = "/dev/kvm" } + { "8" = "/dev/userfaultfd" } } { "save_image_format" = "raw" } { "dump_image_format" = "raw" }