qemu: Add support for /dev/userfaultfd

The /dev/userfaultfd device is preferred over the userfaultfd syscall for
post-copy migrations. Unless the qemu driver is configured to disable the
mount namespace or to forbid access to /dev/userfaultfd in cgroup_device_acl,
we will copy it to the limited /dev filesystem QEMU will have access to
and label it appropriately. So in the default configuration post-copy
migration will be allowed even without enabling the
vm.unprivileged_userfaultfd sysctl.

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Ján Tomko <jtomko@redhat.com>
This commit is contained in:
Jiri Denemark 2024-02-08 15:56:38 +01:00
parent a2c3e390f7
commit 66643931e7
6 changed files with 92 additions and 1 deletions

View File

@ -565,7 +565,8 @@
#cgroup_device_acl = [
# "/dev/null", "/dev/full", "/dev/zero",
# "/dev/random", "/dev/urandom",
# "/dev/ptmx", "/dev/kvm"
# "/dev/ptmx", "/dev/kvm",
# "/dev/userfaultfd"
#]
#
# RDMA migration requires the following extra files to be added to the list:

View File

@ -41,6 +41,7 @@ const char *const defaultDeviceACL[] = {
"/dev/null", "/dev/full", "/dev/zero",
"/dev/random", "/dev/urandom",
"/dev/ptmx", "/dev/kvm",
"/dev/userfaultfd",
NULL,
};
#define DEVICE_PTY_MAJOR 136

View File

@ -2882,6 +2882,40 @@ qemuProcessStartManagedPRDaemon(virDomainObj *vm)
}
/**
 * qemuProcessAllowPostCopyMigration:
 * @vm: domain object
 *
 * Labels /dev/userfaultfd inside the domain's mount namespace so that the
 * device can be used for post-copy migration. Nothing is done when the host
 * does not provide the device or when the device is not listed in the
 * effective device ACL (the configured cgroupDeviceACL, or the built-in
 * defaultDeviceACL when none is configured).
 *
 * Returns 0 on success or when there was nothing to do,
 *        -1 when labeling the device failed.
 */
static int
qemuProcessAllowPostCopyMigration(virDomainObj *vm)
{
    qemuDomainObjPrivate *priv = vm->privateData;
    virQEMUDriver *driver = priv->driver;
    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
    const char *path = "/dev/userfaultfd";
    /* Fall back to the built-in list when no ACL was configured. */
    const char *const *acl = cfg->cgroupDeviceACL
        ? (const char *const *) cfg->cgroupDeviceACL
        : defaultDeviceACL;
    int labelRc;

    if (!virFileExists(path)) {
        VIR_DEBUG("%s is not supported by the host", path);
        return 0;
    }

    if (!g_strv_contains(acl, path)) {
        VIR_DEBUG("%s is not allowed by device ACL", path);
        return 0;
    }

    VIR_DEBUG("Labeling %s in mount namespace", path);

    labelRc = qemuSecurityDomainSetMountNSPathLabel(driver, vm, path);
    if (labelRc < 0)
        return -1;

    /* A return value of 1 means the mount namespace is disabled and the
     * device node was deliberately left untouched; this is not an error. */
    if (labelRc == 1)
        VIR_DEBUG("Mount namespace is not enabled, leaving %s as is", path);

    return 0;
}
static int
qemuProcessInitPasswords(virQEMUDriver *driver,
virDomainObj *vm,
@ -7802,6 +7836,10 @@ qemuProcessLaunch(virConnectPtr conn,
qemuProcessStartManagedPRDaemon(vm) < 0)
goto cleanup;
VIR_DEBUG("Setting up permissions to allow post-copy migration");
if (qemuProcessAllowPostCopyMigration(vm) < 0)
goto cleanup;
VIR_DEBUG("Setting domain security labels");
if (qemuSecuritySetAllLabel(driver,
vm,

View File

@ -615,6 +615,51 @@ qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver,
}
/**
 * qemuSecurityDomainSetMountNSPathLabel:
 * @driver: the QEMU driver
 * @vm: domain object
 * @path: path to be labeled
 *
 * Applies the domain's security label to @path, but only when the domain
 * runs with a mount namespace. Without a mount namespace the path is left
 * completely untouched.
 *
 * The label is set solely inside the mount namespace, which is why no
 * matching restore operation is required.
 *
 * Returns 0 on success,
 *         1 when mount namespace is not enabled,
 *        -1 on error.
 */
int
qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver,
                                      virDomainObj *vm,
                                      const char *path)
{
    int rc = -1;

    if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
        VIR_DEBUG("Not labeling '%s': mount namespace disabled for domain '%s'",
                  path, vm->def->name);
        return 1;
    }

    /* Start the transaction, queue the label change and commit it; the
     * chain stops at the first step that reports a failure. */
    if (virSecurityManagerTransactionStart(driver->securityManager) >= 0 &&
        virSecurityManagerDomainSetPathLabel(driver->securityManager,
                                             vm->def, path, false) >= 0 &&
        virSecurityManagerTransactionCommit(driver->securityManager,
                                            vm->pid, false) >= 0) {
        rc = 0;
    }

    /* Called on every path, including after a successful commit. */
    virSecurityManagerTransactionAbort(driver->securityManager);
    return rc;
}
/**
* qemuSecurityCommandRun:
* @driver: the QEMU driver

View File

@ -110,6 +110,11 @@ int qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver,
virDomainObj *vm,
const char *path);
int
qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver,
virDomainObj *vm,
const char *path);
int qemuSecurityCommandRun(virQEMUDriver *driver,
virDomainObj *vm,
virCommand *cmd,

View File

@ -67,6 +67,7 @@ module Test_libvirtd_qemu =
{ "5" = "/dev/urandom" }
{ "6" = "/dev/ptmx" }
{ "7" = "/dev/kvm" }
{ "8" = "/dev/userfaultfd" }
}
{ "save_image_format" = "raw" }
{ "dump_image_format" = "raw" }