[libvirt PATCH] qemu: Add support for /dev/userfaultfd

Jiri Denemark posted 1 patch 10 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/fb222d2d5a12ccb340680727c57a91abb6b59dc8.1707406190.git.jdenemar@redhat.com
There is a newer version of this series
src/qemu/qemu.conf.in              |  3 +-
src/qemu/qemu_cgroup.c             |  1 +
src/qemu/qemu_process.c            | 38 +++++++++++++++++++++++++
src/qemu/qemu_security.c           | 45 ++++++++++++++++++++++++++++++
src/qemu/qemu_security.h           |  5 ++++
src/qemu/test_libvirtd_qemu.aug.in |  1 +
6 files changed, 92 insertions(+), 1 deletion(-)
[libvirt PATCH] qemu: Add support for /dev/userfaultfd
Posted by Jiri Denemark 10 months ago
/dev/userfaultfd device is preferred over userfaultfd syscall for
post-copy migrations. Unless qemu driver is configured to disable mount
namespace or to forbid access to /dev/userfaultfd in cgroup_device_acl,
we will copy it to the limited /dev filesystem QEMU will have access to
and label it appropriately. So in the default configuration post-copy
migration will be allowed even without enabling
vm.unprivileged_userfaultfd sysctl.

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
---

Notes:
    The question is what should we do with the
    src/qemu/postcopy-migration.sysctl file which is installed by
    libvirt.spec to /usr/lib/sysctl.d/60-qemu-postcopy-migration.conf by
    default. The file is now useless and should ideally be removed, but only
    when the host kernel is new enough to support /dev/userfaultfd

 src/qemu/qemu.conf.in              |  3 +-
 src/qemu/qemu_cgroup.c             |  1 +
 src/qemu/qemu_process.c            | 38 +++++++++++++++++++++++++
 src/qemu/qemu_security.c           | 45 ++++++++++++++++++++++++++++++
 src/qemu/qemu_security.h           |  5 ++++
 src/qemu/test_libvirtd_qemu.aug.in |  1 +
 6 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
index 34025a02ef..f406df8749 100644
--- a/src/qemu/qemu.conf.in
+++ b/src/qemu/qemu.conf.in
@@ -565,7 +565,8 @@
 #cgroup_device_acl = [
 #    "/dev/null", "/dev/full", "/dev/zero",
 #    "/dev/random", "/dev/urandom",
-#    "/dev/ptmx", "/dev/kvm"
+#    "/dev/ptmx", "/dev/kvm",
+#    "/dev/userfaultfd"
 #]
 #
 # RDMA migration requires the following extra files to be added to the list:
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 47402b3750..5a5ba763a0 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -41,6 +41,7 @@ const char *const defaultDeviceACL[] = {
     "/dev/null", "/dev/full", "/dev/zero",
     "/dev/random", "/dev/urandom",
     "/dev/ptmx", "/dev/kvm",
+    "/dev/userfaultfd",
     NULL,
 };
 #define DEVICE_PTY_MAJOR 136
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 0a6c18a671..6e51d6586b 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2882,6 +2882,40 @@ qemuProcessStartManagedPRDaemon(virDomainObj *vm)
 }
 
 
+static int
+qemuProcessAllowPostCopyMigration(virDomainObj *vm)
+{
+    qemuDomainObjPrivate *priv = vm->privateData;
+    virQEMUDriver *driver = priv->driver;
+    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
+    const char *const *devices = (const char *const *) cfg->cgroupDeviceACL;
+    const char *uffd = "/dev/userfaultfd";
+    int rc;
+
+    if (!virFileExists(uffd)) {
+        VIR_DEBUG("%s is not supported by the host", uffd);
+        return 0;
+    }
+
+    if (!devices)
+        devices = defaultDeviceACL;
+
+    if (!g_strv_contains(devices, uffd)) {
+        VIR_DEBUG("%s is not allowed by device ACL", uffd);
+        return 0;
+    }
+
+    VIR_DEBUG("Labeling %s in mount namespace", uffd);
+    if ((rc = qemuSecurityDomainSetMountNSPathLabel(driver, vm, uffd)) < 0)
+        return -1;
+
+    if (rc == 1)
+        VIR_DEBUG("Mount namespace is not enabled, leaving %s as is", uffd);
+
+    return 0;
+}
+
+
 static int
 qemuProcessInitPasswords(virQEMUDriver *driver,
                          virDomainObj *vm,
@@ -7802,6 +7836,10 @@ qemuProcessLaunch(virConnectPtr conn,
         qemuProcessStartManagedPRDaemon(vm) < 0)
         goto cleanup;
 
+    VIR_DEBUG("Setting up permissions to allow post-copy migration");
+    if (qemuProcessAllowPostCopyMigration(vm) < 0)
+        goto cleanup;
+
     VIR_DEBUG("Setting domain security labels");
     if (qemuSecuritySetAllLabel(driver,
                                 vm,
diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c
index 8bcef14d08..4aaa863ae9 100644
--- a/src/qemu/qemu_security.c
+++ b/src/qemu/qemu_security.c
@@ -615,6 +615,51 @@ qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver,
 }
 
 
+/**
+ * qemuSecurityDomainSetMountNSPathLabel:
+ *
+ * Label given path in mount namespace. If mount namespace is not enabled,
+ * nothing is labeled at all.
+ *
+ * Because the label is only applied in mount namespace, there's no need to
+ * restore it.
+ *
+ * Returns 0 on success,
+ *         1 when mount namespace is not enabled,
+ *        -1 on error.
+ */
+int
+qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver,
+                                      virDomainObj *vm,
+                                      const char *path)
+{
+    int ret = -1;
+
+    if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
+        VIR_DEBUG("Not labeling '%s': mount namespace disabled for domain '%s'",
+                  path, vm->def->name);
+        return 1;
+    }
+
+    if (virSecurityManagerTransactionStart(driver->securityManager) < 0)
+        goto cleanup;
+
+    if (virSecurityManagerDomainSetPathLabel(driver->securityManager,
+                                             vm->def, path, false) < 0)
+        goto cleanup;
+
+    if (virSecurityManagerTransactionCommit(driver->securityManager,
+                                            vm->pid, false) < 0)
+        goto cleanup;
+
+    ret = 0;
+
+ cleanup:
+    virSecurityManagerTransactionAbort(driver->securityManager);
+    return ret;
+}
+
+
 /**
  * qemuSecurityCommandRun:
  * @driver: the QEMU driver
diff --git a/src/qemu/qemu_security.h b/src/qemu/qemu_security.h
index 10f11771b4..41da33debc 100644
--- a/src/qemu/qemu_security.h
+++ b/src/qemu/qemu_security.h
@@ -110,6 +110,11 @@ int qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver,
                                        virDomainObj *vm,
                                        const char *path);
 
+int
+qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver,
+                                      virDomainObj *vm,
+                                      const char *path);
+
 int qemuSecurityCommandRun(virQEMUDriver *driver,
                            virDomainObj *vm,
                            virCommand *cmd,
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index e4cfde6cc7..b97e6de11e 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -67,6 +67,7 @@ module Test_libvirtd_qemu =
     { "5" = "/dev/urandom" }
     { "6" = "/dev/ptmx" }
     { "7" = "/dev/kvm" }
+    { "8" = "/dev/userfaultfd" }
 }
 { "save_image_format" = "raw" }
 { "dump_image_format" = "raw" }
-- 
2.43.0
_______________________________________________
Devel mailing list -- devel@lists.libvirt.org
To unsubscribe send an email to devel-leave@lists.libvirt.org
Re: [libvirt PATCH] qemu: Add support for /dev/userfaultfd
Posted by Daniel P. Berrangé 10 months ago
On Thu, Feb 08, 2024 at 04:30:38PM +0100, Jiri Denemark wrote:
> /dev/userfaultfd device is preferred over userfaultfd syscall for
> post-copy migrations. Unless qemu driver is configured to disable mount
> namespace or to forbid access to /dev/userfaultfd in cgroup_device_acl,
> we will copy it to the limited /dev filesystem QEMU will have access to
> and label it appropriately. So in the default configuration post-copy
> migration will be allowed even without enabling
> vm.unprivileged_userfaultfd sysctl.
> 
> Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
> ---
> 
> Notes:
>     The question is what should we do with the
>     src/qemu/postcopy-migration.sysctl file which is installed by
>     libvirt.spec to /usr/lib/sysctl.d/60-qemu-postcopy-migration.conf by
>     default. The file is now useless and should ideally be removed, but only
>     when the host kernel is new enough to support /dev/userfaultfd

Just provide a meson_options.txt entry to disable it, and leave
it to be a distro problem to turn off in whatever releases they
consider new enough to prefer userfaultfd.


With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
_______________________________________________
Devel mailing list -- devel@lists.libvirt.org
To unsubscribe send an email to devel-leave@lists.libvirt.org