[PATCH] qemu: Drop /dev/kvm from default device ACL

Praveen K Paladugu posted 1 patch 1 week, 1 day ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20251017165453.28304-1-praveenkpaladugu@gmail.com
There is a newer version of this series
src/qemu/qemu.conf.in              |  3 +-
src/qemu/qemu_cgroup.c             | 52 ++++++++++++++++++++++--------
src/qemu/qemu_cgroup.h             |  5 ++-
src/qemu/qemu_conf.c               | 14 ++++++--
src/qemu/qemu_conf.h               |  2 +-
src/qemu/qemu_driver.c             |  4 +++
src/qemu/qemu_namespace.c          | 12 +++----
src/qemu/qemu_process.c            |  6 ++--
src/qemu/test_libvirtd_qemu.aug.in |  3 +-
9 files changed, 71 insertions(+), 30 deletions(-)
[PATCH] qemu: Drop /dev/kvm from default device ACL
Posted by Praveen K Paladugu 1 week, 1 day ago
A domain that runs with TCG emulation does not need kvm device, so drop
it from default device ACL.

To dynamically add devices to defaultDeviceACL, make it a GSList. This
variable will be initialized when qemu driver is initalized.

Lastly, dynamically append /dev/kvm to default ACL only if the domain is
of type VIR_DOMAIN_VIRT_KVM.

Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
---
 src/qemu/qemu.conf.in              |  3 +-
 src/qemu/qemu_cgroup.c             | 52 ++++++++++++++++++++++--------
 src/qemu/qemu_cgroup.h             |  5 ++-
 src/qemu/qemu_conf.c               | 14 ++++++--
 src/qemu/qemu_conf.h               |  2 +-
 src/qemu/qemu_driver.c             |  4 +++
 src/qemu/qemu_namespace.c          | 12 +++----
 src/qemu/qemu_process.c            |  6 ++--
 src/qemu/test_libvirtd_qemu.aug.in |  3 +-
 9 files changed, 71 insertions(+), 30 deletions(-)

diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
index fc91ba8f08..0a8abd9544 100644
--- a/src/qemu/qemu.conf.in
+++ b/src/qemu/qemu.conf.in
@@ -618,8 +618,7 @@
 #cgroup_device_acl = [
 #    "/dev/null", "/dev/full", "/dev/zero",
 #    "/dev/random", "/dev/urandom",
-#    "/dev/ptmx", "/dev/kvm",
-#    "/dev/userfaultfd"
+#    "/dev/ptmx", "/dev/userfaultfd"
 #]
 #
 # RDMA migration requires the following extra files to be added to the list:
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index f10976c2b0..b2dcefd81e 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -38,17 +38,38 @@
 
 VIR_LOG_INIT("qemu.qemu_cgroup");
 
-const char *const defaultDeviceACL[] = {
+GSList *defaultDeviceACL;
+
+const char *const _defaultDeviceACL[] = {
     "/dev/null", "/dev/full", "/dev/zero",
     "/dev/random", "/dev/urandom",
-    "/dev/ptmx", "/dev/kvm",
-    "/dev/userfaultfd",
+    "/dev/ptmx", "/dev/userfaultfd",
     NULL,
 };
 #define DEVICE_PTY_MAJOR 136
 #define DEVICE_SND_MAJOR 116
 
 
+void
+initDefaultDeviceACL(void)
+{
+    size_t i;
+
+    for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
+        defaultDeviceACL = g_slist_append(defaultDeviceACL,
+                                            g_strdup(_defaultDeviceACL[i]));
+    }
+}
+
+void
+updateDefaultDeviceACL(virDomainObj *vm)
+{
+    if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
+        defaultDeviceACL = g_slist_append(defaultDeviceACL,
+                                                g_strdup("/dev/kvm"));
+    }
+}
+
 static int
 qemuCgroupAllowDevicePath(virDomainObj *vm,
                           const char *path,
@@ -71,19 +92,19 @@ qemuCgroupAllowDevicePath(virDomainObj *vm,
 
 static int
 qemuCgroupAllowDevicesPaths(virDomainObj *vm,
-                            const char *const *deviceACL,
+                            GSList *deviceACL,
                             int perms,
                             bool ignoreEacces)
 {
-    size_t i;
+    GSList *cur = NULL;
 
-    for (i = 0; deviceACL[i] != NULL; i++) {
-        if (!virFileExists(deviceACL[i])) {
-            VIR_DEBUG("Ignoring non-existent device %s", deviceACL[i]);
+    for (cur = deviceACL; cur; cur = g_slist_next(cur)) {
+        if (!virFileExists(cur->data)) {
+            VIR_DEBUG("Ignoring non-existent device %s", (char *)cur->data);
             continue;
         }
 
-        if (qemuCgroupAllowDevicePath(vm, deviceACL[i], perms, ignoreEacces) < 0)
+        if (qemuCgroupAllowDevicePath(vm, cur->data, perms, ignoreEacces) < 0)
             return -1;
     }
 
@@ -99,13 +120,13 @@ qemuCgroupDenyDevicePath(virDomainObj *vm,
 {
     qemuDomainObjPrivate *priv = vm->privateData;
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-    const char *const *deviceACL = (const char *const *)cfg->cgroupDeviceACL;
     int ret;
+    GSList *deviceACL = cfg->cgroupDeviceACL;
 
     if (!deviceACL)
         deviceACL = defaultDeviceACL;
 
-    if (g_strv_contains(deviceACL, path)) {
+    if (g_slist_find(deviceACL, path)) {
         VIR_DEBUG("Skipping deny of path %s in CGroups because it's in cgroupDeviceACL",
                   path);
         return 0;
@@ -556,8 +577,11 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
                              virDomainMemoryDef *mem)
 {
     qemuDomainObjPrivate *priv = vm->privateData;
-    const char *const sgxPaths[] = { QEMU_DEV_SGX_VEPVC,
-                                     QEMU_DEV_SGX_PROVISION, NULL };
+    g_autoptr(virGSListString) sgxPaths = NULL;
+
+    sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_VEPVC));
+    sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_PROVISION));
+    sgxPaths = g_slist_append(sgxPaths, NULL);
 
     if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
         return 0;
@@ -758,7 +782,7 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
 {
     qemuDomainObjPrivate *priv = vm->privateData;
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-    const char *const *deviceACL = (const char *const *) cfg->cgroupDeviceACL;
+    GSList *deviceACL = cfg->cgroupDeviceACL;
     int rv = -1;
     size_t i;
 
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index 3668034cde..402120a8f2 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -66,4 +66,7 @@ struct _qemuCgroupEmulatorAllNodesData {
     char *emulatorMemMask;
 };
 
-extern const char *const defaultDeviceACL[];
+void updateDefaultDeviceACL(virDomainObj *vm);
+void initDefaultDeviceACL(void);
+
+extern GSList *defaultDeviceACL;
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index 242955200a..a19a86cd70 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -345,7 +345,8 @@ static void virQEMUDriverConfigDispose(void *obj)
 
     virBitmapFree(cfg->namespaces);
 
-    g_strfreev(cfg->cgroupDeviceACL);
+    g_slist_free(cfg->cgroupDeviceACL);
+    cfg->cgroupDeviceACL = NULL;
     g_free(cfg->uri);
 
     g_free(cfg->configBaseDir);
@@ -1068,6 +1069,7 @@ virQEMUDriverConfigLoadSecurityEntry(virQEMUDriverConfig *cfg,
     g_auto(GStrv) namespaces = NULL;
     g_autofree char *user = NULL;
     g_autofree char *group = NULL;
+    char **cgroupDeviceACL = NULL;
     size_t i, j;
 
     if (virConfGetValueStringList(conf, "security_driver", true, &cfg->securityDriverNames) < 0)
@@ -1125,9 +1127,17 @@ virQEMUDriverConfigLoadSecurityEntry(virQEMUDriverConfig *cfg,
     }
 
     if (virConfGetValueStringList(conf, "cgroup_device_acl", false,
-                                  &cfg->cgroupDeviceACL) < 0)
+                                  &cgroupDeviceACL) < 0)
         return -1;
 
+    if (cgroupDeviceACL) {
+        for (i = 0; cgroupDeviceACL[i] != NULL; i++) {
+            cfg->cgroupDeviceACL = g_slist_append(cfg->cgroupDeviceACL,
+                                                  g_strdup(cgroupDeviceACL[i]));
+        }
+        g_strfreev(cgroupDeviceACL);
+    }
+
     if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0)
         return -1;
 
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index edb65c99f4..bef198c2c8 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -96,7 +96,7 @@ struct _virQEMUDriverConfig {
     bool rememberOwner;
 
     int cgroupControllers;
-    char **cgroupDeviceACL;
+    GSList *cgroupDeviceACL;
 
     /* These five directories are ones libvirtd uses (so must be root:root
      * to avoid security risk from QEMU processes */
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ac72ea5cb0..a5fff3dfb1 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -52,6 +52,7 @@
 #include "qemu_saveimage.h"
 #include "qemu_snapshot.h"
 #include "qemu_validate.h"
+#include "qemu_cgroup.h"
 
 #include "virerror.h"
 #include "virlog.h"
@@ -910,6 +911,8 @@ qemuStateInitialize(bool privileged,
     };
     virDomainDriverAutoStart(qemu_driver->domains, &autostartCfg);
 
+    initDefaultDeviceACL();
+
     return VIR_DRV_STATE_INIT_COMPLETE;
 
  error:
@@ -1037,6 +1040,7 @@ qemuStateCleanup(void)
     if (qemu_driver->lockFD != -1)
         virPidFileRelease(qemu_driver->config->stateDir, "driver", qemu_driver->lockFD);
 
+    g_slist_free(defaultDeviceACL);
     virObjectUnref(qemu_driver->config);
     virMutexDestroy(&qemu_driver->lock);
     VIR_FREE(qemu_driver);
diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c
index f72da83929..74e2730d2d 100644
--- a/src/qemu/qemu_namespace.c
+++ b/src/qemu/qemu_namespace.c
@@ -212,14 +212,14 @@ static int
 qemuDomainPopulateDevices(virQEMUDriverConfig *cfg,
                           GSList **paths)
 {
-    const char *const *devices = (const char *const *) cfg->cgroupDeviceACL;
-    size_t i;
+    GSList *devices = cfg->cgroupDeviceACL;
+    GSList *cur = NULL;
 
     if (!devices)
         devices = defaultDeviceACL;
 
-    for (i = 0; devices[i]; i++) {
-        *paths = g_slist_prepend(*paths, g_strdup(devices[i]));
+    for (cur = devices; cur; cur = g_slist_next(cur)) {
+        *paths = g_slist_prepend(*paths, g_strdup(cur->data));
     }
 
     return 0;
@@ -1459,7 +1459,7 @@ qemuNamespaceUnlinkPaths(virDomainObj *vm,
         if (STRPREFIX(path, QEMU_DEVPREFIX)) {
             GStrv mount;
             bool inSubmount = false;
-            const char *const *devices = (const char *const *)cfg->cgroupDeviceACL;
+            GSList *devices = cfg->cgroupDeviceACL;
 
             for (mount = devMountsPath; *mount; mount++) {
                 if (STREQ(*mount, "/dev"))
@@ -1477,7 +1477,7 @@ qemuNamespaceUnlinkPaths(virDomainObj *vm,
             if (!devices)
                 devices = defaultDeviceACL;
 
-            if (g_strv_contains(devices, path))
+            if (g_slist_find(devices, path))
                 continue;
 
             unlinkPaths = g_slist_prepend(unlinkPaths, g_strdup(path));
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 9926998f85..d3a78266ef 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3038,7 +3038,7 @@ qemuProcessAllowPostCopyMigration(virDomainObj *vm)
     qemuDomainObjPrivate *priv = vm->privateData;
     virQEMUDriver *driver = priv->driver;
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
-    const char *const *devices = (const char *const *) cfg->cgroupDeviceACL;
+    GSList *devices = cfg->cgroupDeviceACL;
     const char *uffd = "/dev/userfaultfd";
     int rc;
 
@@ -3050,7 +3050,7 @@ qemuProcessAllowPostCopyMigration(virDomainObj *vm)
     if (!devices)
         devices = defaultDeviceACL;
 
-    if (!g_strv_contains(devices, uffd)) {
+    if (!g_slist_find(devices, uffd)) {
         VIR_DEBUG("%s is not allowed by device ACL", uffd);
         return 0;
     }
@@ -8193,6 +8193,8 @@ qemuProcessLaunch(virConnectPtr conn,
         goto cleanup;
     }
 
+    updateDefaultDeviceACL(vm);
+
     VIR_DEBUG("Building domain mount namespace (if required)");
     if (qemuDomainBuildNamespace(cfg, vm) < 0)
         goto cleanup;
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index 90012b3f52..82cfec3b4b 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -76,8 +76,7 @@ module Test_libvirtd_qemu =
     { "4" = "/dev/random" }
     { "5" = "/dev/urandom" }
     { "6" = "/dev/ptmx" }
-    { "7" = "/dev/kvm" }
-    { "8" = "/dev/userfaultfd" }
+    { "7" = "/dev/userfaultfd" }
 }
 { "save_image_format" = "raw" }
 { "dump_image_format" = "raw" }
-- 
2.51.0
Re: [PATCH] qemu: Drop /dev/kvm from default device ACL
Posted by Michal Prívozník via Devel 5 days, 15 hours ago
On 10/17/25 18:54, Praveen K Paladugu wrote:
> A domain that runs with TCG emulation does not need kvm device, so drop
> it from default device ACL.
> 
> To dynamically add devices to defaultDeviceACL, make it a GSList. This
> variable will be initialized when qemu driver is initalized.
> 
> Lastly, dynamically append /dev/kvm to default ACL only if the domain is
> of type VIR_DOMAIN_VIRT_KVM.
> 
> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> ---
>  src/qemu/qemu.conf.in              |  3 +-
>  src/qemu/qemu_cgroup.c             | 52 ++++++++++++++++++++++--------
>  src/qemu/qemu_cgroup.h             |  5 ++-
>  src/qemu/qemu_conf.c               | 14 ++++++--
>  src/qemu/qemu_conf.h               |  2 +-
>  src/qemu/qemu_driver.c             |  4 +++
>  src/qemu/qemu_namespace.c          | 12 +++----
>  src/qemu/qemu_process.c            |  6 ++--
>  src/qemu/test_libvirtd_qemu.aug.in |  3 +-
>  9 files changed, 71 insertions(+), 30 deletions(-)
> 
> diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
> index fc91ba8f08..0a8abd9544 100644
> --- a/src/qemu/qemu.conf.in
> +++ b/src/qemu/qemu.conf.in
> @@ -618,8 +618,7 @@
>  #cgroup_device_acl = [
>  #    "/dev/null", "/dev/full", "/dev/zero",
>  #    "/dev/random", "/dev/urandom",
> -#    "/dev/ptmx", "/dev/kvm",
> -#    "/dev/userfaultfd"
> +#    "/dev/ptmx", "/dev/userfaultfd"
>  #]
>  #
>  # RDMA migration requires the following extra files to be added to the list:
> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> index f10976c2b0..b2dcefd81e 100644
> --- a/src/qemu/qemu_cgroup.c
> +++ b/src/qemu/qemu_cgroup.c
> @@ -38,17 +38,38 @@
>  
>  VIR_LOG_INIT("qemu.qemu_cgroup");
>  
> -const char *const defaultDeviceACL[] = {
> +GSList *defaultDeviceACL;
> +
> +const char *const _defaultDeviceACL[] = {
>      "/dev/null", "/dev/full", "/dev/zero",
>      "/dev/random", "/dev/urandom",
> -    "/dev/ptmx", "/dev/kvm",
> -    "/dev/userfaultfd",
> +    "/dev/ptmx", "/dev/userfaultfd",
>      NULL,
>  };
>  #define DEVICE_PTY_MAJOR 136
>  #define DEVICE_SND_MAJOR 116
>  
>  
> +void
> +initDefaultDeviceACL(void)
> +{
> +    size_t i;
> +
> +    for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
> +        defaultDeviceACL = g_slist_append(defaultDeviceACL,
> +                                            g_strdup(_defaultDeviceACL[i]));
> +    }
> +}
> +
> +void
> +updateDefaultDeviceACL(virDomainObj *vm)
> +{
> +    if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
> +        defaultDeviceACL = g_slist_append(defaultDeviceACL,
> +                                                g_strdup("/dev/kvm"));
> +    }
> +}

So if this function is called multiple times then "/dev/kvm" will appear
multiple times on the list. Worse, if the first VM is of KVM type, then
/dev/kvm is added onto the list and the accelerator of the second one is
irrelevant as /dev/kvm will be allowed in its devices CGroup controller.

What you want to do is:

1) remove "/dev/kvm" from the defaultDeviceACL array,
2) audit and fix all users of defaultDeviceACL and let them act
   accordingly if domain is of KVM type. For instance,
   qemuSetupDevicesCgroup() will need to have something like:

qemuSetupDevicesCgroup(vm)
{
  ...
  qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);

  if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
    qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
  }
  ...
}

/* Modulo error checking, code fromatting, etc. */ 

> +
>  static int
>  qemuCgroupAllowDevicePath(virDomainObj *vm,
>                            const char *path,
> @@ -71,19 +92,19 @@ qemuCgroupAllowDevicePath(virDomainObj *vm,
>  
>  static int
>  qemuCgroupAllowDevicesPaths(virDomainObj *vm,
> -                            const char *const *deviceACL,
> +                            GSList *deviceACL,
>                              int perms,
>                              bool ignoreEacces)
>  {
> -    size_t i;
> +    GSList *cur = NULL;
>  
> -    for (i = 0; deviceACL[i] != NULL; i++) {
> -        if (!virFileExists(deviceACL[i])) {
> -            VIR_DEBUG("Ignoring non-existent device %s", deviceACL[i]);
> +    for (cur = deviceACL; cur; cur = g_slist_next(cur)) {
> +        if (!virFileExists(cur->data)) {
> +            VIR_DEBUG("Ignoring non-existent device %s", (char *)cur->data);
>              continue;
>          }
>  
> -        if (qemuCgroupAllowDevicePath(vm, deviceACL[i], perms, ignoreEacces) < 0)
> +        if (qemuCgroupAllowDevicePath(vm, cur->data, perms, ignoreEacces) < 0)
>              return -1;
>      }
>  
> @@ -99,13 +120,13 @@ qemuCgroupDenyDevicePath(virDomainObj *vm,
>  {
>      qemuDomainObjPrivate *priv = vm->privateData;
>      g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
> -    const char *const *deviceACL = (const char *const *)cfg->cgroupDeviceACL;
>      int ret;
> +    GSList *deviceACL = cfg->cgroupDeviceACL;
>  
>      if (!deviceACL)
>          deviceACL = defaultDeviceACL;
>  
> -    if (g_strv_contains(deviceACL, path)) {
> +    if (g_slist_find(deviceACL, path)) {
>          VIR_DEBUG("Skipping deny of path %s in CGroups because it's in cgroupDeviceACL",
>                    path);
>          return 0;
> @@ -556,8 +577,11 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
>                               virDomainMemoryDef *mem)
>  {
>      qemuDomainObjPrivate *priv = vm->privateData;
> -    const char *const sgxPaths[] = { QEMU_DEV_SGX_VEPVC,
> -                                     QEMU_DEV_SGX_PROVISION, NULL };
> +    g_autoptr(virGSListString) sgxPaths = NULL;
> +
> +    sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_VEPVC));
> +    sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_PROVISION));
> +    sgxPaths = g_slist_append(sgxPaths, NULL);
>  
>      if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
>          return 0;

This change is unrelated (and needless anyway). With current code, the
function requires 3 pointers on stack, with your change, 1 pointer is
required on stack and 6 pointers on the heap and two strdup()-s. IOW at
least two times more space.


Michal
Re: [PATCH] qemu: Drop /dev/kvm from default device ACL
Posted by Daniel P. Berrangé via Devel 5 days, 13 hours ago
On Mon, Oct 20, 2025 at 03:07:34PM +0200, Michal Prívozník via Devel wrote:
> On 10/17/25 18:54, Praveen K Paladugu wrote:
> > A domain that runs with TCG emulation does not need kvm device, so drop
> > it from default device ACL.
> > 
> > To dynamically add devices to defaultDeviceACL, make it a GSList. This
> > variable will be initialized when qemu driver is initalized.
> > 
> > Lastly, dynamically append /dev/kvm to default ACL only if the domain is
> > of type VIR_DOMAIN_VIRT_KVM.
> > 
> > Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> > ---
> >  src/qemu/qemu.conf.in              |  3 +-
> >  src/qemu/qemu_cgroup.c             | 52 ++++++++++++++++++++++--------
> >  src/qemu/qemu_cgroup.h             |  5 ++-
> >  src/qemu/qemu_conf.c               | 14 ++++++--
> >  src/qemu/qemu_conf.h               |  2 +-
> >  src/qemu/qemu_driver.c             |  4 +++
> >  src/qemu/qemu_namespace.c          | 12 +++----
> >  src/qemu/qemu_process.c            |  6 ++--
> >  src/qemu/test_libvirtd_qemu.aug.in |  3 +-
> >  9 files changed, 71 insertions(+), 30 deletions(-)
> > 
> > diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
> > index fc91ba8f08..0a8abd9544 100644
> > --- a/src/qemu/qemu.conf.in
> > +++ b/src/qemu/qemu.conf.in
> > @@ -618,8 +618,7 @@
> >  #cgroup_device_acl = [
> >  #    "/dev/null", "/dev/full", "/dev/zero",
> >  #    "/dev/random", "/dev/urandom",
> > -#    "/dev/ptmx", "/dev/kvm",
> > -#    "/dev/userfaultfd"
> > +#    "/dev/ptmx", "/dev/userfaultfd"
> >  #]
> >  #
> >  # RDMA migration requires the following extra files to be added to the list:
> > diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> > index f10976c2b0..b2dcefd81e 100644
> > --- a/src/qemu/qemu_cgroup.c
> > +++ b/src/qemu/qemu_cgroup.c
> > @@ -38,17 +38,38 @@
> >  
> >  VIR_LOG_INIT("qemu.qemu_cgroup");
> >  
> > -const char *const defaultDeviceACL[] = {
> > +GSList *defaultDeviceACL;
> > +
> > +const char *const _defaultDeviceACL[] = {
> >      "/dev/null", "/dev/full", "/dev/zero",
> >      "/dev/random", "/dev/urandom",
> > -    "/dev/ptmx", "/dev/kvm",
> > -    "/dev/userfaultfd",
> > +    "/dev/ptmx", "/dev/userfaultfd",
> >      NULL,
> >  };
> >  #define DEVICE_PTY_MAJOR 136
> >  #define DEVICE_SND_MAJOR 116
> >  
> >  
> > +void
> > +initDefaultDeviceACL(void)
> > +{
> > +    size_t i;
> > +
> > +    for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
> > +        defaultDeviceACL = g_slist_append(defaultDeviceACL,
> > +                                            g_strdup(_defaultDeviceACL[i]));
> > +    }
> > +}
> > +
> > +void
> > +updateDefaultDeviceACL(virDomainObj *vm)
> > +{
> > +    if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
> > +        defaultDeviceACL = g_slist_append(defaultDeviceACL,
> > +                                                g_strdup("/dev/kvm"));
> > +    }
> > +}
> 
> So if this function is called multiple times then "/dev/kvm" will appear
> multiple times on the list. Worse, if the first VM is of KVM type, then
> /dev/kvm is added onto the list and the accelerator of the second one is
> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
> 
> What you want to do is:
> 
> 1) remove "/dev/kvm" from the defaultDeviceACL array,
> 2) audit and fix all users of defaultDeviceACL and let them act
>    accordingly if domain is of KVM type. For instance,
>    qemuSetupDevicesCgroup() will need to have something like:
> 
> qemuSetupDevicesCgroup(vm)
> {
>   ...
>   qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
> 
>   if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>     qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
>   }

These 3 lines of code should simply be inside qemuCgroupAllowDevicesPaths,
either before/after the for() loop.

Then no code needs changing beyond removing /dev/kvm from the device
default list config.

>   ...
> }


With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [PATCH] qemu: Drop /dev/kvm from default device ACL
Posted by Michal Prívozník via Devel 4 days, 20 hours ago
On 10/20/25 16:55, Daniel P. Berrangé wrote:
> On Mon, Oct 20, 2025 at 03:07:34PM +0200, Michal Prívozník via Devel wrote:
>> On 10/17/25 18:54, Praveen K Paladugu wrote:
>>> A domain that runs with TCG emulation does not need kvm device, so drop
>>> it from default device ACL.
>>>

>> So if this function is called multiple times then "/dev/kvm" will appear
>> multiple times on the list. Worse, if the first VM is of KVM type, then
>> /dev/kvm is added onto the list and the accelerator of the second one is
>> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
>>
>> What you want to do is:
>>
>> 1) remove "/dev/kvm" from the defaultDeviceACL array,
>> 2) audit and fix all users of defaultDeviceACL and let them act
>>    accordingly if domain is of KVM type. For instance,
>>    qemuSetupDevicesCgroup() will need to have something like:
>>
>> qemuSetupDevicesCgroup(vm)
>> {
>>   ...
>>   qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
>>
>>   if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>>     qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
>>   }
> 
> These 3 lines of code should simply be inside qemuCgroupAllowDevicesPaths,
> either before/after the for() loop.

I don't think this is a good idea. I mean, qemuCgroupAllowDevicesPaths()
is basically the same as qemuCgroupAllowDevicePath(), i.e. qemu
specific, but still generic wrapper for allowing a single path in a
CGroup controller, or in this case an array of paths. It's called
whenever two or more devices need to be allowed: see
qemuSetupMemoryDevicesCgroup().

Since allowing /dev/kvm depends on the domain configuration, just like
other devices (say disks, hostdevs, etc.) I see qemuSetupDevicesCgroup()
more suitable.

Michal

Re: [PATCH] qemu: Drop /dev/kvm from default device ACL
Posted by Praveen K Paladugu 3 days, 12 hours ago

On 10/21/2025 3:39 AM, Michal Prívozník wrote:
> On 10/20/25 16:55, Daniel P. Berrangé wrote:
>> On Mon, Oct 20, 2025 at 03:07:34PM +0200, Michal Prívozník via Devel wrote:
>>> On 10/17/25 18:54, Praveen K Paladugu wrote:
>>>> A domain that runs with TCG emulation does not need kvm device, so drop
>>>> it from default device ACL.
>>>>
> 
>>> So if this function is called multiple times then "/dev/kvm" will appear
>>> multiple times on the list. Worse, if the first VM is of KVM type, then
>>> /dev/kvm is added onto the list and the accelerator of the second one is
>>> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
>>>
>>> What you want to do is:
>>>
>>> 1) remove "/dev/kvm" from the defaultDeviceACL array,
>>> 2) audit and fix all users of defaultDeviceACL and let them act
>>>     accordingly if domain is of KVM type. For instance,
>>>     qemuSetupDevicesCgroup() will need to have something like:
>>>
>>> qemuSetupDevicesCgroup(vm)
>>> {
>>>    ...
>>>    qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
>>>
>>>    if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>>>      qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
>>>    }
>>
>> These 3 lines of code should simply be inside qemuCgroupAllowDevicesPaths,
>> either before/after the for() loop.
> 
> I don't think this is a good idea. I mean, qemuCgroupAllowDevicesPaths()
> is basically the same as qemuCgroupAllowDevicePath(), i.e. qemu
> specific, but still generic wrapper for allowing a single path in a
> CGroup controller, or in this case an array of paths. It's called
> whenever two or more devices need to be allowed: see
> qemuSetupMemoryDevicesCgroup().
> 
> Since allowing /dev/kvm depends on the domain configuration, just like
> other devices (say disks, hostdevs, etc.) I see qemuSetupDevicesCgroup()
> more suitable.
> 
Make sense Michal. I addressed this in v3 of this patch, which I just sent.

> Michal
> 

-- 
Regards,
Praveen K Paladugu

Re: [PATCH] qemu: Drop /dev/kvm from default device ACL
Posted by Praveen K Paladugu 5 days, 13 hours ago

On 10/20/2025 9:55 AM, Daniel P. Berrangé wrote:
> On Mon, Oct 20, 2025 at 03:07:34PM +0200, Michal Prívozník via Devel wrote:
>> On 10/17/25 18:54, Praveen K Paladugu wrote:
>>> A domain that runs with TCG emulation does not need kvm device, so drop
>>> it from default device ACL.
>>>
>>> To dynamically add devices to defaultDeviceACL, make it a GSList. This
>>> variable will be initialized when qemu driver is initalized.
>>>
>>> Lastly, dynamically append /dev/kvm to default ACL only if the domain is
>>> of type VIR_DOMAIN_VIRT_KVM.
>>>
>>> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
>>> ---
>>>   src/qemu/qemu.conf.in              |  3 +-
>>>   src/qemu/qemu_cgroup.c             | 52 ++++++++++++++++++++++--------
>>>   src/qemu/qemu_cgroup.h             |  5 ++-
>>>   src/qemu/qemu_conf.c               | 14 ++++++--
>>>   src/qemu/qemu_conf.h               |  2 +-
>>>   src/qemu/qemu_driver.c             |  4 +++
>>>   src/qemu/qemu_namespace.c          | 12 +++----
>>>   src/qemu/qemu_process.c            |  6 ++--
>>>   src/qemu/test_libvirtd_qemu.aug.in |  3 +-
>>>   9 files changed, 71 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
>>> index fc91ba8f08..0a8abd9544 100644
>>> --- a/src/qemu/qemu.conf.in
>>> +++ b/src/qemu/qemu.conf.in
>>> @@ -618,8 +618,7 @@
>>>   #cgroup_device_acl = [
>>>   #    "/dev/null", "/dev/full", "/dev/zero",
>>>   #    "/dev/random", "/dev/urandom",
>>> -#    "/dev/ptmx", "/dev/kvm",
>>> -#    "/dev/userfaultfd"
>>> +#    "/dev/ptmx", "/dev/userfaultfd"
>>>   #]
>>>   #
>>>   # RDMA migration requires the following extra files to be added to the list:
>>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>>> index f10976c2b0..b2dcefd81e 100644
>>> --- a/src/qemu/qemu_cgroup.c
>>> +++ b/src/qemu/qemu_cgroup.c
>>> @@ -38,17 +38,38 @@
>>>   
>>>   VIR_LOG_INIT("qemu.qemu_cgroup");
>>>   
>>> -const char *const defaultDeviceACL[] = {
>>> +GSList *defaultDeviceACL;
>>> +
>>> +const char *const _defaultDeviceACL[] = {
>>>       "/dev/null", "/dev/full", "/dev/zero",
>>>       "/dev/random", "/dev/urandom",
>>> -    "/dev/ptmx", "/dev/kvm",
>>> -    "/dev/userfaultfd",
>>> +    "/dev/ptmx", "/dev/userfaultfd",
>>>       NULL,
>>>   };
>>>   #define DEVICE_PTY_MAJOR 136
>>>   #define DEVICE_SND_MAJOR 116
>>>   
>>>   
>>> +void
>>> +initDefaultDeviceACL(void)
>>> +{
>>> +    size_t i;
>>> +
>>> +    for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
>>> +        defaultDeviceACL = g_slist_append(defaultDeviceACL,
>>> +                                            g_strdup(_defaultDeviceACL[i]));
>>> +    }
>>> +}
>>> +
>>> +void
>>> +updateDefaultDeviceACL(virDomainObj *vm)
>>> +{
>>> +    if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>>> +        defaultDeviceACL = g_slist_append(defaultDeviceACL,
>>> +                                                g_strdup("/dev/kvm"));
>>> +    }
>>> +}
>>
>> So if this function is called multiple times then "/dev/kvm" will appear
>> multiple times on the list. Worse, if the first VM is of KVM type, then
>> /dev/kvm is added onto the list and the accelerator of the second one is
>> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
>>
>> What you want to do is:
>>
>> 1) remove "/dev/kvm" from the defaultDeviceACL array,
>> 2) audit and fix all users of defaultDeviceACL and let them act
>>     accordingly if domain is of KVM type. For instance,
>>     qemuSetupDevicesCgroup() will need to have something like:
>>
>> qemuSetupDevicesCgroup(vm)
>> {
>>    ...
>>    qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
>>
>>    if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>>      qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
>>    }
> 
> These 3 lines of code should simply be inside qemuCgroupAllowDevicesPaths,
> either before/after the for() loop.
> 
> Then no code needs changing beyond removing /dev/kvm from the device
> default list config.
> 

In my testing I discovered that I had to append /dev/kvm to *paths in 
qemuDomainPopulateDevices method while building a namespace for the domain.

Without this change the domain fails to start.


>>    ...
>> }
> 
> 
> With regards,
> Daniel

-- 
Regards,
Praveen K Paladugu

Re: [PATCH] qemu: Drop /dev/kvm from default device ACL
Posted by Praveen K Paladugu 5 days, 13 hours ago

On 10/20/2025 8:07 AM, Michal Prívozník wrote:
> On 10/17/25 18:54, Praveen K Paladugu wrote:
>> A domain that runs with TCG emulation does not need kvm device, so drop
>> it from default device ACL.
>>
>> To dynamically add devices to defaultDeviceACL, make it a GSList. This
>> variable will be initialized when qemu driver is initalized.
>>
>> Lastly, dynamically append /dev/kvm to default ACL only if the domain is
>> of type VIR_DOMAIN_VIRT_KVM.
>>
>> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
>> ---
>>   src/qemu/qemu.conf.in              |  3 +-
>>   src/qemu/qemu_cgroup.c             | 52 ++++++++++++++++++++++--------
>>   src/qemu/qemu_cgroup.h             |  5 ++-
>>   src/qemu/qemu_conf.c               | 14 ++++++--
>>   src/qemu/qemu_conf.h               |  2 +-
>>   src/qemu/qemu_driver.c             |  4 +++
>>   src/qemu/qemu_namespace.c          | 12 +++----
>>   src/qemu/qemu_process.c            |  6 ++--
>>   src/qemu/test_libvirtd_qemu.aug.in |  3 +-
>>   9 files changed, 71 insertions(+), 30 deletions(-)
>>
>> diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
>> index fc91ba8f08..0a8abd9544 100644
>> --- a/src/qemu/qemu.conf.in
>> +++ b/src/qemu/qemu.conf.in
>> @@ -618,8 +618,7 @@
>>   #cgroup_device_acl = [
>>   #    "/dev/null", "/dev/full", "/dev/zero",
>>   #    "/dev/random", "/dev/urandom",
>> -#    "/dev/ptmx", "/dev/kvm",
>> -#    "/dev/userfaultfd"
>> +#    "/dev/ptmx", "/dev/userfaultfd"
>>   #]
>>   #
>>   # RDMA migration requires the following extra files to be added to the list:
>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>> index f10976c2b0..b2dcefd81e 100644
>> --- a/src/qemu/qemu_cgroup.c
>> +++ b/src/qemu/qemu_cgroup.c
>> @@ -38,17 +38,38 @@
>>   
>>   VIR_LOG_INIT("qemu.qemu_cgroup");
>>   
>> -const char *const defaultDeviceACL[] = {
>> +GSList *defaultDeviceACL;
>> +
>> +const char *const _defaultDeviceACL[] = {
>>       "/dev/null", "/dev/full", "/dev/zero",
>>       "/dev/random", "/dev/urandom",
>> -    "/dev/ptmx", "/dev/kvm",
>> -    "/dev/userfaultfd",
>> +    "/dev/ptmx", "/dev/userfaultfd",
>>       NULL,
>>   };
>>   #define DEVICE_PTY_MAJOR 136
>>   #define DEVICE_SND_MAJOR 116
>>   
>>   
>> +void
>> +initDefaultDeviceACL(void)
>> +{
>> +    size_t i;
>> +
>> +    for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
>> +        defaultDeviceACL = g_slist_append(defaultDeviceACL,
>> +                                            g_strdup(_defaultDeviceACL[i]));
>> +    }
>> +}
>> +
>> +void
>> +updateDefaultDeviceACL(virDomainObj *vm)
>> +{
>> +    if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>> +        defaultDeviceACL = g_slist_append(defaultDeviceACL,
>> +                                                g_strdup("/dev/kvm"));
>> +    }
>> +}
> 
> So if this function is called multiple times then "/dev/kvm" will appear
> multiple times on the list. Worse, if the first VM is of KVM type, then
> /dev/kvm is added onto the list and the accelerator of the second one is
> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
> 
> What you want to do is:
> 
> 1) remove "/dev/kvm" from the defaultDeviceACL array,
> 2) audit and fix all users of defaultDeviceACL and let them act
>     accordingly if domain is of KVM type. For instance,
>     qemuSetupDevicesCgroup() will need to have something like:
> 
> qemuSetupDevicesCgroup(vm)
> {
>    ...
>    qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
> 
>    if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>      qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
>    }
>    ...
> }
> 
> /* Modulo error checking, code fromatting, etc. */
> 
>> +
>>   static int
>>   qemuCgroupAllowDevicePath(virDomainObj *vm,
>>                             const char *path,
>> @@ -71,19 +92,19 @@ qemuCgroupAllowDevicePath(virDomainObj *vm,
>>   
>>   static int
>>   qemuCgroupAllowDevicesPaths(virDomainObj *vm,
>> -                            const char *const *deviceACL,
>> +                            GSList *deviceACL,
>>                               int perms,
>>                               bool ignoreEacces)
>>   {
>> -    size_t i;
>> +    GSList *cur = NULL;
>>   
>> -    for (i = 0; deviceACL[i] != NULL; i++) {
>> -        if (!virFileExists(deviceACL[i])) {
>> -            VIR_DEBUG("Ignoring non-existent device %s", deviceACL[i]);
>> +    for (cur = deviceACL; cur; cur = g_slist_next(cur)) {
>> +        if (!virFileExists(cur->data)) {
>> +            VIR_DEBUG("Ignoring non-existent device %s", (char *)cur->data);
>>               continue;
>>           }
>>   
>> -        if (qemuCgroupAllowDevicePath(vm, deviceACL[i], perms, ignoreEacces) < 0)
>> +        if (qemuCgroupAllowDevicePath(vm, cur->data, perms, ignoreEacces) < 0)
>>               return -1;
>>       }
>>   
>> @@ -99,13 +120,13 @@ qemuCgroupDenyDevicePath(virDomainObj *vm,
>>   {
>>       qemuDomainObjPrivate *priv = vm->privateData;
>>       g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
>> -    const char *const *deviceACL = (const char *const *)cfg->cgroupDeviceACL;
>>       int ret;
>> +    GSList *deviceACL = cfg->cgroupDeviceACL;
>>   
>>       if (!deviceACL)
>>           deviceACL = defaultDeviceACL;
>>   
>> -    if (g_strv_contains(deviceACL, path)) {
>> +    if (g_slist_find(deviceACL, path)) {
>>           VIR_DEBUG("Skipping deny of path %s in CGroups because it's in cgroupDeviceACL",
>>                     path);
>>           return 0;
>> @@ -556,8 +577,11 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
>>                                virDomainMemoryDef *mem)
>>   {
>>       qemuDomainObjPrivate *priv = vm->privateData;
>> -    const char *const sgxPaths[] = { QEMU_DEV_SGX_VEPVC,
>> -                                     QEMU_DEV_SGX_PROVISION, NULL };
>> +    g_autoptr(virGSListString) sgxPaths = NULL;
>> +
>> +    sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_VEPVC));
>> +    sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_PROVISION));
>> +    sgxPaths = g_slist_append(sgxPaths, NULL);
>>   
>>       if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
>>           return 0;
> 
> This change is unrelated (and needless anyway). With current code, the
> function requires 3 pointers on stack, with your change, 1 pointer is
> required on stack and 6 pointers on the heap and two strdup()-s. IOW at
> least two times more space.
> 

This change was made to adopt the new definition of 
qemuCgroupAllowDevicesPaths which takes GSList* as an argument in place 
of "char *const *deviceACL".


I agree with your feedback above that using a GSList for 
defaultDeviceACL and appending to the list will have undesired effects. 
I will refactor this patch in the next revision.

> 
> Michal
> 

-- 
Regards,
Praveen K Paladugu