src/qemu/qemu.conf.in | 3 +- src/qemu/qemu_cgroup.c | 52 ++++++++++++++++++++++-------- src/qemu/qemu_cgroup.h | 5 ++- src/qemu/qemu_conf.c | 14 ++++++-- src/qemu/qemu_conf.h | 2 +- src/qemu/qemu_driver.c | 4 +++ src/qemu/qemu_namespace.c | 12 +++---- src/qemu/qemu_process.c | 6 ++-- src/qemu/test_libvirtd_qemu.aug.in | 3 +- 9 files changed, 71 insertions(+), 30 deletions(-)
A domain that runs with TCG emulation does not need kvm device, so drop
it from default device ACL.
To dynamically add devices to defaultDeviceACL, make it a GSList. This
variable will be initialized when qemu driver is initalized.
Lastly, dynamically append /dev/kvm to default ACL only if the domain is
of type VIR_DOMAIN_VIRT_KVM.
Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
---
src/qemu/qemu.conf.in | 3 +-
src/qemu/qemu_cgroup.c | 52 ++++++++++++++++++++++--------
src/qemu/qemu_cgroup.h | 5 ++-
src/qemu/qemu_conf.c | 14 ++++++--
src/qemu/qemu_conf.h | 2 +-
src/qemu/qemu_driver.c | 4 +++
src/qemu/qemu_namespace.c | 12 +++----
src/qemu/qemu_process.c | 6 ++--
src/qemu/test_libvirtd_qemu.aug.in | 3 +-
9 files changed, 71 insertions(+), 30 deletions(-)
diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
index fc91ba8f08..0a8abd9544 100644
--- a/src/qemu/qemu.conf.in
+++ b/src/qemu/qemu.conf.in
@@ -618,8 +618,7 @@
#cgroup_device_acl = [
# "/dev/null", "/dev/full", "/dev/zero",
# "/dev/random", "/dev/urandom",
-# "/dev/ptmx", "/dev/kvm",
-# "/dev/userfaultfd"
+# "/dev/ptmx", "/dev/userfaultfd"
#]
#
# RDMA migration requires the following extra files to be added to the list:
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index f10976c2b0..b2dcefd81e 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -38,17 +38,38 @@
VIR_LOG_INIT("qemu.qemu_cgroup");
-const char *const defaultDeviceACL[] = {
+GSList *defaultDeviceACL;
+
+const char *const _defaultDeviceACL[] = {
"/dev/null", "/dev/full", "/dev/zero",
"/dev/random", "/dev/urandom",
- "/dev/ptmx", "/dev/kvm",
- "/dev/userfaultfd",
+ "/dev/ptmx", "/dev/userfaultfd",
NULL,
};
#define DEVICE_PTY_MAJOR 136
#define DEVICE_SND_MAJOR 116
+void
+initDefaultDeviceACL(void)
+{
+ size_t i;
+
+ for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
+ defaultDeviceACL = g_slist_append(defaultDeviceACL,
+ g_strdup(_defaultDeviceACL[i]));
+ }
+}
+
+void
+updateDefaultDeviceACL(virDomainObj *vm)
+{
+ if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
+ defaultDeviceACL = g_slist_append(defaultDeviceACL,
+ g_strdup("/dev/kvm"));
+ }
+}
+
static int
qemuCgroupAllowDevicePath(virDomainObj *vm,
const char *path,
@@ -71,19 +92,19 @@ qemuCgroupAllowDevicePath(virDomainObj *vm,
static int
qemuCgroupAllowDevicesPaths(virDomainObj *vm,
- const char *const *deviceACL,
+ GSList *deviceACL,
int perms,
bool ignoreEacces)
{
- size_t i;
+ GSList *cur = NULL;
- for (i = 0; deviceACL[i] != NULL; i++) {
- if (!virFileExists(deviceACL[i])) {
- VIR_DEBUG("Ignoring non-existent device %s", deviceACL[i]);
+ for (cur = deviceACL; cur; cur = g_slist_next(cur)) {
+ if (!virFileExists(cur->data)) {
+ VIR_DEBUG("Ignoring non-existent device %s", (char *)cur->data);
continue;
}
- if (qemuCgroupAllowDevicePath(vm, deviceACL[i], perms, ignoreEacces) < 0)
+ if (qemuCgroupAllowDevicePath(vm, cur->data, perms, ignoreEacces) < 0)
return -1;
}
@@ -99,13 +120,13 @@ qemuCgroupDenyDevicePath(virDomainObj *vm,
{
qemuDomainObjPrivate *priv = vm->privateData;
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
- const char *const *deviceACL = (const char *const *)cfg->cgroupDeviceACL;
int ret;
+ GSList *deviceACL = cfg->cgroupDeviceACL;
if (!deviceACL)
deviceACL = defaultDeviceACL;
- if (g_strv_contains(deviceACL, path)) {
+ if (g_slist_find(deviceACL, path)) {
VIR_DEBUG("Skipping deny of path %s in CGroups because it's in cgroupDeviceACL",
path);
return 0;
@@ -556,8 +577,11 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
virDomainMemoryDef *mem)
{
qemuDomainObjPrivate *priv = vm->privateData;
- const char *const sgxPaths[] = { QEMU_DEV_SGX_VEPVC,
- QEMU_DEV_SGX_PROVISION, NULL };
+ g_autoptr(virGSListString) sgxPaths = NULL;
+
+ sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_VEPVC));
+ sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_PROVISION));
+ sgxPaths = g_slist_append(sgxPaths, NULL);
if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
return 0;
@@ -758,7 +782,7 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
{
qemuDomainObjPrivate *priv = vm->privateData;
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
- const char *const *deviceACL = (const char *const *) cfg->cgroupDeviceACL;
+ GSList *deviceACL = cfg->cgroupDeviceACL;
int rv = -1;
size_t i;
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index 3668034cde..402120a8f2 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -66,4 +66,7 @@ struct _qemuCgroupEmulatorAllNodesData {
char *emulatorMemMask;
};
-extern const char *const defaultDeviceACL[];
+void updateDefaultDeviceACL(virDomainObj *vm);
+void initDefaultDeviceACL(void);
+
+extern GSList *defaultDeviceACL;
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index 242955200a..a19a86cd70 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -345,7 +345,8 @@ static void virQEMUDriverConfigDispose(void *obj)
virBitmapFree(cfg->namespaces);
- g_strfreev(cfg->cgroupDeviceACL);
+ g_slist_free(cfg->cgroupDeviceACL);
+ cfg->cgroupDeviceACL = NULL;
g_free(cfg->uri);
g_free(cfg->configBaseDir);
@@ -1068,6 +1069,7 @@ virQEMUDriverConfigLoadSecurityEntry(virQEMUDriverConfig *cfg,
g_auto(GStrv) namespaces = NULL;
g_autofree char *user = NULL;
g_autofree char *group = NULL;
+ char **cgroupDeviceACL = NULL;
size_t i, j;
if (virConfGetValueStringList(conf, "security_driver", true, &cfg->securityDriverNames) < 0)
@@ -1125,9 +1127,17 @@ virQEMUDriverConfigLoadSecurityEntry(virQEMUDriverConfig *cfg,
}
if (virConfGetValueStringList(conf, "cgroup_device_acl", false,
- &cfg->cgroupDeviceACL) < 0)
+ &cgroupDeviceACL) < 0)
return -1;
+ if (cgroupDeviceACL) {
+ for (i = 0; cgroupDeviceACL[i] != NULL; i++) {
+ cfg->cgroupDeviceACL = g_slist_append(cfg->cgroupDeviceACL,
+ g_strdup(cgroupDeviceACL[i]));
+ }
+ g_strfreev(cgroupDeviceACL);
+ }
+
if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0)
return -1;
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index edb65c99f4..bef198c2c8 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -96,7 +96,7 @@ struct _virQEMUDriverConfig {
bool rememberOwner;
int cgroupControllers;
- char **cgroupDeviceACL;
+ GSList *cgroupDeviceACL;
/* These five directories are ones libvirtd uses (so must be root:root
* to avoid security risk from QEMU processes */
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ac72ea5cb0..a5fff3dfb1 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -52,6 +52,7 @@
#include "qemu_saveimage.h"
#include "qemu_snapshot.h"
#include "qemu_validate.h"
+#include "qemu_cgroup.h"
#include "virerror.h"
#include "virlog.h"
@@ -910,6 +911,8 @@ qemuStateInitialize(bool privileged,
};
virDomainDriverAutoStart(qemu_driver->domains, &autostartCfg);
+ initDefaultDeviceACL();
+
return VIR_DRV_STATE_INIT_COMPLETE;
error:
@@ -1037,6 +1040,7 @@ qemuStateCleanup(void)
if (qemu_driver->lockFD != -1)
virPidFileRelease(qemu_driver->config->stateDir, "driver", qemu_driver->lockFD);
+ g_slist_free(defaultDeviceACL);
virObjectUnref(qemu_driver->config);
virMutexDestroy(&qemu_driver->lock);
VIR_FREE(qemu_driver);
diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c
index f72da83929..74e2730d2d 100644
--- a/src/qemu/qemu_namespace.c
+++ b/src/qemu/qemu_namespace.c
@@ -212,14 +212,14 @@ static int
qemuDomainPopulateDevices(virQEMUDriverConfig *cfg,
GSList **paths)
{
- const char *const *devices = (const char *const *) cfg->cgroupDeviceACL;
- size_t i;
+ GSList *devices = cfg->cgroupDeviceACL;
+ GSList *cur = NULL;
if (!devices)
devices = defaultDeviceACL;
- for (i = 0; devices[i]; i++) {
- *paths = g_slist_prepend(*paths, g_strdup(devices[i]));
+ for (cur = devices; cur; cur = g_slist_next(cur)) {
+ *paths = g_slist_prepend(*paths, g_strdup(cur->data));
}
return 0;
@@ -1459,7 +1459,7 @@ qemuNamespaceUnlinkPaths(virDomainObj *vm,
if (STRPREFIX(path, QEMU_DEVPREFIX)) {
GStrv mount;
bool inSubmount = false;
- const char *const *devices = (const char *const *)cfg->cgroupDeviceACL;
+ GSList *devices = cfg->cgroupDeviceACL;
for (mount = devMountsPath; *mount; mount++) {
if (STREQ(*mount, "/dev"))
@@ -1477,7 +1477,7 @@ qemuNamespaceUnlinkPaths(virDomainObj *vm,
if (!devices)
devices = defaultDeviceACL;
- if (g_strv_contains(devices, path))
+ if (g_slist_find(devices, path))
continue;
unlinkPaths = g_slist_prepend(unlinkPaths, g_strdup(path));
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 9926998f85..d3a78266ef 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3038,7 +3038,7 @@ qemuProcessAllowPostCopyMigration(virDomainObj *vm)
qemuDomainObjPrivate *priv = vm->privateData;
virQEMUDriver *driver = priv->driver;
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
- const char *const *devices = (const char *const *) cfg->cgroupDeviceACL;
+ GSList *devices = cfg->cgroupDeviceACL;
const char *uffd = "/dev/userfaultfd";
int rc;
@@ -3050,7 +3050,7 @@ qemuProcessAllowPostCopyMigration(virDomainObj *vm)
if (!devices)
devices = defaultDeviceACL;
- if (!g_strv_contains(devices, uffd)) {
+ if (!g_slist_find(devices, uffd)) {
VIR_DEBUG("%s is not allowed by device ACL", uffd);
return 0;
}
@@ -8193,6 +8193,8 @@ qemuProcessLaunch(virConnectPtr conn,
goto cleanup;
}
+ updateDefaultDeviceACL(vm);
+
VIR_DEBUG("Building domain mount namespace (if required)");
if (qemuDomainBuildNamespace(cfg, vm) < 0)
goto cleanup;
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index 90012b3f52..82cfec3b4b 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -76,8 +76,7 @@ module Test_libvirtd_qemu =
{ "4" = "/dev/random" }
{ "5" = "/dev/urandom" }
{ "6" = "/dev/ptmx" }
- { "7" = "/dev/kvm" }
- { "8" = "/dev/userfaultfd" }
+ { "7" = "/dev/userfaultfd" }
}
{ "save_image_format" = "raw" }
{ "dump_image_format" = "raw" }
--
2.51.0
On 10/17/25 18:54, Praveen K Paladugu wrote:
> A domain that runs with TCG emulation does not need kvm device, so drop
> it from default device ACL.
>
> To dynamically add devices to defaultDeviceACL, make it a GSList. This
> variable will be initialized when qemu driver is initalized.
>
> Lastly, dynamically append /dev/kvm to default ACL only if the domain is
> of type VIR_DOMAIN_VIRT_KVM.
>
> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> ---
> src/qemu/qemu.conf.in | 3 +-
> src/qemu/qemu_cgroup.c | 52 ++++++++++++++++++++++--------
> src/qemu/qemu_cgroup.h | 5 ++-
> src/qemu/qemu_conf.c | 14 ++++++--
> src/qemu/qemu_conf.h | 2 +-
> src/qemu/qemu_driver.c | 4 +++
> src/qemu/qemu_namespace.c | 12 +++----
> src/qemu/qemu_process.c | 6 ++--
> src/qemu/test_libvirtd_qemu.aug.in | 3 +-
> 9 files changed, 71 insertions(+), 30 deletions(-)
>
> diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
> index fc91ba8f08..0a8abd9544 100644
> --- a/src/qemu/qemu.conf.in
> +++ b/src/qemu/qemu.conf.in
> @@ -618,8 +618,7 @@
> #cgroup_device_acl = [
> # "/dev/null", "/dev/full", "/dev/zero",
> # "/dev/random", "/dev/urandom",
> -# "/dev/ptmx", "/dev/kvm",
> -# "/dev/userfaultfd"
> +# "/dev/ptmx", "/dev/userfaultfd"
> #]
> #
> # RDMA migration requires the following extra files to be added to the list:
> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> index f10976c2b0..b2dcefd81e 100644
> --- a/src/qemu/qemu_cgroup.c
> +++ b/src/qemu/qemu_cgroup.c
> @@ -38,17 +38,38 @@
>
> VIR_LOG_INIT("qemu.qemu_cgroup");
>
> -const char *const defaultDeviceACL[] = {
> +GSList *defaultDeviceACL;
> +
> +const char *const _defaultDeviceACL[] = {
> "/dev/null", "/dev/full", "/dev/zero",
> "/dev/random", "/dev/urandom",
> - "/dev/ptmx", "/dev/kvm",
> - "/dev/userfaultfd",
> + "/dev/ptmx", "/dev/userfaultfd",
> NULL,
> };
> #define DEVICE_PTY_MAJOR 136
> #define DEVICE_SND_MAJOR 116
>
>
> +void
> +initDefaultDeviceACL(void)
> +{
> + size_t i;
> +
> + for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
> + defaultDeviceACL = g_slist_append(defaultDeviceACL,
> + g_strdup(_defaultDeviceACL[i]));
> + }
> +}
> +
> +void
> +updateDefaultDeviceACL(virDomainObj *vm)
> +{
> + if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
> + defaultDeviceACL = g_slist_append(defaultDeviceACL,
> + g_strdup("/dev/kvm"));
> + }
> +}
So if this function is called multiple times then "/dev/kvm" will appear
multiple times on the list. Worse, if the first VM is of KVM type, then
/dev/kvm is added onto the list and the accelerator of the second one is
irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
What you want to do is:
1) remove "/dev/kvm" from the defaultDeviceACL array,
2) audit and fix all users of defaultDeviceACL and let them act
accordingly if domain is of KVM type. For instance,
qemuSetupDevicesCgroup() will need to have something like:
qemuSetupDevicesCgroup(vm)
{
...
qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
}
...
}
/* Modulo error checking, code fromatting, etc. */
> +
> static int
> qemuCgroupAllowDevicePath(virDomainObj *vm,
> const char *path,
> @@ -71,19 +92,19 @@ qemuCgroupAllowDevicePath(virDomainObj *vm,
>
> static int
> qemuCgroupAllowDevicesPaths(virDomainObj *vm,
> - const char *const *deviceACL,
> + GSList *deviceACL,
> int perms,
> bool ignoreEacces)
> {
> - size_t i;
> + GSList *cur = NULL;
>
> - for (i = 0; deviceACL[i] != NULL; i++) {
> - if (!virFileExists(deviceACL[i])) {
> - VIR_DEBUG("Ignoring non-existent device %s", deviceACL[i]);
> + for (cur = deviceACL; cur; cur = g_slist_next(cur)) {
> + if (!virFileExists(cur->data)) {
> + VIR_DEBUG("Ignoring non-existent device %s", (char *)cur->data);
> continue;
> }
>
> - if (qemuCgroupAllowDevicePath(vm, deviceACL[i], perms, ignoreEacces) < 0)
> + if (qemuCgroupAllowDevicePath(vm, cur->data, perms, ignoreEacces) < 0)
> return -1;
> }
>
> @@ -99,13 +120,13 @@ qemuCgroupDenyDevicePath(virDomainObj *vm,
> {
> qemuDomainObjPrivate *priv = vm->privateData;
> g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
> - const char *const *deviceACL = (const char *const *)cfg->cgroupDeviceACL;
> int ret;
> + GSList *deviceACL = cfg->cgroupDeviceACL;
>
> if (!deviceACL)
> deviceACL = defaultDeviceACL;
>
> - if (g_strv_contains(deviceACL, path)) {
> + if (g_slist_find(deviceACL, path)) {
> VIR_DEBUG("Skipping deny of path %s in CGroups because it's in cgroupDeviceACL",
> path);
> return 0;
> @@ -556,8 +577,11 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
> virDomainMemoryDef *mem)
> {
> qemuDomainObjPrivate *priv = vm->privateData;
> - const char *const sgxPaths[] = { QEMU_DEV_SGX_VEPVC,
> - QEMU_DEV_SGX_PROVISION, NULL };
> + g_autoptr(virGSListString) sgxPaths = NULL;
> +
> + sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_VEPVC));
> + sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_PROVISION));
> + sgxPaths = g_slist_append(sgxPaths, NULL);
>
> if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
> return 0;
This change is unrelated (and needless anyway). With current code, the
function requires 3 pointers on stack, with your change, 1 pointer is
required on stack and 6 pointers on the heap and two strdup()-s. IOW at
least two times more space.
Michal
On Mon, Oct 20, 2025 at 03:07:34PM +0200, Michal Prívozník via Devel wrote:
> On 10/17/25 18:54, Praveen K Paladugu wrote:
> > A domain that runs with TCG emulation does not need kvm device, so drop
> > it from default device ACL.
> >
> > To dynamically add devices to defaultDeviceACL, make it a GSList. This
> > variable will be initialized when qemu driver is initalized.
> >
> > Lastly, dynamically append /dev/kvm to default ACL only if the domain is
> > of type VIR_DOMAIN_VIRT_KVM.
> >
> > Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> > ---
> > src/qemu/qemu.conf.in | 3 +-
> > src/qemu/qemu_cgroup.c | 52 ++++++++++++++++++++++--------
> > src/qemu/qemu_cgroup.h | 5 ++-
> > src/qemu/qemu_conf.c | 14 ++++++--
> > src/qemu/qemu_conf.h | 2 +-
> > src/qemu/qemu_driver.c | 4 +++
> > src/qemu/qemu_namespace.c | 12 +++----
> > src/qemu/qemu_process.c | 6 ++--
> > src/qemu/test_libvirtd_qemu.aug.in | 3 +-
> > 9 files changed, 71 insertions(+), 30 deletions(-)
> >
> > diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
> > index fc91ba8f08..0a8abd9544 100644
> > --- a/src/qemu/qemu.conf.in
> > +++ b/src/qemu/qemu.conf.in
> > @@ -618,8 +618,7 @@
> > #cgroup_device_acl = [
> > # "/dev/null", "/dev/full", "/dev/zero",
> > # "/dev/random", "/dev/urandom",
> > -# "/dev/ptmx", "/dev/kvm",
> > -# "/dev/userfaultfd"
> > +# "/dev/ptmx", "/dev/userfaultfd"
> > #]
> > #
> > # RDMA migration requires the following extra files to be added to the list:
> > diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> > index f10976c2b0..b2dcefd81e 100644
> > --- a/src/qemu/qemu_cgroup.c
> > +++ b/src/qemu/qemu_cgroup.c
> > @@ -38,17 +38,38 @@
> >
> > VIR_LOG_INIT("qemu.qemu_cgroup");
> >
> > -const char *const defaultDeviceACL[] = {
> > +GSList *defaultDeviceACL;
> > +
> > +const char *const _defaultDeviceACL[] = {
> > "/dev/null", "/dev/full", "/dev/zero",
> > "/dev/random", "/dev/urandom",
> > - "/dev/ptmx", "/dev/kvm",
> > - "/dev/userfaultfd",
> > + "/dev/ptmx", "/dev/userfaultfd",
> > NULL,
> > };
> > #define DEVICE_PTY_MAJOR 136
> > #define DEVICE_SND_MAJOR 116
> >
> >
> > +void
> > +initDefaultDeviceACL(void)
> > +{
> > + size_t i;
> > +
> > + for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
> > + defaultDeviceACL = g_slist_append(defaultDeviceACL,
> > + g_strdup(_defaultDeviceACL[i]));
> > + }
> > +}
> > +
> > +void
> > +updateDefaultDeviceACL(virDomainObj *vm)
> > +{
> > + if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
> > + defaultDeviceACL = g_slist_append(defaultDeviceACL,
> > + g_strdup("/dev/kvm"));
> > + }
> > +}
>
> So if this function is called multiple times then "/dev/kvm" will appear
> multiple times on the list. Worse, if the first VM is of KVM type, then
> /dev/kvm is added onto the list and the accelerator of the second one is
> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
>
> What you want to do is:
>
> 1) remove "/dev/kvm" from the defaultDeviceACL array,
> 2) audit and fix all users of defaultDeviceACL and let them act
> accordingly if domain is of KVM type. For instance,
> qemuSetupDevicesCgroup() will need to have something like:
>
> qemuSetupDevicesCgroup(vm)
> {
> ...
> qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
>
> if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
> qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
> }
These 3 lines of code should simply be inside qemuCgroupAllowDevicesPaths,
either before/after the for() loop.
Then no code needs changing beyond removing /dev/kvm from the device
default list config.
> ...
> }
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 10/20/25 16:55, Daniel P. Berrangé wrote:
> On Mon, Oct 20, 2025 at 03:07:34PM +0200, Michal Prívozník via Devel wrote:
>> On 10/17/25 18:54, Praveen K Paladugu wrote:
>>> A domain that runs with TCG emulation does not need kvm device, so drop
>>> it from default device ACL.
>>>
>> So if this function is called multiple times then "/dev/kvm" will appear
>> multiple times on the list. Worse, if the first VM is of KVM type, then
>> /dev/kvm is added onto the list and the accelerator of the second one is
>> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
>>
>> What you want to do is:
>>
>> 1) remove "/dev/kvm" from the defaultDeviceACL array,
>> 2) audit and fix all users of defaultDeviceACL and let them act
>> accordingly if domain is of KVM type. For instance,
>> qemuSetupDevicesCgroup() will need to have something like:
>>
>> qemuSetupDevicesCgroup(vm)
>> {
>> ...
>> qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
>>
>> if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>> qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
>> }
>
> These 3 lines of code should simply be inside qemuCgroupAllowDevicesPaths,
> either before/after the for() loop.
I don't think this is a good idea. I mean, qemuCgroupAllowDevicesPaths()
is basically the same as qemuCgroupAllowDevicePath(), i.e. qemu
specific, but still generic wrapper for allowing a single path in a
CGroup controller, or in this case an array of paths. It's called
whenever two or more devices need to be allowed: see
qemuSetupMemoryDevicesCgroup().
Since allowing /dev/kvm depends on the domain configuration, just like
other devices (say disks, hostdevs, etc.) I see qemuSetupDevicesCgroup()
more suitable.
Michal
On 10/21/2025 3:39 AM, Michal Prívozník wrote:
> On 10/20/25 16:55, Daniel P. Berrangé wrote:
>> On Mon, Oct 20, 2025 at 03:07:34PM +0200, Michal Prívozník via Devel wrote:
>>> On 10/17/25 18:54, Praveen K Paladugu wrote:
>>>> A domain that runs with TCG emulation does not need kvm device, so drop
>>>> it from default device ACL.
>>>>
>
>>> So if this function is called multiple times then "/dev/kvm" will appear
>>> multiple times on the list. Worse, if the first VM is of KVM type, then
>>> /dev/kvm is added onto the list and the accelerator of the second one is
>>> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
>>>
>>> What you want to do is:
>>>
>>> 1) remove "/dev/kvm" from the defaultDeviceACL array,
>>> 2) audit and fix all users of defaultDeviceACL and let them act
>>> accordingly if domain is of KVM type. For instance,
>>> qemuSetupDevicesCgroup() will need to have something like:
>>>
>>> qemuSetupDevicesCgroup(vm)
>>> {
>>> ...
>>> qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
>>>
>>> if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>>> qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
>>> }
>>
>> These 3 lines of code should simply be inside qemuCgroupAllowDevicesPaths,
>> either before/after the for() loop.
>
> I don't think this is a good idea. I mean, qemuCgroupAllowDevicesPaths()
> is basically the same as qemuCgroupAllowDevicePath(), i.e. qemu
> specific, but still generic wrapper for allowing a single path in a
> CGroup controller, or in this case an array of paths. It's called
> whenever two or more devices need to be allowed: see
> qemuSetupMemoryDevicesCgroup().
>
> Since allowing /dev/kvm depends on the domain configuration, just like
> other devices (say disks, hostdevs, etc.) I see qemuSetupDevicesCgroup()
> more suitable.
>
Make sense Michal. I addressed this in v3 of this patch, which I just sent.
> Michal
>
--
Regards,
Praveen K Paladugu
On 10/20/2025 9:55 AM, Daniel P. Berrangé wrote:
> On Mon, Oct 20, 2025 at 03:07:34PM +0200, Michal Prívozník via Devel wrote:
>> On 10/17/25 18:54, Praveen K Paladugu wrote:
>>> A domain that runs with TCG emulation does not need kvm device, so drop
>>> it from default device ACL.
>>>
>>> To dynamically add devices to defaultDeviceACL, make it a GSList. This
>>> variable will be initialized when qemu driver is initalized.
>>>
>>> Lastly, dynamically append /dev/kvm to default ACL only if the domain is
>>> of type VIR_DOMAIN_VIRT_KVM.
>>>
>>> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
>>> ---
>>> src/qemu/qemu.conf.in | 3 +-
>>> src/qemu/qemu_cgroup.c | 52 ++++++++++++++++++++++--------
>>> src/qemu/qemu_cgroup.h | 5 ++-
>>> src/qemu/qemu_conf.c | 14 ++++++--
>>> src/qemu/qemu_conf.h | 2 +-
>>> src/qemu/qemu_driver.c | 4 +++
>>> src/qemu/qemu_namespace.c | 12 +++----
>>> src/qemu/qemu_process.c | 6 ++--
>>> src/qemu/test_libvirtd_qemu.aug.in | 3 +-
>>> 9 files changed, 71 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
>>> index fc91ba8f08..0a8abd9544 100644
>>> --- a/src/qemu/qemu.conf.in
>>> +++ b/src/qemu/qemu.conf.in
>>> @@ -618,8 +618,7 @@
>>> #cgroup_device_acl = [
>>> # "/dev/null", "/dev/full", "/dev/zero",
>>> # "/dev/random", "/dev/urandom",
>>> -# "/dev/ptmx", "/dev/kvm",
>>> -# "/dev/userfaultfd"
>>> +# "/dev/ptmx", "/dev/userfaultfd"
>>> #]
>>> #
>>> # RDMA migration requires the following extra files to be added to the list:
>>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>>> index f10976c2b0..b2dcefd81e 100644
>>> --- a/src/qemu/qemu_cgroup.c
>>> +++ b/src/qemu/qemu_cgroup.c
>>> @@ -38,17 +38,38 @@
>>>
>>> VIR_LOG_INIT("qemu.qemu_cgroup");
>>>
>>> -const char *const defaultDeviceACL[] = {
>>> +GSList *defaultDeviceACL;
>>> +
>>> +const char *const _defaultDeviceACL[] = {
>>> "/dev/null", "/dev/full", "/dev/zero",
>>> "/dev/random", "/dev/urandom",
>>> - "/dev/ptmx", "/dev/kvm",
>>> - "/dev/userfaultfd",
>>> + "/dev/ptmx", "/dev/userfaultfd",
>>> NULL,
>>> };
>>> #define DEVICE_PTY_MAJOR 136
>>> #define DEVICE_SND_MAJOR 116
>>>
>>>
>>> +void
>>> +initDefaultDeviceACL(void)
>>> +{
>>> + size_t i;
>>> +
>>> + for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
>>> + defaultDeviceACL = g_slist_append(defaultDeviceACL,
>>> + g_strdup(_defaultDeviceACL[i]));
>>> + }
>>> +}
>>> +
>>> +void
>>> +updateDefaultDeviceACL(virDomainObj *vm)
>>> +{
>>> + if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>>> + defaultDeviceACL = g_slist_append(defaultDeviceACL,
>>> + g_strdup("/dev/kvm"));
>>> + }
>>> +}
>>
>> So if this function is called multiple times then "/dev/kvm" will appear
>> multiple times on the list. Worse, if the first VM is of KVM type, then
>> /dev/kvm is added onto the list and the accelerator of the second one is
>> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
>>
>> What you want to do is:
>>
>> 1) remove "/dev/kvm" from the defaultDeviceACL array,
>> 2) audit and fix all users of defaultDeviceACL and let them act
>> accordingly if domain is of KVM type. For instance,
>> qemuSetupDevicesCgroup() will need to have something like:
>>
>> qemuSetupDevicesCgroup(vm)
>> {
>> ...
>> qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
>>
>> if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>> qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
>> }
>
> These 3 lines of code should simply be inside qemuCgroupAllowDevicesPaths,
> either before/after the for() loop.
>
> Then no code needs changing beyond removing /dev/kvm from the device
> default list config.
>
In my testing I discovered that I had to append /dev/kvm to *paths in
qemuDomainPopulateDevices method while building a namespace for the domain.
Without this change the domain fails to start.
>> ...
>> }
>
>
> With regards,
> Daniel
--
Regards,
Praveen K Paladugu
On 10/20/2025 8:07 AM, Michal Prívozník wrote:
> On 10/17/25 18:54, Praveen K Paladugu wrote:
>> A domain that runs with TCG emulation does not need kvm device, so drop
>> it from default device ACL.
>>
>> To dynamically add devices to defaultDeviceACL, make it a GSList. This
>> variable will be initialized when qemu driver is initalized.
>>
>> Lastly, dynamically append /dev/kvm to default ACL only if the domain is
>> of type VIR_DOMAIN_VIRT_KVM.
>>
>> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
>> ---
>> src/qemu/qemu.conf.in | 3 +-
>> src/qemu/qemu_cgroup.c | 52 ++++++++++++++++++++++--------
>> src/qemu/qemu_cgroup.h | 5 ++-
>> src/qemu/qemu_conf.c | 14 ++++++--
>> src/qemu/qemu_conf.h | 2 +-
>> src/qemu/qemu_driver.c | 4 +++
>> src/qemu/qemu_namespace.c | 12 +++----
>> src/qemu/qemu_process.c | 6 ++--
>> src/qemu/test_libvirtd_qemu.aug.in | 3 +-
>> 9 files changed, 71 insertions(+), 30 deletions(-)
>>
>> diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
>> index fc91ba8f08..0a8abd9544 100644
>> --- a/src/qemu/qemu.conf.in
>> +++ b/src/qemu/qemu.conf.in
>> @@ -618,8 +618,7 @@
>> #cgroup_device_acl = [
>> # "/dev/null", "/dev/full", "/dev/zero",
>> # "/dev/random", "/dev/urandom",
>> -# "/dev/ptmx", "/dev/kvm",
>> -# "/dev/userfaultfd"
>> +# "/dev/ptmx", "/dev/userfaultfd"
>> #]
>> #
>> # RDMA migration requires the following extra files to be added to the list:
>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>> index f10976c2b0..b2dcefd81e 100644
>> --- a/src/qemu/qemu_cgroup.c
>> +++ b/src/qemu/qemu_cgroup.c
>> @@ -38,17 +38,38 @@
>>
>> VIR_LOG_INIT("qemu.qemu_cgroup");
>>
>> -const char *const defaultDeviceACL[] = {
>> +GSList *defaultDeviceACL;
>> +
>> +const char *const _defaultDeviceACL[] = {
>> "/dev/null", "/dev/full", "/dev/zero",
>> "/dev/random", "/dev/urandom",
>> - "/dev/ptmx", "/dev/kvm",
>> - "/dev/userfaultfd",
>> + "/dev/ptmx", "/dev/userfaultfd",
>> NULL,
>> };
>> #define DEVICE_PTY_MAJOR 136
>> #define DEVICE_SND_MAJOR 116
>>
>>
>> +void
>> +initDefaultDeviceACL(void)
>> +{
>> + size_t i;
>> +
>> + for (i = 0; _defaultDeviceACL[i] != NULL; i++) {
>> + defaultDeviceACL = g_slist_append(defaultDeviceACL,
>> + g_strdup(_defaultDeviceACL[i]));
>> + }
>> +}
>> +
>> +void
>> +updateDefaultDeviceACL(virDomainObj *vm)
>> +{
>> + if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
>> + defaultDeviceACL = g_slist_append(defaultDeviceACL,
>> + g_strdup("/dev/kvm"));
>> + }
>> +}
>
> So if this function is called multiple times then "/dev/kvm" will appear
> multiple times on the list. Worse, if the first VM is of KVM type, then
> /dev/kvm is added onto the list and the accelerator of the second one is
> irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
>
> What you want to do is:
>
> 1) remove "/dev/kvm" from the defaultDeviceACL array,
> 2) audit and fix all users of defaultDeviceACL and let them act
> accordingly if domain is of KVM type. For instance,
> qemuSetupDevicesCgroup() will need to have something like:
>
> qemuSetupDevicesCgroup(vm)
> {
> ...
> qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
>
> if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
> qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false);
> }
> ...
> }
>
> /* Modulo error checking, code fromatting, etc. */
>
>> +
>> static int
>> qemuCgroupAllowDevicePath(virDomainObj *vm,
>> const char *path,
>> @@ -71,19 +92,19 @@ qemuCgroupAllowDevicePath(virDomainObj *vm,
>>
>> static int
>> qemuCgroupAllowDevicesPaths(virDomainObj *vm,
>> - const char *const *deviceACL,
>> + GSList *deviceACL,
>> int perms,
>> bool ignoreEacces)
>> {
>> - size_t i;
>> + GSList *cur = NULL;
>>
>> - for (i = 0; deviceACL[i] != NULL; i++) {
>> - if (!virFileExists(deviceACL[i])) {
>> - VIR_DEBUG("Ignoring non-existent device %s", deviceACL[i]);
>> + for (cur = deviceACL; cur; cur = g_slist_next(cur)) {
>> + if (!virFileExists(cur->data)) {
>> + VIR_DEBUG("Ignoring non-existent device %s", (char *)cur->data);
>> continue;
>> }
>>
>> - if (qemuCgroupAllowDevicePath(vm, deviceACL[i], perms, ignoreEacces) < 0)
>> + if (qemuCgroupAllowDevicePath(vm, cur->data, perms, ignoreEacces) < 0)
>> return -1;
>> }
>>
>> @@ -99,13 +120,13 @@ qemuCgroupDenyDevicePath(virDomainObj *vm,
>> {
>> qemuDomainObjPrivate *priv = vm->privateData;
>> g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
>> - const char *const *deviceACL = (const char *const *)cfg->cgroupDeviceACL;
>> int ret;
>> + GSList *deviceACL = cfg->cgroupDeviceACL;
>>
>> if (!deviceACL)
>> deviceACL = defaultDeviceACL;
>>
>> - if (g_strv_contains(deviceACL, path)) {
>> + if (g_slist_find(deviceACL, path)) {
>> VIR_DEBUG("Skipping deny of path %s in CGroups because it's in cgroupDeviceACL",
>> path);
>> return 0;
>> @@ -556,8 +577,11 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
>> virDomainMemoryDef *mem)
>> {
>> qemuDomainObjPrivate *priv = vm->privateData;
>> - const char *const sgxPaths[] = { QEMU_DEV_SGX_VEPVC,
>> - QEMU_DEV_SGX_PROVISION, NULL };
>> + g_autoptr(virGSListString) sgxPaths = NULL;
>> +
>> + sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_VEPVC));
>> + sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_PROVISION));
>> + sgxPaths = g_slist_append(sgxPaths, NULL);
>>
>> if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
>> return 0;
>
> This change is unrelated (and needless anyway). With current code, the
> function requires 3 pointers on stack, with your change, 1 pointer is
> required on stack and 6 pointers on the heap and two strdup()-s. IOW at
> least two times more space.
>
This change was made to adopt the new definition of
qemuCgroupAllowDevicesPaths which takes GSList* as an argument in place
of "char *const *deviceACL".
I agree with your feedback above that using a GSList for
defaultDeviceACL and appending to the list will have undesired effects.
I will refactor this patch in the next revision.
>
> Michal
>
--
Regards,
Praveen K Paladugu
© 2016 - 2025 Red Hat, Inc.