[PATCH 1/5] qemu.conf changes to support multiple memory backend directories

mgalaxy@akamai.com posted 5 patches 7 months, 1 week ago
There is a newer version of this series
[PATCH 1/5] qemu.conf changes to support multiple memory backend directories
Posted by mgalaxy@akamai.com 7 months, 1 week ago
From: Michael Galaxy <mgalaxy@akamai.com>

We start by introducing a backwards-compatible, comma-separated
specification that will not break existing installations, such
as in the following example:

$ cat qemu.conf | grep memory_backing_dir
memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1"

In our case, we almost always have two NUMA nodes, so in that
example, we have two PMEM regions which are created on the
Linux kernel command line that get mounted into those two
locations for libvirt to use.

Later patches will recognize this configuration and activate it.

Signed-off-by: Michael Galaxy <mgalaxy@akamai.com>
---
 src/qemu/qemu_conf.c | 39 +++++++++++++++++++++++++++++++++------
 src/qemu/qemu_conf.h |  3 ++-
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index 4050a82341..aae9f316d8 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -43,6 +43,7 @@
 #include "virfile.h"
 #include "virstring.h"
 #include "virutil.h"
+#include "virnuma.h"
 #include "configmake.h"
 #include "security/security_util.h"
 
@@ -137,6 +138,9 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
 
     cfg->cgroupControllers = -1; /* -1 == auto-detect */
 
+    cfg->memoryBackingDirs = g_new0(char *, 1);
+    cfg->nb_memoryBackingDirs = 1;
+
     if (root != NULL) {
         cfg->logDir = g_strdup_printf("%s/log/qemu", root);
         cfg->swtpmLogDir = g_strdup_printf("%s/log/swtpm", root);
@@ -153,7 +157,7 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
         cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir);
         cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir);
         cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir);
-        cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir);
+        cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir);
     } else if (privileged) {
         cfg->logDir = g_strdup_printf("%s/log/libvirt/qemu", LOCALSTATEDIR);
 
@@ -174,7 +178,8 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
         cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir);
         cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir);
         cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir);
-        cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir);
+
+        cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir);
         cfg->swtpmStorageDir = g_strdup_printf("%s/lib/libvirt/swtpm",
                                                LOCALSTATEDIR);
     } else {
@@ -201,7 +206,7 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
                                              cfg->configBaseDir);
         cfg->autoDumpPath = g_strdup_printf("%s/qemu/dump", cfg->configBaseDir);
         cfg->nvramDir = g_strdup_printf("%s/qemu/nvram", cfg->configBaseDir);
-        cfg->memoryBackingDir = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir);
+        cfg->memoryBackingDirs[0] = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir);
         cfg->swtpmStorageDir = g_strdup_printf("%s/qemu/swtpm",
                                                cfg->configBaseDir);
     }
@@ -369,7 +374,12 @@ static void virQEMUDriverConfigDispose(void *obj)
 
     virFirmwareFreeList(cfg->firmwares, cfg->nfirmwares);
 
-    g_free(cfg->memoryBackingDir);
+    for (size_t i = 0; i < cfg->nb_memoryBackingDirs; i++) {
+	    g_free(cfg->memoryBackingDirs[i]);
+    }
+
+    g_free(cfg->memoryBackingDirs);
+
     g_free(cfg->swtpmStorageDir);
 
     g_strfreev(cfg->capabilityfilters);
@@ -1019,14 +1029,31 @@ virQEMUDriverConfigLoadMemoryEntry(virQEMUDriverConfig *cfg,
                                    virConf *conf)
 {
     g_autofree char *dir = NULL;
+    g_auto(GStrv) params = NULL;
+    GStrv next;
     int rc;
 
     if ((rc = virConfGetValueString(conf, "memory_backing_dir", &dir)) < 0)
         return -1;
 
     if (rc > 0) {
-        VIR_FREE(cfg->memoryBackingDir);
-        cfg->memoryBackingDir = g_strdup_printf("%s/libvirt/qemu", dir);
+        size_t i;
+        for (i = 0; i < cfg->nb_memoryBackingDirs; i++)
+            VIR_FREE(cfg->memoryBackingDirs[i]);
+
+        if (!(params = g_strsplit(dir, ",", 0))) {
+            return -1;
+        }
+
+        cfg->nb_memoryBackingDirs = 0;
+        for (next = params; *next; next++) {
+            cfg->nb_memoryBackingDirs++;
+        }
+
+        cfg->memoryBackingDirs = g_new0(char *, cfg->nb_memoryBackingDirs);
+        for (i = 0, next = params; *next; next++, i++) {
+            cfg->memoryBackingDirs[i] = g_strdup_printf("%s/libvirt/qemu", *next);
+        }
     }
 
     return 0;
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index 36049b4bfa..2b8d540df0 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -221,7 +221,8 @@ struct _virQEMUDriverConfig {
     unsigned int glusterDebugLevel;
     bool virtiofsdDebug;
 
-    char *memoryBackingDir;
+    char **memoryBackingDirs;
+    size_t nb_memoryBackingDirs;
 
     uid_t swtpm_user;
     gid_t swtpm_group;
-- 
2.25.1
_______________________________________________
Devel mailing list -- devel@lists.libvirt.org
To unsubscribe send an email to devel-leave@lists.libvirt.org
Re: [PATCH 1/5] qemu.conf changes to support multiple memory backend directories
Posted by Martin Kletzander 6 months, 1 week ago
On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>From: Michael Galaxy <mgalaxy@akamai.com>
>
>We start by introducing a backwards-compatible, comma-separated
>specification that will not break existing installations, such
>as in the following example:
>
>$ cat qemu.conf | grep memory_backing_dir
>memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1"
>

This would be better as an array, but I understand you thought it would
be easier to code for backward compatibility.

>In our case, we almost always have two NUMA nodes, so in that
>example, we have two PMEM regions which are created on the
>Linux kernel command line that get mounted into those two
>locations for libvirt to use.
>

There are PMEM devices which you then expose as filesystems to use for
libvirt as a backing for VM's PMEMs.  Do I understand that correctly?

If yes, how are these different?  Can't they be passed through?

>Later patches will recognize this configuration and activate it.
>
>Signed-off-by: Michael Galaxy <mgalaxy@akamai.com>
>---
> src/qemu/qemu_conf.c | 39 +++++++++++++++++++++++++++++++++------
> src/qemu/qemu_conf.h |  3 ++-
> 2 files changed, 35 insertions(+), 7 deletions(-)
>
>diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
>index 4050a82341..aae9f316d8 100644
>--- a/src/qemu/qemu_conf.c
>+++ b/src/qemu/qemu_conf.c
>@@ -43,6 +43,7 @@
> #include "virfile.h"
> #include "virstring.h"
> #include "virutil.h"
>+#include "virnuma.h"
> #include "configmake.h"
> #include "security/security_util.h"
>
>@@ -137,6 +138,9 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
>
>     cfg->cgroupControllers = -1; /* -1 == auto-detect */
>
>+    cfg->memoryBackingDirs = g_new0(char *, 1);
>+    cfg->nb_memoryBackingDirs = 1;
>+
>     if (root != NULL) {
>         cfg->logDir = g_strdup_printf("%s/log/qemu", root);
>         cfg->swtpmLogDir = g_strdup_printf("%s/log/swtpm", root);
>@@ -153,7 +157,7 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
>         cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir);
>         cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir);
>         cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir);
>-        cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir);
>+        cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir);
>     } else if (privileged) {
>         cfg->logDir = g_strdup_printf("%s/log/libvirt/qemu", LOCALSTATEDIR);
>
>@@ -174,7 +178,8 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
>         cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir);
>         cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir);
>         cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir);
>-        cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir);
>+
>+        cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir);
>         cfg->swtpmStorageDir = g_strdup_printf("%s/lib/libvirt/swtpm",
>                                                LOCALSTATEDIR);
>     } else {
>@@ -201,7 +206,7 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
>                                              cfg->configBaseDir);
>         cfg->autoDumpPath = g_strdup_printf("%s/qemu/dump", cfg->configBaseDir);
>         cfg->nvramDir = g_strdup_printf("%s/qemu/nvram", cfg->configBaseDir);
>-        cfg->memoryBackingDir = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir);
>+        cfg->memoryBackingDirs[0] = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir);
>         cfg->swtpmStorageDir = g_strdup_printf("%s/qemu/swtpm",
>                                                cfg->configBaseDir);
>     }
>@@ -369,7 +374,12 @@ static void virQEMUDriverConfigDispose(void *obj)
>
>     virFirmwareFreeList(cfg->firmwares, cfg->nfirmwares);
>
>-    g_free(cfg->memoryBackingDir);
>+    for (size_t i = 0; i < cfg->nb_memoryBackingDirs; i++) {

We don't allow variable declarations in loops, make sure you run our
test suite, more tips are available here:

https://libvirt.org/hacking.html

>+	    g_free(cfg->memoryBackingDirs[i]);

TAB in indentation

>+    }

Extra braces around one-line body, make sure you follow our coding
style, linked from the above article, available here:

https://libvirt.org/coding-style.html

>+
>+    g_free(cfg->memoryBackingDirs);
>+
>     g_free(cfg->swtpmStorageDir);
>
>     g_strfreev(cfg->capabilityfilters);
>@@ -1019,14 +1029,31 @@ virQEMUDriverConfigLoadMemoryEntry(virQEMUDriverConfig *cfg,
>                                    virConf *conf)
> {
>     g_autofree char *dir = NULL;
>+    g_auto(GStrv) params = NULL;
>+    GStrv next;
>     int rc;
>
>     if ((rc = virConfGetValueString(conf, "memory_backing_dir", &dir)) < 0)
>         return -1;
>
>     if (rc > 0) {
>-        VIR_FREE(cfg->memoryBackingDir);
>-        cfg->memoryBackingDir = g_strdup_printf("%s/libvirt/qemu", dir);
>+        size_t i;
>+        for (i = 0; i < cfg->nb_memoryBackingDirs; i++)
>+            VIR_FREE(cfg->memoryBackingDirs[i]);

Please don't mix the old VIR_FREE() and new g_free() as described in our
glib adoption page:

https://www.libvirt.org/glib-adoption.html

>+
>+        if (!(params = g_strsplit(dir, ",", 0))) {
>+            return -1;
>+        }
>+
>+        cfg->nb_memoryBackingDirs = 0;
>+        for (next = params; *next; next++) {
>+            cfg->nb_memoryBackingDirs++;
>+        }

g_strv_length()?

>+
>+        cfg->memoryBackingDirs = g_new0(char *, cfg->nb_memoryBackingDirs);
>+        for (i = 0, next = params; *next; next++, i++) {
>+            cfg->memoryBackingDirs[i] = g_strdup_printf("%s/libvirt/qemu", *next);
>+        }

You can do this with one loop, the extra allocation at the start of the
program won't ever hurt.  Also if you stored the values as GStrv, you
could then work with it using g_strv*() functions.  See glib-adoption
linked above.

>     }
>
>     return 0;
>diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
>index 36049b4bfa..2b8d540df0 100644
>--- a/src/qemu/qemu_conf.h
>+++ b/src/qemu/qemu_conf.h
>@@ -221,7 +221,8 @@ struct _virQEMUDriverConfig {
>     unsigned int glusterDebugLevel;
>     bool virtiofsdDebug;
>
>-    char *memoryBackingDir;
>+    char **memoryBackingDirs;
>+    size_t nb_memoryBackingDirs;
>

You change this here, but the other uses of the old member is fixed in
following patches.  I like that you split the series, but the tree
should be buildable (with passing tests) after each and every commit.

On top of all this, I still don't understand what's the benefit of
having multiple backing dirs, so please first enlighten me with that as
I'm clearly missing some info.

>     uid_t swtpm_user;
>     gid_t swtpm_group;
>--
>2.25.1
>_______________________________________________
>Devel mailing list -- devel@lists.libvirt.org
>To unsubscribe send an email to devel-leave@lists.libvirt.org
_______________________________________________
Devel mailing list -- devel@lists.libvirt.org
To unsubscribe send an email to devel-leave@lists.libvirt.org
Re: [PATCH 1/5] qemu.conf changes to support multiple memory backend directories
Posted by Michal Prívozník 6 months ago
On 3/1/24 11:00, Martin Kletzander wrote:
> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>> From: Michael Galaxy <mgalaxy@akamai.com>
>>
>> We start by introducing a backwards-compatible, comma-separated
>> specification that will not break existing installations, such
>> as in the following example:
>>
>> $ cat qemu.conf | grep memory_backing_dir
>> memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1"
>>
> 
> This would be better as an array, but I understand you thought it would
> be easier to code for backward compatibility.

This have to be array. virConfGetValueStringList() can handle both cases
just fine. And in fact, when used it'll result in cleaner code.

Michal
_______________________________________________
Devel mailing list -- devel@lists.libvirt.org
To unsubscribe send an email to devel-leave@lists.libvirt.org
Re: [PATCH 1/5] qemu.conf changes to support multiple memory backend directories
Posted by Michael Galaxy 6 months ago
On 3/5/24 03:46, Michal Prívozník wrote:
> On 3/1/24 11:00, Martin Kletzander wrote:
>> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>>> From: Michael Galaxy <mgalaxy@akamai.com>
>>>
>>> We start by introducing a backwards-compatible, comma-separated
>>> specification that will not break existing installations, such
>>> as in the following example:
>>>
>>> $ cat qemu.conf | grep memory_backing_dir
>>> memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1"
>>>
>> This would be better as an array, but I understand you thought it would
>> be easier to code for backward compatibility.
> This have to be array. virConfGetValueStringList() can handle both cases
> just fine. And in fact, when used it'll result in cleaner code.

Acknowledged!

_______________________________________________
Devel mailing list -- devel@lists.libvirt.org
To unsubscribe send an email to devel-leave@lists.libvirt.org
Re: [PATCH 1/5] qemu.conf changes to support multiple memory backend directories
Posted by Michael Galaxy 6 months, 1 week ago
Hi Martin,

Answers inline. Thanks for helping with the review and all the tips!

On 3/1/24 04:00, Martin Kletzander wrote:
> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>> From: Michael Galaxy <mgalaxy@akamai.com>
>>
>
>> In our case, we almost always have two NUMA nodes, so in that
>> example, we have two PMEM regions which are created on the
>> Linux kernel command line that get mounted into those two
>> locations for libvirt to use.
>>
>
> There are PMEM devices which you then expose as filesystems to use for
> libvirt as a backing for VM's PMEMs.  Do I understand that correctly?
>
> If yes, how are these different?  Can't they be passed through?
>
So, these are very different. QEMU currently already supports passing 
through
PMEM for guest internal use (The guest puts its own filesystem onto the 
passed-through
PMEM device).

In our case, we are using the PMEM area only in the host to place the 
QEMU memory backing
for all guests into a single PMEM area.

To support NUMA correctly, QEMU needs to support mutiple host-level PMEM 
areas which
have been pre-configured to be NUMA aware. This is strictly for the 
purposes of doing live updates,
not as a mechanism for guests to internally take advantage of persistent 
memory... that's
a completely different use case (which in and of itself is very 
interesting, but not what we are
using it for).

That's how it works. Does that make sense?


(I'll work on those other requests, thank you)

- Michael
_______________________________________________
Devel mailing list -- devel@lists.libvirt.org
To unsubscribe send an email to devel-leave@lists.libvirt.org
Re: [PATCH 1/5] qemu.conf changes to support multiple memory backend directories
Posted by Martin Kletzander 6 months ago
On Mon, Mar 04, 2024 at 03:54:23PM -0600, Michael Galaxy wrote:
>Hi Martin,
>
>Answers inline. Thanks for helping with the review and all the tips!
>
>On 3/1/24 04:00, Martin Kletzander wrote:
>> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>>> From: Michael Galaxy <mgalaxy@akamai.com>
>>>
>>
>>> In our case, we almost always have two NUMA nodes, so in that
>>> example, we have two PMEM regions which are created on the
>>> Linux kernel command line that get mounted into those two
>>> locations for libvirt to use.
>>>
>>
>> There are PMEM devices which you then expose as filesystems to use for
>> libvirt as a backing for VM's PMEMs.  Do I understand that correctly?
>>
>> If yes, how are these different?  Can't they be passed through?
>>
>So, these are very different. QEMU currently already supports passing
>through
>PMEM for guest internal use (The guest puts its own filesystem onto the
>passed-through
>PMEM device).
>
>In our case, we are using the PMEM area only in the host to place the
>QEMU memory backing
>for all guests into a single PMEM area.
>
>To support NUMA correctly, QEMU needs to support mutiple host-level PMEM
>areas which
>have been pre-configured to be NUMA aware. This is strictly for the

Is this preconfiguration something that libvirt should be able to do as
well?  How would anyone know which region is tied to which NUMA node?
Shouldn't there be some probing for that?

>purposes of doing live updates,
>not as a mechanism for guests to internally take advantage of persistent
>memory... that's
>a completely different use case (which in and of itself is very
>interesting, but not what we are
>using it for).
>
>That's how it works. Does that make sense?
>
>
>(I'll work on those other requests, thank you)
>
>- Michael
>
_______________________________________________
Devel mailing list -- devel@lists.libvirt.org
To unsubscribe send an email to devel-leave@lists.libvirt.org
Re: [PATCH 1/5] qemu.conf changes to support multiple memory backend directories
Posted by Michael Galaxy 6 months ago
Hi Martin,

On 3/6/24 04:41, Martin Kletzander wrote:
> On Mon, Mar 04, 2024 at 03:54:23PM -0600, Michael Galaxy wrote:
>> Hi Martin,
>>
>> Answers inline. Thanks for helping with the review and all the tips!
>>
>> On 3/1/24 04:00, Martin Kletzander wrote:
>>> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>>>> From: Michael Galaxy <mgalaxy@akamai.com>
>>>>
>>>
>>>> In our case, we almost always have two NUMA nodes, so in that
>>>> example, we have two PMEM regions which are created on the
>>>> Linux kernel command line that get mounted into those two
>>>> locations for libvirt to use.
>>>>
>>>
>>> There are PMEM devices which you then expose as filesystems to use for
>>> libvirt as a backing for VM's PMEMs.  Do I understand that correctly?
>>>
>>> If yes, how are these different?  Can't they be passed through?
>>>
>> So, these are very different. QEMU currently already supports passing
>> through
>> PMEM for guest internal use (The guest puts its own filesystem onto the
>> passed-through
>> PMEM device).
>>
>> In our case, we are using the PMEM area only in the host to place the
>> QEMU memory backing
>> for all guests into a single PMEM area.
>>
>> To support NUMA correctly, QEMU needs to support mutiple host-level PMEM
>> areas which
>> have been pre-configured to be NUMA aware. This is strictly for the
>
> Is this preconfiguration something that libvirt should be able to do as
> well?  How would anyone know which region is tied to which NUMA node?
> Shouldn't there be some probing for that?
>
That's a good question, but probably no. The preconfiguration  is done 
via the memmap=XXXX
paramter on the kernel command line. I doubt we want libvirt in the 
business of managing something
like that. I do have checks in the patchset to handle different corner 
cases where the user
provides invalid input in a way that does not match the existing number 
of NUMA nodes, but
beyond that, the reserved PMEM areas are strictly configured at kernel 
boot time.

  - Michael

_______________________________________________
Devel mailing list -- devel@lists.libvirt.org
To unsubscribe send an email to devel-leave@lists.libvirt.org