From: Michael Galaxy <mgalaxy@akamai.com>
We start by introducing a backwards-compatible, comma-separated
specification that will not break existing installations, such
as in the following example:
$ cat qemu.conf | grep memory_backing_dir
memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1"
In our case, we almost always have two NUMA nodes, so in that
example, we have two PMEM regions which are created on the
Linux kernel command line that get mounted into those two
locations for libvirt to use.
Later patches will recognize this configuration and activate it.
Signed-off-by: Michael Galaxy <mgalaxy@akamai.com>
---
src/qemu/qemu_conf.c | 39 +++++++++++++++++++++++++++++++++------
src/qemu/qemu_conf.h | 3 ++-
2 files changed, 35 insertions(+), 7 deletions(-)
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index 4050a82341..aae9f316d8 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -43,6 +43,7 @@
#include "virfile.h"
#include "virstring.h"
#include "virutil.h"
+#include "virnuma.h"
#include "configmake.h"
#include "security/security_util.h"
@@ -137,6 +138,9 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
cfg->cgroupControllers = -1; /* -1 == auto-detect */
+ cfg->memoryBackingDirs = g_new0(char *, 1);
+ cfg->nb_memoryBackingDirs = 1;
+
if (root != NULL) {
cfg->logDir = g_strdup_printf("%s/log/qemu", root);
cfg->swtpmLogDir = g_strdup_printf("%s/log/swtpm", root);
@@ -153,7 +157,7 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir);
cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir);
cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir);
- cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir);
+ cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir);
} else if (privileged) {
cfg->logDir = g_strdup_printf("%s/log/libvirt/qemu", LOCALSTATEDIR);
@@ -174,7 +178,8 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir);
cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir);
cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir);
- cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir);
+
+ cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir);
cfg->swtpmStorageDir = g_strdup_printf("%s/lib/libvirt/swtpm",
LOCALSTATEDIR);
} else {
@@ -201,7 +206,7 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
cfg->configBaseDir);
cfg->autoDumpPath = g_strdup_printf("%s/qemu/dump", cfg->configBaseDir);
cfg->nvramDir = g_strdup_printf("%s/qemu/nvram", cfg->configBaseDir);
- cfg->memoryBackingDir = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir);
+ cfg->memoryBackingDirs[0] = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir);
cfg->swtpmStorageDir = g_strdup_printf("%s/qemu/swtpm",
cfg->configBaseDir);
}
@@ -369,7 +374,12 @@ static void virQEMUDriverConfigDispose(void *obj)
virFirmwareFreeList(cfg->firmwares, cfg->nfirmwares);
- g_free(cfg->memoryBackingDir);
+ for (size_t i = 0; i < cfg->nb_memoryBackingDirs; i++) {
+ g_free(cfg->memoryBackingDirs[i]);
+ }
+
+ g_free(cfg->memoryBackingDirs);
+
g_free(cfg->swtpmStorageDir);
g_strfreev(cfg->capabilityfilters);
@@ -1019,14 +1029,31 @@ virQEMUDriverConfigLoadMemoryEntry(virQEMUDriverConfig *cfg,
virConf *conf)
{
g_autofree char *dir = NULL;
+ g_auto(GStrv) params = NULL;
+ GStrv next;
int rc;
if ((rc = virConfGetValueString(conf, "memory_backing_dir", &dir)) < 0)
return -1;
if (rc > 0) {
- VIR_FREE(cfg->memoryBackingDir);
- cfg->memoryBackingDir = g_strdup_printf("%s/libvirt/qemu", dir);
+ size_t i;
+ for (i = 0; i < cfg->nb_memoryBackingDirs; i++)
+ VIR_FREE(cfg->memoryBackingDirs[i]);
+
+ if (!(params = g_strsplit(dir, ",", 0))) {
+ return -1;
+ }
+
+ cfg->nb_memoryBackingDirs = 0;
+ for (next = params; *next; next++) {
+ cfg->nb_memoryBackingDirs++;
+ }
+
+ cfg->memoryBackingDirs = g_new0(char *, cfg->nb_memoryBackingDirs);
+ for (i = 0, next = params; *next; next++, i++) {
+ cfg->memoryBackingDirs[i] = g_strdup_printf("%s/libvirt/qemu", *next);
+ }
}
return 0;
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index 36049b4bfa..2b8d540df0 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -221,7 +221,8 @@ struct _virQEMUDriverConfig {
unsigned int glusterDebugLevel;
bool virtiofsdDebug;
- char *memoryBackingDir;
+ char **memoryBackingDirs;
+ size_t nb_memoryBackingDirs;
uid_t swtpm_user;
gid_t swtpm_group;
--
2.25.1
On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>From: Michael Galaxy <mgalaxy@akamai.com>
>
>We start by introducing a backwards-compatible, comma-separated
>specification that will not break existing installations, such
>as in the following example:
>
>$ cat qemu.conf | grep memory_backing_dir
>memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1"
>
This would be better as an array, but I understand you thought it would
be easier to code for backward compatibility.
>In our case, we almost always have two NUMA nodes, so in that
>example, we have two PMEM regions which are created on the
>Linux kernel command line that get mounted into those two
>locations for libvirt to use.
>
There are PMEM devices which you then expose as filesystems to use for
libvirt as a backing for VM's PMEMs. Do I understand that correctly?
If yes, how are these different? Can't they be passed through?
>Later patches will recognize this configuration and activate it.
>
>Signed-off-by: Michael Galaxy <mgalaxy@akamai.com>
>---
> src/qemu/qemu_conf.c | 39 +++++++++++++++++++++++++++++++++------
> src/qemu/qemu_conf.h | 3 ++-
> 2 files changed, 35 insertions(+), 7 deletions(-)
>
>diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
>index 4050a82341..aae9f316d8 100644
>--- a/src/qemu/qemu_conf.c
>+++ b/src/qemu/qemu_conf.c
>@@ -43,6 +43,7 @@
> #include "virfile.h"
> #include "virstring.h"
> #include "virutil.h"
>+#include "virnuma.h"
> #include "configmake.h"
> #include "security/security_util.h"
>
>@@ -137,6 +138,9 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
>
> cfg->cgroupControllers = -1; /* -1 == auto-detect */
>
>+ cfg->memoryBackingDirs = g_new0(char *, 1);
>+ cfg->nb_memoryBackingDirs = 1;
>+
> if (root != NULL) {
> cfg->logDir = g_strdup_printf("%s/log/qemu", root);
> cfg->swtpmLogDir = g_strdup_printf("%s/log/swtpm", root);
>@@ -153,7 +157,7 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
> cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir);
> cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir);
> cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir);
>- cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir);
>+ cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir);
> } else if (privileged) {
> cfg->logDir = g_strdup_printf("%s/log/libvirt/qemu", LOCALSTATEDIR);
>
>@@ -174,7 +178,8 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
> cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir);
> cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir);
> cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir);
>- cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir);
>+
>+ cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir);
> cfg->swtpmStorageDir = g_strdup_printf("%s/lib/libvirt/swtpm",
> LOCALSTATEDIR);
> } else {
>@@ -201,7 +206,7 @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
> cfg->configBaseDir);
> cfg->autoDumpPath = g_strdup_printf("%s/qemu/dump", cfg->configBaseDir);
> cfg->nvramDir = g_strdup_printf("%s/qemu/nvram", cfg->configBaseDir);
>- cfg->memoryBackingDir = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir);
>+ cfg->memoryBackingDirs[0] = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir);
> cfg->swtpmStorageDir = g_strdup_printf("%s/qemu/swtpm",
> cfg->configBaseDir);
> }
>@@ -369,7 +374,12 @@ static void virQEMUDriverConfigDispose(void *obj)
>
> virFirmwareFreeList(cfg->firmwares, cfg->nfirmwares);
>
>- g_free(cfg->memoryBackingDir);
>+ for (size_t i = 0; i < cfg->nb_memoryBackingDirs; i++) {
We don't allow variable declarations in loops; make sure you run our
test suite. More tips are available here:
https://libvirt.org/hacking.html
>+ g_free(cfg->memoryBackingDirs[i]);
TAB in indentation
>+ }
Extra braces around a one-line body; make sure you follow our coding
style, linked from the above article and available here:
https://libvirt.org/coding-style.html
>+
>+ g_free(cfg->memoryBackingDirs);
>+
> g_free(cfg->swtpmStorageDir);
>
> g_strfreev(cfg->capabilityfilters);
>@@ -1019,14 +1029,31 @@ virQEMUDriverConfigLoadMemoryEntry(virQEMUDriverConfig *cfg,
> virConf *conf)
> {
> g_autofree char *dir = NULL;
>+ g_auto(GStrv) params = NULL;
>+ GStrv next;
> int rc;
>
> if ((rc = virConfGetValueString(conf, "memory_backing_dir", &dir)) < 0)
> return -1;
>
> if (rc > 0) {
>- VIR_FREE(cfg->memoryBackingDir);
>- cfg->memoryBackingDir = g_strdup_printf("%s/libvirt/qemu", dir);
>+ size_t i;
>+ for (i = 0; i < cfg->nb_memoryBackingDirs; i++)
>+ VIR_FREE(cfg->memoryBackingDirs[i]);
Please don't mix the old VIR_FREE() and new g_free() as described in our
glib adoption page:
https://www.libvirt.org/glib-adoption.html
>+
>+ if (!(params = g_strsplit(dir, ",", 0))) {
>+ return -1;
>+ }
>+
>+ cfg->nb_memoryBackingDirs = 0;
>+ for (next = params; *next; next++) {
>+ cfg->nb_memoryBackingDirs++;
>+ }
g_strv_length()?
>+
>+ cfg->memoryBackingDirs = g_new0(char *, cfg->nb_memoryBackingDirs);
>+ for (i = 0, next = params; *next; next++, i++) {
>+ cfg->memoryBackingDirs[i] = g_strdup_printf("%s/libvirt/qemu", *next);
>+ }
You can do this with one loop; the extra allocation at the start of the
program won't ever hurt. Also, if you stored the values as GStrv, you
could then work with them using the g_strv*() functions. See the
glib-adoption page linked above. (A sketch along these lines follows
after this hunk.)
> }
>
> return 0;
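
(Editorial note, pulling the review comments above together: a minimal
sketch — not the actual follow-up patch — of how the rc > 0 branch might
look once it uses g_free(), g_strv_length() and a single loop. The field
names mirror this patch, but the final code in a later revision may
differ.)

    if (rc > 0) {
        size_t i;
        g_auto(GStrv) params = NULL;

        /* drop whatever defaults virQEMUDriverConfigNew() installed */
        for (i = 0; i < cfg->nb_memoryBackingDirs; i++)
            g_free(cfg->memoryBackingDirs[i]);
        g_free(cfg->memoryBackingDirs);

        if (!(params = g_strsplit(dir, ",", 0)))
            return -1;

        /* g_strv_length() replaces the hand-rolled counting loop */
        cfg->nb_memoryBackingDirs = g_strv_length(params);
        cfg->memoryBackingDirs = g_new0(char *, cfg->nb_memoryBackingDirs);

        /* fill the array and append the suffix in one pass */
        for (i = 0; i < cfg->nb_memoryBackingDirs; i++)
            cfg->memoryBackingDirs[i] = g_strdup_printf("%s/libvirt/qemu",
                                                        params[i]);
    }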
>diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
>index 36049b4bfa..2b8d540df0 100644
>--- a/src/qemu/qemu_conf.h
>+++ b/src/qemu/qemu_conf.h
>@@ -221,7 +221,8 @@ struct _virQEMUDriverConfig {
> unsigned int glusterDebugLevel;
> bool virtiofsdDebug;
>
>- char *memoryBackingDir;
>+ char **memoryBackingDirs;
>+ size_t nb_memoryBackingDirs;
>
You change this here, but the other uses of the old member are fixed in
the following patches. I like that you split the series, but the tree
should be buildable (with passing tests) after each and every commit.
On top of all this, I still don't understand what's the benefit of
having multiple backing dirs, so please first enlighten me with that as
I'm clearly missing some info.
> uid_t swtpm_user;
> gid_t swtpm_group;
>--
>2.25.1
On 3/1/24 11:00, Martin Kletzander wrote:
> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>> From: Michael Galaxy <mgalaxy@akamai.com>
>>
>> We start by introducing a backwards-compatible, comma-separated
>> specification that will not break existing installations, such
>> as in the following example:
>>
>> $ cat qemu.conf | grep memory_backing_dir
>> memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1"
>>
>
> This would be better as an array, but I understand you thought it would
> be easier to code for backward compatibility.

This has to be an array. virConfGetValueStringList() can handle both cases
just fine. And in fact, when used it'll result in cleaner code.

Michal
On 3/5/24 03:46, Michal Prívozník wrote:
> On 3/1/24 11:00, Martin Kletzander wrote:
>> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>>> From: Michael Galaxy <mgalaxy@akamai.com>
>>>
>>> We start by introducing a backwards-compatible, comma-separated
>>> specification that will not break existing installations, such
>>> as in the following example:
>>>
>>> $ cat qemu.conf | grep memory_backing_dir
>>> memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1"
>>>
>> This would be better as an array, but I understand you thought it would
>> be easier to code for backward compatibility.
> This has to be an array. virConfGetValueStringList() can handle both cases
> just fine. And in fact, when used it'll result in cleaner code.

Acknowledged!
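
(Editorial note, for illustration only: a rough sketch of the direction
Michal points at. virConfGetValueStringList() with compatString set to
true accepts both a plain string and a list value in qemu.conf, so both
of the following would parse; the list form shown here is an assumption
about how a later revision might document it, not something from this
patch.)

    # existing syntax, still accepted
    memory_backing_dir = "/path/to/pmem/0"

    # list syntax handled by the same call
    memory_backing_dir = ["/path/to/pmem/0", "/path/to/pmem/1"]

The loader would then collapse to a single call along these lines, with
the "/libvirt/qemu" suffix still appended afterwards as in the original
loop:

    g_auto(GStrv) dirs = NULL;

    /* compatString=true: a plain string is treated as a one-element list */
    if (virConfGetValueStringList(conf, "memory_backing_dir",
                                  true, &dirs) < 0)
        return -1;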
Hi Martin,

Answers inline. Thanks for helping with the review and all the tips!

On 3/1/24 04:00, Martin Kletzander wrote:
> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>> From: Michael Galaxy <mgalaxy@akamai.com>
>>
>> In our case, we almost always have two NUMA nodes, so in that
>> example, we have two PMEM regions which are created on the
>> Linux kernel command line that get mounted into those two
>> locations for libvirt to use.
>>
>
> There are PMEM devices which you then expose as filesystems to use for
> libvirt as a backing for VM's PMEMs. Do I understand that correctly?
>
> If yes, how are these different? Can't they be passed through?
>

So, these are very different. QEMU currently already supports passing
through PMEM for guest internal use (the guest puts its own filesystem
onto the passed-through PMEM device).

In our case, we are using the PMEM area only in the host to place the
QEMU memory backing for all guests into a single PMEM area.

To support NUMA correctly, QEMU needs to support multiple host-level PMEM
areas which have been pre-configured to be NUMA aware. This is strictly
for the purposes of doing live updates, not as a mechanism for guests to
internally take advantage of persistent memory... that's a completely
different use case (which in and of itself is very interesting, but not
what we are using it for).

That's how it works. Does that make sense?

(I'll work on those other requests, thank you)

- Michael
On Mon, Mar 04, 2024 at 03:54:23PM -0600, Michael Galaxy wrote:
> Hi Martin,
>
> Answers inline. Thanks for helping with the review and all the tips!
>
> On 3/1/24 04:00, Martin Kletzander wrote:
>> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>>> From: Michael Galaxy <mgalaxy@akamai.com>
>>>
>>> In our case, we almost always have two NUMA nodes, so in that
>>> example, we have two PMEM regions which are created on the
>>> Linux kernel command line that get mounted into those two
>>> locations for libvirt to use.
>>>
>>
>> There are PMEM devices which you then expose as filesystems to use for
>> libvirt as a backing for VM's PMEMs. Do I understand that correctly?
>>
>> If yes, how are these different? Can't they be passed through?
>>
> So, these are very different. QEMU currently already supports passing
> through PMEM for guest internal use (the guest puts its own filesystem
> onto the passed-through PMEM device).
>
> In our case, we are using the PMEM area only in the host to place the
> QEMU memory backing for all guests into a single PMEM area.
>
> To support NUMA correctly, QEMU needs to support multiple host-level PMEM
> areas which have been pre-configured to be NUMA aware. This is strictly
> for the

Is this preconfiguration something that libvirt should be able to do as
well? How would anyone know which region is tied to which NUMA node?
Shouldn't there be some probing for that?

> purposes of doing live updates, not as a mechanism for guests to
> internally take advantage of persistent memory... that's a completely
> different use case (which in and of itself is very interesting, but not
> what we are using it for).
>
> That's how it works. Does that make sense?
>
> (I'll work on those other requests, thank you)
>
> - Michael
Hi Martin,

On 3/6/24 04:41, Martin Kletzander wrote:
> On Mon, Mar 04, 2024 at 03:54:23PM -0600, Michael Galaxy wrote:
>> Hi Martin,
>>
>> Answers inline. Thanks for helping with the review and all the tips!
>>
>> On 3/1/24 04:00, Martin Kletzander wrote:
>>> On Mon, Jan 29, 2024 at 04:43:53PM -0500, mgalaxy@akamai.com wrote:
>>>> From: Michael Galaxy <mgalaxy@akamai.com>
>>>>
>>>> In our case, we almost always have two NUMA nodes, so in that
>>>> example, we have two PMEM regions which are created on the
>>>> Linux kernel command line that get mounted into those two
>>>> locations for libvirt to use.
>>>>
>>>
>>> There are PMEM devices which you then expose as filesystems to use for
>>> libvirt as a backing for VM's PMEMs. Do I understand that correctly?
>>>
>>> If yes, how are these different? Can't they be passed through?
>>>
>> So, these are very different. QEMU currently already supports passing
>> through PMEM for guest internal use (the guest puts its own filesystem
>> onto the passed-through PMEM device).
>>
>> In our case, we are using the PMEM area only in the host to place the
>> QEMU memory backing for all guests into a single PMEM area.
>>
>> To support NUMA correctly, QEMU needs to support multiple host-level PMEM
>> areas which have been pre-configured to be NUMA aware. This is strictly
>> for the
>
> Is this preconfiguration something that libvirt should be able to do as
> well? How would anyone know which region is tied to which NUMA node?
> Shouldn't there be some probing for that?
>

That's a good question, but probably no. The preconfiguration is done via
the memmap=XXXX parameter on the kernel command line. I doubt we want
libvirt in the business of managing something like that.

I do have checks in the patchset to handle different corner cases where
the user provides invalid input in a way that does not match the existing
number of NUMA nodes, but beyond that, the reserved PMEM areas are
strictly configured at kernel boot time.

- Michael
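
(Editorial note, for context only: an illustrative sketch of the kind of
boot-time setup being described. The sizes, physical addresses, device
names, filesystem choice and mount points below are all hypothetical and
depend entirely on the machine's memory and NUMA layout; none of this is
taken from the patch series.)

    # kernel command line: reserve one emulated PMEM region per NUMA node
    # (each reserved address range must lie within the memory of the
    #  NUMA node it is meant to serve)
    memmap=16G!16G memmap=16G!272G

    # after boot the regions appear as /dev/pmem0 and /dev/pmem1
    mkfs.xfs /dev/pmem0 && mount -o dax /dev/pmem0 /path/to/pmem/0
    mkfs.xfs /dev/pmem1 && mount -o dax /dev/pmem1 /path/to/pmem/1

    # qemu.conf then points the memory backing at both mounts
    memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1"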