:p
atchew
Login
From: Michael Galaxy <mgalaxy@akamai.com> CPR-based support for whole-hypervisor kexec-based live updates is making progress on qemu-devel. In support of this, we need NUMA to be support in these kinds of environments. To do this we use a technology called PMEM (persistent memory), which underpin the ability for CPR Live Updates to work so that QEMU memory can remain in RAM and be recovered after the kexec has completed. Our systems are highly NUMA-aware, and so this patch series enables NUMA awareness for live update and also allows live migrations to work. Further, we make a small change that allows live migrations to work between *non* PMEM-based systems and PMEM-based systems (and vice-versa). This allows for seemless upgrades from non-live-compatible systems to live-update-compatible sytems without any downtime. Michael Galaxy (5): qemu.conf changes to support multiple memory backend directories Update cleanup routines to handle multiple memory backing paths instead of just one. Implement multiple memory backing paths Support live migration between file-backed memory and anonymous memory. Update unit test to support multiple memory backends src/qemu/qemu_command.c | 8 ++- src/qemu/qemu_conf.c | 140 ++++++++++++++++++++++++++++++++++++---- src/qemu/qemu_conf.h | 14 ++-- src/qemu/qemu_domain.c | 24 +++++-- src/qemu/qemu_driver.c | 31 +++++---- src/qemu/qemu_hotplug.c | 6 +- src/qemu/qemu_process.c | 45 +++++++------ src/qemu/qemu_process.h | 13 ++-- tests/testutilsqemu.c | 5 +- 9 files changed, 217 insertions(+), 69 deletions(-) -- 2.25.1 _______________________________________________ Devel mailing list -- devel@lists.libvirt.org To unsubscribe send an email to devel-leave@lists.libvirt.org
From: Michael Galaxy <mgalaxy@akamai.com> We start by introducing a backwards-compatible, comma-separated specification that will not break existing installations, such as in the following example: $ cat qemu.conf | grep memory_backing_dir memory_backing_dir = "/path/to/pmem/0,/path/to/pmem/1" In our case, we almost always have two NUMA nodes, so in that example, we have two PMEM regions which are created on the Linux kernel command line that get mounted into those two locations for libvirt to use. Later patches will recognize this configuration and activate it. Signed-off-by: Michael Galaxy <mgalaxy@akamai.com> --- src/qemu/qemu_conf.c | 39 +++++++++++++++++++++++++++++++++------ src/qemu/qemu_conf.h | 3 ++- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -XXX,XX +XXX,XX @@ #include "virfile.h" #include "virstring.h" #include "virutil.h" +#include "virnuma.h" #include "configmake.h" #include "security/security_util.h" @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged, cfg->cgroupControllers = -1; /* -1 == auto-detect */ + cfg->memoryBackingDirs = g_new0(char *, 1); + cfg->nb_memoryBackingDirs = 1; + if (root != NULL) { cfg->logDir = g_strdup_printf("%s/log/qemu", root); cfg->swtpmLogDir = g_strdup_printf("%s/log/swtpm", root); @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged, cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir); cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir); cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir); - cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir); + cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir); } else if (privileged) { cfg->logDir = g_strdup_printf("%s/log/libvirt/qemu", LOCALSTATEDIR); @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged, cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir); cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir); cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir); - cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir); + + cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir); cfg->swtpmStorageDir = g_strdup_printf("%s/lib/libvirt/swtpm", LOCALSTATEDIR); } else { @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged, cfg->configBaseDir); cfg->autoDumpPath = g_strdup_printf("%s/qemu/dump", cfg->configBaseDir); cfg->nvramDir = g_strdup_printf("%s/qemu/nvram", cfg->configBaseDir); - cfg->memoryBackingDir = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir); + cfg->memoryBackingDirs[0] = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir); cfg->swtpmStorageDir = g_strdup_printf("%s/qemu/swtpm", cfg->configBaseDir); } @@ -XXX,XX +XXX,XX @@ static void virQEMUDriverConfigDispose(void *obj) virFirmwareFreeList(cfg->firmwares, cfg->nfirmwares); - g_free(cfg->memoryBackingDir); + for (size_t i = 0; i < cfg->nb_memoryBackingDirs; i++) { + g_free(cfg->memoryBackingDirs[i]); + } + + g_free(cfg->memoryBackingDirs); + g_free(cfg->swtpmStorageDir); g_strfreev(cfg->capabilityfilters); @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfigLoadMemoryEntry(virQEMUDriverConfig *cfg, virConf *conf) { g_autofree char *dir = NULL; + g_auto(GStrv) params = NULL; + GStrv next; int rc; if ((rc = virConfGetValueString(conf, "memory_backing_dir", &dir)) < 0) return -1; if (rc > 0) { - VIR_FREE(cfg->memoryBackingDir); - cfg->memoryBackingDir = g_strdup_printf("%s/libvirt/qemu", dir); + size_t i; + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) + VIR_FREE(cfg->memoryBackingDirs[i]); + + if (!(params = g_strsplit(dir, ",", 0))) { + return -1; + } + + cfg->nb_memoryBackingDirs = 0; + for (next = params; *next; next++) { + cfg->nb_memoryBackingDirs++; + } + + cfg->memoryBackingDirs = g_new0(char *, cfg->nb_memoryBackingDirs); + for (i = 0, next = params; *next; next++, i++) { + cfg->memoryBackingDirs[i] = g_strdup_printf("%s/libvirt/qemu", *next); + } } return 0; diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -XXX,XX +XXX,XX @@ struct _virQEMUDriverConfig { unsigned int glusterDebugLevel; bool virtiofsdDebug; - char *memoryBackingDir; + char **memoryBackingDirs; + size_t nb_memoryBackingDirs; uid_t swtpm_user; gid_t swtpm_group; -- 2.25.1 _______________________________________________ Devel mailing list -- devel@lists.libvirt.org To unsubscribe send an email to devel-leave@lists.libvirt.org
From: Michael Galaxy <mgalaxy@akamai.com> Signed-off-by: Michael Galaxy <mgalaxy@akamai.com> --- src/qemu/qemu_driver.c | 31 +++++++++++++++++----------- src/qemu/qemu_hotplug.c | 6 +++--- src/qemu/qemu_process.c | 45 ++++++++++++++++++++++++----------------- src/qemu/qemu_process.h | 13 ++++++------ 4 files changed, 54 insertions(+), 41 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -XXX,XX +XXX,XX @@ qemuStateInitialize(bool privileged, cfg->nvramDir); goto error; } - if (g_mkdir_with_parents(cfg->memoryBackingDir, 0777) < 0) { - virReportSystemError(errno, _("Failed to create memory backing dir %1$s"), - cfg->memoryBackingDir); - goto error; + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + if (g_mkdir_with_parents(cfg->memoryBackingDirs[i], 0777) < 0) { + virReportSystemError(errno, _("Failed to create memory backing dir # %1$lu @ %2$s, total: %3$lu"), + i, cfg->memoryBackingDirs[i], cfg->nb_memoryBackingDirs); + goto error; + } } + if (g_mkdir_with_parents(cfg->slirpStateDir, 0777) < 0) { virReportSystemError(errno, _("Failed to create slirp state dir %1$s"), cfg->slirpStateDir); @@ -XXX,XX +XXX,XX @@ qemuStateInitialize(bool privileged, (int)cfg->group); goto error; } - if (chown(cfg->memoryBackingDir, cfg->user, cfg->group) < 0) { - virReportSystemError(errno, + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + if (chown(cfg->memoryBackingDirs[i], cfg->user, cfg->group) < 0) { + virReportSystemError(errno, _("unable to set ownership of '%1$s' to %2$d:%3$d"), - cfg->memoryBackingDir, (int)cfg->user, + cfg->memoryBackingDirs[i], (int)cfg->user, (int)cfg->group); - goto error; - } + goto error; + } + } if (chown(cfg->slirpStateDir, cfg->user, cfg->group) < 0) { virReportSystemError(errno, _("unable to set ownership of '%1$s' to %2$d:%3$d"), @@ -XXX,XX +XXX,XX @@ qemuStateInitialize(bool privileged, goto error; } - if (privileged && - virFileUpdatePerm(cfg->memoryBackingDir, + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + if (privileged && + virFileUpdatePerm(cfg->memoryBackingDirs[i], 0, S_IXGRP | S_IXOTH) < 0) - goto error; + goto error; + } /* Get all the running persistent or transient configs first */ if (virDomainObjListLoadAllConfigs(qemu_driver->domains, diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -XXX,XX +XXX,XX @@ qemuDomainAttachMemory(virQEMUDriver *driver, priv, vm->def, mem, true, false, NULL) < 0) goto cleanup; - if (qemuProcessBuildDestroyMemoryPaths(driver, vm, mem, true) < 0) + if (qemuProcessBuildDestroyMemoryPaths(vm, mem, true) < 0) goto cleanup; if (qemuDomainNamespaceSetupMemory(vm, mem, &teardowndevice) < 0) @@ -XXX,XX +XXX,XX @@ qemuDomainAttachMemory(virQEMUDriver *driver, qemuDomainObjExitMonitor(vm); if (objAdded && mem) - ignore_value(qemuProcessDestroyMemoryBackingPath(driver, vm, mem)); + ignore_value(qemuProcessDestroyMemoryBackingPath(vm->def, priv, mem)); virErrorRestore(&orig_err); if (!mem) @@ -XXX,XX +XXX,XX @@ qemuDomainRemoveMemoryDevice(virQEMUDriver *driver, if (qemuDomainNamespaceTeardownMemory(vm, mem) < 0) VIR_WARN("Unable to remove memory device from /dev"); - if (qemuProcessDestroyMemoryBackingPath(driver, vm, mem) < 0) + if (qemuProcessDestroyMemoryBackingPath(vm->def, priv, mem) < 0) VIR_WARN("Unable to destroy memory backing path"); qemuDomainReleaseMemoryDeviceSlot(vm, mem); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -XXX,XX +XXX,XX @@ qemuProcessBuildDestroyMemoryPathsImpl(virQEMUDriver *driver, int -qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver, - virDomainObj *vm, +qemuProcessBuildDestroyMemoryPaths(virDomainObj *vm, virDomainMemoryDef *mem, bool build) { - + qemuDomainObjPrivate *priv = vm->privateData; + virQEMUDriver *driver = priv->driver; g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); size_t i; bool shouldBuildHP = false; @@ -XXX,XX +XXX,XX @@ qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver, } if (!build || shouldBuildMB) { - g_autofree char *path = NULL; - if (qemuGetMemoryBackingDomainPath(driver, vm->def, &path) < 0) - return -1; + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + g_autofree char *path = NULL; + if (qemuGetMemoryBackingDomainPath(vm->def, vm->privateData, i, &path) < 0) + return -1; - if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm, - path, build) < 0) - return -1; + if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm, + path, build) < 0) + return -1; + } } return 0; @@ -XXX,XX +XXX,XX @@ qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver, int -qemuProcessDestroyMemoryBackingPath(virQEMUDriver *driver, - virDomainObj *vm, +qemuProcessDestroyMemoryBackingPath(virDomainDef *def, + qemuDomainObjPrivate *priv, virDomainMemoryDef *mem) { + virQEMUDriver *driver = priv->driver; g_autofree char *path = NULL; + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); - if (qemuGetMemoryBackingPath(driver, vm->def, mem->info.alias, &path) < 0) - return -1; + for (size_t i = 0; i < cfg->nb_memoryBackingDirs; i++) { + virDomainXMLPrivateDataCallbacks *privateData = (virDomainXMLPrivateDataCallbacks *) priv; + if (qemuGetMemoryBackingPath(def, privateData, i, mem->info.alias, &path) < 0) + return -1; - if (unlink(path) < 0 && - errno != ENOENT) { - virReportSystemError(errno, _("Unable to remove %1$s"), path); - return -1; + if (unlink(path) < 0 && + errno != ENOENT) { + virReportSystemError(errno, _("Unable to remove %1$s"), path); + return -1; + } } return 0; @@ -XXX,XX +XXX,XX @@ qemuProcessPrepareHost(virQEMUDriver *driver, if (qemuProcessPrepareHostBackendChardev(vm) < 0) return -1; - if (qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, true) < 0) + if (qemuProcessBuildDestroyMemoryPaths(vm, NULL, true) < 0) return -1; /* Ensure no historical cgroup for this VM is lying around bogus @@ -XXX,XX +XXX,XX @@ void qemuProcessStop(virQEMUDriver *driver, goto endjob; } - qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, false); + qemuProcessBuildDestroyMemoryPaths(vm, NULL, false); if (!!g_atomic_int_dec_and_test(&driver->nactive) && driver->inhibitCallback) driver->inhibitCallback(false, driver->inhibitOpaque); diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -XXX,XX +XXX,XX @@ int qemuProcessStopCPUs(virQEMUDriver *driver, virDomainPausedReason reason, virDomainAsyncJob asyncJob); -int qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver, - virDomainObj *vm, - virDomainMemoryDef *mem, - bool build); +int qemuProcessBuildDestroyMemoryPaths(virDomainObj *vm, + virDomainMemoryDef *mem, + bool build); -int qemuProcessDestroyMemoryBackingPath(virQEMUDriver *driver, - virDomainObj *vm, - virDomainMemoryDef *mem); +int qemuProcessDestroyMemoryBackingPath(virDomainDef *def, + qemuDomainObjPrivate *priv, + virDomainMemoryDef *mem); void qemuProcessReconnectAll(virQEMUDriver *driver); -- 2.25.1 _______________________________________________ Devel mailing list -- devel@lists.libvirt.org To unsubscribe send an email to devel-leave@lists.libvirt.org
From: Michael Galaxy <mgalaxy@akamai.com> We have different use cases: 1. The domain has multiple NUMA nodes, but they have only specified a single directory pathin qemu.conf (Original default behavior) 2. Domain has multiple NUMA nodes, but we have asked for multiple directory paths as well (new behavior). 3. Domain has single NUMA node, but we have asked for multiple directory paths (new behavior). Each one is elaborated more inline below in the comments. Signed-off-by: Michael Galaxy <mgalaxy@akamai.com> --- src/qemu/qemu_command.c | 8 +++- src/qemu/qemu_conf.c | 101 ++++++++++++++++++++++++++++++++++++---- src/qemu/qemu_conf.h | 11 +++-- 3 files changed, 106 insertions(+), 14 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -XXX,XX +XXX,XX @@ qemuBuildMemoryBackendProps(virJSONValue **backendProps, } else { /* We can have both pagesize and mem source. If that's the case, * prefer hugepages as those are more specific. */ - if (qemuGetMemoryBackingPath(priv->driver, def, mem->info.alias, &memPath) < 0) + + virDomainXMLPrivateDataCallbacks *privateData = (virDomainXMLPrivateDataCallbacks *) priv; + if (qemuGetMemoryBackingPath(def, privateData, mem->targetNode, mem->info.alias, &memPath) < 0) return -1; } @@ -XXX,XX +XXX,XX @@ qemuBuildMemPathStr(const virDomainDef *def, return -1; prealloc = true; } else if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { - if (qemuGetMemoryBackingPath(priv->driver, def, "ram", &mem_path) < 0) + // This path should not be reached if NUMA is requested + virDomainXMLPrivateDataCallbacks *privateData = (virDomainXMLPrivateDataCallbacks *) priv; + if (qemuGetMemoryBackingPath(def, privateData, 0, "ram", &mem_path) < 0) return -1; } diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -XXX,XX +XXX,XX @@ qemuGetDomainHupageMemPath(virQEMUDriver *driver, int -qemuGetMemoryBackingDomainPath(virQEMUDriver *driver, - const virDomainDef *def, +qemuGetMemoryBackingDomainPath(const virDomainDef *def, + virDomainXMLPrivateDataCallbacks *priv, + const size_t targetNode, char **path) { + qemuDomainObjPrivate *privateData = (qemuDomainObjPrivate *) priv; + virQEMUDriver *driver = privateData->driver; g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); const char *root = driver->embeddedRoot; g_autofree char *shortName = NULL; + size_t path_index = 0; // original behavior, described below if (!(shortName = virDomainDefGetShortName(def))) return -1; - if (root && !STRPREFIX(cfg->memoryBackingDir, root)) { + /* + * We have three use cases: + * + * 1. Domain has multiple NUMA nodes, but they have only specified + * a single directory path in qemu.conf. (Original default behavior). + * + * In this case, we already placed the memory backing path for each NUMA node + * into the same path location. Preserve the established default behavior. + * + * 2. Domain has multiple NUMA nodes, but we have asked for multiple directory + * paths as well. + * + * In this case, we will have a one-to-one relationship between the number + * of NUMA nodes and the order in which the paths are provided. + * If the user does not specify enough paths, then we need to throw an error. + * NOTE: This is open to comment. The "ordering" of the paths here is not intially + * configurable to preserve backwards compatibility with the original qemu.conf syntax. + * If controlling the ordering is desired, we would need to revise the syntax in + * qemu.conf to make that possible. That hasn't been needed so far. + * + * NOTE A): We must check with numatune here, if requested. The number of NUMA nodes + * may be less than or equal to the number of provided paths. If it is less, + * we have to respect the choices made by numatune. In this case, we will map the + * physical NUMA nodes (0, 1, 2...) in the order in which they appear in qemu.conf + * + * 3. Domain has a single NUMA node, but we have asked for multiple directory paths. + * + * In this case we also need to check if numatune is requested. If so, + * we want to pick the path indicated by numatune. + * + * NOTE B): In both cases 2 and 3, if numatune is requested, the path obviously cannot + * be changed on the fly, like it normally would be in "restrictive" mode + * during runtime. So, we will only do this is the mode requested is "strict". + * + * NOTE C): Furthermore, in both cases 2 and 3, if the number of directory paths provided + * is more than one, and one of either: a) no numatune is provided at all or + * b) numatune is in fact provided, but the mode is not strict, + * then we must thrown error. This is because we cannot know which backing + * directory path to choose without the user's input. + * + * NOTE D): If one or more directory paths is requested in any of the cases 1, 2, or 3, + * the numatune cannot specifiy more than one NUMA node, because the only mode + * possible with directory paths is "strict" (e.g. automatic numa balancing of + * memory will not work). Only one numa node can be requested by numatune, else + * we must throw an error. + */ + + if (cfg->nb_memoryBackingDirs > 1) { + virDomainNuma *numatune = def->numa; + virBitmap *numaBitmap = virDomainNumatuneGetNodeset(numatune, privateData->autoNodeset, targetNode); + size_t numa_node_count = virDomainNumaGetNodeCount(def->numa); + virDomainNumatuneMemMode mode; + + if ((numatune && numaBitmap && virNumaNodesetIsAvailable(numaBitmap)) && + virDomainNumatuneGetMode(def->numa, -1, &mode) == 0 && + mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virBitmapCountBits(numaBitmap) == 1) { + // Is numatune provided? + // Is it strict? + // Does it only specify a single pinning for this target? + // Yes to all 3? then good to go. + + if (cfg->nb_memoryBackingDirs < numa_node_count) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Domain requesting configuration for %lu NUMA nodes, but memory backing directory only has (%lu) directory paths available. Either reduce this to one directory or provide more paths to use."), numa_node_count, cfg->nb_memoryBackingDirs); + return -1; + } + + path_index = virBitmapNextSetBit(numaBitmap, -1); + } else if (numa_node_count > 1 && numa_node_count == cfg->nb_memoryBackingDirs) { + // Be nice. A valid numatune and pinning has not been specified, but the number + // of paths matches up exactly, so just assign them one-to-one. + path_index = targetNode; + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("There are (%lu) memory directory directories configured. Domain must use a 'strict' numatune as well as an associated pinning configuration for each NUMA node before proceeding. An individual NUMA node can only be pinned to a single backing directory. Please correct the domain configuration or remove the memory backing directories and try again."), cfg->nb_memoryBackingDirs); + return -1; + } + } + + if (root && !STRPREFIX(cfg->memoryBackingDirs[path_index], root)) { g_autofree char * hash = virDomainDriverGenerateRootHash("qemu", root); - *path = g_strdup_printf("%s/%s-%s", cfg->memoryBackingDir, hash, shortName); + *path = g_strdup_printf("%s/%s-%s", cfg->memoryBackingDirs[path_index], hash, shortName); } else { - *path = g_strdup_printf("%s/%s", cfg->memoryBackingDir, shortName); + *path = g_strdup_printf("%s/%s", cfg->memoryBackingDirs[path_index], shortName); } return 0; @@ -XXX,XX +XXX,XX @@ qemuGetMemoryBackingDomainPath(virQEMUDriver *driver, * -1 otherwise (with error reported). */ int -qemuGetMemoryBackingPath(virQEMUDriver *driver, - const virDomainDef *def, +qemuGetMemoryBackingPath(const virDomainDef *def, + virDomainXMLPrivateDataCallbacks *priv, + const size_t targetNode, const char *alias, char **memPath) { @@ -XXX,XX +XXX,XX @@ qemuGetMemoryBackingPath(virQEMUDriver *driver, return -1; } - if (qemuGetMemoryBackingDomainPath(driver, def, &domainPath) < 0) + if (qemuGetMemoryBackingDomainPath(def, priv, targetNode, &domainPath) < 0) return -1; *memPath = g_strdup_printf("%s/%s", domainPath, alias); diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -XXX,XX +XXX,XX @@ int qemuGetDomainHupageMemPath(virQEMUDriver *driver, unsigned long long pagesize, char **memPath); -int qemuGetMemoryBackingDomainPath(virQEMUDriver *driver, - const virDomainDef *def, +int qemuGetMemoryBackingDomainPath(const virDomainDef *def, + virDomainXMLPrivateDataCallbacks *priv, + const size_t targetNode, char **path); -int qemuGetMemoryBackingPath(virQEMUDriver *driver, - const virDomainDef *def, + +int qemuGetMemoryBackingPath(const virDomainDef *def, + virDomainXMLPrivateDataCallbacks *priv, + const size_t targetNode, const char *alias, char **memPath); -- 2.25.1 _______________________________________________ Devel mailing list -- devel@lists.libvirt.org To unsubscribe send an email to devel-leave@lists.libvirt.org
From: Michael Galaxy <mgalaxy@akamai.com> In our environment, we need to convert VMs into a live-update-comptabile configuration "on-the-fly" (via live migration). So, for this very specific case, this needs to work when PMEM is being enabled. QEMU does not have a problem with this at all, but we need to relax the rules here a bit. Signed-off-by: Michael Galaxy <mgalaxy@akamai.com> --- src/qemu/qemu_domain.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -XXX,XX +XXX,XX @@ qemuDomainABIStabilityCheck(const virDomainDef *src, size_t i; if (src->mem.source != dst->mem.source) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Target memoryBacking source '%1$s' doesn't match source memoryBacking source'%2$s'"), - virDomainMemorySourceTypeToString(dst->mem.source), - virDomainMemorySourceTypeToString(src->mem.source)); - return false; + /* + * The current use case for this is the live migration of live-update + * capable CPR guests mounted on PMEM devices at the host + * level (not in-guest PMEM). QEMU has no problem doing these kinds of + * live migrations between these two memory backends, so let them go through. + * This allows us to "upgrade" guests from regular memory to file-backed + * memory seemlessly without taking them down. + */ + if (!((src->mem.source == VIR_DOMAIN_MEMORY_SOURCE_NONE + && dst->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) || + (src->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE + && dst->mem.source == VIR_DOMAIN_MEMORY_SOURCE_NONE))) { + + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Target memoryBacking source '%1$s' doesn't match source memoryBacking source'%2$s'"), + virDomainMemorySourceTypeToString(dst->mem.source), + virDomainMemorySourceTypeToString(src->mem.source)); + return false; + } } for (i = 0; i < src->nmems; i++) { -- 2.25.1 _______________________________________________ Devel mailing list -- devel@lists.libvirt.org To unsubscribe send an email to devel-leave@lists.libvirt.org
From: Michael Galaxy <mgalaxy@akamai.com> Signed-off-by: Michael Galaxy <mgalaxy@akamai.com> --- tests/testutilsqemu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testutilsqemu.c b/tests/testutilsqemu.c index XXXXXXX..XXXXXXX 100644 --- a/tests/testutilsqemu.c +++ b/tests/testutilsqemu.c @@ -XXX,XX +XXX,XX @@ int qemuTestDriverInit(virQEMUDriver *driver) cfg->libDir = g_strdup("/var/lib/libvirt/qemu"); VIR_FREE(cfg->channelTargetDir); cfg->channelTargetDir = g_strdup("/var/run/libvirt/qemu/channel"); - VIR_FREE(cfg->memoryBackingDir); - cfg->memoryBackingDir = g_strdup("/var/lib/libvirt/qemu/ram"); + VIR_FREE(cfg->memoryBackingDirs); + cfg->memoryBackingDirs = g_new0(char *, 1); + cfg->memoryBackingDirs[0] = g_strdup("/var/lib/libvirt/qemu/ram"); VIR_FREE(cfg->nvramDir); cfg->nvramDir = g_strdup("/var/lib/libvirt/qemu/nvram"); VIR_FREE(cfg->passtStateDir); -- 2.25.1 _______________________________________________ Devel mailing list -- devel@lists.libvirt.org To unsubscribe send an email to devel-leave@lists.libvirt.org
From: Michael Galaxy <mgalaxy@akamai.com> CPR-based support for whole-hypervisor kexec-based live updates is now finally merged into QEMU. In support of this, we need NUMA to be supported in these kinds of environments. To do this we use a technology called PMEM (persistent memory) in Linux, which underpins the ability for CPR Live Updates to work so that QEMU memory can remain in RAM and be recovered after a kexec operationg has completed. Our systems are highly NUMA-aware, and so this patch series enables NUMA awareness for live updates. Further, we make a small change that allows live migrations to work between *non* PMEM-based systems and PMEM-based systems (and vice-versa). This allows for seemless upgrades from non-live-compatible systems to live-update-compatible sytems without any downtime. Michael Galaxy (4): qemu.conf changes to support multiple memory backend Support live migration between file-backed memory and anonymous memory. Update unit test to support multiple memory backends Update documentation to reflect memory_backing_dir change in qemu.conf NEWS.rst | 7 ++ docs/kbase/virtiofs.rst | 2 + src/qemu/qemu.conf.in | 2 + src/qemu/qemu_command.c | 8 ++- src/qemu/qemu_conf.c | 141 +++++++++++++++++++++++++++++++++++----- src/qemu/qemu_conf.h | 14 ++-- src/qemu/qemu_domain.c | 24 +++++-- src/qemu/qemu_driver.c | 29 +++++---- src/qemu/qemu_hotplug.c | 6 +- src/qemu/qemu_process.c | 44 +++++++------ src/qemu/qemu_process.h | 7 +- tests/testutilsqemu.c | 5 +- 12 files changed, 221 insertions(+), 68 deletions(-) -- 2.34.1
From: Michael Galaxy <mgalaxy@akamai.com> We start by introducing a backwards-compatible, comma-separated specification that will not break existing installations, such as in the following example: $ cat qemu.conf | grep memory_backing_dir memory_backing_dir = ["/path/to/pmem/0", "/path/to/pmem/1"] (The old syntax with a single string is also still supported) memory_backing_dir = "/path/to/dir" In our case, we almost always have two NUMA nodes, so in that example, we have two PMEM regions which are created on the Linux kernel command line that get mounted into those two locations for libvirt to use. Signed-off-by: Michael Galaxy <mgalaxy@akamai.com> --- src/qemu/qemu_command.c | 8 ++- src/qemu/qemu_conf.c | 141 +++++++++++++++++++++++++++++++++++----- src/qemu/qemu_conf.h | 14 ++-- src/qemu/qemu_driver.c | 29 +++++---- src/qemu/qemu_hotplug.c | 6 +- src/qemu/qemu_process.c | 44 +++++++------ src/qemu/qemu_process.h | 7 +- 7 files changed, 188 insertions(+), 61 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -XXX,XX +XXX,XX @@ qemuBuildMemoryBackendProps(virJSONValue **backendProps, } else { /* We can have both pagesize and mem source. If that's the case, * prefer hugepages as those are more specific. */ - if (qemuGetMemoryBackingPath(priv->driver, def, mem->info.alias, &memPath) < 0) + + virDomainXMLPrivateDataCallbacks *privateData = (virDomainXMLPrivateDataCallbacks *) priv; + if (qemuGetMemoryBackingPath(def, privateData, mem->targetNode, mem->info.alias, &memPath) < 0) return -1; } @@ -XXX,XX +XXX,XX @@ qemuBuildMemPathStr(const virDomainDef *def, return -1; prealloc = true; } else if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { - if (qemuGetMemoryBackingPath(priv->driver, def, "ram", &mem_path) < 0) + // This path should not be reached if NUMA is requested + virDomainXMLPrivateDataCallbacks *privateData = (virDomainXMLPrivateDataCallbacks *) priv; + if (qemuGetMemoryBackingPath(def, privateData, 0, "ram", &mem_path) < 0) return -1; } diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -XXX,XX +XXX,XX @@ #include "virfile.h" #include "virstring.h" #include "virutil.h" +#include "virnuma.h" #include "configmake.h" #include "security/security_util.h" @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged, cfg->cgroupControllers = -1; /* -1 == auto-detect */ + cfg->memoryBackingDirs = g_new0(char *, 1); + cfg->nb_memoryBackingDirs = 1; + if (root != NULL) { cfg->logDir = g_strdup_printf("%s/log/qemu", root); cfg->swtpmLogDir = g_strdup_printf("%s/log/swtpm", root); @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged, cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir); cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir); cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir); - cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir); + cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir); } else if (privileged) { cfg->logDir = g_strdup_printf("%s/log/libvirt/qemu", LOCALSTATEDIR); @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged, cfg->checkpointDir = g_strdup_printf("%s/checkpoint", cfg->libDir); cfg->autoDumpPath = g_strdup_printf("%s/dump", cfg->libDir); cfg->nvramDir = g_strdup_printf("%s/nvram", cfg->libDir); - cfg->memoryBackingDir = g_strdup_printf("%s/ram", cfg->libDir); + + cfg->memoryBackingDirs[0] = g_strdup_printf("%s/ram", cfg->libDir); cfg->swtpmStorageDir = g_strdup_printf("%s/lib/libvirt/swtpm", LOCALSTATEDIR); } else { @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged, cfg->configBaseDir); cfg->autoDumpPath = g_strdup_printf("%s/qemu/dump", cfg->configBaseDir); cfg->nvramDir = g_strdup_printf("%s/qemu/nvram", cfg->configBaseDir); - cfg->memoryBackingDir = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir); + cfg->memoryBackingDirs[0] = g_strdup_printf("%s/qemu/ram", cfg->configBaseDir); cfg->swtpmStorageDir = g_strdup_printf("%s/qemu/swtpm", cfg->configBaseDir); } @@ -XXX,XX +XXX,XX @@ virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged, static void virQEMUDriverConfigDispose(void *obj) { virQEMUDriverConfig *cfg = obj; + size_t i; virBitmapFree(cfg->namespaces); @@ -XXX,XX +XXX,XX @@ static void virQEMUDriverConfigDispose(void *obj) virFirmwareFreeList(cfg->firmwares, cfg->nfirmwares); - g_free(cfg->memoryBackingDir); + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + g_free(cfg->memoryBackingDirs[i]); + } + + g_free(cfg->memoryBackingDirs); + g_free(cfg->swtpmStorageDir); g_strfreev(cfg->capabilityfilters); @@ -XXX,XX +XXX,XX @@ static int virQEMUDriverConfigLoadMemoryEntry(virQEMUDriverConfig *cfg, virConf *conf) { - g_autofree char *dir = NULL; + char **memoryBackingDirs = NULL; + g_auto(GStrv) params = NULL; int rc; - if ((rc = virConfGetValueString(conf, "memory_backing_dir", &dir)) < 0) + if ((rc = virConfGetValueStringList(conf, "memory_backing_dir", true, &memoryBackingDirs) < 0)) return -1; - if (rc > 0) { - VIR_FREE(cfg->memoryBackingDir); - cfg->memoryBackingDir = g_strdup_printf("%s/libvirt/qemu", dir); + if (rc == 0) { + size_t i; + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) + g_free(cfg->memoryBackingDirs[i]); + + cfg->nb_memoryBackingDirs = g_strv_length(memoryBackingDirs); + cfg->memoryBackingDirs = g_new0(char *, cfg->nb_memoryBackingDirs); + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) + cfg->memoryBackingDirs[i] = g_strdup_printf("%s/libvirt/qemu", memoryBackingDirs[i]); } return 0; @@ -XXX,XX +XXX,XX @@ qemuGetDomainHupageMemPath(virQEMUDriver *driver, int -qemuGetMemoryBackingDomainPath(virQEMUDriver *driver, - const virDomainDef *def, +qemuGetMemoryBackingDomainPath(const virDomainDef *def, + virDomainXMLPrivateDataCallbacks *priv, + const size_t targetNode, char **path) { + qemuDomainObjPrivate *privateData = (qemuDomainObjPrivate *) priv; + virQEMUDriver *driver = privateData->driver; g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); const char *root = driver->embeddedRoot; g_autofree char *shortName = NULL; + size_t path_index = 0; // original behavior, described below if (!(shortName = virDomainDefGetShortName(def))) return -1; - if (root && !STRPREFIX(cfg->memoryBackingDir, root)) { + /* + * We have three use cases: + * + * 1. Domain has multiple NUMA nodes, but they have only specified + * a single directory path in qemu.conf. (Original default behavior). + * + * In this case, we already placed the memory backing path for each NUMA node + * into the same path location. Preserve the established default behavior. + * + * 2. Domain has multiple NUMA nodes, but we have asked for multiple directory + * paths as well. + * + * In this case, we will have a one-to-one relationship between the number + * of NUMA nodes and the order in which the paths are provided. + * If the user does not specify enough paths, then we need to throw an error. + * NOTE: This is open to comment. The "ordering" of the paths here is not intially + * configurable to preserve backwards compatibility with the original qemu.conf syntax. + * If controlling the ordering is desired, we would need to revise the syntax in + * qemu.conf to make that possible. That hasn't been needed so far. + * + * NOTE A): We must check with numatune here, if requested. The number of NUMA nodes + * may be less than or equal to the number of provided paths. If it is less, + * we have to respect the choices made by numatune. In this case, we will map the + * physical NUMA nodes (0, 1, 2...) in the order in which they appear in qemu.conf + * + * 3. Domain has a single NUMA node, but we have asked for multiple directory paths. + * + * In this case we also need to check if numatune is requested. If so, + * we want to pick the path indicated by numatune. + * + * NOTE B): In both cases 2 and 3, if numatune is requested, the path obviously cannot + * be changed on the fly, like it normally would be in "restrictive" mode + * during runtime. So, we will only do this is the mode requested is "strict". + * + * NOTE C): Furthermore, in both cases 2 and 3, if the number of directory paths provided + * is more than one, and one of either: a) no numatune is provided at all or + * b) numatune is in fact provided, but the mode is not strict, + * then we must thrown error. This is because we cannot know which backing + * directory path to choose without the user's input. + * + * NOTE D): If one or more directory paths is requested in any of the cases 1, 2, or 3, + * the numatune cannot specifiy more than one NUMA node, because the only mode + * possible with directory paths is "strict" (e.g. automatic numa balancing of + * memory will not work). Only one numa node can be requested by numatune, else + * we must throw an error. + */ + + if (cfg->nb_memoryBackingDirs > 1) { + virDomainNuma *numatune = def->numa; + virBitmap *numaBitmap = virDomainNumatuneGetNodeset(numatune, privateData->autoNodeset, targetNode); + size_t numa_node_count = virDomainNumaGetNodeCount(def->numa); + virDomainNumatuneMemMode mode; + + if ((numatune && numaBitmap && virNumaNodesetIsAvailable(numaBitmap)) && + virDomainNumatuneGetMode(def->numa, -1, &mode) == 0 && + mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virBitmapCountBits(numaBitmap) == 1) { + + // Is numatune provided? + // Is it strict? + // Does it only specify a single pinning for this target? + // Yes to all 3? then good to go. + + if (cfg->nb_memoryBackingDirs < numa_node_count) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Domain requesting configuration for %1$lu NUMA nodes, but memory backing directory only has (%2$lu) directory paths available. Either reduce this to one directory or provide more paths to use."), + numa_node_count, + cfg->nb_memoryBackingDirs); + return -1; + } + + path_index = virBitmapNextSetBit(numaBitmap, -1); + } else if (numa_node_count > 1 && numa_node_count == cfg->nb_memoryBackingDirs) { + // Be nice. A valid numatune and pinning has not been specified, but the number + // of paths matches up exactly, so just assign them one-to-one. + path_index = targetNode; + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("There are (%1$lu) memory directory directories configured. Domain must use a 'strict' numatune as well as an associated pinning configuration for each NUMA node before proceeding. An individual NUMA node can only be pinned to a single backing directory. Please correct the domain configuration or remove the memory backing directories and try again."), + cfg->nb_memoryBackingDirs); + return -1; + } + } + + if (root && !STRPREFIX(cfg->memoryBackingDirs[path_index], root)) { g_autofree char * hash = virDomainDriverGenerateRootHash("qemu", root); - *path = g_strdup_printf("%s/%s-%s", cfg->memoryBackingDir, hash, shortName); + *path = g_strdup_printf("%s/%s-%s", cfg->memoryBackingDirs[path_index], hash, shortName); } else { - *path = g_strdup_printf("%s/%s", cfg->memoryBackingDir, shortName); + *path = g_strdup_printf("%s/%s", cfg->memoryBackingDirs[path_index], shortName); } return 0; @@ -XXX,XX +XXX,XX @@ qemuGetMemoryBackingDomainPath(virQEMUDriver *driver, * -1 otherwise (with error reported). */ int -qemuGetMemoryBackingPath(virQEMUDriver *driver, - const virDomainDef *def, +qemuGetMemoryBackingPath(const virDomainDef *def, + virDomainXMLPrivateDataCallbacks *priv, + const size_t targetNode, const char *alias, char **memPath) { @@ -XXX,XX +XXX,XX @@ qemuGetMemoryBackingPath(virQEMUDriver *driver, return -1; } - if (qemuGetMemoryBackingDomainPath(driver, def, &domainPath) < 0) + if (qemuGetMemoryBackingDomainPath(def, priv, targetNode, &domainPath) < 0) return -1; *memPath = g_strdup_printf("%s/%s", domainPath, alias); diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -XXX,XX +XXX,XX @@ struct _virQEMUDriverConfig { unsigned int glusterDebugLevel; bool virtiofsdDebug; - char *memoryBackingDir; + char **memoryBackingDirs; + size_t nb_memoryBackingDirs; uid_t swtpm_user; gid_t swtpm_group; @@ -XXX,XX +XXX,XX @@ int qemuGetDomainHupageMemPath(virQEMUDriver *driver, unsigned long long pagesize, char **memPath); -int qemuGetMemoryBackingDomainPath(virQEMUDriver *driver, - const virDomainDef *def, +int qemuGetMemoryBackingDomainPath(const virDomainDef *def, + virDomainXMLPrivateDataCallbacks *priv, + const size_t targetNode, char **path); -int qemuGetMemoryBackingPath(virQEMUDriver *driver, - const virDomainDef *def, + +int qemuGetMemoryBackingPath(const virDomainDef *def, + virDomainXMLPrivateDataCallbacks *priv, + const size_t targetNode, const char *alias, char **memPath); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -XXX,XX +XXX,XX @@ qemuStateInitialize(bool privileged, cfg->nvramDir); goto error; } - if (g_mkdir_with_parents(cfg->memoryBackingDir, 0777) < 0) { - virReportSystemError(errno, _("Failed to create memory backing dir %1$s"), - cfg->memoryBackingDir); - goto error; + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + if (g_mkdir_with_parents(cfg->memoryBackingDirs[i], 0777) < 0) { + virReportSystemError(errno, _("Failed to create memory backing dir # %1$lu @ %2$s, total: %3$lu"), + i, cfg->memoryBackingDirs[i], cfg->nb_memoryBackingDirs); + goto error; + } } + if (g_mkdir_with_parents(cfg->slirpStateDir, 0777) < 0) { virReportSystemError(errno, _("Failed to create slirp state dir %1$s"), cfg->slirpStateDir); @@ -XXX,XX +XXX,XX @@ qemuStateInitialize(bool privileged, (int)cfg->group); goto error; } - if (chown(cfg->memoryBackingDir, cfg->user, cfg->group) < 0) { - virReportSystemError(errno, + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + if (chown(cfg->memoryBackingDirs[i], cfg->user, cfg->group) < 0) { + virReportSystemError(errno, _("unable to set ownership of '%1$s' to %2$d:%3$d"), - cfg->memoryBackingDir, (int)cfg->user, + cfg->memoryBackingDirs[i], (int)cfg->user, (int)cfg->group); - goto error; + goto error; + } } if (chown(cfg->slirpStateDir, cfg->user, cfg->group) < 0) { virReportSystemError(errno, @@ -XXX,XX +XXX,XX @@ qemuStateInitialize(bool privileged, goto error; } - if (privileged && - virFileUpdatePerm(cfg->memoryBackingDir, + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + if (privileged && + virFileUpdatePerm(cfg->memoryBackingDirs[i], 0, S_IXGRP | S_IXOTH) < 0) - goto error; + goto error; + } /* Get all the running persistent or transient configs first */ if (virDomainObjListLoadAllConfigs(qemu_driver->domains, diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -XXX,XX +XXX,XX @@ qemuDomainAttachMemory(virQEMUDriver *driver, priv, vm->def, mem, true, false, NULL) < 0) goto cleanup; - if (qemuProcessBuildDestroyMemoryPaths(driver, vm, mem, true) < 0) + if (qemuProcessBuildDestroyMemoryPaths(vm, mem, true) < 0) goto cleanup; if (qemuDomainNamespaceSetupMemory(vm, mem, &teardowndevice) < 0) @@ -XXX,XX +XXX,XX @@ qemuDomainAttachMemory(virQEMUDriver *driver, qemuDomainObjExitMonitor(vm); if (objAdded && mem) - ignore_value(qemuProcessDestroyMemoryBackingPath(driver, vm, mem)); + ignore_value(qemuProcessDestroyMemoryBackingPath(vm->def, priv, mem)); virErrorRestore(&orig_err); if (!mem) @@ -XXX,XX +XXX,XX @@ qemuDomainRemoveMemoryDevice(virQEMUDriver *driver, if (qemuDomainNamespaceTeardownMemory(vm, mem) < 0) VIR_WARN("Unable to remove memory device from /dev"); - if (qemuProcessDestroyMemoryBackingPath(driver, vm, mem) < 0) + if (qemuProcessDestroyMemoryBackingPath(vm->def, priv, mem) < 0) VIR_WARN("Unable to destroy memory backing path"); qemuDomainReleaseMemoryDeviceSlot(vm, mem); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -XXX,XX +XXX,XX @@ qemuProcessBuildDestroyMemoryPathsImpl(virQEMUDriver *driver, int -qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver, - virDomainObj *vm, +qemuProcessBuildDestroyMemoryPaths(virDomainObj *vm, virDomainMemoryDef *mem, bool build) { - + qemuDomainObjPrivate *priv = vm->privateData; + virQEMUDriver *driver = priv->driver; g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); size_t i; bool shouldBuildHP = false; @@ -XXX,XX +XXX,XX @@ qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver, } if (!build || shouldBuildMB) { - g_autofree char *path = NULL; - if (qemuGetMemoryBackingDomainPath(driver, vm->def, &path) < 0) - return -1; + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + g_autofree char *path = NULL; + if (qemuGetMemoryBackingDomainPath(vm->def, vm->privateData, i, &path) < 0) + return -1; - if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm, - path, build) < 0) - return -1; + if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm, path, build) < 0) + return -1; + } } return 0; @@ -XXX,XX +XXX,XX @@ qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver, int -qemuProcessDestroyMemoryBackingPath(virQEMUDriver *driver, - virDomainObj *vm, +qemuProcessDestroyMemoryBackingPath(virDomainDef *def, + qemuDomainObjPrivate *priv, virDomainMemoryDef *mem) { + virQEMUDriver *driver = priv->driver; g_autofree char *path = NULL; + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + size_t i; - if (qemuGetMemoryBackingPath(driver, vm->def, mem->info.alias, &path) < 0) - return -1; + for (i = 0; i < cfg->nb_memoryBackingDirs; i++) { + virDomainXMLPrivateDataCallbacks *privateData = (virDomainXMLPrivateDataCallbacks *) priv; + if (qemuGetMemoryBackingPath(def, privateData, i, mem->info.alias, &path) < 0) + return -1; - if (unlink(path) < 0 && - errno != ENOENT) { - virReportSystemError(errno, _("Unable to remove %1$s"), path); - return -1; + if (unlink(path) < 0 && errno != ENOENT) { + virReportSystemError(errno, _("Unable to remove %1$s"), path); + return -1; + } } return 0; @@ -XXX,XX +XXX,XX @@ qemuProcessPrepareHost(virQEMUDriver *driver, if (qemuProcessPrepareHostBackendChardev(vm) < 0) return -1; - if (qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, true) < 0) + if (qemuProcessBuildDestroyMemoryPaths(vm, NULL, true) < 0) return -1; /* Ensure no historical cgroup for this VM is lying around bogus @@ -XXX,XX +XXX,XX @@ void qemuProcessStop(virQEMUDriver *driver, goto endjob; } - qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, false); + qemuProcessBuildDestroyMemoryPaths(vm, NULL, false); if (!!g_atomic_int_dec_and_test(&driver->nactive) && driver->inhibitCallback) driver->inhibitCallback(false, driver->inhibitOpaque); diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -XXX,XX +XXX,XX @@ int qemuProcessStopCPUs(virQEMUDriver *driver, virDomainPausedReason reason, virDomainAsyncJob asyncJob); -int qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver, - virDomainObj *vm, +int qemuProcessBuildDestroyMemoryPaths(virDomainObj *vm, virDomainMemoryDef *mem, bool build); -int qemuProcessDestroyMemoryBackingPath(virQEMUDriver *driver, - virDomainObj *vm, +int qemuProcessDestroyMemoryBackingPath(virDomainDef *def, + qemuDomainObjPrivate *priv, virDomainMemoryDef *mem); void qemuProcessReconnectAll(virQEMUDriver *driver); -- 2.34.1
From: Michael Galaxy <mgalaxy@akamai.com> In our environment, we need to convert VMs into a live-update-comptabile configuration "on-the-fly" (via live migration). More specifically: We need to convert between anonymous memory-backed VMs and file-backed memory VMs. So, for this very specific case, this needs to work when host-level PMEM is being enabled. QEMU does not have a problem with this at all, but we need to relax the rules here a bit so that libvirt allows it to go through normally. Signed-off-by: Michael Galaxy <mgalaxy@akamai.com> --- src/qemu/qemu_domain.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -XXX,XX +XXX,XX @@ qemuDomainABIStabilityCheck(const virDomainDef *src, size_t i; if (src->mem.source != dst->mem.source) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Target memoryBacking source '%1$s' doesn't match source memoryBacking source'%2$s'"), - virDomainMemorySourceTypeToString(dst->mem.source), - virDomainMemorySourceTypeToString(src->mem.source)); - return false; + /* + * The current use case for this is the live migration of live-update + * capable CPR guests mounted on PMEM devices at the host + * level (not in-guest PMEM). QEMU has no problem doing these kinds of + * live migrations between these two memory backends, so let them go through. + * This allows us to "upgrade" guests from regular memory to file-backed + * memory seemlessly without taking them down. + */ + if (!((src->mem.source == VIR_DOMAIN_MEMORY_SOURCE_NONE + && dst->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) || + (src->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE + && dst->mem.source == VIR_DOMAIN_MEMORY_SOURCE_NONE))) { + + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Target memoryBacking source '%1$s' doesn't match source memoryBacking source'%2$s'"), + virDomainMemorySourceTypeToString(dst->mem.source), + virDomainMemorySourceTypeToString(src->mem.source)); + return false; + } } for (i = 0; i < src->nmems; i++) { -- 2.34.1
From: Michael Galaxy <mgalaxy@akamai.com> Update the associated test case. Signed-off-by: Michael Galaxy <mgalaxy@akamai.com> --- tests/testutilsqemu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testutilsqemu.c b/tests/testutilsqemu.c index XXXXXXX..XXXXXXX 100644 --- a/tests/testutilsqemu.c +++ b/tests/testutilsqemu.c @@ -XXX,XX +XXX,XX @@ int qemuTestDriverInit(virQEMUDriver *driver) cfg->libDir = g_strdup("/var/lib/libvirt/qemu"); VIR_FREE(cfg->channelTargetDir); cfg->channelTargetDir = g_strdup("/var/run/libvirt/qemu/channel"); - VIR_FREE(cfg->memoryBackingDir); - cfg->memoryBackingDir = g_strdup("/var/lib/libvirt/qemu/ram"); + g_free(cfg->memoryBackingDirs); + cfg->memoryBackingDirs = g_new0(char *, 1); + cfg->memoryBackingDirs[0] = g_strdup("/var/lib/libvirt/qemu/ram"); VIR_FREE(cfg->nvramDir); cfg->nvramDir = g_strdup("/var/lib/libvirt/qemu/nvram"); VIR_FREE(cfg->passtStateDir); -- 2.34.1
From: Michael Galaxy <mgalaxy@akamai.com> Update the documentation and changelog accordingly to reflect that the syntax of the memory_backing_dir setting has changed. Signed-off-by: Michael Galaxy <mgalaxy@akamai.com> --- NEWS.rst | 7 +++++++ docs/kbase/virtiofs.rst | 2 ++ src/qemu/qemu.conf.in | 2 ++ 3 files changed, 11 insertions(+) diff --git a/NEWS.rst b/NEWS.rst index XXXXXXX..XXXXXXX 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -XXX,XX +XXX,XX @@ v10.4.0 (2024-06-03) ebtables/iptables). * **Improvements** + * qemu: memory backing directories now support NUMA + + The primary use case of this is when persistent memory is used on + NUMA-aware systems. The setting ``memory_backing_dir`` in qemu.conf + now supports either an array or the original string. This allows + you to provide a array of locations to use that are NUMA-compatible + if needed. * qemu: add zstd to supported compression formats diff --git a/docs/kbase/virtiofs.rst b/docs/kbase/virtiofs.rst index XXXXXXX..XXXXXXX 100644 --- a/docs/kbase/virtiofs.rst +++ b/docs/kbase/virtiofs.rst @@ -XXX,XX +XXX,XX @@ memobject interface). # This directory is used for memoryBacking source if configured as file. # NOTE: big files will be stored here + # NOTE: In the case of a NUMA-aware system (such has shm/tmpfs or PMEM), + # one can also do: memory_backing_dir = [ "/path/to/node/0", "/path/to/node/1" ] memory_backing_dir = "/dev/shm/" * Use hugepage-backed memory diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in index XXXXXXX..XXXXXXX 100644 --- a/src/qemu/qemu.conf.in +++ b/src/qemu/qemu.conf.in @@ -XXX,XX +XXX,XX @@ # This directory is used for memoryBacking source if configured as file. # NOTE: big files will be stored here +# NOTE: In the case of a NUMA-aware system (such has shm/tmpfs or PMEM), +# one can also do: memory_backing_dir = [ "/path/to/node/0", "/path/to/node/1" ] #memory_backing_dir = "/var/lib/libvirt/qemu/ram" # Path to the SCSI persistent reservations helper. This helper is -- 2.34.1