[PATCH v2 2/2] oslib-posix: initialize backend memory objects in parallel

Mark Kanda posted 2 patches 10 months, 1 week ago
Maintainers: David Hildenbrand <david@redhat.com>, Igor Mammedov <imammedo@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Stefan Weil <sw@weilnetz.de>
There is a newer version of this series
[PATCH v2 2/2] oslib-posix: initialize backend memory objects in parallel
Posted by Mark Kanda 10 months, 1 week ago
QEMU initializes preallocated backend memory as the objects are parsed from
the command line. This is not optimal in some cases (e.g. memory spanning
multiple NUMA nodes) because the memory objects are initialized in series.

Allow the initialization to occur in parallel. In order to ensure optimal
thread placement, parallel initialization requires prealloc context threads
to be in use.

Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
---
 backends/hostmem.c     |  8 ++++++--
 hw/virtio/virtio-mem.c |  4 ++--
 include/qemu/osdep.h   | 14 ++++++++++++--
 system/vl.c            |  6 ++++++
 util/oslib-posix.c     | 27 +++++++++++++++++----------
 util/oslib-win32.c     |  8 +++++++-
 6 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/backends/hostmem.c b/backends/hostmem.c
index 30f69b2cb5..8f602dc86f 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -20,6 +20,7 @@
 #include "qom/object_interfaces.h"
 #include "qemu/mmap-alloc.h"
 #include "qemu/madvise.h"
+#include "hw/qdev-core.h"
 
 #ifdef CONFIG_NUMA
 #include <numaif.h>
@@ -235,9 +236,10 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
         int fd = memory_region_get_fd(&backend->mr);
         void *ptr = memory_region_get_ram_ptr(&backend->mr);
         uint64_t sz = memory_region_size(&backend->mr);
+        bool async = !phase_check(PHASE_MACHINE_INITIALIZED);
 
         if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
-                               backend->prealloc_context, errp)) {
+                               backend->prealloc_context, async, errp)) {
             return;
         }
         backend->prealloc = true;
@@ -323,6 +325,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
     HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
     void *ptr;
     uint64_t sz;
+    bool async = !phase_check(PHASE_MACHINE_INITIALIZED);
 
     if (!bc->alloc) {
         return;
@@ -398,7 +401,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
     if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
                                                 ptr, sz,
                                                 backend->prealloc_threads,
-                                                backend->prealloc_context, errp)) {
+                                                backend->prealloc_context,
+                                                async, errp)) {
         return;
     }
 }
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 99ab989852..ffd119ebac 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
         int fd = memory_region_get_fd(&vmem->memdev->mr);
         Error *local_err = NULL;
 
-        if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+        if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
             static bool warned;
 
             /*
@@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg,
     int fd = memory_region_get_fd(&vmem->memdev->mr);
     Error *local_err = NULL;
 
-    if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+    if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
         error_report_err(local_err);
         return -ENOMEM;
     }
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 9a405bed89..d6e074c515 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -672,17 +672,27 @@ typedef struct ThreadContext ThreadContext;
  * @area: start address of the are to preallocate
  * @sz: the size of the area to preallocate
  * @max_threads: maximum number of threads to use
+ * @tc: prealloc context threads pointer, NULL if not in use
+ * @async: request asynchronous preallocation, requires @tc
  * @errp: returns an error if this function fails
  *
  * Preallocate memory (populate/prefault page tables writable) for the virtual
  * memory area starting at @area with the size of @sz. After a successful call,
  * each page in the area was faulted in writable at least once, for example,
- * after allocating file blocks for mapped files.
+ * after allocating file blocks for mapped files. When using @async,
+ * wait_mem_prealloc() is required to wait for the prealloction threads to
+ * terminate and associated cleanup.
  *
  * Return: true on success, else false setting @errp with error.
  */
 bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
-                       ThreadContext *tc, Error **errp);
+                       ThreadContext *tc, bool async, Error **errp);
+
+/**
+ * Wait for any outstanding memory prealloc initialization
+ * to complete.
+ */
+int wait_mem_prealloc(void);
 
 /**
  * qemu_get_pid_name:
diff --git a/system/vl.c b/system/vl.c
index 53850a1daf..5696c53ace 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -2010,6 +2010,12 @@ static void qemu_create_late_backends(void)
 
     object_option_foreach_add(object_create_late);
 
+    /* Wait for any outstanding memory prealloc init to complete */
+    if (wait_mem_prealloc()) {
+        perror("memory preallocation failed");
+        exit(1);
+    }
+
     if (tpm_init() < 0) {
         exit(1);
     }
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 26bf2f2883..72b17e4a1f 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -417,7 +417,7 @@ static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
 }
 
 static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
-                           int max_threads, ThreadContext *tc,
+                           int max_threads, ThreadContext *tc, bool async,
                            bool use_madv_populate_write)
 {
     static gsize initialized = 0;
@@ -438,7 +438,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
 
     if (use_madv_populate_write) {
         /* Avoid creating a single thread for MADV_POPULATE_WRITE */
-        if (context->num_threads == 1) {
+        if (context->num_threads == 1 && !async) {
             if (qemu_madvise(area, hpagesize * numpages,
                              QEMU_MADV_POPULATE_WRITE)) {
                 return -errno;
@@ -480,7 +480,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
     return 0;
 }
 
-static int wait_mem_prealloc(void)
+int wait_mem_prealloc(void)
 {
     int i, ret = 0;
     MemsetContext *context, *next_context;
@@ -519,7 +519,7 @@ static bool madv_populate_write_possible(char *area, size_t pagesize)
 }
 
 bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
-                       ThreadContext *tc, Error **errp)
+                       ThreadContext *tc, bool async, Error **errp)
 {
     static gsize initialized;
     int ret;
@@ -561,7 +561,7 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
     }
 
     /* touch pages simultaneously */
-    ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
+    ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, async,
                           use_madv_populate_write);
     if (ret) {
         error_setg_errno(errp, -ret,
@@ -570,12 +570,19 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
         goto err;
     }
 
-    ret = wait_mem_prealloc();
-    if (ret) {
-        error_setg_errno(errp, -ret,
-                         "qemu_prealloc_mem: failed waiting for memory prealloc");
-        rv = false;
+    /*
+     * Async prealloc is only allowed when using MADV_POPULATE_WRITE and
+     * prealloc context (to ensure optimal thread placement).
+     */
+    if (!async || !use_madv_populate_write || !tc) {
+        ret = wait_mem_prealloc();
+        if (ret) {
+            error_setg_errno(errp, -ret,
+                "qemu_prealloc_mem: failed waiting for memory prealloc");
+            rv = false;
+        }
     }
+
 err:
     if (!use_madv_populate_write) {
         ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index c4a5f05a49..50284348e8 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -265,7 +265,7 @@ int getpagesize(void)
 }
 
 bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
-                       ThreadContext *tc, Error **errp)
+                       ThreadContext *tc, bool async, Error **errp)
 {
     int i;
     size_t pagesize = qemu_real_host_page_size();
@@ -278,6 +278,12 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
     return true;
 }
 
+int wait_mem_prealloc(void)
+{
+    /* async prealloc not supported */
+    return 0;
+}
+
 char *qemu_get_pid_name(pid_t pid)
 {
     /* XXX Implement me */
-- 
2.39.3