[PATCH v3 07/14] migration/multifd: Replace p->pages with an union pointer

Fabiano Rosas posted 14 patches 3 months, 3 weeks ago
There is a newer version of this series
[PATCH v3 07/14] migration/multifd: Replace p->pages with an union pointer
Posted by Fabiano Rosas 3 months, 3 weeks ago
We want multifd to be able to handle more types of data than just ram
pages. To start decoupling multifd from pages, replace p->pages
(MultiFDPages_t) with the new type MultiFDSendData that hides the
client payload inside a union.

The general idea here is to isolate functions that *need* to handle
MultiFDPages_t and move them in the future to multifd-ram.c, while
multifd.c will stay with only the core functions that handle
MultiFDSendData/MultiFDRecvData.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
 migration/multifd-qpl.c       |  6 +--
 migration/multifd-uadk.c      |  2 +-
 migration/multifd-zero-page.c |  2 +-
 migration/multifd-zlib.c      |  2 +-
 migration/multifd-zstd.c      |  2 +-
 migration/multifd.c           | 85 ++++++++++++++++++++---------------
 migration/multifd.h           |  7 +--
 7 files changed, 57 insertions(+), 49 deletions(-)

diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c
index db60c05795..21153f1987 100644
--- a/migration/multifd-qpl.c
+++ b/migration/multifd-qpl.c
@@ -406,7 +406,7 @@ retry:
 static void multifd_qpl_compress_pages_slow_path(MultiFDSendParams *p)
 {
     QplData *qpl = p->compress_data;
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
     uint32_t size = p->page_size;
     qpl_job *job = qpl->sw_job;
     uint8_t *zbuf = qpl->zbuf;
@@ -437,7 +437,7 @@ static void multifd_qpl_compress_pages_slow_path(MultiFDSendParams *p)
 static void multifd_qpl_compress_pages(MultiFDSendParams *p)
 {
     QplData *qpl = p->compress_data;
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
     uint32_t size = p->page_size;
     QplHwJob *hw_job;
     uint8_t *buf;
@@ -501,7 +501,7 @@ static void multifd_qpl_compress_pages(MultiFDSendParams *p)
 static int multifd_qpl_send_prepare(MultiFDSendParams *p, Error **errp)
 {
     QplData *qpl = p->compress_data;
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
     uint32_t len = 0;
 
     if (!multifd_send_prepare_common(p)) {
diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c
index 1ed1c6afe6..9d99807af5 100644
--- a/migration/multifd-uadk.c
+++ b/migration/multifd-uadk.c
@@ -177,7 +177,7 @@ static int multifd_uadk_send_prepare(MultiFDSendParams *p, Error **errp)
     uint32_t page_size = multifd_ram_page_size();
     uint8_t *buf = uadk_data->buf;
     int ret = 0;
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
 
     if (!multifd_send_prepare_common(p)) {
         goto out;
diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
index cc624e36b3..6506a4aa89 100644
--- a/migration/multifd-zero-page.c
+++ b/migration/multifd-zero-page.c
@@ -46,7 +46,7 @@ static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
  */
 void multifd_send_zero_page_detect(MultiFDSendParams *p)
 {
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
     RAMBlock *rb = pages->block;
     int i = 0;
     int j = pages->num - 1;
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index e47d7f70dc..66517c1067 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -123,7 +123,7 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
  */
 static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
 {
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
     struct zlib_data *z = p->compress_data;
     z_stream *zs = &z->zs;
     uint32_t out_size = 0;
diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
index 1812fd1b48..04ac711cf4 100644
--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
@@ -119,7 +119,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp)
  */
 static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
 {
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
     struct zstd_data *z = p->compress_data;
     int ret;
     uint32_t i;
diff --git a/migration/multifd.c b/migration/multifd.c
index 64503604cf..8771cda734 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -49,8 +49,7 @@ typedef struct {
 
 struct {
     MultiFDSendParams *params;
-    /* array of pages to sent */
-    MultiFDPages_t *pages;
+    MultiFDSendData *data;
     /*
      * Global number of generated multifd packets.
      *
@@ -109,6 +108,28 @@ static size_t multifd_ram_payload_size(void)
     return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t);
 }
 
+static MultiFDSendData *multifd_send_data_alloc(void)
+{
+    size_t max_payload_size, size_minus_payload;
+
+    /*
+     * MultiFDPages_t has a flexible array at the end, account for it
+     * when allocating MultiFDSendData. Use max() in case other types
+     * added to the union in the future are larger than
+     * (MultiFDPages_t + flex array).
+     */
+    max_payload_size = MAX(multifd_ram_payload_size(), sizeof(MultiFDPayload));
+
+    /*
+     * Account for any holes the compiler might insert. We can't pack
+     * the structure because that misaligns the members and triggers
+     * Waddress-of-packed-member.
+     */
+    size_minus_payload = sizeof(MultiFDSendData) - sizeof(MultiFDPayload);
+
+    return g_malloc0(size_minus_payload + max_payload_size);
+}
+
 static bool multifd_use_packets(void)
 {
     return !migrate_mapped_ram();
@@ -121,7 +142,7 @@ void multifd_send_channel_created(void)
 
 static void multifd_set_file_bitmap(MultiFDSendParams *p)
 {
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
 
     assert(pages->block);
 
@@ -177,7 +198,7 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
 
 static void multifd_send_prepare_iovs(MultiFDSendParams *p)
 {
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
     uint32_t page_size = multifd_ram_page_size();
 
     for (int i = 0; i < pages->normal_num; i++) {
@@ -403,23 +424,10 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
     return msg.id;
 }
 
-static MultiFDPages_t *multifd_pages_init(uint32_t n)
-{
-    MultiFDPages_t *pages = g_malloc0(multifd_ram_payload_size());
-
-    return pages;
-}
-
-static void multifd_pages_clear(MultiFDPages_t *pages)
-{
-    multifd_pages_reset(pages);
-    g_free(pages);
-}
-
 void multifd_send_fill_packet(MultiFDSendParams *p)
 {
     MultiFDPacket_t *packet = p->packet;
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
     uint64_t packet_num;
     uint32_t zero_num = pages->num - pages->normal_num;
     int i;
@@ -601,7 +609,7 @@ static bool multifd_send_pages(void)
     int i;
     static int next_channel;
     MultiFDSendParams *p = NULL; /* make happy gcc */
-    MultiFDPages_t *pages = multifd_send_state->pages;
+    MultiFDSendData *tmp;
 
     if (multifd_send_should_exit()) {
         return false;
@@ -636,11 +644,14 @@ static bool multifd_send_pages(void)
      * qatomic_store_release() in multifd_send_thread().
      */
     smp_mb_acquire();
-    assert(!p->pages->num);
-    multifd_send_state->pages = p->pages;
-    p->pages = pages;
+
+    assert(!p->data->u.ram.num);
+
+    tmp = multifd_send_state->data;
+    multifd_send_state->data = p->data;
+    p->data = tmp;
     /*
-     * Making sure p->pages is setup before marking pending_job=true. Pairs
+     * Making sure p->data is setup before marking pending_job=true. Pairs
      * with the qatomic_load_acquire() in multifd_send_thread().
      */
     qatomic_store_release(&p->pending_job, true);
@@ -670,7 +681,7 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
     MultiFDPages_t *pages;
 
 retry:
-    pages = multifd_send_state->pages;
+    pages = &multifd_send_state->data->u.ram;
 
     /* If the queue is empty, we can already enqueue now */
     if (multifd_queue_empty(pages)) {
@@ -800,8 +811,8 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
     qemu_sem_destroy(&p->sem_sync);
     g_free(p->name);
     p->name = NULL;
-    multifd_pages_clear(p->pages);
-    p->pages = NULL;
+    g_free(p->data);
+    p->data = NULL;
     p->packet_len = 0;
     g_free(p->packet);
     p->packet = NULL;
@@ -818,8 +829,8 @@ static void multifd_send_cleanup_state(void)
     qemu_sem_destroy(&multifd_send_state->channels_ready);
     g_free(multifd_send_state->params);
     multifd_send_state->params = NULL;
-    multifd_pages_clear(multifd_send_state->pages);
-    multifd_send_state->pages = NULL;
+    g_free(multifd_send_state->data);
+    multifd_send_state->data = NULL;
     g_free(multifd_send_state);
     multifd_send_state = NULL;
 }
@@ -868,11 +879,13 @@ int multifd_send_sync_main(void)
 {
     int i;
     bool flush_zero_copy;
+    MultiFDPages_t *pages;
 
     if (!migrate_multifd()) {
         return 0;
     }
-    if (multifd_send_state->pages->num) {
+    pages = &multifd_send_state->data->u.ram;
+    if (pages->num) {
         if (!multifd_send_pages()) {
             error_report("%s: multifd_send_pages fail", __func__);
             return -1;
@@ -947,11 +960,11 @@ static void *multifd_send_thread(void *opaque)
         }
 
         /*
-         * Read pending_job flag before p->pages.  Pairs with the
+         * Read pending_job flag before p->data.  Pairs with the
          * qatomic_store_release() in multifd_send_pages().
          */
         if (qatomic_load_acquire(&p->pending_job)) {
-            MultiFDPages_t *pages = p->pages;
+            MultiFDPages_t *pages = &p->data->u.ram;
 
             p->iovs_num = 0;
             assert(pages->num);
@@ -963,7 +976,7 @@ static void *multifd_send_thread(void *opaque)
 
             if (migrate_mapped_ram()) {
                 ret = file_write_ramblock_iov(p->c, p->iov, p->iovs_num,
-                                              pages, &local_err);
+                                              &p->data->u.ram, &local_err);
             } else {
                 ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num,
                                                   NULL, 0, p->write_flags,
@@ -983,7 +996,7 @@ static void *multifd_send_thread(void *opaque)
             p->next_packet_size = 0;
 
             /*
-             * Making sure p->pages is published before saying "we're
+             * Making sure p->data is published before saying "we're
              * free".  Pairs with the smp_mb_acquire() in
              * multifd_send_pages().
              */
@@ -1179,7 +1192,7 @@ bool multifd_send_setup(void)
     thread_count = migrate_multifd_channels();
     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
     multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
-    multifd_send_state->pages = multifd_pages_init(page_count);
+    multifd_send_state->data = multifd_send_data_alloc();
     qemu_sem_init(&multifd_send_state->channels_created, 0);
     qemu_sem_init(&multifd_send_state->channels_ready, 0);
     qatomic_set(&multifd_send_state->exiting, 0);
@@ -1191,7 +1204,7 @@ bool multifd_send_setup(void)
         qemu_sem_init(&p->sem, 0);
         qemu_sem_init(&p->sem_sync, 0);
         p->id = i;
-        p->pages = multifd_pages_init(page_count);
+        p->data = multifd_send_data_alloc();
         if (use_packets) {
             p->packet_len = sizeof(MultiFDPacket_t)
                           + sizeof(uint64_t) * page_count;
@@ -1689,7 +1702,7 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
 
 bool multifd_send_prepare_common(MultiFDSendParams *p)
 {
-    MultiFDPages_t *pages = p->pages;
+    MultiFDPages_t *pages = &p->data->u.ram;
     multifd_send_zero_page_detect(p);
 
     if (!pages->normal_num) {
diff --git a/migration/multifd.h b/migration/multifd.h
index a7fdd97f70..c2ba4cad13 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -152,12 +152,7 @@ typedef struct {
      */
     bool pending_job;
     bool pending_sync;
-    /* array of pages to sent.
-     * The owner of 'pages' depends of 'pending_job' value:
-     * pending_job == 0 -> migration_thread can use it.
-     * pending_job != 0 -> multifd_channel can use it.
-     */
-    MultiFDPages_t *pages;
+    MultiFDSendData *data;
 
     /* thread local variables. No locking required */
 
-- 
2.35.3
Re: [PATCH v3 07/14] migration/multifd: Replace p->pages with an union pointer
Posted by Peter Xu 3 months ago
On Thu, Aug 01, 2024 at 09:35:09AM -0300, Fabiano Rosas wrote:
> We want multifd to be able to handle more types of data than just ram
> pages. To start decoupling multifd from pages, replace p->pages
> (MultiFDPages_t) with the new type MultiFDSendData that hides the
> client payload inside an union.
> 
> The general idea here is to isolate functions that *need* to handle
> MultiFDPages_t and move them in the future to multifd-ram.c, while
> multifd.c will stay with only the core functions that handle
> MultiFDSendData/MultiFDRecvData.
> 
> Signed-off-by: Fabiano Rosas <farosas@suse.de>

Reviewed-by: Peter Xu <peterx@redhat.com>

[...]

> +static MultiFDSendData *multifd_send_data_alloc(void)
> +{
> +    size_t max_payload_size, size_minus_payload;
> +
> +    /*
> +     * MultiFDPages_t has a flexible array at the end, account for it
> +     * when allocating MultiFDSendData. Use max() in case other types
> +     * added to the union in the future are larger than
> +     * (MultiFDPages_t + flex array).
> +     */
> +    max_payload_size = MAX(multifd_ram_payload_size(), sizeof(MultiFDPayload));
> +
> +    /*
> +     * Account for any holes the compiler might insert. We can't pack
> +     * the structure because that misaligns the members and triggers
> +     * Waddress-of-packed-member.
> +     */
> +    size_minus_payload = sizeof(MultiFDSendData) - sizeof(MultiFDPayload);
> +
> +    return g_malloc0(size_minus_payload + max_payload_size);
> +}

Hmm I didn't notice the hole issue for sure..

In the mid term we really should remove this one way or another. What
I have in mind is described in the other thread:

https://lore.kernel.org/qemu-devel/ZsZZFwws5tlOMmZk@x1n/

I hope we can simply statically define offset[] to be the max.

I don't think we must stick with size-per-packet; in this case IMHO we
should choose whatever is easier for us. So far I have not been worried
about regressions, as long as the relevant n_pages is still relatively
large. Not to mention that, AFAIU, for production use x86/s390 always use
a 4K psize, while arm64 doesn't yet have a stable kvm-avail vcpu model,
which might be a bigger issue to solve as of now.

Let's see how it goes..

-- 
Peter Xu