[PATCH 4/6] migration/multifd: Zero page transmission on the multifd thread.

Hao Xiang posted 6 patches 9 months, 3 weeks ago
Maintainers: Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>, Thomas Huth <thuth@redhat.com>, Laurent Vivier <lvivier@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>
There is a newer version of this series
[PATCH 4/6] migration/multifd: Zero page transmission on the multifd thread.
Posted by Hao Xiang 9 months, 3 weeks ago
This implements the zero page detection and handling on the multifd
threads.

Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
---
 migration/multifd.c | 62 +++++++++++++++++++++++++++++++++++++++++----
 migration/multifd.h |  5 ++++
 2 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index a20d0ed10e..c031f947c7 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "qemu/rcu.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
@@ -278,6 +279,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
 
         packet->offset[i] = cpu_to_be64(temp);
     }
+    for (i = 0; i < p->zero_num; i++) {
+        /* there are architectures where ram_addr_t is 32 bit */
+        uint64_t temp = p->zero[i];
+
+        packet->offset[p->normal_num + i] = cpu_to_be64(temp);
+    }
 }
 
 static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
@@ -360,6 +367,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
         p->normal[i] = offset;
     }
 
+    for (i = 0; i < p->zero_num; i++) {
+        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
+
+        if (offset > (p->block->used_length - p->page_size)) {
+            error_setg(errp, "multifd: offset too long %" PRIu64
+                       " (max " RAM_ADDR_FMT ")",
+                       offset, p->block->used_length);
+            return -1;
+        }
+        p->zero[i] = offset;
+    }
+
     return 0;
 }
 
@@ -658,13 +677,37 @@ int multifd_send_sync_main(void)
     return 0;
 }
 
+static void zero_page_check_send(MultiFDSendParams *p)
+{
+    /*
+     * QEMU versions older than 9.0 don't understand zero pages
+     * on the multifd channel. This switch is required to
+     * maintain backward compatibility.
+     */
+    bool use_multifd_zero_page = migrate_multifd_zero_page();
+    RAMBlock *rb = p->pages->block;
+
+    for (int i = 0; i < p->pages->num; i++) {
+        uint64_t offset = p->pages->offset[i];
+        if (use_multifd_zero_page &&
+            buffer_is_zero(rb->host + offset, p->page_size)) {
+            p->zero[p->zero_num] = offset;
+            p->zero_num++;
+            ram_release_page(rb->idstr, offset);
+        } else {
+            p->normal[p->normal_num] = offset;
+            p->normal_num++;
+        }
+    }
+}
+
 static void *multifd_send_thread(void *opaque)
 {
     MultiFDSendParams *p = opaque;
     MigrationThread *thread = NULL;
     Error *local_err = NULL;
-    int ret = 0;
     bool use_zero_copy_send = migrate_zero_copy_send();
+    int ret = 0;
 
     thread = migration_threads_add(p->name, qemu_get_thread_id());
 
@@ -699,10 +742,7 @@ static void *multifd_send_thread(void *opaque)
                 p->iovs_num = 1;
             }
 
-            for (int i = 0; i < p->pages->num; i++) {
-                p->normal[p->normal_num] = p->pages->offset[i];
-                p->normal_num++;
-            }
+            zero_page_check_send(p);
 
             if (p->normal_num) {
                 ret = multifd_send_state->ops->send_prepare(p, &local_err);
@@ -1107,6 +1147,16 @@ void multifd_recv_sync_main(void)
     trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
 }
 
+static void zero_page_check_recv(MultiFDRecvParams *p)
+{
+    for (int i = 0; i < p->zero_num; i++) {
+        void *page = p->host + p->zero[i];
+        if (!buffer_is_zero(page, p->page_size)) {
+            memset(page, 0, p->page_size);
+        }
+    }
+}
+
 static void *multifd_recv_thread(void *opaque)
 {
     MultiFDRecvParams *p = opaque;
@@ -1153,6 +1203,8 @@ static void *multifd_recv_thread(void *opaque)
             }
         }
 
+        zero_page_check_recv(p);
+
         if (flags & MULTIFD_FLAG_SYNC) {
             qemu_sem_post(&multifd_recv_state->sem_sync);
             qemu_sem_wait(&p->sem_sync);
diff --git a/migration/multifd.h b/migration/multifd.h
index 6be9b2f6c1..7448cb1aa9 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -53,6 +53,11 @@ typedef struct {
     uint32_t unused32[1];    /* Reserved for future use */
     uint64_t unused64[3];    /* Reserved for future use */
     char ramblock[256];
+    /*
+     * This array contains the page offsets of:
+     *  - normal pages (initial normal_pages entries)
+     *  - zero pages (following zero_pages entries)
+     */
     uint64_t offset[];
 } __attribute__((packed)) MultiFDPacket_t;
 
-- 
2.30.2
Re: [PATCH 4/6] migration/multifd: Zero page transmission on the multifd thread.
Posted by Peter Xu 9 months, 3 weeks ago
On Tue, Feb 06, 2024 at 11:19:06PM +0000, Hao Xiang wrote:
> This implements the zero page detection and handling on the multifd
> threads.
> 
> Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> ---
>  migration/multifd.c | 62 +++++++++++++++++++++++++++++++++++++++++----
>  migration/multifd.h |  5 ++++
>  2 files changed, 62 insertions(+), 5 deletions(-)
> 
> diff --git a/migration/multifd.c b/migration/multifd.c
> index a20d0ed10e..c031f947c7 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -11,6 +11,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
>  #include "qemu/rcu.h"
>  #include "exec/target_page.h"
>  #include "sysemu/sysemu.h"
> @@ -278,6 +279,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
>  
>          packet->offset[i] = cpu_to_be64(temp);
>      }
> +    for (i = 0; i < p->zero_num; i++) {
> +        /* there are architectures where ram_addr_t is 32 bit */
> +        uint64_t temp = p->zero[i];
> +
> +        packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> +    }
>  }

Please note that p->normal_num will be dropped very soon, see:

https://lore.kernel.org/all/20240202102857.110210-6-peterx@redhat.com/

Please use p->pages->num instead.

This patch also relies on some changes in the previous patch. IMHO we can
split the patch better in this way:

  - Patch 1: Add new parameter "zero-page-detection", support "none",
    "legacy".  You'll need to implement "none" here, so that we skip zero
    page detection by returning 0 in save_zero_page() if "none".

  - Patch 2: Add a new "multifd" mode to the above, and implement it
    completely in the same patch.

  - Patch 3: introduce ram_save_target_page_multifd()

  - Patch 4: test case

If you want to add "zeros" accounting, that can be done as more patches on
top.

Thanks,

-- 
Peter Xu