[PATCH v7 11/12] multifd: Zero pages transmission

Juan Quintela posted 12 patches 3 years, 6 months ago
Maintainers: Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Philippe Mathieu-Daudé" <f4bug@amsat.org>, Yanan Wang <wangyanan55@huawei.com>, Juan Quintela <quintela@redhat.com>, "Dr. David Alan Gilbert" <dgilbert@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
[PATCH v7 11/12] multifd: Zero pages transmission
Posted by Juan Quintela 3 years, 6 months ago
This implements the zero page dection and handling.

Signed-off-by: Juan Quintela <quintela@redhat.com>

---

Add comment for offset (dave)
Use local variables for offset/block to have shorter lines
---
 migration/multifd.h |  5 +++++
 migration/multifd.c | 41 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/migration/multifd.h b/migration/multifd.h
index a1b852200d..5931de6f86 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -52,6 +52,11 @@ typedef struct {
     uint32_t unused32[1];    /* Reserved for future use */
     uint64_t unused64[3];    /* Reserved for future use */
     char ramblock[256];
+    /*
+     * This array contains the pointers to:
+     *  - normal pages (initial normal_pages entries)
+     *  - zero pages (following zero_pages entries)
+     */
     uint64_t offset[];
 } __attribute__((packed)) MultiFDPacket_t;
 
diff --git a/migration/multifd.c b/migration/multifd.c
index 4473d9f834..89811619d8 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "qemu/rcu.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
@@ -275,6 +276,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
 
         packet->offset[i] = cpu_to_be64(temp);
     }
+    for (i = 0; i < p->zero_num; i++) {
+        /* there are architectures where ram_addr_t is 32 bit */
+        uint64_t temp = p->zero[i];
+
+        packet->offset[p->normal_num + i] = cpu_to_be64(temp);
+    }
 }
 
 static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
@@ -358,6 +365,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
         p->normal[i] = offset;
     }
 
+    for (i = 0; i < p->zero_num; i++) {
+        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
+
+        if (offset > (block->used_length - p->page_size)) {
+            error_setg(errp, "multifd: offset too long %" PRIu64
+                       " (max " RAM_ADDR_FMT ")",
+                       offset, block->used_length);
+            return -1;
+        }
+        p->zero[i] = offset;
+    }
+
     return 0;
 }
 
@@ -648,6 +667,8 @@ static void *multifd_send_thread(void *opaque)
 {
     MultiFDSendParams *p = opaque;
     Error *local_err = NULL;
+    /* qemu older than 7.0 don't understand zero page on multifd channel */
+    bool use_zero_page = migrate_use_multifd_zero_page();
     int ret = 0;
     bool use_zero_copy_send = migrate_use_zero_copy_send();
 
@@ -670,6 +691,7 @@ static void *multifd_send_thread(void *opaque)
         qemu_mutex_lock(&p->mutex);
 
         if (p->pending_job) {
+            RAMBlock *rb = p->pages->block;
             uint64_t packet_num = p->packet_num;
             p->flags = 0;
             if (p->sync_needed) {
@@ -688,8 +710,16 @@ static void *multifd_send_thread(void *opaque)
             }
 
             for (int i = 0; i < p->pages->num; i++) {
-                p->normal[p->normal_num] = p->pages->offset[i];
-                p->normal_num++;
+                uint64_t offset = p->pages->offset[i];
+                if (use_zero_page &&
+                    buffer_is_zero(rb->host + offset, p->page_size)) {
+                    p->zero[p->zero_num] = offset;
+                    p->zero_num++;
+                    ram_release_page(rb->idstr, offset);
+                } else {
+                    p->normal[p->normal_num] = offset;
+                    p->normal_num++;
+                }
             }
 
             if (p->normal_num) {
@@ -1152,6 +1182,13 @@ static void *multifd_recv_thread(void *opaque)
             }
         }
 
+        for (int i = 0; i < p->zero_num; i++) {
+            void *page = p->host + p->zero[i];
+            if (!buffer_is_zero(page, p->page_size)) {
+                memset(page, 0, p->page_size);
+            }
+        }
+
         if (sync_needed) {
             qemu_sem_post(&multifd_recv_state->sem_sync);
             qemu_sem_wait(&p->sem_sync);
-- 
2.37.1
Re: [PATCH v7 11/12] multifd: Zero pages transmission
Posted by Leonardo Brás 3 years, 5 months ago
On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote:
> This implements the zero page dection and handling.
> 
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> 
> ---
> 
> Add comment for offset (dave)
> Use local variables for offset/block to have shorter lines
> ---
>  migration/multifd.h |  5 +++++
>  migration/multifd.c | 41 +++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/multifd.h b/migration/multifd.h
> index a1b852200d..5931de6f86 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -52,6 +52,11 @@ typedef struct {
>      uint32_t unused32[1];    /* Reserved for future use */
>      uint64_t unused64[3];    /* Reserved for future use */
>      char ramblock[256];
> +    /*
> +     * This array contains the pointers to:
> +     *  - normal pages (initial normal_pages entries)
> +     *  - zero pages (following zero_pages entries)
> +     */
>      uint64_t offset[];
>  } __attribute__((packed)) MultiFDPacket_t;
>  
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 4473d9f834..89811619d8 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -11,6 +11,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
>  #include "qemu/rcu.h"
>  #include "exec/target_page.h"
>  #include "sysemu/sysemu.h"
> @@ -275,6 +276,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
>  
>          packet->offset[i] = cpu_to_be64(temp);
>      }
> +    for (i = 0; i < p->zero_num; i++) {
> +        /* there are architectures where ram_addr_t is 32 bit */
> +        uint64_t temp = p->zero[i];
> +
> +        packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> +    }
>  }
>  
>  static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
> @@ -358,6 +365,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
>          p->normal[i] = offset;
>      }
>  
> +    for (i = 0; i < p->zero_num; i++) {
> +        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
> +
> +        if (offset > (block->used_length - p->page_size)) {
> +            error_setg(errp, "multifd: offset too long %" PRIu64
> +                       " (max " RAM_ADDR_FMT ")",
> +                       offset, block->used_length);
> +            return -1;
> +        }
> +        p->zero[i] = offset;
> +    }
> +
>      return 0;
>  }

IIUC ram_addr_t is supposed to be the address size for the architecture, mainly
being 32 or 64 bits. So packet->offset[i] is always u64, and p->zero[i] possibly
being u32 or u64.

Since both local variables and packet->offset[i] are 64-bit, there is no issue.

But on 'p->zero[i] = offset' we can have 'u32 = u64', and this should raise a
warning (or am I missing something?).
  

>  
> @@ -648,6 +667,8 @@ static void *multifd_send_thread(void *opaque)
>  {
>      MultiFDSendParams *p = opaque;
>      Error *local_err = NULL;
> +    /* qemu older than 7.0 don't understand zero page on multifd channel */
> +    bool use_zero_page = migrate_use_multifd_zero_page();
>      int ret = 0;
>      bool use_zero_copy_send = migrate_use_zero_copy_send();
>  
> @@ -670,6 +691,7 @@ static void *multifd_send_thread(void *opaque)
>          qemu_mutex_lock(&p->mutex);
>  
>          if (p->pending_job) {
> +            RAMBlock *rb = p->pages->block;
>              uint64_t packet_num = p->packet_num;
>              p->flags = 0;
>              if (p->sync_needed) {
> @@ -688,8 +710,16 @@ static void *multifd_send_thread(void *opaque)
>              }
>  
>              for (int i = 0; i < p->pages->num; i++) {
> -                p->normal[p->normal_num] = p->pages->offset[i];
> -                p->normal_num++;
> +                uint64_t offset = p->pages->offset[i];
> +                if (use_zero_page &&
> +                    buffer_is_zero(rb->host + offset, p->page_size)) {
> +                    p->zero[p->zero_num] = offset;

Same here.

> +                    p->zero_num++;
> +                    ram_release_page(rb->idstr, offset);
> +                } else {
> +                    p->normal[p->normal_num] = offset;

Same here? (p->normal[i] can also be u32)

> +                    p->normal_num++;
> +                }
>              }
>  
>              if (p->normal_num) {
> @@ -1152,6 +1182,13 @@ static void *multifd_recv_thread(void *opaque)
>              }
>          }
>  
> +        for (int i = 0; i < p->zero_num; i++) {
> +            void *page = p->host + p->zero[i];
> +            if (!buffer_is_zero(page, p->page_size)) {
> +                memset(page, 0, p->page_size);
> +            }
> +        }
> +
>          if (sync_needed) {
>              qemu_sem_post(&multifd_recv_state->sem_sync);
>              qemu_sem_wait(&p->sem_sync);
Re: [PATCH v7 11/12] multifd: Zero pages transmission
Posted by Juan Quintela 3 years, 2 months ago
Leonardo Brás <leobras@redhat.com> wrote:
> On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote:
>> This implements the zero page dection and handling.
>> 
>> Signed-off-by: Juan Quintela <quintela@redhat.com>

>> @@ -358,6 +365,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
>>          p->normal[i] = offset;
>>      }
>>  
>> +    for (i = 0; i < p->zero_num; i++) {
>> +        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
>> +
>> +        if (offset > (block->used_length - p->page_size)) {
>> +            error_setg(errp, "multifd: offset too long %" PRIu64
>> +                       " (max " RAM_ADDR_FMT ")",
>> +                       offset, block->used_length);
>> +            return -1;
>> +        }
>> +        p->zero[i] = offset;
>> +    }
>> +
>>      return 0;
>>  }
>
> IIUC ram_addr_t is supposed to be the address size for the architecture, mainly
> being 32 or 64 bits. So packet->offset[i] is always u64, and p->zero[i] possibly
> being u32 or u64.
>
> Since both local variables and packet->offset[i] are 64-bit, there is no issue.
>
> But on 'p->zero[i] = offset' we can have 'u32 = u64', and this should raise a
> warning (or am I missing something?).

I don't really know what to do here.
The problem is only theoretical (in the long, long past, we have
supported migrating between different architectures, but we aren't
testing anymore).

And because it was a pain in the ass, we define it as:

/* address in the RAM (different from a physical address) */
#if defined(CONFIG_XEN_BACKEND)
typedef uint64_t ram_addr_t;
#  define RAM_ADDR_MAX UINT64_MAX
#  define RAM_ADDR_FMT "%" PRIx64
#else
typedef uintptr_t ram_addr_t;
#  define RAM_ADDR_MAX UINTPTR_MAX
#  define RAM_ADDR_FMT "%" PRIxPTR
#endif

So I am pretty sure that almost nothing uses 32bits for it now (I
haven't checked lately, but I guess that nobody is really using/testing
xen on 32 bits).

I don't really know.  But it could only happens when you are migrating
from Xen 64 bits to Xen 32 bits, I don't really know if that even work.

I will give it a try to change normal/zero to u64.

Thanks, Juan.
Re: [PATCH v7 11/12] multifd: Zero pages transmission
Posted by Juan Quintela 3 years, 2 months ago
Leonardo Brás <leobras@redhat.com> wrote:
> On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote:
>> This implements the zero page dection and handling.
>> 
>> Signed-off-by: Juan Quintela <quintela@redhat.com>

Hi

on further investigation, I see why it can't be a problem.


>>  static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
>> @@ -358,6 +365,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
>>          p->normal[i] = offset;
>>      }
>>  
>> +    for (i = 0; i < p->zero_num; i++) {
>> +        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
>> +
>> +        if (offset > (block->used_length - p->page_size)) {

We are checked that we are inside the RAM block.  You can't have a
bigger that 32bit offset when you have 32bits of RAM.


>> @@ -688,8 +710,16 @@ static void *multifd_send_thread(void *opaque)
>>              }
>>  
>>              for (int i = 0; i < p->pages->num; i++) {
>> -                p->normal[p->normal_num] = p->pages->offset[i];
>> -                p->normal_num++;
>> +                uint64_t offset = p->pages->offset[i];

We are reading the offset here.
p->pages->offset is ram_addr_t, so no prolbem here.

>> +                if (use_zero_page &&
>> +                    buffer_is_zero(rb->host + offset, p->page_size)) {
>> +                    p->zero[p->zero_num] = offset;
>
> Same here.

This and next case are exactly the same, we are doing:

ram_addr_t offset1;
u64 foo = offset1;
ram_addr_t offest2 = foo;

So, it should be right.  Everything is unsigned here.

>> +                    p->zero_num++;
>> +                    ram_release_page(rb->idstr, offset);
>> +                } else {
>> +                    p->normal[p->normal_num] = offset;
>
> Same here? (p->normal[i] can also be u32)

Thanks, Juan.