Signed-off-by: Juan Quintela <quintela@redhat.com>
---
migration/multifd.h | 8 ++++++--
migration/multifd-zlib.c | 6 +++---
migration/multifd-zstd.c | 6 +++---
migration/multifd.c | 30 +++++++++++++++++++-----------
migration/trace-events | 4 ++--
5 files changed, 33 insertions(+), 21 deletions(-)
diff --git a/migration/multifd.h b/migration/multifd.h
index 7496f951a7..78e73df3ec 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -104,14 +104,18 @@ typedef struct {
/* thread local variables */
/* packets sent through this channel */
uint64_t num_packets;
- /* pages sent through this channel */
- uint64_t num_pages;
+ /* non zero pages sent through this channel */
+ uint64_t num_normal_pages;
/* syncs main thread and channels */
QemuSemaphore sem_sync;
/* buffers to send */
struct iovec *iov;
/* number of iovs used */
uint32_t iovs_num;
+ /* Pages that are not zero */
+ ram_addr_t *normal;
+ /* num of non zero pages */
+ uint32_t normal_num;
/* used for compression methods */
void *data;
} MultiFDSendParams;
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index f65159392a..25ef68a548 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -106,16 +106,16 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
int ret;
uint32_t i;
- for (i = 0; i < p->pages->num; i++) {
+ for (i = 0; i < p->normal_num; i++) {
uint32_t available = z->zbuff_len - out_size;
int flush = Z_NO_FLUSH;
- if (i == p->pages->num - 1) {
+ if (i == p->normal_num - 1) {
flush = Z_SYNC_FLUSH;
}
zs->avail_in = page_size;
- zs->next_in = p->pages->block->host + p->pages->offset[i];
+ zs->next_in = p->pages->block->host + p->normal[i];
zs->avail_out = available;
zs->next_out = z->zbuff + out_size;
diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
index 6933ba622a..61842d713e 100644
--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
@@ -121,13 +121,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
z->out.size = z->zbuff_len;
z->out.pos = 0;
- for (i = 0; i < p->pages->num; i++) {
+ for (i = 0; i < p->normal_num; i++) {
ZSTD_EndDirective flush = ZSTD_e_continue;
- if (i == p->pages->num - 1) {
+ if (i == p->normal_num - 1) {
flush = ZSTD_e_flush;
}
- z->in.src = p->pages->block->host + p->pages->offset[i];
+ z->in.src = p->pages->block->host + p->normal[i];
z->in.size = page_size;
z->in.pos = 0;
diff --git a/migration/multifd.c b/migration/multifd.c
index 6983ba3e7c..dbe919b764 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -89,13 +89,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
MultiFDPages_t *pages = p->pages;
size_t page_size = qemu_target_page_size();
- for (int i = 0; i < p->pages->num; i++) {
- p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
+ for (int i = 0; i < p->normal_num; i++) {
+ p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
p->iov[p->iovs_num].iov_len = page_size;
p->iovs_num++;
}
- p->next_packet_size = p->pages->num * page_size;
+ p->next_packet_size = p->normal_num * page_size;
p->flags |= MULTIFD_FLAG_NOCOMP;
return 0;
}
@@ -262,7 +262,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
packet->flags = cpu_to_be32(p->flags);
packet->pages_alloc = cpu_to_be32(p->pages->allocated);
- packet->pages_used = cpu_to_be32(p->pages->num);
+ packet->pages_used = cpu_to_be32(p->normal_num);
packet->next_packet_size = cpu_to_be32(p->next_packet_size);
packet->packet_num = cpu_to_be64(p->packet_num);
@@ -270,9 +270,9 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
strncpy(packet->ramblock, p->pages->block->idstr, 256);
}
- for (i = 0; i < p->pages->num; i++) {
+ for (i = 0; i < p->normal_num; i++) {
/* there are architectures where ram_addr_t is 32 bit */
- uint64_t temp = p->pages->offset[i];
+ uint64_t temp = p->normal[i];
packet->offset[i] = cpu_to_be64(temp);
}
@@ -556,6 +556,8 @@ void multifd_save_cleanup(void)
p->packet = NULL;
g_free(p->iov);
p->iov = NULL;
+ g_free(p->normal);
+ p->normal = NULL;
multifd_send_state->ops->send_cleanup(p, &local_err);
if (local_err) {
migrate_set_error(migrate_get_current(), local_err);
@@ -640,12 +642,17 @@ static void *multifd_send_thread(void *opaque)
qemu_mutex_lock(&p->mutex);
if (p->pending_job) {
- uint32_t used = p->pages->num;
uint64_t packet_num = p->packet_num;
uint32_t flags = p->flags;
p->iovs_num = 1;
+ p->normal_num = 0;
- if (used) {
+ for (int i = 0; i < p->pages->num; i++) {
+ p->normal[p->normal_num] = p->pages->offset[i];
+ p->normal_num++;
+ }
+
+ if (p->normal_num) {
ret = multifd_send_state->ops->send_prepare(p, &local_err);
if (ret != 0) {
qemu_mutex_unlock(&p->mutex);
@@ -655,12 +662,12 @@ static void *multifd_send_thread(void *opaque)
multifd_send_fill_packet(p);
p->flags = 0;
p->num_packets++;
- p->num_pages += used;
+ p->num_normal_pages += p->normal_num;
p->pages->num = 0;
p->pages->block = NULL;
qemu_mutex_unlock(&p->mutex);
- trace_multifd_send(p->id, packet_num, used, flags,
+ trace_multifd_send(p->id, packet_num, p->normal_num, flags,
p->next_packet_size);
p->iov[0].iov_len = p->packet_len;
@@ -710,7 +717,7 @@ out:
qemu_mutex_unlock(&p->mutex);
rcu_unregister_thread();
- trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);
+ trace_multifd_send_thread_end(p->id, p->num_packets, p->num_normal_pages);
return NULL;
}
@@ -910,6 +917,7 @@ int multifd_save_setup(Error **errp)
p->tls_hostname = g_strdup(s->hostname);
/* We need one extra place for the packet header */
p->iov = g_new0(struct iovec, page_count + 1);
+ p->normal = g_new0(ram_addr_t, page_count);
socket_send_channel_create(multifd_new_send_channel_async, p);
}
diff --git a/migration/trace-events b/migration/trace-events
index b48d873b8a..af8dee9af0 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -124,13 +124,13 @@ multifd_recv_sync_main_wait(uint8_t id) "channel %d"
multifd_recv_terminate_threads(bool error) "error %d"
multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64
multifd_recv_thread_start(uint8_t id) "%d"
-multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d"
+multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " normal pages %d flags 0x%x next packet size %d"
multifd_send_error(uint8_t id) "channel %d"
multifd_send_sync_main(long packet_num) "packet num %ld"
multifd_send_sync_main_signal(uint8_t id) "channel %d"
multifd_send_sync_main_wait(uint8_t id) "channel %d"
multifd_send_terminate_threads(bool error) "error %d"
-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64
+multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %d packets %" PRIu64 " normal pages %" PRIu64
multifd_send_thread_start(uint8_t id) "%d"
multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s"
multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s"
--
2.33.1
* Juan Quintela (quintela@redhat.com) wrote:
> Signed-off-by: Juan Quintela <quintela@redhat.com>
Can you explain a bit more what's going on here?
Dave
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> * Juan Quintela (quintela@redhat.com) wrote:
>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>
> Can you explain a bit more what's going on here?
Sorry.
Until patch 20, we have what we have always had: pages that are sent
through multifd (non-zero pages). We are going to call them normal
pages. So right now we use the array of pages that we are passed
directly in the multifd send methods.
But when we introduce zero page handling around patch 20, we end up
having two types of pages sent through multifd:
- normal pages (a.k.a. non-zero pages)
- zero pages
So the options are:
- we rename the fields before we introduce the zero page code, and then
we introduce the zero page code.
- we rename at the same time that we introduce the zero page code.
I decided to go with the 1st option.
The other thing that we do here is introduce the normal pages array, so
right now we do:

    for (i = 0; i < pages->num; i++) {
        p->normal[p->normal_num] = pages->offset[i];
        p->normal_num++;
    }
Why?
Because then patch 20 becomes:

    for (i = 0; i < pages->num; i++) {
        if (buffer_is_zero(pages->block->host + pages->offset[i], page_size)) {
            p->zero[p->zero_num] = pages->offset[i];
            p->zero_num++;
        } else {
            p->normal[p->normal_num] = pages->offset[i];
            p->normal_num++;
        }
    }
i.e. we don't have to touch the handling of normal pages at all, only
this for loop.
As an added benefit, after this patch the multifd methods don't need to
know about the pages array, only about the params array (which will
allow me to drop the locking earlier).
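To make that last point concrete, here is a minimal sketch of the shape
a send_prepare method has after this patch, modelled on the
nocomp_send_prepare hunk above (the function name is only illustrative,
and the block host pointer still comes through p->pages at this point
in the series):

    /* Sketch only: mirrors nocomp_send_prepare after this patch and
     * assumes the MultiFDSendParams fields added in migration/multifd.h
     * above.
     */
    static int example_send_prepare(MultiFDSendParams *p, Error **errp)
    {
        size_t page_size = qemu_target_page_size();

        for (uint32_t i = 0; i < p->normal_num; i++) {
            /* Offsets come from p->normal, filled in by the send thread. */
            p->iov[p->iovs_num].iov_base = p->pages->block->host + p->normal[i];
            p->iov[p->iovs_num].iov_len = page_size;
            p->iovs_num++;
        }

        p->next_packet_size = p->normal_num * page_size;
        p->flags |= MULTIFD_FLAG_NOCOMP;
        return 0;
    }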
I hope this helps.
Later, Juan.
* Juan Quintela (quintela@redhat.com) wrote:
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > * Juan Quintela (quintela@redhat.com) wrote:
> >> Signed-off-by: Juan Quintela <quintela@redhat.com>
> >
> > Can you explain a bit more what's going on here?
>
> Sorry.
>
> Until patch 20, we have what we had always have:
>
> pages that are sent through multifd (non zero pages). We are going to
> call it normal pages. So right now, we use the array of pages that we
> are passed in directly on the multifd send methods.
>
> But when we introduce zero pages handling around patch 20, we end having
> two types of pages sent through multifd:
> - normal pages (a.k.a. non-zero pages)
> - zero pages
>
> So the options are:
> - we rename the fields before we introduce the zero page code, and then
> we introduce the zero page code.
> - we rename at the same time that we introduce the zero page code.
>
> I decided to go with the 1st option.
>
> The other thing that we do here is that we introduce the normal array
> pages, so right now we do:
>
> for (i = 0; i < pages->num; i++) {
> p->narmal[p->normal_num] = pages->offset[i];
> p->normal_num++:
> }
>
>
> Why?
>
> Because then patch 20 becomes:
>
> for (i = 0; i < pages->num; i++) {
> if (buffer_is_zero(page->offset[i])) {
> p->zerol[p->zero_num] = pages->offset[i];
> p->zeronum++:
> } else {
> p->narmal[p->normal_num] = pages->offset[i];
> p->normal_num++:
> }
> }
>
> i.e. don't have to touch the handling of normal pages at all, only this
> for loop.
>
> As an added benefit, after this patch, multifd methods don't need to
> know about the pages array, only about the params array (that will allow
> me to drop the locking earlier).
>
> I hope this helps.
OK, so the code is OK, but it needs a commit message that explains all
that a bit more concisely.
Dave
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK