[v3] multifd: Copy pages before compressing them with zlib

[PATCH v3] multifd: Copy pages before compressing them with zlib

Posted by Ilya Leoshkevich 3 years, 7 months ago

zlib_send_prepare() compresses pages of a running VM. zlib does not
make any thread-safety guarantees with respect to changing deflate()
input concurrently with deflate() [1].

One can observe problems due to this with a zlib build that supports
the IBM zEnterprise Data Compression accelerator [2]. When hardware
acceleration is enabled, the migration/multifd/tcp/plain/zlib test
fails intermittently [3] due to sliding window corruption. The
accelerator's architecture explicitly discourages concurrent accesses
[4]:

    Page 26-57, "Other Conditions":

    As observed by this CPU, other CPUs, and channel
    programs, references to the parameter block, first,
    second, and third operands may be multiple-access
    references, accesses to these storage locations are
    not necessarily block-concurrent, and the sequence
    of these accesses or references is undefined.

Mark Adler pointed out that vanilla zlib performs double fetches under
certain circumstances as well [5], therefore we need to copy data
before passing it to deflate().

[1] https://zlib.net/manual.html
[2] https://github.com/madler/zlib/pull/410
[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html
[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf
[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---

v1: https://lists.gnu.org/archive/html/qemu-devel/2022-03/msg06841.html
v1 -> v2: Rebase, mention Mark Adler's reply in the commit message.

v2: https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00627.html
v2 -> v3: Get rid of pointer maths (David).
          Use a more relevant link to Mark Adler's comment (Peter).

 migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 3a7ae44485..18213a9513 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -27,6 +27,8 @@ struct zlib_data {
     uint8_t *zbuff;
     /* size of compressed buffer */
     uint32_t zbuff_len;
+    /* uncompressed buffer of size qemu_target_page_size() */
+    uint8_t *buf;
 };
 
 /* Multifd zlib compression */
@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
 {
     struct zlib_data *z = g_new0(struct zlib_data, 1);
     z_stream *zs = &z->zs;
+    const char *err_msg;
 
     zs->zalloc = Z_NULL;
     zs->zfree = Z_NULL;
     zs->opaque = Z_NULL;
     if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) {
-        g_free(z);
-        error_setg(errp, "multifd %u: deflate init failed", p->id);
-        return -1;
+        err_msg = "deflate init failed";
+        goto err_free_z;
     }
     /* This is the maxium size of the compressed buffer */
     z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE);
     z->zbuff = g_try_malloc(z->zbuff_len);
     if (!z->zbuff) {
-        deflateEnd(&z->zs);
-        g_free(z);
-        error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
-        return -1;
+        err_msg = "out of memory for zbuff";
+        goto err_deflate_end;
+    }
+    z->buf = g_try_malloc(qemu_target_page_size());
+    if (!z->buf) {
+        err_msg = "out of memory for buf";
+        goto err_free_zbuff;
     }
     p->data = z;
     return 0;
+
+err_free_zbuff:
+    g_free(z->zbuff);
+err_deflate_end:
+    deflateEnd(&z->zs);
+err_free_z:
+    g_free(z);
+    error_setg(errp, "multifd %u: %s", p->id, err_msg);
+    return -1;
 }
 
 /**
@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
     deflateEnd(&z->zs);
     g_free(z->zbuff);
     z->zbuff = NULL;
+    g_free(z->buf);
+    z->buf = NULL;
     g_free(p->data);
     p->data = NULL;
 }
@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
             flush = Z_SYNC_FLUSH;
         }
 
+        /*
+         * Since the VM might be running, the page may be changing concurrently
+         * with compression. zlib does not guarantee that this is safe,
+         * therefore copy the page before calling deflate().
+         */
+        memcpy(z->buf, p->pages->block->host + p->normal[i], page_size);
         zs->avail_in = page_size;
-        zs->next_in = p->pages->block->host + p->normal[i];
+        zs->next_in = z->buf;
 
         zs->avail_out = available;
         zs->next_out = z->zbuff + out_size;
-- 
2.35.3

Re: [PATCH v3] multifd: Copy pages before compressing them with zlib

Posted by Dr. David Alan Gilbert 3 years, 6 months ago

* Ilya Leoshkevich (iii@linux.ibm.com) wrote:
> zlib_send_prepare() compresses pages of a running VM. zlib does not
> make any thread-safety guarantees with respect to changing deflate()
> input concurrently with deflate() [1].
> 
> One can observe problems due to this with the IBM zEnterprise Data
> Compression accelerator capable zlib [2]. When the hardware
> acceleration is enabled, migration/multifd/tcp/plain/zlib test fails
> intermittently [3] due to sliding window corruption. The accelerator's
> architecture explicitly discourages concurrent accesses [4]:
> 
>     Page 26-57, "Other Conditions":
> 
>     As observed by this CPU, other CPUs, and channel
>     programs, references to the parameter block, first,
>     second, and third operands may be multiple-access
>     references, accesses to these storage locations are
>     not necessarily block-concurrent, and the sequence
>     of these accesses or references is undefined.
> 
> Mark Adler pointed out that vanilla zlib performs double fetches under
> certain circumstances as well [5], therefore we need to copy data
> before passing it to deflate().
> 
> [1] https://zlib.net/manual.html
> [2] https://github.com/madler/zlib/pull/410
> [3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html
> [4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf
> [5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html
> 
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>

Queued, thank you!

Dave

> ---
> 
> v1: https://lists.gnu.org/archive/html/qemu-devel/2022-03/msg06841.html
> v1 -> v2: Rebase, mention Mark Adler's reply in the commit message.
> 
> v2: https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00627.html
> v2 -> v3: Get rid of pointer maths (David).
>           Use a more relevant link to Mark Adler's comment (Peter).
> 
>  migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++--------
>  1 file changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
> index 3a7ae44485..18213a9513 100644
> --- a/migration/multifd-zlib.c
> +++ b/migration/multifd-zlib.c
> @@ -27,6 +27,8 @@ struct zlib_data {
>      uint8_t *zbuff;
>      /* size of compressed buffer */
>      uint32_t zbuff_len;
> +    /* uncompressed buffer of size qemu_target_page_size() */
> +    uint8_t *buf;
>  };
>  
>  /* Multifd zlib compression */
> @@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
>  {
>      struct zlib_data *z = g_new0(struct zlib_data, 1);
>      z_stream *zs = &z->zs;
> +    const char *err_msg;
>  
>      zs->zalloc = Z_NULL;
>      zs->zfree = Z_NULL;
>      zs->opaque = Z_NULL;
>      if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) {
> -        g_free(z);
> -        error_setg(errp, "multifd %u: deflate init failed", p->id);
> -        return -1;
> +        err_msg = "deflate init failed";
> +        goto err_free_z;
>      }
>      /* This is the maxium size of the compressed buffer */
>      z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE);
>      z->zbuff = g_try_malloc(z->zbuff_len);
>      if (!z->zbuff) {
> -        deflateEnd(&z->zs);
> -        g_free(z);
> -        error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
> -        return -1;
> +        err_msg = "out of memory for zbuff";
> +        goto err_deflate_end;
> +    }
> +    z->buf = g_try_malloc(qemu_target_page_size());
> +    if (!z->buf) {
> +        err_msg = "out of memory for buf";
> +        goto err_free_zbuff;
>      }
>      p->data = z;
>      return 0;
> +
> +err_free_zbuff:
> +    g_free(z->zbuff);
> +err_deflate_end:
> +    deflateEnd(&z->zs);
> +err_free_z:
> +    g_free(z);
> +    error_setg(errp, "multifd %u: %s", p->id, err_msg);
> +    return -1;
>  }
>  
>  /**
> @@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
>      deflateEnd(&z->zs);
>      g_free(z->zbuff);
>      z->zbuff = NULL;
> +    g_free(z->buf);
> +    z->buf = NULL;
>      g_free(p->data);
>      p->data = NULL;
>  }
> @@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
>              flush = Z_SYNC_FLUSH;
>          }
>  
> +        /*
> +         * Since the VM might be running, the page may be changing concurrently
> +         * with compression. zlib does not guarantee that this is safe,
> +         * therefore copy the page before calling deflate().
> +         */
> +        memcpy(z->buf, p->pages->block->host + p->normal[i], page_size);
>          zs->avail_in = page_size;
> -        zs->next_in = p->pages->block->host + p->normal[i];
> +        zs->next_in = z->buf;
>  
>          zs->avail_out = available;
>          zs->next_out = z->zbuff + out_size;
> -- 
> 2.35.3
> 
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [PATCH v3] multifd: Copy pages before compressing them with zlib

Posted by Dr. David Alan Gilbert 3 years, 6 months ago

* Ilya Leoshkevich (iii@linux.ibm.com) wrote:
> zlib_send_prepare() compresses pages of a running VM. zlib does not
> make any thread-safety guarantees with respect to changing deflate()
> input concurrently with deflate() [1].
> 
> One can observe problems due to this with the IBM zEnterprise Data
> Compression accelerator capable zlib [2]. When the hardware
> acceleration is enabled, migration/multifd/tcp/plain/zlib test fails
> intermittently [3] due to sliding window corruption. The accelerator's
> architecture explicitly discourages concurrent accesses [4]:
> 
>     Page 26-57, "Other Conditions":
> 
>     As observed by this CPU, other CPUs, and channel
>     programs, references to the parameter block, first,
>     second, and third operands may be multiple-access
>     references, accesses to these storage locations are
>     not necessarily block-concurrent, and the sequence
>     of these accesses or references is undefined.
> 
> Mark Adler pointed out that vanilla zlib performs double fetches under
> certain circumstances as well [5], therefore we need to copy data
> before passing it to deflate().
> 
> [1] https://zlib.net/manual.html
> [2] https://github.com/madler/zlib/pull/410
> [3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html
> [4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf
> [5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html
> 
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>

Thanks!


Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> ---
> 
> v1: https://lists.gnu.org/archive/html/qemu-devel/2022-03/msg06841.html
> v1 -> v2: Rebase, mention Mark Adler's reply in the commit message.
> 
> v2: https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00627.html
> v2 -> v3: Get rid of pointer maths (David).
>           Use a more relevant link to Mark Adler's comment (Peter).
> 
>  migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++--------
>  1 file changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
> index 3a7ae44485..18213a9513 100644
> --- a/migration/multifd-zlib.c
> +++ b/migration/multifd-zlib.c
> @@ -27,6 +27,8 @@ struct zlib_data {
>      uint8_t *zbuff;
>      /* size of compressed buffer */
>      uint32_t zbuff_len;
> +    /* uncompressed buffer of size qemu_target_page_size() */
> +    uint8_t *buf;
>  };
>  
>  /* Multifd zlib compression */
> @@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
>  {
>      struct zlib_data *z = g_new0(struct zlib_data, 1);
>      z_stream *zs = &z->zs;
> +    const char *err_msg;
>  
>      zs->zalloc = Z_NULL;
>      zs->zfree = Z_NULL;
>      zs->opaque = Z_NULL;
>      if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) {
> -        g_free(z);
> -        error_setg(errp, "multifd %u: deflate init failed", p->id);
> -        return -1;
> +        err_msg = "deflate init failed";
> +        goto err_free_z;
>      }
>      /* This is the maxium size of the compressed buffer */
>      z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE);
>      z->zbuff = g_try_malloc(z->zbuff_len);
>      if (!z->zbuff) {
> -        deflateEnd(&z->zs);
> -        g_free(z);
> -        error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
> -        return -1;
> +        err_msg = "out of memory for zbuff";
> +        goto err_deflate_end;
> +    }
> +    z->buf = g_try_malloc(qemu_target_page_size());
> +    if (!z->buf) {
> +        err_msg = "out of memory for buf";
> +        goto err_free_zbuff;
>      }
>      p->data = z;
>      return 0;
> +
> +err_free_zbuff:
> +    g_free(z->zbuff);
> +err_deflate_end:
> +    deflateEnd(&z->zs);
> +err_free_z:
> +    g_free(z);
> +    error_setg(errp, "multifd %u: %s", p->id, err_msg);
> +    return -1;
>  }
>  
>  /**
> @@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
>      deflateEnd(&z->zs);
>      g_free(z->zbuff);
>      z->zbuff = NULL;
> +    g_free(z->buf);
> +    z->buf = NULL;
>      g_free(p->data);
>      p->data = NULL;
>  }
> @@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
>              flush = Z_SYNC_FLUSH;
>          }
>  
> +        /*
> +         * Since the VM might be running, the page may be changing concurrently
> +         * with compression. zlib does not guarantee that this is safe,
> +         * therefore copy the page before calling deflate().
> +         */
> +        memcpy(z->buf, p->pages->block->host + p->normal[i], page_size);
>          zs->avail_in = page_size;
> -        zs->next_in = p->pages->block->host + p->normal[i];
> +        zs->next_in = z->buf;
>  
>          zs->avail_out = available;
>          zs->next_out = z->zbuff + out_size;
> -- 
> 2.35.3
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK