migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-)
zlib_send_prepare() compresses pages of a running VM. zlib does not
make any thread-safety guarantees about the deflate() input being
modified while deflate() is running [1].
One can observe problems due to this with a zlib build that supports
the IBM zEnterprise Data Compression accelerator [2]. When hardware
acceleration is enabled, the migration/multifd/tcp/plain/zlib test fails
intermittently [3] due to sliding window corruption. The accelerator's
architecture explicitly discourages concurrent accesses [4]:
Page 26-57, "Other Conditions":
As observed by this CPU, other CPUs, and channel
programs, references to the parameter block, first,
second, and third operands may be multiple-access
references, accesses to these storage locations are
not necessarily block-concurrent, and the sequence
of these accesses or references is undefined.
Mark Adler pointed out that vanilla zlib performs double fetches under
certain circumstances as well [5]; therefore, we need to copy the data
before passing it to deflate().
[1] https://zlib.net/manual.html
[2] https://github.com/madler/zlib/pull/410
[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html
[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf
[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
v1: https://lists.gnu.org/archive/html/qemu-devel/2022-03/msg06841.html
v1 -> v2: Rebase, mention Mark Adler's reply in the commit message.
v2: https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00627.html
v2 -> v3: Get rid of pointer maths (David).
Use a more relevant link to Mark Adler's comment (Peter).
migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++--------
1 file changed, 30 insertions(+), 8 deletions(-)
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 3a7ae44485..18213a9513 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -27,6 +27,8 @@ struct zlib_data {
uint8_t *zbuff;
/* size of compressed buffer */
uint32_t zbuff_len;
+ /* uncompressed buffer of size qemu_target_page_size() */
+ uint8_t *buf;
};
/* Multifd zlib compression */
@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
{
struct zlib_data *z = g_new0(struct zlib_data, 1);
z_stream *zs = &z->zs;
+ const char *err_msg;
zs->zalloc = Z_NULL;
zs->zfree = Z_NULL;
zs->opaque = Z_NULL;
if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) {
- g_free(z);
- error_setg(errp, "multifd %u: deflate init failed", p->id);
- return -1;
+ err_msg = "deflate init failed";
+ goto err_free_z;
}
/* This is the maxium size of the compressed buffer */
z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE);
z->zbuff = g_try_malloc(z->zbuff_len);
if (!z->zbuff) {
- deflateEnd(&z->zs);
- g_free(z);
- error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
- return -1;
+ err_msg = "out of memory for zbuff";
+ goto err_deflate_end;
+ }
+ z->buf = g_try_malloc(qemu_target_page_size());
+ if (!z->buf) {
+ err_msg = "out of memory for buf";
+ goto err_free_zbuff;
}
p->data = z;
return 0;
+
+err_free_zbuff:
+ g_free(z->zbuff);
+err_deflate_end:
+ deflateEnd(&z->zs);
+err_free_z:
+ g_free(z);
+ error_setg(errp, "multifd %u: %s", p->id, err_msg);
+ return -1;
}
/**
@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
deflateEnd(&z->zs);
g_free(z->zbuff);
z->zbuff = NULL;
+ g_free(z->buf);
+ z->buf = NULL;
g_free(p->data);
p->data = NULL;
}
@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
flush = Z_SYNC_FLUSH;
}
+ /*
+ * Since the VM might be running, the page may be changing concurrently
+ * with compression. zlib does not guarantee that this is safe,
+ * therefore copy the page before calling deflate().
+ */
+ memcpy(z->buf, p->pages->block->host + p->normal[i], page_size);
zs->avail_in = page_size;
- zs->next_in = p->pages->block->host + p->normal[i];
+ zs->next_in = z->buf;
zs->avail_out = available;
zs->next_out = z->zbuff + out_size;
--
2.35.3
* Ilya Leoshkevich (iii@linux.ibm.com) wrote: > zlib_send_prepare() compresses pages of a running VM. zlib does not > make any thread-safety guarantees with respect to changing deflate() > input concurrently with deflate() [1]. > > One can observe problems due to this with the IBM zEnterprise Data > Compression accelerator capable zlib [2]. When the hardware > acceleration is enabled, migration/multifd/tcp/plain/zlib test fails > intermittently [3] due to sliding window corruption. The accelerator's > architecture explicitly discourages concurrent accesses [4]: > > Page 26-57, "Other Conditions": > > As observed by this CPU, other CPUs, and channel > programs, references to the parameter block, first, > second, and third operands may be multiple-access > references, accesses to these storage locations are > not necessarily block-concurrent, and the sequence > of these accesses or references is undefined. > > Mark Adler pointed out that vanilla zlib performs double fetches under > certain circumstances as well [5], therefore we need to copy data > before passing it to deflate(). > > [1] https://zlib.net/manual.html > [2] https://github.com/madler/zlib/pull/410 > [3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html > [4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf > [5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html > > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> Queued, thank you! Dave > --- > > v1: https://lists.gnu.org/archive/html/qemu-devel/2022-03/msg06841.html > v1 -> v2: Rebase, mention Mark Adler's reply in the commit message. > > v2: https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00627.html > v2 -> v3: Get rid of pointer maths (David). > Use a more relevant link to Mark Adler's comment (Peter). 
> > migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++-------- > 1 file changed, 30 insertions(+), 8 deletions(-) > > diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c > index 3a7ae44485..18213a9513 100644 > --- a/migration/multifd-zlib.c > +++ b/migration/multifd-zlib.c > @@ -27,6 +27,8 @@ struct zlib_data { > uint8_t *zbuff; > /* size of compressed buffer */ > uint32_t zbuff_len; > + /* uncompressed buffer of size qemu_target_page_size() */ > + uint8_t *buf; > }; > > /* Multifd zlib compression */ > @@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) > { > struct zlib_data *z = g_new0(struct zlib_data, 1); > z_stream *zs = &z->zs; > + const char *err_msg; > > zs->zalloc = Z_NULL; > zs->zfree = Z_NULL; > zs->opaque = Z_NULL; > if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { > - g_free(z); > - error_setg(errp, "multifd %u: deflate init failed", p->id); > - return -1; > + err_msg = "deflate init failed"; > + goto err_free_z; > } > /* This is the maxium size of the compressed buffer */ > z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); > z->zbuff = g_try_malloc(z->zbuff_len); > if (!z->zbuff) { > - deflateEnd(&z->zs); > - g_free(z); > - error_setg(errp, "multifd %u: out of memory for zbuff", p->id); > - return -1; > + err_msg = "out of memory for zbuff"; > + goto err_deflate_end; > + } > + z->buf = g_try_malloc(qemu_target_page_size()); > + if (!z->buf) { > + err_msg = "out of memory for buf"; > + goto err_free_zbuff; > } > p->data = z; > return 0; > + > +err_free_zbuff: > + g_free(z->zbuff); > +err_deflate_end: > + deflateEnd(&z->zs); > +err_free_z: > + g_free(z); > + error_setg(errp, "multifd %u: %s", p->id, err_msg); > + return -1; > } > > /** > @@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) > deflateEnd(&z->zs); > g_free(z->zbuff); > z->zbuff = NULL; > + g_free(z->buf); > + z->buf = NULL; > g_free(p->data); > p->data = NULL; > } > @@ -114,8 +130,14 @@ 
static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) > flush = Z_SYNC_FLUSH; > } > > + /* > + * Since the VM might be running, the page may be changing concurrently > + * with compression. zlib does not guarantee that this is safe, > + * therefore copy the page before calling deflate(). > + */ > + memcpy(z->buf, p->pages->block->host + p->normal[i], page_size); > zs->avail_in = page_size; > - zs->next_in = p->pages->block->host + p->normal[i]; > + zs->next_in = z->buf; > > zs->avail_out = available; > zs->next_out = z->zbuff + out_size; > -- > 2.35.3 > > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
* Ilya Leoshkevich (iii@linux.ibm.com) wrote: > zlib_send_prepare() compresses pages of a running VM. zlib does not > make any thread-safety guarantees with respect to changing deflate() > input concurrently with deflate() [1]. > > One can observe problems due to this with the IBM zEnterprise Data > Compression accelerator capable zlib [2]. When the hardware > acceleration is enabled, migration/multifd/tcp/plain/zlib test fails > intermittently [3] due to sliding window corruption. The accelerator's > architecture explicitly discourages concurrent accesses [4]: > > Page 26-57, "Other Conditions": > > As observed by this CPU, other CPUs, and channel > programs, references to the parameter block, first, > second, and third operands may be multiple-access > references, accesses to these storage locations are > not necessarily block-concurrent, and the sequence > of these accesses or references is undefined. > > Mark Adler pointed out that vanilla zlib performs double fetches under > certain circumstances as well [5], therefore we need to copy data > before passing it to deflate(). > > [1] https://zlib.net/manual.html > [2] https://github.com/madler/zlib/pull/410 > [3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html > [4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf > [5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html > > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> Thanks! Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > > v1: https://lists.gnu.org/archive/html/qemu-devel/2022-03/msg06841.html > v1 -> v2: Rebase, mention Mark Adler's reply in the commit message. > > v2: https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00627.html > v2 -> v3: Get rid of pointer maths (David). > Use a more relevant link to Mark Adler's comment (Peter). 
> > migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++-------- > 1 file changed, 30 insertions(+), 8 deletions(-) > > diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c > index 3a7ae44485..18213a9513 100644 > --- a/migration/multifd-zlib.c > +++ b/migration/multifd-zlib.c > @@ -27,6 +27,8 @@ struct zlib_data { > uint8_t *zbuff; > /* size of compressed buffer */ > uint32_t zbuff_len; > + /* uncompressed buffer of size qemu_target_page_size() */ > + uint8_t *buf; > }; > > /* Multifd zlib compression */ > @@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) > { > struct zlib_data *z = g_new0(struct zlib_data, 1); > z_stream *zs = &z->zs; > + const char *err_msg; > > zs->zalloc = Z_NULL; > zs->zfree = Z_NULL; > zs->opaque = Z_NULL; > if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { > - g_free(z); > - error_setg(errp, "multifd %u: deflate init failed", p->id); > - return -1; > + err_msg = "deflate init failed"; > + goto err_free_z; > } > /* This is the maxium size of the compressed buffer */ > z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); > z->zbuff = g_try_malloc(z->zbuff_len); > if (!z->zbuff) { > - deflateEnd(&z->zs); > - g_free(z); > - error_setg(errp, "multifd %u: out of memory for zbuff", p->id); > - return -1; > + err_msg = "out of memory for zbuff"; > + goto err_deflate_end; > + } > + z->buf = g_try_malloc(qemu_target_page_size()); > + if (!z->buf) { > + err_msg = "out of memory for buf"; > + goto err_free_zbuff; > } > p->data = z; > return 0; > + > +err_free_zbuff: > + g_free(z->zbuff); > +err_deflate_end: > + deflateEnd(&z->zs); > +err_free_z: > + g_free(z); > + error_setg(errp, "multifd %u: %s", p->id, err_msg); > + return -1; > } > > /** > @@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) > deflateEnd(&z->zs); > g_free(z->zbuff); > z->zbuff = NULL; > + g_free(z->buf); > + z->buf = NULL; > g_free(p->data); > p->data = NULL; > } > @@ -114,8 +130,14 @@ 
static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) > flush = Z_SYNC_FLUSH; > } > > + /* > + * Since the VM might be running, the page may be changing concurrently > + * with compression. zlib does not guarantee that this is safe, > + * therefore copy the page before calling deflate(). > + */ > + memcpy(z->buf, p->pages->block->host + p->normal[i], page_size); > zs->avail_in = page_size; > - zs->next_in = p->pages->block->host + p->normal[i]; > + zs->next_in = z->buf; > > zs->avail_out = available; > zs->next_out = z->zbuff + out_size; > -- > 2.35.3 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
© 2016 - 2024 Red Hat, Inc.