[v1] *** Implement using Intel QAT to offload ZLIB

[PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Bryan Zhang 11 months ago

Adds support for 'qatzip' as an option for the multifd compression
method parameter, but copy-pastes the no-op logic to leave the actual
methods effectively unimplemented. This is in preparation of a
subsequent commit that will implement actually using QAT for compression
and decompression.

Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
---
 hw/core/qdev-properties-system.c |  6 ++-
 migration/meson.build            |  1 +
 migration/multifd-qatzip.c       | 81 ++++++++++++++++++++++++++++++++
 migration/multifd.h              |  1 +
 qapi/migration.json              |  5 +-
 5 files changed, 92 insertions(+), 2 deletions(-)
 create mode 100644 migration/multifd-qatzip.c

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 1a396521d5..d8e48dcb0e 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
 const PropertyInfo qdev_prop_multifd_compression = {
     .name = "MultiFDCompression",
     .description = "multifd_compression values, "
-                   "none/zlib/zstd",
+                   "none/zlib/zstd"
+#ifdef CONFIG_QATZIP
+                   "/qatzip"
+#endif
+                   ,
     .enum_table = &MultiFDCompression_lookup,
     .get = qdev_propinfo_get_enum,
     .set = qdev_propinfo_set_enum,
diff --git a/migration/meson.build b/migration/meson.build
index 92b1cc4297..e20f318379 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
   system_ss.add(files('block.c'))
 endif
 system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
+system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
 
 specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
                 if_true: files('ram.c',
diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c
new file mode 100644
index 0000000000..1733bbddb7
--- /dev/null
+++ b/migration/multifd-qatzip.c
@@ -0,0 +1,81 @@
+/*
+ * Multifd QATzip compression implementation
+ *
+ * Copyright (c) Bytedance
+ *
+ * Authors:
+ *  Bryan Zhang <bryan.zhang@bytedance.com>
+ *  Hao Xiang   <hao.xiang@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/ramblock.h"
+#include "exec/target_page.h"
+#include "qapi/error.h"
+#include "migration.h"
+#include "options.h"
+#include "multifd.h"
+
+static int qatzip_send_setup(MultiFDSendParams *p, Error **errp)
+{
+    return 0;
+}
+
+static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp) {};
+
+static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
+{
+    MultiFDPages_t *pages = p->pages;
+
+    for (int i = 0; i < p->normal_num; i++) {
+        p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
+        p->iov[p->iovs_num].iov_len = p->page_size;
+        p->iovs_num++;
+    }
+
+    p->next_packet_size = p->normal_num * p->page_size;
+    p->flags |= MULTIFD_FLAG_NOCOMP;
+    return 0;
+}
+
+static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp)
+{
+    return 0;
+}
+
+static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
+
+static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp)
+{
+    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
+
+    if (flags != MULTIFD_FLAG_NOCOMP) {
+        error_setg(errp, "multifd %u: flags received %x flags expected %x",
+                   p->id, flags, MULTIFD_FLAG_NOCOMP);
+        return -1;
+    }
+    for (int i = 0; i < p->normal_num; i++) {
+        p->iov[i].iov_base = p->host + p->normal[i];
+        p->iov[i].iov_len = p->page_size;
+    }
+    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
+}
+
+static MultiFDMethods multifd_qatzip_ops = {
+    .send_setup = qatzip_send_setup,
+    .send_cleanup = qatzip_send_cleanup,
+    .send_prepare = qatzip_send_prepare,
+    .recv_setup = qatzip_recv_setup,
+    .recv_cleanup = qatzip_recv_cleanup,
+    .recv_pages = qatzip_recv_pages
+};
+
+static void multifd_qatzip_register(void)
+{
+    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP, &multifd_qatzip_ops);
+}
+
+migration_init(multifd_qatzip_register);
diff --git a/migration/multifd.h b/migration/multifd.h
index a835643b48..5600f7fc82 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
 #define MULTIFD_FLAG_NOCOMP (0 << 1)
 #define MULTIFD_FLAG_ZLIB (1 << 1)
 #define MULTIFD_FLAG_ZSTD (2 << 1)
+#define MULTIFD_FLAG_QATZIP (3 << 1)
 
 /* This value needs to be a multiple of qemu_target_page_size() */
 #define MULTIFD_PACKET_SIZE (512 * 1024)
diff --git a/qapi/migration.json b/qapi/migration.json
index 6d5a4b0489..e3cc195aed 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -625,11 +625,14 @@
 #
 # @zstd: use zstd compression method.
 #
+# @qatzip: use qatzip compression method.
+#
 # Since: 5.0
 ##
 { 'enum': 'MultiFDCompression',
   'data': [ 'none', 'zlib',
-            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
+            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
+            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
 
 ##
 # @MigMode:
-- 
2.30.2

Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Fabiano Rosas 10 months, 3 weeks ago

Bryan Zhang <bryan.zhang@bytedance.com> writes:

+cc Yuan Liu, Daniel Berrangé

> Adds support for 'qatzip' as an option for the multifd compression
> method parameter, but copy-pastes the no-op logic to leave the actual
> methods effectively unimplemented. This is in preparation of a
> subsequent commit that will implement actually using QAT for compression
> and decompression.
>
> Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> ---
>  hw/core/qdev-properties-system.c |  6 ++-
>  migration/meson.build            |  1 +
>  migration/multifd-qatzip.c       | 81 ++++++++++++++++++++++++++++++++
>  migration/multifd.h              |  1 +
>  qapi/migration.json              |  5 +-
>  5 files changed, 92 insertions(+), 2 deletions(-)
>  create mode 100644 migration/multifd-qatzip.c
>
> diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
> index 1a396521d5..d8e48dcb0e 100644
> --- a/hw/core/qdev-properties-system.c
> +++ b/hw/core/qdev-properties-system.c
> @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
>  const PropertyInfo qdev_prop_multifd_compression = {
>      .name = "MultiFDCompression",
>      .description = "multifd_compression values, "
> -                   "none/zlib/zstd",
> +                   "none/zlib/zstd"
> +#ifdef CONFIG_QATZIP
> +                   "/qatzip"
> +#endif
> +                   ,
>      .enum_table = &MultiFDCompression_lookup,
>      .get = qdev_propinfo_get_enum,
>      .set = qdev_propinfo_set_enum,
> diff --git a/migration/meson.build b/migration/meson.build
> index 92b1cc4297..e20f318379 100644
> --- a/migration/meson.build
> +++ b/migration/meson.build
> @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
>    system_ss.add(files('block.c'))
>  endif
>  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
>  
>  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
>                  if_true: files('ram.c',
> diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c
> new file mode 100644
> index 0000000000..1733bbddb7
> --- /dev/null
> +++ b/migration/multifd-qatzip.c
> @@ -0,0 +1,81 @@
> +/*
> + * Multifd QATzip compression implementation
> + *
> + * Copyright (c) Bytedance
> + *
> + * Authors:
> + *  Bryan Zhang <bryan.zhang@bytedance.com>
> + *  Hao Xiang   <hao.xiang@bytedance.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "exec/ramblock.h"
> +#include "exec/target_page.h"
> +#include "qapi/error.h"
> +#include "migration.h"
> +#include "options.h"
> +#include "multifd.h"
> +
> +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp)
> +{
> +    return 0;
> +}
> +
> +static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp) {};
> +
> +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
> +{
> +    MultiFDPages_t *pages = p->pages;
> +
> +    for (int i = 0; i < p->normal_num; i++) {
> +        p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
> +        p->iov[p->iovs_num].iov_len = p->page_size;
> +        p->iovs_num++;
> +    }
> +
> +    p->next_packet_size = p->normal_num * p->page_size;
> +    p->flags |= MULTIFD_FLAG_NOCOMP;
> +    return 0;
> +}
> +
> +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp)
> +{
> +    return 0;
> +}
> +
> +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> +
> +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp)
> +{
> +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
> +
> +    if (flags != MULTIFD_FLAG_NOCOMP) {
> +        error_setg(errp, "multifd %u: flags received %x flags expected %x",
> +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> +        return -1;
> +    }
> +    for (int i = 0; i < p->normal_num; i++) {
> +        p->iov[i].iov_base = p->host + p->normal[i];
> +        p->iov[i].iov_len = p->page_size;
> +    }
> +    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
> +}
> +
> +static MultiFDMethods multifd_qatzip_ops = {
> +    .send_setup = qatzip_send_setup,
> +    .send_cleanup = qatzip_send_cleanup,
> +    .send_prepare = qatzip_send_prepare,
> +    .recv_setup = qatzip_recv_setup,
> +    .recv_cleanup = qatzip_recv_cleanup,
> +    .recv_pages = qatzip_recv_pages
> +};
> +
> +static void multifd_qatzip_register(void)
> +{
> +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP, &multifd_qatzip_ops);
> +}
> +
> +migration_init(multifd_qatzip_register);
> diff --git a/migration/multifd.h b/migration/multifd.h
> index a835643b48..5600f7fc82 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
>  #define MULTIFD_FLAG_NOCOMP (0 << 1)
>  #define MULTIFD_FLAG_ZLIB (1 << 1)
>  #define MULTIFD_FLAG_ZSTD (2 << 1)
> +#define MULTIFD_FLAG_QATZIP (3 << 1)
>  
>  /* This value needs to be a multiple of qemu_target_page_size() */
>  #define MULTIFD_PACKET_SIZE (512 * 1024)
> diff --git a/qapi/migration.json b/qapi/migration.json
> index 6d5a4b0489..e3cc195aed 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -625,11 +625,14 @@
>  #
>  # @zstd: use zstd compression method.
>  #
> +# @qatzip: use qatzip compression method.
> +#
>  # Since: 5.0
>  ##
>  { 'enum': 'MultiFDCompression',
>    'data': [ 'none', 'zlib',
> -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }

In another thread adding support to another Intel accelerator (IAA) we
decided that it was better to select the offloading as an accelerator
method to multifd zlib rather than as an entirely new compression
format. Take a look at that discussion:
https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com

As I understand it, QAT + QATzip would be compatible with both zlib and
IAA + QPL, so we'd add another accelerator method like this:

https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com

All that, of course, assuming we even want to support both
accelerators. They're addressing the same problem after all. I wonder
how we'd choose a precedence, since both seem to be present in the same
processor family.

RE: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Liu, Yuan1 10 months, 3 weeks ago

> -----Original Message-----
> From: Fabiano Rosas <farosas@suse.de>
> Sent: Saturday, January 6, 2024 4:07 AM
> To: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> peter.maydell@linaro.org; hao.xiang@bytedance.com
> Cc: bryan.zhang@bytedance.com; Liu, Yuan1 <yuan1.liu@intel.com>;
> berrange@redhat.com
> Subject: Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip'
> compression method
> 
> Bryan Zhang <bryan.zhang@bytedance.com> writes:
> 
> +cc Yuan Liu, Daniel Berrangé
> 
> > Adds support for 'qatzip' as an option for the multifd compression
> > method parameter, but copy-pastes the no-op logic to leave the actual
> > methods effectively unimplemented. This is in preparation of a
> > subsequent commit that will implement actually using QAT for
> > compression and decompression.
> >
> > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> > ---
> >  hw/core/qdev-properties-system.c |  6 ++-
> >  migration/meson.build            |  1 +
> >  migration/multifd-qatzip.c       | 81 ++++++++++++++++++++++++++++++++
> >  migration/multifd.h              |  1 +
> >  qapi/migration.json              |  5 +-
> >  5 files changed, 92 insertions(+), 2 deletions(-)  create mode 100644
> > migration/multifd-qatzip.c
> >
> > diff --git a/hw/core/qdev-properties-system.c
> > b/hw/core/qdev-properties-system.c
> > index 1a396521d5..d8e48dcb0e 100644
> > --- a/hw/core/qdev-properties-system.c
> > +++ b/hw/core/qdev-properties-system.c
> > @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
> > const PropertyInfo qdev_prop_multifd_compression = {
> >      .name = "MultiFDCompression",
> >      .description = "multifd_compression values, "
> > -                   "none/zlib/zstd",
> > +                   "none/zlib/zstd"
> > +#ifdef CONFIG_QATZIP
> > +                   "/qatzip"
> > +#endif
> > +                   ,
> >      .enum_table = &MultiFDCompression_lookup,
> >      .get = qdev_propinfo_get_enum,
> >      .set = qdev_propinfo_set_enum,
> > diff --git a/migration/meson.build b/migration/meson.build index
> > 92b1cc4297..e20f318379 100644
> > --- a/migration/meson.build
> > +++ b/migration/meson.build
> > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
> >    system_ss.add(files('block.c'))
> >  endif
> >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> > +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
> >
> >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
> >                  if_true: files('ram.c', diff --git
> > a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c new file
> > mode 100644 index 0000000000..1733bbddb7
> > --- /dev/null
> > +++ b/migration/multifd-qatzip.c
> > @@ -0,0 +1,81 @@
> > +/*
> > + * Multifd QATzip compression implementation
> > + *
> > + * Copyright (c) Bytedance
> > + *
> > + * Authors:
> > + *  Bryan Zhang <bryan.zhang@bytedance.com>
> > + *  Hao Xiang   <hao.xiang@bytedance.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > + * See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "exec/ramblock.h"
> > +#include "exec/target_page.h"
> > +#include "qapi/error.h"
> > +#include "migration.h"
> > +#include "options.h"
> > +#include "multifd.h"
> > +
> > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp) {
> > +    return 0;
> > +}
> > +
> > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp)
> > +{};
> > +
> > +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp) {
> > +    MultiFDPages_t *pages = p->pages;
> > +
> > +    for (int i = 0; i < p->normal_num; i++) {
> > +        p->iov[p->iovs_num].iov_base = pages->block->host + p-
> >normal[i];
> > +        p->iov[p->iovs_num].iov_len = p->page_size;
> > +        p->iovs_num++;
> > +    }
> > +
> > +    p->next_packet_size = p->normal_num * p->page_size;
> > +    p->flags |= MULTIFD_FLAG_NOCOMP;
> > +    return 0;
> > +}
> > +
> > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp) {
> > +    return 0;
> > +}
> > +
> > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> > +
> > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp) {
> > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
> > +
> > +    if (flags != MULTIFD_FLAG_NOCOMP) {
> > +        error_setg(errp, "multifd %u: flags received %x flags
> expected %x",
> > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> > +        return -1;
> > +    }
> > +    for (int i = 0; i < p->normal_num; i++) {
> > +        p->iov[i].iov_base = p->host + p->normal[i];
> > +        p->iov[i].iov_len = p->page_size;
> > +    }
> > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
> > +}
> > +
> > +static MultiFDMethods multifd_qatzip_ops = {
> > +    .send_setup = qatzip_send_setup,
> > +    .send_cleanup = qatzip_send_cleanup,
> > +    .send_prepare = qatzip_send_prepare,
> > +    .recv_setup = qatzip_recv_setup,
> > +    .recv_cleanup = qatzip_recv_cleanup,
> > +    .recv_pages = qatzip_recv_pages
> > +};
> > +
> > +static void multifd_qatzip_register(void) {
> > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP,
> > +&multifd_qatzip_ops); }
> > +
> > +migration_init(multifd_qatzip_register);
> > diff --git a/migration/multifd.h b/migration/multifd.h index
> > a835643b48..5600f7fc82 100644
> > --- a/migration/multifd.h
> > +++ b/migration/multifd.h
> > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block,
> > ram_addr_t offset);  #define MULTIFD_FLAG_NOCOMP (0 << 1)  #define
> > MULTIFD_FLAG_ZLIB (1 << 1)  #define MULTIFD_FLAG_ZSTD (2 << 1)
> > +#define MULTIFD_FLAG_QATZIP (3 << 1)
> >
> >  /* This value needs to be a multiple of qemu_target_page_size() */
> > #define MULTIFD_PACKET_SIZE (512 * 1024) diff --git
> > a/qapi/migration.json b/qapi/migration.json index
> > 6d5a4b0489..e3cc195aed 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -625,11 +625,14 @@
> >  #
> >  # @zstd: use zstd compression method.
> >  #
> > +# @qatzip: use qatzip compression method.
> > +#
> >  # Since: 5.0
> >  ##
> >  { 'enum': 'MultiFDCompression',
> >    'data': [ 'none', 'zlib',
> > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
> 
> In another thread adding support to another Intel accelerator (IAA) we
> decided that it was better to select the offloading as an accelerator
> method to multifd zlib rather than as an entirely new compression format.
> Take a look at that discussion:
> https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
> 
> As I understand it, QAT + QATzip would be compatible with both zlib and
> IAA + QPL, so we'd add another accelerator method like this:
> 
> https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com
> 
> All that, of course, assuming we even want to support both accelerators.
> They're addressing the same problem after all. I wonder how we'd choose a
> precedence, since both seem to be present in the same processor family.


I agree that IAA and QAT should run under the accelerator framework and be 
compatible with zlib.

Regarding the choice between QAT and IAA:

1. This decision can be determined by the end customer through live migration 
parameters. The Xeon products have different specifications, which will impact 
the presence and quantity of IAA and QAT devices.

2. If the customer's chosen product includes both IAA and QAT, the live migration
software can list the priority of accelerators supporting the zlib compression 
algorithm.

Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Hao Xiang 10 months, 3 weeks ago

On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de> wrote:
>
> Bryan Zhang <bryan.zhang@bytedance.com> writes:
>
> +cc Yuan Liu, Daniel Berrangé
>
> > Adds support for 'qatzip' as an option for the multifd compression
> > method parameter, but copy-pastes the no-op logic to leave the actual
> > methods effectively unimplemented. This is in preparation of a
> > subsequent commit that will implement actually using QAT for compression
> > and decompression.
> >
> > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> > ---
> >  hw/core/qdev-properties-system.c |  6 ++-
> >  migration/meson.build            |  1 +
> >  migration/multifd-qatzip.c       | 81 ++++++++++++++++++++++++++++++++
> >  migration/multifd.h              |  1 +
> >  qapi/migration.json              |  5 +-
> >  5 files changed, 92 insertions(+), 2 deletions(-)
> >  create mode 100644 migration/multifd-qatzip.c
> >
> > diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
> > index 1a396521d5..d8e48dcb0e 100644
> > --- a/hw/core/qdev-properties-system.c
> > +++ b/hw/core/qdev-properties-system.c
> > @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
> >  const PropertyInfo qdev_prop_multifd_compression = {
> >      .name = "MultiFDCompression",
> >      .description = "multifd_compression values, "
> > -                   "none/zlib/zstd",
> > +                   "none/zlib/zstd"
> > +#ifdef CONFIG_QATZIP
> > +                   "/qatzip"
> > +#endif
> > +                   ,
> >      .enum_table = &MultiFDCompression_lookup,
> >      .get = qdev_propinfo_get_enum,
> >      .set = qdev_propinfo_set_enum,
> > diff --git a/migration/meson.build b/migration/meson.build
> > index 92b1cc4297..e20f318379 100644
> > --- a/migration/meson.build
> > +++ b/migration/meson.build
> > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
> >    system_ss.add(files('block.c'))
> >  endif
> >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> > +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
> >
> >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
> >                  if_true: files('ram.c',
> > diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c
> > new file mode 100644
> > index 0000000000..1733bbddb7
> > --- /dev/null
> > +++ b/migration/multifd-qatzip.c
> > @@ -0,0 +1,81 @@
> > +/*
> > + * Multifd QATzip compression implementation
> > + *
> > + * Copyright (c) Bytedance
> > + *
> > + * Authors:
> > + *  Bryan Zhang <bryan.zhang@bytedance.com>
> > + *  Hao Xiang   <hao.xiang@bytedance.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "exec/ramblock.h"
> > +#include "exec/target_page.h"
> > +#include "qapi/error.h"
> > +#include "migration.h"
> > +#include "options.h"
> > +#include "multifd.h"
> > +
> > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp)
> > +{
> > +    return 0;
> > +}
> > +
> > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp) {};
> > +
> > +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
> > +{
> > +    MultiFDPages_t *pages = p->pages;
> > +
> > +    for (int i = 0; i < p->normal_num; i++) {
> > +        p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
> > +        p->iov[p->iovs_num].iov_len = p->page_size;
> > +        p->iovs_num++;
> > +    }
> > +
> > +    p->next_packet_size = p->normal_num * p->page_size;
> > +    p->flags |= MULTIFD_FLAG_NOCOMP;
> > +    return 0;
> > +}
> > +
> > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp)
> > +{
> > +    return 0;
> > +}
> > +
> > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> > +
> > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp)
> > +{
> > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
> > +
> > +    if (flags != MULTIFD_FLAG_NOCOMP) {
> > +        error_setg(errp, "multifd %u: flags received %x flags expected %x",
> > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> > +        return -1;
> > +    }
> > +    for (int i = 0; i < p->normal_num; i++) {
> > +        p->iov[i].iov_base = p->host + p->normal[i];
> > +        p->iov[i].iov_len = p->page_size;
> > +    }
> > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
> > +}
> > +
> > +static MultiFDMethods multifd_qatzip_ops = {
> > +    .send_setup = qatzip_send_setup,
> > +    .send_cleanup = qatzip_send_cleanup,
> > +    .send_prepare = qatzip_send_prepare,
> > +    .recv_setup = qatzip_recv_setup,
> > +    .recv_cleanup = qatzip_recv_cleanup,
> > +    .recv_pages = qatzip_recv_pages
> > +};
> > +
> > +static void multifd_qatzip_register(void)
> > +{
> > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP, &multifd_qatzip_ops);
> > +}
> > +
> > +migration_init(multifd_qatzip_register);
> > diff --git a/migration/multifd.h b/migration/multifd.h
> > index a835643b48..5600f7fc82 100644
> > --- a/migration/multifd.h
> > +++ b/migration/multifd.h
> > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
> >  #define MULTIFD_FLAG_NOCOMP (0 << 1)
> >  #define MULTIFD_FLAG_ZLIB (1 << 1)
> >  #define MULTIFD_FLAG_ZSTD (2 << 1)
> > +#define MULTIFD_FLAG_QATZIP (3 << 1)
> >
> >  /* This value needs to be a multiple of qemu_target_page_size() */
> >  #define MULTIFD_PACKET_SIZE (512 * 1024)
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index 6d5a4b0489..e3cc195aed 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -625,11 +625,14 @@
> >  #
> >  # @zstd: use zstd compression method.
> >  #
> > +# @qatzip: use qatzip compression method.
> > +#
> >  # Since: 5.0
> >  ##
> >  { 'enum': 'MultiFDCompression',
> >    'data': [ 'none', 'zlib',
> > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
>
> In another thread adding support to another Intel accelerator (IAA) we
> decided that it was better to select the offloading as an accelerator
> method to multifd zlib rather than as an entirely new compression
> format. Take a look at that discussion:
> https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com

We had some early discussion with Intel folks (probably a different
team than the one with the above patchset). The understanding at the
time is that QAT is good at both bulk data compression and
decompression. IAA is good at decompression with smaller data size but
compression performance is not the best. In multifd, we are
compressing up to 128 4k pages at a time and potentially this can
increase by configuring the packet size, at the time we thought QAT
could be a better fit in the multifd live migration scenario. We would
like to hear more from Intel.

From our benchmark testing, with two QAT devices, we can get deflate
compression throughout to around 7GB/s with ~160% CPU. That's beating
the current software implementation (zlib and zstd) by a lot. We are
still tuning the configuration in QEMU live migration now.

>
> As I understand it, QAT + QATzip would be compatible with both zlib and
> IAA + QPL, so we'd add another accelerator method like this:
>
> https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com
>

I quickly read over the IAA patchset and I saw this:

"However, due to some reasons, QPL is currently
not compatible with the existing Zlib method that Zlib compressed data
can be decompressed by QPl and vice versa."

The above probably means the current zlib software implementation and
IAA are not compatible.

For QAT, although, both Intel's QATzip and zlib are internally using
deflate, QATzip only supports deflate with a 4 byte header, deflate
wrapped by Gzip header and footer, or deflate wrapped by Intel® QAT
Gzip* extension header and footer. None of the headers can be
recognized by zlib software implementation is my understanding. So if
we want to make them compatible with zlib, the QATzip library needs to
support that.

> All that, of course, assuming we even want to support both
> accelerators. They're addressing the same problem after all. I wonder
> how we'd choose a precedence, since both seem to be present in the same
> processor family.
>
>

That's an interesting question :-) I think overall performance
(throughput and CPU overhead) should both be considered. IAA and QAT
accelerators don't present on all systems. We Bytedance choose to have
both on our platform when purchasing from Intel.

RE: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Liu, Yuan1 10 months, 3 weeks ago

> -----Original Message-----
> From: Hao Xiang <hao.xiang@bytedance.com>
> Sent: Saturday, January 6, 2024 7:53 AM
> To: Fabiano Rosas <farosas@suse.de>
> Cc: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> peter.maydell@linaro.org; Liu, Yuan1 <yuan1.liu@intel.com>;
> berrange@redhat.com
> Subject: Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented
> 'qatzip' compression method
> 
> On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de> wrote:
> >
> > Bryan Zhang <bryan.zhang@bytedance.com> writes:
> >
> > +cc Yuan Liu, Daniel Berrangé
> >
> > > Adds support for 'qatzip' as an option for the multifd compression
> > > method parameter, but copy-pastes the no-op logic to leave the
> > > actual methods effectively unimplemented. This is in preparation of
> > > a subsequent commit that will implement actually using QAT for
> > > compression and decompression.
> > >
> > > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> > > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> > > ---
> > >  hw/core/qdev-properties-system.c |  6 ++-
> > >  migration/meson.build            |  1 +
> > >  migration/multifd-qatzip.c       | 81
> ++++++++++++++++++++++++++++++++
> > >  migration/multifd.h              |  1 +
> > >  qapi/migration.json              |  5 +-
> > >  5 files changed, 92 insertions(+), 2 deletions(-)  create mode
> > > 100644 migration/multifd-qatzip.c
> > >
> > > diff --git a/hw/core/qdev-properties-system.c
> > > b/hw/core/qdev-properties-system.c
> > > index 1a396521d5..d8e48dcb0e 100644
> > > --- a/hw/core/qdev-properties-system.c
> > > +++ b/hw/core/qdev-properties-system.c
> > > @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
> > > const PropertyInfo qdev_prop_multifd_compression = {
> > >      .name = "MultiFDCompression",
> > >      .description = "multifd_compression values, "
> > > -                   "none/zlib/zstd",
> > > +                   "none/zlib/zstd"
> > > +#ifdef CONFIG_QATZIP
> > > +                   "/qatzip"
> > > +#endif
> > > +                   ,
> > >      .enum_table = &MultiFDCompression_lookup,
> > >      .get = qdev_propinfo_get_enum,
> > >      .set = qdev_propinfo_set_enum,
> > > diff --git a/migration/meson.build b/migration/meson.build index
> > > 92b1cc4297..e20f318379 100644
> > > --- a/migration/meson.build
> > > +++ b/migration/meson.build
> > > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
> > >    system_ss.add(files('block.c'))
> > >  endif
> > >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> > > +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
> > >
> > >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
> > >                  if_true: files('ram.c', diff --git
> > > a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c new file
> > > mode 100644 index 0000000000..1733bbddb7
> > > --- /dev/null
> > > +++ b/migration/multifd-qatzip.c
> > > @@ -0,0 +1,81 @@
> > > +/*
> > > + * Multifd QATzip compression implementation
> > > + *
> > > + * Copyright (c) Bytedance
> > > + *
> > > + * Authors:
> > > + *  Bryan Zhang <bryan.zhang@bytedance.com>
> > > + *  Hao Xiang   <hao.xiang@bytedance.com>
> > > + *
> > > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > > + * See the COPYING file in the top-level directory.
> > > + */
> > > +
> > > +#include "qemu/osdep.h"
> > > +#include "exec/ramblock.h"
> > > +#include "exec/target_page.h"
> > > +#include "qapi/error.h"
> > > +#include "migration.h"
> > > +#include "options.h"
> > > +#include "multifd.h"
> > > +
> > > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp) {
> > > +    return 0;
> > > +}
> > > +
> > > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp)
> > > +{};
> > > +
> > > +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
> > > +{
> > > +    MultiFDPages_t *pages = p->pages;
> > > +
> > > +    for (int i = 0; i < p->normal_num; i++) {
> > > +        p->iov[p->iovs_num].iov_base = pages->block->host + p-
> >normal[i];
> > > +        p->iov[p->iovs_num].iov_len = p->page_size;
> > > +        p->iovs_num++;
> > > +    }
> > > +
> > > +    p->next_packet_size = p->normal_num * p->page_size;
> > > +    p->flags |= MULTIFD_FLAG_NOCOMP;
> > > +    return 0;
> > > +}
> > > +
> > > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp) {
> > > +    return 0;
> > > +}
> > > +
> > > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> > > +
> > > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp) {
> > > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
> > > +
> > > +    if (flags != MULTIFD_FLAG_NOCOMP) {
> > > +        error_setg(errp, "multifd %u: flags received %x flags
> expected %x",
> > > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> > > +        return -1;
> > > +    }
> > > +    for (int i = 0; i < p->normal_num; i++) {
> > > +        p->iov[i].iov_base = p->host + p->normal[i];
> > > +        p->iov[i].iov_len = p->page_size;
> > > +    }
> > > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num,
> > > +errp); }
> > > +
> > > +static MultiFDMethods multifd_qatzip_ops = {
> > > +    .send_setup = qatzip_send_setup,
> > > +    .send_cleanup = qatzip_send_cleanup,
> > > +    .send_prepare = qatzip_send_prepare,
> > > +    .recv_setup = qatzip_recv_setup,
> > > +    .recv_cleanup = qatzip_recv_cleanup,
> > > +    .recv_pages = qatzip_recv_pages };
> > > +
> > > +static void multifd_qatzip_register(void) {
> > > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP,
> > > +&multifd_qatzip_ops); }
> > > +
> > > +migration_init(multifd_qatzip_register);
> > > diff --git a/migration/multifd.h b/migration/multifd.h index
> > > a835643b48..5600f7fc82 100644
> > > --- a/migration/multifd.h
> > > +++ b/migration/multifd.h
> > > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock
> > > *block, ram_addr_t offset);  #define MULTIFD_FLAG_NOCOMP (0 << 1)
> > > #define MULTIFD_FLAG_ZLIB (1 << 1)  #define MULTIFD_FLAG_ZSTD (2 <<
> > > 1)
> > > +#define MULTIFD_FLAG_QATZIP (3 << 1)
> > >
> > >  /* This value needs to be a multiple of qemu_target_page_size() */
> > > #define MULTIFD_PACKET_SIZE (512 * 1024) diff --git
> > > a/qapi/migration.json b/qapi/migration.json index
> > > 6d5a4b0489..e3cc195aed 100644
> > > --- a/qapi/migration.json
> > > +++ b/qapi/migration.json
> > > @@ -625,11 +625,14 @@
> > >  #
> > >  # @zstd: use zstd compression method.
> > >  #
> > > +# @qatzip: use qatzip compression method.
> > > +#
> > >  # Since: 5.0
> > >  ##
> > >  { 'enum': 'MultiFDCompression',
> > >    'data': [ 'none', 'zlib',
> > > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> > > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> > > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
> >
> > In another thread adding support to another Intel accelerator (IAA) we
> > decided that it was better to select the offloading as an accelerator
> > method to multifd zlib rather than as an entirely new compression
> > format. Take a look at that discussion:
> > https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
> 
> We had some early discussion with Intel folks (probably a different team
> than the one with the above patchset). The understanding at the time is
> that QAT is good at both bulk data compression and decompression. IAA is
> good at decompression with smaller data size but compression performance
> is not the best. In multifd, we are compressing up to 128 4k pages at a
> time and potentially this can increase by configuring the packet size, at
> the time we thought QAT could be a better fit in the multifd live
> migration scenario. We would like to hear more from Intel.
> 
> From our benchmark testing, with two QAT devices, we can get deflate
> compression throughout to around 7GB/s with ~160% CPU. That's beating the
> current software implementation (zlib and zstd) by a lot. We are still
> tuning the configuration in QEMU live migration now.
> 
> >
> > As I understand it, QAT + QATzip would be compatible with both zlib
> > and IAA + QPL, so we'd add another accelerator method like this:
> >
> > https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com
> >
> 
> I quickly read over the IAA patchset and I saw this:
> 
> "However, due to some reasons, QPL is currently not compatible with the
> existing Zlib method that Zlib compressed data can be decompressed by QPl
> and vice versa."
> 
> The above probably means the current zlib software implementation and IAA
> are not compatible.
> 
> For QAT, although, both Intel's QATzip and zlib are internally using
> deflate, QATzip only supports deflate with a 4 byte header, deflate
> wrapped by Gzip header and footer, or deflate wrapped by Intel® QAT
> Gzip* extension header and footer. None of the headers can be recognized
> by zlib software implementation is my understanding. So if we want to make
> them compatible with zlib, the QATzip library needs to support that.

The QPL library currently cannot support the Z_SYNC_FLULSH operation of zlib steaming. 
This is the reason why it is not compatible with zlib.

> > All that, of course, assuming we even want to support both
> > accelerators. They're addressing the same problem after all. I wonder
> > how we'd choose a precedence, since both seem to be present in the
> > same processor family.
> >
> >
> 
> That's an interesting question :-) I think overall performance (throughput
> and CPU overhead) should both be considered. IAA and QAT accelerators
> don't present on all systems. We Bytedance choose to have both on our
> platform when purchasing from Intel.

RE: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Fabiano Rosas 10 months, 3 weeks ago

"Liu, Yuan1" <yuan1.liu@intel.com> writes:

>> -----Original Message-----
>> From: Hao Xiang <hao.xiang@bytedance.com>
>> Sent: Saturday, January 6, 2024 7:53 AM
>> To: Fabiano Rosas <farosas@suse.de>
>> Cc: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
>> marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
>> peter.maydell@linaro.org; Liu, Yuan1 <yuan1.liu@intel.com>;
>> berrange@redhat.com
>> Subject: Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented
>> 'qatzip' compression method
>> 
>> On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de> wrote:
>> >
>> > Bryan Zhang <bryan.zhang@bytedance.com> writes:
>> >
>> > +cc Yuan Liu, Daniel Berrangé
>> >
>> > > Adds support for 'qatzip' as an option for the multifd compression
>> > > method parameter, but copy-pastes the no-op logic to leave the
>> > > actual methods effectively unimplemented. This is in preparation of
>> > > a subsequent commit that will implement actually using QAT for
>> > > compression and decompression.
>> > >
>> > > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
>> > > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
>> > > ---
>> > >  hw/core/qdev-properties-system.c |  6 ++-
>> > >  migration/meson.build            |  1 +
>> > >  migration/multifd-qatzip.c       | 81
>> ++++++++++++++++++++++++++++++++
>> > >  migration/multifd.h              |  1 +
>> > >  qapi/migration.json              |  5 +-
>> > >  5 files changed, 92 insertions(+), 2 deletions(-)  create mode
>> > > 100644 migration/multifd-qatzip.c
>> > >
>> > > diff --git a/hw/core/qdev-properties-system.c
>> > > b/hw/core/qdev-properties-system.c
>> > > index 1a396521d5..d8e48dcb0e 100644
>> > > --- a/hw/core/qdev-properties-system.c
>> > > +++ b/hw/core/qdev-properties-system.c
>> > > @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
>> > > const PropertyInfo qdev_prop_multifd_compression = {
>> > >      .name = "MultiFDCompression",
>> > >      .description = "multifd_compression values, "
>> > > -                   "none/zlib/zstd",
>> > > +                   "none/zlib/zstd"
>> > > +#ifdef CONFIG_QATZIP
>> > > +                   "/qatzip"
>> > > +#endif
>> > > +                   ,
>> > >      .enum_table = &MultiFDCompression_lookup,
>> > >      .get = qdev_propinfo_get_enum,
>> > >      .set = qdev_propinfo_set_enum,
>> > > diff --git a/migration/meson.build b/migration/meson.build index
>> > > 92b1cc4297..e20f318379 100644
>> > > --- a/migration/meson.build
>> > > +++ b/migration/meson.build
>> > > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
>> > >    system_ss.add(files('block.c'))
>> > >  endif
>> > >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
>> > > +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
>> > >
>> > >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
>> > >                  if_true: files('ram.c', diff --git
>> > > a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c new file
>> > > mode 100644 index 0000000000..1733bbddb7
>> > > --- /dev/null
>> > > +++ b/migration/multifd-qatzip.c
>> > > @@ -0,0 +1,81 @@
>> > > +/*
>> > > + * Multifd QATzip compression implementation
>> > > + *
>> > > + * Copyright (c) Bytedance
>> > > + *
>> > > + * Authors:
>> > > + *  Bryan Zhang <bryan.zhang@bytedance.com>
>> > > + *  Hao Xiang   <hao.xiang@bytedance.com>
>> > > + *
>> > > + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> > > + * See the COPYING file in the top-level directory.
>> > > + */
>> > > +
>> > > +#include "qemu/osdep.h"
>> > > +#include "exec/ramblock.h"
>> > > +#include "exec/target_page.h"
>> > > +#include "qapi/error.h"
>> > > +#include "migration.h"
>> > > +#include "options.h"
>> > > +#include "multifd.h"
>> > > +
>> > > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp) {
>> > > +    return 0;
>> > > +}
>> > > +
>> > > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp)
>> > > +{};
>> > > +
>> > > +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
>> > > +{
>> > > +    MultiFDPages_t *pages = p->pages;
>> > > +
>> > > +    for (int i = 0; i < p->normal_num; i++) {
>> > > +        p->iov[p->iovs_num].iov_base = pages->block->host + p-
>> >normal[i];
>> > > +        p->iov[p->iovs_num].iov_len = p->page_size;
>> > > +        p->iovs_num++;
>> > > +    }
>> > > +
>> > > +    p->next_packet_size = p->normal_num * p->page_size;
>> > > +    p->flags |= MULTIFD_FLAG_NOCOMP;
>> > > +    return 0;
>> > > +}
>> > > +
>> > > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp) {
>> > > +    return 0;
>> > > +}
>> > > +
>> > > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
>> > > +
>> > > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp) {
>> > > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
>> > > +
>> > > +    if (flags != MULTIFD_FLAG_NOCOMP) {
>> > > +        error_setg(errp, "multifd %u: flags received %x flags
>> expected %x",
>> > > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
>> > > +        return -1;
>> > > +    }
>> > > +    for (int i = 0; i < p->normal_num; i++) {
>> > > +        p->iov[i].iov_base = p->host + p->normal[i];
>> > > +        p->iov[i].iov_len = p->page_size;
>> > > +    }
>> > > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num,
>> > > +errp); }
>> > > +
>> > > +static MultiFDMethods multifd_qatzip_ops = {
>> > > +    .send_setup = qatzip_send_setup,
>> > > +    .send_cleanup = qatzip_send_cleanup,
>> > > +    .send_prepare = qatzip_send_prepare,
>> > > +    .recv_setup = qatzip_recv_setup,
>> > > +    .recv_cleanup = qatzip_recv_cleanup,
>> > > +    .recv_pages = qatzip_recv_pages };
>> > > +
>> > > +static void multifd_qatzip_register(void) {
>> > > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP,
>> > > +&multifd_qatzip_ops); }
>> > > +
>> > > +migration_init(multifd_qatzip_register);
>> > > diff --git a/migration/multifd.h b/migration/multifd.h index
>> > > a835643b48..5600f7fc82 100644
>> > > --- a/migration/multifd.h
>> > > +++ b/migration/multifd.h
>> > > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock
>> > > *block, ram_addr_t offset);  #define MULTIFD_FLAG_NOCOMP (0 << 1)
>> > > #define MULTIFD_FLAG_ZLIB (1 << 1)  #define MULTIFD_FLAG_ZSTD (2 <<
>> > > 1)
>> > > +#define MULTIFD_FLAG_QATZIP (3 << 1)
>> > >
>> > >  /* This value needs to be a multiple of qemu_target_page_size() */
>> > > #define MULTIFD_PACKET_SIZE (512 * 1024) diff --git
>> > > a/qapi/migration.json b/qapi/migration.json index
>> > > 6d5a4b0489..e3cc195aed 100644
>> > > --- a/qapi/migration.json
>> > > +++ b/qapi/migration.json
>> > > @@ -625,11 +625,14 @@
>> > >  #
>> > >  # @zstd: use zstd compression method.
>> > >  #
>> > > +# @qatzip: use qatzip compression method.
>> > > +#
>> > >  # Since: 5.0
>> > >  ##
>> > >  { 'enum': 'MultiFDCompression',
>> > >    'data': [ 'none', 'zlib',
>> > > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
>> > > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
>> > > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
>> >
>> > In another thread adding support to another Intel accelerator (IAA) we
>> > decided that it was better to select the offloading as an accelerator
>> > method to multifd zlib rather than as an entirely new compression
>> > format. Take a look at that discussion:
>> > https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
>> 
>> We had some early discussion with Intel folks (probably a different team
>> than the one with the above patchset). The understanding at the time is
>> that QAT is good at both bulk data compression and decompression. IAA is
>> good at decompression with smaller data size but compression performance
>> is not the best. In multifd, we are compressing up to 128 4k pages at a
>> time and potentially this can increase by configuring the packet size, at
>> the time we thought QAT could be a better fit in the multifd live
>> migration scenario. We would like to hear more from Intel.
>> 
>> From our benchmark testing, with two QAT devices, we can get deflate
>> compression throughout to around 7GB/s with ~160% CPU. That's beating the
>> current software implementation (zlib and zstd) by a lot. We are still
>> tuning the configuration in QEMU live migration now.
>> 
>> >
>> > As I understand it, QAT + QATzip would be compatible with both zlib
>> > and IAA + QPL, so we'd add another accelerator method like this:
>> >
>> > https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com
>> >
>> 
>> I quickly read over the IAA patchset and I saw this:
>> 
>> "However, due to some reasons, QPL is currently not compatible with the
>> existing Zlib method that Zlib compressed data can be decompressed by QPl
>> and vice versa."
>> 
>> The above probably means the current zlib software implementation and IAA
>> are not compatible.
>> 
>> For QAT, although, both Intel's QATzip and zlib are internally using
>> deflate, QATzip only supports deflate with a 4 byte header, deflate
>> wrapped by Gzip header and footer, or deflate wrapped by Intel® QAT
>> Gzip* extension header and footer. None of the headers can be recognized
>> by zlib software implementation is my understanding. So if we want to make
>> them compatible with zlib, the QATzip library needs to support that.
>
> The QPL library currently cannot support the Z_SYNC_FLULSH operation of zlib steaming. 
> This is the reason why it is not compatible with zlib.
>

I had understood from previous discussion that we'd be able to at least
support compression with QPL and decompression with the existing
zlib-based code. Is that not correct? I was about to suggest
experimenting with the window size in the existing code to hopefully
solve the 4kb window size issue. If there are other limitations, then
that will not be enough.

Also, can you point to the source of that statement about Z_SYNC_FLUSH,
I couldn't find more information about it in the documentation.

>> > All that, of course, assuming we even want to support both
>> > accelerators. They're addressing the same problem after all. I wonder
>> > how we'd choose a precedence, since both seem to be present in the
>> > same processor family.
>> >
>> >
>> 
>> That's an interesting question :-) I think overall performance (throughput
>> and CPU overhead) should both be considered. IAA and QAT accelerators
>> don't present on all systems. We Bytedance choose to have both on our
>> platform when purchasing from Intel.

Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Hao Xiang 10 months, 2 weeks ago

On Mon, Jan 8, 2024 at 12:28 PM Fabiano Rosas <farosas@suse.de> wrote:
>
> "Liu, Yuan1" <yuan1.liu@intel.com> writes:
>
> >> -----Original Message-----
> >> From: Hao Xiang <hao.xiang@bytedance.com>
> >> Sent: Saturday, January 6, 2024 7:53 AM
> >> To: Fabiano Rosas <farosas@suse.de>
> >> Cc: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> >> marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> >> peter.maydell@linaro.org; Liu, Yuan1 <yuan1.liu@intel.com>;
> >> berrange@redhat.com
> >> Subject: Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented
> >> 'qatzip' compression method
> >>
> >> On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de> wrote:
> >> >
> >> > Bryan Zhang <bryan.zhang@bytedance.com> writes:
> >> >
> >> > +cc Yuan Liu, Daniel Berrangé
> >> >
> >> > > Adds support for 'qatzip' as an option for the multifd compression
> >> > > method parameter, but copy-pastes the no-op logic to leave the
> >> > > actual methods effectively unimplemented. This is in preparation of
> >> > > a subsequent commit that will implement actually using QAT for
> >> > > compression and decompression.
> >> > >
> >> > > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> >> > > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> >> > > ---
> >> > >  hw/core/qdev-properties-system.c |  6 ++-
> >> > >  migration/meson.build            |  1 +
> >> > >  migration/multifd-qatzip.c       | 81
> >> ++++++++++++++++++++++++++++++++
> >> > >  migration/multifd.h              |  1 +
> >> > >  qapi/migration.json              |  5 +-
> >> > >  5 files changed, 92 insertions(+), 2 deletions(-)  create mode
> >> > > 100644 migration/multifd-qatzip.c
> >> > >
> >> > > diff --git a/hw/core/qdev-properties-system.c
> >> > > b/hw/core/qdev-properties-system.c
> >> > > index 1a396521d5..d8e48dcb0e 100644
> >> > > --- a/hw/core/qdev-properties-system.c
> >> > > +++ b/hw/core/qdev-properties-system.c
> >> > > @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
> >> > > const PropertyInfo qdev_prop_multifd_compression = {
> >> > >      .name = "MultiFDCompression",
> >> > >      .description = "multifd_compression values, "
> >> > > -                   "none/zlib/zstd",
> >> > > +                   "none/zlib/zstd"
> >> > > +#ifdef CONFIG_QATZIP
> >> > > +                   "/qatzip"
> >> > > +#endif
> >> > > +                   ,
> >> > >      .enum_table = &MultiFDCompression_lookup,
> >> > >      .get = qdev_propinfo_get_enum,
> >> > >      .set = qdev_propinfo_set_enum,
> >> > > diff --git a/migration/meson.build b/migration/meson.build index
> >> > > 92b1cc4297..e20f318379 100644
> >> > > --- a/migration/meson.build
> >> > > +++ b/migration/meson.build
> >> > > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
> >> > >    system_ss.add(files('block.c'))
> >> > >  endif
> >> > >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> >> > > +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
> >> > >
> >> > >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
> >> > >                  if_true: files('ram.c', diff --git
> >> > > a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c new file
> >> > > mode 100644 index 0000000000..1733bbddb7
> >> > > --- /dev/null
> >> > > +++ b/migration/multifd-qatzip.c
> >> > > @@ -0,0 +1,81 @@
> >> > > +/*
> >> > > + * Multifd QATzip compression implementation
> >> > > + *
> >> > > + * Copyright (c) Bytedance
> >> > > + *
> >> > > + * Authors:
> >> > > + *  Bryan Zhang <bryan.zhang@bytedance.com>
> >> > > + *  Hao Xiang   <hao.xiang@bytedance.com>
> >> > > + *
> >> > > + * This work is licensed under the terms of the GNU GPL, version 2 or
> >> later.
> >> > > + * See the COPYING file in the top-level directory.
> >> > > + */
> >> > > +
> >> > > +#include "qemu/osdep.h"
> >> > > +#include "exec/ramblock.h"
> >> > > +#include "exec/target_page.h"
> >> > > +#include "qapi/error.h"
> >> > > +#include "migration.h"
> >> > > +#include "options.h"
> >> > > +#include "multifd.h"
> >> > > +
> >> > > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp) {
> >> > > +    return 0;
> >> > > +}
> >> > > +
> >> > > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp)
> >> > > +{};
> >> > > +
> >> > > +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
> >> > > +{
> >> > > +    MultiFDPages_t *pages = p->pages;
> >> > > +
> >> > > +    for (int i = 0; i < p->normal_num; i++) {
> >> > > +        p->iov[p->iovs_num].iov_base = pages->block->host + p-
> >> >normal[i];
> >> > > +        p->iov[p->iovs_num].iov_len = p->page_size;
> >> > > +        p->iovs_num++;
> >> > > +    }
> >> > > +
> >> > > +    p->next_packet_size = p->normal_num * p->page_size;
> >> > > +    p->flags |= MULTIFD_FLAG_NOCOMP;
> >> > > +    return 0;
> >> > > +}
> >> > > +
> >> > > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp) {
> >> > > +    return 0;
> >> > > +}
> >> > > +
> >> > > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> >> > > +
> >> > > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp) {
> >> > > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
> >> > > +
> >> > > +    if (flags != MULTIFD_FLAG_NOCOMP) {
> >> > > +        error_setg(errp, "multifd %u: flags received %x flags
> >> expected %x",
> >> > > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> >> > > +        return -1;
> >> > > +    }
> >> > > +    for (int i = 0; i < p->normal_num; i++) {
> >> > > +        p->iov[i].iov_base = p->host + p->normal[i];
> >> > > +        p->iov[i].iov_len = p->page_size;
> >> > > +    }
> >> > > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num,
> >> > > +errp); }
> >> > > +
> >> > > +static MultiFDMethods multifd_qatzip_ops = {
> >> > > +    .send_setup = qatzip_send_setup,
> >> > > +    .send_cleanup = qatzip_send_cleanup,
> >> > > +    .send_prepare = qatzip_send_prepare,
> >> > > +    .recv_setup = qatzip_recv_setup,
> >> > > +    .recv_cleanup = qatzip_recv_cleanup,
> >> > > +    .recv_pages = qatzip_recv_pages };
> >> > > +
> >> > > +static void multifd_qatzip_register(void) {
> >> > > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP,
> >> > > +&multifd_qatzip_ops); }
> >> > > +
> >> > > +migration_init(multifd_qatzip_register);
> >> > > diff --git a/migration/multifd.h b/migration/multifd.h index
> >> > > a835643b48..5600f7fc82 100644
> >> > > --- a/migration/multifd.h
> >> > > +++ b/migration/multifd.h
> >> > > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock
> >> > > *block, ram_addr_t offset);  #define MULTIFD_FLAG_NOCOMP (0 << 1)
> >> > > #define MULTIFD_FLAG_ZLIB (1 << 1)  #define MULTIFD_FLAG_ZSTD (2 <<
> >> > > 1)
> >> > > +#define MULTIFD_FLAG_QATZIP (3 << 1)
> >> > >
> >> > >  /* This value needs to be a multiple of qemu_target_page_size() */
> >> > > #define MULTIFD_PACKET_SIZE (512 * 1024) diff --git
> >> > > a/qapi/migration.json b/qapi/migration.json index
> >> > > 6d5a4b0489..e3cc195aed 100644
> >> > > --- a/qapi/migration.json
> >> > > +++ b/qapi/migration.json
> >> > > @@ -625,11 +625,14 @@
> >> > >  #
> >> > >  # @zstd: use zstd compression method.
> >> > >  #
> >> > > +# @qatzip: use qatzip compression method.
> >> > > +#
> >> > >  # Since: 5.0
> >> > >  ##
> >> > >  { 'enum': 'MultiFDCompression',
> >> > >    'data': [ 'none', 'zlib',
> >> > > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> >> > > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> >> > > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
> >> >
> >> > In another thread adding support to another Intel accelerator (IAA) we
> >> > decided that it was better to select the offloading as an accelerator
> >> > method to multifd zlib rather than as an entirely new compression
> >> > format. Take a look at that discussion:
> >> > https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
> >>
> >> We had some early discussion with Intel folks (probably a different team
> >> than the one with the above patchset). The understanding at the time is
> >> that QAT is good at both bulk data compression and decompression. IAA is
> >> good at decompression with smaller data size but compression performance
> >> is not the best. In multifd, we are compressing up to 128 4k pages at a
> >> time and potentially this can increase by configuring the packet size, at
> >> the time we thought QAT could be a better fit in the multifd live
> >> migration scenario. We would like to hear more from Intel.
> >>
> >> From our benchmark testing, with two QAT devices, we can get deflate
> >> compression throughout to around 7GB/s with ~160% CPU. That's beating the
> >> current software implementation (zlib and zstd) by a lot. We are still
> >> tuning the configuration in QEMU live migration now.
> >>
> >> >
> >> > As I understand it, QAT + QATzip would be compatible with both zlib
> >> > and IAA + QPL, so we'd add another accelerator method like this:
> >> >
> >> > https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com
> >> >
> >>
> >> I quickly read over the IAA patchset and I saw this:
> >>
> >> "However, due to some reasons, QPL is currently not compatible with the
> >> existing Zlib method that Zlib compressed data can be decompressed by QPl
> >> and vice versa."
> >>
> >> The above probably means the current zlib software implementation and IAA
> >> are not compatible.
> >>
> >> For QAT, although, both Intel's QATzip and zlib are internally using
> >> deflate, QATzip only supports deflate with a 4 byte header, deflate
> >> wrapped by Gzip header and footer, or deflate wrapped by Intel® QAT
> >> Gzip* extension header and footer. None of the headers can be recognized
> >> by zlib software implementation is my understanding. So if we want to make
> >> them compatible with zlib, the QATzip library needs to support that.
> >
> > The QPL library currently cannot support the Z_SYNC_FLULSH operation of zlib steaming.
> > This is the reason why it is not compatible with zlib.
> >

When doing the QPL compression, if we set both QPL_FLAG_FIRST and
QPL_FLAG_LAST flags and run a single job for all pages in a multifd
packet, would it generate the same format as Z_SYNC_FLULSH in the
current zlib-based code?

>
> I had understood from previous discussion that we'd be able to at least
> support compression with QPL and decompression with the existing
> zlib-based code. Is that not correct? I was about to suggest
> experimenting with the window size in the existing code to hopefully
> solve the 4kb window size issue. If there are other limitations, then
> that will not be enough.

>
> Also, can you point to the source of that statement about Z_SYNC_FLUSH,
> I couldn't find more information about it in the documentation.
>
> >> > All that, of course, assuming we even want to support both
> >> > accelerators. They're addressing the same problem after all. I wonder
> >> > how we'd choose a precedence, since both seem to be present in the
> >> > same processor family.
> >> >
> >> >
> >>
> >> That's an interesting question :-) I think overall performance (throughput
> >> and CPU overhead) should both be considered. IAA and QAT accelerators
> >> don't present on all systems. We Bytedance choose to have both on our
> >> platform when purchasing from Intel.

RE: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Liu, Yuan1 10 months, 2 weeks ago

> -----Original Message-----
> From: Hao Xiang <hao.xiang@bytedance.com>
> Sent: Thursday, January 11, 2024 2:40 PM
> To: Fabiano Rosas <farosas@suse.de>
> Cc: Liu, Yuan1 <yuan1.liu@intel.com>; Bryan Zhang
> <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> peter.maydell@linaro.org; berrange@redhat.com
> Subject: Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented
> 'qatzip' compression method
> 
> On Mon, Jan 8, 2024 at 12:28 PM Fabiano Rosas <farosas@suse.de> wrote:
> >
> > "Liu, Yuan1" <yuan1.liu@intel.com> writes:
> >
> > >> -----Original Message-----
> > >> From: Hao Xiang <hao.xiang@bytedance.com>
> > >> Sent: Saturday, January 6, 2024 7:53 AM
> > >> To: Fabiano Rosas <farosas@suse.de>
> > >> Cc: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> > >> marcandre.lureau@redhat.com; peterx@redhat.com;
> > >> quintela@redhat.com; peter.maydell@linaro.org; Liu, Yuan1
> > >> <yuan1.liu@intel.com>; berrange@redhat.com
> > >> Subject: Re: [External] Re: [PATCH 3/5] migration: Introduce
> > >> unimplemented 'qatzip' compression method
> > >>
> > >> On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de>
> wrote:
> > >> >
> > >> > Bryan Zhang <bryan.zhang@bytedance.com> writes:
> > >> >
> > >> > +cc Yuan Liu, Daniel Berrangé
> > >> >
> > >> > > Adds support for 'qatzip' as an option for the multifd
> > >> > > compression method parameter, but copy-pastes the no-op logic
> > >> > > to leave the actual methods effectively unimplemented. This is
> > >> > > in preparation of a subsequent commit that will implement
> > >> > > actually using QAT for compression and decompression.
> > >> > >
> > >> > > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> > >> > > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> > >> > > ---
> > >> > >  hw/core/qdev-properties-system.c |  6 ++-
> > >> > >  migration/meson.build            |  1 +
> > >> > >  migration/multifd-qatzip.c       | 81
> > >> ++++++++++++++++++++++++++++++++
> > >> > >  migration/multifd.h              |  1 +
> > >> > >  qapi/migration.json              |  5 +-
> > >> > >  5 files changed, 92 insertions(+), 2 deletions(-)  create mode
> > >> > > 100644 migration/multifd-qatzip.c
> > >> > >
> > >> > > diff --git a/hw/core/qdev-properties-system.c
> > >> > > b/hw/core/qdev-properties-system.c
> > >> > > index 1a396521d5..d8e48dcb0e 100644
> > >> > > --- a/hw/core/qdev-properties-system.c
> > >> > > +++ b/hw/core/qdev-properties-system.c
> > >> > > @@ -658,7 +658,11 @@ const PropertyInfo
> > >> > > qdev_prop_fdc_drive_type = { const PropertyInfo
> qdev_prop_multifd_compression = {
> > >> > >      .name = "MultiFDCompression",
> > >> > >      .description = "multifd_compression values, "
> > >> > > -                   "none/zlib/zstd",
> > >> > > +                   "none/zlib/zstd"
> > >> > > +#ifdef CONFIG_QATZIP
> > >> > > +                   "/qatzip"
> > >> > > +#endif
> > >> > > +                   ,
> > >> > >      .enum_table = &MultiFDCompression_lookup,
> > >> > >      .get = qdev_propinfo_get_enum,
> > >> > >      .set = qdev_propinfo_set_enum, diff --git
> > >> > > a/migration/meson.build b/migration/meson.build index
> > >> > > 92b1cc4297..e20f318379 100644
> > >> > > --- a/migration/meson.build
> > >> > > +++ b/migration/meson.build
> > >> > > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
> > >> > >    system_ss.add(files('block.c'))  endif
> > >> > >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> > >> > > +system_ss.add(when: qatzip, if_true:
> > >> > > +files('multifd-qatzip.c'))
> > >> > >
> > >> > >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
> > >> > >                  if_true: files('ram.c', diff --git
> > >> > > a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c new
> file
> > >> > > mode 100644 index 0000000000..1733bbddb7
> > >> > > --- /dev/null
> > >> > > +++ b/migration/multifd-qatzip.c
> > >> > > @@ -0,0 +1,81 @@
> > >> > > +/*
> > >> > > + * Multifd QATzip compression implementation
> > >> > > + *
> > >> > > + * Copyright (c) Bytedance
> > >> > > + *
> > >> > > + * Authors:
> > >> > > + *  Bryan Zhang <bryan.zhang@bytedance.com>
> > >> > > + *  Hao Xiang   <hao.xiang@bytedance.com>
> > >> > > + *
> > >> > > + * This work is licensed under the terms of the GNU GPL, version
> 2 or
> > >> later.
> > >> > > + * See the COPYING file in the top-level directory.
> > >> > > + */
> > >> > > +
> > >> > > +#include "qemu/osdep.h"
> > >> > > +#include "exec/ramblock.h"
> > >> > > +#include "exec/target_page.h"
> > >> > > +#include "qapi/error.h"
> > >> > > +#include "migration.h"
> > >> > > +#include "options.h"
> > >> > > +#include "multifd.h"
> > >> > > +
> > >> > > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp)
> {
> > >> > > +    return 0;
> > >> > > +}
> > >> > > +
> > >> > > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error
> **errp)
> > >> > > +{};
> > >> > > +
> > >> > > +static int qatzip_send_prepare(MultiFDSendParams *p, Error
> **errp)
> > >> > > +{
> > >> > > +    MultiFDPages_t *pages = p->pages;
> > >> > > +
> > >> > > +    for (int i = 0; i < p->normal_num; i++) {
> > >> > > +        p->iov[p->iovs_num].iov_base = pages->block->host + p-
> > >> >normal[i];
> > >> > > +        p->iov[p->iovs_num].iov_len = p->page_size;
> > >> > > +        p->iovs_num++;
> > >> > > +    }
> > >> > > +
> > >> > > +    p->next_packet_size = p->normal_num * p->page_size;
> > >> > > +    p->flags |= MULTIFD_FLAG_NOCOMP;
> > >> > > +    return 0;
> > >> > > +}
> > >> > > +
> > >> > > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp)
> {
> > >> > > +    return 0;
> > >> > > +}
> > >> > > +
> > >> > > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> > >> > > +
> > >> > > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp)
> {
> > >> > > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
> > >> > > +
> > >> > > +    if (flags != MULTIFD_FLAG_NOCOMP) {
> > >> > > +        error_setg(errp, "multifd %u: flags received %x flags
> > >> expected %x",
> > >> > > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> > >> > > +        return -1;
> > >> > > +    }
> > >> > > +    for (int i = 0; i < p->normal_num; i++) {
> > >> > > +        p->iov[i].iov_base = p->host + p->normal[i];
> > >> > > +        p->iov[i].iov_len = p->page_size;
> > >> > > +    }
> > >> > > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num,
> > >> > > +errp); }
> > >> > > +
> > >> > > +static MultiFDMethods multifd_qatzip_ops = {
> > >> > > +    .send_setup = qatzip_send_setup,
> > >> > > +    .send_cleanup = qatzip_send_cleanup,
> > >> > > +    .send_prepare = qatzip_send_prepare,
> > >> > > +    .recv_setup = qatzip_recv_setup,
> > >> > > +    .recv_cleanup = qatzip_recv_cleanup,
> > >> > > +    .recv_pages = qatzip_recv_pages };
> > >> > > +
> > >> > > +static void multifd_qatzip_register(void) {
> > >> > > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP,
> > >> > > +&multifd_qatzip_ops); }
> > >> > > +
> > >> > > +migration_init(multifd_qatzip_register);
> > >> > > diff --git a/migration/multifd.h b/migration/multifd.h index
> > >> > > a835643b48..5600f7fc82 100644
> > >> > > --- a/migration/multifd.h
> > >> > > +++ b/migration/multifd.h
> > >> > > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock
> > >> > > *block, ram_addr_t offset);  #define MULTIFD_FLAG_NOCOMP (0 << 1)
> > >> > > #define MULTIFD_FLAG_ZLIB (1 << 1)  #define MULTIFD_FLAG_ZSTD (2
> <<
> > >> > > 1)
> > >> > > +#define MULTIFD_FLAG_QATZIP (3 << 1)
> > >> > >
> > >> > >  /* This value needs to be a multiple of qemu_target_page_size()
> */
> > >> > > #define MULTIFD_PACKET_SIZE (512 * 1024) diff --git
> > >> > > a/qapi/migration.json b/qapi/migration.json index
> > >> > > 6d5a4b0489..e3cc195aed 100644
> > >> > > --- a/qapi/migration.json
> > >> > > +++ b/qapi/migration.json
> > >> > > @@ -625,11 +625,14 @@
> > >> > >  #
> > >> > >  # @zstd: use zstd compression method.
> > >> > >  #
> > >> > > +# @qatzip: use qatzip compression method.
> > >> > > +#
> > >> > >  # Since: 5.0
> > >> > >  ##
> > >> > >  { 'enum': 'MultiFDCompression',
> > >> > >    'data': [ 'none', 'zlib',
> > >> > > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> > >> > > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> > >> > > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
> > >> >
> > >> > In another thread adding support to another Intel accelerator (IAA)
> we
> > >> > decided that it was better to select the offloading as an
> accelerator
> > >> > method to multifd zlib rather than as an entirely new compression
> > >> > format. Take a look at that discussion:
> > >> > https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
> > >>
> > >> We had some early discussion with Intel folks (probably a different
> team
> > >> than the one with the above patchset). The understanding at the time
> is
> > >> that QAT is good at both bulk data compression and decompression. IAA
> is
> > >> good at decompression with smaller data size but compression
> performance
> > >> is not the best. In multifd, we are compressing up to 128 4k pages at
> a
> > >> time and potentially this can increase by configuring the packet
> size, at
> > >> the time we thought QAT could be a better fit in the multifd live
> > >> migration scenario. We would like to hear more from Intel.
> > >>
> > >> From our benchmark testing, with two QAT devices, we can get deflate
> > >> compression throughout to around 7GB/s with ~160% CPU. That's beating
> the
> > >> current software implementation (zlib and zstd) by a lot. We are
> still
> > >> tuning the configuration in QEMU live migration now.
> > >>
> > >> >
> > >> > As I understand it, QAT + QATzip would be compatible with both zlib
> > >> > and IAA + QPL, so we'd add another accelerator method like this:
> > >> >
> > >> > https://lore.kernel.org/r/20240103112851.908082-3-
> yuan1.liu@intel.com
> > >> >
> > >>
> > >> I quickly read over the IAA patchset and I saw this:
> > >>
> > >> "However, due to some reasons, QPL is currently not compatible with
> the
> > >> existing Zlib method that Zlib compressed data can be decompressed by
> QPl
> > >> and vice versa."
> > >>
> > >> The above probably means the current zlib software implementation and
> IAA
> > >> are not compatible.
> > >>
> > >> For QAT, although, both Intel's QATzip and zlib are internally using
> > >> deflate, QATzip only supports deflate with a 4 byte header, deflate
> > >> wrapped by Gzip header and footer, or deflate wrapped by Intel® QAT
> > >> Gzip* extension header and footer. None of the headers can be
> recognized
> > >> by zlib software implementation is my understanding. So if we want to
> make
> > >> them compatible with zlib, the QATzip library needs to support that.
> > >
> > > The QPL library currently cannot support the Z_SYNC_FLULSH operation
> of zlib steaming.
> > > This is the reason why it is not compatible with zlib.
> > >
> 
> When doing the QPL compression, if we set both QPL_FLAG_FIRST and
> QPL_FLAG_LAST flags and run a single job for all pages in a multifd
> packet, would it generate the same format as Z_SYNC_FLULSH in the
> current zlib-based code?

Yes, When we want to use an IAA physics engine for data compression,
We can use a QPL job and set the QPL_FLAG_FIRST and QPL_FLAG_LAST flags, 
which will generate compressed data according to the deflate method.

Z_SYNC_FLUSH not only flushes the buffer, but also maintains data 
alignment and maintains the current compression state. 
QPL is currently unable to do this.

> >
> > I had understood from previous discussion that we'd be able to at least
> > support compression with QPL and decompression with the existing
> > zlib-based code. Is that not correct? I was about to suggest
> > experimenting with the window size in the existing code to hopefully
> > solve the 4kb window size issue. If there are other limitations, then
> > that will not be enough.
> 
> >
> > Also, can you point to the source of that statement about Z_SYNC_FLUSH,
> > I couldn't find more information about it in the documentation.
> >
> > >> > All that, of course, assuming we even want to support both
> > >> > accelerators. They're addressing the same problem after all. I
> wonder
> > >> > how we'd choose a precedence, since both seem to be present in the
> > >> > same processor family.
> > >> >
> > >> >
> > >>
> > >> That's an interesting question :-) I think overall performance
> (throughput
> > >> and CPU overhead) should both be considered. IAA and QAT accelerators
> > >> don't present on all systems. We Bytedance choose to have both on our
> > >> platform when purchasing from Intel.

RE: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Liu, Yuan1 10 months, 3 weeks ago

> -----Original Message-----
> From: Fabiano Rosas <farosas@suse.de>
> Sent: Tuesday, January 9, 2024 4:28 AM
> To: Liu, Yuan1 <yuan1.liu@intel.com>; Hao Xiang <hao.xiang@bytedance.com>
> Cc: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> peter.maydell@linaro.org; berrange@redhat.com
> Subject: RE: [External] Re: [PATCH 3/5] migration: Introduce unimplemented
> 'qatzip' compression method
> 
> "Liu, Yuan1" <yuan1.liu@intel.com> writes:
> 
> >> -----Original Message-----
> >> From: Hao Xiang <hao.xiang@bytedance.com>
> >> Sent: Saturday, January 6, 2024 7:53 AM
> >> To: Fabiano Rosas <farosas@suse.de>
> >> Cc: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> >> marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> >> peter.maydell@linaro.org; Liu, Yuan1 <yuan1.liu@intel.com>;
> >> berrange@redhat.com
> >> Subject: Re: [External] Re: [PATCH 3/5] migration: Introduce
> >> unimplemented 'qatzip' compression method
> >>
> >> On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de> wrote:
> >> >
> >> > Bryan Zhang <bryan.zhang@bytedance.com> writes:
> >> >
> >> > +cc Yuan Liu, Daniel Berrangé
> >> >
> >> > > Adds support for 'qatzip' as an option for the multifd
> >> > > compression method parameter, but copy-pastes the no-op logic to
> >> > > leave the actual methods effectively unimplemented. This is in
> >> > > preparation of a subsequent commit that will implement actually
> >> > > using QAT for compression and decompression.
> >> > >
> >> > > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> >> > > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> >> > > ---
> >> > >  hw/core/qdev-properties-system.c |  6 ++-
> >> > >  migration/meson.build            |  1 +
> >> > >  migration/multifd-qatzip.c       | 81
> >> ++++++++++++++++++++++++++++++++
> >> > >  migration/multifd.h              |  1 +
> >> > >  qapi/migration.json              |  5 +-
> >> > >  5 files changed, 92 insertions(+), 2 deletions(-)  create mode
> >> > > 100644 migration/multifd-qatzip.c
> >> > >
> >> > > diff --git a/hw/core/qdev-properties-system.c
> >> > > b/hw/core/qdev-properties-system.c
> >> > > index 1a396521d5..d8e48dcb0e 100644
> >> > > --- a/hw/core/qdev-properties-system.c
> >> > > +++ b/hw/core/qdev-properties-system.c
> >> > > @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type
> >> > > = { const PropertyInfo qdev_prop_multifd_compression = {
> >> > >      .name = "MultiFDCompression",
> >> > >      .description = "multifd_compression values, "
> >> > > -                   "none/zlib/zstd",
> >> > > +                   "none/zlib/zstd"
> >> > > +#ifdef CONFIG_QATZIP
> >> > > +                   "/qatzip"
> >> > > +#endif
> >> > > +                   ,
> >> > >      .enum_table = &MultiFDCompression_lookup,
> >> > >      .get = qdev_propinfo_get_enum,
> >> > >      .set = qdev_propinfo_set_enum, diff --git
> >> > > a/migration/meson.build b/migration/meson.build index
> >> > > 92b1cc4297..e20f318379 100644
> >> > > --- a/migration/meson.build
> >> > > +++ b/migration/meson.build
> >> > > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
> >> > >    system_ss.add(files('block.c'))  endif
> >> > >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> >> > > +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
> >> > >
> >> > >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
> >> > >                  if_true: files('ram.c', diff --git
> >> > > a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c new file
> >> > > mode 100644 index 0000000000..1733bbddb7
> >> > > --- /dev/null
> >> > > +++ b/migration/multifd-qatzip.c
> >> > > @@ -0,0 +1,81 @@
> >> > > +/*
> >> > > + * Multifd QATzip compression implementation
> >> > > + *
> >> > > + * Copyright (c) Bytedance
> >> > > + *
> >> > > + * Authors:
> >> > > + *  Bryan Zhang <bryan.zhang@bytedance.com>
> >> > > + *  Hao Xiang   <hao.xiang@bytedance.com>
> >> > > + *
> >> > > + * This work is licensed under the terms of the GNU GPL, version 2
> or
> >> later.
> >> > > + * See the COPYING file in the top-level directory.
> >> > > + */
> >> > > +
> >> > > +#include "qemu/osdep.h"
> >> > > +#include "exec/ramblock.h"
> >> > > +#include "exec/target_page.h"
> >> > > +#include "qapi/error.h"
> >> > > +#include "migration.h"
> >> > > +#include "options.h"
> >> > > +#include "multifd.h"
> >> > > +
> >> > > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp) {
> >> > > +    return 0;
> >> > > +}
> >> > > +
> >> > > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error
> **errp)
> >> > > +{};
> >> > > +
> >> > > +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
> >> > > +{
> >> > > +    MultiFDPages_t *pages = p->pages;
> >> > > +
> >> > > +    for (int i = 0; i < p->normal_num; i++) {
> >> > > +        p->iov[p->iovs_num].iov_base = pages->block->host + p-
> >> >normal[i];
> >> > > +        p->iov[p->iovs_num].iov_len = p->page_size;
> >> > > +        p->iovs_num++;
> >> > > +    }
> >> > > +
> >> > > +    p->next_packet_size = p->normal_num * p->page_size;
> >> > > +    p->flags |= MULTIFD_FLAG_NOCOMP;
> >> > > +    return 0;
> >> > > +}
> >> > > +
> >> > > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp) {
> >> > > +    return 0;
> >> > > +}
> >> > > +
> >> > > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> >> > > +
> >> > > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp) {
> >> > > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
> >> > > +
> >> > > +    if (flags != MULTIFD_FLAG_NOCOMP) {
> >> > > +        error_setg(errp, "multifd %u: flags received %x flags
> >> expected %x",
> >> > > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> >> > > +        return -1;
> >> > > +    }
> >> > > +    for (int i = 0; i < p->normal_num; i++) {
> >> > > +        p->iov[i].iov_base = p->host + p->normal[i];
> >> > > +        p->iov[i].iov_len = p->page_size;
> >> > > +    }
> >> > > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num,
> >> > > +errp); }
> >> > > +
> >> > > +static MultiFDMethods multifd_qatzip_ops = {
> >> > > +    .send_setup = qatzip_send_setup,
> >> > > +    .send_cleanup = qatzip_send_cleanup,
> >> > > +    .send_prepare = qatzip_send_prepare,
> >> > > +    .recv_setup = qatzip_recv_setup,
> >> > > +    .recv_cleanup = qatzip_recv_cleanup,
> >> > > +    .recv_pages = qatzip_recv_pages };
> >> > > +
> >> > > +static void multifd_qatzip_register(void) {
> >> > > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP,
> >> > > +&multifd_qatzip_ops); }
> >> > > +
> >> > > +migration_init(multifd_qatzip_register);
> >> > > diff --git a/migration/multifd.h b/migration/multifd.h index
> >> > > a835643b48..5600f7fc82 100644
> >> > > --- a/migration/multifd.h
> >> > > +++ b/migration/multifd.h
> >> > > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock
> >> > > *block, ram_addr_t offset);  #define MULTIFD_FLAG_NOCOMP (0 << 1)
> >> > > #define MULTIFD_FLAG_ZLIB (1 << 1)  #define MULTIFD_FLAG_ZSTD (2 <<
> >> > > 1)
> >> > > +#define MULTIFD_FLAG_QATZIP (3 << 1)
> >> > >
> >> > >  /* This value needs to be a multiple of qemu_target_page_size() */
> >> > > #define MULTIFD_PACKET_SIZE (512 * 1024) diff --git
> >> > > a/qapi/migration.json b/qapi/migration.json index
> >> > > 6d5a4b0489..e3cc195aed 100644
> >> > > --- a/qapi/migration.json
> >> > > +++ b/qapi/migration.json
> >> > > @@ -625,11 +625,14 @@
> >> > >  #
> >> > >  # @zstd: use zstd compression method.
> >> > >  #
> >> > > +# @qatzip: use qatzip compression method.
> >> > > +#
> >> > >  # Since: 5.0
> >> > >  ##
> >> > >  { 'enum': 'MultiFDCompression',
> >> > >    'data': [ 'none', 'zlib',
> >> > > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> >> > > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> >> > > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
> >> >
> >> > In another thread adding support to another Intel accelerator (IAA)
> we
> >> > decided that it was better to select the offloading as an accelerator
> >> > method to multifd zlib rather than as an entirely new compression
> >> > format. Take a look at that discussion:
> >> > https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
> >>
> >> We had some early discussion with Intel folks (probably a different
> team
> >> than the one with the above patchset). The understanding at the time is
> >> that QAT is good at both bulk data compression and decompression. IAA
> is
> >> good at decompression with smaller data size but compression
> performance
> >> is not the best. In multifd, we are compressing up to 128 4k pages at a
> >> time and potentially this can increase by configuring the packet size,
> at
> >> the time we thought QAT could be a better fit in the multifd live
> >> migration scenario. We would like to hear more from Intel.
> >>
> >> From our benchmark testing, with two QAT devices, we can get deflate
> >> compression throughout to around 7GB/s with ~160% CPU. That's beating
> the
> >> current software implementation (zlib and zstd) by a lot. We are still
> >> tuning the configuration in QEMU live migration now.
> >>
> >> >
> >> > As I understand it, QAT + QATzip would be compatible with both zlib
> >> > and IAA + QPL, so we'd add another accelerator method like this:
> >> >
> >> > https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com
> >> >
> >>
> >> I quickly read over the IAA patchset and I saw this:
> >>
> >> "However, due to some reasons, QPL is currently not compatible with the
> >> existing Zlib method that Zlib compressed data can be decompressed by
> QPl
> >> and vice versa."
> >>
> >> The above probably means the current zlib software implementation and
> IAA
> >> are not compatible.
> >>
> >> For QAT, although, both Intel's QATzip and zlib are internally using
> >> deflate, QATzip only supports deflate with a 4 byte header, deflate
> >> wrapped by Gzip header and footer, or deflate wrapped by Intel® QAT
> >> Gzip* extension header and footer. None of the headers can be
> recognized
> >> by zlib software implementation is my understanding. So if we want to
> make
> >> them compatible with zlib, the QATzip library needs to support that.
> >
> > The QPL library currently cannot support the Z_SYNC_FLULSH operation of
> zlib steaming.
> > This is the reason why it is not compatible with zlib.
> >
> 
> I had understood from previous discussion that we'd be able to at least
> support compression with QPL and decompression with the existing
> zlib-based code. Is that not correct? I was about to suggest
> experimenting with the window size in the existing code to hopefully
> solve the 4kb window size issue. If there are other limitations, then
> that will not be enough.
> 
> Also, can you point to the source of that statement about Z_SYNC_FLUSH,
> I couldn't find more information about it in the documentation.

static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
{
    struct zlib_data *z = p->data;
    z_stream *zs = &z->zs;
    …
    for (i = 0; i < p->normal_num; i++) {
        uint32_t available = z->zbuff_len - out_size;
        int flush = Z_NO_FLUSH;

        if (i == p->normal_num - 1) {
            flush = Z_SYNC_FLUSH;
        }
If I understand correctly, the current implementation of multifd zlib treats each multifd thread as a separate stream. This implementation adds zlib headers and footers only to the beginning and end of the data, and the data transmitted in between does not require headers and footers.

The current implementation for IAA supports compressing data indicated by p->normal_num as a single stream. Each compressed data segment has zlib headers and footers. Since Z_SYNC_FLUSH is not supported, this means IAA has to complete the compression for a stream at once and cannot output parts of a stream in advance. Therefore, the current IAA is not compatible with existing zlib. Currently, it seems that the QAT implementation follows a similar approach.

Regarding the reference to Z_SYNC_FLUSH, you can find it at https://www.zlib.net/manual.html:
“If the parameter flush is set to Z_SYNC_FLUSH, all pending output is flushed to the output buffer and the output is aligned on a byte boundary, so that the decompressor can get all input data available so far. (In particular avail_in is zero after the call if enough output space has been provided before the call.) Flushing may degrade compression for some compression algorithms and so it should be used only when necessary. This completes the current deflate block and follows it with an empty stored block that is three bits plus filler bits to the next byte, followed by four bytes (00 00 ff ff).”

Based on the information above, I suggest the following options:

1. Modify multifd zlib to perform stream compression each time with p->normal_num pages. If this modification makes IAA compatible with zlib, I will implement it in the next version as per https://lore.kernel.org/all/20240103112851.908082-4-yuan1.liu@intel.com/T/ and provide performance data. We will also verify the feasibility with QAT.

2. Use zlib compression without stream compression, meaning each page is independently compressed. The advantage is that accelerators can concurrently process more pages, and the current IAA and QAT can both be compatible. The downside is a loss in compression ratio, and the length of the data after compressing each page needs to be added to MultiFDPacket_t. If future compression functionality considers support only on accelerators, the compression ratio can be improved through compression levels or other features without additional host CPU overhead.

Additionally, the default window size is set to 4K, which should effectively support IAA hardware.

> >> > All that, of course, assuming we even want to support both
> >> > accelerators. They're addressing the same problem after all. I wonder
> >> > how we'd choose a precedence, since both seem to be present in the
> >> > same processor family.
> >> >
> >> >
> >>
> >> That's an interesting question :-) I think overall performance
> (throughput
> >> and CPU overhead) should both be considered. IAA and QAT accelerators
> >> don't present on all systems. We Bytedance choose to have both on our
> >> platform when purchasing from Intel.

Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Hao Xiang 10 months, 2 weeks ago

On Mon, Jan 8, 2024 at 6:26 PM Liu, Yuan1 <yuan1.liu@intel.com> wrote:
>
> > -----Original Message-----
> > From: Fabiano Rosas <farosas@suse.de>
> > Sent: Tuesday, January 9, 2024 4:28 AM
> > To: Liu, Yuan1 <yuan1.liu@intel.com>; Hao Xiang <hao.xiang@bytedance.com>
> > Cc: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> > marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> > peter.maydell@linaro.org; berrange@redhat.com
> > Subject: RE: [External] Re: [PATCH 3/5] migration: Introduce unimplemented
> > 'qatzip' compression method
> >
> > "Liu, Yuan1" <yuan1.liu@intel.com> writes:
> >
> > >> -----Original Message-----
> > >> From: Hao Xiang <hao.xiang@bytedance.com>
> > >> Sent: Saturday, January 6, 2024 7:53 AM
> > >> To: Fabiano Rosas <farosas@suse.de>
> > >> Cc: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> > >> marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> > >> peter.maydell@linaro.org; Liu, Yuan1 <yuan1.liu@intel.com>;
> > >> berrange@redhat.com
> > >> Subject: Re: [External] Re: [PATCH 3/5] migration: Introduce
> > >> unimplemented 'qatzip' compression method
> > >>
> > >> On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de> wrote:
> > >> >
> > >> > Bryan Zhang <bryan.zhang@bytedance.com> writes:
> > >> >
> > >> > +cc Yuan Liu, Daniel Berrangé
> > >> >
> > >> > > Adds support for 'qatzip' as an option for the multifd
> > >> > > compression method parameter, but copy-pastes the no-op logic to
> > >> > > leave the actual methods effectively unimplemented. This is in
> > >> > > preparation of a subsequent commit that will implement actually
> > >> > > using QAT for compression and decompression.
> > >> > >
> > >> > > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> > >> > > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> > >> > > ---
> > >> > >  hw/core/qdev-properties-system.c |  6 ++-
> > >> > >  migration/meson.build            |  1 +
> > >> > >  migration/multifd-qatzip.c       | 81
> > >> ++++++++++++++++++++++++++++++++
> > >> > >  migration/multifd.h              |  1 +
> > >> > >  qapi/migration.json              |  5 +-
> > >> > >  5 files changed, 92 insertions(+), 2 deletions(-)  create mode
> > >> > > 100644 migration/multifd-qatzip.c
> > >> > >
> > >> > > diff --git a/hw/core/qdev-properties-system.c
> > >> > > b/hw/core/qdev-properties-system.c
> > >> > > index 1a396521d5..d8e48dcb0e 100644
> > >> > > --- a/hw/core/qdev-properties-system.c
> > >> > > +++ b/hw/core/qdev-properties-system.c
> > >> > > @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type
> > >> > > = { const PropertyInfo qdev_prop_multifd_compression = {
> > >> > >      .name = "MultiFDCompression",
> > >> > >      .description = "multifd_compression values, "
> > >> > > -                   "none/zlib/zstd",
> > >> > > +                   "none/zlib/zstd"
> > >> > > +#ifdef CONFIG_QATZIP
> > >> > > +                   "/qatzip"
> > >> > > +#endif
> > >> > > +                   ,
> > >> > >      .enum_table = &MultiFDCompression_lookup,
> > >> > >      .get = qdev_propinfo_get_enum,
> > >> > >      .set = qdev_propinfo_set_enum, diff --git
> > >> > > a/migration/meson.build b/migration/meson.build index
> > >> > > 92b1cc4297..e20f318379 100644
> > >> > > --- a/migration/meson.build
> > >> > > +++ b/migration/meson.build
> > >> > > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
> > >> > >    system_ss.add(files('block.c'))  endif
> > >> > >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> > >> > > +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
> > >> > >
> > >> > >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
> > >> > >                  if_true: files('ram.c', diff --git
> > >> > > a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c new file
> > >> > > mode 100644 index 0000000000..1733bbddb7
> > >> > > --- /dev/null
> > >> > > +++ b/migration/multifd-qatzip.c
> > >> > > @@ -0,0 +1,81 @@
> > >> > > +/*
> > >> > > + * Multifd QATzip compression implementation
> > >> > > + *
> > >> > > + * Copyright (c) Bytedance
> > >> > > + *
> > >> > > + * Authors:
> > >> > > + *  Bryan Zhang <bryan.zhang@bytedance.com>
> > >> > > + *  Hao Xiang   <hao.xiang@bytedance.com>
> > >> > > + *
> > >> > > + * This work is licensed under the terms of the GNU GPL, version 2
> > or
> > >> later.
> > >> > > + * See the COPYING file in the top-level directory.
> > >> > > + */
> > >> > > +
> > >> > > +#include "qemu/osdep.h"
> > >> > > +#include "exec/ramblock.h"
> > >> > > +#include "exec/target_page.h"
> > >> > > +#include "qapi/error.h"
> > >> > > +#include "migration.h"
> > >> > > +#include "options.h"
> > >> > > +#include "multifd.h"
> > >> > > +
> > >> > > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp) {
> > >> > > +    return 0;
> > >> > > +}
> > >> > > +
> > >> > > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error
> > **errp)
> > >> > > +{};
> > >> > > +
> > >> > > +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
> > >> > > +{
> > >> > > +    MultiFDPages_t *pages = p->pages;
> > >> > > +
> > >> > > +    for (int i = 0; i < p->normal_num; i++) {
> > >> > > +        p->iov[p->iovs_num].iov_base = pages->block->host + p-
> > >> >normal[i];
> > >> > > +        p->iov[p->iovs_num].iov_len = p->page_size;
> > >> > > +        p->iovs_num++;
> > >> > > +    }
> > >> > > +
> > >> > > +    p->next_packet_size = p->normal_num * p->page_size;
> > >> > > +    p->flags |= MULTIFD_FLAG_NOCOMP;
> > >> > > +    return 0;
> > >> > > +}
> > >> > > +
> > >> > > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp) {
> > >> > > +    return 0;
> > >> > > +}
> > >> > > +
> > >> > > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> > >> > > +
> > >> > > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp) {
> > >> > > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
> > >> > > +
> > >> > > +    if (flags != MULTIFD_FLAG_NOCOMP) {
> > >> > > +        error_setg(errp, "multifd %u: flags received %x flags
> > >> expected %x",
> > >> > > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> > >> > > +        return -1;
> > >> > > +    }
> > >> > > +    for (int i = 0; i < p->normal_num; i++) {
> > >> > > +        p->iov[i].iov_base = p->host + p->normal[i];
> > >> > > +        p->iov[i].iov_len = p->page_size;
> > >> > > +    }
> > >> > > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num,
> > >> > > +errp); }
> > >> > > +
> > >> > > +static MultiFDMethods multifd_qatzip_ops = {
> > >> > > +    .send_setup = qatzip_send_setup,
> > >> > > +    .send_cleanup = qatzip_send_cleanup,
> > >> > > +    .send_prepare = qatzip_send_prepare,
> > >> > > +    .recv_setup = qatzip_recv_setup,
> > >> > > +    .recv_cleanup = qatzip_recv_cleanup,
> > >> > > +    .recv_pages = qatzip_recv_pages };
> > >> > > +
> > >> > > +static void multifd_qatzip_register(void) {
> > >> > > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP,
> > >> > > +&multifd_qatzip_ops); }
> > >> > > +
> > >> > > +migration_init(multifd_qatzip_register);
> > >> > > diff --git a/migration/multifd.h b/migration/multifd.h index
> > >> > > a835643b48..5600f7fc82 100644
> > >> > > --- a/migration/multifd.h
> > >> > > +++ b/migration/multifd.h
> > >> > > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock
> > >> > > *block, ram_addr_t offset);  #define MULTIFD_FLAG_NOCOMP (0 << 1)
> > >> > > #define MULTIFD_FLAG_ZLIB (1 << 1)  #define MULTIFD_FLAG_ZSTD (2 <<
> > >> > > 1)
> > >> > > +#define MULTIFD_FLAG_QATZIP (3 << 1)
> > >> > >
> > >> > >  /* This value needs to be a multiple of qemu_target_page_size() */
> > >> > > #define MULTIFD_PACKET_SIZE (512 * 1024) diff --git
> > >> > > a/qapi/migration.json b/qapi/migration.json index
> > >> > > 6d5a4b0489..e3cc195aed 100644
> > >> > > --- a/qapi/migration.json
> > >> > > +++ b/qapi/migration.json
> > >> > > @@ -625,11 +625,14 @@
> > >> > >  #
> > >> > >  # @zstd: use zstd compression method.
> > >> > >  #
> > >> > > +# @qatzip: use qatzip compression method.
> > >> > > +#
> > >> > >  # Since: 5.0
> > >> > >  ##
> > >> > >  { 'enum': 'MultiFDCompression',
> > >> > >    'data': [ 'none', 'zlib',
> > >> > > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> > >> > > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> > >> > > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
> > >> >
> > >> > In another thread adding support to another Intel accelerator (IAA)
> > we
> > >> > decided that it was better to select the offloading as an accelerator
> > >> > method to multifd zlib rather than as an entirely new compression
> > >> > format. Take a look at that discussion:
> > >> > https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
> > >>
> > >> We had some early discussion with Intel folks (probably a different
> > team
> > >> than the one with the above patchset). The understanding at the time is
> > >> that QAT is good at both bulk data compression and decompression. IAA
> > is
> > >> good at decompression with smaller data size but compression
> > performance
> > >> is not the best. In multifd, we are compressing up to 128 4k pages at a
> > >> time and potentially this can increase by configuring the packet size,
> > at
> > >> the time we thought QAT could be a better fit in the multifd live
> > >> migration scenario. We would like to hear more from Intel.
> > >>
> > >> From our benchmark testing, with two QAT devices, we can get deflate
> > >> compression throughout to around 7GB/s with ~160% CPU. That's beating
> > the
> > >> current software implementation (zlib and zstd) by a lot. We are still
> > >> tuning the configuration in QEMU live migration now.
> > >>
> > >> >
> > >> > As I understand it, QAT + QATzip would be compatible with both zlib
> > >> > and IAA + QPL, so we'd add another accelerator method like this:
> > >> >
> > >> > https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com
> > >> >
> > >>
> > >> I quickly read over the IAA patchset and I saw this:
> > >>
> > >> "However, due to some reasons, QPL is currently not compatible with the
> > >> existing Zlib method that Zlib compressed data can be decompressed by
> > QPl
> > >> and vice versa."
> > >>
> > >> The above probably means the current zlib software implementation and
> > IAA
> > >> are not compatible.
> > >>
> > >> For QAT, although, both Intel's QATzip and zlib are internally using
> > >> deflate, QATzip only supports deflate with a 4 byte header, deflate
> > >> wrapped by Gzip header and footer, or deflate wrapped by Intel® QAT
> > >> Gzip* extension header and footer. None of the headers can be
> > recognized
> > >> by zlib software implementation is my understanding. So if we want to
> > make
> > >> them compatible with zlib, the QATzip library needs to support that.
> > >
> > > The QPL library currently cannot support the Z_SYNC_FLULSH operation of
> > zlib steaming.
> > > This is the reason why it is not compatible with zlib.
> > >
> >
> > I had understood from previous discussion that we'd be able to at least
> > support compression with QPL and decompression with the existing
> > zlib-based code. Is that not correct? I was about to suggest
> > experimenting with the window size in the existing code to hopefully
> > solve the 4kb window size issue. If there are other limitations, then
> > that will not be enough.
> >
> > Also, can you point to the source of that statement about Z_SYNC_FLUSH,
> > I couldn't find more information about it in the documentation.
>
> static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
> {
>     struct zlib_data *z = p->data;
>     z_stream *zs = &z->zs;
>     …
>     for (i = 0; i < p->normal_num; i++) {
>         uint32_t available = z->zbuff_len - out_size;
>         int flush = Z_NO_FLUSH;
>
>         if (i == p->normal_num - 1) {
>             flush = Z_SYNC_FLUSH;
>         }
> If I understand correctly, the current implementation of multifd zlib treats each multifd thread as a separate stream. This implementation adds zlib headers and footers only to the beginning and end of the data, and the data transmitted in between does not require headers and footers.

zlib_send_prepare() is just calling the deflate() repeatedly in a
loop. While compressing the last page, it sets the flag Z_SYNC_FLUSH,
which should dump all pending output to the buffer.

"This implementation adds zlib headers and footers only to the
beginning and end of the data,"
^ You mean putting a header/footer per multifd packet, correct?

>
> The current implementation for IAA supports compressing data indicated by p->normal_num as a single stream. Each compressed data segment has zlib headers and footers. Since Z_SYNC_FLUSH is not supported, this means IAA has to complete the compression for a stream at once and cannot output parts of a stream in advance. Therefore, the current IAA is not compatible with existing zlib. Currently, it seems that the QAT implementation follows a similar approach.

"Since Z_SYNC_FLUSH is not supported, this means IAA has to complete
the compression for a stream at once and cannot output parts of a
stream in advance."
Does IAA's deflate compression put a header/footer per page? Or per
multifd packet?

>
> Regarding the reference to Z_SYNC_FLUSH, you can find it at https://www.zlib.net/manual.html:
> “If the parameter flush is set to Z_SYNC_FLUSH, all pending output is flushed to the output buffer and the output is aligned on a byte boundary, so that the decompressor can get all input data available so far. (In particular avail_in is zero after the call if enough output space has been provided before the call.) Flushing may degrade compression for some compression algorithms and so it should be used only when necessary. This completes the current deflate block and follows it with an empty stored block that is three bits plus filler bits to the next byte, followed by four bytes (00 00 ff ff).”
>
> Based on the information above, I suggest the following options:
>
> 1. Modify multifd zlib to perform stream compression each time with p->normal_num pages. If this modification makes IAA compatible with zlib, I will implement it in the next version as per https://lore.kernel.org/all/20240103112851.908082-4-yuan1.liu@intel.com/T/ and provide performance data. We will also verify the feasibility with QAT.
>
> 2. Use zlib compression without stream compression, meaning each page is independently compressed. The advantage is that accelerators can concurrently process more pages, and the current IAA and QAT can both be compatible. The downside is a loss in compression ratio, and the length of the data after compressing each page needs to be added to MultiFDPacket_t. If future compression functionality considers support only on accelerators, the compression ratio can be improved through compression levels or other features without additional host CPU overhead.
>

We noticed a pretty significant performance difference in QAT deflate
while using the streaming version VS the non-streaming version. The
non-streaming version has better performance.
Sounds like the easiest way to have zlib/IAA/QAT to be capable of
compressing/decompressing interchangeably is to set the Z_SYNC_FLUSH
flag on all deflate() calls in zlib_send_prepare(). And in my opinion,
it is worth the trade-off. If my hardware doesn't have accelerators
and I want pure software based compression, I would choose zstd over
zlib.

> Additionally, the default window size is set to 4K, which should effectively support IAA hardware.
>
> > >> > All that, of course, assuming we even want to support both
> > >> > accelerators. They're addressing the same problem after all. I wonder
> > >> > how we'd choose a precedence, since both seem to be present in the
> > >> > same processor family.
> > >> >
> > >> >
> > >>
> > >> That's an interesting question :-) I think overall performance
> > (throughput
> > >> and CPU overhead) should both be considered. IAA and QAT accelerators
> > >> don't present on all systems. We Bytedance choose to have both on our
> > >> platform when purchasing from Intel.

RE: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Liu, Yuan1 10 months, 2 weeks ago


> -----Original Message-----
> From: Hao Xiang <hao.xiang@bytedance.com>
> Sent: Thursday, January 11, 2024 1:42 PM
> To: Liu, Yuan1 <yuan1.liu@intel.com>
> Cc: Fabiano Rosas <farosas@suse.de>; Bryan Zhang
> <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> peter.maydell@linaro.org; berrange@redhat.com; Zou, Nanhai
> <nanhai.zou@intel.com>
> Subject: Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented
> 'qatzip' compression method
> 
> On Mon, Jan 8, 2024 at 6:26 PM Liu, Yuan1 <yuan1.liu@intel.com> wrote:
> >
> > > -----Original Message-----
> > > From: Fabiano Rosas <farosas@suse.de>
> > > Sent: Tuesday, January 9, 2024 4:28 AM
> > > To: Liu, Yuan1 <yuan1.liu@intel.com>; Hao Xiang
> > > <hao.xiang@bytedance.com>
> > > Cc: Bryan Zhang <bryan.zhang@bytedance.com>; qemu-devel@nongnu.org;
> > > marcandre.lureau@redhat.com; peterx@redhat.com; quintela@redhat.com;
> > > peter.maydell@linaro.org; berrange@redhat.com
> > > Subject: RE: [External] Re: [PATCH 3/5] migration: Introduce
> > > unimplemented 'qatzip' compression method
> > >
> > > "Liu, Yuan1" <yuan1.liu@intel.com> writes:
> > >
> > > >> -----Original Message-----
> > > >> From: Hao Xiang <hao.xiang@bytedance.com>
> > > >> Sent: Saturday, January 6, 2024 7:53 AM
> > > >> To: Fabiano Rosas <farosas@suse.de>
> > > >> Cc: Bryan Zhang <bryan.zhang@bytedance.com>;
> > > >> qemu-devel@nongnu.org; marcandre.lureau@redhat.com;
> > > >> peterx@redhat.com; quintela@redhat.com; peter.maydell@linaro.org;
> > > >> Liu, Yuan1 <yuan1.liu@intel.com>; berrange@redhat.com
> > > >> Subject: Re: [External] Re: [PATCH 3/5] migration: Introduce
> > > >> unimplemented 'qatzip' compression method
> > > >>
> > > >> On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de>
> wrote:
> > > >> >
> > > >> > Bryan Zhang <bryan.zhang@bytedance.com> writes:
> > > >> >
> > > >> > +cc Yuan Liu, Daniel Berrangé
> > > >> >
> > > >> > > Adds support for 'qatzip' as an option for the multifd
> > > >> > > compression method parameter, but copy-pastes the no-op logic
> > > >> > > to leave the actual methods effectively unimplemented. This
> > > >> > > is in preparation of a subsequent commit that will implement
> > > >> > > actually using QAT for compression and decompression.
> > > >> > >
> > > >> > > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> > > >> > > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> > > >> > > ---
> > > >> > >  hw/core/qdev-properties-system.c |  6 ++-
> > > >> > >  migration/meson.build            |  1 +
> > > >> > >  migration/multifd-qatzip.c       | 81
> > > >> ++++++++++++++++++++++++++++++++
> > > >> > >  migration/multifd.h              |  1 +
> > > >> > >  qapi/migration.json              |  5 +-
> > > >> > >  5 files changed, 92 insertions(+), 2 deletions(-)  create
> > > >> > > mode
> > > >> > > 100644 migration/multifd-qatzip.c
> > > >> > >
> > > >> > > diff --git a/hw/core/qdev-properties-system.c
> > > >> > > b/hw/core/qdev-properties-system.c
> > > >> > > index 1a396521d5..d8e48dcb0e 100644
> > > >> > > --- a/hw/core/qdev-properties-system.c
> > > >> > > +++ b/hw/core/qdev-properties-system.c
> > > >> > > @@ -658,7 +658,11 @@ const PropertyInfo
> > > >> > > qdev_prop_fdc_drive_type = { const PropertyInfo
> qdev_prop_multifd_compression = {
> > > >> > >      .name = "MultiFDCompression",
> > > >> > >      .description = "multifd_compression values, "
> > > >> > > -                   "none/zlib/zstd",
> > > >> > > +                   "none/zlib/zstd"
> > > >> > > +#ifdef CONFIG_QATZIP
> > > >> > > +                   "/qatzip"
> > > >> > > +#endif
> > > >> > > +                   ,
> > > >> > >      .enum_table = &MultiFDCompression_lookup,
> > > >> > >      .get = qdev_propinfo_get_enum,
> > > >> > >      .set = qdev_propinfo_set_enum, diff --git
> > > >> > > a/migration/meson.build b/migration/meson.build index
> > > >> > > 92b1cc4297..e20f318379 100644
> > > >> > > --- a/migration/meson.build
> > > >> > > +++ b/migration/meson.build
> > > >> > > @@ -40,6 +40,7 @@ if
> get_option('live_block_migration').allowed()
> > > >> > >    system_ss.add(files('block.c'))  endif
> > > >> > >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> > > >> > > +system_ss.add(when: qatzip, if_true:
> > > >> > > +files('multifd-qatzip.c'))
> > > >> > >
> > > >> > >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
> > > >> > >                  if_true: files('ram.c', diff --git
> > > >> > > a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c new
> > > >> > > file mode 100644 index 0000000000..1733bbddb7
> > > >> > > --- /dev/null
> > > >> > > +++ b/migration/multifd-qatzip.c
> > > >> > > @@ -0,0 +1,81 @@
> > > >> > > +/*
> > > >> > > + * Multifd QATzip compression implementation
> > > >> > > + *
> > > >> > > + * Copyright (c) Bytedance
> > > >> > > + *
> > > >> > > + * Authors:
> > > >> > > + *  Bryan Zhang <bryan.zhang@bytedance.com>
> > > >> > > + *  Hao Xiang   <hao.xiang@bytedance.com>
> > > >> > > + *
> > > >> > > + * This work is licensed under the terms of the GNU GPL,
> > > >> > > +version 2
> > > or
> > > >> later.
> > > >> > > + * See the COPYING file in the top-level directory.
> > > >> > > + */
> > > >> > > +
> > > >> > > +#include "qemu/osdep.h"
> > > >> > > +#include "exec/ramblock.h"
> > > >> > > +#include "exec/target_page.h"
> > > >> > > +#include "qapi/error.h"
> > > >> > > +#include "migration.h"
> > > >> > > +#include "options.h"
> > > >> > > +#include "multifd.h"
> > > >> > > +
> > > >> > > +static int qatzip_send_setup(MultiFDSendParams *p, Error
> **errp) {
> > > >> > > +    return 0;
> > > >> > > +}
> > > >> > > +
> > > >> > > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error
> > > **errp)
> > > >> > > +{};
> > > >> > > +
> > > >> > > +static int qatzip_send_prepare(MultiFDSendParams *p, Error
> > > >> > > +**errp) {
> > > >> > > +    MultiFDPages_t *pages = p->pages;
> > > >> > > +
> > > >> > > +    for (int i = 0; i < p->normal_num; i++) {
> > > >> > > +        p->iov[p->iovs_num].iov_base = pages->block->host +
> > > >> > > + p-
> > > >> >normal[i];
> > > >> > > +        p->iov[p->iovs_num].iov_len = p->page_size;
> > > >> > > +        p->iovs_num++;
> > > >> > > +    }
> > > >> > > +
> > > >> > > +    p->next_packet_size = p->normal_num * p->page_size;
> > > >> > > +    p->flags |= MULTIFD_FLAG_NOCOMP;
> > > >> > > +    return 0;
> > > >> > > +}
> > > >> > > +
> > > >> > > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error
> **errp) {
> > > >> > > +    return 0;
> > > >> > > +}
> > > >> > > +
> > > >> > > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> > > >> > > +
> > > >> > > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error
> **errp) {
> > > >> > > +    uint32_t flags = p->flags &
> > > >> > > +MULTIFD_FLAG_COMPRESSION_MASK;
> > > >> > > +
> > > >> > > +    if (flags != MULTIFD_FLAG_NOCOMP) {
> > > >> > > +        error_setg(errp, "multifd %u: flags received %x
> > > >> > > + flags
> > > >> expected %x",
> > > >> > > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> > > >> > > +        return -1;
> > > >> > > +    }
> > > >> > > +    for (int i = 0; i < p->normal_num; i++) {
> > > >> > > +        p->iov[i].iov_base = p->host + p->normal[i];
> > > >> > > +        p->iov[i].iov_len = p->page_size;
> > > >> > > +    }
> > > >> > > +    return qio_channel_readv_all(p->c, p->iov,
> > > >> > > +p->normal_num, errp); }
> > > >> > > +
> > > >> > > +static MultiFDMethods multifd_qatzip_ops = {
> > > >> > > +    .send_setup = qatzip_send_setup,
> > > >> > > +    .send_cleanup = qatzip_send_cleanup,
> > > >> > > +    .send_prepare = qatzip_send_prepare,
> > > >> > > +    .recv_setup = qatzip_recv_setup,
> > > >> > > +    .recv_cleanup = qatzip_recv_cleanup,
> > > >> > > +    .recv_pages = qatzip_recv_pages };
> > > >> > > +
> > > >> > > +static void multifd_qatzip_register(void) {
> > > >> > > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP,
> > > >> > > +&multifd_qatzip_ops); }
> > > >> > > +
> > > >> > > +migration_init(multifd_qatzip_register);
> > > >> > > diff --git a/migration/multifd.h b/migration/multifd.h index
> > > >> > > a835643b48..5600f7fc82 100644
> > > >> > > --- a/migration/multifd.h
> > > >> > > +++ b/migration/multifd.h
> > > >> > > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f,
> > > >> > > RAMBlock *block, ram_addr_t offset);  #define
> > > >> > > MULTIFD_FLAG_NOCOMP (0 << 1) #define MULTIFD_FLAG_ZLIB (1 <<
> > > >> > > 1)  #define MULTIFD_FLAG_ZSTD (2 <<
> > > >> > > 1)
> > > >> > > +#define MULTIFD_FLAG_QATZIP (3 << 1)
> > > >> > >
> > > >> > >  /* This value needs to be a multiple of
> > > >> > > qemu_target_page_size() */ #define MULTIFD_PACKET_SIZE (512 *
> > > >> > > 1024) diff --git a/qapi/migration.json b/qapi/migration.json
> > > >> > > index 6d5a4b0489..e3cc195aed 100644
> > > >> > > --- a/qapi/migration.json
> > > >> > > +++ b/qapi/migration.json
> > > >> > > @@ -625,11 +625,14 @@
> > > >> > >  #
> > > >> > >  # @zstd: use zstd compression method.
> > > >> > >  #
> > > >> > > +# @qatzip: use qatzip compression method.
> > > >> > > +#
> > > >> > >  # Since: 5.0
> > > >> > >  ##
> > > >> > >  { 'enum': 'MultiFDCompression',
> > > >> > >    'data': [ 'none', 'zlib',
> > > >> > > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> > > >> > > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> > > >> > > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
> > > >> >
> > > >> > In another thread adding support to another Intel accelerator
> > > >> > (IAA)
> > > we
> > > >> > decided that it was better to select the offloading as an
> > > >> > accelerator method to multifd zlib rather than as an entirely
> > > >> > new compression format. Take a look at that discussion:
> > > >> > https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
> > > >>
> > > >> We had some early discussion with Intel folks (probably a
> > > >> different
> > > team
> > > >> than the one with the above patchset). The understanding at the
> > > >> time is that QAT is good at both bulk data compression and
> > > >> decompression. IAA
> > > is
> > > >> good at decompression with smaller data size but compression
> > > performance
> > > >> is not the best. In multifd, we are compressing up to 128 4k
> > > >> pages at a time and potentially this can increase by configuring
> > > >> the packet size,
> > > at
> > > >> the time we thought QAT could be a better fit in the multifd live
> > > >> migration scenario. We would like to hear more from Intel.
> > > >>
> > > >> From our benchmark testing, with two QAT devices, we can get
> > > >> deflate compression throughout to around 7GB/s with ~160% CPU.
> > > >> That's beating
> > > the
> > > >> current software implementation (zlib and zstd) by a lot. We are
> > > >> still tuning the configuration in QEMU live migration now.
> > > >>
> > > >> >
> > > >> > As I understand it, QAT + QATzip would be compatible with both
> > > >> > zlib and IAA + QPL, so we'd add another accelerator method like
> this:
> > > >> >
> > > >> > https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@int
> > > >> > el.com
> > > >> >
> > > >>
> > > >> I quickly read over the IAA patchset and I saw this:
> > > >>
> > > >> "However, due to some reasons, QPL is currently not compatible
> > > >> with the existing Zlib method that Zlib compressed data can be
> > > >> decompressed by
> > > QPl
> > > >> and vice versa."
> > > >>
> > > >> The above probably means the current zlib software implementation
> > > >> and
> > > IAA
> > > >> are not compatible.
> > > >>
> > > >> For QAT, although, both Intel's QATzip and zlib are internally
> > > >> using deflate, QATzip only supports deflate with a 4 byte header,
> > > >> deflate wrapped by Gzip header and footer, or deflate wrapped by
> > > >> Intel® QAT
> > > >> Gzip* extension header and footer. None of the headers can be
> > > recognized
> > > >> by zlib software implementation is my understanding. So if we
> > > >> want to
> > > make
> > > >> them compatible with zlib, the QATzip library needs to support
> that.
> > > >
> > > > The QPL library currently cannot support the Z_SYNC_FLULSH
> > > > operation of
> > > zlib steaming.
> > > > This is the reason why it is not compatible with zlib.
> > > >
> > >
> > > I had understood from previous discussion that we'd be able to at
> > > least support compression with QPL and decompression with the
> > > existing zlib-based code. Is that not correct? I was about to
> > > suggest experimenting with the window size in the existing code to
> > > hopefully solve the 4kb window size issue. If there are other
> > > limitations, then that will not be enough.
> > >
> > > Also, can you point to the source of that statement about
> > > Z_SYNC_FLUSH, I couldn't find more information about it in the
> documentation.
> >
> > static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) {
> >     struct zlib_data *z = p->data;
> >     z_stream *zs = &z->zs;
> >     …
> >     for (i = 0; i < p->normal_num; i++) {
> >         uint32_t available = z->zbuff_len - out_size;
> >         int flush = Z_NO_FLUSH;
> >
> >         if (i == p->normal_num - 1) {
> >             flush = Z_SYNC_FLUSH;
> >         }
> > If I understand correctly, the current implementation of multifd zlib
> treats each multifd thread as a separate stream. This implementation adds
> zlib headers and footers only to the beginning and end of the data, and
> the data transmitted in between does not require headers and footers.
> 
> zlib_send_prepare() is just calling the deflate() repeatedly in a loop.
> While compressing the last page, it sets the flag Z_SYNC_FLUSH, which
> should dump all pending output to the buffer.
> 
> "This implementation adds zlib headers and footers only to the beginning
> and end of the data,"
> ^ You mean putting a header/footer per multifd packet, correct?

Yes, the header information is constructed by DeflateInit, which is 
only done once for each multifd channel. If there is footer information, 
the footer information can be added with Z_FINISH of deflate

> >
> > The current implementation for IAA supports compressing data indicated
> by p->normal_num as a single stream. Each compressed data segment has zlib
> headers and footers. Since Z_SYNC_FLUSH is not supported, this means IAA
> has to complete the compression for a stream at once and cannot output
> parts of a stream in advance. Therefore, the current IAA is not compatible
> with existing zlib. Currently, it seems that the QAT implementation
> follows a similar approach.
> 
> "Since Z_SYNC_FLUSH is not supported, this means IAA has to complete the
> compression for a stream at once and cannot output parts of a stream in
> advance."
Yes, this is a limitation for compatibility with existing zlib implementations

> Does IAA's deflate compression put a header/footer per page? Or per
> multifd packet?
Yes, both of them can be done. QPL library supports ZLIB header/footer.

> >
> > Regarding the reference to Z_SYNC_FLUSH, you can find it at
> https://www.zlib.net/manual.html:
> > “If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
> flushed to the output buffer and the output is aligned on a byte boundary,
> so that the decompressor can get all input data available so far. (In
> particular avail_in is zero after the call if enough output space has been
> provided before the call.) Flushing may degrade compression for some
> compression algorithms and so it should be used only when necessary. This
> completes the current deflate block and follows it with an empty stored
> block that is three bits plus filler bits to the next byte, followed by
> four bytes (00 00 ff ff).”
> >
> > Based on the information above, I suggest the following options:
> >
> > 1. Modify multifd zlib to perform stream compression each time with p-
> >normal_num pages. If this modification makes IAA compatible with zlib, I
> will implement it in the next version as per
> https://lore.kernel.org/all/20240103112851.908082-4-yuan1.liu@intel.com/T/
> and provide performance data. We will also verify the feasibility with
> QAT.
> >
> > 2. Use zlib compression without stream compression, meaning each page is
> independently compressed. The advantage is that accelerators can
> concurrently process more pages, and the current IAA and QAT can both be
> compatible. The downside is a loss in compression ratio, and the length of
> the data after compressing each page needs to be added to MultiFDPacket_t.
> If future compression functionality considers support only on
> accelerators, the compression ratio can be improved through compression
> levels or other features without additional host CPU overhead.
> >
> 
> We noticed a pretty significant performance difference in QAT deflate
> while using the streaming version VS the non-streaming version. The non-
> streaming version has better performance.
> Sounds like the easiest way to have zlib/IAA/QAT to be capable of
> compressing/decompressing interchangeably is to set the Z_SYNC_FLUSH flag
> on all deflate() calls in zlib_send_prepare(). And in my opinion, it is
> worth the trade-off. If my hardware doesn't have accelerators and I want
> pure software based compression, I would choose zstd over zlib.

like you said before, we can use the deflate function to compress 
individual pages or multifd packets. I am working on solving this 
issue and will provide compatibility results and early performance 
data in the next version

> > Additionally, the default window size is set to 4K, which should
> effectively support IAA hardware.
> >
> > > >> > All that, of course, assuming we even want to support both
> > > >> > accelerators. They're addressing the same problem after all. I
> > > >> > wonder how we'd choose a precedence, since both seem to be
> > > >> > present in the same processor family.
> > > >> >
> > > >> >
> > > >>
> > > >> That's an interesting question :-) I think overall performance
> > > (throughput
> > > >> and CPU overhead) should both be considered. IAA and QAT
> > > >> accelerators don't present on all systems. We Bytedance choose to
> > > >> have both on our platform when purchasing from Intel.

Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Hao Xiang 10 months, 3 weeks ago

On Fri, Jan 5, 2024 at 3:52 PM Hao Xiang <hao.xiang@bytedance.com> wrote:
>
> On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de> wrote:
> >
> > Bryan Zhang <bryan.zhang@bytedance.com> writes:
> >
> > +cc Yuan Liu, Daniel Berrangé
> >
> > > Adds support for 'qatzip' as an option for the multifd compression
> > > method parameter, but copy-pastes the no-op logic to leave the actual
> > > methods effectively unimplemented. This is in preparation of a
> > > subsequent commit that will implement actually using QAT for compression
> > > and decompression.
> > >
> > > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
> > > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> > > ---
> > >  hw/core/qdev-properties-system.c |  6 ++-
> > >  migration/meson.build            |  1 +
> > >  migration/multifd-qatzip.c       | 81 ++++++++++++++++++++++++++++++++
> > >  migration/multifd.h              |  1 +
> > >  qapi/migration.json              |  5 +-
> > >  5 files changed, 92 insertions(+), 2 deletions(-)
> > >  create mode 100644 migration/multifd-qatzip.c
> > >
> > > diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
> > > index 1a396521d5..d8e48dcb0e 100644
> > > --- a/hw/core/qdev-properties-system.c
> > > +++ b/hw/core/qdev-properties-system.c
> > > @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
> > >  const PropertyInfo qdev_prop_multifd_compression = {
> > >      .name = "MultiFDCompression",
> > >      .description = "multifd_compression values, "
> > > -                   "none/zlib/zstd",
> > > +                   "none/zlib/zstd"
> > > +#ifdef CONFIG_QATZIP
> > > +                   "/qatzip"
> > > +#endif
> > > +                   ,
> > >      .enum_table = &MultiFDCompression_lookup,
> > >      .get = qdev_propinfo_get_enum,
> > >      .set = qdev_propinfo_set_enum,
> > > diff --git a/migration/meson.build b/migration/meson.build
> > > index 92b1cc4297..e20f318379 100644
> > > --- a/migration/meson.build
> > > +++ b/migration/meson.build
> > > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
> > >    system_ss.add(files('block.c'))
> > >  endif
> > >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
> > > +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
> > >
> > >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
> > >                  if_true: files('ram.c',
> > > diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c
> > > new file mode 100644
> > > index 0000000000..1733bbddb7
> > > --- /dev/null
> > > +++ b/migration/multifd-qatzip.c
> > > @@ -0,0 +1,81 @@
> > > +/*
> > > + * Multifd QATzip compression implementation
> > > + *
> > > + * Copyright (c) Bytedance
> > > + *
> > > + * Authors:
> > > + *  Bryan Zhang <bryan.zhang@bytedance.com>
> > > + *  Hao Xiang   <hao.xiang@bytedance.com>
> > > + *
> > > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > > + * See the COPYING file in the top-level directory.
> > > + */
> > > +
> > > +#include "qemu/osdep.h"
> > > +#include "exec/ramblock.h"
> > > +#include "exec/target_page.h"
> > > +#include "qapi/error.h"
> > > +#include "migration.h"
> > > +#include "options.h"
> > > +#include "multifd.h"
> > > +
> > > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp)
> > > +{
> > > +    return 0;
> > > +}
> > > +
> > > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp) {};
> > > +
> > > +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
> > > +{
> > > +    MultiFDPages_t *pages = p->pages;
> > > +
> > > +    for (int i = 0; i < p->normal_num; i++) {
> > > +        p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
> > > +        p->iov[p->iovs_num].iov_len = p->page_size;
> > > +        p->iovs_num++;
> > > +    }
> > > +
> > > +    p->next_packet_size = p->normal_num * p->page_size;
> > > +    p->flags |= MULTIFD_FLAG_NOCOMP;
> > > +    return 0;
> > > +}
> > > +
> > > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp)
> > > +{
> > > +    return 0;
> > > +}
> > > +
> > > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
> > > +
> > > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp)
> > > +{
> > > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
> > > +
> > > +    if (flags != MULTIFD_FLAG_NOCOMP) {
> > > +        error_setg(errp, "multifd %u: flags received %x flags expected %x",
> > > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
> > > +        return -1;
> > > +    }
> > > +    for (int i = 0; i < p->normal_num; i++) {
> > > +        p->iov[i].iov_base = p->host + p->normal[i];
> > > +        p->iov[i].iov_len = p->page_size;
> > > +    }
> > > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
> > > +}
> > > +
> > > +static MultiFDMethods multifd_qatzip_ops = {
> > > +    .send_setup = qatzip_send_setup,
> > > +    .send_cleanup = qatzip_send_cleanup,
> > > +    .send_prepare = qatzip_send_prepare,
> > > +    .recv_setup = qatzip_recv_setup,
> > > +    .recv_cleanup = qatzip_recv_cleanup,
> > > +    .recv_pages = qatzip_recv_pages
> > > +};
> > > +
> > > +static void multifd_qatzip_register(void)
> > > +{
> > > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP, &multifd_qatzip_ops);
> > > +}
> > > +
> > > +migration_init(multifd_qatzip_register);
> > > diff --git a/migration/multifd.h b/migration/multifd.h
> > > index a835643b48..5600f7fc82 100644
> > > --- a/migration/multifd.h
> > > +++ b/migration/multifd.h
> > > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
> > >  #define MULTIFD_FLAG_NOCOMP (0 << 1)
> > >  #define MULTIFD_FLAG_ZLIB (1 << 1)
> > >  #define MULTIFD_FLAG_ZSTD (2 << 1)
> > > +#define MULTIFD_FLAG_QATZIP (3 << 1)
> > >
> > >  /* This value needs to be a multiple of qemu_target_page_size() */
> > >  #define MULTIFD_PACKET_SIZE (512 * 1024)
> > > diff --git a/qapi/migration.json b/qapi/migration.json
> > > index 6d5a4b0489..e3cc195aed 100644
> > > --- a/qapi/migration.json
> > > +++ b/qapi/migration.json
> > > @@ -625,11 +625,14 @@
> > >  #
> > >  # @zstd: use zstd compression method.
> > >  #
> > > +# @qatzip: use qatzip compression method.
> > > +#
> > >  # Since: 5.0
> > >  ##
> > >  { 'enum': 'MultiFDCompression',
> > >    'data': [ 'none', 'zlib',
> > > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
> > > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
> > > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
> >
> > In another thread adding support to another Intel accelerator (IAA) we
> > decided that it was better to select the offloading as an accelerator
> > method to multifd zlib rather than as an entirely new compression
> > format. Take a look at that discussion:
> > https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
>
> We had some early discussion with Intel folks (probably a different
> team than the one with the above patchset). The understanding at the
> time is that QAT is good at both bulk data compression and
> decompression. IAA is good at decompression with smaller data size but
> compression performance is not the best. In multifd, we are
> compressing up to 128 4k pages at a time and potentially this can
> increase by configuring the packet size, at the time we thought QAT
> could be a better fit in the multifd live migration scenario. We would
> like to hear more from Intel.
>
> From our benchmark testing, with two QAT devices, we can get deflate
> compression throughout to around 7GB/s with ~160% CPU. That's beating
> the current software implementation (zlib and zstd) by a lot. We are
> still tuning the configuration in QEMU live migration now.
>
> >
> > As I understand it, QAT + QATzip would be compatible with both zlib and
> > IAA + QPL, so we'd add another accelerator method like this:
> >
> > https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com
> >
>
> I quickly read over the IAA patchset and I saw this:
>
> "However, due to some reasons, QPL is currently
> not compatible with the existing Zlib method that Zlib compressed data
> can be decompressed by QPl and vice versa."
>
> The above probably means the current zlib software implementation and
> IAA are not compatible.
>
> For QAT, although, both Intel's QATzip and zlib are internally using
> deflate, QATzip only supports deflate with a 4 byte header, deflate
> wrapped by Gzip header and footer, or deflate wrapped by Intel® QAT
> Gzip* extension header and footer. None of the headers can be
> recognized by zlib software implementation is my understanding. So if
> we want to make them compatible with zlib, the QATzip library needs to
> support that.

I took a closer look at Intel's documentation here
https://github.com/intel/QATzip
QATzip does have a compression format QZ_DEFLATE_RAW, which uses the
deflate format but no header at all. This looks like the same as what
QEMU's current zlib implementation does - using the raw deflate
format. I can have a quick test to confirm that. Meanwhile, the
documentation mentioned that if using the raw deflate format,
decompression cannot be offloaded by QAT hardware. So there is a
trade-off here if we want to avoid creating a new compression format
in QEMU.

>
> > All that, of course, assuming we even want to support both
> > accelerators. They're addressing the same problem after all. I wonder
> > how we'd choose a precedence, since both seem to be present in the same
> > processor family.
> >
> >
>
> That's an interesting question :-) I think overall performance
> (throughput and CPU overhead) should both be considered. IAA and QAT
> accelerators don't present on all systems. We Bytedance choose to have
> both on our platform when purchasing from Intel.

Re: [External] Re: [PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method

Posted by Fabiano Rosas 10 months, 3 weeks ago

Hao Xiang <hao.xiang@bytedance.com> writes:

> On Fri, Jan 5, 2024 at 3:52 PM Hao Xiang <hao.xiang@bytedance.com> wrote:
>>
>> On Fri, Jan 5, 2024 at 12:07 PM Fabiano Rosas <farosas@suse.de> wrote:
>> >
>> > Bryan Zhang <bryan.zhang@bytedance.com> writes:
>> >
>> > +cc Yuan Liu, Daniel Berrangé
>> >
>> > > Adds support for 'qatzip' as an option for the multifd compression
>> > > method parameter, but copy-pastes the no-op logic to leave the actual
>> > > methods effectively unimplemented. This is in preparation of a
>> > > subsequent commit that will implement actually using QAT for compression
>> > > and decompression.
>> > >
>> > > Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com>
>> > > Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
>> > > ---
>> > >  hw/core/qdev-properties-system.c |  6 ++-
>> > >  migration/meson.build            |  1 +
>> > >  migration/multifd-qatzip.c       | 81 ++++++++++++++++++++++++++++++++
>> > >  migration/multifd.h              |  1 +
>> > >  qapi/migration.json              |  5 +-
>> > >  5 files changed, 92 insertions(+), 2 deletions(-)
>> > >  create mode 100644 migration/multifd-qatzip.c
>> > >
>> > > diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
>> > > index 1a396521d5..d8e48dcb0e 100644
>> > > --- a/hw/core/qdev-properties-system.c
>> > > +++ b/hw/core/qdev-properties-system.c
>> > > @@ -658,7 +658,11 @@ const PropertyInfo qdev_prop_fdc_drive_type = {
>> > >  const PropertyInfo qdev_prop_multifd_compression = {
>> > >      .name = "MultiFDCompression",
>> > >      .description = "multifd_compression values, "
>> > > -                   "none/zlib/zstd",
>> > > +                   "none/zlib/zstd"
>> > > +#ifdef CONFIG_QATZIP
>> > > +                   "/qatzip"
>> > > +#endif
>> > > +                   ,
>> > >      .enum_table = &MultiFDCompression_lookup,
>> > >      .get = qdev_propinfo_get_enum,
>> > >      .set = qdev_propinfo_set_enum,
>> > > diff --git a/migration/meson.build b/migration/meson.build
>> > > index 92b1cc4297..e20f318379 100644
>> > > --- a/migration/meson.build
>> > > +++ b/migration/meson.build
>> > > @@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
>> > >    system_ss.add(files('block.c'))
>> > >  endif
>> > >  system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
>> > > +system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c'))
>> > >
>> > >  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
>> > >                  if_true: files('ram.c',
>> > > diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c
>> > > new file mode 100644
>> > > index 0000000000..1733bbddb7
>> > > --- /dev/null
>> > > +++ b/migration/multifd-qatzip.c
>> > > @@ -0,0 +1,81 @@
>> > > +/*
>> > > + * Multifd QATzip compression implementation
>> > > + *
>> > > + * Copyright (c) Bytedance
>> > > + *
>> > > + * Authors:
>> > > + *  Bryan Zhang <bryan.zhang@bytedance.com>
>> > > + *  Hao Xiang   <hao.xiang@bytedance.com>
>> > > + *
>> > > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> > > + * See the COPYING file in the top-level directory.
>> > > + */
>> > > +
>> > > +#include "qemu/osdep.h"
>> > > +#include "exec/ramblock.h"
>> > > +#include "exec/target_page.h"
>> > > +#include "qapi/error.h"
>> > > +#include "migration.h"
>> > > +#include "options.h"
>> > > +#include "multifd.h"
>> > > +
>> > > +static int qatzip_send_setup(MultiFDSendParams *p, Error **errp)
>> > > +{
>> > > +    return 0;
>> > > +}
>> > > +
>> > > +static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp) {};
>> > > +
>> > > +static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp)
>> > > +{
>> > > +    MultiFDPages_t *pages = p->pages;
>> > > +
>> > > +    for (int i = 0; i < p->normal_num; i++) {
>> > > +        p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
>> > > +        p->iov[p->iovs_num].iov_len = p->page_size;
>> > > +        p->iovs_num++;
>> > > +    }
>> > > +
>> > > +    p->next_packet_size = p->normal_num * p->page_size;
>> > > +    p->flags |= MULTIFD_FLAG_NOCOMP;
>> > > +    return 0;
>> > > +}
>> > > +
>> > > +static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp)
>> > > +{
>> > > +    return 0;
>> > > +}
>> > > +
>> > > +static void qatzip_recv_cleanup(MultiFDRecvParams *p) {};
>> > > +
>> > > +static int qatzip_recv_pages(MultiFDRecvParams *p, Error **errp)
>> > > +{
>> > > +    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
>> > > +
>> > > +    if (flags != MULTIFD_FLAG_NOCOMP) {
>> > > +        error_setg(errp, "multifd %u: flags received %x flags expected %x",
>> > > +                   p->id, flags, MULTIFD_FLAG_NOCOMP);
>> > > +        return -1;
>> > > +    }
>> > > +    for (int i = 0; i < p->normal_num; i++) {
>> > > +        p->iov[i].iov_base = p->host + p->normal[i];
>> > > +        p->iov[i].iov_len = p->page_size;
>> > > +    }
>> > > +    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
>> > > +}
>> > > +
>> > > +static MultiFDMethods multifd_qatzip_ops = {
>> > > +    .send_setup = qatzip_send_setup,
>> > > +    .send_cleanup = qatzip_send_cleanup,
>> > > +    .send_prepare = qatzip_send_prepare,
>> > > +    .recv_setup = qatzip_recv_setup,
>> > > +    .recv_cleanup = qatzip_recv_cleanup,
>> > > +    .recv_pages = qatzip_recv_pages
>> > > +};
>> > > +
>> > > +static void multifd_qatzip_register(void)
>> > > +{
>> > > +    multifd_register_ops(MULTIFD_COMPRESSION_QATZIP, &multifd_qatzip_ops);
>> > > +}
>> > > +
>> > > +migration_init(multifd_qatzip_register);
>> > > diff --git a/migration/multifd.h b/migration/multifd.h
>> > > index a835643b48..5600f7fc82 100644
>> > > --- a/migration/multifd.h
>> > > +++ b/migration/multifd.h
>> > > @@ -33,6 +33,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
>> > >  #define MULTIFD_FLAG_NOCOMP (0 << 1)
>> > >  #define MULTIFD_FLAG_ZLIB (1 << 1)
>> > >  #define MULTIFD_FLAG_ZSTD (2 << 1)
>> > > +#define MULTIFD_FLAG_QATZIP (3 << 1)
>> > >
>> > >  /* This value needs to be a multiple of qemu_target_page_size() */
>> > >  #define MULTIFD_PACKET_SIZE (512 * 1024)
>> > > diff --git a/qapi/migration.json b/qapi/migration.json
>> > > index 6d5a4b0489..e3cc195aed 100644
>> > > --- a/qapi/migration.json
>> > > +++ b/qapi/migration.json
>> > > @@ -625,11 +625,14 @@
>> > >  #
>> > >  # @zstd: use zstd compression method.
>> > >  #
>> > > +# @qatzip: use qatzip compression method.
>> > > +#
>> > >  # Since: 5.0
>> > >  ##
>> > >  { 'enum': 'MultiFDCompression',
>> > >    'data': [ 'none', 'zlib',
>> > > -            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
>> > > +            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' },
>> > > +            { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'} ] }
>> >
>> > In another thread adding support to another Intel accelerator (IAA) we
>> > decided that it was better to select the offloading as an accelerator
>> > method to multifd zlib rather than as an entirely new compression
>> > format. Take a look at that discussion:
>> > https://lore.kernel.org/r/ZTFCnqbbqlmsUkRC@redhat.com
>>
>> We had some early discussion with Intel folks (probably a different
>> team than the one with the above patchset). The understanding at the
>> time is that QAT is good at both bulk data compression and
>> decompression. IAA is good at decompression with smaller data size but
>> compression performance is not the best. In multifd, we are
>> compressing up to 128 4k pages at a time and potentially this can
>> increase by configuring the packet size, at the time we thought QAT
>> could be a better fit in the multifd live migration scenario. We would
>> like to hear more from Intel.
>>
>> From our benchmark testing, with two QAT devices, we can get deflate
>> compression throughout to around 7GB/s with ~160% CPU. That's beating
>> the current software implementation (zlib and zstd) by a lot. We are
>> still tuning the configuration in QEMU live migration now.
>>
>> >
>> > As I understand it, QAT + QATzip would be compatible with both zlib and
>> > IAA + QPL, so we'd add another accelerator method like this:
>> >
>> > https://lore.kernel.org/r/20240103112851.908082-3-yuan1.liu@intel.com
>> >
>>
>> I quickly read over the IAA patchset and I saw this:
>>
>> "However, due to some reasons, QPL is currently
>> not compatible with the existing Zlib method that Zlib compressed data
>> can be decompressed by QPl and vice versa."
>>
>> The above probably means the current zlib software implementation and
>> IAA are not compatible.
>>
>> For QAT, although, both Intel's QATzip and zlib are internally using
>> deflate, QATzip only supports deflate with a 4 byte header, deflate
>> wrapped by Gzip header and footer, or deflate wrapped by Intel® QAT
>> Gzip* extension header and footer. None of the headers can be
>> recognized by zlib software implementation is my understanding. So if
>> we want to make them compatible with zlib, the QATzip library needs to
>> support that.
>
> I took a closer look at Intel's documentation here
> https://github.com/intel/QATzip
> QATzip does have a compression format QZ_DEFLATE_RAW, which uses the
> deflate format but no header at all. This looks like the same as what
> QEMU's current zlib implementation does - using the raw deflate
> format. I can have a quick test to confirm that. Meanwhile, the
> documentation mentioned that if using the raw deflate format,
> decompression cannot be offloaded by QAT hardware. So there is a
> trade-off here if we want to avoid creating a new compression format
> in QEMU.

The default zlib behavior is to include the zlib header and trailer. To
obtain a raw deflate stream we'd need to suppress the zlib wrapper. It
might be an option to change QEMU code to make it produce a stream
compatible with QAT provided we don't currently use any feature that
needs the header.

One immediate issue I see is that without the header zlib defaults to
the 32kb window size, which stops us from changing the window size to
cope with QPL's 4kb limitation.

>
>>
>> > All that, of course, assuming we even want to support both
>> > accelerators. They're addressing the same problem after all. I wonder
>> > how we'd choose a precedence, since both seem to be present in the same
>> > processor family.
>> >
>> >
>>
>> That's an interesting question :-) I think overall performance
>> (throughput and CPU overhead) should both be considered. IAA and QAT
>> accelerators don't present on all systems. We Bytedance choose to have
>> both on our platform when purchasing from Intel.

We might need someone with the proper hardware to run a benchmark using
both accelerators if we end up deciding to supporting both. Ideally we'd
have a precedence defined so we don't need to force the user to select
one of them when both are present.

[PATCH 1/5] meson: Introduce 'qatzip' feature to the build system.
[PATCH 2/5] migration: Add compression level parameter for QATzip
[PATCH 3/5] migration: Introduce unimplemented 'qatzip' compression method
[PATCH 4/5] migration: Implement 'qatzip' methods using QAT
[PATCH 5/5] migration: Add integration test for 'qatzip' compression method