[v1] qcow2: add compression type feature

[Qemu-devel] [PATCH v1] [RFC] qcow2: add compression type feature

Posted by Denis Plotnikov 4 years, 11 months ago

The patch adds some preparation parts for incompatible compression type
feature into QCOW2 header that indicates that *all* compressed clusters
must be (de)compressed using a certain compression type.

It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus the only compression
algorithm is used for the image.

The plan is to add support for ZSTD and then may be something more effective
in the future.

ZSTD compression algorithm consumes 3-5 times less CPU power with a
comparable compression ratio with zlib. It would be wise to use it for
data compression e.g. for backups.

The default compression is ZLIB.

Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
 block/qcow2.c          | 25 +++++++++++++++++++++++++
 block/qcow2.h          | 29 ++++++++++++++++++++++-------
 docs/interop/qcow2.txt | 25 ++++++++++++++++++++++++-
 qapi/block-core.json   | 14 ++++++++++++++
 4 files changed, 85 insertions(+), 8 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 3ace3b2209..bca506b80f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -74,6 +74,7 @@ typedef struct {
 #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
 #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
 #define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
+#define  QCOW2_EXT_MAGIC_COMPRESSION_TYPE 0x434D5052
 
 static int coroutine_fn
 qcow2_co_preadv_compressed(BlockDriverState *bs,
@@ -398,6 +399,9 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
 #endif
             break;
 
+        case QCOW2_EXT_MAGIC_COMPRESSION_TYPE:
+            /* Setting compression type to BDRVQcow2State->compression_type */
+            /* from the image header is going to be here */
         case QCOW2_EXT_MAGIC_DATA_FILE:
         {
             s->image_data_file = g_malloc0(ext.len + 1);
@@ -2553,6 +2557,11 @@ int qcow2_update_header(BlockDriverState *bs)
                 .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
                 .name = "lazy refcounts",
             },
+            {
+                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+                .bit  = QCOW2_INCOMPAT_COMPRESSION_TYPE_BITNR,
+                .name = "compression type",
+            },
         };
 
         ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
@@ -2583,6 +2592,22 @@ int qcow2_update_header(BlockDriverState *bs)
         buflen -= ret;
     }
 
+    /* Compression type extension */
+    if (s->compression_type != 0) {
+        Qcow2CompressionTypeExt comp_header = {
+            .compression_type = cpu_to_be32(s->compression_type),
+        };
+        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_COMPRESSION_TYPE,
+                             &comp_header,
+                             cpu_to_be64(sizeof(comp_header)),
+                             buflen);
+        if (ret < 0) {
+            goto fail;
+        }
+        buf += ret;
+        buflen -= ret;
+    }
+
     /* Keep unknown header extensions */
     QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
         ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
diff --git a/block/qcow2.h b/block/qcow2.h
index fdee297f33..08468ab97d 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -198,16 +198,20 @@ enum {
 
 /* Incompatible feature bits */
 enum {
-    QCOW2_INCOMPAT_DIRTY_BITNR      = 0,
-    QCOW2_INCOMPAT_CORRUPT_BITNR    = 1,
-    QCOW2_INCOMPAT_DATA_FILE_BITNR  = 2,
-    QCOW2_INCOMPAT_DIRTY            = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
-    QCOW2_INCOMPAT_CORRUPT          = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
-    QCOW2_INCOMPAT_DATA_FILE        = 1 << QCOW2_INCOMPAT_DATA_FILE_BITNR,
+    QCOW2_INCOMPAT_DIRTY_BITNR            = 0,
+    QCOW2_INCOMPAT_CORRUPT_BITNR          = 1,
+    QCOW2_INCOMPAT_DATA_FILE_BITNR        = 2,
+    QCOW2_INCOMPAT_COMPRESSION_TYPE_BITNR = 3,
+    QCOW2_INCOMPAT_DIRTY                  = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
+    QCOW2_INCOMPAT_CORRUPT                = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
+    QCOW2_INCOMPAT_DATA_FILE              = 1 << QCOW2_INCOMPAT_DATA_FILE_BITNR,
+    QCOW2_INCOMPAT_COMPRESSION_TYPE       =
+        1 << QCOW2_INCOMPAT_COMPRESSION_TYPE_BITNR,
 
     QCOW2_INCOMPAT_MASK             = QCOW2_INCOMPAT_DIRTY
                                     | QCOW2_INCOMPAT_CORRUPT
-                                    | QCOW2_INCOMPAT_DATA_FILE,
+                                    | QCOW2_INCOMPAT_DATA_FILE
+                                    | QCOW2_INCOMPAT_COMPRESSION_TYPE,
 };
 
 /* Compatible feature bits */
@@ -263,6 +267,10 @@ typedef struct Qcow2BitmapHeaderExt {
     uint64_t bitmap_directory_offset;
 } QEMU_PACKED Qcow2BitmapHeaderExt;
 
+typedef struct Qcow2CompressionTypeExt {
+    uint32_t compression_type;
+} QEMU_PACKED Qcow2CompressionTypeExt;
+
 typedef struct BDRVQcow2State {
     int cluster_bits;
     int cluster_size;
@@ -350,6 +358,13 @@ typedef struct BDRVQcow2State {
     int nb_compress_threads;
 
     BdrvChild *data_file;
+    /**
+     * Compression type used for the image. Default: 0 - ZLIB
+     * The image compression type is set on image creation.
+     * The only way to change the compression type is to convert the image
+     * with the desired compression type set
+     */
+    uint32_t compression_type;
 } BDRVQcow2State;
 
 typedef struct Qcow2COWRegion {
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index af5711e533..2c907521af 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -109,7 +109,11 @@ in the description of a field.
                                 An External Data File Name header extension may
                                 be present if this bit is set.
 
-                    Bits 3-63:  Reserved (set to 0)
+                    Bit 3:      Compression type bit. If the bit is set, then the
+                                type of compression the image uses is set in the
+                                header extension
+
+                    Bits 4-63:  Reserved (set to 0)
 
          80 -  87:  compatible_features
                     Bitmask of compatible features. An implementation can
@@ -175,6 +179,7 @@ be stored. Each extension has a structure like the following:
                         0x23852875 - Bitmaps extension
                         0x0537be77 - Full disk encryption header pointer
                         0x44415441 - External data file name string
+                        0x434D5052 - Compression type extension
                         other      - Unknown header extension, can be safely
                                      ignored
 
@@ -771,3 +776,21 @@ In the image file the 'enabled' state is reflected by the 'auto' flag. If this
 flag is set, the software must consider the bitmap as 'enabled' and start
 tracking virtual disk changes to this bitmap from the first write to the
 virtual disk. If this flag is not set then the bitmap is disabled.
+
+
+== Compression type extension ==
+
+The compression type extension is an optional header extension. It stores the
+ID of the compressor which has to be used to compress/decompress disk clusters.
+The compression type is used for all disk cluster. Two clusters of the image
+couldn't be compressed with different compressors.
+
+The compression type can be set on the image creation. The only way to change
+the compression type is to convert the image explicitly.
+
+Available compression types:
+    ID    0: ZLIB (gzip)
+          1: ZSTD
+
+The default compression type is ZLIB. When ZLIB is used the compression type
+header extension is not present.
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 7ccbfff9d0..8eebcc728b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -698,6 +698,7 @@
 { 'struct': 'BlockMeasureInfo',
   'data': {'required': 'int', 'fully-allocated': 'int'} }
 
+
 ##
 # @query-block:
 #
@@ -5257,3 +5258,16 @@
   'data' : { 'node-name': 'str',
              'iothread': 'StrOrNull',
              '*force': 'bool' } }
+
+##
+# @Qcow2CompressionType:
+#
+# Compression type used in qcow2 image file
+#
+# @zlib - gzip compressor
+# @zstd - zstd compression
+#
+# Since: 4.0
+##
+{ 'enum': 'Qcow2CompressionType',
+  'data': [ 'zlib', 'zstd' ] }
-- 
2.17.0

Re: [Qemu-devel] [PATCH v1] [RFC] qcow2: add compression type feature

Posted by Eric Blake 4 years, 11 months ago

On 5/16/19 8:48 AM, Denis Plotnikov wrote:
> The patch adds some preparation parts for incompatible compression type
> feature into QCOW2 header that indicates that *all* compressed clusters
> must be (de)compressed using a certain compression type.
> 
> It is implied that the compression type is set on the image creation and
> can be changed only later by image conversion, thus the only compression
> algorithm is used for the image.
> 
> The plan is to add support for ZSTD and then may be something more effective
> in the future.
> 
> ZSTD compression algorithm consumes 3-5 times less CPU power with a
> comparable compression ratio with zlib. It would be wise to use it for
> data compression e.g. for backups.
> 
> The default compression is ZLIB.
> 
> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
> ---

> +++ b/docs/interop/qcow2.txt
> @@ -109,7 +109,11 @@ in the description of a field.
>                                  An External Data File Name header extension may
>                                  be present if this bit is set.
>  
> -                    Bits 3-63:  Reserved (set to 0)
> +                    Bit 3:      Compression type bit. If the bit is set, then the
> +                                type of compression the image uses is set in the
> +                                header extension

I'd call out 'Compression type' header extension by name, to make it
more obvious.  Is it an error if bit 3 is set but the compression header
is not present? Is it an error if the compression header is present but
bit 3 is not set?

> +
> +                    Bits 4-63:  Reserved (set to 0)
>  
>           80 -  87:  compatible_features
>                      Bitmask of compatible features. An implementation can
> @@ -175,6 +179,7 @@ be stored. Each extension has a structure like the following:
>                          0x23852875 - Bitmaps extension
>                          0x0537be77 - Full disk encryption header pointer
>                          0x44415441 - External data file name string
> +                        0x434D5052 - Compression type extension

Our earlier magic numbers were probably created as random numbers and
contain 8-bit values, to make them less likely to appear naturally in
other parts of the file and thus less likely to be misinterpreted.  But
that's not a requirement, and I see that you followed the lead of "DATA"
and created "CMPR" for yours.  Works for me :)

>                          other      - Unknown header extension, can be safely
>                                       ignored
>  
> @@ -771,3 +776,21 @@ In the image file the 'enabled' state is reflected by the 'auto' flag. If this
>  flag is set, the software must consider the bitmap as 'enabled' and start
>  tracking virtual disk changes to this bitmap from the first write to the
>  virtual disk. If this flag is not set then the bitmap is disabled.
> +
> +
> +== Compression type extension ==
> +
> +The compression type extension is an optional header extension. It stores the

Could probably do a better job at describing when the header is optional
vs. mandatory.

> +ID of the compressor which has to be used to compress/decompress disk clusters.
> +The compression type is used for all disk cluster. Two clusters of the image
> +couldn't be compressed with different compressors.

Wording suggestion: A single compression type is applied to all
compressed disk clusters, with no way to change compression types per
cluster.

But is that a hard requirement? Since this is already an incompatible
feature extension, we could have a compression type that states that
each compressed cluster is self-describing via a 1-byte prefix (yes, it
means compression is not quite as dense, but probably not an issue).

Something like: in the image header, we have compression type 1 = zlib,
compression type 2 = zstd, etc, each of which treat all compressed
clusters as-is with no further per-cluster headers. Or, in the image
header, we have compression type 255 = per-cluster, at which point a
compressed cluster is now represented as: [1-byte prefix] [tail], where
the one-byte prefix is 1 = zlib, 2 = zstd, etc (but not 255), and then
the tail is decoded with the appropriate algorithm. In this way, it
might even be possible to encode different clusters with an optimal
algorithm per cluster, and thus create an image that requires both zlib
and zstd to be fully read.

I'm not sure if we need that much complexity, but just throwing it out
there for thought.

> +
> +The compression type can be set on the image creation. The only way to change
> +the compression type is to convert the image explicitly.
> +
> +Available compression types:
> +    ID    0: ZLIB (gzip)
> +          1: ZSTD
> +
> +The default compression type is ZLIB. When ZLIB is used the compression type
> +header extension is not present.

Here's where we have to think about back-compat. If zlib is used, and
the compression type header is present, must incompatible bit 3 be set?
Do we want to permit images that have incompatible bit 3 set and zlib
explicitly mentioned? Or are you making a hard requirement that if zlib
is chosen, incompatible bit 3 must be absent and no compression header
should be set? Or is it okay for the compression header to be present
and incompatible bit 3 clear, but only when compression type 0 is
chosen?  Let's spell out exactly what we want, probably with a goal of
minimizing the number of situations where an incompatible bit must be
set (as that makes it harder to work with images in older software).

Does the compression type really have to be chosen at image creation, or
can the decision be deferred until the time that the first compressed
cluster is written?  You could implement things to state that if
incompatible bit 3 is set but the compression header is absent, then
there must not be any compressed clusters in the image; as soon as the
first compressed cluster is written, then the compression header must
also be written (even if it explicitly calls out zlib), to make it
easier for new software to tell at a glance if the image has ever
contained compressed clusters at least once in the past.

> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 7ccbfff9d0..8eebcc728b 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -698,6 +698,7 @@
>  { 'struct': 'BlockMeasureInfo',
>    'data': {'required': 'int', 'fully-allocated': 'int'} }
>  
> +
>  ##

Why the added blank line?

>  # @query-block:
>  #
> @@ -5257,3 +5258,16 @@
>    'data' : { 'node-name': 'str',
>               'iothread': 'StrOrNull',
>               '*force': 'bool' } }
> +
> +##
> +# @Qcow2CompressionType:
> +#
> +# Compression type used in qcow2 image file
> +#
> +# @zlib - gzip compressor
> +# @zstd - zstd compression
> +#
> +# Since: 4.0

You've missed 4.0; this should be 4.1.

> +##
> +{ 'enum': 'Qcow2CompressionType',
> +  'data': [ 'zlib', 'zstd' ] }
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH v1] [RFC] qcow2: add compression type feature

Posted by Denis Plotnikov 4 years, 11 months ago


On 16.05.2019 17:42, Eric Blake wrote:
> On 5/16/19 8:48 AM, Denis Plotnikov wrote:
>> The patch adds some preparation parts for incompatible compression type
>> feature into QCOW2 header that indicates that *all* compressed clusters
>> must be (de)compressed using a certain compression type.
>>
>> It is implied that the compression type is set on the image creation and
>> can be changed only later by image conversion, thus the only compression
>> algorithm is used for the image.
>>
>> The plan is to add support for ZSTD and then may be something more effective
>> in the future.
>>
>> ZSTD compression algorithm consumes 3-5 times less CPU power with a
>> comparable compression ratio with zlib. It would be wise to use it for
>> data compression e.g. for backups.
>>
>> The default compression is ZLIB.
>>
>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>> ---
> 
>> +++ b/docs/interop/qcow2.txt
>> @@ -109,7 +109,11 @@ in the description of a field.
>>                                   An External Data File Name header extension may
>>                                   be present if this bit is set.
>>   
>> -                    Bits 3-63:  Reserved (set to 0)
>> +                    Bit 3:      Compression type bit. If the bit is set, then the
>> +                                type of compression the image uses is set in the
>> +                                header extension
> 
> I'd call out 'Compression type' header extension by name, to make it
> more obvious.  Is it an error if bit 3 is set but the compression header
> is not present? Is it an error if the compression header is present but
> bit 3 is not set?
yes to both, the bit can't exist without the header and vise versa. This 
also implies that there is no use in the bit set and compression type = 
ZLIB. This is ensure that older qemu(s) can work with the images that 
use ZLIB without problems. Is there any drawbacks in this approach?
> 
>> +
>> +                    Bits 4-63:  Reserved (set to 0)
>>   
>>            80 -  87:  compatible_features
>>                       Bitmask of compatible features. An implementation can
>> @@ -175,6 +179,7 @@ be stored. Each extension has a structure like the following:
>>                           0x23852875 - Bitmaps extension
>>                           0x0537be77 - Full disk encryption header pointer
>>                           0x44415441 - External data file name string
>> +                        0x434D5052 - Compression type extension
> 
> Our earlier magic numbers were probably created as random numbers and
> contain 8-bit values, to make them less likely to appear naturally in
> other parts of the file and thus less likely to be misinterpreted.  But
> that's not a requirement, and I see that you followed the lead of "DATA"
> and created "CMPR" for yours.  Works for me :)
> 
>>                           other      - Unknown header extension, can be safely
>>                                        ignored
>>   
>> @@ -771,3 +776,21 @@ In the image file the 'enabled' state is reflected by the 'auto' flag. If this
>>   flag is set, the software must consider the bitmap as 'enabled' and start
>>   tracking virtual disk changes to this bitmap from the first write to the
>>   virtual disk. If this flag is not set then the bitmap is disabled.
>> +
>> +
>> +== Compression type extension ==
>> +
>> +The compression type extension is an optional header extension. It stores the
> 
> Could probably do a better job at describing when the header is optional
> vs. mandatory.
> 
>> +ID of the compressor which has to be used to compress/decompress disk clusters.
>> +The compression type is used for all disk cluster. Two clusters of the image
>> +couldn't be compressed with different compressors.
> 
> Wording suggestion: A single compression type is applied to all
> compressed disk clusters, with no way to change compression types per
> cluster.
> 
Nice one!
> But is that a hard requirement? Since this is already an incompatible
> feature extension, we could have a compression type that states that
> each compressed cluster is self-describing via a 1-byte prefix (yes, it
> means compression is not quite as dense, but probably not an issue).
> 
> Something like: in the image header, we have compression type 1 = zlib,
> compression type 2 = zstd, etc, each of which treat all compressed
> clusters as-is with no further per-cluster headers. Or, in the image
> header, we have compression type 255 = per-cluster, at which point a
> compressed cluster is now represented as: [1-byte prefix] [tail], where
> the one-byte prefix is 1 = zlib, 2 = zstd, etc (but not 255), and then
> the tail is decoded with the appropriate algorithm. In this way, it
> might even be possible to encode different clusters with an optimal
> algorithm per cluster, and thus create an image that requires both zlib
> and zstd to be fully read.
> 
> I'm not sure if we need that much complexity, but just throwing it out
> there for thought.
Yes, I thought about per-cluster compression type as well but I have a 
few concerns about that.
1. So far can't come up with approach to defining the best algorithm to 
the specific chunk of data in advance before actually compressing it 
with different compressors and comparing the sizes afterwards 
(compression speed is also important).
May be it's better to give users ability to decide on their own i.e. to 
choose a policy on disk creation:
the fastest compression, minimum image on disk size or balanced.

2. The image still should be converted to use in older qemu-s. Can't get 
rid of the conversion
> 
>> +
>> +The compression type can be set on the image creation. The only way to change
>> +the compression type is to convert the image explicitly.
>> +
>> +Available compression types:
>> +    ID    0: ZLIB (gzip)
>> +          1: ZSTD
>> +
>> +The default compression type is ZLIB. When ZLIB is used the compression type
>> +header extension is not present.
> 
> Here's where we have to think about back-compat. If zlib is used, and
> the compression type header is present, must incompatible bit 3 be set?
> Do we want to permit images that have incompatible bit 3 set and zlib
> explicitly mentioned? 
No
> Or are you making a hard requirement that if zlib
> is chosen, incompatible bit 3 must be absent and no compression header
> should be set?
Yes
> Or is it okay for the compression header to be present
> and incompatible bit 3 clear, but only when compression type 0 is
> chosen?
No
> Let's spell out exactly what we want, probably with a goal of
> minimizing the number of situations where an incompatible bit must be
> set (as that makes it harder to work with images in older software).
> 
Ok
> Does the compression type really have to be chosen at image creation, or
> can the decision be deferred until the time that the first compressed
> cluster is written?  
Yes, it could be that way (even better). If the compression type is 
changed in run-time and it's no ZLIB the extension header is written and 
the incompatible bit is set...
> You could implement things to state that if
> incompatible bit 3 is set but the compression header is absent, then
> there must not be any compressed clusters in the image; as soon as the
> first compressed cluster is written, then the compression header must
> also be written (even if it explicitly calls out zlib), to make it
> easier for new software to tell at a glance if the image has ever
> contained compressed clusters at least once in the past.
... yeah, when the first cluster is written both the bit and the header 
is written if not ZLIB but need a flag whether we have at least one 
cluster compressed
> 
>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index 7ccbfff9d0..8eebcc728b 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -698,6 +698,7 @@
>>   { 'struct': 'BlockMeasureInfo',
>>     'data': {'required': 'int', 'fully-allocated': 'int'} }
>>   
>> +
>>   ##
> 
> Why the added blank line?
> 
>>   # @query-block:
>>   #
>> @@ -5257,3 +5258,16 @@
>>     'data' : { 'node-name': 'str',
>>                'iothread': 'StrOrNull',
>>                '*force': 'bool' } }
>> +
>> +##
>> +# @Qcow2CompressionType:
>> +#
>> +# Compression type used in qcow2 image file
>> +#
>> +# @zlib - gzip compressor
>> +# @zstd - zstd compression
>> +#
>> +# Since: 4.0
> 
> You've missed 4.0; this should be 4.1.
Will go with 4.1

Thanks!
Denis
> 
>> +##
>> +{ 'enum': 'Qcow2CompressionType',
>> +  'data': [ 'zlib', 'zstd' ] }
>>
> 

-- 
Best,
Denis

Re: [Qemu-devel] [Qemu-block] [PATCH v1] [RFC] qcow2: add compression type feature

Posted by John Snow 4 years, 11 months ago


On 5/16/19 9:48 AM, Denis Plotnikov wrote:
> The patch adds some preparation parts for incompatible compression type
> feature into QCOW2 header that indicates that *all* compressed clusters
> must be (de)compressed using a certain compression type.
> 
> It is implied that the compression type is set on the image creation and
> can be changed only later by image conversion, thus the only compression
> algorithm is used for the image.
> 
> The plan is to add support for ZSTD and then may be something more effective
> in the future.
> 
> ZSTD compression algorithm consumes 3-5 times less CPU power with a
> comparable compression ratio with zlib. It would be wise to use it for
> data compression e.g. for backups.
> 
> The default compression is ZLIB.
> 

(Merely a curiosity:)

Since this is coming from Virtuozzo, I trust that you've had good luck
with ZSTD already in R&D. What do the compression ratios look like in
practice? It's touted as "comparable to zlib" which certainly does sound
quite nice for streaming compression of backups.

I suppose in the worst case it ought to be faster than bandwidth speeds,
so no harm in utilizing it.

> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>

Re: [Qemu-devel] [Qemu-block] [PATCH v1] [RFC] qcow2: add compression type feature

Posted by Denis Plotnikov 4 years, 11 months ago


On 17.05.2019 2:25, John Snow wrote:
> 
> 
> On 5/16/19 9:48 AM, Denis Plotnikov wrote:
>> The patch adds some preparation parts for incompatible compression type
>> feature into QCOW2 header that indicates that *all* compressed clusters
>> must be (de)compressed using a certain compression type.
>>
>> It is implied that the compression type is set on the image creation and
>> can be changed only later by image conversion, thus the only compression
>> algorithm is used for the image.
>>
>> The plan is to add support for ZSTD and then may be something more effective
>> in the future.
>>
>> ZSTD compression algorithm consumes 3-5 times less CPU power with a
>> comparable compression ratio with zlib. It would be wise to use it for
>> data compression e.g. for backups.
>>
>> The default compression is ZLIB.
>>
> 
> (Merely a curiosity:)
> 
> Since this is coming from Virtuozzo, I trust that you've had good luck
> with ZSTD already in R&D. What do the compression ratios look like in
> practice? It's touted as "comparable to zlib" which certainly does sound
> quite nice for streaming compression of backups.
> 
> I suppose in the worst case it ought to be faster than bandwidth speeds,
> so no harm in utilizing it.
Yes, we did some research on it. Actually, there is a patch in the 
mailing list (please, take a look: ) which applies ZSTD compression in 
the migration.
Here are the numbers from there:

host: i7-4790 8xCPU @ 3.60GHz, 16G RAM
migration to the same host
VM: 2xVCPU, 8G RAM total
5G RAM used, memory populated with postgreqsl data
produced by pgbench performance benchmark

Threads: 1 compress – 1 decompress

zstd provides slightly less compression ratio with almost the same
CPU usage but copes with RAM compression roughly 2 times faster

compression type              zlib       |      zstd
---------------------------------------------------------
compression level          1       5     |   1       5
compression ratio          6.92    7.05  |   6.69    6.89
cpu idle, %                82      83    |   86      80
time, sec                  49      71    |   26      31
time diff to zlib, sec                   |  -25     -41
time diff to zlib, %                     |  -47%    -56%

I general ZSTD provides better compression ratio on big dependent chunks 
of data. Than bigger the data size then better ZSTD it compresses.

Since, in our cases (migration: 4K RAM block, qcow2: cluster) we
have to compress independent chunks the ability of ZSTD to find better 
compression solution is restricted.

Although, the compression ratio is pretty much the same in both cases 
the experiments shown that ZSTD does the compression much faster (x2).

Which is obviously good for us.

Here is other comparison of ZLIB vs ZSTD without any application in qemu 
which shows that ZSTD works faster

zlib = max compression level
zstd = compression level 5 (max 22)

cycles consumed for compression:
						
	4k txt	4K ram	4K bin	64K txt	 64K ram  64K bin
zlib	  400K	  344K	  1.3M	    13M       5M    92.3M
zstd 	  350K	  235K	  312K	   3.3M     1.3M     2.4M
Diff,%	   -12	   -32	   -77	    -75      -73      -97
						
size after compression in bytes:

	4k txt	4K ram	4K bin	64K txt	 64K ram  64K bin
zlib	  1542	  3599	  1403	17386	  64735	    20609
zstd 	  1568	  3540	  1250	17656	  65546	    20023
Diff,%	    -2	     2	    11	   -2	     -1	        3

Data sources for test files [we took 4K and 64K chunks from there]:

txt = linux/Documentation/memory-barriers.txt
ram = /boot/initramfs-4.20.0-rc6+.img
bin = x86_64-softmmu/qemu-system-x86_64

Increasing of ZSTD compression ratio didn't give any significant 
improvements of the out size but slowed down the pace of ZSTD

Denis

> 
>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>

-- 
Best,
Denis

Re: [Qemu-devel] [Qemu-block] [PATCH v1] [RFC] qcow2: add compression type feature

Posted by John Snow 4 years, 11 months ago


On 5/17/19 4:05 AM, Denis Plotnikov wrote:
> 
> 
> On 17.05.2019 2:25, John Snow wrote:
>>
>>
>> On 5/16/19 9:48 AM, Denis Plotnikov wrote:
>>> The patch adds some preparation parts for incompatible compression type
>>> feature into QCOW2 header that indicates that *all* compressed clusters
>>> must be (de)compressed using a certain compression type.
>>>
>>> It is implied that the compression type is set on the image creation and
>>> can be changed only later by image conversion, thus the only compression
>>> algorithm is used for the image.
>>>
>>> The plan is to add support for ZSTD and then may be something more effective
>>> in the future.
>>>
>>> ZSTD compression algorithm consumes 3-5 times less CPU power with a
>>> comparable compression ratio with zlib. It would be wise to use it for
>>> data compression e.g. for backups.
>>>
>>> The default compression is ZLIB.
>>>
>>
>> (Merely a curiosity:)
>>
>> Since this is coming from Virtuozzo, I trust that you've had good luck
>> with ZSTD already in R&D. What do the compression ratios look like in
>> practice? It's touted as "comparable to zlib" which certainly does sound
>> quite nice for streaming compression of backups.
>>
>> I suppose in the worst case it ought to be faster than bandwidth speeds,
>> so no harm in utilizing it.
> Yes, we did some research on it. Actually, there is a patch in the 
> mailing list (please, take a look: ) which applies ZSTD compression in 
> the migration.
> Here are the numbers from there:
> 
> host: i7-4790 8xCPU @ 3.60GHz, 16G RAM
> migration to the same host
> VM: 2xVCPU, 8G RAM total
> 5G RAM used, memory populated with postgreqsl data
> produced by pgbench performance benchmark
> 
> Threads: 1 compress – 1 decompress
> 
> zstd provides slightly less compression ratio with almost the same
> CPU usage but copes with RAM compression roughly 2 times faster
> 
> compression type              zlib       |      zstd
> ---------------------------------------------------------
> compression level          1       5     |   1       5
> compression ratio          6.92    7.05  |   6.69    6.89
> cpu idle, %                82      83    |   86      80
> time, sec                  49      71    |   26      31
> time diff to zlib, sec                   |  -25     -41
> time diff to zlib, %                     |  -47%    -56%
> 
> I general ZSTD provides better compression ratio on big dependent chunks 
> of data. Than bigger the data size then better ZSTD it compresses.
> 
> Since, in our cases (migration: 4K RAM block, qcow2: cluster) we
> have to compress independent chunks the ability of ZSTD to find better 
> compression solution is restricted.
> 
> Although, the compression ratio is pretty much the same in both cases 
> the experiments shown that ZSTD does the compression much faster (x2).
> 
> Which is obviously good for us.
> 
> Here is other comparison of ZLIB vs ZSTD without any application in qemu 
> which shows that ZSTD works faster
> 
> zlib = max compression level
> zstd = compression level 5 (max 22)
> 
> cycles consumed for compression:
> 						
> 	4k txt	4K ram	4K bin	64K txt	 64K ram  64K bin
> zlib	  400K	  344K	  1.3M	    13M       5M    92.3M
> zstd 	  350K	  235K	  312K	   3.3M     1.3M     2.4M
> Diff,%	   -12	   -32	   -77	    -75      -73      -97
> 

Wow, the 4k bin one is drastic. The text is even more prominent. wow!
						
> size after compression in bytes:
> 
> 	4k txt	4K ram	4K bin	64K txt	 64K ram  64K bin
> zlib	  1542	  3599	  1403	17386	  64735	    20609
> zstd 	  1568	  3540	  1250	17656	  65546	    20023
> Diff,%	    -2	     2	    11	   -2	     -1	        3
> 

Yeah, that's pretty close. Seems like absolutely a great tradeoff for
the speed gain. If the little bit of difference matters to you, you can
always do some more heavy-duty compression of your choice in another
layer of the storage stack.

> Data sources for test files [we took 4K and 64K chunks from there]:
> 
> txt = linux/Documentation/memory-barriers.txt
> ram = /boot/initramfs-4.20.0-rc6+.img
> bin = x86_64-softmmu/qemu-system-x86_64
> 
> Increasing of ZSTD compression ratio didn't give any significant 
> improvements of the out size but slowed down the pace of ZSTD
> 
> Denis
> 
>>
>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
> 

Very useful data, thanks for sharing! Seems like this would indeed be a
great thing to have for qcow2.