block/qcow2.c | 25 +++++++++++++++++++++++++ block/qcow2.h | 29 ++++++++++++++++++++++------- docs/interop/qcow2.txt | 25 ++++++++++++++++++++++++- qapi/block-core.json | 14 ++++++++++++++ 4 files changed, 85 insertions(+), 8 deletions(-)
This patch adds preparatory parts for the incompatible compression type
feature to the QCOW2 header, indicating that *all* compressed clusters
must be (de)compressed using a certain compression type.
It is implied that the compression type is set at image creation and
can only be changed later by image conversion; thus a single compression
algorithm is used for the whole image.
The plan is to add support for ZSTD and then, possibly, something more
efficient in the future.
The ZSTD compression algorithm consumes 3-5 times less CPU power while
achieving a compression ratio comparable to zlib's. It would be wise to
use it for data compression, e.g. for backups.
The default compression is ZLIB.
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
block/qcow2.c | 25 +++++++++++++++++++++++++
block/qcow2.h | 29 ++++++++++++++++++++++-------
docs/interop/qcow2.txt | 25 ++++++++++++++++++++++++-
qapi/block-core.json | 14 ++++++++++++++
4 files changed, 85 insertions(+), 8 deletions(-)
diff --git a/block/qcow2.c b/block/qcow2.c
index 3ace3b2209..bca506b80f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -74,6 +74,7 @@ typedef struct {
#define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
#define QCOW2_EXT_MAGIC_BITMAPS 0x23852875
#define QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
+#define QCOW2_EXT_MAGIC_COMPRESSION_TYPE 0x434D5052
static int coroutine_fn
qcow2_co_preadv_compressed(BlockDriverState *bs,
@@ -398,6 +399,9 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
#endif
break;
+ case QCOW2_EXT_MAGIC_COMPRESSION_TYPE:
+ /* Setting compression type to BDRVQcow2State->compression_type */
+ /* from the image header is going to be here */
case QCOW2_EXT_MAGIC_DATA_FILE:
{
s->image_data_file = g_malloc0(ext.len + 1);
@@ -2553,6 +2557,11 @@ int qcow2_update_header(BlockDriverState *bs)
.bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
.name = "lazy refcounts",
},
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_COMPRESSION_TYPE_BITNR,
+ .name = "compression type",
+ },
};
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
@@ -2583,6 +2592,22 @@ int qcow2_update_header(BlockDriverState *bs)
buflen -= ret;
}
+ /* Compression type extension */
+ if (s->compression_type != 0) {
+ Qcow2CompressionTypeExt comp_header = {
+ .compression_type = cpu_to_be32(s->compression_type),
+ };
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_COMPRESSION_TYPE,
+ &comp_header,
+ cpu_to_be64(sizeof(comp_header)),
+ buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+ buf += ret;
+ buflen -= ret;
+ }
+
/* Keep unknown header extensions */
QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
diff --git a/block/qcow2.h b/block/qcow2.h
index fdee297f33..08468ab97d 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -198,16 +198,20 @@ enum {
/* Incompatible feature bits */
enum {
- QCOW2_INCOMPAT_DIRTY_BITNR = 0,
- QCOW2_INCOMPAT_CORRUPT_BITNR = 1,
- QCOW2_INCOMPAT_DATA_FILE_BITNR = 2,
- QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
- QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
- QCOW2_INCOMPAT_DATA_FILE = 1 << QCOW2_INCOMPAT_DATA_FILE_BITNR,
+ QCOW2_INCOMPAT_DIRTY_BITNR = 0,
+ QCOW2_INCOMPAT_CORRUPT_BITNR = 1,
+ QCOW2_INCOMPAT_DATA_FILE_BITNR = 2,
+ QCOW2_INCOMPAT_COMPRESSION_TYPE_BITNR = 3,
+ QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
+ QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
+ QCOW2_INCOMPAT_DATA_FILE = 1 << QCOW2_INCOMPAT_DATA_FILE_BITNR,
+ QCOW2_INCOMPAT_COMPRESSION_TYPE =
+ 1 << QCOW2_INCOMPAT_COMPRESSION_TYPE_BITNR,
QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY
| QCOW2_INCOMPAT_CORRUPT
- | QCOW2_INCOMPAT_DATA_FILE,
+ | QCOW2_INCOMPAT_DATA_FILE
+ | QCOW2_INCOMPAT_COMPRESSION_TYPE,
};
/* Compatible feature bits */
@@ -263,6 +267,10 @@ typedef struct Qcow2BitmapHeaderExt {
uint64_t bitmap_directory_offset;
} QEMU_PACKED Qcow2BitmapHeaderExt;
+typedef struct Qcow2CompressionTypeExt {
+ uint32_t compression_type;
+} QEMU_PACKED Qcow2CompressionTypeExt;
+
typedef struct BDRVQcow2State {
int cluster_bits;
int cluster_size;
@@ -350,6 +358,13 @@ typedef struct BDRVQcow2State {
int nb_compress_threads;
BdrvChild *data_file;
+ /**
+ * Compression type used for the image. Default: 0 - ZLIB
+ * The image compression type is set on image creation.
+ * The only way to change the compression type is to convert the image
+ * with the desired compression type set
+ */
+ uint32_t compression_type;
} BDRVQcow2State;
typedef struct Qcow2COWRegion {
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index af5711e533..2c907521af 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -109,7 +109,11 @@ in the description of a field.
An External Data File Name header extension may
be present if this bit is set.
- Bits 3-63: Reserved (set to 0)
+ Bit 3: Compression type bit. If the bit is set, then the
+ type of compression the image uses is set in the
+ header extension
+
+ Bits 4-63: Reserved (set to 0)
80 - 87: compatible_features
Bitmask of compatible features. An implementation can
@@ -175,6 +179,7 @@ be stored. Each extension has a structure like the following:
0x23852875 - Bitmaps extension
0x0537be77 - Full disk encryption header pointer
0x44415441 - External data file name string
+ 0x434D5052 - Compression type extension
other - Unknown header extension, can be safely
ignored
@@ -771,3 +776,21 @@ In the image file the 'enabled' state is reflected by the 'auto' flag. If this
flag is set, the software must consider the bitmap as 'enabled' and start
tracking virtual disk changes to this bitmap from the first write to the
virtual disk. If this flag is not set then the bitmap is disabled.
+
+
+== Compression type extension ==
+
+The compression type extension is an optional header extension. It stores the
+ID of the compressor which has to be used to compress/decompress disk clusters.
+The compression type is used for all disk cluster. Two clusters of the image
+couldn't be compressed with different compressors.
+
+The compression type can be set on the image creation. The only way to change
+the compression type is to convert the image explicitly.
+
+Available compression types:
+ ID 0: ZLIB (gzip)
+ 1: ZSTD
+
+The default compression type is ZLIB. When ZLIB is used the compression type
+header extension is not present.
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 7ccbfff9d0..8eebcc728b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -698,6 +698,7 @@
{ 'struct': 'BlockMeasureInfo',
'data': {'required': 'int', 'fully-allocated': 'int'} }
+
##
# @query-block:
#
@@ -5257,3 +5258,16 @@
'data' : { 'node-name': 'str',
'iothread': 'StrOrNull',
'*force': 'bool' } }
+
+##
+# @Qcow2CompressionType:
+#
+# Compression type used in qcow2 image file
+#
+# @zlib - gzip compressor
+# @zstd - zstd compression
+#
+# Since: 4.0
+##
+{ 'enum': 'Qcow2CompressionType',
+ 'data': [ 'zlib', 'zstd' ] }
--
2.17.0
On 5/16/19 8:48 AM, Denis Plotnikov wrote: > The patch adds some preparation parts for incompatible compression type > feature into QCOW2 header that indicates that *all* compressed clusters > must be (de)compressed using a certain compression type. > > It is implied that the compression type is set on the image creation and > can be changed only later by image conversion, thus the only compression > algorithm is used for the image. > > The plan is to add support for ZSTD and then may be something more effective > in the future. > > ZSTD compression algorithm consumes 3-5 times less CPU power with a > comparable compression ratio with zlib. It would be wise to use it for > data compression e.g. for backups. > > The default compression is ZLIB. > > Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> > --- > +++ b/docs/interop/qcow2.txt > @@ -109,7 +109,11 @@ in the description of a field. > An External Data File Name header extension may > be present if this bit is set. > > - Bits 3-63: Reserved (set to 0) > + Bit 3: Compression type bit. If the bit is set, then the > + type of compression the image uses is set in the > + header extension I'd call out 'Compression type' header extension by name, to make it more obvious. Is it an error if bit 3 is set but the compression header is not present? Is it an error if the compression header is present but bit 3 is not set? > + > + Bits 4-63: Reserved (set to 0) > > 80 - 87: compatible_features > Bitmask of compatible features. An implementation can > @@ -175,6 +179,7 @@ be stored. Each extension has a structure like the following: > 0x23852875 - Bitmaps extension > 0x0537be77 - Full disk encryption header pointer > 0x44415441 - External data file name string > + 0x434D5052 - Compression type extension Our earlier magic numbers were probably created as random numbers and contain 8-bit values, to make them less likely to appear naturally in other parts of the file and thus less likely to be misinterpreted. 
But that's not a requirement, and I see that you followed the lead of "DATA" and created "CMPR" for yours. Works for me :) > other - Unknown header extension, can be safely > ignored > > @@ -771,3 +776,21 @@ In the image file the 'enabled' state is reflected by the 'auto' flag. If this > flag is set, the software must consider the bitmap as 'enabled' and start > tracking virtual disk changes to this bitmap from the first write to the > virtual disk. If this flag is not set then the bitmap is disabled. > + > + > +== Compression type extension == > + > +The compression type extension is an optional header extension. It stores the Could probably do a better job at describing when the header is optional vs. mandatory. > +ID of the compressor which has to be used to compress/decompress disk clusters. > +The compression type is used for all disk cluster. Two clusters of the image > +couldn't be compressed with different compressors. Wording suggestion: A single compression type is applied to all compressed disk clusters, with no way to change compression types per cluster. But is that a hard requirement? Since this is already an incompatible feature extension, we could have a compression type that states that each compressed cluster is self-describing via a 1-byte prefix (yes, it means compression is not quite as dense, but probably not an issue). Something like: in the image header, we have compression type 1 = zlib, compression type 2 = zstd, etc, each of which treat all compressed clusters as-is with no further per-cluster headers. Or, in the image header, we have compression type 255 = per-cluster, at which point a compressed cluster is now represented as: [1-byte prefix] [tail], where the one-byte prefix is 1 = zlib, 2 = zstd, etc (but not 255), and then the tail is decoded with the appropriate algorithm. 
In this way, it might even be possible to encode different clusters with an optimal algorithm per cluster, and thus create an image that requires both zlib and zstd to be fully read. I'm not sure if we need that much complexity, but just throwing it out there for thought. > + > +The compression type can be set on the image creation. The only way to change > +the compression type is to convert the image explicitly. > + > +Available compression types: > + ID 0: ZLIB (gzip) > + 1: ZSTD > + > +The default compression type is ZLIB. When ZLIB is used the compression type > +header extension is not present. Here's where we have to think about back-compat. If zlib is used, and the compression type header is present, must incompatible bit 3 be set? Do we want to permit images that have incompatible bit 3 set and zlib explicitly mentioned? Or are you making a hard requirement that if zlib is chosen, incompatible bit 3 must be absent and no compression header should be set? Or is it okay for the compression header to be present and incompatible bit 3 clear, but only when compression type 0 is chosen? Let's spell out exactly what we want, probably with a goal of minimizing the number of situations where an incompatible bit must be set (as that makes it harder to work with images in older software). Does the compression type really have to be chosen at image creation, or can the decision be deferred until the time that the first compressed cluster is written? You could implement things to state that if incompatible bit 3 is set but the compression header is absent, then there must not be any compressed clusters in the image; as soon as the first compressed cluster is written, then the compression header must also be written (even if it explicitly calls out zlib), to make it easier for new software to tell at a glance if the image has ever contained compressed clusters at least once in the past. 
> diff --git a/qapi/block-core.json b/qapi/block-core.json > index 7ccbfff9d0..8eebcc728b 100644 > --- a/qapi/block-core.json > +++ b/qapi/block-core.json > @@ -698,6 +698,7 @@ > { 'struct': 'BlockMeasureInfo', > 'data': {'required': 'int', 'fully-allocated': 'int'} } > > + > ## Why the added blank line? > # @query-block: > # > @@ -5257,3 +5258,16 @@ > 'data' : { 'node-name': 'str', > 'iothread': 'StrOrNull', > '*force': 'bool' } } > + > +## > +# @Qcow2CompressionType: > +# > +# Compression type used in qcow2 image file > +# > +# @zlib - gzip compressor > +# @zstd - zstd compression > +# > +# Since: 4.0 You've missed 4.0; this should be 4.1. > +## > +{ 'enum': 'Qcow2CompressionType', > + 'data': [ 'zlib', 'zstd' ] } > -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
On 16.05.2019 17:42, Eric Blake wrote: > On 5/16/19 8:48 AM, Denis Plotnikov wrote: >> The patch adds some preparation parts for incompatible compression type >> feature into QCOW2 header that indicates that *all* compressed clusters >> must be (de)compressed using a certain compression type. >> >> It is implied that the compression type is set on the image creation and >> can be changed only later by image conversion, thus the only compression >> algorithm is used for the image. >> >> The plan is to add support for ZSTD and then may be something more effective >> in the future. >> >> ZSTD compression algorithm consumes 3-5 times less CPU power with a >> comparable compression ratio with zlib. It would be wise to use it for >> data compression e.g. for backups. >> >> The default compression is ZLIB. >> >> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> >> --- > >> +++ b/docs/interop/qcow2.txt >> @@ -109,7 +109,11 @@ in the description of a field. >> An External Data File Name header extension may >> be present if this bit is set. >> >> - Bits 3-63: Reserved (set to 0) >> + Bit 3: Compression type bit. If the bit is set, then the >> + type of compression the image uses is set in the >> + header extension > > I'd call out 'Compression type' header extension by name, to make it > more obvious. Is it an error if bit 3 is set but the compression header > is not present? Is it an error if the compression header is present but > bit 3 is not set? yes to both, the bit can't exist without the header and vise versa. This also implies that there is no use in the bit set and compression type = ZLIB. This is ensure that older qemu(s) can work with the images that use ZLIB without problems. Is there any drawbacks in this approach? > >> + >> + Bits 4-63: Reserved (set to 0) >> >> 80 - 87: compatible_features >> Bitmask of compatible features. An implementation can >> @@ -175,6 +179,7 @@ be stored. 
Each extension has a structure like the following: >> 0x23852875 - Bitmaps extension >> 0x0537be77 - Full disk encryption header pointer >> 0x44415441 - External data file name string >> + 0x434D5052 - Compression type extension > > Our earlier magic numbers were probably created as random numbers and > contain 8-bit values, to make them less likely to appear naturally in > other parts of the file and thus less likely to be misinterpreted. But > that's not a requirement, and I see that you followed the lead of "DATA" > and created "CMPR" for yours. Works for me :) > >> other - Unknown header extension, can be safely >> ignored >> >> @@ -771,3 +776,21 @@ In the image file the 'enabled' state is reflected by the 'auto' flag. If this >> flag is set, the software must consider the bitmap as 'enabled' and start >> tracking virtual disk changes to this bitmap from the first write to the >> virtual disk. If this flag is not set then the bitmap is disabled. >> + >> + >> +== Compression type extension == >> + >> +The compression type extension is an optional header extension. It stores the > > Could probably do a better job at describing when the header is optional > vs. mandatory. > >> +ID of the compressor which has to be used to compress/decompress disk clusters. >> +The compression type is used for all disk cluster. Two clusters of the image >> +couldn't be compressed with different compressors. > > Wording suggestion: A single compression type is applied to all > compressed disk clusters, with no way to change compression types per > cluster. > Nice one! > But is that a hard requirement? Since this is already an incompatible > feature extension, we could have a compression type that states that > each compressed cluster is self-describing via a 1-byte prefix (yes, it > means compression is not quite as dense, but probably not an issue). 
> > Something like: in the image header, we have compression type 1 = zlib, > compression type 2 = zstd, etc, each of which treat all compressed > clusters as-is with no further per-cluster headers. Or, in the image > header, we have compression type 255 = per-cluster, at which point a > compressed cluster is now represented as: [1-byte prefix] [tail], where > the one-byte prefix is 1 = zlib, 2 = zstd, etc (but not 255), and then > the tail is decoded with the appropriate algorithm. In this way, it > might even be possible to encode different clusters with an optimal > algorithm per cluster, and thus create an image that requires both zlib > and zstd to be fully read. > > I'm not sure if we need that much complexity, but just throwing it out > there for thought. Yes, I thought about per-cluster compression type as well but I have a few concerns about that. 1. So far can't come up with approach to defining the best algorithm to the specific chunk of data in advance before actually compressing it with different compressors and comparing the sizes afterwards (compression speed is also important). May be it's better to give users ability to decide on their own i.e. to choose a policy on disk creation: the fastest compression, minimum image on disk size or balanced. 2. The image still should be converted to use in older qemu-s. Can't get rid of the conversion > >> + >> +The compression type can be set on the image creation. The only way to change >> +the compression type is to convert the image explicitly. >> + >> +Available compression types: >> + ID 0: ZLIB (gzip) >> + 1: ZSTD >> + >> +The default compression type is ZLIB. When ZLIB is used the compression type >> +header extension is not present. > > Here's where we have to think about back-compat. If zlib is used, and > the compression type header is present, must incompatible bit 3 be set? > Do we want to permit images that have incompatible bit 3 set and zlib > explicitly mentioned? 
No > Or are you making a hard requirement that if zlib > is chosen, incompatible bit 3 must be absent and no compression header > should be set? Yes > Or is it okay for the compression header to be present > and incompatible bit 3 clear, but only when compression type 0 is > chosen? No > Let's spell out exactly what we want, probably with a goal of > minimizing the number of situations where an incompatible bit must be > set (as that makes it harder to work with images in older software). > Ok > Does the compression type really have to be chosen at image creation, or > can the decision be deferred until the time that the first compressed > cluster is written? Yes, it could be that way (even better). If the compression type is changed in run-time and it's no ZLIB the extension header is written and the incompatible bit is set... > You could implement things to state that if > incompatible bit 3 is set but the compression header is absent, then > there must not be any compressed clusters in the image; as soon as the > first compressed cluster is written, then the compression header must > also be written (even if it explicitly calls out zlib), to make it > easier for new software to tell at a glance if the image has ever > contained compressed clusters at least once in the past. ... yeah, when the first cluster is written both the bit and the header is written if not ZLIB but need a flag whether we have at least one cluster compressed > >> diff --git a/qapi/block-core.json b/qapi/block-core.json >> index 7ccbfff9d0..8eebcc728b 100644 >> --- a/qapi/block-core.json >> +++ b/qapi/block-core.json >> @@ -698,6 +698,7 @@ >> { 'struct': 'BlockMeasureInfo', >> 'data': {'required': 'int', 'fully-allocated': 'int'} } >> >> + >> ## > > Why the added blank line? 
> >> # @query-block: >> # >> @@ -5257,3 +5258,16 @@ >> 'data' : { 'node-name': 'str', >> 'iothread': 'StrOrNull', >> '*force': 'bool' } } >> + >> +## >> +# @Qcow2CompressionType: >> +# >> +# Compression type used in qcow2 image file >> +# >> +# @zlib - gzip compressor >> +# @zstd - zstd compression >> +# >> +# Since: 4.0 > > You've missed 4.0; this should be 4.1. Will go with 4.1 Thanks! Denis > >> +## >> +{ 'enum': 'Qcow2CompressionType', >> + 'data': [ 'zlib', 'zstd' ] } >> > -- Best, Denis
On 5/16/19 9:48 AM, Denis Plotnikov wrote: > The patch adds some preparation parts for incompatible compression type > feature into QCOW2 header that indicates that *all* compressed clusters > must be (de)compressed using a certain compression type. > > It is implied that the compression type is set on the image creation and > can be changed only later by image conversion, thus the only compression > algorithm is used for the image. > > The plan is to add support for ZSTD and then may be something more effective > in the future. > > ZSTD compression algorithm consumes 3-5 times less CPU power with a > comparable compression ratio with zlib. It would be wise to use it for > data compression e.g. for backups. > > The default compression is ZLIB. > (Merely a curiosity:) Since this is coming from Virtuozzo, I trust that you've had good luck with ZSTD already in R&D. What do the compression ratios look like in practice? It's touted as "comparable to zlib" which certainly does sound quite nice for streaming compression of backups. I suppose in the worst case it ought to be faster than bandwidth speeds, so no harm in utilizing it. > Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
On 17.05.2019 2:25, John Snow wrote: > > > On 5/16/19 9:48 AM, Denis Plotnikov wrote: >> The patch adds some preparation parts for incompatible compression type >> feature into QCOW2 header that indicates that *all* compressed clusters >> must be (de)compressed using a certain compression type. >> >> It is implied that the compression type is set on the image creation and >> can be changed only later by image conversion, thus the only compression >> algorithm is used for the image. >> >> The plan is to add support for ZSTD and then may be something more effective >> in the future. >> >> ZSTD compression algorithm consumes 3-5 times less CPU power with a >> comparable compression ratio with zlib. It would be wise to use it for >> data compression e.g. for backups. >> >> The default compression is ZLIB. >> > > (Merely a curiosity:) > > Since this is coming from Virtuozzo, I trust that you've had good luck > with ZSTD already in R&D. What do the compression ratios look like in > practice? It's touted as "comparable to zlib" which certainly does sound > quite nice for streaming compression of backups. > > I suppose in the worst case it ought to be faster than bandwidth speeds, > so no harm in utilizing it. Yes, we did some research on it. Actually, there is a patch in the mailing list (please, take a look: ) which applies ZSTD compression in the migration. 
Here are the numbers from there: host: i7-4790 8xCPU @ 3.60GHz, 16G RAM migration to the same host VM: 2xVCPU, 8G RAM total 5G RAM used, memory populated with postgreqsl data produced by pgbench performance benchmark Threads: 1 compress – 1 decompress zstd provides slightly less compression ratio with almost the same CPU usage but copes with RAM compression roughly 2 times faster compression type zlib | zstd --------------------------------------------------------- compression level 1 5 | 1 5 compression ratio 6.92 7.05 | 6.69 6.89 cpu idle, % 82 83 | 86 80 time, sec 49 71 | 26 31 time diff to zlib, sec | -25 -41 time diff to zlib, % | -47% -56% I general ZSTD provides better compression ratio on big dependent chunks of data. Than bigger the data size then better ZSTD it compresses. Since, in our cases (migration: 4K RAM block, qcow2: cluster) we have to compress independent chunks the ability of ZSTD to find better compression solution is restricted. Although, the compression ratio is pretty much the same in both cases the experiments shown that ZSTD does the compression much faster (x2). Which is obviously good for us. 
Here is other comparison of ZLIB vs ZSTD without any application in qemu which shows that ZSTD works faster zlib = max compression level zstd = compression level 5 (max 22) cycles consumed for compression: 4k txt 4K ram 4K bin 64K txt 64K ram 64K bin zlib 400K 344K 1.3M 13M 5M 92.3M zstd 350K 235K 312K 3.3M 1.3M 2.4M Diff,% -12 -32 -77 -75 -73 -97 size after compression in bytes: 4k txt 4K ram 4K bin 64K txt 64K ram 64K bin zlib 1542 3599 1403 17386 64735 20609 zstd 1568 3540 1250 17656 65546 20023 Diff,% -2 2 11 -2 -1 3 Data sources for test files [we took 4K and 64K chunks from there]: txt = linux/Documentation/memory-barriers.txt ram = /boot/initramfs-4.20.0-rc6+.img bin = x86_64-softmmu/qemu-system-x86_64 Increasing of ZSTD compression ratio didn't give any significant improvements of the out size but slowed down the pace of ZSTD Denis > >> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> -- Best, Denis
On 5/17/19 4:05 AM, Denis Plotnikov wrote: > > > On 17.05.2019 2:25, John Snow wrote: >> >> >> On 5/16/19 9:48 AM, Denis Plotnikov wrote: >>> The patch adds some preparation parts for incompatible compression type >>> feature into QCOW2 header that indicates that *all* compressed clusters >>> must be (de)compressed using a certain compression type. >>> >>> It is implied that the compression type is set on the image creation and >>> can be changed only later by image conversion, thus the only compression >>> algorithm is used for the image. >>> >>> The plan is to add support for ZSTD and then may be something more effective >>> in the future. >>> >>> ZSTD compression algorithm consumes 3-5 times less CPU power with a >>> comparable compression ratio with zlib. It would be wise to use it for >>> data compression e.g. for backups. >>> >>> The default compression is ZLIB. >>> >> >> (Merely a curiosity:) >> >> Since this is coming from Virtuozzo, I trust that you've had good luck >> with ZSTD already in R&D. What do the compression ratios look like in >> practice? It's touted as "comparable to zlib" which certainly does sound >> quite nice for streaming compression of backups. >> >> I suppose in the worst case it ought to be faster than bandwidth speeds, >> so no harm in utilizing it. > Yes, we did some research on it. Actually, there is a patch in the > mailing list (please, take a look: ) which applies ZSTD compression in > the migration. 
> Here are the numbers from there: > > host: i7-4790 8xCPU @ 3.60GHz, 16G RAM > migration to the same host > VM: 2xVCPU, 8G RAM total > 5G RAM used, memory populated with postgreqsl data > produced by pgbench performance benchmark > > Threads: 1 compress – 1 decompress > > zstd provides slightly less compression ratio with almost the same > CPU usage but copes with RAM compression roughly 2 times faster > > compression type zlib | zstd > --------------------------------------------------------- > compression level 1 5 | 1 5 > compression ratio 6.92 7.05 | 6.69 6.89 > cpu idle, % 82 83 | 86 80 > time, sec 49 71 | 26 31 > time diff to zlib, sec | -25 -41 > time diff to zlib, % | -47% -56% > > I general ZSTD provides better compression ratio on big dependent chunks > of data. Than bigger the data size then better ZSTD it compresses. > > Since, in our cases (migration: 4K RAM block, qcow2: cluster) we > have to compress independent chunks the ability of ZSTD to find better > compression solution is restricted. > > Although, the compression ratio is pretty much the same in both cases > the experiments shown that ZSTD does the compression much faster (x2). > > Which is obviously good for us. > > Here is other comparison of ZLIB vs ZSTD without any application in qemu > which shows that ZSTD works faster > > zlib = max compression level > zstd = compression level 5 (max 22) > > cycles consumed for compression: > > 4k txt 4K ram 4K bin 64K txt 64K ram 64K bin > zlib 400K 344K 1.3M 13M 5M 92.3M > zstd 350K 235K 312K 3.3M 1.3M 2.4M > Diff,% -12 -32 -77 -75 -73 -97 > Wow, the 4k bin one is drastic. The text is even more prominent. wow! > size after compression in bytes: > > 4k txt 4K ram 4K bin 64K txt 64K ram 64K bin > zlib 1542 3599 1403 17386 64735 20609 > zstd 1568 3540 1250 17656 65546 20023 > Diff,% -2 2 11 -2 -1 3 > Yeah, that's pretty close. Seems like absolutely a great tradeoff for the speed gain. 
If the little bit of difference matters to you, you can always do some more heavy-duty compression of your choice in another layer of the storage stack. > Data sources for test files [we took 4K and 64K chunks from there]: > > txt = linux/Documentation/memory-barriers.txt > ram = /boot/initramfs-4.20.0-rc6+.img > bin = x86_64-softmmu/qemu-system-x86_64 > > Increasing of ZSTD compression ratio didn't give any significant > improvements of the out size but slowed down the pace of ZSTD > > Denis > >> >>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> > Very useful data, thanks for sharing! Seems like this would indeed be a great thing to have for qcow2.
© 2016 - 2024 Red Hat, Inc.