zstd significantly reduces cluster compression time.
It provides better compression performance maintaining
the same level of the compression ratio in comparison with
zlib, which, at the moment, is the only compression
method available.
The performance test results:
Test compresses and decompresses qemu qcow2 image with just
installed rhel-7.6 guest.
Image cluster size: 64K. Image on disk size: 2.2G
The test was conducted with brd disk to reduce the influence
of disk subsystem to the test results.
The results is given in seconds.
compress cmd:
time ./qemu-img convert -O qcow2 -c -o compression_type=[zlib|zstd]
src.img [zlib|zstd]_compressed.img
decompress cmd
time ./qemu-img convert -O qcow2
[zlib|zstd]_compressed.img uncompressed.img
compression decompression
zlib zstd zlib zstd
------------------------------------------------------------
real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
user 65.0 15.8 5.3 2.5
sys 3.3 0.2 2.0 2.0
Both ZLIB and ZSTD gave the same compression ratio: 1.57
compressed image size in both cases: 1.4G
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
---
docs/interop/qcow2.txt | 1 +
configure | 2 +-
qapi/block-core.json | 3 +-
block/qcow2-threads.c | 169 +++++++++++++++++++++++++++++++++++++++++
block/qcow2.c | 7 ++
slirp | 2 +-
6 files changed, 181 insertions(+), 3 deletions(-)
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index 640e0eca40..18a77f737e 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -209,6 +209,7 @@ version 2.
Available compression type values:
0: zlib <https://www.zlib.net/>
+ 1: zstd <http://github.com/facebook/zstd>
=== Header padding ===
diff --git a/configure b/configure
index 23b5e93752..4e3a1690ea 100755
--- a/configure
+++ b/configure
@@ -1861,7 +1861,7 @@ disabled with --disable-FEATURE, default is enabled if available:
lzfse support of lzfse compression library
(for reading lzfse-compressed dmg images)
zstd support for zstd compression library
- (for migration compression)
+ (for migration compression and qcow2 cluster compression)
seccomp seccomp support
coroutine-pool coroutine freelist (better performance)
glusterfs GlusterFS backend
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1522e2983f..6fbacddab2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4293,11 +4293,12 @@
# Compression type used in qcow2 image file
#
# @zlib: zlib compression, see <http://zlib.net/>
+# @zstd: zstd compression, see <http://github.com/facebook/zstd>
#
# Since: 5.1
##
{ 'enum': 'Qcow2CompressionType',
- 'data': [ 'zlib' ] }
+ 'data': [ 'zlib', { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] }
##
# @BlockdevCreateOptionsQcow2:
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
index 7dbaf53489..a0b12e1b15 100644
--- a/block/qcow2-threads.c
+++ b/block/qcow2-threads.c
@@ -28,6 +28,11 @@
#define ZLIB_CONST
#include <zlib.h>
+#ifdef CONFIG_ZSTD
+#include <zstd.h>
+#include <zstd_errors.h>
+#endif
+
#include "qcow2.h"
#include "block/thread-pool.h"
#include "crypto.h"
@@ -166,6 +171,160 @@ static ssize_t qcow2_zlib_decompress(void *dest, size_t dest_size,
return ret;
}
+#ifdef CONFIG_ZSTD
+
+/*
+ * qcow2_zstd_compress()
+ *
+ * Compress @src_size bytes of data using zstd compression method
+ *
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: compressed size on success
+ * -ENOMEM destination buffer is not enough to store compressed data
+ * -EIO on any other error
+ */
+static ssize_t qcow2_zstd_compress(void *dest, size_t dest_size,
+ const void *src, size_t src_size)
+{
+ ssize_t ret;
+ size_t zstd_ret;
+ ZSTD_outBuffer output = {
+ .dst = dest,
+ .size = dest_size,
+ .pos = 0
+ };
+ ZSTD_inBuffer input = {
+ .src = src,
+ .size = src_size,
+ .pos = 0
+ };
+ ZSTD_CCtx *cctx = ZSTD_createCCtx();
+
+ if (!cctx) {
+ return -EIO;
+ }
+ /*
+ * Use the zstd streamed interface for symmetry with decompression,
+ * where streaming is essential since we don't record the exact
+ * compressed size.
+ *
+ * ZSTD_compressStream2() tries to compress everything it could
+ * with a single call. Although, ZSTD docs says that:
+ * "You must continue calling ZSTD_compressStream2() with ZSTD_e_end
+ * until it returns 0, at which point you are free to start a new frame",
+ * in out tests we saw the only case when it returned with >0 -
+ * when the output buffer was too small. In that case,
+ * ZSTD_compressStream2() expects a bigger buffer on the next call.
+ * We can't provide a bigger buffer because we are limited with dest_size
+ * which we pass to the ZSTD_compressStream2() at once.
+ * So, we don't need any loops and just abort the compression when we
+ * don't get 0 result on the first call.
+ */
+ zstd_ret = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
+
+ if (zstd_ret) {
+ if (zstd_ret > output.size - output.pos) {
+ ret = -ENOMEM;
+ } else {
+ ret = -EIO;
+ }
+ goto out;
+ }
+
+ /* make sure we can safely return compressed buffer size with ssize_t */
+ assert(output.pos <= SSIZE_MAX);
+ ret = output.pos;
+out:
+ ZSTD_freeCCtx(cctx);
+ return ret;
+}
+
+/*
+ * qcow2_zstd_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes using zstd compression method
+ *
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ * -EIO on any error
+ */
+static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
+ const void *src, size_t src_size)
+{
+ size_t zstd_ret = 0;
+ ssize_t ret = 0;
+ ZSTD_outBuffer output = {
+ .dst = dest,
+ .size = dest_size,
+ .pos = 0
+ };
+ ZSTD_inBuffer input = {
+ .src = src,
+ .size = src_size,
+ .pos = 0
+ };
+ ZSTD_DCtx *dctx = ZSTD_createDCtx();
+
+ if (!dctx) {
+ return -EIO;
+ }
+
+ /*
+ * The compressed stream from the input buffer may consist of more
+ * than one zstd frame. So we iterate until we get a fully
+ * uncompressed cluster.
+ * From zstd docs related to ZSTD_decompressStream:
+ * "return : 0 when a frame is completely decoded and fully flushed"
+ * We suppose that this means: each time ZSTD_decompressStream reads
+ * only ONE full frame and returns 0 if and only if that frame
+ * is completely decoded and flushed. Only after returning 0,
+ * ZSTD_decompressStream reads another ONE full frame.
+ */
+ while (output.pos < output.size) {
+ size_t last_in_pos = input.pos;
+ size_t last_out_pos = output.pos;
+ zstd_ret = ZSTD_decompressStream(dctx, &output, &input);
+
+ if (ZSTD_isError(zstd_ret)) {
+ ret = -EIO;
+ break;
+ }
+
+ /*
+ * The ZSTD manual is vague about what to do if it reads
+ * the buffer partially, and we don't want to get stuck
+ * in an infinite loop where ZSTD_decompressStream
+ * returns > 0 waiting for another input chunk. So, we add
+ * a check which ensures that the loop makes some progress
+ * on each step.
+ */
+ if (last_in_pos >= input.pos &&
+ last_out_pos >= output.pos) {
+ ret = -EIO;
+ break;
+ }
+ }
+ /*
+ * Make sure that we have the frame fully flushed here
+ * if not, we somehow managed to get uncompressed cluster
+ * greater then the cluster size, possibly because of its
+ * damage.
+ */
+ if (zstd_ret > 0) {
+ ret = -EIO;
+ }
+
+ ZSTD_freeDCtx(dctx);
+ assert(ret == 0 || ret == -EIO);
+ return ret;
+}
+#endif
+
static int qcow2_compress_pool_func(void *opaque)
{
Qcow2CompressData *data = opaque;
@@ -217,6 +376,11 @@ qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
fn = qcow2_zlib_compress;
break;
+#ifdef CONFIG_ZSTD
+ case QCOW2_COMPRESSION_TYPE_ZSTD:
+ fn = qcow2_zstd_compress;
+ break;
+#endif
default:
abort();
}
@@ -249,6 +413,11 @@ qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
fn = qcow2_zlib_decompress;
break;
+#ifdef CONFIG_ZSTD
+ case QCOW2_COMPRESSION_TYPE_ZSTD:
+ fn = qcow2_zstd_decompress;
+ break;
+#endif
default:
abort();
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 9c0b20c912..21f281b7f8 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1246,6 +1246,9 @@ static int validate_compression_type(BDRVQcow2State *s, Error **errp)
{
switch (s->compression_type) {
case QCOW2_COMPRESSION_TYPE_ZLIB:
+#ifdef CONFIG_ZSTD
+ case QCOW2_COMPRESSION_TYPE_ZSTD:
+#endif
break;
default:
@@ -3478,6 +3481,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
}
switch (qcow2_opts->compression_type) {
+#ifdef CONFIG_ZSTD
+ case QCOW2_COMPRESSION_TYPE_ZSTD:
+ break;
+#endif
default:
error_setg(errp, "Unknown compression type");
goto out;
diff --git a/slirp b/slirp
index 2faae0f778..55ab21c9a3 160000
--- a/slirp
+++ b/slirp
@@ -1 +1 @@
-Subproject commit 2faae0f778f818fadc873308f983289df697eb93
+Subproject commit 55ab21c9a36852915b81f1b41ebaf3b6509dd8ba
--
2.17.0
On 4/28/20 3:00 PM, Denis Plotnikov wrote:
> zstd significantly reduces cluster compression time.
> It provides better compression performance maintaining
> the same level of the compression ratio in comparison with
> zlib, which, at the moment, is the only compression
> method available.
>
> The performance test results:
> Test compresses and decompresses qemu qcow2 image with just
> installed rhel-7.6 guest.
> Image cluster size: 64K. Image on disk size: 2.2G
>
> The test was conducted with brd disk to reduce the influence
> of disk subsystem to the test results.
> The results is given in seconds.
>
> compress cmd:
> time ./qemu-img convert -O qcow2 -c -o compression_type=[zlib|zstd]
> src.img [zlib|zstd]_compressed.img
> decompress cmd
> time ./qemu-img convert -O qcow2
> [zlib|zstd]_compressed.img uncompressed.img
>
> compression decompression
> zlib zstd zlib zstd
> ------------------------------------------------------------
> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
> user 65.0 15.8 5.3 2.5
> sys 3.3 0.2 2.0 2.0
>
> Both ZLIB and ZSTD gave the same compression ratio: 1.57
> compressed image size in both cases: 1.4G
>
> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
> QAPI part:
> Acked-by: Markus Armbruster <armbru@redhat.com>
> ---
> +static ssize_t qcow2_zstd_compress(void *dest, size_t dest_size,
> + const void *src, size_t src_size)
> +{
> + ssize_t ret;
> + size_t zstd_ret;
> + ZSTD_outBuffer output = {
> + .dst = dest,
> + .size = dest_size,
> + .pos = 0
> + };
> +
> + /* make sure we can safely return compressed buffer size with ssize_t */
> + assert(output.pos <= SSIZE_MAX);
Seems rather vague, since we know that .pos won't exceed .size which was
initialized by dest_size which is <= 2M. A tighter assertion:
assert(output.pos <= dest_size)
seems like it is more realistic to your real constraint (namely, that
zstd did not overflow dest).
> + ret = output.pos;
> +out:
> + ZSTD_freeCCtx(cctx);
> + return ret;
> +}
> +
> +++ b/slirp
> @@ -1 +1 @@
> -Subproject commit 2faae0f778f818fadc873308f983289df697eb93
> +Subproject commit 55ab21c9a36852915b81f1b41ebaf3b6509dd8ba
Umm, you definitely don't want that.
A maintainer could touch up both of those, but I'm not sure which block
maintainer will be accepting this series. Maybe wait for that question
to be answered before trying to post a v23.
--
Eric Blake, Principal Software Engineer
Red Hat, Inc. +1-919-301-3226
Virtualization: qemu.org | libvirt.org
On 28.04.20 22:00, Denis Plotnikov wrote:
> zstd significantly reduces cluster compression time.
> It provides better compression performance maintaining
> the same level of the compression ratio in comparison with
> zlib, which, at the moment, is the only compression
> method available.
>
> The performance test results:
> Test compresses and decompresses qemu qcow2 image with just
> installed rhel-7.6 guest.
> Image cluster size: 64K. Image on disk size: 2.2G
>
> The test was conducted with brd disk to reduce the influence
> of disk subsystem to the test results.
> The results is given in seconds.
>
> compress cmd:
> time ./qemu-img convert -O qcow2 -c -o compression_type=[zlib|zstd]
> src.img [zlib|zstd]_compressed.img
> decompress cmd
> time ./qemu-img convert -O qcow2
> [zlib|zstd]_compressed.img uncompressed.img
>
> compression decompression
> zlib zstd zlib zstd
> ------------------------------------------------------------
> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
> user 65.0 15.8 5.3 2.5
> sys 3.3 0.2 2.0 2.0
>
> Both ZLIB and ZSTD gave the same compression ratio: 1.57
> compressed image size in both cases: 1.4G
>
> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
> QAPI part:
> Acked-by: Markus Armbruster <armbru@redhat.com>
> ---
> docs/interop/qcow2.txt | 1 +
> configure | 2 +-
> qapi/block-core.json | 3 +-
> block/qcow2-threads.c | 169 +++++++++++++++++++++++++++++++++++++++++
> block/qcow2.c | 7 ++
> slirp | 2 +-
> 6 files changed, 181 insertions(+), 3 deletions(-)
[...]
> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
> index 7dbaf53489..a0b12e1b15 100644
> --- a/block/qcow2-threads.c
> +++ b/block/qcow2-threads.c
[...]
> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
> + const void *src, size_t src_size)
> +{
[...]
> + /*
> + * The compressed stream from the input buffer may consist of more
> + * than one zstd frame.
Can it?
Max
29.04.2020 13:24, Max Reitz wrote:
> On 28.04.20 22:00, Denis Plotnikov wrote:
>> zstd significantly reduces cluster compression time.
>> It provides better compression performance maintaining
>> the same level of the compression ratio in comparison with
>> zlib, which, at the moment, is the only compression
>> method available.
>>
>> The performance test results:
>> Test compresses and decompresses qemu qcow2 image with just
>> installed rhel-7.6 guest.
>> Image cluster size: 64K. Image on disk size: 2.2G
>>
>> The test was conducted with brd disk to reduce the influence
>> of disk subsystem to the test results.
>> The results is given in seconds.
>>
>> compress cmd:
>> time ./qemu-img convert -O qcow2 -c -o compression_type=[zlib|zstd]
>> src.img [zlib|zstd]_compressed.img
>> decompress cmd
>> time ./qemu-img convert -O qcow2
>> [zlib|zstd]_compressed.img uncompressed.img
>>
>> compression decompression
>> zlib zstd zlib zstd
>> ------------------------------------------------------------
>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>> user 65.0 15.8 5.3 2.5
>> sys 3.3 0.2 2.0 2.0
>>
>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>> compressed image size in both cases: 1.4G
>>
>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>> QAPI part:
>> Acked-by: Markus Armbruster <armbru@redhat.com>
>> ---
>> docs/interop/qcow2.txt | 1 +
>> configure | 2 +-
>> qapi/block-core.json | 3 +-
>> block/qcow2-threads.c | 169 +++++++++++++++++++++++++++++++++++++++++
>> block/qcow2.c | 7 ++
>> slirp | 2 +-
>> 6 files changed, 181 insertions(+), 3 deletions(-)
>
> [...]
>
>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>> index 7dbaf53489..a0b12e1b15 100644
>> --- a/block/qcow2-threads.c
>> +++ b/block/qcow2-threads.c
>
> [...]
>
>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
>> + const void *src, size_t src_size)
>> +{
>
> [...]
>
>> + /*
>> + * The compressed stream from the input buffer may consist of more
>> + * than one zstd frame.
>
> Can it?
If not, we must require it in the specification. Hmm. If at some point we'll want multi-threaded compression of one big (2M) cluster.. Could this be implemented with zstd lib, if multiple frames are allowed, will allowing multiple frames help? I don't know actually, but I think better not to forbid it. On the other hand, I don't see any benefit in large compressed clusters. At least, in our scenarios (for compressed backups) we use 64k compressed clusters, for good granularity of incremental backups (when for running vm we use 1M clusters).
--
Best regards,
Vladimir
On 29.04.20 12:37, Vladimir Sementsov-Ogievskiy wrote:
> 29.04.2020 13:24, Max Reitz wrote:
>> On 28.04.20 22:00, Denis Plotnikov wrote:
>>> zstd significantly reduces cluster compression time.
>>> It provides better compression performance maintaining
>>> the same level of the compression ratio in comparison with
>>> zlib, which, at the moment, is the only compression
>>> method available.
>>>
>>> The performance test results:
>>> Test compresses and decompresses qemu qcow2 image with just
>>> installed rhel-7.6 guest.
>>> Image cluster size: 64K. Image on disk size: 2.2G
>>>
>>> The test was conducted with brd disk to reduce the influence
>>> of disk subsystem to the test results.
>>> The results is given in seconds.
>>>
>>> compress cmd:
>>> time ./qemu-img convert -O qcow2 -c -o compression_type=[zlib|zstd]
>>> src.img [zlib|zstd]_compressed.img
>>> decompress cmd
>>> time ./qemu-img convert -O qcow2
>>> [zlib|zstd]_compressed.img uncompressed.img
>>>
>>> compression decompression
>>> zlib zstd zlib zstd
>>> ------------------------------------------------------------
>>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>>> user 65.0 15.8 5.3 2.5
>>> sys 3.3 0.2 2.0 2.0
>>>
>>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>>> compressed image size in both cases: 1.4G
>>>
>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>> QAPI part:
>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>> ---
>>> docs/interop/qcow2.txt | 1 +
>>> configure | 2 +-
>>> qapi/block-core.json | 3 +-
>>> block/qcow2-threads.c | 169 +++++++++++++++++++++++++++++++++++++++++
>>> block/qcow2.c | 7 ++
>>> slirp | 2 +-
>>> 6 files changed, 181 insertions(+), 3 deletions(-)
>>
>> [...]
>>
>>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>>> index 7dbaf53489..a0b12e1b15 100644
>>> --- a/block/qcow2-threads.c
>>> +++ b/block/qcow2-threads.c
>>
>> [...]
>>
>>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
>>> + const void *src, size_t src_size)
>>> +{
>>
>> [...]
>>
>>> + /*
>>> + * The compressed stream from the input buffer may consist of more
>>> + * than one zstd frame.
>>
>> Can it?
>
> If not, we must require it in the specification.
Actually, now that you mention it, it would make sense anyway to add
some note to the specification on what exactly compressed with zstd means.
> Hmm. If at some point
> we'll want multi-threaded compression of one big (2M) cluster.. Could
> this be implemented with zstd lib, if multiple frames are allowed, will
> allowing multiple frames help? I don't know actually, but I think better
> not to forbid it. On the other hand, I don't see any benefit in large
> compressed clusters. At least, in our scenarios (for compressed backups)
> we use 64k compressed clusters, for good granularity of incremental
> backups (when for running vm we use 1M clusters).
Is it really that important? Naïvely, it sounds rather complicated to
introduce multithreading into block drivers.
(Also, as for compression, it can only be used in backup scenarios
anyway, where you write many clusters at once. So parallelism on the
cluster level should sufficient to get high usage, and it would benefit
all compression types and cluster sizes.)
Max
29.04.2020 15:17, Max Reitz wrote:
> On 29.04.20 12:37, Vladimir Sementsov-Ogievskiy wrote:
>> 29.04.2020 13:24, Max Reitz wrote:
>>> On 28.04.20 22:00, Denis Plotnikov wrote:
>>>> zstd significantly reduces cluster compression time.
>>>> It provides better compression performance maintaining
>>>> the same level of the compression ratio in comparison with
>>>> zlib, which, at the moment, is the only compression
>>>> method available.
>>>>
>>>> The performance test results:
>>>> Test compresses and decompresses qemu qcow2 image with just
>>>> installed rhel-7.6 guest.
>>>> Image cluster size: 64K. Image on disk size: 2.2G
>>>>
>>>> The test was conducted with brd disk to reduce the influence
>>>> of disk subsystem to the test results.
>>>> The results is given in seconds.
>>>>
>>>> compress cmd:
>>>> time ./qemu-img convert -O qcow2 -c -o compression_type=[zlib|zstd]
>>>> src.img [zlib|zstd]_compressed.img
>>>> decompress cmd
>>>> time ./qemu-img convert -O qcow2
>>>> [zlib|zstd]_compressed.img uncompressed.img
>>>>
>>>> compression decompression
>>>> zlib zstd zlib zstd
>>>> ------------------------------------------------------------
>>>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>>>> user 65.0 15.8 5.3 2.5
>>>> sys 3.3 0.2 2.0 2.0
>>>>
>>>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>>>> compressed image size in both cases: 1.4G
>>>>
>>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>>> QAPI part:
>>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>>> ---
>>>> docs/interop/qcow2.txt | 1 +
>>>> configure | 2 +-
>>>> qapi/block-core.json | 3 +-
>>>> block/qcow2-threads.c | 169 +++++++++++++++++++++++++++++++++++++++++
>>>> block/qcow2.c | 7 ++
>>>> slirp | 2 +-
>>>> 6 files changed, 181 insertions(+), 3 deletions(-)
>>>
>>> [...]
>>>
>>>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>>>> index 7dbaf53489..a0b12e1b15 100644
>>>> --- a/block/qcow2-threads.c
>>>> +++ b/block/qcow2-threads.c
>>>
>>> [...]
>>>
>>>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
>>>> + const void *src, size_t src_size)
>>>> +{
>>>
>>> [...]
>>>
>>>> + /*
>>>> + * The compressed stream from the input buffer may consist of more
>>>> + * than one zstd frame.
>>>
>>> Can it?
>>
>> If not, we must require it in the specification.
>
> Actually, now that you mention it, it would make sense anyway to add
> some note to the specification on what exactly compressed with zstd means.
>
>> Hmm. If at some point
>> we'll want multi-threaded compression of one big (2M) cluster.. Could
>> this be implemented with zstd lib, if multiple frames are allowed, will
>> allowing multiple frames help? I don't know actually, but I think better
>> not to forbid it. On the other hand, I don't see any benefit in large
>> compressed clusters. At least, in our scenarios (for compressed backups)
>> we use 64k compressed clusters, for good granularity of incremental
>> backups (when for running vm we use 1M clusters).
>
> Is it really that important? Naïvely, it sounds rather complicated to
> introduce multithreading into block drivers.
It is already here: compression and encryption already multithreaded.
But of course, one cluster is handled in one thread.
>
> (Also, as for compression, it can only be used in backup scenarios
> anyway, where you write many clusters at once. So parallelism on the
> cluster level should sufficient to get high usage, and it would benefit
> all compression types and cluster sizes.)
>
Yes it works in this way already :)
===
So, we don't know do we want one frame restriction or not. Do you have a preference?
--
Best regards,
Vladimir
On 29.04.20 15:02, Vladimir Sementsov-Ogievskiy wrote:
> 29.04.2020 15:17, Max Reitz wrote:
>> On 29.04.20 12:37, Vladimir Sementsov-Ogievskiy wrote:
>>> 29.04.2020 13:24, Max Reitz wrote:
>>>> On 28.04.20 22:00, Denis Plotnikov wrote:
>>>>> zstd significantly reduces cluster compression time.
>>>>> It provides better compression performance maintaining
>>>>> the same level of the compression ratio in comparison with
>>>>> zlib, which, at the moment, is the only compression
>>>>> method available.
>>>>>
>>>>> The performance test results:
>>>>> Test compresses and decompresses qemu qcow2 image with just
>>>>> installed rhel-7.6 guest.
>>>>> Image cluster size: 64K. Image on disk size: 2.2G
>>>>>
>>>>> The test was conducted with brd disk to reduce the influence
>>>>> of disk subsystem to the test results.
>>>>> The results is given in seconds.
>>>>>
>>>>> compress cmd:
>>>>> time ./qemu-img convert -O qcow2 -c -o
>>>>> compression_type=[zlib|zstd]
>>>>> src.img [zlib|zstd]_compressed.img
>>>>> decompress cmd
>>>>> time ./qemu-img convert -O qcow2
>>>>> [zlib|zstd]_compressed.img uncompressed.img
>>>>>
>>>>> compression decompression
>>>>> zlib zstd zlib zstd
>>>>> ------------------------------------------------------------
>>>>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>>>>> user 65.0 15.8 5.3 2.5
>>>>> sys 3.3 0.2 2.0 2.0
>>>>>
>>>>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>>>>> compressed image size in both cases: 1.4G
>>>>>
>>>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>>>> QAPI part:
>>>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>>>> ---
>>>>> docs/interop/qcow2.txt | 1 +
>>>>> configure | 2 +-
>>>>> qapi/block-core.json | 3 +-
>>>>> block/qcow2-threads.c | 169
>>>>> +++++++++++++++++++++++++++++++++++++++++
>>>>> block/qcow2.c | 7 ++
>>>>> slirp | 2 +-
>>>>> 6 files changed, 181 insertions(+), 3 deletions(-)
>>>>
>>>> [...]
>>>>
>>>>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>>>>> index 7dbaf53489..a0b12e1b15 100644
>>>>> --- a/block/qcow2-threads.c
>>>>> +++ b/block/qcow2-threads.c
>>>>
>>>> [...]
>>>>
>>>>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
>>>>> + const void *src, size_t
>>>>> src_size)
>>>>> +{
>>>>
>>>> [...]
>>>>
>>>>> + /*
>>>>> + * The compressed stream from the input buffer may consist of
>>>>> more
>>>>> + * than one zstd frame.
>>>>
>>>> Can it?
>>>
>>> If not, we must require it in the specification.
>>
>> Actually, now that you mention it, it would make sense anyway to add
>> some note to the specification on what exactly compressed with zstd
>> means.
>>
>>> Hmm. If at some point
>>> we'll want multi-threaded compression of one big (2M) cluster.. Could
>>> this be implemented with zstd lib, if multiple frames are allowed, will
>>> allowing multiple frames help? I don't know actually, but I think better
>>> not to forbid it. On the other hand, I don't see any benefit in large
>>> compressed clusters. At least, in our scenarios (for compressed backups)
>>> we use 64k compressed clusters, for good granularity of incremental
>>> backups (when for running vm we use 1M clusters).
>>
>> Is it really that important? Naïvely, it sounds rather complicated to
>> introduce multithreading into block drivers.
>
> It is already here: compression and encryption already multithreaded.
> But of course, one cluster is handled in one thread.
Ah, good. I forgot.
>> (Also, as for compression, it can only be used in backup scenarios
>> anyway, where you write many clusters at once. So parallelism on the
>> cluster level should sufficient to get high usage, and it would benefit
>> all compression types and cluster sizes.)
>>
>
> Yes it works in this way already :)
Well, OK then.
> So, we don't know do we want one frame restriction or not. Do you have a
> preference?
*shrug*
Seems like it would be preferential to allow multiple frames still. A
note in the spec would be nice (i.e., streaming format, multiple frames
per cluster possible).
Max
On 30.04.2020 11:26, Max Reitz wrote:
> On 29.04.20 15:02, Vladimir Sementsov-Ogievskiy wrote:
>> 29.04.2020 15:17, Max Reitz wrote:
>>> On 29.04.20 12:37, Vladimir Sementsov-Ogievskiy wrote:
>>>> 29.04.2020 13:24, Max Reitz wrote:
>>>>> On 28.04.20 22:00, Denis Plotnikov wrote:
>>>>>> zstd significantly reduces cluster compression time.
>>>>>> It provides better compression performance maintaining
>>>>>> the same level of the compression ratio in comparison with
>>>>>> zlib, which, at the moment, is the only compression
>>>>>> method available.
>>>>>>
>>>>>> The performance test results:
>>>>>> Test compresses and decompresses qemu qcow2 image with just
>>>>>> installed rhel-7.6 guest.
>>>>>> Image cluster size: 64K. Image on disk size: 2.2G
>>>>>>
>>>>>> The test was conducted with brd disk to reduce the influence
>>>>>> of disk subsystem to the test results.
>>>>>> The results is given in seconds.
>>>>>>
>>>>>> compress cmd:
>>>>>> time ./qemu-img convert -O qcow2 -c -o
>>>>>> compression_type=[zlib|zstd]
>>>>>> src.img [zlib|zstd]_compressed.img
>>>>>> decompress cmd
>>>>>> time ./qemu-img convert -O qcow2
>>>>>> [zlib|zstd]_compressed.img uncompressed.img
>>>>>>
>>>>>> compression decompression
>>>>>> zlib zstd zlib zstd
>>>>>> ------------------------------------------------------------
>>>>>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>>>>>> user 65.0 15.8 5.3 2.5
>>>>>> sys 3.3 0.2 2.0 2.0
>>>>>>
>>>>>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>>>>>> compressed image size in both cases: 1.4G
>>>>>>
>>>>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>>>>> QAPI part:
>>>>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>>>>> ---
>>>>>> docs/interop/qcow2.txt | 1 +
>>>>>> configure | 2 +-
>>>>>> qapi/block-core.json | 3 +-
>>>>>> block/qcow2-threads.c | 169
>>>>>> +++++++++++++++++++++++++++++++++++++++++
>>>>>> block/qcow2.c | 7 ++
>>>>>> slirp | 2 +-
>>>>>> 6 files changed, 181 insertions(+), 3 deletions(-)
>>>>> [...]
>>>>>
>>>>>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>>>>>> index 7dbaf53489..a0b12e1b15 100644
>>>>>> --- a/block/qcow2-threads.c
>>>>>> +++ b/block/qcow2-threads.c
>>>>> [...]
>>>>>
>>>>>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
>>>>>> + const void *src, size_t
>>>>>> src_size)
>>>>>> +{
>>>>> [...]
>>>>>
>>>>>> + /*
>>>>>> + * The compressed stream from the input buffer may consist of
>>>>>> more
>>>>>> + * than one zstd frame.
>>>>> Can it?
>>>> If not, we must require it in the specification.
>>> Actually, now that you mention it, it would make sense anyway to add
>>> some note to the specification on what exactly compressed with zstd
>>> means.
>>>
>>>> Hmm. If at some point
>>>> we'll want multi-threaded compression of one big (2M) cluster.. Could
>>>> this be implemented with zstd lib, if multiple frames are allowed, will
>>>> allowing multiple frames help? I don't know actually, but I think better
>>>> not to forbid it. On the other hand, I don't see any benefit in large
>>>> compressed clusters. At least, in our scenarios (for compressed backups)
>>>> we use 64k compressed clusters, for good granularity of incremental
>>>> backups (when for running vm we use 1M clusters).
>>> Is it really that important? Naïvely, it sounds rather complicated to
>>> introduce multithreading into block drivers.
>> It is already here: compression and encryption already multithreaded.
>> But of course, one cluster is handled in one thread.
> Ah, good. I forgot.
>
>>> (Also, as for compression, it can only be used in backup scenarios
>>> anyway, where you write many clusters at once. So parallelism on the
>>> cluster level should sufficient to get high usage, and it would benefit
>>> all compression types and cluster sizes.)
>>>
>> Yes it works in this way already :)
> Well, OK then.
>
>> So, we don't know do we want one frame restriction or not. Do you have a
>> preference?
> *shrug*
>
> Seems like it would be preferential to allow multiple frames still. A
> note in the spec would be nice (i.e., streaming format, multiple frames
> per cluster possible).
We don't mention anything about zlib compressing details in the spec.
If we mention anything about ZSTD compressing details we'll have to do
it for
zlib as well. So, I think we have two possibilities for the spec:
1. mention for both
2. don't mention at all
I think the 2nd is better. It gives more degree of freedom for the
future improvements.
and we've already covered both cases (one frame, may frames) in the code.
I'm note sure I'm right. Any other opinions?
Denis
>
> Max
>
On 30.04.20 11:48, Denis Plotnikov wrote:
>
>
> On 30.04.2020 11:26, Max Reitz wrote:
>> On 29.04.20 15:02, Vladimir Sementsov-Ogievskiy wrote:
>>> 29.04.2020 15:17, Max Reitz wrote:
>>>> On 29.04.20 12:37, Vladimir Sementsov-Ogievskiy wrote:
>>>>> 29.04.2020 13:24, Max Reitz wrote:
>>>>>> On 28.04.20 22:00, Denis Plotnikov wrote:
>>>>>>> zstd significantly reduces cluster compression time.
>>>>>>> It provides better compression performance maintaining
>>>>>>> the same level of the compression ratio in comparison with
>>>>>>> zlib, which, at the moment, is the only compression
>>>>>>> method available.
>>>>>>>
>>>>>>> The performance test results:
>>>>>>> Test compresses and decompresses qemu qcow2 image with just
>>>>>>> installed rhel-7.6 guest.
>>>>>>> Image cluster size: 64K. Image on disk size: 2.2G
>>>>>>>
>>>>>>> The test was conducted with brd disk to reduce the influence
>>>>>>> of disk subsystem to the test results.
>>>>>>> The results is given in seconds.
>>>>>>>
>>>>>>> compress cmd:
>>>>>>> time ./qemu-img convert -O qcow2 -c -o
>>>>>>> compression_type=[zlib|zstd]
>>>>>>> src.img [zlib|zstd]_compressed.img
>>>>>>> decompress cmd
>>>>>>> time ./qemu-img convert -O qcow2
>>>>>>> [zlib|zstd]_compressed.img uncompressed.img
>>>>>>>
>>>>>>> compression decompression
>>>>>>> zlib zstd zlib zstd
>>>>>>> ------------------------------------------------------------
>>>>>>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>>>>>>> user 65.0 15.8 5.3 2.5
>>>>>>> sys 3.3 0.2 2.0 2.0
>>>>>>>
>>>>>>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>>>>>>> compressed image size in both cases: 1.4G
>>>>>>>
>>>>>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>>>>>> QAPI part:
>>>>>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>>>>>> ---
>>>>>>> docs/interop/qcow2.txt | 1 +
>>>>>>> configure | 2 +-
>>>>>>> qapi/block-core.json | 3 +-
>>>>>>> block/qcow2-threads.c | 169
>>>>>>> +++++++++++++++++++++++++++++++++++++++++
>>>>>>> block/qcow2.c | 7 ++
>>>>>>> slirp | 2 +-
>>>>>>> 6 files changed, 181 insertions(+), 3 deletions(-)
>>>>>> [...]
>>>>>>
>>>>>>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>>>>>>> index 7dbaf53489..a0b12e1b15 100644
>>>>>>> --- a/block/qcow2-threads.c
>>>>>>> +++ b/block/qcow2-threads.c
>>>>>> [...]
>>>>>>
>>>>>>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
>>>>>>> + const void *src, size_t
>>>>>>> src_size)
>>>>>>> +{
>>>>>> [...]
>>>>>>
>>>>>>> + /*
>>>>>>> + * The compressed stream from the input buffer may consist of
>>>>>>> more
>>>>>>> + * than one zstd frame.
>>>>>> Can it?
>>>>> If not, we must require it in the specification.
>>>> Actually, now that you mention it, it would make sense anyway to add
>>>> some note to the specification on what exactly compressed with zstd
>>>> means.
>>>>
>>>>> Hmm. If at some point
>>>>> we'll want multi-threaded compression of one big (2M) cluster.. Could
>>>>> this be implemented with zstd lib, if multiple frames are allowed,
>>>>> will
>>>>> allowing multiple frames help? I don't know actually, but I think
>>>>> better
>>>>> not to forbid it. On the other hand, I don't see any benefit in large
>>>>> compressed clusters. At least, in our scenarios (for compressed
>>>>> backups)
>>>>> we use 64k compressed clusters, for good granularity of incremental
>>>>> backups (when for running vm we use 1M clusters).
>>>> Is it really that important? Naïvely, it sounds rather complicated to
>>>> introduce multithreading into block drivers.
>>> It is already here: compression and encryption already multithreaded.
>>> But of course, one cluster is handled in one thread.
>> Ah, good. I forgot.
>>
>>>> (Also, as for compression, it can only be used in backup scenarios
>>>> anyway, where you write many clusters at once. So parallelism on the
>>>> cluster level should sufficient to get high usage, and it would benefit
>>>> all compression types and cluster sizes.)
>>>>
>>> Yes it works in this way already :)
>> Well, OK then.
>>
>>> So, we don't know do we want one frame restriction or not. Do you have a
>>> preference?
>> *shrug*
>>
>> Seems like it would be preferential to allow multiple frames still. A
>> note in the spec would be nice (i.e., streaming format, multiple frames
>> per cluster possible).
>
> We don't mention anything about zlib compressing details in the spec.
Yep. Which is bad enough.
> If we mention anything about ZSTD compressing details we'll have to do
> it for
> zlib as well.
I personally don’t like “If you can’t make it perfect, you shouldn’t do
it at all”. Mentioning it for zstd would still be an improvement.
Also, I believe that “zlib compression” is pretty much unambiguous,
considering all the code does is call deflate()/inflate().
I’m not saying we need to amend the spec in this series, but I don’t see
a good argument against doing so at some point.
> So, I think we have two possibilities for the spec:
> 1. mention for both
> 2. don't mention at all
>
> I think the 2nd is better. It gives more degree of freedom for the
> future improvements.
No, it absolutely doesn’t. There is a de-facto format, namely what qemu
accepts. Changing that would be an incompatible change. Just because
we don’t write what’s allowed into the spec doesn’t change that fact.
> and we've already covered both cases (one frame, may frames) in the code.
There are still different zstd formats, namely streaming or not
streaming. That’s what should be mentioned.
Max
On 30.04.2020 14:47, Max Reitz wrote:
> On 30.04.20 11:48, Denis Plotnikov wrote:
>>
>> On 30.04.2020 11:26, Max Reitz wrote:
>>> On 29.04.20 15:02, Vladimir Sementsov-Ogievskiy wrote:
>>>> 29.04.2020 15:17, Max Reitz wrote:
>>>>> On 29.04.20 12:37, Vladimir Sementsov-Ogievskiy wrote:
>>>>>> 29.04.2020 13:24, Max Reitz wrote:
>>>>>>> On 28.04.20 22:00, Denis Plotnikov wrote:
>>>>>>>> zstd significantly reduces cluster compression time.
>>>>>>>> It provides better compression performance maintaining
>>>>>>>> the same level of the compression ratio in comparison with
>>>>>>>> zlib, which, at the moment, is the only compression
>>>>>>>> method available.
>>>>>>>>
>>>>>>>> The performance test results:
>>>>>>>> Test compresses and decompresses qemu qcow2 image with just
>>>>>>>> installed rhel-7.6 guest.
>>>>>>>> Image cluster size: 64K. Image on disk size: 2.2G
>>>>>>>>
>>>>>>>> The test was conducted with brd disk to reduce the influence
>>>>>>>> of disk subsystem to the test results.
>>>>>>>> The results is given in seconds.
>>>>>>>>
>>>>>>>> compress cmd:
>>>>>>>> time ./qemu-img convert -O qcow2 -c -o
>>>>>>>> compression_type=[zlib|zstd]
>>>>>>>> src.img [zlib|zstd]_compressed.img
>>>>>>>> decompress cmd
>>>>>>>> time ./qemu-img convert -O qcow2
>>>>>>>> [zlib|zstd]_compressed.img uncompressed.img
>>>>>>>>
>>>>>>>> compression decompression
>>>>>>>> zlib zstd zlib zstd
>>>>>>>> ------------------------------------------------------------
>>>>>>>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>>>>>>>> user 65.0 15.8 5.3 2.5
>>>>>>>> sys 3.3 0.2 2.0 2.0
>>>>>>>>
>>>>>>>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>>>>>>>> compressed image size in both cases: 1.4G
>>>>>>>>
>>>>>>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>>>>>>> QAPI part:
>>>>>>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>>>>>>> ---
>>>>>>>> docs/interop/qcow2.txt | 1 +
>>>>>>>> configure | 2 +-
>>>>>>>> qapi/block-core.json | 3 +-
>>>>>>>> block/qcow2-threads.c | 169
>>>>>>>> +++++++++++++++++++++++++++++++++++++++++
>>>>>>>> block/qcow2.c | 7 ++
>>>>>>>> slirp | 2 +-
>>>>>>>> 6 files changed, 181 insertions(+), 3 deletions(-)
>>>>>>> [...]
>>>>>>>
>>>>>>>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>>>>>>>> index 7dbaf53489..a0b12e1b15 100644
>>>>>>>> --- a/block/qcow2-threads.c
>>>>>>>> +++ b/block/qcow2-threads.c
>>>>>>> [...]
>>>>>>>
>>>>>>>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
>>>>>>>> + const void *src, size_t
>>>>>>>> src_size)
>>>>>>>> +{
>>>>>>> [...]
>>>>>>>
>>>>>>>> + /*
>>>>>>>> + * The compressed stream from the input buffer may consist of
>>>>>>>> more
>>>>>>>> + * than one zstd frame.
>>>>>>> Can it?
>>>>>> If not, we must require it in the specification.
>>>>> Actually, now that you mention it, it would make sense anyway to add
>>>>> some note to the specification on what exactly compressed with zstd
>>>>> means.
>>>>>
>>>>>> Hmm. If at some point
>>>>>> we'll want multi-threaded compression of one big (2M) cluster.. Could
>>>>>> this be implemented with zstd lib, if multiple frames are allowed,
>>>>>> will
>>>>>> allowing multiple frames help? I don't know actually, but I think
>>>>>> better
>>>>>> not to forbid it. On the other hand, I don't see any benefit in large
>>>>>> compressed clusters. At least, in our scenarios (for compressed
>>>>>> backups)
>>>>>> we use 64k compressed clusters, for good granularity of incremental
>>>>>> backups (when for running vm we use 1M clusters).
>>>>> Is it really that important? Naïvely, it sounds rather complicated to
>>>>> introduce multithreading into block drivers.
>>>> It is already here: compression and encryption already multithreaded.
>>>> But of course, one cluster is handled in one thread.
>>> Ah, good. I forgot.
>>>
>>>>> (Also, as for compression, it can only be used in backup scenarios
>>>>> anyway, where you write many clusters at once. So parallelism on the
>>>>> cluster level should sufficient to get high usage, and it would benefit
>>>>> all compression types and cluster sizes.)
>>>>>
>>>> Yes it works in this way already :)
>>> Well, OK then.
>>>
>>>> So, we don't know do we want one frame restriction or not. Do you have a
>>>> preference?
>>> *shrug*
>>>
>>> Seems like it would be preferential to allow multiple frames still. A
>>> note in the spec would be nice (i.e., streaming format, multiple frames
>>> per cluster possible).
>> We don't mention anything about zlib compressing details in the spec.
> Yep. Which is bad enough.
>
>> If we mention anything about ZSTD compressing details we'll have to do
>> it for
>> zlib as well.
> I personally don’t like “If you can’t make it perfect, you shouldn’t do
> it at all”. Mentioning it for zstd would still be an improvement.
Good approach. I like it. But I'm trying to be cautious.
>
> Also, I believe that “zlib compression” is pretty much unambiguous,
> considering all the code does is call deflate()/inflate().
>
> I’m not saying we need to amend the spec in this series, but I don’t see
> a good argument against doing so at some point.
>
>> So, I think we have two possibilities for the spec:
>> 1. mention for both
>> 2. don't mention at all
>>
>> I think the 2nd is better. It gives more degree of freedom for the
>> future improvements.
> No, it absolutely doesn’t. There is a de-facto format, namely what qemu
> accepts. Changing that would be an incompatible change. Just because
> we don’t write what’s allowed into the spec doesn’t change that fact.
>
>> and we've already covered both cases (one frame, may frames) in the code.
> There are still different zstd formats, namely streaming or not
> streaming. That’s what should be mentioned.
It looks like the ZSTD format is always the same.
The ZSTD interface is different: for some reason the simple
zstd_decompress()
wants to know the size of data to decompress
From zstd doc:
*size_t ZSTD_decompress( void* dst, size_t dstCapacity, const void* src,
size_t compressedSize); *
`compressedSize` : must be the _exact_ size of some number of compressed
and/or skippable frames.
I made a test (based on Vladimir's code) to check it:
// compile with gcc -lzstd -g test_zstd.c -o test_zstd
#include <stdio.h> #include <assert.h> #include <zstd.h> #include
<zstd_errors.h> int main() { char buf1[] = "erbebewbwe"; char buf2[100];
char buf3[100]; int ret; ZSTD_outBuffer output; ZSTD_inBuffer input; ret
= ZSTD_compress(buf2, sizeof(buf2), buf1, sizeof(buf1), 5); printf("ret:
%d\n", ret); ZSTD_DCtx *dctx = ZSTD_createDCtx(); input =
(ZSTD_inBuffer){buf2, ret, 0}; output = (ZSTD_outBuffer){buf3,
sizeof(buf3), 0}; ret = ZSTD_decompressStream(dctx, &output, &input);
printf("ret: %d, input.pos: %ld, output.pos: %ld\n", ret, input.pos,
output.pos); printf("To compress: %s\n", buf1); buf3[output.pos] = 0;
printf("Uncompressed: %s\n", buf3); return 0; }
And it works fine.
We use streaming semantics just to be consistent with the interfaces
(stream_compress/stream_decompress), otherwise
we could use simple_compress/stream_decompress
Denis
>
> Max
>
On 30.04.20 15:56, Denis Plotnikov wrote:
>
>
> On 30.04.2020 14:47, Max Reitz wrote:
>> On 30.04.20 11:48, Denis Plotnikov wrote:
>>>
>>> On 30.04.2020 11:26, Max Reitz wrote:
>>>> On 29.04.20 15:02, Vladimir Sementsov-Ogievskiy wrote:
>>>>> 29.04.2020 15:17, Max Reitz wrote:
>>>>>> On 29.04.20 12:37, Vladimir Sementsov-Ogievskiy wrote:
>>>>>>> 29.04.2020 13:24, Max Reitz wrote:
>>>>>>>> On 28.04.20 22:00, Denis Plotnikov wrote:
>>>>>>>>> zstd significantly reduces cluster compression time.
>>>>>>>>> It provides better compression performance maintaining
>>>>>>>>> the same level of the compression ratio in comparison with
>>>>>>>>> zlib, which, at the moment, is the only compression
>>>>>>>>> method available.
>>>>>>>>>
>>>>>>>>> The performance test results:
>>>>>>>>> Test compresses and decompresses qemu qcow2 image with just
>>>>>>>>> installed rhel-7.6 guest.
>>>>>>>>> Image cluster size: 64K. Image on disk size: 2.2G
>>>>>>>>>
>>>>>>>>> The test was conducted with brd disk to reduce the influence
>>>>>>>>> of disk subsystem to the test results.
>>>>>>>>> The results is given in seconds.
>>>>>>>>>
>>>>>>>>> compress cmd:
>>>>>>>>> time ./qemu-img convert -O qcow2 -c -o
>>>>>>>>> compression_type=[zlib|zstd]
>>>>>>>>> src.img [zlib|zstd]_compressed.img
>>>>>>>>> decompress cmd
>>>>>>>>> time ./qemu-img convert -O qcow2
>>>>>>>>> [zlib|zstd]_compressed.img uncompressed.img
>>>>>>>>>
>>>>>>>>> compression decompression
>>>>>>>>> zlib zstd zlib zstd
>>>>>>>>> ------------------------------------------------------------
>>>>>>>>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>>>>>>>>> user 65.0 15.8 5.3 2.5
>>>>>>>>> sys 3.3 0.2 2.0 2.0
>>>>>>>>>
>>>>>>>>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>>>>>>>>> compressed image size in both cases: 1.4G
>>>>>>>>>
>>>>>>>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>>>>>>>> QAPI part:
>>>>>>>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>>>>>>>> ---
>>>>>>>>> docs/interop/qcow2.txt | 1 +
>>>>>>>>> configure | 2 +-
>>>>>>>>> qapi/block-core.json | 3 +-
>>>>>>>>> block/qcow2-threads.c | 169
>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++
>>>>>>>>> block/qcow2.c | 7 ++
>>>>>>>>> slirp | 2 +-
>>>>>>>>> 6 files changed, 181 insertions(+), 3 deletions(-)
>>>>>>>> [...]
>>>>>>>>
>>>>>>>>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>>>>>>>>> index 7dbaf53489..a0b12e1b15 100644
>>>>>>>>> --- a/block/qcow2-threads.c
>>>>>>>>> +++ b/block/qcow2-threads.c
>>>>>>>> [...]
>>>>>>>>
>>>>>>>>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t
>>>>>>>>> dest_size,
>>>>>>>>> + const void *src, size_t
>>>>>>>>> src_size)
>>>>>>>>> +{
>>>>>>>> [...]
>>>>>>>>
>>>>>>>>> + /*
>>>>>>>>> + * The compressed stream from the input buffer may consist of
>>>>>>>>> more
>>>>>>>>> + * than one zstd frame.
>>>>>>>> Can it?
>>>>>>> If not, we must require it in the specification.
>>>>>> Actually, now that you mention it, it would make sense anyway to add
>>>>>> some note to the specification on what exactly compressed with zstd
>>>>>> means.
>>>>>>
>>>>>>> Hmm. If at some point
>>>>>>> we'll want multi-threaded compression of one big (2M) cluster..
>>>>>>> Could
>>>>>>> this be implemented with zstd lib, if multiple frames are allowed,
>>>>>>> will
>>>>>>> allowing multiple frames help? I don't know actually, but I think
>>>>>>> better
>>>>>>> not to forbid it. On the other hand, I don't see any benefit in
>>>>>>> large
>>>>>>> compressed clusters. At least, in our scenarios (for compressed
>>>>>>> backups)
>>>>>>> we use 64k compressed clusters, for good granularity of incremental
>>>>>>> backups (when for running vm we use 1M clusters).
>>>>>> Is it really that important? Naïvely, it sounds rather
>>>>>> complicated to
>>>>>> introduce multithreading into block drivers.
>>>>> It is already here: compression and encryption already multithreaded.
>>>>> But of course, one cluster is handled in one thread.
>>>> Ah, good. I forgot.
>>>>
>>>>>> (Also, as for compression, it can only be used in backup scenarios
>>>>>> anyway, where you write many clusters at once. So parallelism on the
>>>>>> cluster level should sufficient to get high usage, and it would
>>>>>> benefit
>>>>>> all compression types and cluster sizes.)
>>>>>>
>>>>> Yes it works in this way already :)
>>>> Well, OK then.
>>>>
>>>>> So, we don't know do we want one frame restriction or not. Do you
>>>>> have a
>>>>> preference?
>>>> *shrug*
>>>>
>>>> Seems like it would be preferential to allow multiple frames still. A
>>>> note in the spec would be nice (i.e., streaming format, multiple frames
>>>> per cluster possible).
>>> We don't mention anything about zlib compressing details in the spec.
>> Yep. Which is bad enough.
>>
>>> If we mention anything about ZSTD compressing details we'll have to do
>>> it for
>>> zlib as well.
>> I personally don’t like “If you can’t make it perfect, you shouldn’t do
>> it at all”. Mentioning it for zstd would still be an improvement.
>
> Good approach. I like it. But I'm trying to be cautious.
>>
>> Also, I believe that “zlib compression” is pretty much unambiguous,
>> considering all the code does is call deflate()/inflate().
>>
>> I’m not saying we need to amend the spec in this series, but I don’t see
>> a good argument against doing so at some point.
>>
>>> So, I think we have two possibilities for the spec:
>>> 1. mention for both
>>> 2. don't mention at all
>>>
>>> I think the 2nd is better. It gives more degree of freedom for the
>>> future improvements.
>> No, it absolutely doesn’t. There is a de-facto format, namely what qemu
>> accepts. Changing that would be an incompatible change. Just because
>> we don’t write what’s allowed into the spec doesn’t change that fact.
>>
>>> and we've already covered both cases (one frame, may frames) in the
>>> code.
>> There are still different zstd formats, namely streaming or not
>> streaming. That’s what should be mentioned.
>
> It looks like the ZSTD format is always the same.
> The ZSTD interface is different: for some reason the simple
> zstd_decompress()
> wants to know the size of data to decompress
>
> From zstd doc:
> *size_t ZSTD_decompress( void* dst, size_t dstCapacity, const void* src,
> size_t compressedSize); *
>
> `compressedSize` : must be the _exact_ size of some number of compressed
> and/or skippable frames.
That’s... interesting.
> I made a test (based on Vladimir's code) to check it:
>
> // compile with gcc -lzstd -g test_zstd.c -o test_zstd
>
> #include <stdio.h> #include <assert.h> #include <zstd.h> #include
> <zstd_errors.h> int main() { char buf1[] = "erbebewbwe"; char buf2[100];
> char buf3[100]; int ret; ZSTD_outBuffer output; ZSTD_inBuffer input; ret
> = ZSTD_compress(buf2, sizeof(buf2), buf1, sizeof(buf1), 5); printf("ret:
> %d\n", ret); ZSTD_DCtx *dctx = ZSTD_createDCtx(); input =
> (ZSTD_inBuffer){buf2, ret, 0}; output = (ZSTD_outBuffer){buf3,
> sizeof(buf3), 0}; ret = ZSTD_decompressStream(dctx, &output, &input);
> printf("ret: %d, input.pos: %ld, output.pos: %ld\n", ret, input.pos,
> output.pos); printf("To compress: %s\n", buf1); buf3[output.pos] = 0;
> printf("Uncompressed: %s\n", buf3); return 0; }
>
> And it works fine.
>
> We use streaming semantics just to be consistent with the interfaces
> (stream_compress/stream_decompress), otherwise
>
> we could use simple_compress/stream_decompress
OK, if there’s just a single format and we accept multiple frames, then
there’s indeed no need to document it.
Max
On 4/29/20 8:02 AM, Vladimir Sementsov-Ogievskiy wrote: >>>>> + /* >>>>> + * The compressed stream from the input buffer may consist of >>>>> more >>>>> + * than one zstd frame. >>>> >>>> Can it? >>> >>> If not, we must require it in the specification. >> >> Actually, now that you mention it, it would make sense anyway to add >> some note to the specification on what exactly compressed with zstd >> means. > So, we don't know do we want one frame restriction or not. Do you have a > preference? > I'm a fan of 'be strict in what you produce, liberal in what you accept'. While our implementation always produces only a single frame of compressed data, I think our decoder should be prepared to see more than one frame from other implementations, as that is more liberal than tightening the specification to require that encoding must produce exactly one frame. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
On 29.04.2020 13:24, Max Reitz wrote:
> On 28.04.20 22:00, Denis Plotnikov wrote:
>> zstd significantly reduces cluster compression time.
>> It provides better compression performance maintaining
>> the same level of the compression ratio in comparison with
>> zlib, which, at the moment, is the only compression
>> method available.
>>
>> The performance test results:
>> Test compresses and decompresses qemu qcow2 image with just
>> installed rhel-7.6 guest.
>> Image cluster size: 64K. Image on disk size: 2.2G
>>
>> The test was conducted with brd disk to reduce the influence
>> of disk subsystem to the test results.
>> The results is given in seconds.
>>
>> compress cmd:
>> time ./qemu-img convert -O qcow2 -c -o compression_type=[zlib|zstd]
>> src.img [zlib|zstd]_compressed.img
>> decompress cmd
>> time ./qemu-img convert -O qcow2
>> [zlib|zstd]_compressed.img uncompressed.img
>>
>> compression decompression
>> zlib zstd zlib zstd
>> ------------------------------------------------------------
>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>> user 65.0 15.8 5.3 2.5
>> sys 3.3 0.2 2.0 2.0
>>
>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>> compressed image size in both cases: 1.4G
>>
>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>> QAPI part:
>> Acked-by: Markus Armbruster <armbru@redhat.com>
>> ---
>> docs/interop/qcow2.txt | 1 +
>> configure | 2 +-
>> qapi/block-core.json | 3 +-
>> block/qcow2-threads.c | 169 +++++++++++++++++++++++++++++++++++++++++
>> block/qcow2.c | 7 ++
>> slirp | 2 +-
>> 6 files changed, 181 insertions(+), 3 deletions(-)
> [...]
>
>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>> index 7dbaf53489..a0b12e1b15 100644
>> --- a/block/qcow2-threads.c
>> +++ b/block/qcow2-threads.c
> [...]
>
>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
>> + const void *src, size_t src_size)
>> +{
> [...]
>
>> + /*
>> + * The compressed stream from the input buffer may consist of more
>> + * than one zstd frame.
> Can it?
Potentially, it can, if another implemention of qcow2 saves a couple of
frames for some reason.
Denis
>
> Max
>
On 29.04.20 12:38, Denis Plotnikov wrote:
>
>
> On 29.04.2020 13:24, Max Reitz wrote:
>> On 28.04.20 22:00, Denis Plotnikov wrote:
>>> zstd significantly reduces cluster compression time.
>>> It provides better compression performance maintaining
>>> the same level of the compression ratio in comparison with
>>> zlib, which, at the moment, is the only compression
>>> method available.
>>>
>>> The performance test results:
>>> Test compresses and decompresses qemu qcow2 image with just
>>> installed rhel-7.6 guest.
>>> Image cluster size: 64K. Image on disk size: 2.2G
>>>
>>> The test was conducted with brd disk to reduce the influence
>>> of disk subsystem to the test results.
>>> The results is given in seconds.
>>>
>>> compress cmd:
>>> time ./qemu-img convert -O qcow2 -c -o compression_type=[zlib|zstd]
>>> src.img [zlib|zstd]_compressed.img
>>> decompress cmd
>>> time ./qemu-img convert -O qcow2
>>> [zlib|zstd]_compressed.img uncompressed.img
>>>
>>> compression decompression
>>> zlib zstd zlib zstd
>>> ------------------------------------------------------------
>>> real 65.5 16.3 (-75 %) 1.9 1.6 (-16 %)
>>> user 65.0 15.8 5.3 2.5
>>> sys 3.3 0.2 2.0 2.0
>>>
>>> Both ZLIB and ZSTD gave the same compression ratio: 1.57
>>> compressed image size in both cases: 1.4G
>>>
>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>> QAPI part:
>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>> ---
>>> docs/interop/qcow2.txt | 1 +
>>> configure | 2 +-
>>> qapi/block-core.json | 3 +-
>>> block/qcow2-threads.c | 169 +++++++++++++++++++++++++++++++++++++++++
>>> block/qcow2.c | 7 ++
>>> slirp | 2 +-
>>> 6 files changed, 181 insertions(+), 3 deletions(-)
>> [...]
>>
>>> diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
>>> index 7dbaf53489..a0b12e1b15 100644
>>> --- a/block/qcow2-threads.c
>>> +++ b/block/qcow2-threads.c
>> [...]
>>
>>> +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size,
>>> + const void *src, size_t src_size)
>>> +{
>> [...]
>>
>>> + /*
>>> + * The compressed stream from the input buffer may consist of more
>>> + * than one zstd frame.
>> Can it?
>
> Potentially, it can, if another implemention of qcow2 saves a couple of
> frames for some reason.
Well, it can’t do that if qemu cannot decode it. Maybe there should be
a note in the specification on the precise format to expect.
If we decide that it might make sense for someone to encode a cluster
with multiple frames, then OK.
Max
© 2016 - 2026 Red Hat, Inc.