Do data compression in separate threads. This significantly improves
performance for qemu-img convert with -W (allow async writes) and -c
(compressed) options.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
block/qcow2.h | 3 +++
block/qcow2.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/block/qcow2.h b/block/qcow2.h
index 01b5250415..0bd21623c2 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -326,6 +326,9 @@ typedef struct BDRVQcow2State {
* override) */
char *image_backing_file;
char *image_backing_format;
+
+ CoQueue compress_wait_queue;
+ int nb_compress_threads;
} BDRVQcow2State;
typedef struct Qcow2COWRegion {
diff --git a/block/qcow2.c b/block/qcow2.c
index d4dbe329ab..91465893e2 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -42,6 +42,7 @@
#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
+#include "block/thread-pool.h"
/*
Differences with QCOW:
@@ -1544,6 +1545,9 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
qcow2_check_refcounts(bs, &result, 0);
}
#endif
+
+ qemu_co_queue_init(&s->compress_wait_queue);
+
return ret;
fail:
@@ -3715,6 +3719,62 @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
return ret;
}
+#define MAX_COMPRESS_THREADS 4
+
+typedef struct Qcow2CompressData {
+ void *dest;
+ const void *src;
+ size_t size;
+ ssize_t ret;
+} Qcow2CompressData;
+
+static int qcow2_compress_pool_func(void *opaque)
+{
+ Qcow2CompressData *data = opaque;
+
+ data->ret = qcow2_compress(data->dest, data->src, data->size);
+
+ return 0;
+}
+
+static void qcow2_compress_complete(void *opaque, int ret)
+{
+ qemu_coroutine_enter(opaque);
+}
+
+/* See qcow2_compress definition for parameters description */
+static ssize_t qcow2_co_compress(BlockDriverState *bs,
+ void *dest, const void *src, size_t size)
+{
+ BDRVQcow2State *s = bs->opaque;
+ BlockAIOCB *acb;
+ ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+ Qcow2CompressData arg = {
+ .dest = dest,
+ .src = src,
+ .size = size,
+ };
+
+ while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
+ qemu_co_queue_wait(&s->compress_wait_queue, NULL);
+ }
+
+ s->nb_compress_threads++;
+ acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
+ qcow2_compress_complete,
+ qemu_coroutine_self());
+
+ if (!acb) {
+ s->nb_compress_threads--;
+ return -EINVAL;
+ }
+ qemu_coroutine_yield();
+ s->nb_compress_threads--;
+ qemu_co_queue_next(&s->compress_wait_queue);
+
+ return arg.ret;
+}
+
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static coroutine_fn int
@@ -3758,7 +3818,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
out_buf = g_malloc(s->cluster_size);
- out_len = qcow2_compress(out_buf, buf, s->cluster_size);
+ out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size);
if (out_len == -2) {
ret = -EINVAL;
goto fail;
--
2.11.1
Am 08.06.2018 um 21:20 hat Vladimir Sementsov-Ogievskiy geschrieben: > Do data compression in separate threads. This significantly improve > performance for qemu-img convert with -W (allow async writes) and -c > (compressed) options. > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Looks correct to me, but why do we introduce a separate MAX_COMPRESS_THREADS? Can't we simply leave the maximum number of threads to the thread pool? I see that you chose a much smaller number here (4 vs. 64), but is there actually a good reason for this? Kevin
On 06/14/2018 04:16 PM, Kevin Wolf wrote: > Am 08.06.2018 um 21:20 hat Vladimir Sementsov-Ogievskiy geschrieben: >> Do data compression in separate threads. This significantly improve >> performance for qemu-img convert with -W (allow async writes) and -c >> (compressed) options. >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> > Looks correct to me, but why do we introduce a separate > MAX_COMPRESS_THREADS? Can't we simply leave the maximum number of > threads to the thread poll? > > I see that you chose a much smaller number here (4 vs. 64), but is there > actually a good reason for this? > > Kevin yes. In the other case the guest will suffer much more from this increased activity and load on the host. Den
Am 14.06.2018 um 15:19 hat Denis V. Lunev geschrieben: > On 06/14/2018 04:16 PM, Kevin Wolf wrote: > > Am 08.06.2018 um 21:20 hat Vladimir Sementsov-Ogievskiy geschrieben: > >> Do data compression in separate threads. This significantly improve > >> performance for qemu-img convert with -W (allow async writes) and -c > >> (compressed) options. > >> > >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> > > Looks correct to me, but why do we introduce a separate > > MAX_COMPRESS_THREADS? Can't we simply leave the maximum number of > > threads to the thread poll? > > > > I see that you chose a much smaller number here (4 vs. 64), but is there > > actually a good reason for this? > > > > Kevin > yes. In the other case the guest will suffer much more from this increased > activity and load on the host. Ah, your primary motivation is use in a backup block job? I completely forgot about that one (and qemu-img shouldn't care because there is no guest), but that makes some sense. Makes me wonder whether this value should be configurable. But that can come later. Kevin
© 2016 - 2026 Red Hat, Inc.