Do data compression in separate threads. This significantly improves
performance for qemu-img convert with -W (allow async writes) and -c
(compressed) options.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
block/qcow2.h | 3 +++
block/qcow2.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/block/qcow2.h b/block/qcow2.h
index 01b5250415..0bd21623c2 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -326,6 +326,9 @@ typedef struct BDRVQcow2State {
* override) */
char *image_backing_file;
char *image_backing_format;
+
+ CoQueue compress_wait_queue;
+ int nb_compress_threads;
} BDRVQcow2State;
typedef struct Qcow2COWRegion {
diff --git a/block/qcow2.c b/block/qcow2.c
index d4dbe329ab..91465893e2 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -42,6 +42,7 @@
#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
+#include "block/thread-pool.h"
/*
Differences with QCOW:
@@ -1544,6 +1545,9 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
qcow2_check_refcounts(bs, &result, 0);
}
#endif
+
+ qemu_co_queue_init(&s->compress_wait_queue);
+
return ret;
fail:
@@ -3715,6 +3719,62 @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
return ret;
}
+#define MAX_COMPRESS_THREADS 4
+
+typedef struct Qcow2CompressData {
+ void *dest;
+ const void *src;
+ size_t size;
+ ssize_t ret;
+} Qcow2CompressData;
+
+static int qcow2_compress_pool_func(void *opaque)
+{
+ Qcow2CompressData *data = opaque;
+
+ data->ret = qcow2_compress(data->dest, data->src, data->size);
+
+ return 0;
+}
+
+static void qcow2_compress_complete(void *opaque, int ret)
+{
+ qemu_coroutine_enter(opaque);
+}
+
+/* See qcow2_compress definition for parameters description */
+static ssize_t qcow2_co_compress(BlockDriverState *bs,
+ void *dest, const void *src, size_t size)
+{
+ BDRVQcow2State *s = bs->opaque;
+ BlockAIOCB *acb;
+ ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+ Qcow2CompressData arg = {
+ .dest = dest,
+ .src = src,
+ .size = size,
+ };
+
+ while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
+ qemu_co_queue_wait(&s->compress_wait_queue, NULL);
+ }
+
+ s->nb_compress_threads++;
+ acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
+ qcow2_compress_complete,
+ qemu_coroutine_self());
+
+ if (!acb) {
+ s->nb_compress_threads--;
+ return -EINVAL;
+ }
+ qemu_coroutine_yield();
+ s->nb_compress_threads--;
+ qemu_co_queue_next(&s->compress_wait_queue);
+
+ return arg.ret;
+}
+
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static coroutine_fn int
@@ -3758,7 +3818,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
out_buf = g_malloc(s->cluster_size);
- out_len = qcow2_compress(out_buf, buf, s->cluster_size);
+ out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size);
if (out_len == -2) {
ret = -EINVAL;
goto fail;
--
2.11.1
Am 08.06.2018 um 21:20 hat Vladimir Sementsov-Ogievskiy geschrieben: > Do data compression in separate threads. This significantly improve > performance for qemu-img convert with -W (allow async writes) and -c > (compressed) options. > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Looks correct to me, but why do we introduce a separate MAX_COMPRESS_THREADS? Can't we simply leave the maximum number of threads to the thread pool? I see that you chose a much smaller number here (4 vs. 64), but is there actually a good reason for this? Kevin
On 06/14/2018 04:16 PM, Kevin Wolf wrote: > Am 08.06.2018 um 21:20 hat Vladimir Sementsov-Ogievskiy geschrieben: >> Do data compression in separate threads. This significantly improve >> performance for qemu-img convert with -W (allow async writes) and -c >> (compressed) options. >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> > Looks correct to me, but why do we introduce a separate > MAX_COMPRESS_THREADS? Can't we simply leave the maximum number of > threads to the thread poll? > > I see that you chose a much smaller number here (4 vs. 64), but is there > actually a good reason for this? > > Kevin yes. In the other case the guest will suffer much more from this increased activity and load on the host. Den
Am 14.06.2018 um 15:19 hat Denis V. Lunev geschrieben: > On 06/14/2018 04:16 PM, Kevin Wolf wrote: > > Am 08.06.2018 um 21:20 hat Vladimir Sementsov-Ogievskiy geschrieben: > >> Do data compression in separate threads. This significantly improve > >> performance for qemu-img convert with -W (allow async writes) and -c > >> (compressed) options. > >> > >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> > > Looks correct to me, but why do we introduce a separate > > MAX_COMPRESS_THREADS? Can't we simply leave the maximum number of > > threads to the thread poll? > > > > I see that you chose a much smaller number here (4 vs. 64), but is there > > actually a good reason for this? > > > > Kevin > yes. In the other case the guest will suffer much more from this increased > activity and load on the host. Ah, your primary motivation is use in a backup block job? I completely forgot about that one (and qemu-img shouldn't care because there is no guest), but that makes some sense. Makes me wonder whether this value should be configurable. But that can come later. Kevin
© 2016 - 2026 Red Hat, Inc.