diff mbox series

[3/3] qcow2: add compress threads

Message ID 20180608192027.284601-4-vsementsov@virtuozzo.com
State New
Headers show
Series qcow2 compress threads | expand

Commit Message

Vladimir Sementsov-Ogievskiy June 8, 2018, 7:20 p.m. UTC
Do data compression in separate threads. This significantly improve
performance for qemu-img convert with -W (allow async writes) and -c
(compressed) options.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/qcow2.h |  3 +++
 block/qcow2.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 64 insertions(+), 1 deletion(-)

Comments

Kevin Wolf June 14, 2018, 1:16 p.m. UTC | #1
Am 08.06.2018 um 21:20 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Do data compression in separate threads. This significantly improve
> performance for qemu-img convert with -W (allow async writes) and -c
> (compressed) options.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Looks correct to me, but why do we introduce a separate
MAX_COMPRESS_THREADS? Can't we simply leave the maximum number of
threads to the thread poll?

I see that you chose a much smaller number here (4 vs. 64), but is there
actually a good reason for this?

Kevin
Denis V. Lunev June 14, 2018, 1:19 p.m. UTC | #2
On 06/14/2018 04:16 PM, Kevin Wolf wrote:
> Am 08.06.2018 um 21:20 hat Vladimir Sementsov-Ogievskiy geschrieben:
>> Do data compression in separate threads. This significantly improve
>> performance for qemu-img convert with -W (allow async writes) and -c
>> (compressed) options.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Looks correct to me, but why do we introduce a separate
> MAX_COMPRESS_THREADS? Can't we simply leave the maximum number of
> threads to the thread poll?
>
> I see that you chose a much smaller number here (4 vs. 64), but is there
> actually a good reason for this?
>
> Kevin
yes. In the other case the guest will suffer much more from this increased
activity and load on the host.

Den
Kevin Wolf June 14, 2018, 1:29 p.m. UTC | #3
Am 14.06.2018 um 15:19 hat Denis V. Lunev geschrieben:
> On 06/14/2018 04:16 PM, Kevin Wolf wrote:
> > Am 08.06.2018 um 21:20 hat Vladimir Sementsov-Ogievskiy geschrieben:
> >> Do data compression in separate threads. This significantly improve
> >> performance for qemu-img convert with -W (allow async writes) and -c
> >> (compressed) options.
> >>
> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> > Looks correct to me, but why do we introduce a separate
> > MAX_COMPRESS_THREADS? Can't we simply leave the maximum number of
> > threads to the thread poll?
> >
> > I see that you chose a much smaller number here (4 vs. 64), but is there
> > actually a good reason for this?
> >
> > Kevin
> yes. In the other case the guest will suffer much more from this increased
> activity and load on the host.

Ah, your primary motivation is use in a backup block job? I completely
forgot about that one (and qemu-img shouldn't care because there is no
guest), but that makes some sense.

Makes me wonder whether this value should be configurable. But that can
come later.

Kevin
diff mbox series

Patch

diff --git a/block/qcow2.h b/block/qcow2.h
index 01b5250415..0bd21623c2 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -326,6 +326,9 @@  typedef struct BDRVQcow2State {
      * override) */
     char *image_backing_file;
     char *image_backing_format;
+
+    CoQueue compress_wait_queue;
+    int nb_compress_threads;
 } BDRVQcow2State;
 
 typedef struct Qcow2COWRegion {
diff --git a/block/qcow2.c b/block/qcow2.c
index d4dbe329ab..91465893e2 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -42,6 +42,7 @@ 
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "crypto.h"
+#include "block/thread-pool.h"
 
 /*
   Differences with QCOW:
@@ -1544,6 +1545,9 @@  static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
         qcow2_check_refcounts(bs, &result, 0);
     }
 #endif
+
+    qemu_co_queue_init(&s->compress_wait_queue);
+
     return ret;
 
  fail:
@@ -3715,6 +3719,62 @@  static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
     return ret;
 }
 
+#define MAX_COMPRESS_THREADS 4
+
+typedef struct Qcow2CompressData {
+    void *dest;
+    const void *src;
+    size_t size;
+    ssize_t ret;
+} Qcow2CompressData;
+
+static int qcow2_compress_pool_func(void *opaque)
+{
+    Qcow2CompressData *data = opaque;
+
+    data->ret = qcow2_compress(data->dest, data->src, data->size);
+
+    return 0;
+}
+
+static void qcow2_compress_complete(void *opaque, int ret)
+{
+    qemu_coroutine_enter(opaque);
+}
+
+/* See qcow2_compress definition for parameters description */
+static ssize_t qcow2_co_compress(BlockDriverState *bs,
+                                 void *dest, const void *src, size_t size)
+{
+    BDRVQcow2State *s = bs->opaque;
+    BlockAIOCB *acb;
+    ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+    Qcow2CompressData arg = {
+        .dest = dest,
+        .src = src,
+        .size = size,
+    };
+
+    while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
+        qemu_co_queue_wait(&s->compress_wait_queue, NULL);
+    }
+
+    s->nb_compress_threads++;
+    acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
+                                 qcow2_compress_complete,
+                                 qemu_coroutine_self());
+
+    if (!acb) {
+        s->nb_compress_threads--;
+        return -EINVAL;
+    }
+    qemu_coroutine_yield();
+    s->nb_compress_threads--;
+    qemu_co_queue_next(&s->compress_wait_queue);
+
+    return arg.ret;
+}
+
 /* XXX: put compressed sectors first, then all the cluster aligned
    tables to avoid losing bytes in alignment */
 static coroutine_fn int
@@ -3758,7 +3818,7 @@  qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
 
     out_buf = g_malloc(s->cluster_size);
 
-    out_len = qcow2_compress(out_buf, buf, s->cluster_size);
+    out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size);
     if (out_len == -2) {
         ret = -EINVAL;
         goto fail;