
[v2,1/6] qcow2: Allow writing compressed data to multiple clusters

Message ID 1570026166-748566-2-git-send-email-andrey.shinkevich@virtuozzo.com
State New
Series qcow2: advanced compression options

Commit Message

Andrey Shinkevich Oct. 2, 2019, 2:22 p.m. UTC
QEMU currently supports writing compressed data of size less than or
equal to one cluster. This patch allows writing QCOW2 compressed data
that exceeds one cluster. The implementation is simple: we split the
buffered data into separate clusters and write them using the existing
functionality. For unaligned requests, we use a workaround that writes
the data without compression.

Suggested-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
---
 block/qcow2.c | 113 +++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 85 insertions(+), 28 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy Oct. 3, 2019, 2:16 p.m. UTC | #1
02.10.2019 17:22, Andrey Shinkevich wrote:
> QEMU currently supports writing compressed data of size less than or
> equal to one cluster. This patch allows writing QCOW2 compressed data
> that exceeds one cluster. The implementation is simple: we split the
> buffered data into separate clusters and write them using the existing
> functionality. For unaligned requests, we use a workaround that writes
> the data without compression.
> 
> Suggested-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
> Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
> ---
>   block/qcow2.c | 113 +++++++++++++++++++++++++++++++++++++++++++---------------
>   1 file changed, 85 insertions(+), 28 deletions(-)
> 
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 7961c05..54ccaf6 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -4152,10 +4152,8 @@ fail:
>       return ret;
>   }
>   
> -/* XXX: put compressed sectors first, then all the cluster aligned
> -   tables to avoid losing bytes in alignment */
>   static coroutine_fn int
> -qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
> +qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
>                                    uint64_t offset, uint64_t bytes,
>                                    QEMUIOVector *qiov, size_t qiov_offset)
>   {
> @@ -4165,36 +4163,14 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
>       uint8_t *buf, *out_buf;
>       uint64_t cluster_offset;
>   
> -    if (has_data_file(bs)) {
> -        return -ENOTSUP;
> -    }
> -
> -    if (bytes == 0) {
> -        /* align end of file to a sector boundary to ease reading with
> -           sector based I/Os */
> -        int64_t len = bdrv_getlength(bs->file->bs);
> -        if (len < 0) {
> -            return len;
> -        }
> -        return bdrv_co_truncate(bs->file, len, PREALLOC_MODE_OFF, NULL);
> -    }
> -
> -    if (offset_into_cluster(s, offset)) {
> -        return -EINVAL;
> -    }
> +    assert(bytes <= s->cluster_size);

I think we'd better assert

assert(bytes == s->cluster_size || (bytes < s->cluster_size && offset + bytes == bdrv_getlength()))

to match the old logic.
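
For reference, a sketch of that assertion reusing the end-of-image expression
from the check removed above (exact form up to you):

    assert(bytes == s->cluster_size ||
           (bytes < s->cluster_size &&
            offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS));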


>   
>       buf = qemu_blockalign(bs, s->cluster_size);
> -    if (bytes != s->cluster_size) {
> -        if (bytes > s->cluster_size ||
> -            offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
> -        {
> -            qemu_vfree(buf);
> -            return -EINVAL;
> -        }
> +    if (bytes < s->cluster_size) {
>           /* Zero-pad last write if image size is not cluster aligned */
>           memset(buf + bytes, 0, s->cluster_size - bytes);
>       }
> -    qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
> +    qemu_iovec_to_buf(qiov, qiov_offset, buf, s->cluster_size);

Why did you change it to s->cluster_size? bytes may be less than a cluster.
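
I.e., presumably the old form is what is wanted here (only the request is
copied; the padding was already zeroed above):

    qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);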

>   
>       out_buf = g_malloc(s->cluster_size);
>   
> @@ -4228,6 +4204,9 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
>   
>       BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
>       ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
> +    if (ret == -ENOTSUP) {
> +        ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
> +    }

It should not be here:
1. the write is unlikely to return -ENOTSUP
2. if you mean a fallback to a normal write when compression fails, that is already done above.


>       if (ret < 0) {
>           goto fail;
>       }
> @@ -4239,6 +4218,84 @@ fail:
>       return ret;
>   }
>   
> +static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
> +{
> +    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
> +
> +    assert(!t->cluster_type);

and assert(!t->l2meta)

> +
> +    return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, t->qiov,
> +                                            t->qiov_offset);
> +}
> +
> +/*
> + * XXX: put compressed sectors first, then all the cluster aligned
> +   tables to avoid losing bytes in alignment
> + */
> +static coroutine_fn int
> +qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
> +                                 uint64_t offset, uint64_t bytes,
> +                                 QEMUIOVector *qiov, size_t qiov_offset)
> +{
> +    BDRVQcow2State *s = bs->opaque;
> +    QCowL2Meta *l2meta = NULL;

It's always NULL and actually unused.

> +    AioTaskPool *aio = NULL;
> +    uint64_t curr_off = 0;
> +    int ret;
> +
> +    if (has_data_file(bs)) {
> +        return -ENOTSUP;
> +    }
> +
> +    if (bytes == 0) {
> +        /*
> +         * align end of file to a sector boundary to ease reading with
> +         * sector based I/Os
> +         */
> +        int64_t cluster_offset = bdrv_getlength(bs->file->bs);

Hmm, I like the old variable name 'len' more.

> +        if (cluster_offset < 0) {
> +            return cluster_offset;
> +        }
> +        return bdrv_co_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF,
> +                                NULL);
> +    }
> +
> +    if (offset_into_cluster(s, offset)) {
> +        return -EINVAL;
> +    }
> +
> +    while (bytes && aio_task_pool_status(aio) == 0) {
> +        uint32_t chunk_size = MIN(bytes, s->cluster_size);
> +
> +        assert((curr_off & (BDRV_SECTOR_SIZE - 1)) == 0);
> +        assert((chunk_size & (BDRV_SECTOR_SIZE - 1)) == 0);

First: always use QEMU_IS_ALIGNED() instead.
Second: why did you add these asserts, do we need them?
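
If the asserts are kept, something like this (just a sketch):

    assert(QEMU_IS_ALIGNED(curr_off, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(chunk_size, BDRV_SECTOR_SIZE));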

> +
> +        if (!aio && chunk_size != bytes) {
> +            aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
> +        }
> +
> +        ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry,
> +                             0, 0, offset + curr_off, chunk_size,
> +                             qiov, qiov_offset, l2meta);
> +        if (ret < 0) {
> +            break;
> +        }
> +        qiov_offset += chunk_size;
> +        curr_off += chunk_size;

You may just do offset += chunk_size and drop the extra variable.
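
Something like this (a sketch of the same loop, with the counter dropped and
l2meta inlined as NULL since it is unused):

    while (bytes && aio_task_pool_status(aio) == 0) {
        uint32_t chunk_size = MIN(bytes, s->cluster_size);

        if (!aio && chunk_size != bytes) {
            aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
        }

        ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry,
                             0, 0, offset, chunk_size, qiov, qiov_offset, NULL);
        if (ret < 0) {
            break;
        }
        qiov_offset += chunk_size;
        offset += chunk_size;
        bytes -= chunk_size;
    }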

> +        bytes -= chunk_size;
> +    }
> +
> +    if (aio) {
> +        aio_task_pool_wait_all(aio);
> +        if (ret == 0) {
> +            ret = aio_task_pool_status(aio);
> +        }
> +        g_free(aio);
> +    }
> +
> +    return ret;
> +}
> +
>   static int coroutine_fn
>   qcow2_co_preadv_compressed(BlockDriverState *bs,
>                              uint64_t file_cluster_offset,
> 

overall - OK