diff mbox

[04/10] qcow: add qcow_co_write_compressed

Message ID 1463229957-14253-5-git-send-email-den@openvz.org
State New
Headers show

Commit Message

Denis V. Lunev May 14, 2016, 12:45 p.m. UTC
From: Pavel Butsykin <pbutsykin@virtuozzo.com>

Added implementation of the qcow_co_write_compressed function that
will allow us to safely use compressed writes for the qcow from running
VMs.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow.c | 78 +++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 45 insertions(+), 33 deletions(-)

Comments

Stefan Hajnoczi May 27, 2016, 5:45 p.m. UTC | #1
On Sat, May 14, 2016 at 03:45:52PM +0300, Denis V. Lunev wrote:
> +    qemu_co_mutex_lock(&s->lock);
> +    cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, out_len, 0, 0);
> +    qemu_co_mutex_unlock(&s->lock);
> +    if (cluster_offset == 0) {
> +        ret = -EIO;
> +        goto fail;
> +    }
> +    cluster_offset &= s->cluster_offset_mask;
> +
> +    iov = (struct iovec) {
> +        .iov_base   = out_buf,
> +        .iov_len    = out_len,
> +    };
> +    qemu_iovec_init_external(&hd_qiov, &iov, 1);
> +    ret = bdrv_co_pwritev(bs->file->bs, cluster_offset, out_len, &hd_qiov, 0);

Not sure if this has the same race condition as the qcow2 patch.  It
seems that bdrv_getlength() is used to extend the file on a per-sector
basis.  That would mean compressed data is not packed inside sectors and
no read-write-modify race condition exists, but I haven't fully audited
get_cluster_offset().

Stefan
Pavel Butsykin May 30, 2016, 2:27 p.m. UTC | #2
On 27.05.2016 20:45, Stefan Hajnoczi wrote:
> On Sat, May 14, 2016 at 03:45:52PM +0300, Denis V. Lunev wrote:
>> +    qemu_co_mutex_lock(&s->lock);
>> +    cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, out_len, 0, 0);
>> +    qemu_co_mutex_unlock(&s->lock);
>> +    if (cluster_offset == 0) {
>> +        ret = -EIO;
>> +        goto fail;
>> +    }
>> +    cluster_offset &= s->cluster_offset_mask;
>> +
>> +    iov = (struct iovec) {
>> +        .iov_base   = out_buf,
>> +        .iov_len    = out_len,
>> +    };
>> +    qemu_iovec_init_external(&hd_qiov, &iov, 1);
>> +    ret = bdrv_co_pwritev(bs->file->bs, cluster_offset, out_len, &hd_qiov, 0);
>
> Not sure if this has the same race condition as the qcow2 patch.  It
> seems that bdrv_getlength() is used to extend the file on a per-sector
> basis.  That would mean compressed data is not packed inside sectors and
> no read-write-modify race condition exists, but I haven't fully audited
> get_cluster_offset().
>

The get_cluster_offset() also doesn't allow to do multiple compressed
writes in a single cluster, because this function for all offsets
within the cluster returns the same cluster_offset. So here we just
can't write at offset in the cluster, only at the beginning of the
cluster.

> Stefan
>
diff mbox

Patch

diff --git a/block/qcow.c b/block/qcow.c
index d6dc1b0..80a4ce8 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -909,38 +909,37 @@  static int qcow_make_empty(BlockDriverState *bs)
 
 /* XXX: put compressed sectors first, then all the cluster aligned
    tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                 const uint8_t *buf, int nb_sectors)
+static coroutine_fn int
+qcow_co_write_compressed(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors, QEMUIOVector *qiov)
 {
     BDRVQcowState *s = bs->opaque;
+    QEMUIOVector hd_qiov;
+    struct iovec iov;
     z_stream strm;
     int ret, out_len;
-    uint8_t *out_buf;
+    uint8_t *buf, *out_buf;
     uint64_t cluster_offset;
 
+    buf = qemu_blockalign(bs, s->cluster_size);
     if (nb_sectors != s->cluster_sectors) {
-        ret = -EINVAL;
-
-        /* Zero-pad last write if image size is not cluster aligned */
-        if (sector_num + nb_sectors == bs->total_sectors &&
-            nb_sectors < s->cluster_sectors) {
-            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
-            memset(pad_buf, 0, s->cluster_size);
-            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
-            ret = qcow_write_compressed(bs, sector_num,
-                                        pad_buf, s->cluster_sectors);
-            qemu_vfree(pad_buf);
+        if (nb_sectors > s->cluster_sectors ||
+            sector_num + nb_sectors != bs->total_sectors)
+        {
+            qemu_vfree(buf);
+            return -EINVAL;
         }
-        return ret;
+        /* Zero-pad last write if image size is not cluster aligned */
+        memset(buf, 0, s->cluster_size);
     }
+    qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
 
     out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
 
     /* best compression, small window, no zlib header */
     memset(&strm, 0, sizeof(strm));
-    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
-                       Z_DEFLATED, -12,
-                       9, Z_DEFAULT_STRATEGY);
+    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                       -12, 9, Z_DEFAULT_STRATEGY);
     if (ret != 0) {
         ret = -EINVAL;
         goto fail;
@@ -962,28 +961,41 @@  static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
     deflateEnd(&strm);
 
     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+        struct iovec iov = {
+            .iov_base   = buf,
+            .iov_len    = out_len,
+        };
+        qemu_iovec_init_external(&hd_qiov, &iov, 1);
         /* could not compress: write normal cluster */
-        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
-        if (ret < 0) {
-            goto fail;
-        }
-    } else {
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
-                                            out_len, 0, 0);
-        if (cluster_offset == 0) {
-            ret = -EIO;
-            goto fail;
-        }
-
-        cluster_offset &= s->cluster_offset_mask;
-        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
+        ret = qcow_co_writev(bs, sector_num, s->cluster_sectors, &hd_qiov);
         if (ret < 0) {
             goto fail;
         }
+        goto success;
     }
 
+    qemu_co_mutex_lock(&s->lock);
+    cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, out_len, 0, 0);
+    qemu_co_mutex_unlock(&s->lock);
+    if (cluster_offset == 0) {
+        ret = -EIO;
+        goto fail;
+    }
+    cluster_offset &= s->cluster_offset_mask;
+
+    iov = (struct iovec) {
+        .iov_base   = out_buf,
+        .iov_len    = out_len,
+    };
+    qemu_iovec_init_external(&hd_qiov, &iov, 1);
+    ret = bdrv_co_pwritev(bs->file->bs, cluster_offset, out_len, &hd_qiov, 0);
+    if (ret < 0) {
+        goto fail;
+    }
+success:
     ret = 0;
 fail:
+    qemu_vfree(buf);
     g_free(out_buf);
     return ret;
 }
@@ -1036,7 +1048,7 @@  static BlockDriver bdrv_qcow = {
 
     .bdrv_set_key           = qcow_set_key,
     .bdrv_make_empty        = qcow_make_empty,
-    .bdrv_write_compressed  = qcow_write_compressed,
+    .bdrv_co_write_compressed = qcow_co_write_compressed,
     .bdrv_get_info          = qcow_get_info,
 
     .create_opts            = &qcow_create_opts,