
[v2,13/17] vmdk: Implement .bdrv_co_pwritev() interface

Message ID 1461849406-29743-14-git-send-email-kwolf@redhat.com
State New

Commit Message

Kevin Wolf April 28, 2016, 1:16 p.m. UTC
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vmdk.c | 209 ++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 135 insertions(+), 74 deletions(-)

Comments

Fam Zheng April 29, 2016, 3:08 a.m. UTC | #1
On Thu, 04/28 15:16, Kevin Wolf wrote:
> +typedef struct VmdkWriteCompressedCo {
> +    BlockDriverState *bs;
> +    int64_t sector_num;
> +    const uint8_t *buf;
> +    int nb_sectors;
> +    int ret;
> +} VmdkWriteCompressedCo;
> +
> +static void vmdk_co_write_compressed(void *opaque)
> +{
> +    VmdkWriteCompressedCo *co = opaque;
> +    QEMUIOVector local_qiov;
> +    uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
> +    uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
> +
> +    struct iovec iov = (struct iovec) {
> +        .iov_base   = (uint8_t*) co->buf,
> +        .iov_len    = bytes,
> +    };
> +    qemu_iovec_init_external(&local_qiov, &iov, 1);
> +
> +    co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);

Should it acquire s->lock?

> +}
> +
>  static int vmdk_write_compressed(BlockDriverState *bs,
>                                   int64_t sector_num,
>                                   const uint8_t *buf,
>                                   int nb_sectors)
>  {
>      BDRVVmdkState *s = bs->opaque;
> +
>      if (s->num_extents == 1 && s->extents[0].compressed) {
> -        return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
> +        Coroutine *co;
> +        AioContext *aio_context = bdrv_get_aio_context(bs);
> +        VmdkWriteCompressedCo data = {
> +            .bs         = bs,
> +            .sector_num = sector_num,
> +            .buf        = buf,
> +            .nb_sectors = nb_sectors,
> +            .ret        = -EINPROGRESS,
> +        };
> +        co = qemu_coroutine_create(vmdk_co_write_compressed);
> +        qemu_coroutine_enter(co, &data);
> +        while (data.ret == -EINPROGRESS) {
> +            aio_poll(aio_context, true);
> +        }
> +        return data.ret;

Don't you have a plan to move the creation of the coroutine for compressed
writes into the block layer? Or will bdrv_co_pwritev gain a "compressed" flag
(BDRV_REQ_COMPRESSED) in the future?

Fam

>      } else {
>          return -ENOTSUP;
>      }
Kevin Wolf April 29, 2016, 7:41 a.m. UTC | #2
On 29.04.2016 at 05:08, Fam Zheng wrote:
> On Thu, 04/28 15:16, Kevin Wolf wrote:
> > +typedef struct VmdkWriteCompressedCo {
> > +    BlockDriverState *bs;
> > +    int64_t sector_num;
> > +    const uint8_t *buf;
> > +    int nb_sectors;
> > +    int ret;
> > +} VmdkWriteCompressedCo;
> > +
> > +static void vmdk_co_write_compressed(void *opaque)
> > +{
> > +    VmdkWriteCompressedCo *co = opaque;
> > +    QEMUIOVector local_qiov;
> > +    uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
> > +    uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
> > +
> > +    struct iovec iov = (struct iovec) {
> > +        .iov_base   = (uint8_t*) co->buf,
> > +        .iov_len    = bytes,
> > +    };
> > +    qemu_iovec_init_external(&local_qiov, &iov, 1);
> > +
> > +    co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
> 
> Should it acquire s->lock?

Yes, probably, if only for consistency.

It didn't acquire the lock before, so it didn't seem appropriate to add
it in this patch. Not taking the lock is fine because this function is
only called by qemu-img, where there is no concurrency.

There was a proposal recently to allow compressed writes during a normal
VM run. If we want to enable this, the locking would be needed. (The
same is true for qcow and qcow2.)
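
For reference, a minimal (untested) sketch of what the locked version would
look like, simply wrapping the vmdk_pwritev() call in s->lock the same way
vmdk_co_pwritev() and vmdk_co_write_zeroes() already do:

    static void vmdk_co_write_compressed(void *opaque)
    {
        VmdkWriteCompressedCo *co = opaque;
        BDRVVmdkState *s = co->bs->opaque;
        QEMUIOVector local_qiov;
        uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
        uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;

        struct iovec iov = (struct iovec) {
            .iov_base   = (uint8_t *) co->buf,
            .iov_len    = bytes,
        };
        qemu_iovec_init_external(&local_qiov, &iov, 1);

        /* Not strictly needed while this only runs from qemu-img, but it
         * keeps the locking consistent with the other write paths. */
        qemu_co_mutex_lock(&s->lock);
        co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
        qemu_co_mutex_unlock(&s->lock);
    }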

> > +}
> > +
> >  static int vmdk_write_compressed(BlockDriverState *bs,
> >                                   int64_t sector_num,
> >                                   const uint8_t *buf,
> >                                   int nb_sectors)
> >  {
> >      BDRVVmdkState *s = bs->opaque;
> > +
> >      if (s->num_extents == 1 && s->extents[0].compressed) {
> > -        return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
> > +        Coroutine *co;
> > +        AioContext *aio_context = bdrv_get_aio_context(bs);
> > +        VmdkWriteCompressedCo data = {
> > +            .bs         = bs,
> > +            .sector_num = sector_num,
> > +            .buf        = buf,
> > +            .nb_sectors = nb_sectors,
> > +            .ret        = -EINPROGRESS,
> > +        };
> > +        co = qemu_coroutine_create(vmdk_co_write_compressed);
> > +        qemu_coroutine_enter(co, &data);
> > +        while (data.ret == -EINPROGRESS) {
> > +            aio_poll(aio_context, true);
> > +        }
> > +        return data.ret;
> 
> Don't you have a plan to move the creation of the coroutine for compressed
> writes into the block layer? Or will bdrv_co_pwritev gain a "compressed" flag
> (BDRV_REQ_COMPRESSED) in the future?

Well, there was this recent discussion, but personally I don't have any
plans. If we want to do compressed writes from running VMs, we will need
the conversion because we must not block the vcpu.

Or maybe the flag would actually be the nicer option, yes. I don't have
a strong opinion on this yet.

Kevin
Pavel Butsykin April 29, 2016, 8:49 a.m. UTC | #3
On 29.04.2016 06:08, Fam Zheng wrote:
> On Thu, 04/28 15:16, Kevin Wolf wrote:
>> +typedef struct VmdkWriteCompressedCo {
>> +    BlockDriverState *bs;
>> +    int64_t sector_num;
>> +    const uint8_t *buf;
>> +    int nb_sectors;
>> +    int ret;
>> +} VmdkWriteCompressedCo;
>> +
>> +static void vmdk_co_write_compressed(void *opaque)
>> +{
>> +    VmdkWriteCompressedCo *co = opaque;
>> +    QEMUIOVector local_qiov;
>> +    uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
>> +    uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
>> +
>> +    struct iovec iov = (struct iovec) {
>> +        .iov_base   = (uint8_t*) co->buf,
>> +        .iov_len    = bytes,
>> +    };
>> +    qemu_iovec_init_external(&local_qiov, &iov, 1);
>> +
>> +    co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
>
> Should it acquire s->lock?
>
The write_compressed callback is currently used only for image
conversion, so I think the lock is not required.

>> +}
>> +
>>   static int vmdk_write_compressed(BlockDriverState *bs,
>>                                    int64_t sector_num,
>>                                    const uint8_t *buf,
>>                                    int nb_sectors)
>>   {
>>       BDRVVmdkState *s = bs->opaque;
>> +
>>       if (s->num_extents == 1 && s->extents[0].compressed) {
>> -        return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
>> +        Coroutine *co;
>> +        AioContext *aio_context = bdrv_get_aio_context(bs);
>> +        VmdkWriteCompressedCo data = {
>> +            .bs         = bs,
>> +            .sector_num = sector_num,
>> +            .buf        = buf,
>> +            .nb_sectors = nb_sectors,
>> +            .ret        = -EINPROGRESS,
>> +        };
>> +        co = qemu_coroutine_create(vmdk_co_write_compressed);
>> +        qemu_coroutine_enter(co, &data);
>> +        while (data.ret == -EINPROGRESS) {
>> +            aio_poll(aio_context, true);
>> +        }
>> +        return data.ret;
>
> Don't you have a plan to move the creation of the coroutine for compressed
> writes into the block layer? Or will bdrv_co_pwritev gain a "compressed" flag
> (BDRV_REQ_COMPRESSED) in the future?
>
Actually, I already have these patches as part of the work on backup
compression; I hope to send them today.

> Fam
>
>>       } else {
>>           return -ENOTSUP;
>>       }
>
Kevin Wolf April 29, 2016, 9:49 a.m. UTC | #4
On 29.04.2016 at 10:49, Pavel Butsykin wrote:
> On 29.04.2016 06:08, Fam Zheng wrote:
> >On Thu, 04/28 15:16, Kevin Wolf wrote:
> >>  static int vmdk_write_compressed(BlockDriverState *bs,
> >>                                   int64_t sector_num,
> >>                                   const uint8_t *buf,
> >>                                   int nb_sectors)
> >>  {
> >>      BDRVVmdkState *s = bs->opaque;
> >>+
> >>      if (s->num_extents == 1 && s->extents[0].compressed) {
> >>-        return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
> >>+        Coroutine *co;
> >>+        AioContext *aio_context = bdrv_get_aio_context(bs);
> >>+        VmdkWriteCompressedCo data = {
> >>+            .bs         = bs,
> >>+            .sector_num = sector_num,
> >>+            .buf        = buf,
> >>+            .nb_sectors = nb_sectors,
> >>+            .ret        = -EINPROGRESS,
> >>+        };
> >>+        co = qemu_coroutine_create(vmdk_co_write_compressed);
> >>+        qemu_coroutine_enter(co, &data);
> >>+        while (data.ret == -EINPROGRESS) {
> >>+            aio_poll(aio_context, true);
> >>+        }
> >>+        return data.ret;
> >
> >Don't you have a plan to move the creation of the coroutine for compressed
> >writes into the block layer? Or will bdrv_co_pwritev gain a "compressed" flag
> >(BDRV_REQ_COMPRESSED) in the future?
> >
> Actually, I already have these patches as part of the work on backup
> compression; I hope to send them today.

Sounds good. Do you use a new flag for the normal write functions or just
update the existing .bdrv_write_compressed function?

Also, please base your series on this one to avoid merge conflicts in
vmdk.c. I'm going to apply the series to block-next now as it's fully
reviewed. I guess you'll effectively need to revert what this patch does
to vmdk_write_compressed() in favour of a block/io.c solution.

Kevin
Pavel Butsykin April 29, 2016, 10:31 a.m. UTC | #5
On 29.04.2016 12:49, Kevin Wolf wrote:
> On 29.04.2016 at 10:49, Pavel Butsykin wrote:
>> On 29.04.2016 06:08, Fam Zheng wrote:
>>> On Thu, 04/28 15:16, Kevin Wolf wrote:
>>>>   static int vmdk_write_compressed(BlockDriverState *bs,
>>>>                                    int64_t sector_num,
>>>>                                    const uint8_t *buf,
>>>>                                    int nb_sectors)
>>>>   {
>>>>       BDRVVmdkState *s = bs->opaque;
>>>> +
>>>>       if (s->num_extents == 1 && s->extents[0].compressed) {
>>>> -        return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
>>>> +        Coroutine *co;
>>>> +        AioContext *aio_context = bdrv_get_aio_context(bs);
>>>> +        VmdkWriteCompressedCo data = {
>>>> +            .bs         = bs,
>>>> +            .sector_num = sector_num,
>>>> +            .buf        = buf,
>>>> +            .nb_sectors = nb_sectors,
>>>> +            .ret        = -EINPROGRESS,
>>>> +        };
>>>> +        co = qemu_coroutine_create(vmdk_co_write_compressed);
>>>> +        qemu_coroutine_enter(co, &data);
>>>> +        while (data.ret == -EINPROGRESS) {
>>>> +            aio_poll(aio_context, true);
>>>> +        }
>>>> +        return data.ret;
>>>
>>> Don't you have a plan to move the creation of the coroutine for compressed
>>> writes into the block layer? Or will bdrv_co_pwritev gain a "compressed" flag
>>> (BDRV_REQ_COMPRESSED) in the future?
>>>
>> Actually, I already have these patches as part of the work on backup
>> compression; I hope to send them today.
>
> Sounds good. Do you use a new flag for the normal write functions or just
> update the existing .bdrv_write_compressed function?
>
No, I just added a new interface, .bdrv_co_write_compressed, and removed
the old one. For compatibility with the old interface, bdrv_write_compressed
creates a new coroutine and calls .bdrv_co_write_compressed, the way you
did it in vmdk (only now it lives in io.c). Maybe it's not the best idea,
but it can be discussed.
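
Roughly like this (an untested sketch; the struct and callback names below
are just placeholders, and the exact placement in block/io.c is still open):

    typedef struct BdrvWriteCompressedCo {
        BlockDriverState *bs;
        int64_t sector_num;
        const uint8_t *buf;
        int nb_sectors;
        int ret;
    } BdrvWriteCompressedCo;

    static void bdrv_write_compressed_co_entry(void *opaque)
    {
        BdrvWriteCompressedCo *co = opaque;

        /* .bdrv_co_write_compressed is the proposed new driver callback */
        co->ret = co->bs->drv->bdrv_co_write_compressed(co->bs, co->sector_num,
                                                        co->buf, co->nb_sectors);
    }

    int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                              const uint8_t *buf, int nb_sectors)
    {
        Coroutine *co;
        BdrvWriteCompressedCo data = {
            .bs         = bs,
            .sector_num = sector_num,
            .buf        = buf,
            .nb_sectors = nb_sectors,
            .ret        = -EINPROGRESS,
        };

        if (!bs->drv || !bs->drv->bdrv_co_write_compressed) {
            return -ENOTSUP;
        }

        /* Same pattern as in the vmdk patch, only hoisted into the block
         * layer: enter a coroutine and poll until it has finished. */
        co = qemu_coroutine_create(bdrv_write_compressed_co_entry);
        qemu_coroutine_enter(co, &data);
        while (data.ret == -EINPROGRESS) {
            aio_poll(bdrv_get_aio_context(bs), true);
        }
        return data.ret;
    }

That way callers like qemu-img keep the synchronous bdrv_write_compressed()
they use today, and individual drivers only need to provide the coroutine
version.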

> Also, please base your series on this one to avoid merge conflicts in
> vmdk.c. I'm going to apply the series to block-next now as it's fully
> reviewed. I guess you'll effectively need to revert what this patch does
> to vmdk_write_compressed() in favour of a block/io.c solution.
>
Yes, the patches will be based on this series; I specifically made sure of that :)

> Kevin
>

Patch

diff --git a/block/vmdk.c b/block/vmdk.c
index 6c447ad..f243527 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1016,27 +1016,26 @@  static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
  */
 static int get_whole_cluster(BlockDriverState *bs,
                              VmdkExtent *extent,
-                             uint64_t cluster_sector_num,
-                             uint64_t sector_num,
-                             uint64_t skip_start_sector,
-                             uint64_t skip_end_sector)
+                             uint64_t cluster_offset,
+                             uint64_t offset,
+                             uint64_t skip_start_bytes,
+                             uint64_t skip_end_bytes)
 {
     int ret = VMDK_OK;
     int64_t cluster_bytes;
     uint8_t *whole_grain;
 
     /* For COW, align request sector_num to cluster start */
-    sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
     cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
+    offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
     whole_grain = qemu_blockalign(bs, cluster_bytes);
 
     if (!bs->backing) {
-        memset(whole_grain, 0,  skip_start_sector << BDRV_SECTOR_BITS);
-        memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
-               cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
+        memset(whole_grain, 0, skip_start_bytes);
+        memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
     }
 
-    assert(skip_end_sector <= extent->cluster_sectors);
+    assert(skip_end_bytes <= cluster_bytes);
     /* we will be here if it's first write on non-exist grain(cluster).
      * try to read from parent image, if exist */
     if (bs->backing && !vmdk_is_cid_valid(bs)) {
@@ -1045,42 +1044,43 @@  static int get_whole_cluster(BlockDriverState *bs,
     }
 
     /* Read backing data before skip range */
-    if (skip_start_sector > 0) {
+    if (skip_start_bytes > 0) {
         if (bs->backing) {
-            ret = bdrv_read(bs->backing->bs, sector_num,
-                            whole_grain, skip_start_sector);
+            ret = bdrv_pread(bs->backing->bs, offset, whole_grain,
+                             skip_start_bytes);
             if (ret < 0) {
                 ret = VMDK_ERROR;
                 goto exit;
             }
         }
-        ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain,
-                         skip_start_sector);
+        ret = bdrv_pwrite(extent->file->bs, cluster_offset, whole_grain,
+                          skip_start_bytes);
         if (ret < 0) {
             ret = VMDK_ERROR;
             goto exit;
         }
     }
     /* Read backing data after skip range */
-    if (skip_end_sector < extent->cluster_sectors) {
+    if (skip_end_bytes < cluster_bytes) {
         if (bs->backing) {
-            ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector,
-                            whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
-                            extent->cluster_sectors - skip_end_sector);
+            ret = bdrv_pread(bs->backing->bs, offset + skip_end_bytes,
+                             whole_grain + skip_end_bytes,
+                             cluster_bytes - skip_end_bytes);
             if (ret < 0) {
                 ret = VMDK_ERROR;
                 goto exit;
             }
         }
-        ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector,
-                         whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
-                         extent->cluster_sectors - skip_end_sector);
+        ret = bdrv_pwrite(extent->file->bs, cluster_offset + skip_end_bytes,
+                          whole_grain + skip_end_bytes,
+                          cluster_bytes - skip_end_bytes);
         if (ret < 0) {
             ret = VMDK_ERROR;
             goto exit;
         }
     }
 
+    ret = VMDK_OK;
 exit:
     qemu_vfree(whole_grain);
     return ret;
@@ -1142,8 +1142,8 @@  static int get_cluster_offset(BlockDriverState *bs,
                               uint64_t offset,
                               bool allocate,
                               uint64_t *cluster_offset,
-                              uint64_t skip_start_sector,
-                              uint64_t skip_end_sector)
+                              uint64_t skip_start_bytes,
+                              uint64_t skip_end_bytes)
 {
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
@@ -1230,10 +1230,8 @@  static int get_cluster_offset(BlockDriverState *bs,
          * This problem may occur because of insufficient space on host disk
          * or inappropriate VM shutdown.
          */
-        ret = get_whole_cluster(bs, extent,
-                                cluster_sector,
-                                offset >> BDRV_SECTOR_BITS,
-                                skip_start_sector, skip_end_sector);
+        ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
+                                offset, skip_start_bytes, skip_end_bytes);
         if (ret) {
             return ret;
         }
@@ -1330,38 +1328,57 @@  static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
 }
 
 static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
-                            int64_t offset_in_cluster, const uint8_t *buf,
-                            int nb_sectors, int64_t sector_num)
+                            int64_t offset_in_cluster, QEMUIOVector *qiov,
+                            uint64_t qiov_offset, uint64_t n_bytes,
+                            uint64_t offset)
 {
     int ret;
     VmdkGrainMarker *data = NULL;
     uLongf buf_len;
-    const uint8_t *write_buf = buf;
-    int write_len = nb_sectors * 512;
+    QEMUIOVector local_qiov;
+    struct iovec iov;
     int64_t write_offset;
     int64_t write_end_sector;
 
     if (extent->compressed) {
+        void *compressed_data;
+
         if (!extent->has_marker) {
             ret = -EINVAL;
             goto out;
         }
         buf_len = (extent->cluster_sectors << 9) * 2;
         data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
-        if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
-                buf_len == 0) {
+
+        compressed_data = g_malloc(n_bytes);
+        qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes);
+        ret = compress(data->data, &buf_len, compressed_data, n_bytes);
+        g_free(compressed_data);
+
+        if (ret != Z_OK || buf_len == 0) {
             ret = -EINVAL;
             goto out;
         }
-        data->lba = sector_num;
+
+        data->lba = offset >> BDRV_SECTOR_BITS;
         data->size = buf_len;
-        write_buf = (uint8_t *)data;
-        write_len = buf_len + sizeof(VmdkGrainMarker);
+
+        n_bytes = buf_len + sizeof(VmdkGrainMarker);
+        iov = (struct iovec) {
+            .iov_base   = data,
+            .iov_len    = n_bytes,
+        };
+        qemu_iovec_init_external(&local_qiov, &iov, 1);
+    } else {
+        qemu_iovec_init(&local_qiov, qiov->niov);
+        qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
     }
+
     write_offset = cluster_offset + offset_in_cluster,
-    ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len);
+    ret = bdrv_co_pwritev(extent->file->bs, write_offset, n_bytes,
+                          &local_qiov, 0);
 
-    write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);
+    write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
 
     if (extent->compressed) {
         extent->next_cluster_sector = write_end_sector;
@@ -1370,13 +1387,15 @@  static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
                                           write_end_sector);
     }
 
-    if (ret != write_len) {
-        ret = ret < 0 ? ret : -EIO;
+    if (ret < 0) {
         goto out;
     }
     ret = 0;
  out:
     g_free(data);
+    if (!extent->compressed) {
+        qemu_iovec_destroy(&local_qiov);
+    }
     return ret;
 }
 
@@ -1525,38 +1544,38 @@  fail:
  *
  * Returns: error code with 0 for success.
  */
-static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
-                      const uint8_t *buf, int nb_sectors,
-                      bool zeroed, bool zero_dry_run)
+static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
+                       uint64_t bytes, QEMUIOVector *qiov,
+                       bool zeroed, bool zero_dry_run)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
     int ret;
-    int64_t index_in_cluster, n;
+    int64_t offset_in_cluster, n_bytes;
     uint64_t cluster_offset;
+    uint64_t bytes_done = 0;
     VmdkMetaData m_data;
 
-    if (sector_num > bs->total_sectors) {
-        error_report("Wrong offset: sector_num=0x%" PRIx64
+    if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
+        error_report("Wrong offset: offset=0x%" PRIx64
                      " total_sectors=0x%" PRIx64,
-                     sector_num, bs->total_sectors);
+                     offset, bs->total_sectors);
         return -EIO;
     }
 
-    while (nb_sectors > 0) {
-        extent = find_extent(s, sector_num, extent);
+    while (bytes > 0) {
+        extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
         if (!extent) {
             return -EIO;
         }
-        index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
-        n = extent->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors) {
-            n = nb_sectors;
-        }
-        ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+        offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
+        n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
+                             - offset_in_cluster);
+
+        ret = get_cluster_offset(bs, extent, &m_data, offset,
                                  !(extent->compressed || zeroed),
-                                 &cluster_offset,
-                                 index_in_cluster, index_in_cluster + n);
+                                 &cluster_offset, offset_in_cluster,
+                                 offset_in_cluster + n_bytes);
         if (extent->compressed) {
             if (ret == VMDK_OK) {
                 /* Refuse write to allocated cluster for streamOptimized */
@@ -1565,7 +1584,7 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                 return -EIO;
             } else {
                 /* allocate */
-                ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+                ret = get_cluster_offset(bs, extent, &m_data, offset,
                                          true, &cluster_offset, 0, 0);
             }
         }
@@ -1575,9 +1594,9 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
         if (zeroed) {
             /* Do zeroed write, buf is ignored */
             if (extent->has_zero_grain &&
-                    index_in_cluster == 0 &&
-                    n >= extent->cluster_sectors) {
-                n = extent->cluster_sectors;
+                    offset_in_cluster == 0 &&
+                    n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
+                n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
                 if (!zero_dry_run) {
                     /* update L2 tables */
                     if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
@@ -1589,9 +1608,8 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                 return -ENOTSUP;
             }
         } else {
-            ret = vmdk_write_extent(extent,
-                            cluster_offset, index_in_cluster * 512,
-                            buf, n, sector_num);
+            ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster,
+                                    qiov, bytes_done, n_bytes, offset);
             if (ret) {
                 return ret;
             }
@@ -1604,9 +1622,9 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                 }
             }
         }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
+        bytes -= n_bytes;
+        offset += n_bytes;
+        bytes_done += n_bytes;
 
         /* update CID on the first write every time the virtual disk is
          * opened */
@@ -1621,25 +1639,65 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
     return 0;
 }
 
-static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
-                                      const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                QEMUIOVector *qiov, int flags)
 {
     int ret;
     BDRVVmdkState *s = bs->opaque;
     qemu_co_mutex_lock(&s->lock);
-    ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+    ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false);
     qemu_co_mutex_unlock(&s->lock);
     return ret;
 }
 
+typedef struct VmdkWriteCompressedCo {
+    BlockDriverState *bs;
+    int64_t sector_num;
+    const uint8_t *buf;
+    int nb_sectors;
+    int ret;
+} VmdkWriteCompressedCo;
+
+static void vmdk_co_write_compressed(void *opaque)
+{
+    VmdkWriteCompressedCo *co = opaque;
+    QEMUIOVector local_qiov;
+    uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
+    uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
+
+    struct iovec iov = (struct iovec) {
+        .iov_base   = (uint8_t*) co->buf,
+        .iov_len    = bytes,
+    };
+    qemu_iovec_init_external(&local_qiov, &iov, 1);
+
+    co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
+}
+
 static int vmdk_write_compressed(BlockDriverState *bs,
                                  int64_t sector_num,
                                  const uint8_t *buf,
                                  int nb_sectors)
 {
     BDRVVmdkState *s = bs->opaque;
+
     if (s->num_extents == 1 && s->extents[0].compressed) {
-        return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+        Coroutine *co;
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        VmdkWriteCompressedCo data = {
+            .bs         = bs,
+            .sector_num = sector_num,
+            .buf        = buf,
+            .nb_sectors = nb_sectors,
+            .ret        = -EINPROGRESS,
+        };
+        co = qemu_coroutine_create(vmdk_co_write_compressed);
+        qemu_coroutine_enter(co, &data);
+        while (data.ret == -EINPROGRESS) {
+            aio_poll(aio_context, true);
+        }
+        return data.ret;
     } else {
         return -ENOTSUP;
     }
@@ -1652,12 +1710,15 @@  static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
 {
     int ret;
     BDRVVmdkState *s = bs->opaque;
+    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
+    uint64_t bytes = nb_sectors * BDRV_SECTOR_SIZE;
+
     qemu_co_mutex_lock(&s->lock);
     /* write zeroes could fail if sectors not aligned to cluster, test it with
      * dry_run == true before really updating image */
-    ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
+    ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true);
     if (!ret) {
-        ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
+        ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false);
     }
     qemu_co_mutex_unlock(&s->lock);
     return ret;
@@ -2341,7 +2402,7 @@  static BlockDriver bdrv_vmdk = {
     .bdrv_check                   = vmdk_check,
     .bdrv_reopen_prepare          = vmdk_reopen_prepare,
     .bdrv_co_preadv               = vmdk_co_preadv,
-    .bdrv_write                   = vmdk_co_write,
+    .bdrv_co_pwritev              = vmdk_co_pwritev,
     .bdrv_write_compressed        = vmdk_write_compressed,
     .bdrv_co_write_zeroes         = vmdk_co_write_zeroes,
     .bdrv_close                   = vmdk_close,