Patchwork [v3,6/6] vmdk: add bdrv_co_write_zeroes

login
register
mail settings
Submitter Fam Zheng
Date April 24, 2013, 12:44 p.m.
Message ID <1366807475-26350-7-git-send-email-famz@redhat.com>
Download mbox | patch
Permalink /patch/239185/
State New
Headers show

Comments

Fam Zheng - April 24, 2013, 12:44 p.m.
Use special offset to write zeroes efficiently, when zeroed-grain GTE is
available. If an allocated cluster is zero-written, the cluster is leaked
because its offset pointer is overwritten with "0x1".

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 14 deletions(-)
Stefan Hajnoczi - April 25, 2013, 1:20 p.m.
On Wed, Apr 24, 2013 at 08:44:35PM +0800, Fam Zheng wrote:
> @@ -905,6 +905,13 @@ static int get_cluster_offset(BlockDriverState *bs,
>      l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
>      *cluster_offset = le32_to_cpu(l2_table[l2_index]);
>  
> +    if (m_data) {
> +        m_data->valid = 1;
> +        m_data->l1_index = l1_index;
> +        m_data->l2_index = l2_index;
> +        m_data->offset = *cluster_offset;
> +        m_data->l2_offset = extent->l1_table[m_data->l1_index];

This line can simply be:

m_data->l2_offset = l2_offset;

> +    }

Filling in m_data up here means that only the ->offset field needs to be
filled in when we allocate a cluster further down.  Right now the code
is duplicated, but it just overwrites the fields with the same value
again.

> @@ -1222,17 +1238,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
>          if (n > nb_sectors) {
>              n = nb_sectors;
>          }
> -
> -        ret = vmdk_write_extent(extent,
> -                        cluster_offset, index_in_cluster * 512,
> -                        buf, n, sector_num);
> -        if (ret) {
> -            return ret;
> -        }
> -        if (m_data.valid) {
> -            /* update L2 tables */
> -            if (vmdk_L2update(extent, &m_data) == -1) {
> -                return -EIO;
> +        if (zeroed) {
> +            /* Do zeroed write, buf is ignored */
> +            if (extent->has_zero_grain &&
> +                    index_in_cluster == 0 &&
> +                    n >= extent->cluster_sectors) {
> +                n = extent->cluster_sectors;
> +                if (!zero_dry_run) {
> +                    m_data.offset = cpu_to_le32(VMDK_GTE_ZEROED);

offset is host endian now!

> +                    /* update L2 tables */
> +                    if (vmdk_L2update(extent, &m_data) != VMDK_OK) {

Zeroing cluster-by-cluster is slow - vmdk_L2update() uses sync to flush
the L2 update.  The vmdk.c code isn't great for buffering up metadata
changes and flushing them in a single operation though, so this is fine
for now.

> +                        return -EIO;
> +                    }

l2_cache[] has not been updated with the new VMDK_GTE_ZEROED offset.

Patch

diff --git a/block/vmdk.c b/block/vmdk.c
index 16e1417..90cb071 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -905,6 +905,13 @@  static int get_cluster_offset(BlockDriverState *bs,
     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
     *cluster_offset = le32_to_cpu(l2_table[l2_index]);
 
+    if (m_data) {
+        m_data->valid = 1;
+        m_data->l1_index = l1_index;
+        m_data->l2_index = l2_index;
+        m_data->offset = *cluster_offset;
+        m_data->l2_offset = extent->l1_table[m_data->l1_index];
+    }
     if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
         zeroed = true;
     }
@@ -1165,8 +1172,17 @@  static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
+/**
+ * vmdk_write:
+ * @zeroed:       buf is ignored (data is zero), use zeroed_grain GTE feature
+ * if possible, otherwise return -ENOTSUP.
+ * @zero_dry_run: for zeroed == true only: check only, don't update L2 table
+ *
+ * Returns: error code with 0 for success.
+ */
 static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
+                      const uint8_t *buf, int nb_sectors,
+                      bool zeroed, bool zero_dry_run)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
@@ -1212,7 +1228,7 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                                         &cluster_offset);
             }
         }
-        if (ret) {
+        if (ret == VMDK_ERROR) {
             return -EINVAL;
         }
         extent_begin_sector = extent->end_sector - extent->sectors;
@@ -1222,17 +1238,34 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
         if (n > nb_sectors) {
             n = nb_sectors;
         }
-
-        ret = vmdk_write_extent(extent,
-                        cluster_offset, index_in_cluster * 512,
-                        buf, n, sector_num);
-        if (ret) {
-            return ret;
-        }
-        if (m_data.valid) {
-            /* update L2 tables */
-            if (vmdk_L2update(extent, &m_data) == -1) {
-                return -EIO;
+        if (zeroed) {
+            /* Do zeroed write, buf is ignored */
+            if (extent->has_zero_grain &&
+                    index_in_cluster == 0 &&
+                    n >= extent->cluster_sectors) {
+                n = extent->cluster_sectors;
+                if (!zero_dry_run) {
+                    m_data.offset = cpu_to_le32(VMDK_GTE_ZEROED);
+                    /* update L2 tables */
+                    if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+                        return -EIO;
+                    }
+                }
+            } else {
+                return -ENOTSUP;
+            }
+        } else {
+            ret = vmdk_write_extent(extent,
+                            cluster_offset, index_in_cluster * 512,
+                            buf, n, sector_num);
+            if (ret) {
+                return ret;
+            }
+            if (m_data.valid) {
+                /* update L2 tables */
+                if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+                    return -EIO;
+                }
             }
         }
         nb_sectors -= n;
@@ -1258,7 +1291,22 @@  static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
     int ret;
     BDRVVmdkState *s = bs->opaque;
     qemu_co_mutex_lock(&s->lock);
-    ret = vmdk_write(bs, sector_num, buf, nb_sectors);
+    ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
+
+static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             int nb_sectors)
+{
+    int ret;
+    BDRVVmdkState *s = bs->opaque;
+    qemu_co_mutex_lock(&s->lock);
+    ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
+    if (!ret) {
+        ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
+    }
     qemu_co_mutex_unlock(&s->lock);
     return ret;
 }
@@ -1738,6 +1786,7 @@  static BlockDriver bdrv_vmdk = {
     .bdrv_reopen_prepare = vmdk_reopen_prepare,
     .bdrv_read      = vmdk_co_read,
     .bdrv_write     = vmdk_co_write,
+    .bdrv_co_write_zeroes = vmdk_co_write_zeroes,
     .bdrv_close     = vmdk_close,
     .bdrv_create    = vmdk_create,
     .bdrv_co_flush_to_disk  = vmdk_co_flush,