Patchwork qcow2: Add bdrv_discard support

login
register
mail settings
Submitter Kevin Wolf
Date Jan. 27, 2011, 12:40 p.m.
Message ID <1296132021-10363-1-git-send-email-kwolf@redhat.com>
Download mbox | patch
Permalink /patch/80669/
State New
Headers show

Comments

Kevin Wolf - Jan. 27, 2011, 12:40 p.m.
This adds a bdrv_discard function to qcow2 that frees the discarded clusters.
It does not yet pass the discard on to the underlying file system driver, but
the space can be reused by future writes to the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c |   78 +++++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.c         |    8 +++++
 block/qcow2.h         |    2 +
 3 files changed, 88 insertions(+), 0 deletions(-)
Stefan Hajnoczi - Jan. 28, 2011, 9:57 a.m.
On Thu, Jan 27, 2011 at 01:40:21PM +0100, Kevin Wolf wrote:
> +/*
> + * This discards as many clusters of nb_clusters as possible at once (i.e.
> + * all clusters in the same L2 table) and returns the number of discarded
> + * clusters.
> + */
> +static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
> +    unsigned int nb_clusters)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    uint64_t l2_offset, *l2_table;
> +    int l2_index;
> +    int ret;
> +    int i;
> +
> +    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    /* Limit nb_clusters to one L2 table */
> +    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
> +
> +    for (i = 0; i < nb_clusters; i++) {
> +        uint64_t old_offset;
> +
> +        old_offset = be64_to_cpu(l2_table[l2_index + i]);
> +        old_offset &= ~QCOW_OFLAG_COPIED;
> +
> +        if (old_offset == 0) {
> +            continue;
> +        }
> +
> +        /* First remove L2 entries */
> +        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
> +        l2_table[l2_index + i] = cpu_to_be64(0);
> +
> +        /* Then decrease the refcount */
> +        qcow2_free_any_clusters(bs, old_offset, 1);
> +    }
> +
> +    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
> +    if (ret < 0) {
> +        return ret;
> +    }

There is no loop to continue discards across L2 boundaries.  Guests
could use discard on the entire disk from an installer, for example.

> +
> +    return nb_clusters;
> +}
> +
> +int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
> +    int nb_sectors)

Should this be named qcow2_discard_sectors(), since the length is given in
sectors, not clusters?

> +{
> +    BDRVQcowState *s = bs->opaque;
> +    uint64_t end_offset;
> +    unsigned int nb_clusters;
> +    int ret;
> +

When offset=0x10200, nb_sectors=1, and cluster_size=65536...

> +    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
> +
> +    /* Round start up and end down */
> +    offset = align_offset(offset, s->cluster_size);
> +    end_offset &= ~(s->cluster_size - 1);

offset=0x20000
end_offset=0x10000

> +
> +    nb_clusters = size_to_clusters(s, end_offset - offset);

nb_clusters=4294967295

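...and the loop will discard almost 256TB of data: the request lies within
a single cluster, so after rounding end_offset ends up below offset and the
unsigned subtraction wraps around.  We need to check against this
overflow/underflow or do this in the block layer.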

Stefan
Stefan Hajnoczi - Jan. 28, 2011, 10:13 a.m.
On Fri, Jan 28, 2011 at 9:57 AM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Thu, Jan 27, 2011 at 01:40:21PM +0100, Kevin Wolf wrote:
>> +/*
>> + * This discards as many clusters of nb_clusters as possible at once (i.e.
>> + * all clusters in the same L2 table) and returns the number of discarded
>> + * clusters.
>> + */
>> +static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
>> +    unsigned int nb_clusters)
>> +{
>> +    BDRVQcowState *s = bs->opaque;
>> +    uint64_t l2_offset, *l2_table;
>> +    int l2_index;
>> +    int ret;
>> +    int i;
>> +
>> +    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    /* Limit nb_clusters to one L2 table */
>> +    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
>> +
>> +    for (i = 0; i < nb_clusters; i++) {
>> +        uint64_t old_offset;
>> +
>> +        old_offset = be64_to_cpu(l2_table[l2_index + i]);
>> +        old_offset &= ~QCOW_OFLAG_COPIED;
>> +
>> +        if (old_offset == 0) {
>> +            continue;
>> +        }
>> +
>> +        /* First remove L2 entries */
>> +        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
>> +        l2_table[l2_index + i] = cpu_to_be64(0);
>> +
>> +        /* Then decrease the refcount */
>> +        qcow2_free_any_clusters(bs, old_offset, 1);
>> +    }
>> +
>> +    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>
> There is no loop to continue discards across L2 boundaries.  Guests
> could use discard on the entire disk from an installer, for example.

Sorry, please ignore this comment.  This function discards as many
clusters as possible at once but gets called from a loop so that
eventually we discard the full amount.

Stefan

Patch

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 1c2003a..119d257 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -888,3 +888,81 @@  int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
     }
     return 0;
 }
+
+/*
+ * This discards as many clusters of nb_clusters as possible at once (i.e.
+ * all clusters in the same L2 table) and returns the number of discarded
+ * clusters.
+ */
+static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
+    unsigned int nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t l2_offset, *l2_table;
+    int l2_index;
+    int ret;
+    int i;
+
+    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Limit nb_clusters to one L2 table */
+    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+    for (i = 0; i < nb_clusters; i++) {
+        uint64_t old_offset;
+
+        old_offset = be64_to_cpu(l2_table[l2_index + i]);
+        old_offset &= ~QCOW_OFLAG_COPIED;
+
+        if (old_offset == 0) {
+            continue;
+        }
+
+        /* First remove L2 entries */
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        l2_table[l2_index + i] = cpu_to_be64(0);
+
+        /* Then decrease the refcount */
+        qcow2_free_any_clusters(bs, old_offset, 1);
+    }
+
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return nb_clusters;
+}
+
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+    int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t end_offset;
+    unsigned int nb_clusters;
+    int ret;
+
+    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
+
+    /* Round start up and end down */
+    offset = align_offset(offset, s->cluster_size);
+    end_offset &= ~(s->cluster_size - 1);
+
+    nb_clusters = size_to_clusters(s, end_offset - offset);
+
+    /* Each L2 table is handled by its own loop iteration */
+    while (nb_clusters > 0) {
+        ret = discard_single_l2(bs, offset, nb_clusters);
+        if (ret < 0) {
+            return ret;
+        }
+
+        nb_clusters -= ret;
+        offset += (ret * s->cluster_size);
+    }
+
+    return 0;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 49bf7b9..dbe4fdd 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1084,6 +1084,13 @@  static int qcow2_make_empty(BlockDriverState *bs)
     return 0;
 }
 
+static int qcow2_discard(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors)
+{
+    return qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
+        nb_sectors);
+}
+
 static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
 {
     BDRVQcowState *s = bs->opaque;
@@ -1349,6 +1356,7 @@  static BlockDriver bdrv_qcow2 = {
     .bdrv_aio_writev    = qcow2_aio_writev,
     .bdrv_aio_flush     = qcow2_aio_flush,
 
+    .bdrv_discard           = qcow2_discard,
     .bdrv_truncate          = qcow2_truncate,
     .bdrv_write_compressed  = qcow2_write_compressed,
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 6d80120..a019831 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -209,6 +209,8 @@  uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                          int compressed_size);
 
 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+    int nb_sectors);
 
 /* qcow2-snapshot.c functions */
 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);