diff mbox

[v5,01/12] qcow2: Allow "full" discard

Message ID 1397771992-31126-2-git-send-email-mreitz@redhat.com
State New
Headers show

Commit Message

Max Reitz April 17, 2014, 9:59 p.m. UTC
Normally, discarded sectors should read back as zero. However, there are
cases in which a sector (or rather cluster) should be discarded as if
it had never been written in the first place, that is, reading it should
fall through to the backing file again.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c  | 26 ++++++++++++++++----------
 block/qcow2-snapshot.c |  2 +-
 block/qcow2.c          |  2 +-
 block/qcow2.h          |  2 +-
 4 files changed, 19 insertions(+), 13 deletions(-)

Comments

Kevin Wolf April 22, 2014, 2:04 p.m. UTC | #1
Am 17.04.2014 um 23:59 hat Max Reitz geschrieben:
> Normally, discarded sectors should read back as zero. However, there are
> cases in which a sector (or rather cluster) should be discarded as if
> they were never written in the first place, that is, reading them should
> fall through to the backing file again.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-cluster.c  | 26 ++++++++++++++++----------
>  block/qcow2-snapshot.c |  2 +-
>  block/qcow2.c          |  2 +-
>  block/qcow2.h          |  2 +-
>  4 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index 331ab08..9b73d97 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1343,7 +1343,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
>   * clusters.
>   */
>  static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
> -    unsigned int nb_clusters, enum qcow2_discard_type type)
> +    unsigned int nb_clusters, enum qcow2_discard_type type, bool full_discard)
>  {
>      BDRVQcowState *s = bs->opaque;
>      uint64_t *l2_table;
> @@ -1365,25 +1365,31 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
>          old_offset = be64_to_cpu(l2_table[l2_index + i]);
>  
>          /*
> -         * Make sure that a discarded area reads back as zeroes for v3 images
> -         * (we cannot do it for v2 without actually writing a zero-filled
> -         * buffer). We can skip the operation if the cluster is already marked
> -         * as zero, or if it's unallocated and we don't have a backing file.
> +         * If full_discard is false, make sure that a discarded area reads back
> +         * as zeroes for v3 images (we cannot do it for v2 without actually
> +         * writing a zero-filled buffer). We can skip the operation if the
> +         * cluster is already marked as zero, or if it's unallocated and we
> +         * don't have a backing file.
>           *
>           * TODO We might want to use bdrv_get_block_status(bs) here, but we're
>           * holding s->lock, so that doesn't work today.
> +         *
> +         * In case of full_discard being true, the sector should not be read
> +         * back as zeroes, but rather fall through to the backing file.
>           */
> -        if (old_offset & QCOW_OFLAG_ZERO) {
> +        if (!full_discard && (old_offset & QCOW_OFLAG_ZERO)) {
>              continue;
>          }
>  
> -        if ((old_offset & L2E_OFFSET_MASK) == 0 && !bs->backing_hd) {
> +        if ((old_offset & L2E_OFFSET_MASK) == 0 &&
> +            (full_discard || !bs->backing_hd))
> +        {
>              continue;
>          }

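I don't think that's right. You wouldn't discard (non-preallocated) zero
clusters with this code: with full_discard set, such an entry has
QCOW_OFLAG_ZERO set but no host offset, so the first check no longer skips
it, yet (old_offset & L2E_OFFSET_MASK) == 0 makes the second check hit
"continue". The zero flag is therefore never cleared and the cluster keeps
reading back as zeroes instead of falling through to the backing file.
You should probably check the cluster type.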

Kevin
Max Reitz April 22, 2014, 3:57 p.m. UTC | #2
On 22.04.2014 16:04, Kevin Wolf wrote:
> Am 17.04.2014 um 23:59 hat Max Reitz geschrieben:
>> Normally, discarded sectors should read back as zero. However, there are
>> cases in which a sector (or rather cluster) should be discarded as if
>> they were never written in the first place, that is, reading them should
>> fall through to the backing file again.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/qcow2-cluster.c  | 26 ++++++++++++++++----------
>>   block/qcow2-snapshot.c |  2 +-
>>   block/qcow2.c          |  2 +-
>>   block/qcow2.h          |  2 +-
>>   4 files changed, 19 insertions(+), 13 deletions(-)
>>
>> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
>> index 331ab08..9b73d97 100644
>> --- a/block/qcow2-cluster.c
>> +++ b/block/qcow2-cluster.c
>> @@ -1343,7 +1343,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
>>    * clusters.
>>    */
>>   static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
>> -    unsigned int nb_clusters, enum qcow2_discard_type type)
>> +    unsigned int nb_clusters, enum qcow2_discard_type type, bool full_discard)
>>   {
>>       BDRVQcowState *s = bs->opaque;
>>       uint64_t *l2_table;
>> @@ -1365,25 +1365,31 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
>>           old_offset = be64_to_cpu(l2_table[l2_index + i]);
>>   
>>           /*
>> -         * Make sure that a discarded area reads back as zeroes for v3 images
>> -         * (we cannot do it for v2 without actually writing a zero-filled
>> -         * buffer). We can skip the operation if the cluster is already marked
>> -         * as zero, or if it's unallocated and we don't have a backing file.
>> +         * If full_discard is false, make sure that a discarded area reads back
>> +         * as zeroes for v3 images (we cannot do it for v2 without actually
>> +         * writing a zero-filled buffer). We can skip the operation if the
>> +         * cluster is already marked as zero, or if it's unallocated and we
>> +         * don't have a backing file.
>>            *
>>            * TODO We might want to use bdrv_get_block_status(bs) here, but we're
>>            * holding s->lock, so that doesn't work today.
>> +         *
>> +         * In case of full_discard being true, the sector should not be read
>> +         * back as zeroes, but rather fall through to the backing file.
>>            */
>> -        if (old_offset & QCOW_OFLAG_ZERO) {
>> +        if (!full_discard && (old_offset & QCOW_OFLAG_ZERO)) {
>>               continue;
>>           }
>>   
>> -        if ((old_offset & L2E_OFFSET_MASK) == 0 && !bs->backing_hd) {
>> +        if ((old_offset & L2E_OFFSET_MASK) == 0 &&
>> +            (full_discard || !bs->backing_hd))
>> +        {
>>               continue;
>>           }
> I don't think that's right. You wouldn't discard (non-preallocated) zero
> clusters with this code. You should probably check the cluster type.

You're right, I'll fix it.

Max
diff mbox

Patch

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 331ab08..9b73d97 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1343,7 +1343,7 @@  int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
  * clusters.
  */
 static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
-    unsigned int nb_clusters, enum qcow2_discard_type type)
+    unsigned int nb_clusters, enum qcow2_discard_type type, bool full_discard)
 {
     BDRVQcowState *s = bs->opaque;
     uint64_t *l2_table;
@@ -1365,25 +1365,31 @@  static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         old_offset = be64_to_cpu(l2_table[l2_index + i]);
 
         /*
-         * Make sure that a discarded area reads back as zeroes for v3 images
-         * (we cannot do it for v2 without actually writing a zero-filled
-         * buffer). We can skip the operation if the cluster is already marked
-         * as zero, or if it's unallocated and we don't have a backing file.
+         * If full_discard is false, make sure that a discarded area reads back
+         * as zeroes for v3 images (we cannot do it for v2 without actually
+         * writing a zero-filled buffer). We can skip the operation if the
+         * cluster is already marked as zero, or if it's unallocated and we
+         * don't have a backing file.
          *
          * TODO We might want to use bdrv_get_block_status(bs) here, but we're
          * holding s->lock, so that doesn't work today.
+         *
+         * In case of full_discard being true, the sector should not be read
+         * back as zeroes, but rather fall through to the backing file.
          */
-        if (old_offset & QCOW_OFLAG_ZERO) {
+        if (!full_discard && (old_offset & QCOW_OFLAG_ZERO)) {
             continue;
         }
 
-        if ((old_offset & L2E_OFFSET_MASK) == 0 && !bs->backing_hd) {
+        if ((old_offset & L2E_OFFSET_MASK) == 0 &&
+            (full_discard || !bs->backing_hd))
+        {
             continue;
         }
 
         /* First remove L2 entries */
         qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-        if (s->qcow_version >= 3) {
+        if (!full_discard && s->qcow_version >= 3) {
             l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
         } else {
             l2_table[l2_index + i] = cpu_to_be64(0);
@@ -1402,7 +1408,7 @@  static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
 }
 
 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type)
+    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
 {
     BDRVQcowState *s = bs->opaque;
     uint64_t end_offset;
@@ -1425,7 +1431,7 @@  int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
 
     /* Each L2 table is handled by its own loop iteration */
     while (nb_clusters > 0) {
-        ret = discard_single_l2(bs, offset, nb_clusters, type);
+        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
         if (ret < 0) {
             goto fail;
         }
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 0aa9def..c5ea2cd 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -436,7 +436,7 @@  int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
                            align_offset(sn->vm_state_size, s->cluster_size)
                                 >> BDRV_SECTOR_BITS,
-                           QCOW2_DISCARD_NEVER);
+                           QCOW2_DISCARD_NEVER, false);
 
 #ifdef DEBUG_ALLOC
     {
diff --git a/block/qcow2.c b/block/qcow2.c
index e903d97..1e7b7d5 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1864,7 +1864,7 @@  static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
 
     qemu_co_mutex_lock(&s->lock);
     ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
-        nb_sectors, QCOW2_DISCARD_REQUEST);
+        nb_sectors, QCOW2_DISCARD_REQUEST, false);
     qemu_co_mutex_unlock(&s->lock);
     return ret;
 }
diff --git a/block/qcow2.h b/block/qcow2.h
index b49424b..2332634 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -519,7 +519,7 @@  uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 
 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type);
+    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
 int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
 
 int qcow2_expand_zero_clusters(BlockDriverState *bs);