diff mbox series

[2/2] qcow2: truncate the tail of the image file after shrinking the image

Message ID 20170920135833.20472-3-pbutsykin@virtuozzo.com
State New
Headers show
Series Truncate the tail of the image file in qcow2 shrinking | expand

Commit Message

Pavel Butsykin Sept. 20, 2017, 1:58 p.m. UTC
Now after shrinking the image, at the end of the image file, there might be a
tail that probably will never be used. So we can find the last used cluster and
cut the tail.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
---
 block/qcow2-refcount.c | 21 +++++++++++++++++++++
 block/qcow2.c          | 19 +++++++++++++++++++
 block/qcow2.h          |  1 +
 3 files changed, 41 insertions(+)

Comments

John Snow Sept. 20, 2017, 9:38 p.m. UTC | #1
On 09/20/2017 09:58 AM, Pavel Butsykin wrote:
> Now after shrinking the image, at the end of the image file, there might be a
> tail that probably will never be used. So we can find the last used cluster and
> cut the tail.
> 
> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
> ---
>  block/qcow2-refcount.c | 21 +++++++++++++++++++++
>  block/qcow2.c          | 19 +++++++++++++++++++
>  block/qcow2.h          |  1 +
>  3 files changed, 41 insertions(+)
> 
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index 88d5a3f1ad..5e221a166c 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -3181,3 +3181,24 @@ out:
>      g_free(reftable_tmp);
>      return ret;
>  }
> +
> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
> +{
> +    BDRVQcow2State *s = bs->opaque;
> +    int64_t i, last_cluster, nb_clusters = size_to_clusters(s, size);
> +    uint64_t refcount;
> +
> +    for (i = 0, last_cluster = 0; i < nb_clusters; i++) {
> +        int ret = qcow2_get_refcount(bs, i, &refcount);
> +        if (ret < 0) {
> +            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
> +                    i, strerror(-ret));
> +            continue;
> +        }
> +
> +        if (refcount > 0) {
> +            last_cluster = i;
> +        }
> +    }
> +    return last_cluster;
> +}> diff --git a/block/qcow2.c b/block/qcow2.c
> index 8a4311d338..c3b6dd44c4 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -3106,6 +3106,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
>      new_l1_size = size_to_l1(s, offset);
>  
>      if (offset < old_length) {
> +        int64_t image_end_offset, old_file_size;
>          if (prealloc != PREALLOC_MODE_OFF) {
>              error_setg(errp,
>                         "Preallocation can't be used for shrinking an image");
> @@ -3134,6 +3135,24 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
>                               "Failed to discard unused refblocks");
>              return ret;
>          }
> +
> +        old_file_size = bdrv_getlength(bs->file->bs);
> +        if (old_file_size < 0) {
> +            error_setg_errno(errp, -old_file_size,
> +                             "Failed to inquire current file length");
> +            return old_file_size;
> +        }
> +        image_end_offset = (qcow2_get_last_cluster(bs, old_file_size) + 1) *
> +                           s->cluster_size;
> +        if (image_end_offset < old_file_size) {
> +            ret = bdrv_truncate(bs->file, image_end_offset,
> +                                PREALLOC_MODE_OFF, NULL);
> +            if (ret < 0) {
> +                error_setg_errno(errp, -ret,
> +                                 "Failed to truncate the tail of the image");

I've recently become skeptical of what partial resize successes look
like, but that's an issue for another day entirely.

> +                return ret;
> +            }
> +        }
>      } else {
>          ret = qcow2_grow_l1_table(bs, new_l1_size, true);
>          if (ret < 0) {
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 5a289a81e2..782a206ecb 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -597,6 +597,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
>                                  BlockDriverAmendStatusCB *status_cb,
>                                  void *cb_opaque, Error **errp);
>  int qcow2_shrink_reftable(BlockDriverState *bs);
> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
>  
>  /* qcow2-cluster.c functions */
>  int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
> 

Reviewed-by: John Snow <jsnow@redhat.com>

Looks sane to me, but under which circumstances might we grow such a
tail? I assume the actual truncate call aligns to cluster boundaries as
appropriate, so is this a bit of a "quick fix" to cull unused clusters
that happened to be near the truncate boundary?

It might be worth documenting the circumstances that produces this
unused space that will never get used. My hunch is that such unused
space should likely be getting reclaimed elsewhere and not here, but
perhaps I'm misunderstanding the causal factors.

--js
Pavel Butsykin Sept. 21, 2017, 9:49 a.m. UTC | #2
On 21.09.2017 00:38, John Snow wrote:
> 
> 
> On 09/20/2017 09:58 AM, Pavel Butsykin wrote:
>> Now after shrinking the image, at the end of the image file, there might be a
>> tail that probably will never be used. So we can find the last used cluster and
>> cut the tail.
>>
>> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
>> ---
>>   block/qcow2-refcount.c | 21 +++++++++++++++++++++
>>   block/qcow2.c          | 19 +++++++++++++++++++
>>   block/qcow2.h          |  1 +
>>   3 files changed, 41 insertions(+)
>>
>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>> index 88d5a3f1ad..5e221a166c 100644
>> --- a/block/qcow2-refcount.c
>> +++ b/block/qcow2-refcount.c
>> @@ -3181,3 +3181,24 @@ out:
>>       g_free(reftable_tmp);
>>       return ret;
>>   }
>> +
>> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
>> +{
>> +    BDRVQcow2State *s = bs->opaque;
>> +    int64_t i, last_cluster, nb_clusters = size_to_clusters(s, size);
>> +    uint64_t refcount;
>> +
>> +    for (i = 0, last_cluster = 0; i < nb_clusters; i++) {
>> +        int ret = qcow2_get_refcount(bs, i, &refcount);
>> +        if (ret < 0) {
>> +            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
>> +                    i, strerror(-ret));
>> +            continue;
>> +        }
>> +
>> +        if (refcount > 0) {
>> +            last_cluster = i;
>> +        }
>> +    }
>> +    return last_cluster;
>> +}> diff --git a/block/qcow2.c b/block/qcow2.c
>> index 8a4311d338..c3b6dd44c4 100644
>> --- a/block/qcow2.c
>> +++ b/block/qcow2.c
>> @@ -3106,6 +3106,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
>>       new_l1_size = size_to_l1(s, offset);
>>   
>>       if (offset < old_length) {
>> +        int64_t image_end_offset, old_file_size;
>>           if (prealloc != PREALLOC_MODE_OFF) {
>>               error_setg(errp,
>>                          "Preallocation can't be used for shrinking an image");
>> @@ -3134,6 +3135,24 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
>>                                "Failed to discard unused refblocks");
>>               return ret;
>>           }
>> +
>> +        old_file_size = bdrv_getlength(bs->file->bs);
>> +        if (old_file_size < 0) {
>> +            error_setg_errno(errp, -old_file_size,
>> +                             "Failed to inquire current file length");
>> +            return old_file_size;
>> +        }
>> +        image_end_offset = (qcow2_get_last_cluster(bs, old_file_size) + 1) *
>> +                           s->cluster_size;
>> +        if (image_end_offset < old_file_size) {
>> +            ret = bdrv_truncate(bs->file, image_end_offset,
>> +                                PREALLOC_MODE_OFF, NULL);
>> +            if (ret < 0) {
>> +                error_setg_errno(errp, -ret,
>> +                                 "Failed to truncate the tail of the image");
> 
> I've recently become skeptical of what partial resize successes look
> like, but that's an issue for another day entirely.
> 
>> +                return ret;
>> +            }
>> +        }
>>       } else {
>>           ret = qcow2_grow_l1_table(bs, new_l1_size, true);
>>           if (ret < 0) {
>> diff --git a/block/qcow2.h b/block/qcow2.h
>> index 5a289a81e2..782a206ecb 100644
>> --- a/block/qcow2.h
>> +++ b/block/qcow2.h
>> @@ -597,6 +597,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
>>                                   BlockDriverAmendStatusCB *status_cb,
>>                                   void *cb_opaque, Error **errp);
>>   int qcow2_shrink_reftable(BlockDriverState *bs);
>> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
>>   
>>   /* qcow2-cluster.c functions */
>>   int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
>>
> 
> Reviewed-by: John Snow <jsnow@redhat.com>
> 
> Looks sane to me, but under which circumstances might we grow such a
> tail? I assume the actual truncate call aligns to cluster boundaries as
> appropriate, so is this a bit of a "quick fix" to cull unused clusters
> that happened to be near the truncate boundary?
> 
> It might be worth documenting the circumstances that produces this
> unused space that will never get used. My hunch is that such unused
> space should likely be getting reclaimed elsewhere and not here, but
> perhaps I'm misunderstanding the causal factors.
>

This is a consequence of how we implemented shrinking the qcow2 image.
(https://lists.gnu.org/archive/html/qemu-devel/2017-09/msg04580.html)
But on the other hand, if we need to shrink the qcow2 image without
copying the data, this is the only way. The same guest offset can be
converted to almost any host offset in the file i.e. the first guest
cluster may be located somewhere at the end or the middle of the image
file. So we can't just take and truncate the image file on the border of
the truncation, therefore to shrink the image we just discard the
clusters that corresponds to the truncated area. The result is a
sparse image file where the apparent file size differs from actual size.
And the tail in this case is the difference between the actual size and
last used cluster in the image, so in fact the cutting of the tail does
not change the apparent file size.

> --js
>
Max Reitz Sept. 21, 2017, 3:28 p.m. UTC | #3
On 2017-09-20 15:58, Pavel Butsykin wrote:
> Now after shrinking the image, at the end of the image file, there might be a
> tail that probably will never be used. So we can find the last used cluster and
> cut the tail.
> 
> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
> ---
>  block/qcow2-refcount.c | 21 +++++++++++++++++++++
>  block/qcow2.c          | 19 +++++++++++++++++++
>  block/qcow2.h          |  1 +
>  3 files changed, 41 insertions(+)
> 
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index 88d5a3f1ad..5e221a166c 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -3181,3 +3181,24 @@ out:
>      g_free(reftable_tmp);
>      return ret;
>  }
> +
> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
> +{
> +    BDRVQcow2State *s = bs->opaque;
> +    int64_t i, last_cluster, nb_clusters = size_to_clusters(s, size);
> +    uint64_t refcount;
> +
> +    for (i = 0, last_cluster = 0; i < nb_clusters; i++) {
> +        int ret = qcow2_get_refcount(bs, i, &refcount);
> +        if (ret < 0) {
> +            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
> +                    i, strerror(-ret));
> +            continue;
> +        }
> +
> +        if (refcount > 0) {
> +            last_cluster = i;
> +        }
> +    }
> +    return last_cluster;
> +}

Wouldn't it make more sense to start from the end of the image?

Max
Max Reitz Sept. 21, 2017, 3:30 p.m. UTC | #4
On 2017-09-20 15:58, Pavel Butsykin wrote:
> Now after shrinking the image, at the end of the image file, there might be a
> tail that probably will never be used. So we can find the last used cluster and
> cut the tail.
> 
> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
> ---
>  block/qcow2-refcount.c | 21 +++++++++++++++++++++
>  block/qcow2.c          | 19 +++++++++++++++++++
>  block/qcow2.h          |  1 +
>  3 files changed, 41 insertions(+)
> 
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index 88d5a3f1ad..5e221a166c 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -3181,3 +3181,24 @@ out:
>      g_free(reftable_tmp);
>      return ret;
>  }
> +
> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
> +{
> +    BDRVQcow2State *s = bs->opaque;
> +    int64_t i, last_cluster, nb_clusters = size_to_clusters(s, size);
> +    uint64_t refcount;
> +
> +    for (i = 0, last_cluster = 0; i < nb_clusters; i++) {
> +        int ret = qcow2_get_refcount(bs, i, &refcount);
> +        if (ret < 0) {
> +            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
> +                    i, strerror(-ret));
> +            continue;

Oh, and another thing: If you decide to ignore errors here, I'd at least
consider the cluster allocated.

Of course it would also be possible not to ignore errors, and instead
return them to the caller which would then just not truncate the file.

Max

> +        }
> +
> +        if (refcount > 0) {
> +            last_cluster = i;
> +        }
> +    }
> +    return last_cluster;
> +}
Pavel Butsykin Sept. 21, 2017, 3:52 p.m. UTC | #5
On 21.09.2017 18:30, Max Reitz wrote:
> On 2017-09-20 15:58, Pavel Butsykin wrote:
>> Now after shrinking the image, at the end of the image file, there might be a
>> tail that probably will never be used. So we can find the last used cluster and
>> cut the tail.
>>
>> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
>> ---
>>   block/qcow2-refcount.c | 21 +++++++++++++++++++++
>>   block/qcow2.c          | 19 +++++++++++++++++++
>>   block/qcow2.h          |  1 +
>>   3 files changed, 41 insertions(+)
>>
>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>> index 88d5a3f1ad..5e221a166c 100644
>> --- a/block/qcow2-refcount.c
>> +++ b/block/qcow2-refcount.c
>> @@ -3181,3 +3181,24 @@ out:
>>       g_free(reftable_tmp);
>>       return ret;
>>   }
>> +
>> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
>> +{
>> +    BDRVQcow2State *s = bs->opaque;
>> +    int64_t i, last_cluster, nb_clusters = size_to_clusters(s, size);
>> +    uint64_t refcount;
>> +
>> +    for (i = 0, last_cluster = 0; i < nb_clusters; i++) {
>> +        int ret = qcow2_get_refcount(bs, i, &refcount);
>> +        if (ret < 0) {
>> +            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
>> +                    i, strerror(-ret));
>> +            continue;
> 
> Oh, and another thing: If you decide to ignore errors here, I'd at least
> consider the cluster allocated.
> 
> Of course it would also be possible not to ignore errors, and instead
> return them to the caller which would then just not truncate the file.

Yes, it seems so safer.

> Max
> 
>> +        }
>> +
>> +        if (refcount > 0) {
>> +            last_cluster = i;
>> +        }
>> +    }
>> +    return last_cluster;
>> +}
>
Pavel Butsykin Sept. 21, 2017, 4:16 p.m. UTC | #6
On 21.09.2017 18:28, Max Reitz wrote:
> On 2017-09-20 15:58, Pavel Butsykin wrote:
>> Now after shrinking the image, at the end of the image file, there might be a
>> tail that probably will never be used. So we can find the last used cluster and
>> cut the tail.
>>
>> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
>> ---
>>   block/qcow2-refcount.c | 21 +++++++++++++++++++++
>>   block/qcow2.c          | 19 +++++++++++++++++++
>>   block/qcow2.h          |  1 +
>>   3 files changed, 41 insertions(+)
>>
>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>> index 88d5a3f1ad..5e221a166c 100644
>> --- a/block/qcow2-refcount.c
>> +++ b/block/qcow2-refcount.c
>> @@ -3181,3 +3181,24 @@ out:
>>       g_free(reftable_tmp);
>>       return ret;
>>   }
>> +
>> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
>> +{
>> +    BDRVQcow2State *s = bs->opaque;
>> +    int64_t i, last_cluster, nb_clusters = size_to_clusters(s, size);
>> +    uint64_t refcount;
>> +
>> +    for (i = 0, last_cluster = 0; i < nb_clusters; i++) {
>> +        int ret = qcow2_get_refcount(bs, i, &refcount);
>> +        if (ret < 0) {
>> +            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
>> +                    i, strerror(-ret));
>> +            continue;
>> +        }
>> +
>> +        if (refcount > 0) {
>> +            last_cluster = i;
>> +        }
>> +    }
>> +    return last_cluster;
>> +}
> 
> Wouldn't it make more sense to start from the end of the image?

If this will reduce the iterations, then yes. But it will depend on the
situation. If you truncate the image more than half, it can increase the
number of iterations. But intuitively it seems that to start from the
end would be better :)
> Max
>
Max Reitz Sept. 21, 2017, 4:20 p.m. UTC | #7
On 2017-09-21 18:16, Pavel Butsykin wrote:
> On 21.09.2017 18:28, Max Reitz wrote:
>> On 2017-09-20 15:58, Pavel Butsykin wrote:
>>> Now after shrinking the image, at the end of the image file, there
>>> might be a
>>> tail that probably will never be used. So we can find the last used
>>> cluster and
>>> cut the tail.
>>>
>>> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
>>> ---
>>>   block/qcow2-refcount.c | 21 +++++++++++++++++++++
>>>   block/qcow2.c          | 19 +++++++++++++++++++
>>>   block/qcow2.h          |  1 +
>>>   3 files changed, 41 insertions(+)
>>>
>>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>>> index 88d5a3f1ad..5e221a166c 100644
>>> --- a/block/qcow2-refcount.c
>>> +++ b/block/qcow2-refcount.c
>>> @@ -3181,3 +3181,24 @@ out:
>>>       g_free(reftable_tmp);
>>>       return ret;
>>>   }
>>> +
>>> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
>>> +{
>>> +    BDRVQcow2State *s = bs->opaque;
>>> +    int64_t i, last_cluster, nb_clusters = size_to_clusters(s, size);
>>> +    uint64_t refcount;
>>> +
>>> +    for (i = 0, last_cluster = 0; i < nb_clusters; i++) {
>>> +        int ret = qcow2_get_refcount(bs, i, &refcount);
>>> +        if (ret < 0) {
>>> +            fprintf(stderr, "Can't get refcount for cluster %"
>>> PRId64 ": %s\n",
>>> +                    i, strerror(-ret));
>>> +            continue;
>>> +        }
>>> +
>>> +        if (refcount > 0) {
>>> +            last_cluster = i;
>>> +        }
>>> +    }
>>> +    return last_cluster;
>>> +}
>>
>> Wouldn't it make more sense to start from the end of the image?
> 
> If this will reduce the iterations, then yes. But it will depend on the
> situation. If you truncate the image more than half, it can increase the
> number of iterations. But intuitively it seems that to start from the
> end would be better :)

That's one thing (also, I think usually the end should coincide with
some allocated cluster, but yes, that's just intuition :-)); but also,
this would simplify things a bit because we would no longer need the
last_cluster variable (the loop would just stop at the first allocated
cluster).

Max
John Snow Sept. 21, 2017, 4:48 p.m. UTC | #8
On 09/21/2017 05:49 AM, Pavel Butsykin wrote:
> On 21.09.2017 00:38, John Snow wrote:
>>
>>
>> On 09/20/2017 09:58 AM, Pavel Butsykin wrote:
>>> Now after shrinking the image, at the end of the image file, there
>>> might be a
>>> tail that probably will never be used. So we can find the last used
>>> cluster and
>>> cut the tail.
>>>
>>> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
>>> ---
>>>   block/qcow2-refcount.c | 21 +++++++++++++++++++++
>>>   block/qcow2.c          | 19 +++++++++++++++++++
>>>   block/qcow2.h          |  1 +
>>>   3 files changed, 41 insertions(+)
>>>
>>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>>> index 88d5a3f1ad..5e221a166c 100644
>>> --- a/block/qcow2-refcount.c
>>> +++ b/block/qcow2-refcount.c
>>> @@ -3181,3 +3181,24 @@ out:
>>>       g_free(reftable_tmp);
>>>       return ret;
>>>   }
>>> +
>>> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
>>> +{
>>> +    BDRVQcow2State *s = bs->opaque;
>>> +    int64_t i, last_cluster, nb_clusters = size_to_clusters(s, size);
>>> +    uint64_t refcount;
>>> +
>>> +    for (i = 0, last_cluster = 0; i < nb_clusters; i++) {
>>> +        int ret = qcow2_get_refcount(bs, i, &refcount);
>>> +        if (ret < 0) {
>>> +            fprintf(stderr, "Can't get refcount for cluster %"
>>> PRId64 ": %s\n",
>>> +                    i, strerror(-ret));
>>> +            continue;
>>> +        }
>>> +
>>> +        if (refcount > 0) {
>>> +            last_cluster = i;
>>> +        }
>>> +    }
>>> +    return last_cluster;
>>> +}> diff --git a/block/qcow2.c b/block/qcow2.c
>>> index 8a4311d338..c3b6dd44c4 100644
>>> --- a/block/qcow2.c
>>> +++ b/block/qcow2.c
>>> @@ -3106,6 +3106,7 @@ static int qcow2_truncate(BlockDriverState *bs,
>>> int64_t offset,
>>>       new_l1_size = size_to_l1(s, offset);
>>>         if (offset < old_length) {
>>> +        int64_t image_end_offset, old_file_size;
>>>           if (prealloc != PREALLOC_MODE_OFF) {
>>>               error_setg(errp,
>>>                          "Preallocation can't be used for shrinking
>>> an image");
>>> @@ -3134,6 +3135,24 @@ static int qcow2_truncate(BlockDriverState
>>> *bs, int64_t offset,
>>>                                "Failed to discard unused refblocks");
>>>               return ret;
>>>           }
>>> +
>>> +        old_file_size = bdrv_getlength(bs->file->bs);
>>> +        if (old_file_size < 0) {
>>> +            error_setg_errno(errp, -old_file_size,
>>> +                             "Failed to inquire current file length");
>>> +            return old_file_size;
>>> +        }
>>> +        image_end_offset = (qcow2_get_last_cluster(bs,
>>> old_file_size) + 1) *
>>> +                           s->cluster_size;
>>> +        if (image_end_offset < old_file_size) {
>>> +            ret = bdrv_truncate(bs->file, image_end_offset,
>>> +                                PREALLOC_MODE_OFF, NULL);
>>> +            if (ret < 0) {
>>> +                error_setg_errno(errp, -ret,
>>> +                                 "Failed to truncate the tail of the
>>> image");
>>
>> I've recently become skeptical of what partial resize successes look
>> like, but that's an issue for another day entirely.
>>
>>> +                return ret;
>>> +            }
>>> +        }
>>>       } else {
>>>           ret = qcow2_grow_l1_table(bs, new_l1_size, true);
>>>           if (ret < 0) {
>>> diff --git a/block/qcow2.h b/block/qcow2.h
>>> index 5a289a81e2..782a206ecb 100644
>>> --- a/block/qcow2.h
>>> +++ b/block/qcow2.h
>>> @@ -597,6 +597,7 @@ int qcow2_change_refcount_order(BlockDriverState
>>> *bs, int refcount_order,
>>>                                   BlockDriverAmendStatusCB *status_cb,
>>>                                   void *cb_opaque, Error **errp);
>>>   int qcow2_shrink_reftable(BlockDriverState *bs);
>>> +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
>>>     /* qcow2-cluster.c functions */
>>>   int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
>>>
>>
>> Reviewed-by: John Snow <jsnow@redhat.com>
>>
>> Looks sane to me, but under which circumstances might we grow such a
>> tail? I assume the actual truncate call aligns to cluster boundaries as
>> appropriate, so is this a bit of a "quick fix" to cull unused clusters
>> that happened to be near the truncate boundary?
>>
>> It might be worth documenting the circumstances that produces this
>> unused space that will never get used. My hunch is that such unused
>> space should likely be getting reclaimed elsewhere and not here, but
>> perhaps I'm misunderstanding the causal factors.
>>
> 
> This is a consequence of how we implemented shrinking the qcow2 image.
> (https://lists.gnu.org/archive/html/qemu-devel/2017-09/msg04580.html)
> But on the other hand, if we need to shrink the qcow2 image without
> copying the data, this is the only way. The same guest offset can be
> converted to almost any host offset in the file i.e. the first guest
> cluster may be located somewhere at the end or the middle of the image
> file. So we can't just take and truncate the image file on the border of
> the truncation, therefore to shrink the image we just discard the
> clusters that corresponds to the truncated area. The result is a
> sparse image file where the apparent file size differs from actual size.
> And the tail in this case is the difference between the actual size and
> last used cluster in the image, so in fact the cutting of the tail does
> not change the apparent file size.
> 

Oh, duh, I get it. The truncation itself creates a lot of sparseness.

Thanks for the explanation.
Eric Blake Sept. 21, 2017, 6:23 p.m. UTC | #9
On 09/21/2017 11:48 AM, John Snow wrote:
>>> Looks sane to me, but under which circumstances might we grow such a
>>> tail? I assume the actual truncate call aligns to cluster boundaries as
>>> appropriate, so is this a bit of a "quick fix" to cull unused clusters
>>> that happened to be near the truncate boundary?
>>>
>>> It might be worth documenting the circumstances that produces this
>>> unused space that will never get used. My hunch is that such unused
>>> space should likely be getting reclaimed elsewhere and not here, but
>>> perhaps I'm misunderstanding the causal factors.
>>>
>>
>> This is a consequence of how we implemented shrinking the qcow2 image.
>> (https://lists.gnu.org/archive/html/qemu-devel/2017-09/msg04580.html)
>> But on the other hand, if we need to shrink the qcow2 image without
>> copying the data, this is the only way. The same guest offset can be
>> converted to almost any host offset in the file i.e. the first guest
>> cluster may be located somewhere at the end or the middle of the image
>> file. So we can't just take and truncate the image file on the border of
>> the truncation, therefore to shrink the image we just discard the
>> clusters that corresponds to the truncated area. The result is a
>> sparse image file where the apparent file size differs from actual size.
>> And the tail in this case is the difference between the actual size and
>> last used cluster in the image, so in fact the cutting of the tail does
>> not change the apparent file size.
>>
> 
> Oh, duh, I get it. The truncation itself creates a lot of sparseness.

It is also interesting to think about whether we should someday
implement a qcow2 defragmenting operation (either a simple one: pull all
later clusters into earlier holes, but the end result is not necessarily
contiguous; or a complex one: shuffle things so that all clusters are in
order, even though it may require moving some clusters around twice), so
that you can remove all holes from the file (perhaps useful if the file
is stored on a system that does not support holes).  But not for this
series :)
diff mbox series

Patch

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 88d5a3f1ad..5e221a166c 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -3181,3 +3181,24 @@  out:
     g_free(reftable_tmp);
     return ret;
 }
+
+int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int64_t i, last_cluster, nb_clusters = size_to_clusters(s, size);
+    uint64_t refcount;
+
+    for (i = 0, last_cluster = 0; i < nb_clusters; i++) {
+        int ret = qcow2_get_refcount(bs, i, &refcount);
+        if (ret < 0) {
+            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
+                    i, strerror(-ret));
+            continue;
+        }
+
+        if (refcount > 0) {
+            last_cluster = i;
+        }
+    }
+    return last_cluster;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 8a4311d338..c3b6dd44c4 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3106,6 +3106,7 @@  static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
     new_l1_size = size_to_l1(s, offset);
 
     if (offset < old_length) {
+        int64_t image_end_offset, old_file_size;
         if (prealloc != PREALLOC_MODE_OFF) {
             error_setg(errp,
                        "Preallocation can't be used for shrinking an image");
@@ -3134,6 +3135,24 @@  static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
                              "Failed to discard unused refblocks");
             return ret;
         }
+
+        old_file_size = bdrv_getlength(bs->file->bs);
+        if (old_file_size < 0) {
+            error_setg_errno(errp, -old_file_size,
+                             "Failed to inquire current file length");
+            return old_file_size;
+        }
+        image_end_offset = (qcow2_get_last_cluster(bs, old_file_size) + 1) *
+                           s->cluster_size;
+        if (image_end_offset < old_file_size) {
+            ret = bdrv_truncate(bs->file, image_end_offset,
+                                PREALLOC_MODE_OFF, NULL);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret,
+                                 "Failed to truncate the tail of the image");
+                return ret;
+            }
+        }
     } else {
         ret = qcow2_grow_l1_table(bs, new_l1_size, true);
         if (ret < 0) {
diff --git a/block/qcow2.h b/block/qcow2.h
index 5a289a81e2..782a206ecb 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -597,6 +597,7 @@  int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                 BlockDriverAmendStatusCB *status_cb,
                                 void *cb_opaque, Error **errp);
 int qcow2_shrink_reftable(BlockDriverState *bs);
+int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
 
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,