Message ID | 1475046261-15679-1-git-send-email-famz@redhat.com |
---|---|
State | New |
Headers | show |
On 28.09.2016 09:04, Fam Zheng wrote: > Handling this is similar to what is done to the L2 entry in the case of > compressed clusters. > > Signed-off-by: Fam Zheng <famz@redhat.com> > --- > block/qcow2-cluster.c | 9 +++++---- > block/qcow2.c | 3 ++- > block/qcow2.h | 3 ++- > 3 files changed, 9 insertions(+), 6 deletions(-) > > diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c > index 61d1ffd..928c1e2 100644 > --- a/block/qcow2-cluster.c > +++ b/block/qcow2-cluster.c > @@ -1558,7 +1558,7 @@ fail: > * clusters. > */ > static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > - uint64_t nb_clusters) > + uint64_t nb_clusters, int flags) > { > BDRVQcow2State *s = bs->opaque; > uint64_t *l2_table; > @@ -1582,7 +1582,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > > /* Update L2 entries */ > qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); > - if (old_offset & QCOW_OFLAG_COMPRESSED) { > + if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { > l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); > qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); I don't quite understand the reasoning behind this. How is this more efficient than just using the existing path where we don't discard anything? Note that BDRV_REQ_MAY_UNMAP does not mean "Yes, please discard" but just "You may discard if it's easier for you". But it's actually not easier for us, so I don't see why we're doing it. As far as I can guess you actually want some way to tell a block driver to actually make an effort to discard clusters as long they then read back as zero (which is why you cannot simply use bdrv_pdiscard()). However, I think this would require a new flag called BDRV_REQ_SHOULD_UNMAP (which should imply BDRV_REQ_MAY_UNMAP). Note that there is actually a case where qcow2 should support BDRV_REQ_MAY_UNMAP, which is for v2 images. Currently, we just return -ENOTSUP for them, but if we don't have a backing file and BDRV_REQ_MAY_UNMAP is set, we could go on and make qcow2_zero_clusters() work for them. Max > } else { > @@ -1595,7 +1595,8 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > return nb_clusters; > } > > -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) > +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, > + int flags) > { > BDRVQcow2State *s = bs->opaque; > uint64_t nb_clusters; > @@ -1612,7 +1613,7 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) > s->cache_discards = true; > > while (nb_clusters > 0) { > - ret = zero_single_l2(bs, offset, nb_clusters); > + ret = zero_single_l2(bs, offset, nb_clusters, flags); > if (ret < 0) { > goto fail; > } > diff --git a/block/qcow2.c b/block/qcow2.c > index 0e53a4d..474f244 100644 > --- a/block/qcow2.c > +++ b/block/qcow2.c > @@ -1154,6 +1154,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, > > /* Initialise locks */ > qemu_co_mutex_init(&s->lock); > + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; > > /* Repair image if dirty */ > if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && > @@ -2476,7 +2477,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, > trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count); > > /* Whatever is left can use real zero clusters */ > - ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS); > + ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags); > qemu_co_mutex_unlock(&s->lock); > > return ret; > diff --git a/block/qcow2.h b/block/qcow2.h > index 9ce5a37..92203a8 100644 > --- a/block/qcow2.h > +++ b/block/qcow2.h > @@ -547,7 +547,8 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, > int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); > int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, > int nb_sectors, enum qcow2_discard_type type, bool full_discard); > -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); > +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, > + int flags); > > int qcow2_expand_zero_clusters(BlockDriverState *bs, > BlockDriverAmendStatusCB *status_cb, >
On Wed, 09/28 18:11, Max Reitz wrote: > On 28.09.2016 09:04, Fam Zheng wrote: > > Handling this is similar to what is done to the L2 entry in the case of > > compressed clusters. > > > > Signed-off-by: Fam Zheng <famz@redhat.com> > > --- > > block/qcow2-cluster.c | 9 +++++---- > > block/qcow2.c | 3 ++- > > block/qcow2.h | 3 ++- > > 3 files changed, 9 insertions(+), 6 deletions(-) > > > > diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c > > index 61d1ffd..928c1e2 100644 > > --- a/block/qcow2-cluster.c > > +++ b/block/qcow2-cluster.c > > @@ -1558,7 +1558,7 @@ fail: > > * clusters. > > */ > > static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > > - uint64_t nb_clusters) > > + uint64_t nb_clusters, int flags) > > { > > BDRVQcow2State *s = bs->opaque; > > uint64_t *l2_table; > > @@ -1582,7 +1582,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > > > > /* Update L2 entries */ > > qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); > > - if (old_offset & QCOW_OFLAG_COMPRESSED) { > > + if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { > > l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); > > qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); > > I don't quite understand the reasoning behind this. How is this more > efficient than just using the existing path where we don't discard anything? It's more efficient in disk space. I didn't mention because it is so not specific to this, but: what virt-sparsify does is creating an overlay -> fstrim it -> qemu-img commit. This flow revealed to me that BDRV_REQ_MAY_UNMAP should have been honored this way (after a hint of "how" by Kevin). > > Note that BDRV_REQ_MAY_UNMAP does not mean "Yes, please discard" but > just "You may discard if it's easier for you". But it's actually not > easier for us, so I don't see why we're doing it. > > As far as I can guess you actually want some way to tell a block driver > to actually make an effort to discard clusters as long they then read > back as zero (which is why you cannot simply use bdrv_pdiscard()). > However, I think this would require a new flag called > BDRV_REQ_SHOULD_UNMAP (which should imply BDRV_REQ_MAY_UNMAP). This flag doesn't make sense to me, if the protocol doesn't know how to unmap, it can ignore BDRV_REQ_MAY_UNMAP, but not BDRV_REQ_SHOULD_UNMAP. It just complicates things a little. Fam
On 29/09/2016 04:21, Fam Zheng wrote: > On Wed, 09/28 18:11, Max Reitz wrote: >> Note that BDRV_REQ_MAY_UNMAP does not mean "Yes, please discard" but >> just "You may discard if it's easier for you". But it's actually not >> easier for us, so I don't see why we're doing it. >> >> As far as I can guess you actually want some way to tell a block driver >> to actually make an effort to discard clusters as long they then read >> back as zero (which is why you cannot simply use bdrv_pdiscard()). >> However, I think this would require a new flag called >> BDRV_REQ_SHOULD_UNMAP (which should imply BDRV_REQ_MAY_UNMAP). > > This flag doesn't make sense to me, if the protocol doesn't know how to unmap, > it can ignore BDRV_REQ_MAY_UNMAP, but not BDRV_REQ_SHOULD_UNMAP. It just > complicates things a little. I don't think we actually have a use for a "MAY" unmap flag. Either we keep the not-so-perfect name or we replace MAY_UNMAP with "should" or "want" or "would_like" unmap... But Fam's patch does do what was intended for the flag (which is the equivalent of the UNMAP bit in the SCSI WRITE SAME command). Paolo
On Thu, 09/29 09:58, Paolo Bonzini wrote: > > > On 29/09/2016 04:21, Fam Zheng wrote: > > On Wed, 09/28 18:11, Max Reitz wrote: > >> Note that BDRV_REQ_MAY_UNMAP does not mean "Yes, please discard" but > >> just "You may discard if it's easier for you". But it's actually not > >> easier for us, so I don't see why we're doing it. > >> > >> As far as I can guess you actually want some way to tell a block driver > >> to actually make an effort to discard clusters as long they then read > >> back as zero (which is why you cannot simply use bdrv_pdiscard()). > >> However, I think this would require a new flag called > >> BDRV_REQ_SHOULD_UNMAP (which should imply BDRV_REQ_MAY_UNMAP). > > > > This flag doesn't make sense to me, if the protocol doesn't know how to unmap, > > it can ignore BDRV_REQ_MAY_UNMAP, but not BDRV_REQ_SHOULD_UNMAP. It just > > complicates things a little. > > I don't think we actually have a use for a "MAY" unmap flag. Either we > keep the not-so-perfect name or we replace MAY_UNMAP with "should" or > "want" or "would_like" unmap... But Fam's patch does do what was > intended for the flag (which is the equivalent of the UNMAP bit in the > SCSI WRITE SAME command). After reading rfc2119, now I agree that "SHOULD" is better. :) Fam
Am 28.09.2016 um 09:04 hat Fam Zheng geschrieben: > Handling this is similar to what is done to the L2 entry in the case of > compressed clusters. > > Signed-off-by: Fam Zheng <famz@redhat.com> > --- > block/qcow2-cluster.c | 9 +++++---- > block/qcow2.c | 3 ++- > block/qcow2.h | 3 ++- > 3 files changed, 9 insertions(+), 6 deletions(-) > > diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c > index 61d1ffd..928c1e2 100644 > --- a/block/qcow2-cluster.c > +++ b/block/qcow2-cluster.c > @@ -1558,7 +1558,7 @@ fail: > * clusters. > */ > static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > - uint64_t nb_clusters) > + uint64_t nb_clusters, int flags) > { > BDRVQcow2State *s = bs->opaque; > uint64_t *l2_table; > @@ -1582,7 +1582,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > > /* Update L2 entries */ > qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); > - if (old_offset & QCOW_OFLAG_COMPRESSED) { > + if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { > l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); > qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); > } else { I don't think we should always do this for BDRV_REQ_MAY_UNMAP. The user may want to keep the image fully allocated even if the guest (or block job etc.) thinks this can be discarded. When we discussed this in another email thread (?), I think I suggested checking whether the image was opened with pass-discard-request=on. If so, go ahead and deallocate the cluster, otherwise keep it allocated. In those cases where pass-discard-request=off, it doesn't make a lot of sense to deallocate the cluster anyway because it won't shrink the file size. Kevin
On Thu, 09/29 11:29, Kevin Wolf wrote: > Am 28.09.2016 um 09:04 hat Fam Zheng geschrieben: > > Handling this is similar to what is done to the L2 entry in the case of > > compressed clusters. > > > > Signed-off-by: Fam Zheng <famz@redhat.com> > > --- > > block/qcow2-cluster.c | 9 +++++---- > > block/qcow2.c | 3 ++- > > block/qcow2.h | 3 ++- > > 3 files changed, 9 insertions(+), 6 deletions(-) > > > > diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c > > index 61d1ffd..928c1e2 100644 > > --- a/block/qcow2-cluster.c > > +++ b/block/qcow2-cluster.c > > @@ -1558,7 +1558,7 @@ fail: > > * clusters. > > */ > > static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > > - uint64_t nb_clusters) > > + uint64_t nb_clusters, int flags) > > { > > BDRVQcow2State *s = bs->opaque; > > uint64_t *l2_table; > > @@ -1582,7 +1582,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > > > > /* Update L2 entries */ > > qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); > > - if (old_offset & QCOW_OFLAG_COMPRESSED) { > > + if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { > > l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); > > qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); > > } else { > > I don't think we should always do this for BDRV_REQ_MAY_UNMAP. The user > may want to keep the image fully allocated even if the guest (or block > job etc.) thinks this can be discarded. > > When we discussed this in another email thread (?), I think I suggested > checking whether the image was opened with pass-discard-request=on. If > so, go ahead and deallocate the cluster, otherwise keep it allocated. > > In those cases where pass-discard-request=off, it doesn't make a lot of > sense to deallocate the cluster anyway because it won't shrink the file > size. I think this patch does what you mean: $ strace -f -e fallocate qemu-io \ -c 'open -o pass-discard-request=on /var/tmp/test' \ -c 'write 0 1M' \ -c 'write -u -z 0 1M' \ 2>&1 >/dev/null | \ grep fallocate [pid 17548] fallocate(17, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 327680, 1048576) = 0 $ strace -f -e fallocate qemu-io \ -c 'open -o pass-discard-request=off /var/tmp/test' \ -c 'write 0 1M' \ -c 'write -u -z 0 1M' \ 2>&1 >/dev/null | \ grep fallocate Because there is another check of pass-discard-request value in update_refcount: if (refcount == 0 && s->discard_passthrough[type]) { update_refcount_discard(bs, cluster_offset, s->cluster_size); } Fam
Am 29.09.2016 um 11:55 hat Fam Zheng geschrieben: > On Thu, 09/29 11:29, Kevin Wolf wrote: > > Am 28.09.2016 um 09:04 hat Fam Zheng geschrieben: > > > Handling this is similar to what is done to the L2 entry in the case of > > > compressed clusters. > > > > > > Signed-off-by: Fam Zheng <famz@redhat.com> > > > --- > > > block/qcow2-cluster.c | 9 +++++---- > > > block/qcow2.c | 3 ++- > > > block/qcow2.h | 3 ++- > > > 3 files changed, 9 insertions(+), 6 deletions(-) > > > > > > diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c > > > index 61d1ffd..928c1e2 100644 > > > --- a/block/qcow2-cluster.c > > > +++ b/block/qcow2-cluster.c > > > @@ -1558,7 +1558,7 @@ fail: > > > * clusters. > > > */ > > > static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > > > - uint64_t nb_clusters) > > > + uint64_t nb_clusters, int flags) > > > { > > > BDRVQcow2State *s = bs->opaque; > > > uint64_t *l2_table; > > > @@ -1582,7 +1582,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, > > > > > > /* Update L2 entries */ > > > qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); > > > - if (old_offset & QCOW_OFLAG_COMPRESSED) { > > > + if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { > > > l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); > > > qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); > > > } else { > > > > I don't think we should always do this for BDRV_REQ_MAY_UNMAP. The user > > may want to keep the image fully allocated even if the guest (or block > > job etc.) thinks this can be discarded. > > > > When we discussed this in another email thread (?), I think I suggested > > checking whether the image was opened with pass-discard-request=on. If > > so, go ahead and deallocate the cluster, otherwise keep it allocated. > > > > In those cases where pass-discard-request=off, it doesn't make a lot of > > sense to deallocate the cluster anyway because it won't shrink the file > > size. > > I think this patch does what you mean: > > $ strace -f -e fallocate qemu-io \ > -c 'open -o pass-discard-request=on /var/tmp/test' \ > -c 'write 0 1M' \ > -c 'write -u -z 0 1M' \ > 2>&1 >/dev/null | \ > grep fallocate > [pid 17548] fallocate(17, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 327680, 1048576) = 0 > $ strace -f -e fallocate qemu-io \ > -c 'open -o pass-discard-request=off /var/tmp/test' \ > -c 'write 0 1M' \ > -c 'write -u -z 0 1M' \ > 2>&1 >/dev/null | \ > grep fallocate > > Because there is another check of pass-discard-request value in > update_refcount: > > if (refcount == 0 && s->discard_passthrough[type]) { > update_refcount_discard(bs, cluster_offset, s->cluster_size); > } What I mean is that in the second case, you're still uselessly deallocating the cluster on the qcow2 level while you can't reclaim it on the filesystem level. So it would be better to leave it allocated in qcow2, too, so that you don't get an expensive reallocation the next time you write to it. Kevin
On 29/09/2016 12:39, Kevin Wolf wrote: >> > >> > Because there is another check of pass-discard-request value in >> > update_refcount: >> > >> > if (refcount == 0 && s->discard_passthrough[type]) { >> > update_refcount_discard(bs, cluster_offset, s->cluster_size); >> > } > What I mean is that in the second case, you're still uselessly > deallocating the cluster on the qcow2 level while you can't reclaim it > on the filesystem level. So it would be better to leave it allocated in > qcow2, too, so that you don't get an expensive reallocation the next > time you write to it. But if you do a qemu-img convert, the deallocated cluster wouldn't be in the destination. Paolo
Am 29.09.2016 um 14:14 hat Paolo Bonzini geschrieben: > On 29/09/2016 12:39, Kevin Wolf wrote: > >> > Because there is another check of pass-discard-request value in > >> > update_refcount: > >> > > >> > if (refcount == 0 && s->discard_passthrough[type]) { > >> > update_refcount_discard(bs, cluster_offset, s->cluster_size); > >> > } > > What I mean is that in the second case, you're still uselessly > > deallocating the cluster on the qcow2 level while you can't reclaim it > > on the filesystem level. So it would be better to leave it allocated in > > qcow2, too, so that you don't get an expensive reallocation the next > > time you write to it. > > But if you do a qemu-img convert, the deallocated cluster wouldn't be in > the destination. Right. I still think that there has to be an option to keep the image fully allocated. Perhaps what we really need to check is BDRV_O_UNMAP. Kevin
On 29/09/2016 14:48, Kevin Wolf wrote: > Am 29.09.2016 um 14:14 hat Paolo Bonzini geschrieben: >> On 29/09/2016 12:39, Kevin Wolf wrote: >>>>> Because there is another check of pass-discard-request value in >>>>> update_refcount: >>>>> >>>>> if (refcount == 0 && s->discard_passthrough[type]) { >>>>> update_refcount_discard(bs, cluster_offset, s->cluster_size); >>>>> } >>> What I mean is that in the second case, you're still uselessly >>> deallocating the cluster on the qcow2 level while you can't reclaim it >>> on the filesystem level. So it would be better to leave it allocated in >>> qcow2, too, so that you don't get an expensive reallocation the next >>> time you write to it. >> >> But if you do a qemu-img convert, the deallocated cluster wouldn't be in >> the destination. > > Right. I still think that there has to be an option to keep the image > fully allocated. Perhaps what we really need to check is BDRV_O_UNMAP. Duh, of course it is. Paolo
On 29/09/2016 14:50, Paolo Bonzini wrote: > > > On 29/09/2016 14:48, Kevin Wolf wrote: >> Am 29.09.2016 um 14:14 hat Paolo Bonzini geschrieben: >>> On 29/09/2016 12:39, Kevin Wolf wrote: >>>>>> Because there is another check of pass-discard-request value in >>>>>> update_refcount: >>>>>> >>>>>> if (refcount == 0 && s->discard_passthrough[type]) { >>>>>> update_refcount_discard(bs, cluster_offset, s->cluster_size); >>>>>> } >>>> What I mean is that in the second case, you're still uselessly >>>> deallocating the cluster on the qcow2 level while you can't reclaim it >>>> on the filesystem level. So it would be better to leave it allocated in >>>> qcow2, too, so that you don't get an expensive reallocation the next >>>> time you write to it. >>> >>> But if you do a qemu-img convert, the deallocated cluster wouldn't be in >>> the destination. >> >> Right. I still think that there has to be an option to keep the image >> fully allocated. Perhaps what we really need to check is BDRV_O_UNMAP. > > Duh, of course it is. ... and it's handled in bdrv_co_pwrite_zeroes, so Fam's patch should be okay: if (!(child->bs->open_flags & BDRV_O_UNMAP)) { flags &= ~BDRV_REQ_MAY_UNMAP; } return bdrv_co_pwritev(child, offset, count, NULL, BDRV_REQ_ZERO_WRITE | flags); Paolo
On 29.09.2016 10:10, Fam Zheng wrote: > On Thu, 09/29 09:58, Paolo Bonzini wrote: >> >> >> On 29/09/2016 04:21, Fam Zheng wrote: >>> On Wed, 09/28 18:11, Max Reitz wrote: >>>> Note that BDRV_REQ_MAY_UNMAP does not mean "Yes, please discard" but >>>> just "You may discard if it's easier for you". But it's actually not >>>> easier for us, so I don't see why we're doing it. >>>> >>>> As far as I can guess you actually want some way to tell a block driver >>>> to actually make an effort to discard clusters as long they then read >>>> back as zero (which is why you cannot simply use bdrv_pdiscard()). >>>> However, I think this would require a new flag called >>>> BDRV_REQ_SHOULD_UNMAP (which should imply BDRV_REQ_MAY_UNMAP). >>> >>> This flag doesn't make sense to me, if the protocol doesn't know how to unmap, >>> it can ignore BDRV_REQ_MAY_UNMAP, but not BDRV_REQ_SHOULD_UNMAP. It just >>> complicates things a little. >> >> I don't think we actually have a use for a "MAY" unmap flag. Either we >> keep the not-so-perfect name or we replace MAY_UNMAP with "should" or >> "want" or "would_like" unmap... But Fam's patch does do what was >> intended for the flag (which is the equivalent of the UNMAP bit in the >> SCSI WRITE SAME command). > > After reading rfc2119, now I agree that "SHOULD" is better. :) That's OK with me, then. Max
On Wed, 09/28 15:04, Fam Zheng wrote: > Handling this is similar to what is done to the L2 entry in the case of > compressed clusters. Kevin, Max, is there anything else I need to do before this patch can be applied? Fam
Am 12.10.2016 um 03:14 hat Fam Zheng geschrieben: > On Wed, 09/28 15:04, Fam Zheng wrote: > > Handling this is similar to what is done to the L2 entry in the case of > > compressed clusters. > > Kevin, Max, is there anything else I need to do before this patch can be > applied? Hm, actually, it looks like we had a few discussions, but came to the conclusions that the patch is alright. Thanks, applied to the block branch. Kevin
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 61d1ffd..928c1e2 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1558,7 +1558,7 @@ fail: * clusters. */ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, - uint64_t nb_clusters) + uint64_t nb_clusters, int flags) { BDRVQcow2State *s = bs->opaque; uint64_t *l2_table; @@ -1582,7 +1582,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, /* Update L2 entries */ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); - if (old_offset & QCOW_OFLAG_COMPRESSED) { + if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); } else { @@ -1595,7 +1595,8 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, return nb_clusters; } -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, + int flags) { BDRVQcow2State *s = bs->opaque; uint64_t nb_clusters; @@ -1612,7 +1613,7 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) s->cache_discards = true; while (nb_clusters > 0) { - ret = zero_single_l2(bs, offset, nb_clusters); + ret = zero_single_l2(bs, offset, nb_clusters, flags); if (ret < 0) { goto fail; } diff --git a/block/qcow2.c b/block/qcow2.c index 0e53a4d..474f244 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1154,6 +1154,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, /* Initialise locks */ qemu_co_mutex_init(&s->lock); + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; /* Repair image if dirty */ if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && @@ -2476,7 +2477,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count); /* Whatever is left can use real zero clusters */ - ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS); + ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags); qemu_co_mutex_unlock(&s->lock); return ret; diff --git a/block/qcow2.h b/block/qcow2.h index 9ce5a37..92203a8 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -547,7 +547,8 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, enum qcow2_discard_type type, bool full_discard); -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, + int flags); int qcow2_expand_zero_clusters(BlockDriverState *bs, BlockDriverAmendStatusCB *status_cb,
Handling this is similar to what is done to the L2 entry in the case of compressed clusters. Signed-off-by: Fam Zheng <famz@redhat.com> --- block/qcow2-cluster.c | 9 +++++---- block/qcow2.c | 3 ++- block/qcow2.h | 3 ++- 3 files changed, 9 insertions(+), 6 deletions(-)