[v4,07/23] block: Convert bdrv_get_block_status() to bytes

Message ID 20170913160333.23622-8-eblake@redhat.com
State New
Headers show
Series
  • make bdrv_get_block_status byte-based
Related show

Commit Message

Eric Blake Sept. 13, 2017, 4:03 p.m.
We are gradually moving away from sector-based interfaces, towards
byte-based.  In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.

Changing the name of the function from bdrv_get_block_status() to
bdrv_block_status() ensures that the compiler enforces that all
callers are updated.  For now, the io.c layer still assert()s that
all callers are sector-aligned, but that can be relaxed when a later
patch implements byte-based block status in the drivers.

Note that we have an inherent limitation in the BDRV_BLOCK_* return
values: BDRV_BLOCK_OFFSET_VALID can only return the start of a
sector, even if we later relax the interface to query for the status
starting at an intermediate byte; document the obvious interpretation
that valid offsets are always sector-relative.

Therefore, for the most part this patch is just the addition of scaling
at the callers followed by inverse scaling at bdrv_block_status().  But
some code, particularly bdrv_is_allocated(), gets a lot simpler because
it no longer has to mess with sectors.

For ease of review, bdrv_get_block_status_above() will be tackled
separately.

Signed-off-by: Eric Blake <eblake@redhat.com>

---
v3: clamp bytes to 32-bits, rather than asserting
v2: rebase to earlier changes
---
 include/block/block.h | 12 +++++++-----
 block/io.c            | 31 +++++++++++++++++++------------
 block/qcow2-cluster.c |  2 +-
 qemu-img.c            | 20 +++++++++++---------
 4 files changed, 38 insertions(+), 27 deletions(-)

Comments

John Snow Sept. 26, 2017, 7:39 p.m. | #1
On 09/13/2017 12:03 PM, Eric Blake wrote:
> We are gradually moving away from sector-based interfaces, towards
> byte-based.  In the common case, allocation is unlikely to ever use
> values that are not naturally sector-aligned, but it is possible
> that byte-based values will let us be more precise about allocation
> at the end of an unaligned file that can do byte-based access.
> 
> Changing the name of the function from bdrv_get_block_status() to
> bdrv_block_status() ensures that the compiler enforces that all
> callers are updated.  For now, the io.c layer still assert()s that
> all callers are sector-aligned, but that can be relaxed when a later
> patch implements byte-based block status in the drivers.
> 
> Note that we have an inherent limitation in the BDRV_BLOCK_* return
> values: BDRV_BLOCK_OFFSET_VALID can only return the start of a
> sector, even if we later relax the interface to query for the status
> starting at an intermediate byte; document the obvious interpretation
> that valid offsets are always sector-relative.
> 
> Therefore, for the most part this patch is just the addition of scaling
> at the callers followed by inverse scaling at bdrv_block_status().  But
> some code, particularly bdrv_is_allocated(), gets a lot simpler because
> it no longer has to mess with sectors.
> 
> For ease of review, bdrv_get_block_status_above() will be tackled
> separately.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> 
> ---
> v3: clamp bytes to 32-bits, rather than asserting
> v2: rebase to earlier changes
> ---
>  include/block/block.h | 12 +++++++-----
>  block/io.c            | 31 +++++++++++++++++++------------
>  block/qcow2-cluster.c |  2 +-
>  qemu-img.c            | 20 +++++++++++---------
>  4 files changed, 38 insertions(+), 27 deletions(-)
> 
> diff --git a/include/block/block.h b/include/block/block.h
> index bb3b95d491..7a9a8db588 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -138,8 +138,10 @@ typedef struct HDGeometry {
>   *
>   * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK)
>   * represent the offset in the returned BDS that is allocated for the
> - * corresponding raw data; however, whether that offset actually contains
> - * data also depends on BDRV_BLOCK_DATA and BDRV_BLOCK_ZERO, as follows:
> + * corresponding raw data.  Individual bytes are at the same sector-relative
> + * locations (and thus, this bit cannot be set for mappings which are
> + * not equivalent modulo 512).  However, whether that offset actually
> + * contains data also depends on BDRV_BLOCK_DATA, as follows:
>   *
>   * DATA ZERO OFFSET_VALID
>   *  t    t        t       sectors read as zero, returned file is zero at offset
> @@ -421,9 +423,9 @@ int bdrv_has_zero_init_1(BlockDriverState *bs);
>  int bdrv_has_zero_init(BlockDriverState *bs);
>  bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
>  bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
> -int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
> -                              int nb_sectors, int *pnum,
> -                              BlockDriverState **file);
> +int64_t bdrv_block_status(BlockDriverState *bs, int64_t offset,
> +                          int64_t bytes, int64_t *pnum,
> +                          BlockDriverState **file);
>  int64_t bdrv_get_block_status_above(BlockDriverState *bs,
>                                      BlockDriverState *base,
>                                      int64_t sector_num,
> diff --git a/block/io.c b/block/io.c
> index 638b3890b7..1ed46bcece 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -695,7 +695,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
>  {
>      int64_t target_size, ret, bytes, offset = 0;
>      BlockDriverState *bs = child->bs;
> -    int n; /* sectors */
> 
>      target_size = bdrv_getlength(bs);
>      if (target_size < 0) {
> @@ -707,24 +706,23 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
>          if (bytes <= 0) {
>              return 0;
>          }
> -        ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS,
> -                                    bytes >> BDRV_SECTOR_BITS, &n, NULL);
> +        ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL);
>          if (ret < 0) {
>              error_report("error getting block status at offset %" PRId64 ": %s",
>                           offset, strerror(-ret));
>              return ret;
>          }
>          if (ret & BDRV_BLOCK_ZERO) {
> -            offset += n * BDRV_SECTOR_BITS;
> +            offset += bytes;
>              continue;
>          }
> -        ret = bdrv_pwrite_zeroes(child, offset, n * BDRV_SECTOR_SIZE, flags);
> +        ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
>          if (ret < 0) {
>              error_report("error writing zeroes at offset %" PRId64 ": %s",
>                           offset, strerror(-ret));
>              return ret;
>          }
> -        offset += n * BDRV_SECTOR_SIZE;
> +        offset += bytes;
>      }
>  }
> 
> @@ -1983,13 +1981,22 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
>                                            nb_sectors, pnum, file);
>  }
> 
> -int64_t bdrv_get_block_status(BlockDriverState *bs,
> -                              int64_t sector_num,
> -                              int nb_sectors, int *pnum,
> -                              BlockDriverState **file)
> +int64_t bdrv_block_status(BlockDriverState *bs,
> +                          int64_t offset, int64_t bytes, int64_t *pnum,
> +                          BlockDriverState **file)
>  {
> -    return bdrv_get_block_status_above(bs, backing_bs(bs),
> -                                       sector_num, nb_sectors, pnum, file);
> +    int64_t ret;
> +    int n;
> +
> +    assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
> +    bytes = MIN(bytes, BDRV_REQUEST_MAX_BYTES);
> +    ret = bdrv_get_block_status_above(bs, backing_bs(bs),
> +                                      offset >> BDRV_SECTOR_BITS,
> +                                      bytes >> BDRV_SECTOR_BITS, &n, file);
> +    if (pnum) {
> +        *pnum = n * BDRV_SECTOR_SIZE;
> +    }

Is it safe to truncate the request in the event that the caller did not
provide a pnum target? that is, how will they know for what range we are
answering?

> +    return ret;
>  }
> 
>  int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index 0d4824993c..d837b3980d 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1584,7 +1584,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
>           * cluster is already marked as zero, or if it's unallocated and we
>           * don't have a backing file.
>           *
> -         * TODO We might want to use bdrv_get_block_status(bs) here, but we're
> +         * TODO We might want to use bdrv_block_status(bs) here, but we're

thanks for updating comments too :)

>           * holding s->lock, so that doesn't work today.
>           *
>           * If full_discard is true, the sector should not read back as zeroes,
> diff --git a/qemu-img.c b/qemu-img.c
> index 54f7682069..897f80abb3 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -1598,9 +1598,14 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
> 
>      if (s->sector_next_status <= sector_num) {
>          if (s->target_has_backing) {
> -            ret = bdrv_get_block_status(blk_bs(s->src[src_cur]),
> -                                        sector_num - src_cur_offset,
> -                                        n, &n, NULL);
> +            int64_t count = n * BDRV_SECTOR_SIZE;
> +
> +            ret = bdrv_block_status(blk_bs(s->src[src_cur]),
> +                                    (sector_num - src_cur_offset) *
> +                                    BDRV_SECTOR_SIZE,
> +                                    count, &count, NULL);
> +            assert(ret < 0 || QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
> +            n = count >> BDRV_SECTOR_BITS;
>          } else {
>              ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL,
>                                                sector_num - src_cur_offset,
> @@ -2677,9 +2682,7 @@ static int get_block_status(BlockDriverState *bs, int64_t offset,
>      int depth;
>      BlockDriverState *file;
>      bool has_offset;
> -    int nb_sectors = bytes >> BDRV_SECTOR_BITS;
> 
> -    assert(bytes < INT_MAX);
>      /* As an optimization, we could cache the current range of unallocated
>       * clusters in each file of the chain, and avoid querying the same
>       * range repeatedly.
> @@ -2687,12 +2690,11 @@ static int get_block_status(BlockDriverState *bs, int64_t offset,
> 
>      depth = 0;
>      for (;;) {
> -        ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS, nb_sectors,
> -                                    &nb_sectors, &file);
> +        ret = bdrv_block_status(bs, offset, bytes, &bytes, &file);
>          if (ret < 0) {
>              return ret;
>          }
> -        assert(nb_sectors);
> +        assert(bytes);
>          if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
>              break;
>          }
> @@ -2709,7 +2711,7 @@ static int get_block_status(BlockDriverState *bs, int64_t offset,
> 
>      *e = (MapEntry) {
>          .start = offset,
> -        .length = nb_sectors * BDRV_SECTOR_SIZE,
> +        .length = bytes,
>          .data = !!(ret & BDRV_BLOCK_DATA),
>          .zero = !!(ret & BDRV_BLOCK_ZERO),
>          .offset = ret & BDRV_BLOCK_OFFSET_MASK,
> 

Rest appears obviously correct.
Eric Blake Sept. 26, 2017, 7:57 p.m. | #2
On 09/26/2017 02:39 PM, John Snow wrote:
>> -int64_t bdrv_get_block_status(BlockDriverState *bs,
>> -                              int64_t sector_num,
>> -                              int nb_sectors, int *pnum,
>> -                              BlockDriverState **file)
>> +int64_t bdrv_block_status(BlockDriverState *bs,
>> +                          int64_t offset, int64_t bytes, int64_t *pnum,
>> +                          BlockDriverState **file)
>>  {
>> -    return bdrv_get_block_status_above(bs, backing_bs(bs),
>> -                                       sector_num, nb_sectors, pnum, file);
>> +    int64_t ret;
>> +    int n;
>> +
>> +    assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
>> +    bytes = MIN(bytes, BDRV_REQUEST_MAX_BYTES);
>> +    ret = bdrv_get_block_status_above(bs, backing_bs(bs),
>> +                                      offset >> BDRV_SECTOR_BITS,
>> +                                      bytes >> BDRV_SECTOR_BITS, &n, file);
>> +    if (pnum) {
>> +        *pnum = n * BDRV_SECTOR_SIZE;
>> +    }
> 
> Is it safe to truncate the request in the event that the caller did not
> provide a pnum target? that is, how will they know for what range we are
> answering?

Hmm. I think I have some rebase cruft here. At one point, I was playing
with the idea of allowing pnum == NULL for ALL get_status() callers,
similar to the existing block/vvfat.c:cluster_was_modified():

block/vvfat.c:                    res = bdrv_is_allocated(s->qcow->bs,
block/vvfat.c-                                            (offset + i) *
BDRV_SECTOR_SIZE,
block/vvfat.c-
BDRV_SECTOR_SIZE, NULL);

but looking further, only bdrv_is_allocated() (and NOT
bdrv_[get_]block_status) is ever used in that manner.  Or, in terms of
the 'mapping' variable, a NULL pnum only makes sense when mapping ==
false.  So the conditional on 'if (pnum)' should be dropped here.

Patch

diff --git a/include/block/block.h b/include/block/block.h
index bb3b95d491..7a9a8db588 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -138,8 +138,10 @@  typedef struct HDGeometry {
  *
  * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK)
  * represent the offset in the returned BDS that is allocated for the
- * corresponding raw data; however, whether that offset actually contains
- * data also depends on BDRV_BLOCK_DATA and BDRV_BLOCK_ZERO, as follows:
+ * corresponding raw data.  Individual bytes are at the same sector-relative
+ * locations (and thus, this bit cannot be set for mappings which are
+ * not equivalent modulo 512).  However, whether that offset actually
+ * contains data also depends on BDRV_BLOCK_DATA, as follows:
  *
  * DATA ZERO OFFSET_VALID
  *  t    t        t       sectors read as zero, returned file is zero at offset
@@ -421,9 +423,9 @@  int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
-int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
-                              int nb_sectors, int *pnum,
-                              BlockDriverState **file);
+int64_t bdrv_block_status(BlockDriverState *bs, int64_t offset,
+                          int64_t bytes, int64_t *pnum,
+                          BlockDriverState **file);
 int64_t bdrv_get_block_status_above(BlockDriverState *bs,
                                     BlockDriverState *base,
                                     int64_t sector_num,
diff --git a/block/io.c b/block/io.c
index 638b3890b7..1ed46bcece 100644
--- a/block/io.c
+++ b/block/io.c
@@ -695,7 +695,6 @@  int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
 {
     int64_t target_size, ret, bytes, offset = 0;
     BlockDriverState *bs = child->bs;
-    int n; /* sectors */

     target_size = bdrv_getlength(bs);
     if (target_size < 0) {
@@ -707,24 +706,23 @@  int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
         if (bytes <= 0) {
             return 0;
         }
-        ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS,
-                                    bytes >> BDRV_SECTOR_BITS, &n, NULL);
+        ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL);
         if (ret < 0) {
             error_report("error getting block status at offset %" PRId64 ": %s",
                          offset, strerror(-ret));
             return ret;
         }
         if (ret & BDRV_BLOCK_ZERO) {
-            offset += n * BDRV_SECTOR_BITS;
+            offset += bytes;
             continue;
         }
-        ret = bdrv_pwrite_zeroes(child, offset, n * BDRV_SECTOR_SIZE, flags);
+        ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
         if (ret < 0) {
             error_report("error writing zeroes at offset %" PRId64 ": %s",
                          offset, strerror(-ret));
             return ret;
         }
-        offset += n * BDRV_SECTOR_SIZE;
+        offset += bytes;
     }
 }

@@ -1983,13 +1981,22 @@  int64_t bdrv_get_block_status_above(BlockDriverState *bs,
                                           nb_sectors, pnum, file);
 }

-int64_t bdrv_get_block_status(BlockDriverState *bs,
-                              int64_t sector_num,
-                              int nb_sectors, int *pnum,
-                              BlockDriverState **file)
+int64_t bdrv_block_status(BlockDriverState *bs,
+                          int64_t offset, int64_t bytes, int64_t *pnum,
+                          BlockDriverState **file)
 {
-    return bdrv_get_block_status_above(bs, backing_bs(bs),
-                                       sector_num, nb_sectors, pnum, file);
+    int64_t ret;
+    int n;
+
+    assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
+    bytes = MIN(bytes, BDRV_REQUEST_MAX_BYTES);
+    ret = bdrv_get_block_status_above(bs, backing_bs(bs),
+                                      offset >> BDRV_SECTOR_BITS,
+                                      bytes >> BDRV_SECTOR_BITS, &n, file);
+    if (pnum) {
+        *pnum = n * BDRV_SECTOR_SIZE;
+    }
+    return ret;
 }

 int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 0d4824993c..d837b3980d 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1584,7 +1584,7 @@  static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
          * cluster is already marked as zero, or if it's unallocated and we
          * don't have a backing file.
          *
-         * TODO We might want to use bdrv_get_block_status(bs) here, but we're
+         * TODO We might want to use bdrv_block_status(bs) here, but we're
          * holding s->lock, so that doesn't work today.
          *
          * If full_discard is true, the sector should not read back as zeroes,
diff --git a/qemu-img.c b/qemu-img.c
index 54f7682069..897f80abb3 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1598,9 +1598,14 @@  static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)

     if (s->sector_next_status <= sector_num) {
         if (s->target_has_backing) {
-            ret = bdrv_get_block_status(blk_bs(s->src[src_cur]),
-                                        sector_num - src_cur_offset,
-                                        n, &n, NULL);
+            int64_t count = n * BDRV_SECTOR_SIZE;
+
+            ret = bdrv_block_status(blk_bs(s->src[src_cur]),
+                                    (sector_num - src_cur_offset) *
+                                    BDRV_SECTOR_SIZE,
+                                    count, &count, NULL);
+            assert(ret < 0 || QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
+            n = count >> BDRV_SECTOR_BITS;
         } else {
             ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL,
                                               sector_num - src_cur_offset,
@@ -2677,9 +2682,7 @@  static int get_block_status(BlockDriverState *bs, int64_t offset,
     int depth;
     BlockDriverState *file;
     bool has_offset;
-    int nb_sectors = bytes >> BDRV_SECTOR_BITS;

-    assert(bytes < INT_MAX);
     /* As an optimization, we could cache the current range of unallocated
      * clusters in each file of the chain, and avoid querying the same
      * range repeatedly.
@@ -2687,12 +2690,11 @@  static int get_block_status(BlockDriverState *bs, int64_t offset,

     depth = 0;
     for (;;) {
-        ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS, nb_sectors,
-                                    &nb_sectors, &file);
+        ret = bdrv_block_status(bs, offset, bytes, &bytes, &file);
         if (ret < 0) {
             return ret;
         }
-        assert(nb_sectors);
+        assert(bytes);
         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
             break;
         }
@@ -2709,7 +2711,7 @@  static int get_block_status(BlockDriverState *bs, int64_t offset,

     *e = (MapEntry) {
         .start = offset,
-        .length = nb_sectors * BDRV_SECTOR_SIZE,
+        .length = bytes,
         .data = !!(ret & BDRV_BLOCK_DATA),
         .zero = !!(ret & BDRV_BLOCK_ZERO),
         .offset = ret & BDRV_BLOCK_OFFSET_MASK,