diff mbox series

[v4,02/23] block: Add flag to avoid wasted work in bdrv_is_allocated()

Message ID 20170913160333.23622-3-eblake@redhat.com
State New
Headers show
Series make bdrv_get_block_status byte-based | expand

Commit Message

Eric Blake Sept. 13, 2017, 4:03 p.m. UTC
Not all callers care about which BDS owns the mapping for a given
range of the file.  In particular, bdrv_is_allocated() cares more
about finding the largest run of allocated data from the guest
perspective, whether or not that data is consecutive from the
host perspective.  Therefore, doing subsequent refinements such
as checking how much of the format-layer allocation also satisfies
BDRV_BLOCK_ZERO at the protocol layer is wasted work - in the best
case, it just costs extra CPU cycles during a single
bdrv_is_allocated(), but in the worst case, it results in a smaller
*pnum, and forces callers to iterate through more status probes when
visiting the entire file for even more extra CPU cycles.

This patch only optimizes the block layer.  But subsequent patches
will tweak the driver callback to be byte-based, and in the process,
can also pass this hint through to the driver.

Signed-off-by: Eric Blake <eblake@redhat.com>

---
v4: only context changes
v3: s/allocation/mapping/ and flip sense of bool
v2: new patch
---
 block/io.c | 52 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 14 deletions(-)

Comments

John Snow Sept. 26, 2017, 6:31 p.m. UTC | #1
On 09/13/2017 12:03 PM, Eric Blake wrote:
> Not all callers care about which BDS owns the mapping for a given
> range of the file.  In particular, bdrv_is_allocated() cares more
> about finding the largest run of allocated data from the guest
> perspective, whether or not that data is consecutive from the
> host perspective.  Therefore, doing subsequent refinements such
> as checking how much of the format-layer allocation also satisfies
> BDRV_BLOCK_ZERO at the protocol layer is wasted work - in the best
> case, it just costs extra CPU cycles during a single
> bdrv_is_allocated(), but in the worst case, it results in a smaller
> *pnum, and forces callers to iterate through more status probes when
> visiting the entire file for even more extra CPU cycles.
> 
> This patch only optimizes the block layer.  But subsequent patches
> will tweak the driver callback to be byte-based, and in the process,
> can also pass this hint through to the driver.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> 
> ---
> v4: only context changes
> v3: s/allocation/mapping/ and flip sense of bool
> v2: new patch
> ---
>  block/io.c | 52 ++++++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 38 insertions(+), 14 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index f250029395..6509c804d4 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -1709,6 +1709,7 @@ typedef struct BdrvCoGetBlockStatusData {
>      int nb_sectors;
>      int *pnum;
>      int64_t ret;
> +    bool mapping;
>      bool done;
>  } BdrvCoGetBlockStatusData;
> 
> @@ -1743,6 +1744,11 @@ int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
>   * Drivers not implementing the functionality are assumed to not support
>   * backing files, hence all their sectors are reported as allocated.
>   *
> + * If 'mapping' is true, the caller is querying for mapping purposes,
> + * and the result should include BDRV_BLOCK_OFFSET_VALID where
> + * possible; otherwise, the result may omit that bit particularly if
> + * it allows for a larger value in 'pnum'.
> + *
>   * If 'sector_num' is beyond the end of the disk image the return value is
>   * BDRV_BLOCK_EOF and 'pnum' is set to 0.
>   *
> @@ -1759,6 +1765,7 @@ int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
>   * is allocated in.
>   */
>  static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
> +                                                     bool mapping,
>                                                       int64_t sector_num,
>                                                       int nb_sectors, int *pnum,
>                                                       BlockDriverState **file)
> @@ -1817,14 +1824,15 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
> 
>      if (ret & BDRV_BLOCK_RAW) {
>          assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
> -        ret = bdrv_co_get_block_status(local_file, ret >> BDRV_SECTOR_BITS,
> +        ret = bdrv_co_get_block_status(local_file, mapping,
> +                                       ret >> BDRV_SECTOR_BITS,
>                                         *pnum, pnum, &local_file);
>          goto out;
>      }
> 
>      if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
>          ret |= BDRV_BLOCK_ALLOCATED;
> -    } else {
> +    } else if (mapping) {
>          if (bdrv_unallocated_blocks_are_zero(bs)) {
>              ret |= BDRV_BLOCK_ZERO;
>          } else if (bs->backing) {
> @@ -1836,12 +1844,13 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
>          }
>      }
> 
> -    if (local_file && local_file != bs &&
> +    if (mapping && local_file && local_file != bs &&

Tentatively this looks OK to me, but I have to admit I'm a little shaky
on this portion because I've not really investigated this function too
much. I am at the very least convinced that when mapping is true that
the function is equivalent and that existing callers don't have their
behavior changed too much.

Benefit of the doubt:

Reviewed-by: John Snow <jsnow@redhat.com>

>          (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
>          (ret & BDRV_BLOCK_OFFSET_VALID)) {
>          int file_pnum;
> 
> -        ret2 = bdrv_co_get_block_status(local_file, ret >> BDRV_SECTOR_BITS,
> +        ret2 = bdrv_co_get_block_status(local_file, mapping,
> +                                        ret >> BDRV_SECTOR_BITS,
>                                          *pnum, &file_pnum, NULL);
>          if (ret2 >= 0) {
>              /* Ignore errors.  This is just providing extra information, it
> @@ -1876,6 +1885,7 @@ out:
> 
>  static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
>          BlockDriverState *base,
> +        bool mapping,
>          int64_t sector_num,
>          int nb_sectors,
>          int *pnum,
> @@ -1887,7 +1897,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
> 
>      assert(bs != base);
>      for (p = bs; p != base; p = backing_bs(p)) {
> -        ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file);
> +        ret = bdrv_co_get_block_status(p, mapping, sector_num, nb_sectors,
> +                                       pnum, file);
>          if (ret < 0) {
>              break;
>          }
> @@ -1917,6 +1928,7 @@ static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
>      BdrvCoGetBlockStatusData *data = opaque;
> 
>      data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
> +                                               data->mapping,
>                                                 data->sector_num,
>                                                 data->nb_sectors,
>                                                 data->pnum,
> @@ -1929,11 +1941,12 @@ static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
>   *
>   * See bdrv_co_get_block_status_above() for details.
>   */
> -int64_t bdrv_get_block_status_above(BlockDriverState *bs,
> -                                    BlockDriverState *base,
> -                                    int64_t sector_num,
> -                                    int nb_sectors, int *pnum,
> -                                    BlockDriverState **file)
> +static int64_t bdrv_common_block_status_above(BlockDriverState *bs,
> +                                              BlockDriverState *base,
> +                                              bool mapping,
> +                                              int64_t sector_num,
> +                                              int nb_sectors, int *pnum,
> +                                              BlockDriverState **file)
>  {
>      Coroutine *co;
>      BdrvCoGetBlockStatusData data = {
> @@ -1943,6 +1956,7 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
>          .sector_num = sector_num,
>          .nb_sectors = nb_sectors,
>          .pnum = pnum,
> +        .mapping = mapping,
>          .done = false,
>      };
> 
> @@ -1958,6 +1972,16 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
>      return data.ret;
>  }
> 
> +int64_t bdrv_get_block_status_above(BlockDriverState *bs,
> +                                    BlockDriverState *base,
> +                                    int64_t sector_num,
> +                                    int nb_sectors, int *pnum,
> +                                    BlockDriverState **file)
> +{
> +    return bdrv_common_block_status_above(bs, base, true, sector_num,
> +                                          nb_sectors, pnum, file);
> +}
> +
>  int64_t bdrv_get_block_status(BlockDriverState *bs,
>                                int64_t sector_num,
>                                int nb_sectors, int *pnum,
> @@ -1970,15 +1994,15 @@ int64_t bdrv_get_block_status(BlockDriverState *bs,
>  int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
>                                     int64_t bytes, int64_t *pnum)
>  {
> -    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
> -    int nb_sectors = bytes >> BDRV_SECTOR_BITS;
>      int64_t ret;
>      int psectors;
> 
>      assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
>      assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE) && bytes < INT_MAX);
> -    ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &psectors,
> -                                NULL);
> +    ret = bdrv_common_block_status_above(bs, backing_bs(bs), false,
> +                                         offset >> BDRV_SECTOR_BITS,
> +                                         bytes >> BDRV_SECTOR_BITS, &psectors,
> +                                         NULL);
>      if (ret < 0) {
>          return ret;
>      }
>
Eric Blake Sept. 28, 2017, 2:58 p.m. UTC | #2
On 09/26/2017 01:31 PM, John Snow wrote:
> 
> 
> On 09/13/2017 12:03 PM, Eric Blake wrote:
>> Not all callers care about which BDS owns the mapping for a given
>> range of the file.  In particular, bdrv_is_allocated() cares more
>> about finding the largest run of allocated data from the guest
>> perspective, whether or not that data is consecutive from the
>> host perspective.  Therefore, doing subsequent refinements such
>> as checking how much of the format-layer allocation also satisfies
>> BDRV_BLOCK_ZERO at the protocol layer is wasted work - in the best
>> case, it just costs extra CPU cycles during a single
>> bdrv_is_allocated(), but in the worst case, it results in a smaller
>> *pnum, and forces callers to iterate through more status probes when
>> visiting the entire file for even more extra CPU cycles.
>>
>> This patch only optimizes the block layer.  But subsequent patches
>> will tweak the driver callback to be byte-based, and in the process,
>> can also pass this hint through to the driver.
>>
>> Signed-off-by: Eric Blake <eblake@redhat.com>
>>

>>   *
>> + * If 'mapping' is true, the caller is querying for mapping purposes,
>> + * and the result should include BDRV_BLOCK_OFFSET_VALID where
>> + * possible; otherwise, the result may omit that bit particularly if
>> + * it allows for a larger value in 'pnum'.

I decided one more tweak to the comment will help:

+ * If 'mapping' is true, the caller is querying for mapping purposes,
+ * and the result should include BDRV_BLOCK_OFFSET_VALID and
+ * BDRV_BLOCK_ZERO where possible; otherwise, the result may omit those
+ * bits particularly if it allows for a larger value in 'pnum'.


>> @@ -1836,12 +1844,13 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
>>          }
>>      }
>>
>> -    if (local_file && local_file != bs &&
>> +    if (mapping && local_file && local_file != bs &&
> 
> Tentatively this looks OK to me, but I have to admit I'm a little shaky
> on this portion because I've not really investigated this function too
> much. I am at the very least convinced that when mapping is true that
> the function is equivalent and that existing callers don't have their
> behavior changed too much.
> 
> Benefit of the doubt:
> 
> Reviewed-by: John Snow <jsnow@redhat.com>

Then I'll tentatively keep your R-b even with the comment tweak, unless
you say otherwise :)
diff mbox series

Patch

diff --git a/block/io.c b/block/io.c
index f250029395..6509c804d4 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1709,6 +1709,7 @@  typedef struct BdrvCoGetBlockStatusData {
     int nb_sectors;
     int *pnum;
     int64_t ret;
+    bool mapping;
     bool done;
 } BdrvCoGetBlockStatusData;

@@ -1743,6 +1744,11 @@  int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
  * Drivers not implementing the functionality are assumed to not support
  * backing files, hence all their sectors are reported as allocated.
  *
+ * If 'mapping' is true, the caller is querying for mapping purposes,
+ * and the result should include BDRV_BLOCK_OFFSET_VALID where
+ * possible; otherwise, the result may omit that bit particularly if
+ * it allows for a larger value in 'pnum'.
+ *
  * If 'sector_num' is beyond the end of the disk image the return value is
  * BDRV_BLOCK_EOF and 'pnum' is set to 0.
  *
@@ -1759,6 +1765,7 @@  int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
  * is allocated in.
  */
 static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
+                                                     bool mapping,
                                                      int64_t sector_num,
                                                      int nb_sectors, int *pnum,
                                                      BlockDriverState **file)
@@ -1817,14 +1824,15 @@  static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,

     if (ret & BDRV_BLOCK_RAW) {
         assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
-        ret = bdrv_co_get_block_status(local_file, ret >> BDRV_SECTOR_BITS,
+        ret = bdrv_co_get_block_status(local_file, mapping,
+                                       ret >> BDRV_SECTOR_BITS,
                                        *pnum, pnum, &local_file);
         goto out;
     }

     if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
         ret |= BDRV_BLOCK_ALLOCATED;
-    } else {
+    } else if (mapping) {
         if (bdrv_unallocated_blocks_are_zero(bs)) {
             ret |= BDRV_BLOCK_ZERO;
         } else if (bs->backing) {
@@ -1836,12 +1844,13 @@  static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
         }
     }

-    if (local_file && local_file != bs &&
+    if (mapping && local_file && local_file != bs &&
         (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
         (ret & BDRV_BLOCK_OFFSET_VALID)) {
         int file_pnum;

-        ret2 = bdrv_co_get_block_status(local_file, ret >> BDRV_SECTOR_BITS,
+        ret2 = bdrv_co_get_block_status(local_file, mapping,
+                                        ret >> BDRV_SECTOR_BITS,
                                         *pnum, &file_pnum, NULL);
         if (ret2 >= 0) {
             /* Ignore errors.  This is just providing extra information, it
@@ -1876,6 +1885,7 @@  out:

 static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
         BlockDriverState *base,
+        bool mapping,
         int64_t sector_num,
         int nb_sectors,
         int *pnum,
@@ -1887,7 +1897,8 @@  static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,

     assert(bs != base);
     for (p = bs; p != base; p = backing_bs(p)) {
-        ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file);
+        ret = bdrv_co_get_block_status(p, mapping, sector_num, nb_sectors,
+                                       pnum, file);
         if (ret < 0) {
             break;
         }
@@ -1917,6 +1928,7 @@  static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
     BdrvCoGetBlockStatusData *data = opaque;

     data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
+                                               data->mapping,
                                                data->sector_num,
                                                data->nb_sectors,
                                                data->pnum,
@@ -1929,11 +1941,12 @@  static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
  *
  * See bdrv_co_get_block_status_above() for details.
  */
-int64_t bdrv_get_block_status_above(BlockDriverState *bs,
-                                    BlockDriverState *base,
-                                    int64_t sector_num,
-                                    int nb_sectors, int *pnum,
-                                    BlockDriverState **file)
+static int64_t bdrv_common_block_status_above(BlockDriverState *bs,
+                                              BlockDriverState *base,
+                                              bool mapping,
+                                              int64_t sector_num,
+                                              int nb_sectors, int *pnum,
+                                              BlockDriverState **file)
 {
     Coroutine *co;
     BdrvCoGetBlockStatusData data = {
@@ -1943,6 +1956,7 @@  int64_t bdrv_get_block_status_above(BlockDriverState *bs,
         .sector_num = sector_num,
         .nb_sectors = nb_sectors,
         .pnum = pnum,
+        .mapping = mapping,
         .done = false,
     };

@@ -1958,6 +1972,16 @@  int64_t bdrv_get_block_status_above(BlockDriverState *bs,
     return data.ret;
 }

+int64_t bdrv_get_block_status_above(BlockDriverState *bs,
+                                    BlockDriverState *base,
+                                    int64_t sector_num,
+                                    int nb_sectors, int *pnum,
+                                    BlockDriverState **file)
+{
+    return bdrv_common_block_status_above(bs, base, true, sector_num,
+                                          nb_sectors, pnum, file);
+}
+
 int64_t bdrv_get_block_status(BlockDriverState *bs,
                               int64_t sector_num,
                               int nb_sectors, int *pnum,
@@ -1970,15 +1994,15 @@  int64_t bdrv_get_block_status(BlockDriverState *bs,
 int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
                                    int64_t bytes, int64_t *pnum)
 {
-    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
-    int nb_sectors = bytes >> BDRV_SECTOR_BITS;
     int64_t ret;
     int psectors;

     assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
     assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE) && bytes < INT_MAX);
-    ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &psectors,
-                                NULL);
+    ret = bdrv_common_block_status_above(bs, backing_bs(bs), false,
+                                         offset >> BDRV_SECTOR_BITS,
+                                         bytes >> BDRV_SECTOR_BITS, &psectors,
+                                         NULL);
     if (ret < 0) {
         return ret;
     }