[v4,03/23] block: Make bdrv_round_to_clusters() signature more useful

Message ID 20170913160333.23622-4-eblake@redhat.com
State New
Headers show
Series
  • make bdrv_get_block_status byte-based
Related show

Commit Message

Eric Blake Sept. 13, 2017, 4:03 p.m.
In the process of converting sector-based interfaces to bytes,
I'm finding it easier to represent a byte count as a 64-bit
integer at the block layer (even if we are internally capped
by SIZE_MAX or even INT_MAX for individual transactions, it's
still nicer to not have to worry about truncation/overflow
issues on as many variables).  Update the signature of
bdrv_round_to_clusters() to uniformly use int64_t, matching
the signature already chosen for bdrv_is_allocated and the
fact that off_t is also a signed type, then adjust callers
to cope with the resulting fallout.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>

---
v4: only context changes
v3: no change
v2: fix commit message [John], rebase to earlier changes, including
mirror_clip_bytes() signature update
---
 include/block/block.h | 4 ++--
 block/io.c            | 7 ++++---
 block/mirror.c        | 7 +++----
 block/trace-events    | 2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

Comments

John Snow Sept. 26, 2017, 6:51 p.m. | #1
On 09/13/2017 12:03 PM, Eric Blake wrote:
> In the process of converting sector-based interfaces to bytes,
> I'm finding it easier to represent a byte count as a 64-bit
> integer at the block layer (even if we are internally capped
> by SIZE_MAX or even INT_MAX for individual transactions, it's
> still nicer to not have to worry about truncation/overflow
> issues on as many variables).  Update the signature of
> bdrv_round_to_clusters() to uniformly use int64_t, matching
> the signature already chosen for bdrv_is_allocated and the
> fact that off_t is also a signed type, then adjust clients
> according to the required fallout.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> Reviewed-by: Fam Zheng <famz@redhat.com>
> 
> ---
> v4: only context changes
> v3: no change
> v2: fix commit message [John], rebase to earlier changes, including
> mirror_clip_bytes() signature update
> ---
>  include/block/block.h | 4 ++--
>  block/io.c            | 7 ++++---
>  block/mirror.c        | 7 +++----
>  block/trace-events    | 2 +-
>  4 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/include/block/block.h b/include/block/block.h
> index 2ad18775af..bb3b95d491 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -475,9 +475,9 @@ int bdrv_get_flags(BlockDriverState *bs);
>  int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
>  ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
>  void bdrv_round_to_clusters(BlockDriverState *bs,
> -                            int64_t offset, unsigned int bytes,
> +                            int64_t offset, int64_t bytes,
>                              int64_t *cluster_offset,
> -                            unsigned int *cluster_bytes);
> +                            int64_t *cluster_bytes);
> 
>  const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
>  void bdrv_get_backing_filename(BlockDriverState *bs,
> diff --git a/block/io.c b/block/io.c
> index 6509c804d4..b362b46e3d 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -446,9 +446,9 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
>   * Round a region to cluster boundaries
>   */
>  void bdrv_round_to_clusters(BlockDriverState *bs,
> -                            int64_t offset, unsigned int bytes,
> +                            int64_t offset, int64_t bytes,
>                              int64_t *cluster_offset,
> -                            unsigned int *cluster_bytes)
> +                            int64_t *cluster_bytes)
>  {
>      BlockDriverInfo bdi;
> 
> @@ -946,7 +946,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
>      struct iovec iov;
>      QEMUIOVector bounce_qiov;
>      int64_t cluster_offset;
> -    unsigned int cluster_bytes;
> +    int64_t cluster_bytes;
>      size_t skip_bytes;
>      int ret;
> 
> @@ -967,6 +967,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
>      trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
>                                     cluster_offset, cluster_bytes);
> 
> +    assert(cluster_bytes < SIZE_MAX);

Later in this function, is there any real or imagined risk of
cluster_bytes exceeding INT_MAX when it is passed to
bdrv_co_do_pwrite_zeroes()?

>      iov.iov_len = cluster_bytes;
>      iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
>      if (bounce_buffer == NULL) {
> diff --git a/block/mirror.c b/block/mirror.c
> index 032cfe91fa..67f45cec4e 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -190,10 +190,9 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
>      bool need_cow;
>      int ret = 0;
>      int64_t align_offset = *offset;
> -    unsigned int align_bytes = *bytes;
> +    int64_t align_bytes = *bytes;
>      int max_bytes = s->granularity * s->max_iov;
> 
> -    assert(*bytes < INT_MAX);
>      need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap);
>      need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
>                            s->cow_bitmap);
> @@ -388,7 +387,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>      while (nb_chunks > 0 && offset < s->bdev_length) {
>          int64_t ret;
>          int io_sectors;
> -        unsigned int io_bytes;
> +        int64_t io_bytes;
>          int64_t io_bytes_acct;
>          enum MirrorMethod {
>              MIRROR_METHOD_COPY,
> @@ -413,7 +412,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>              io_bytes = s->granularity;
>          } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
>              int64_t target_offset;
> -            unsigned int target_bytes;
> +            int64_t target_bytes;
>              bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
>                                     &target_offset, &target_bytes);
>              if (target_offset == offset &&
> diff --git a/block/trace-events b/block/trace-events
> index 25dd5a3026..4c6586f156 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -12,7 +12,7 @@ blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flag
>  bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
>  bdrv_co_pwritev(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
>  bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags 0x%x"
> -bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"
> +bdrv_co_do_copy_on_readv(void *bs, int64_t offset, int64_t bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %"PRId64" cluster_offset %"PRId64" cluster_bytes %u"
> 
>  # block/stream.c
>  stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d"
> 

Everything else looks obviously correct to me.
Eric Blake Sept. 26, 2017, 7:18 p.m. | #2
On 09/26/2017 01:51 PM, John Snow wrote:
> 
> 
> On 09/13/2017 12:03 PM, Eric Blake wrote:
>> In the process of converting sector-based interfaces to bytes,
>> I'm finding it easier to represent a byte count as a 64-bit
>> integer at the block layer (even if we are internally capped
>> by SIZE_MAX or even INT_MAX for individual transactions, it's
>> still nicer to not have to worry about truncation/overflow
>> issues on as many variables).  Update the signature of
>> bdrv_round_to_clusters() to uniformly use int64_t, matching
>> the signature already chosen for bdrv_is_allocated and the
>> fact that off_t is also a signed type, then adjust clients
>> according to the required fallout.
>>
>> Signed-off-by: Eric Blake <eblake@redhat.com>
>> Reviewed-by: Fam Zheng <famz@redhat.com>
>>

>> @@ -946,7 +946,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
>>      struct iovec iov;
>>      QEMUIOVector bounce_qiov;
>>      int64_t cluster_offset;
>> -    unsigned int cluster_bytes;
>> +    int64_t cluster_bytes;
>>      size_t skip_bytes;
>>      int ret;
>>
>> @@ -967,6 +967,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
>>      trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
>>                                     cluster_offset, cluster_bytes);
>>
>> +    assert(cluster_bytes < SIZE_MAX);
> 
> later in this function, is there any real or imagined risk of
> cluster_bytes exceeding INT_MAX when it's passed to
> bdrv_co_do_pwrite_zeroes?
> 
>>      iov.iov_len = cluster_bytes;

cluster_bytes is the input 'unsigned int bytes' rounded out to cluster
boundaries, and we know 'bytes <= BDRV_REQUEST_MAX_BYTES' (which is
2^31 - 512, i.e. INT_MAX rounded down to a sector boundary).  Still, I
guess you are right that rounding out to a cluster size could produce a
larger value of exactly 2^31, which is bigger than INT_MAX but still
fits in a 32-bit unsigned int.  My assert was only there to make sure
that narrowing the 64-bit value to the size_t iov.iov_len remains
lossless on 32-bit platforms.

In theory, I don't think we ever attempt an unaligned operation near
2^31 whose rounded-out size would exceed INT_MAX (if we can, that's a
pre-existing bug that should be fixed separately).

Should I tighten the assertion to assert(cluster_bytes <=
BDRV_REQUEST_MAX_BYTES), then see if I can come up with a case where we
can violate that?

> Everything else looks obviously correct to me.
>
John Snow Sept. 26, 2017, 7:29 p.m. | #3
On 09/26/2017 03:18 PM, Eric Blake wrote:
> On 09/26/2017 01:51 PM, John Snow wrote:
>>
>>
>> On 09/13/2017 12:03 PM, Eric Blake wrote:
>>> In the process of converting sector-based interfaces to bytes,
>>> I'm finding it easier to represent a byte count as a 64-bit
>>> integer at the block layer (even if we are internally capped
>>> by SIZE_MAX or even INT_MAX for individual transactions, it's
>>> still nicer to not have to worry about truncation/overflow
>>> issues on as many variables).  Update the signature of
>>> bdrv_round_to_clusters() to uniformly use int64_t, matching
>>> the signature already chosen for bdrv_is_allocated and the
>>> fact that off_t is also a signed type, then adjust clients
>>> according to the required fallout.
>>>
>>> Signed-off-by: Eric Blake <eblake@redhat.com>
>>> Reviewed-by: Fam Zheng <famz@redhat.com>
>>>
> 
>>> @@ -946,7 +946,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
>>>      struct iovec iov;
>>>      QEMUIOVector bounce_qiov;
>>>      int64_t cluster_offset;
>>> -    unsigned int cluster_bytes;
>>> +    int64_t cluster_bytes;
>>>      size_t skip_bytes;
>>>      int ret;
>>>
>>> @@ -967,6 +967,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
>>>      trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
>>>                                     cluster_offset, cluster_bytes);
>>>
>>> +    assert(cluster_bytes < SIZE_MAX);
>>
>> later in this function, is there any real or imagined risk of
>> cluster_bytes exceeding INT_MAX when it's passed to
>> bdrv_co_do_pwrite_zeroes?
>>
>>>      iov.iov_len = cluster_bytes;
> 
> cluster_bytes is the input 'unsigned int bytes' rounded out to cluster

Ah, yes, we're probably not going to exceed that, you're right.

> boundaries, but where we know 'bytes <= BDRV_REQUEST_MAX_BYTES' (which
> is 2^31 - 511).  Still, I guess you are right that rounding to a cluster
> size could produce a larger value of exactly 2^31 (bigger than INT_MAX,
> but still fits in 32-bit unsigned int, so my assert was to make sure
> that truncating 64 bits to size_t iov.iov_len still works on 32-bit
> platforms).
> 
> In theory, I don't think we ever attempt an unaligned operation near
> 2^31 that would round up to INT_MAX overflow (if we can, that's a
> pre-existing bug that should be fixed separately).
> 
> Should I tighten the assertion to assert(cluster_bytes <=
> BDRV_REQUEST_MAX_BYTES), then see if I can come up with a case where we
> can violate that?
> 

*Only* if you think it's worth your time. You'd know better than me at
this point if this is remotely possible or not. Just a simple width
check that caught my eye.

(Gotta prove to everyone I'm reading these, right? :p)

>> Everything else looks obviously correct to me.
>>
>

Patch

diff --git a/include/block/block.h b/include/block/block.h
index 2ad18775af..bb3b95d491 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -475,9 +475,9 @@  int bdrv_get_flags(BlockDriverState *bs);
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
 void bdrv_round_to_clusters(BlockDriverState *bs,
-                            int64_t offset, unsigned int bytes,
+                            int64_t offset, int64_t bytes,
                             int64_t *cluster_offset,
-                            unsigned int *cluster_bytes);
+                            int64_t *cluster_bytes);

 const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
diff --git a/block/io.c b/block/io.c
index 6509c804d4..b362b46e3d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -446,9 +446,9 @@  static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
  * Round a region to cluster boundaries
  */
 void bdrv_round_to_clusters(BlockDriverState *bs,
-                            int64_t offset, unsigned int bytes,
+                            int64_t offset, int64_t bytes,
                             int64_t *cluster_offset,
-                            unsigned int *cluster_bytes)
+                            int64_t *cluster_bytes)
 {
     BlockDriverInfo bdi;

@@ -946,7 +946,7 @@  static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
     struct iovec iov;
     QEMUIOVector bounce_qiov;
     int64_t cluster_offset;
-    unsigned int cluster_bytes;
+    int64_t cluster_bytes;
     size_t skip_bytes;
     int ret;

@@ -967,6 +967,7 @@  static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
     trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
                                    cluster_offset, cluster_bytes);

+    assert(cluster_bytes < SIZE_MAX);
     iov.iov_len = cluster_bytes;
     iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
     if (bounce_buffer == NULL) {
diff --git a/block/mirror.c b/block/mirror.c
index 032cfe91fa..67f45cec4e 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -190,10 +190,9 @@  static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
     bool need_cow;
     int ret = 0;
     int64_t align_offset = *offset;
-    unsigned int align_bytes = *bytes;
+    int64_t align_bytes = *bytes;
     int max_bytes = s->granularity * s->max_iov;

-    assert(*bytes < INT_MAX);
     need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap);
     need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
                           s->cow_bitmap);
@@ -388,7 +387,7 @@  static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     while (nb_chunks > 0 && offset < s->bdev_length) {
         int64_t ret;
         int io_sectors;
-        unsigned int io_bytes;
+        int64_t io_bytes;
         int64_t io_bytes_acct;
         enum MirrorMethod {
             MIRROR_METHOD_COPY,
@@ -413,7 +412,7 @@  static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
             io_bytes = s->granularity;
         } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
             int64_t target_offset;
-            unsigned int target_bytes;
+            int64_t target_bytes;
             bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
                                    &target_offset, &target_bytes);
             if (target_offset == offset &&
diff --git a/block/trace-events b/block/trace-events
index 25dd5a3026..4c6586f156 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -12,7 +12,7 @@  blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flag
 bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
 bdrv_co_pwritev(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
 bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags 0x%x"
-bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"
+bdrv_co_do_copy_on_readv(void *bs, int64_t offset, int64_t bytes, int64_t cluster_offset, int64_t cluster_bytes) "bs %p offset %"PRId64" bytes %"PRId64" cluster_offset %"PRId64" cluster_bytes %"PRId64

 # block/stream.c
 stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d"