diff mbox series

[v3,08/17] block/io: support int64_t bytes in bdrv_aligned_preadv()

Message ID 20200430111033.29980-9-vsementsov@virtuozzo.com
State New
Headers show
Series 64bit block-layer | expand

Commit Message

Vladimir Sementsov-Ogievskiy April 30, 2020, 11:10 a.m. UTC
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.

Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.

We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).

So, prepare bdrv_aligned_preadv() now.

Make byte variable in bdrv_padding_rmw_read() int64_t, as it defined
only to be passed to bdrv_aligned_preadv().

Series: 64bit-block-status
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/io.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

Comments

Eric Blake May 22, 2020, 3:14 p.m. UTC | #1
On 4/30/20 6:10 AM, Vladimir Sementsov-Ogievskiy wrote:
> We are generally moving to int64_t for both offset and bytes parameters
> on all io paths.
> 
> Main motivation is realization of 64-bit write_zeroes operation for
> fast zeroing large disk chunks, up to the whole disk.
> 
> We chose signed type, to be consistent with off_t (which is signed) and
> with possibility for signed return type (where negative value means
> error).
> 
> So, prepare bdrv_aligned_preadv() now.
> 
> Make byte variable in bdrv_padding_rmw_read() int64_t, as it defined
> only to be passed to bdrv_aligned_preadv().
> 
> Series: 64bit-block-status
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>   block/io.c | 9 +++++----
>   1 file changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index 6990d8cabe..d336e4e691 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -1250,16 +1250,17 @@ err:
>    * reads; any other features must be implemented by the caller.
>    */
>   static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
> -    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
> +    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
>       int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
>   {

Callers:

bdrv_padding_rmw_read() - passes int64_t (uint64_t before this patch), 
which in turn is bounded by request_alignment (still 32-bit) or 
BdrvRequestPadding.buf_len (size_t, but also constrained by creation to 
32-bit) - safe

bdrv_do_preadv_part() - passes unsigned int - safe


>       BlockDriverState *bs = child->bs;
>       int64_t total_bytes, max_bytes;
>       int ret = 0;
> -    uint64_t bytes_remaining = bytes;
> +    int64_t bytes_remaining = bytes;
>       int max_transfer;
>   
>       assert(is_power_of_2(align));
> +    assert(offset >= 0 && bytes >= 0);

Use within the function:

the new assertion added here does not check for whether offset+bytes is 
positive; I would suggest we strengthen it to instead be:
assert(offset >= 0 && (uint64_t) bytes <= INT64_MAX - offset);

         ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
  - takes int64_t, safe

         if (!ret || pnum != bytes) {
             ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
                                            qiov, qiov_offset, flags);
  - takes int64_t, safe

     if (bytes <= max_bytes && bytes <= max_transfer) {
         ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
  - takes int64_t, safe

>       assert((offset & (align - 1)) == 0);
>       assert((bytes & (align - 1)) == 0);
>       assert((bs->open_flags & BDRV_O_NO_IO) == 0);
> @@ -1315,7 +1316,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
>       }
>   
>       while (bytes_remaining) {
> -        int num;
> +        int64_t num;
>   
>           if (max_bytes) {
>               num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));

  - safe, bounded by max_transfer which is <= INT_MAX earlier in the 
function

             ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
                                      num, qiov, bytes - 
bytes_remaining, 0);
  - takes int64_t, and num is capped by max_transfer - safe

             ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
                                     bytes_remaining);
  - takes size_t, risky for 32-bit platforms.  Works because we checked 
that all our callers are still bounded by 32-bits, but we should 
consider adding an assertion that bytes <= SIZE_MAX so that even when 
our callers are updated in later patches, we know that we are still 
fragmenting requests appropriately on 32-bit platforms

> @@ -1416,7 +1417,7 @@ static int bdrv_padding_rmw_read(BdrvChild *child,
>       assert(req->serialising && pad->buf);
>   
>       if (pad->head || pad->merge_reads) {
> -        uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
> +        int64_t bytes = pad->merge_reads ? pad->buf_len : align;
>   
>           qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
>   
> 

Preferably with the suggested assertions added,
Reviewed-by: Eric Blake <eblake@redhat.com>
Alberto Garcia June 18, 2020, 2:35 p.m. UTC | #2
On Fri 22 May 2020 05:14:36 PM CEST, Eric Blake wrote:
>>   static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
>> -    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
>> +    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
>>       int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
 [...]
>>       BlockDriverState *bs = child->bs;
>>       int64_t total_bytes, max_bytes;
>>       int ret = 0;
>> -    uint64_t bytes_remaining = bytes;
>> +    int64_t bytes_remaining = bytes;
>>       int max_transfer;
>>   
>>       assert(is_power_of_2(align));
>> +    assert(offset >= 0 && bytes >= 0);
>
> Use within the function:
>
> the new assertion added here does not check for whether offset+bytes is 
> positive; I would suggest we strengthen it to instead be:
> assert(offset >= 0 && (uint64_t) bytes <= INT64_MAX - offset);

But here you would be making 'bytes' unsigned without asserting that
it's not negative.

Berto
Eric Blake June 18, 2020, 2:47 p.m. UTC | #3
On 6/18/20 9:35 AM, Alberto Garcia wrote:
> On Fri 22 May 2020 05:14:36 PM CEST, Eric Blake wrote:
>>>    static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
>>> -    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
>>> +    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
>>>        int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
>   [...]
>>>        BlockDriverState *bs = child->bs;
>>>        int64_t total_bytes, max_bytes;
>>>        int ret = 0;
>>> -    uint64_t bytes_remaining = bytes;
>>> +    int64_t bytes_remaining = bytes;
>>>        int max_transfer;
>>>    
>>>        assert(is_power_of_2(align));
>>> +    assert(offset >= 0 && bytes >= 0);
>>
>> Use within the function:
>>
>> the new assertion added here does not check for whether offset+bytes is
>> positive; I would suggest we strengthen it to instead be:
>> assert(offset >= 0 && (uint64_t) bytes <= INT64_MAX - offset);
> 
> But here you would be making 'bytes' unsigned without asserting that
> it's not negative.

'offset >= 0' proves that offset is nonnegative.  'INT64_MAX - offset' 
is still a signed integer, and is also necessarily non-negative (the 
maximum positive integer minus any other positive integer cannot go 
negative).  We then coerce that into an unsigned comparison, which means 
if bytes was negative, the coercion will result in something larger than 
(unsigned)INT64_MAX, and the overall assert() will fail.  Thus, as 
written, the assertion works just fine at proving both bytes and offset 
were non-negative, and that bytes+offset did not overflow a signed integer.
diff mbox series

Patch

diff --git a/block/io.c b/block/io.c
index 6990d8cabe..d336e4e691 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1250,16 +1250,17 @@  err:
  * reads; any other features must be implemented by the caller.
  */
 static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
-    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
     int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
     BlockDriverState *bs = child->bs;
     int64_t total_bytes, max_bytes;
     int ret = 0;
-    uint64_t bytes_remaining = bytes;
+    int64_t bytes_remaining = bytes;
     int max_transfer;
 
     assert(is_power_of_2(align));
+    assert(offset >= 0 && bytes >= 0);
     assert((offset & (align - 1)) == 0);
     assert((bytes & (align - 1)) == 0);
     assert((bs->open_flags & BDRV_O_NO_IO) == 0);
@@ -1315,7 +1316,7 @@  static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
     }
 
     while (bytes_remaining) {
-        int num;
+        int64_t num;
 
         if (max_bytes) {
             num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
@@ -1416,7 +1417,7 @@  static int bdrv_padding_rmw_read(BdrvChild *child,
     assert(req->serialising && pad->buf);
 
     if (pad->head || pad->merge_reads) {
-        uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
+        int64_t bytes = pad->merge_reads ? pad->buf_len : align;
 
         qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);