diff mbox series

[v10,5/9] block: treat BDRV_REQ_ALLOCATE as serialising

Message ID 20181203101429.88735-6-anton.nefedov@virtuozzo.com
State New
Headers show
Series qcow2: cluster space preallocation | expand

Commit Message

Anton Nefedov Dec. 3, 2018, 10:14 a.m. UTC
The idea is that ALLOCATE requests may overlap with other requests.
Reuse the existing block layer infrastructure for serialising requests.
Use the following approach:
  - mark ALLOCATE also SERIALISING, so subsequent requests to the area wait
  - ALLOCATE request itself must never wait if another request is in flight
    already. Return EAGAIN, let the caller reconsider.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
---
 block/io.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy Dec. 5, 2018, 1:14 p.m. UTC | #1
03.12.2018 13:14, Anton Nefedov wrote:
> The idea is that ALLOCATE requests may overlap with other requests.

please, describe why

> Reuse the existing block layer infrastructure for serialising requests.
> Use the following approach:
>    - mark ALLOCATE also SERIALISING, so subsequent requests to the area wait
>    - ALLOCATE request itself must never wait if another request is in flight
>      already. Return EAGAIN, let the caller reconsider.
> 
> Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
> ---
>   block/io.c | 31 ++++++++++++++++++++++++-------
>   1 file changed, 24 insertions(+), 7 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index d9d7644858..6ff946f63d 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -720,12 +720,13 @@ void bdrv_dec_in_flight(BlockDriverState *bs)
>       bdrv_wakeup(bs);
>   }
>   
> -static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
> +static bool coroutine_fn find_or_wait_serialising_requests(
> +    BdrvTrackedRequest *self, bool wait)
>   {
>       BlockDriverState *bs = self->bs;
>       BdrvTrackedRequest *req;
>       bool retry;
> -    bool waited = false;
> +    bool found = false;
>   
>       if (!atomic_read(&bs->serialising_in_flight)) {
>           return false;
> @@ -751,11 +752,14 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>                    * will wait for us as soon as it wakes up, then just go on
>                    * (instead of producing a deadlock in the former case). */
>                   if (!req->waiting_for) {
> +                    found = true;
> +                    if (!wait) {
> +                        break;
> +                    }
>                       self->waiting_for = req;
>                       qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
>                       self->waiting_for = NULL;
>                       retry = true;
> -                    waited = true;
>                       break;
>                   }
>               }
> @@ -763,7 +767,12 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>           qemu_co_mutex_unlock(&bs->reqs_lock);
>       } while (retry);
>   
> -    return waited;
> +    return found;
> +}
> +
> +static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
> +{
> +    return find_or_wait_serialising_requests(self, true);
>   }
>   
>   static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
> @@ -1585,7 +1594,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
>                             BdrvTrackedRequest *req, int flags)
>   {
>       BlockDriverState *bs = child->bs;
> -    bool waited;
> +    bool found;
>       int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
>   
>       if (bs->read_only) {
> @@ -1602,9 +1611,13 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
>           mark_request_serialising(req, bdrv_get_cluster_size(bs));
>       }
>   
> -    waited = wait_serialising_requests(req);
> +    found = find_or_wait_serialising_requests(req,
> +                                              !(flags & BDRV_REQ_ALLOCATE));
> +    if (found && (flags & BDRV_REQ_ALLOCATE)) {
> +        return -EAGAIN;
> +    }
>   
> -    assert(!waited || !req->serialising ||
> +    assert(!found || !req->serialising ||
>              is_request_serialising_and_aligned(req));
>       assert(req->overlap_offset <= offset);
>       assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
> @@ -1866,6 +1879,10 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
>       assert(!((flags & BDRV_REQ_ALLOCATE) && (flags & BDRV_REQ_MAY_UNMAP)));
>       assert(!((flags & BDRV_REQ_ALLOCATE) && !(flags & BDRV_REQ_ZERO_WRITE)));
>   
> +    if (flags & BDRV_REQ_ALLOCATE) {
> +        flags |= BDRV_REQ_SERIALISING;
> +    }
> +
>       trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
>   
>       if (!bs->drv) {
> 

patch looks correct technically, I just don't know the reasoning..

the only thing, is that it would be good to add a comment in BDRV flags definition section, that _ALLOCATE implies _SERIALIZE.
Anton Nefedov Dec. 5, 2018, 2:01 p.m. UTC | #2
On 5/12/2018 4:14 PM, Vladimir Sementsov-Ogievskiy wrote:
> 03.12.2018 13:14, Anton Nefedov wrote:
>> The idea is that ALLOCATE requests may overlap with other requests.
> 
> please, describe why
> 

It is not used in this series from some point, but the idea is that the
caller might use ALLOCATE requests on a larger extent.

Described in the series header:

   2. moreover, efficient write_zeroes() operation can be used to
preallocate
      space megabytes (*configurable number) ahead which gives noticeable
      improvement on some storage types (e.g. distributed storage)
      where the space allocation operation might be expensive)
      (Not included in this patchset since v6).

So, it's possible to drop from this series and add later but I'd like
to receive general remarks on whether this is an acceptable way.

>> Reuse the existing block layer infrastructure for serialising requests.
>> Use the following approach:
>>     - mark ALLOCATE also SERIALISING, so subsequent requests to the area wait
>>     - ALLOCATE request itself must never wait if another request is in flight
>>       already. Return EAGAIN, let the caller reconsider.
>>
>> Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
>> ---
>>    block/io.c | 31 ++++++++++++++++++++++++-------
>>    1 file changed, 24 insertions(+), 7 deletions(-)
>>
>> diff --git a/block/io.c b/block/io.c
>> index d9d7644858..6ff946f63d 100644
>> --- a/block/io.c
>> +++ b/block/io.c
>> @@ -720,12 +720,13 @@ void bdrv_dec_in_flight(BlockDriverState *bs)
>>        bdrv_wakeup(bs);
>>    }
>>    
>> -static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>> +static bool coroutine_fn find_or_wait_serialising_requests(
>> +    BdrvTrackedRequest *self, bool wait)
>>    {
>>        BlockDriverState *bs = self->bs;
>>        BdrvTrackedRequest *req;
>>        bool retry;
>> -    bool waited = false;
>> +    bool found = false;
>>    
>>        if (!atomic_read(&bs->serialising_in_flight)) {
>>            return false;
>> @@ -751,11 +752,14 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>>                     * will wait for us as soon as it wakes up, then just go on
>>                     * (instead of producing a deadlock in the former case). */
>>                    if (!req->waiting_for) {
>> +                    found = true;
>> +                    if (!wait) {
>> +                        break;
>> +                    }
>>                        self->waiting_for = req;
>>                        qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
>>                        self->waiting_for = NULL;
>>                        retry = true;
>> -                    waited = true;
>>                        break;
>>                    }
>>                }
>> @@ -763,7 +767,12 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>>            qemu_co_mutex_unlock(&bs->reqs_lock);
>>        } while (retry);
>>    
>> -    return waited;
>> +    return found;
>> +}
>> +
>> +static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>> +{
>> +    return find_or_wait_serialising_requests(self, true);
>>    }
>>    
>>    static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
>> @@ -1585,7 +1594,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
>>                              BdrvTrackedRequest *req, int flags)
>>    {
>>        BlockDriverState *bs = child->bs;
>> -    bool waited;
>> +    bool found;
>>        int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
>>    
>>        if (bs->read_only) {
>> @@ -1602,9 +1611,13 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
>>            mark_request_serialising(req, bdrv_get_cluster_size(bs));
>>        }
>>    
>> -    waited = wait_serialising_requests(req);
>> +    found = find_or_wait_serialising_requests(req,
>> +                                              !(flags & BDRV_REQ_ALLOCATE));
>> +    if (found && (flags & BDRV_REQ_ALLOCATE)) {
>> +        return -EAGAIN;
>> +    }
>>    
>> -    assert(!waited || !req->serialising ||
>> +    assert(!found || !req->serialising ||
>>               is_request_serialising_and_aligned(req));
>>        assert(req->overlap_offset <= offset);
>>        assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
>> @@ -1866,6 +1879,10 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
>>        assert(!((flags & BDRV_REQ_ALLOCATE) && (flags & BDRV_REQ_MAY_UNMAP)));
>>        assert(!((flags & BDRV_REQ_ALLOCATE) && !(flags & BDRV_REQ_ZERO_WRITE)));
>>    
>> +    if (flags & BDRV_REQ_ALLOCATE) {
>> +        flags |= BDRV_REQ_SERIALISING;
>> +    }
>> +
>>        trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
>>    
>>        if (!bs->drv) {
>>
> 
> patch looks correct technically, I just don't know the reasoning..
> 
> the only thing, is that it would be good to add a comment in BDRV flags definition section, that _ALLOCATE implies _SERIALIZE.
> 
> 

I see your point, but it does not imply SERIALIZE in sense that the
caller must set both (as with ALLOCATE and WRITE_ZEROES) - it's set
implicitly.

maybe:

diff --git a/include/block/block.h b/include/block/block.h
index f571082415..a0bf3fed93 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -87,6 +87,9 @@ typedef enum {
       * efficiently allocate the space so it reads as zeroes, or return 
an error.
       * If this flag is set then BDRV_REQ_ZERO_WRITE must also be set.
       * This flag cannot be set together with BDRV_REQ_MAY_UNMAP.
+     * This flag implicitly behaves as BDRV_REQ_SERIALISING i.e. it is
+     * protected from conflicts with overlapping requests. If such 
conflict is
+     * detected, -EAGAIN is returned.
       */
      BDRV_REQ_ALLOCATE           = 0x100,
Vladimir Sementsov-Ogievskiy Dec. 12, 2018, 12:48 p.m. UTC | #3
05.12.2018 17:01, Anton Nefedov wrote:
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -87,6 +87,9 @@ typedef enum {
>         * efficiently allocate the space so it reads as zeroes, or return
> an error.
>         * If this flag is set then BDRV_REQ_ZERO_WRITE must also be set.
>         * This flag cannot be set together with BDRV_REQ_MAY_UNMAP.
> +     * This flag implicitly behaves as BDRV_REQ_SERIALISING i.e. it is
> +     * protected from conflicts with overlapping requests. If such
> conflict is
> +     * detected, -EAGAIN is returned.

"behaves as" sounds like "do the same" for me, so better is "implicitly sets" or something like this.

>         */
>        BDRV_REQ_ALLOCATE           = 0x100,
> 
>
Anton Nefedov Dec. 13, 2018, 11:57 a.m. UTC | #4
On 12/12/2018 3:48 PM, Vladimir Sementsov-Ogievskiy wrote:
> 05.12.2018 17:01, Anton Nefedov wrote:
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -87,6 +87,9 @@ typedef enum {
>>          * efficiently allocate the space so it reads as zeroes, or return
>> an error.
>>          * If this flag is set then BDRV_REQ_ZERO_WRITE must also be set.
>>          * This flag cannot be set together with BDRV_REQ_MAY_UNMAP.
>> +     * This flag implicitly behaves as BDRV_REQ_SERIALISING i.e. it is
>> +     * protected from conflicts with overlapping requests. If such
>> conflict is
>> +     * detected, -EAGAIN is returned.
> 
> "behaves as" sounds like "do the same" for me, so better is "implicitly sets" or something like this.
> 

"implicitly sets" is good enough for me
diff mbox series

Patch

diff --git a/block/io.c b/block/io.c
index d9d7644858..6ff946f63d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -720,12 +720,13 @@  void bdrv_dec_in_flight(BlockDriverState *bs)
     bdrv_wakeup(bs);
 }
 
-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+static bool coroutine_fn find_or_wait_serialising_requests(
+    BdrvTrackedRequest *self, bool wait)
 {
     BlockDriverState *bs = self->bs;
     BdrvTrackedRequest *req;
     bool retry;
-    bool waited = false;
+    bool found = false;
 
     if (!atomic_read(&bs->serialising_in_flight)) {
         return false;
@@ -751,11 +752,14 @@  static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                  * will wait for us as soon as it wakes up, then just go on
                  * (instead of producing a deadlock in the former case). */
                 if (!req->waiting_for) {
+                    found = true;
+                    if (!wait) {
+                        break;
+                    }
                     self->waiting_for = req;
                     qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
                     self->waiting_for = NULL;
                     retry = true;
-                    waited = true;
                     break;
                 }
             }
@@ -763,7 +767,12 @@  static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
         qemu_co_mutex_unlock(&bs->reqs_lock);
     } while (retry);
 
-    return waited;
+    return found;
+}
+
+static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+{
+    return find_or_wait_serialising_requests(self, true);
 }
 
 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
@@ -1585,7 +1594,7 @@  bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
                           BdrvTrackedRequest *req, int flags)
 {
     BlockDriverState *bs = child->bs;
-    bool waited;
+    bool found;
     int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
 
     if (bs->read_only) {
@@ -1602,9 +1611,13 @@  bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
         mark_request_serialising(req, bdrv_get_cluster_size(bs));
     }
 
-    waited = wait_serialising_requests(req);
+    found = find_or_wait_serialising_requests(req,
+                                              !(flags & BDRV_REQ_ALLOCATE));
+    if (found && (flags & BDRV_REQ_ALLOCATE)) {
+        return -EAGAIN;
+    }
 
-    assert(!waited || !req->serialising ||
+    assert(!found || !req->serialising ||
            is_request_serialising_and_aligned(req));
     assert(req->overlap_offset <= offset);
     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
@@ -1866,6 +1879,10 @@  int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
     assert(!((flags & BDRV_REQ_ALLOCATE) && (flags & BDRV_REQ_MAY_UNMAP)));
     assert(!((flags & BDRV_REQ_ALLOCATE) && !(flags & BDRV_REQ_ZERO_WRITE)));
 
+    if (flags & BDRV_REQ_ALLOCATE) {
+        flags |= BDRV_REQ_SERIALISING;
+    }
+
     trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
 
     if (!bs->drv) {