[12/17] block: protect tracked_requests and flush_queue with reqs_lock

Message ID: 20170420120058.28404-13-pbonzini@redhat.com
State: New

Commit Message

Paolo Bonzini April 20, 2017, noon UTC
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block.c                   |  1 +
 block/io.c                | 20 +++++++++++++++++---
 include/block/block_int.h | 12 +++++++-----
 3 files changed, 25 insertions(+), 8 deletions(-)

Comments

Fam Zheng May 4, 2017, 7:30 a.m. UTC | #1
On Thu, 04/20 14:00, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block.c                   |  1 +
>  block/io.c                | 20 +++++++++++++++++---
>  include/block/block_int.h | 12 +++++++-----
>  3 files changed, 25 insertions(+), 8 deletions(-)
> 
> diff --git a/block.c b/block.c
> index f1aec36..3b2ed29 100644
> --- a/block.c
> +++ b/block.c
> @@ -234,6 +234,7 @@ BlockDriverState *bdrv_new(void)
>          QLIST_INIT(&bs->op_blockers[i]);
>      }
>      notifier_with_return_list_init(&bs->before_write_notifiers);
> +    qemu_co_mutex_init(&bs->reqs_lock);
>      bs->refcnt = 1;
>      bs->aio_context = qemu_get_aio_context();
>  
> diff --git a/block/io.c b/block/io.c
> index d17564b..7af9d47 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -378,8 +378,10 @@ static void tracked_request_end(BdrvTrackedRequest *req)
>          atomic_dec(&req->bs->serialising_in_flight);
>      }
>  
> +    qemu_co_mutex_lock(&req->bs->reqs_lock);
>      QLIST_REMOVE(req, list);
>      qemu_co_queue_restart_all(&req->wait_queue);
> +    qemu_co_mutex_unlock(&req->bs->reqs_lock);
>  }
>  
>  /**
> @@ -404,7 +406,9 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
>  
>      qemu_co_queue_init(&req->wait_queue);
>  
> +    qemu_co_mutex_lock(&bs->reqs_lock);
>      QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
> +    qemu_co_mutex_unlock(&bs->reqs_lock);
>  }
>  
>  static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
> @@ -526,6 +530,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>  
>      do {
>          retry = false;
> +        qemu_co_mutex_lock(&bs->reqs_lock);
>          QLIST_FOREACH(req, &bs->tracked_requests, list) {
>              if (req == self || (!req->serialising && !self->serialising)) {
>                  continue;
> @@ -544,7 +549,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>                   * (instead of producing a deadlock in the former case). */
>                  if (!req->waiting_for) {
>                      self->waiting_for = req;
> -                    qemu_co_queue_wait(&req->wait_queue, NULL);
> +                    qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
>                      self->waiting_for = NULL;
>                      retry = true;
>                      waited = true;
> @@ -552,6 +557,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>                  }
>              }
>          }
> +        qemu_co_mutex_unlock(&bs->reqs_lock);
>      } while (retry);
>  
>      return waited;
> @@ -2302,11 +2308,13 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
>      current_gen = atomic_read(&bs->write_gen);
>  
>      /* Wait until any previous flushes are completed */
> +    qemu_co_mutex_lock(&bs->reqs_lock);
>      while (bs->active_flush_req) {
> -        qemu_co_queue_wait(&bs->flush_queue, NULL);
> +        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
>      }
>  
>      bs->active_flush_req = true;
> +    qemu_co_mutex_unlock(&bs->reqs_lock);
>  
>      /* Write back all layers by calling one driver function */
>      if (bs->drv->bdrv_co_flush) {
> @@ -2328,10 +2336,14 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
>          goto flush_parent;
>      }
>  
> -    /* Check if we really need to flush anything */
> +    /* Check if we really need to flush anything
> +     * TODO: use int and atomic access */
> +    qemu_co_mutex_lock(&bs->reqs_lock);
>      if (bs->flushed_gen == current_gen) {

Should the atomic reading of current_gen be moved down here, to avoid TOCTOU?

> +        qemu_co_mutex_unlock(&bs->reqs_lock);
>          goto flush_parent;
>      }
> +    qemu_co_mutex_unlock(&bs->reqs_lock);
>  
>      BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
>      if (bs->drv->bdrv_co_flush_to_disk) {
> @@ -2375,12 +2387,14 @@ flush_parent:
>      ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
>  out:
>      /* Notify any pending flushes that we have completed */
> +    qemu_co_mutex_lock(&bs->reqs_lock);
>      if (ret == 0) {
>          bs->flushed_gen = current_gen;
>      }
>      bs->active_flush_req = false;
>      /* Return value is ignored - it's ok if wait queue is empty */
>      qemu_co_queue_next(&bs->flush_queue);
> +    qemu_co_mutex_unlock(&bs->reqs_lock);
>  
>  early_exit:
>      bdrv_dec_in_flight(bs);
Paolo Bonzini May 4, 2017, 8:35 a.m. UTC | #2
On 04/05/2017 09:30, Fam Zheng wrote:
>> @@ -2302,11 +2308,13 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
>>      current_gen = atomic_read(&bs->write_gen);
>>  
>>      /* Wait until any previous flushes are completed */
>> +    qemu_co_mutex_lock(&bs->reqs_lock);
>>      while (bs->active_flush_req) {
>> -        qemu_co_queue_wait(&bs->flush_queue, NULL);
>> +        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
>>      }
>>  
>>      bs->active_flush_req = true;
>> +    qemu_co_mutex_unlock(&bs->reqs_lock);
>>  
>>      /* Write back all layers by calling one driver function */
>>      if (bs->drv->bdrv_co_flush) {
>> @@ -2328,10 +2336,14 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
>>          goto flush_parent;
>>      }
>>  
>> -    /* Check if we really need to flush anything */
>> +    /* Check if we really need to flush anything
>> +     * TODO: use int and atomic access */
>> +    qemu_co_mutex_lock(&bs->reqs_lock);
>>      if (bs->flushed_gen == current_gen) {
> 
> Should the atomic reading of current_gen be moved down here, to avoid TOCTOU?

No, but another change is needed; current_gen needs to be read under the
lock, to ensure that flushes are processed in increasing generation order.

In addition, this access to flushed_gen does not need the lock;
bs->active_flush_req itself acts as a "lock" for bs->flushed_gen: only
the coroutine that set it to true will read or write it.  I'll adjust
this patch accordingly.

Paolo
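
For illustration, the flush prologue with the adjustment described in this
reply would look roughly as follows (a sketch reconstructed from the reply,
not necessarily the code that was eventually committed):

    qemu_co_mutex_lock(&bs->reqs_lock);
    /* Read under reqs_lock so that flushes observe write_gen in
     * increasing generation order. */
    current_gen = atomic_read(&bs->write_gen);

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }
    bs->active_flush_req = true;
    qemu_co_mutex_unlock(&bs->reqs_lock);

    /* ... driver flush calls elided ... */

    /* No lock needed here: active_flush_req acts as a lock for
     * flushed_gen, and only the coroutine that set it to true reads
     * or writes flushed_gen at this point. */
    if (bs->flushed_gen == current_gen) {
        goto flush_parent;
    }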

Patch

diff --git a/block.c b/block.c
index f1aec36..3b2ed29 100644
--- a/block.c
+++ b/block.c
@@ -234,6 +234,7 @@ BlockDriverState *bdrv_new(void)
         QLIST_INIT(&bs->op_blockers[i]);
     }
     notifier_with_return_list_init(&bs->before_write_notifiers);
+    qemu_co_mutex_init(&bs->reqs_lock);
     bs->refcnt = 1;
     bs->aio_context = qemu_get_aio_context();
 
diff --git a/block/io.c b/block/io.c
index d17564b..7af9d47 100644
--- a/block/io.c
+++ b/block/io.c
@@ -378,8 +378,10 @@ static void tracked_request_end(BdrvTrackedRequest *req)
         atomic_dec(&req->bs->serialising_in_flight);
     }
 
+    qemu_co_mutex_lock(&req->bs->reqs_lock);
     QLIST_REMOVE(req, list);
     qemu_co_queue_restart_all(&req->wait_queue);
+    qemu_co_mutex_unlock(&req->bs->reqs_lock);
 }
 
 /**
@@ -404,7 +406,9 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
 
     qemu_co_queue_init(&req->wait_queue);
 
+    qemu_co_mutex_lock(&bs->reqs_lock);
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+    qemu_co_mutex_unlock(&bs->reqs_lock);
 }
 
 static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
@@ -526,6 +530,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
 
     do {
         retry = false;
+        qemu_co_mutex_lock(&bs->reqs_lock);
         QLIST_FOREACH(req, &bs->tracked_requests, list) {
             if (req == self || (!req->serialising && !self->serialising)) {
                 continue;
@@ -544,7 +549,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                  * (instead of producing a deadlock in the former case). */
                 if (!req->waiting_for) {
                     self->waiting_for = req;
-                    qemu_co_queue_wait(&req->wait_queue, NULL);
+                    qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
                     self->waiting_for = NULL;
                     retry = true;
                     waited = true;
@@ -552,6 +557,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                 }
             }
         }
+        qemu_co_mutex_unlock(&bs->reqs_lock);
     } while (retry);
 
     return waited;
@@ -2302,11 +2308,13 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
     current_gen = atomic_read(&bs->write_gen);
 
     /* Wait until any previous flushes are completed */
+    qemu_co_mutex_lock(&bs->reqs_lock);
     while (bs->active_flush_req) {
-        qemu_co_queue_wait(&bs->flush_queue, NULL);
+        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
     }
 
     bs->active_flush_req = true;
+    qemu_co_mutex_unlock(&bs->reqs_lock);
 
     /* Write back all layers by calling one driver function */
     if (bs->drv->bdrv_co_flush) {
@@ -2328,10 +2336,14 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
         goto flush_parent;
     }
 
-    /* Check if we really need to flush anything */
+    /* Check if we really need to flush anything
+     * TODO: use int and atomic access */
+    qemu_co_mutex_lock(&bs->reqs_lock);
     if (bs->flushed_gen == current_gen) {
+        qemu_co_mutex_unlock(&bs->reqs_lock);
         goto flush_parent;
     }
+    qemu_co_mutex_unlock(&bs->reqs_lock);
 
     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
     if (bs->drv->bdrv_co_flush_to_disk) {
@@ -2375,12 +2387,14 @@ flush_parent:
     ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
 out:
     /* Notify any pending flushes that we have completed */
+    qemu_co_mutex_lock(&bs->reqs_lock);
     if (ret == 0) {
         bs->flushed_gen = current_gen;
     }
     bs->active_flush_req = false;
     /* Return value is ignored - it's ok if wait queue is empty */
     qemu_co_queue_next(&bs->flush_queue);
+    qemu_co_mutex_unlock(&bs->reqs_lock);
 
 early_exit:
     bdrv_dec_in_flight(bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 552680c..42b49f5 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -595,11 +595,6 @@ struct BlockDriverState {
     uint64_t write_threshold_offset;
     NotifierWithReturn write_threshold_notifier;
 
-    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
-    CoQueue flush_queue;                  /* Serializing flush queue */
-    bool active_flush_req;                /* Flush request in flight? */
-    unsigned int flushed_gen;             /* Flushed write generation */
-
     QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
 
     /* Offset after the highest byte written to */
@@ -630,6 +625,13 @@ struct BlockDriverState {
     /* Accessed with atomic ops.  */
     int quiesce_counter;
     unsigned int write_gen;               /* Current data generation */
+
+    /* Protected by reqs_lock.  */
+    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+    CoQueue flush_queue;                  /* Serializing flush queue */
+    bool active_flush_req;                /* Flush request in flight? */
+    unsigned int flushed_gen;             /* Flushed write generation */
+    CoMutex reqs_lock;
 };
 
 struct BlockBackendRootState {
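
Note: the patch relies on the contract of qemu_co_queue_wait(queue, mutex),
which atomically releases the given CoMutex while the coroutine sleeps and
reacquires it before returning, like a condition variable wait.  A minimal
sketch of the idiom (illustrative names only; "done" stands for any
condition protected by the lock):

    qemu_co_mutex_lock(&lock);
    while (!done) {
        /* Releases "lock" while sleeping and reacquires it on wakeup,
         * so "done" can be rechecked without a race. */
        qemu_co_queue_wait(&queue, &lock);
    }
    /* "done" holds and "lock" is held here. */
    qemu_co_mutex_unlock(&lock);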