diff mbox

[13/16] block: only call aio_poll from iothread

Message ID 1455645388-32401-14-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini Feb. 16, 2016, 5:56 p.m. UTC
aio_poll is not thread safe; for example bdrv_drain can hang if
the last in-flight I/O operation is completed in the I/O thread after
the main thread has checked bs->in_flight.

The bug remains latent as long as all of it is called within
aio_context_acquire/aio_context_release, but this will change soon.

To fix this, if bdrv_drain is called from outside the I/O thread, handle
it internally in the BDS, without involving AioContext and aio_poll.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block.c                   |  2 ++
 block/io.c                | 21 ++++++++++++++++++---
 include/block/block_int.h |  5 ++++-
 3 files changed, 24 insertions(+), 4 deletions(-)

Comments

Fam Zheng March 9, 2016, 8:30 a.m. UTC | #1
On Tue, 02/16 18:56, Paolo Bonzini wrote:
> aio_poll is not thread safe; for example bdrv_drain can hang if
> the last in-flight I/O operation is completed in the I/O thread after
> the main thread has checked bs->in_flight.
> 
> The bug remains latent as long as all of it is called within
> aio_context_acquire/aio_context_release, but this will change soon.
> 
> To fix this, if bdrv_drain is called from outside the I/O thread, handle
> it internally in the BDS, without involving AioContext and aio_poll.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block.c                   |  2 ++
>  block/io.c                | 21 ++++++++++++++++++---
>  include/block/block_int.h |  5 ++++-
>  3 files changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/block.c b/block.c
> index fb02d7f..601a73f 100644
> --- a/block.c
> +++ b/block.c
> @@ -267,6 +267,7 @@ BlockDriverState *bdrv_new(void)
>      qemu_co_queue_init(&bs->throttled_reqs[1]);
>      bs->refcnt = 1;
>      bs->aio_context = qemu_get_aio_context();
> +    qemu_event_init(&bs->in_flight_event, true);
>  
>      QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
>  
> @@ -2395,6 +2396,7 @@ static void bdrv_delete(BlockDriverState *bs)
>      bdrv_make_anon(bs);
>  
>      QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
> +    qemu_event_destroy(&bs->in_flight_event);
>  
>      g_free(bs);
>  }
> diff --git a/block/io.c b/block/io.c
> index 04b52c8..ea0546f 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -251,11 +251,24 @@ static void bdrv_drain_recurse(BlockDriverState *bs)
>  
>  static bool bdrv_drain_io_recurse(BlockDriverState *bs)
>  {
> -    BdrvChild *child;
> +    AioContext *ctx = bdrv_get_aio_context(bs);
>      bool waited = false;
> +    BdrvChild *child;
>  
>      while (atomic_read(&bs->in_flight) > 0) {
> -        aio_poll(bdrv_get_aio_context(bs), true);
> +        if (aio_context_in_iothread(ctx)) {
> +            /* This case should not occur at all, except for the
> +             * main thread.
> +             */

Maybe assert ctx == qemu_get_aio_context()?

> +            aio_poll(bdrv_get_aio_context(bs), true);
> +        } else {
> +            qemu_event_reset(&bs->in_flight_event);
> +            if (atomic_read(&bs->in_flight) > 0) {
> +                aio_context_release(bdrv_get_aio_context(bs));
> +                qemu_event_wait(&bs->in_flight_event);
> +                aio_context_acquire(bdrv_get_aio_context(bs));
> +            }
> +        }
>          waited = true;
>      }
>  
> @@ -465,7 +478,9 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
>  
>  void bdrv_dec_in_flight(BlockDriverState *bs)
>  {
> -    atomic_dec(&bs->in_flight);
> +    if (atomic_fetch_dec(&bs->in_flight) == 1) {
> +        qemu_event_set(&bs->in_flight_event);
> +    }
>  }
>  
>  static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 89c38c0..9c96d5d 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -404,9 +404,12 @@ struct BlockDriverState {
>      /* Callback before write request is processed */
>      NotifierWithReturnList before_write_notifiers;
>  
> -    /* number of in-flight requests; overall and serialising */
> +    /* number of in-flight requests; overall and serialising.
> +     * in_flight_event is set when in_flight becomes 0.
> +     */
>      unsigned int in_flight;
>      unsigned int serialising_in_flight;
> +    QemuEvent in_flight_event;
>  
>      /* I/O throttling.
>       * throttle_state tells us if this BDS has I/O limits configured.
> -- 
> 2.5.0
> 
> 
>
Paolo Bonzini March 9, 2016, 8:55 a.m. UTC | #2
On 09/03/2016 09:30, Fam Zheng wrote:
> > -        aio_poll(bdrv_get_aio_context(bs), true);
> > +        if (aio_context_in_iothread(ctx)) {
> > +            /* This case should not occur at all, except for the
> > +             * main thread.
> > +             */
> 
> Maybe assert ctx == qemu_get_aio_context()?

Sure.

Paolo
Paolo Bonzini March 9, 2016, 9:10 a.m. UTC | #3
On 09/03/2016 09:30, Fam Zheng wrote:
> > -        aio_poll(bdrv_get_aio_context(bs), true);
> > +        if (aio_context_in_iothread(ctx)) {
> > +            /* This case should not occur at all, except for the
> > +             * main thread.
> > +             */
> 
> Maybe assert ctx == qemu_get_aio_context()?

Actually it happens for block/mirror.c's bdrv_drained_begin, but it's safe.

Paolo
Fam Zheng March 9, 2016, 9:27 a.m. UTC | #4
On Wed, 03/09 10:10, Paolo Bonzini wrote:
> 
> 
> On 09/03/2016 09:30, Fam Zheng wrote:
> > > -        aio_poll(bdrv_get_aio_context(bs), true);
> > > +        if (aio_context_in_iothread(ctx)) {
> > > +            /* This case should not occur at all, except for the
> > > +             * main thread.
> > > +             */
> > 
> > Maybe assert ctx == qemu_get_aio_context()?
> 
> Actually it happens for block/mirror.c's bdrv_drained_begin, but it's safe.

Oh yes, then we cannot assert, and the comment needs adjusting. Thanks for
pointing that out.

Fam
diff mbox

Patch

diff --git a/block.c b/block.c
index fb02d7f..601a73f 100644
--- a/block.c
+++ b/block.c
@@ -267,6 +267,7 @@  BlockDriverState *bdrv_new(void)
     qemu_co_queue_init(&bs->throttled_reqs[1]);
     bs->refcnt = 1;
     bs->aio_context = qemu_get_aio_context();
+    qemu_event_init(&bs->in_flight_event, true);
 
     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
 
@@ -2395,6 +2396,7 @@  static void bdrv_delete(BlockDriverState *bs)
     bdrv_make_anon(bs);
 
     QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
+    qemu_event_destroy(&bs->in_flight_event);
 
     g_free(bs);
 }
diff --git a/block/io.c b/block/io.c
index 04b52c8..ea0546f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -251,11 +251,24 @@  static void bdrv_drain_recurse(BlockDriverState *bs)
 
 static bool bdrv_drain_io_recurse(BlockDriverState *bs)
 {
-    BdrvChild *child;
+    AioContext *ctx = bdrv_get_aio_context(bs);
     bool waited = false;
+    BdrvChild *child;
 
     while (atomic_read(&bs->in_flight) > 0) {
-        aio_poll(bdrv_get_aio_context(bs), true);
+        if (aio_context_in_iothread(ctx)) {
+            /* This case should not occur at all, except for the
+             * main thread.
+             */
+            aio_poll(bdrv_get_aio_context(bs), true);
+        } else {
+            qemu_event_reset(&bs->in_flight_event);
+            if (atomic_read(&bs->in_flight) > 0) {
+                aio_context_release(bdrv_get_aio_context(bs));
+                qemu_event_wait(&bs->in_flight_event);
+                aio_context_acquire(bdrv_get_aio_context(bs));
+            }
+        }
         waited = true;
     }
 
@@ -465,7 +478,9 @@  void bdrv_inc_in_flight(BlockDriverState *bs)
 
 void bdrv_dec_in_flight(BlockDriverState *bs)
 {
-    atomic_dec(&bs->in_flight);
+    if (atomic_fetch_dec(&bs->in_flight) == 1) {
+        qemu_event_set(&bs->in_flight_event);
+    }
 }
 
 static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 89c38c0..9c96d5d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -404,9 +404,12 @@  struct BlockDriverState {
     /* Callback before write request is processed */
     NotifierWithReturnList before_write_notifiers;
 
-    /* number of in-flight requests; overall and serialising */
+    /* number of in-flight requests; overall and serialising.
+     * in_flight_event is set when in_flight becomes 0.
+     */
     unsigned int in_flight;
     unsigned int serialising_in_flight;
+    QemuEvent in_flight_event;
 
     /* I/O throttling.
      * throttle_state tells us if this BDS has I/O limits configured.