
[2/2] block: Let bdrv_drain_all() call aio_poll() for each AioContext

Message ID 1433339175-12300-3-git-send-email-yarygin@linux.vnet.ibm.com
State New

Commit Message

Alexander Yarygin June 3, 2015, 1:46 p.m. UTC
After commit 9b536adc ("block: acquire AioContext in
bdrv_drain_all()"), the aio_poll() function is called for every
BlockDriverState, on the assumption that every device may have its own
AioContext. If thousands of disks are attached, there are a lot of
BlockDriverStates but only a few AioContexts, which leads to a large
number of unnecessary aio_poll() calls.

This patch changes bdrv_drain_all() so that it finds the shared
AioContexts and calls aio_poll() only once per unique AioContext.

Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Alexander Yarygin <yarygin@linux.vnet.ibm.com>
---
 block/io.c | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)
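
The key idiom in the patch is to collect the distinct AioContexts into a
GSList up front, so that aio_poll() runs once per context rather than once
per BlockDriverState. Below is a minimal, self-contained sketch of that
deduplication pattern in plain GLib; the contexts_of_disks array and the
ctx_a/ctx_b variables are hypothetical stand-ins for AioContext pointers,
not QEMU code.

/* Build with: gcc dedup.c $(pkg-config --cflags --libs glib-2.0) */
#include <glib.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical stand-ins: several "disks" share the same context. */
    int ctx_a, ctx_b;
    gpointer contexts_of_disks[] = { &ctx_a, &ctx_a, &ctx_b, &ctx_a, &ctx_b };

    GSList *aio_ctxs = NULL, *ctx;
    gsize i;

    /* Same idiom as the patch: prepend a context only if it is not
     * already in the list, so aio_ctxs ends up holding unique entries. */
    for (i = 0; i < G_N_ELEMENTS(contexts_of_disks); i++) {
        gpointer aio_context = contexts_of_disks[i];
        if (!g_slist_find(aio_ctxs, aio_context)) {
            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
        }
    }

    /* The drain loop would then iterate over aio_ctxs (two entries here)
     * instead of over every BlockDriverState (five entries here). */
    for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
        printf("would call aio_poll() on context %p\n", ctx->data);
    }

    g_slist_free(aio_ctxs);
    return 0;
}

In the patch itself the list is filled with the pointers returned by
bdrv_get_aio_context(bs) while walking bdrv_next(), and the outer drain
loop then iterates over aio_ctxs, polling each context until none of its
BlockDriverStates has pending requests.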

Comments

Christian Borntraeger June 3, 2015, 2:11 p.m. UTC | #1
Am 03.06.2015 um 15:46 schrieb Alexander Yarygin:
> After commit 9b536adc ("block: acquire AioContext in
> bdrv_drain_all()"), the aio_poll() function is called for every
> BlockDriverState, on the assumption that every device may have its own
> AioContext. If thousands of disks are attached, there are a lot of
> BlockDriverStates but only a few AioContexts, which leads to a large
> number of unnecessary aio_poll() calls.
> 
> This patch changes bdrv_drain_all() so that it finds the shared
> AioContexts and calls aio_poll() only once per unique AioContext.
> 
> Cc: Christian Borntraeger <borntraeger@de.ibm.com>
> Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> Cc: Kevin Wolf <kwolf@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Alexander Yarygin <yarygin@linux.vnet.ibm.com>

This seems to help in the case where we have one iothread and hundreds of
disks - maybe in other cases as well. I don't understand yet why this patch
works fine while your previous patch caused hangs.
It's the same cluelessness on my side about the patch from Paolo that Stefan
will revert.

Christian

Patch

diff --git a/block/io.c b/block/io.c
index e394d92..7502186 100644
--- a/block/io.c
+++ b/block/io.c
@@ -271,17 +271,6 @@  static bool bdrv_requests_pending(BlockDriverState *bs)
     return false;
 }
 
-static bool bdrv_drain_one(BlockDriverState *bs)
-{
-    bool bs_busy;
-
-    bdrv_flush_io_queue(bs);
-    bdrv_start_throttled_reqs(bs);
-    bs_busy = bdrv_requests_pending(bs);
-    bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
-    return bs_busy;
-}
-
 /*
  * Wait for pending requests to complete on a single BlockDriverState subtree
  *
@@ -294,8 +283,13 @@  static bool bdrv_drain_one(BlockDriverState *bs)
  */
 void bdrv_drain(BlockDriverState *bs)
 {
-    while (bdrv_drain_one(bs)) {
+    bool busy = true;
+
+    while (busy) {
         /* Keep iterating */
+         bdrv_flush_io_queue(bs);
+         busy = bdrv_requests_pending(bs);
+         busy |= aio_poll(bdrv_get_aio_context(bs), busy);
     }
 }
 
@@ -316,6 +310,7 @@  void bdrv_drain_all(void)
     /* Always run first iteration so any pending completion BHs run */
     bool busy = true;
     BlockDriverState *bs = NULL;
+    GSList *aio_ctxs = NULL, *ctx;
 
     while ((bs = bdrv_next(bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
@@ -325,17 +320,30 @@  void bdrv_drain_all(void)
             block_job_pause(bs->job);
         }
         aio_context_release(aio_context);
+
+        if (!aio_ctxs || !g_slist_find(aio_ctxs, aio_context)) {
+            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
+        }
     }
 
     while (busy) {
         busy = false;
-        bs = NULL;
 
-        while ((bs = bdrv_next(bs))) {
-            AioContext *aio_context = bdrv_get_aio_context(bs);
+        for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
+            AioContext *aio_context = ctx->data;
+            bs = NULL;
 
             aio_context_acquire(aio_context);
-            busy |= bdrv_drain_one(bs);
+            while ((bs = bdrv_next(bs))) {
+                if (aio_context == bdrv_get_aio_context(bs)) {
+                    bdrv_flush_io_queue(bs);
+                    if (bdrv_requests_pending(bs)) {
+                        busy = true;
+                        aio_poll(aio_context, busy);
+                    }
+                }
+            }
+            busy |= aio_poll(aio_context, false);
             aio_context_release(aio_context);
         }
     }
@@ -350,6 +358,7 @@  void bdrv_drain_all(void)
         }
         aio_context_release(aio_context);
     }
+    g_slist_free(aio_ctxs);
 }
 
 /**
@@ -2600,4 +2609,5 @@  void bdrv_flush_io_queue(BlockDriverState *bs)
     } else if (bs->file) {
         bdrv_flush_io_queue(bs->file);
     }
+    bdrv_start_throttled_reqs(bs);
 }