diff mbox

[PULL,17/20] block: only call aio_poll on the current thread's AioContext

Message ID 1477666165-17297-18-git-send-email-famz@redhat.com
State New
Headers show

Commit Message

Fam Zheng Oct. 28, 2016, 2:49 p.m. UTC
From: Paolo Bonzini <pbonzini@redhat.com>

aio_poll is not thread safe; for example bdrv_drain can hang if
the last in-flight I/O operation is completed in the I/O thread after
the main thread has checked bs->in_flight.

The bug remains latent as long as all of it is called within
aio_context_acquire/aio_context_release, but this will change soon.

To fix this, if bdrv_drain is called from outside the I/O thread,
signal the main AioContext through a dummy bottom half.  The event
loop then only runs in the I/O thread.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1477565348-5458-18-git-send-email-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 async.c                         |  1 +
 block.c                         |  2 ++
 block/io.c                      | 12 ++++++++++++
 block/nfs.c                     |  1 +
 block/sheepdog.c                |  3 +++
 hw/scsi/virtio-scsi-dataplane.c |  4 +---
 include/block/block.h           | 26 +++++++++++++++++++++++---
 include/block/block_int.h       | 17 +++++++++++++++++
 8 files changed, 60 insertions(+), 6 deletions(-)
diff mbox

Patch

diff --git a/async.c b/async.c
index f30d011..fb37b03 100644
--- a/async.c
+++ b/async.c
@@ -61,6 +61,7 @@  void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
     smp_wmb();
     ctx->first_bh = bh;
     qemu_mutex_unlock(&ctx->bh_lock);
+    aio_notify(ctx);
 }
 
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
diff --git a/block.c b/block.c
index fbe485c..a17baab 100644
--- a/block.c
+++ b/block.c
@@ -2090,7 +2090,9 @@  int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er
 
     assert(bs_queue != NULL);
 
+    aio_context_release(ctx);
     bdrv_drain_all();
+    aio_context_acquire(ctx);
 
     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
diff --git a/block/io.c b/block/io.c
index 6fc0145..be0d862 100644
--- a/block/io.c
+++ b/block/io.c
@@ -474,9 +474,21 @@  void bdrv_inc_in_flight(BlockDriverState *bs)
     atomic_inc(&bs->in_flight);
 }
 
+static void dummy_bh_cb(void *opaque)
+{
+}
+
+void bdrv_wakeup(BlockDriverState *bs)
+{
+    if (bs->wakeup) {
+        aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
+    }
+}
+
 void bdrv_dec_in_flight(BlockDriverState *bs)
 {
     atomic_dec(&bs->in_flight);
+    bdrv_wakeup(bs);
 }
 
 static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
diff --git a/block/nfs.c b/block/nfs.c
index 7474fbc..88c60a9 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -505,6 +505,7 @@  nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
         error_report("NFS Error: %s", nfs_get_error(nfs));
     }
     task->complete = 1;
+    bdrv_wakeup(task->bs);
 }
 
 static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 16a5c1c..1fb9173 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -702,6 +702,9 @@  out:
 
     srco->ret = ret;
     srco->finished = true;
+    if (srco->bs) {
+        bdrv_wakeup(srco->bs);
+    }
 }
 
 /*
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index b173b94..9424f0e 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -189,13 +189,11 @@  void virtio_scsi_dataplane_stop(VirtIOSCSI *s)
     assert(s->ctx == iothread_get_aio_context(vs->conf.iothread));
 
     aio_context_acquire(s->ctx);
-
     virtio_scsi_clear_aio(s);
+    aio_context_release(s->ctx);
 
     blk_drain_all(); /* ensure there are no in-flight requests */
 
-    aio_context_release(s->ctx);
-
     for (i = 0; i < vs->conf.num_queues + 2; i++) {
         virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
     }
diff --git a/include/block/block.h b/include/block/block.h
index 84257ab..b7dc7d5 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -337,9 +337,29 @@  void bdrv_drain_all(void);
 #define BDRV_POLL_WHILE(bs, cond) ({                       \
     bool waited_ = false;                                  \
     BlockDriverState *bs_ = (bs);                          \
-    while ((cond)) {                                       \
-        aio_poll(bdrv_get_aio_context(bs_), true);         \
-        waited_ = true;                                    \
+    AioContext *ctx_ = bdrv_get_aio_context(bs_);          \
+    if (aio_context_in_iothread(ctx_)) {                   \
+        while ((cond)) {                                   \
+            aio_poll(ctx_, true);                          \
+            waited_ = true;                                \
+        }                                                  \
+    } else {                                               \
+        assert(qemu_get_current_aio_context() ==           \
+               qemu_get_aio_context());                    \
+        /* Ask bdrv_dec_in_flight to wake up the main      \
+         * QEMU AioContext.  Extra I/O threads never take  \
+         * other I/O threads' AioContexts (see for example \
+         * block_job_defer_to_main_loop for how to do it). \
+         */                                                \
+        assert(!bs_->wakeup);                              \
+        bs_->wakeup = true;                                \
+        while ((cond)) {                                   \
+            aio_context_release(ctx_);                     \
+            aio_poll(qemu_get_aio_context(), true);        \
+            aio_context_acquire(ctx_);                     \
+            waited_ = true;                                \
+        }                                                  \
+        bs_->wakeup = false;                               \
     }                                                      \
     waited_; })
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1044dfe..e7ff584 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -475,6 +475,8 @@  struct BlockDriverState {
     unsigned int in_flight;
     unsigned int serialising_in_flight;
 
+    bool wakeup;
+
     /* Offset after the highest byte written to */
     uint64_t wr_highest_offset;
 
@@ -633,6 +635,21 @@  void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                       void (*aio_context_detached)(void *),
                                       void *opaque);
 
+/**
+ * bdrv_wakeup:
+ * @bs: The BlockDriverState for which an I/O operation has been completed.
+ *
+ * Wake up the main thread if it is waiting on BDRV_POLL_WHILE.  During
+ * synchronous I/O on a BlockDriverState that is attached to another
+ * I/O thread, the main thread lets the I/O thread's event loop run,
+ * waiting for the I/O operation to complete.  A bdrv_wakeup will wake
+ * up the main thread if necessary.
+ *
+ * Manual calls to bdrv_wakeup are rarely necessary, because
+ * bdrv_dec_in_flight already calls it.
+ */
+void bdrv_wakeup(BlockDriverState *bs);
+
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif