diff mbox

[05/11] blockjob: add block_job_defer_to_main_loop()

Message ID 1412182919-9550-6-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi Oct. 1, 2014, 5:01 p.m. UTC
Block jobs will run in the BlockDriverState's AioContext, which may not
always be the QEMU main loop.

There are some block layer APIs that are either not thread-safe or risk
lock ordering problems.  This includes bdrv_unref(), bdrv_close(), and
anything that calls bdrv_drain_all().

The block_job_defer_to_main_loop() API allows a block job to schedule a
function to run in the main loop with the BlockDriverState AioContext
held.

This function will be used to perform cleanup and backing chain
manipulations in block jobs.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 blockjob.c               | 35 +++++++++++++++++++++++++++++++++++
 include/block/blockjob.h | 19 +++++++++++++++++++
 2 files changed, 54 insertions(+)

Comments

Max Reitz Oct. 1, 2014, 7:06 p.m. UTC | #1
On 01.10.2014 19:01, Stefan Hajnoczi wrote:
> Block jobs will run in the BlockDriverState's AioContext, which may not
> always be the QEMU main loop.
>
> There are some block layer APIs that are either not thread-safe or risk
> lock ordering problems.  This includes bdrv_unref(), bdrv_close(), and
> anything that calls bdrv_drain_all().
>
> The block_job_defer_to_main_loop() API allows a block job to schedule a
> function to run in the main loop with the BlockDriverState AioContext
> held.
>
> This function will be used to perform cleanup and backing chain
> manipulations in block jobs.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>   blockjob.c               | 35 +++++++++++++++++++++++++++++++++++
>   include/block/blockjob.h | 19 +++++++++++++++++++
>   2 files changed, 54 insertions(+)
>
> diff --git a/blockjob.c b/blockjob.c
> index 0689fdd..24a64d8 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -313,3 +313,38 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
>       }
>       return action;
>   }
> +
> +typedef struct {
> +    BlockJob *job;
> +    QEMUBH *bh;
> +    AioContext *aio_context;
> +    BlockJobDeferToMainLoopFn *fn;
> +    void *opaque;
> +} BlockJobDeferToMainLoopData;
> +
> +static void block_job_defer_to_main_loop_bh(void *opaque)
> +{
> +    BlockJobDeferToMainLoopData *data = opaque;
> +
> +    qemu_bh_delete(data->bh);
> +
> +    aio_context_acquire(data->aio_context);
> +    data->fn(data->job, data->opaque);
> +    aio_context_release(data->aio_context);
> +
> +    g_free(data);
> +}
> +
> +void block_job_defer_to_main_loop(BlockJob *job,
> +                                  BlockJobDeferToMainLoopFn *fn,
> +                                  void *opaque)
> +{
> +    BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
> +    data->job = job;
> +    data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
> +    data->aio_context = bdrv_get_aio_context(job->bs);
> +    data->fn = fn;
> +    data->opaque = opaque;
> +
> +    qemu_bh_schedule(data->bh);
> +}

I'm not so sure whether it'd be possible to change the BDS's AIO context 
(in another thread) after the call to bdrv_get_aio_context() in 
block_job_defer_to_main_loop() and before 
block_job_defer_to_main_loop_bh() is run. If so, this might create race 
conditions.

Other than that, this patch looks good.

Max
Stefan Hajnoczi Oct. 2, 2014, 2:58 p.m. UTC | #2
On Wed, Oct 01, 2014 at 09:06:36PM +0200, Max Reitz wrote:
> On 01.10.2014 19:01, Stefan Hajnoczi wrote:
> >+typedef struct {
> >+    BlockJob *job;
> >+    QEMUBH *bh;
> >+    AioContext *aio_context;
> >+    BlockJobDeferToMainLoopFn *fn;
> >+    void *opaque;
> >+} BlockJobDeferToMainLoopData;
> >+
> >+static void block_job_defer_to_main_loop_bh(void *opaque)
> >+{
> >+    BlockJobDeferToMainLoopData *data = opaque;
> >+
> >+    qemu_bh_delete(data->bh);
> >+
> >+    aio_context_acquire(data->aio_context);
> >+    data->fn(data->job, data->opaque);
> >+    aio_context_release(data->aio_context);
> >+
> >+    g_free(data);
> >+}
> >+
> >+void block_job_defer_to_main_loop(BlockJob *job,
> >+                                  BlockJobDeferToMainLoopFn *fn,
> >+                                  void *opaque)
> >+{
> >+    BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
> >+    data->job = job;
> >+    data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
> >+    data->aio_context = bdrv_get_aio_context(job->bs);
> >+    data->fn = fn;
> >+    data->opaque = opaque;
> >+
> >+    qemu_bh_schedule(data->bh);
> >+}
> 
> I'm not so sure whether it'd be possible to change the BDS's AIO context (in
> another thread) after the call to bdrv_get_aio_context() in
> block_job_defer_to_main_loop() and before block_job_defer_to_main_loop_bh()
> is run. If so, this might create race conditions.
> 
> Other than that, this patch looks good.

Yes, I think you are correct.

The solution is a little tricky, we need to hold both AioContexts:

  static void block_job_defer_to_main_loop_bh(void *opaque)
  {
      BlockJobDeferToMainLoopData *data = opaque;
      AioContext aio_context;

      qemu_bh_delete(data->bh);

      /* Prevent race with block_job_defer_to_main_loop() */
      aio_context_acquire(data->aio_context);

      /* Fetch BDS AioContext again, in case it has changed */
      aio_context = bdrv_get_aio_context(data->job->bs);
      aio_context_acquire(aio_context);

      data->fn(data->job, data->opaque);

      aio_context_release(aio_context);

      aio_context_release(data->aio_context);

      g_free(data);
  }

Tada!  Because this executes in the main loop it is safe to do this - no lock
ordering problems.  And because aio_context_acquire() is recursive we can lock
twice without problems.

Stefan
diff mbox

Patch

diff --git a/blockjob.c b/blockjob.c
index 0689fdd..24a64d8 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -313,3 +313,38 @@  BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
     }
     return action;
 }
+
+typedef struct {
+    BlockJob *job;
+    QEMUBH *bh;
+    AioContext *aio_context;
+    BlockJobDeferToMainLoopFn *fn;
+    void *opaque;
+} BlockJobDeferToMainLoopData;
+
+static void block_job_defer_to_main_loop_bh(void *opaque)
+{
+    BlockJobDeferToMainLoopData *data = opaque;
+
+    qemu_bh_delete(data->bh);
+
+    aio_context_acquire(data->aio_context);
+    data->fn(data->job, data->opaque);
+    aio_context_release(data->aio_context);
+
+    g_free(data);
+}
+
+void block_job_defer_to_main_loop(BlockJob *job,
+                                  BlockJobDeferToMainLoopFn *fn,
+                                  void *opaque)
+{
+    BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
+    data->job = job;
+    data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
+    data->aio_context = bdrv_get_aio_context(job->bs);
+    data->fn = fn;
+    data->opaque = opaque;
+
+    qemu_bh_schedule(data->bh);
+}
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 60aa835..5c13124 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -295,4 +295,23 @@  void block_job_iostatus_reset(BlockJob *job);
 BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
                                         BlockdevOnError on_err,
                                         int is_read, int error);
+
+typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque);
+
+/**
+ * block_job_defer_to_main_loop:
+ * @job: The job
+ * @fn: The function to run in the main loop
+ * @opaque: The opaque value that is passed to @fn
+ *
+ * Execute a given function in the main loop with the BlockDriverState
+ * AioContext acquired.  Block jobs must call bdrv_unref(), bdrv_close(), and
+ * anything that uses bdrv_drain_all() in the main loop.
+ *
+ * The @job AioContext is held while @fn executes.
+ */
+void block_job_defer_to_main_loop(BlockJob *job,
+                                  BlockJobDeferToMainLoopFn *fn,
+                                  void *opaque);
+
 #endif