diff mbox series

block/nvme: invoke blk_io_plug_call() outside q->lock

Message ID 20230712191628.252806-1-stefanha@redhat.com
State New
Headers show
Series block/nvme: invoke blk_io_plug_call() outside q->lock | expand

Commit Message

Stefan Hajnoczi July 12, 2023, 7:16 p.m. UTC
blk_io_plug_call() is invoked outside a blk_io_plug()/blk_io_unplug()
section while opening the NVMe drive from:

  nvme_file_open() ->
  nvme_init() ->
  nvme_identify() ->
  nvme_admin_cmd_sync() ->
  nvme_submit_command() ->
  blk_io_plug_call()

blk_io_plug_call() immediately invokes the given callback when the
current thread is not plugged, as is the case during nvme_file_open().

Unfortunately, nvme_submit_command() calls blk_io_plug_call() with
q->lock still held:

    ...
    q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
    q->need_kick++;
    blk_io_plug_call(nvme_unplug_fn, q);
    qemu_mutex_unlock(&q->lock);
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^

nvme_unplug_fn() deadlocks trying to acquire q->lock because the lock is
already acquired by the same thread. The symptom is that QEMU hangs
during startup while opening the NVMe drive.

Fix this by moving the blk_io_plug_call() outside q->lock. This is safe
because no other thread runs code related to this queue and
blk_io_plug_call()'s internal state is immune to thread safety issues
since it is thread-local.

Reported-by: Lukáš Doktor <ldoktor@redhat.com>
Fixes: f2e590002bd6 ("block/nvme: convert to blk_io_plug_call() API")
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/nvme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

Comments

Lukáš Doktor July 17, 2023, 10:38 a.m. UTC | #1
Thank you, Stefan. I tested this one as well: it boots now and also seems to behave correctly under load.

Regards,
Lukáš

Tested-by: Lukas Doktor <ldoktor@redhat.com>

Dne 12. 07. 23 v 21:16 Stefan Hajnoczi napsal(a):
> blk_io_plug_call() is invoked outside a blk_io_plug()/blk_io_unplug()
> section while opening the NVMe drive from:
> 
>   nvme_file_open() ->
>   nvme_init() ->
>   nvme_identify() ->
>   nvme_admin_cmd_sync() ->
>   nvme_submit_command() ->
>   blk_io_plug_call()
> 
> blk_io_plug_call() immediately invokes the given callback when the
> current thread is not plugged, as is the case during nvme_file_open().
> 
> Unfortunately, nvme_submit_command() calls blk_io_plug_call() with
> q->lock still held:
> 
>     ...
>     q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
>     q->need_kick++;
>     blk_io_plug_call(nvme_unplug_fn, q);
>     qemu_mutex_unlock(&q->lock);
>     ^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> nvme_unplug_fn() deadlocks trying to acquire q->lock because the lock is
> already acquired by the same thread. The symptom is that QEMU hangs
> during startup while opening the NVMe drive.
> 
> Fix this by moving the blk_io_plug_call() outside q->lock. This is safe
> because no other thread runs code related to this queue and
> blk_io_plug_call()'s internal state is immune to thread safety issues
> since it is thread-local.
> 
> Reported-by: Lukáš Doktor <ldoktor@redhat.com>
> Fixes: f2e590002bd6 ("block/nvme: convert to blk_io_plug_call() API")
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  block/nvme.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/block/nvme.c b/block/nvme.c
> index 7ca85bc44a..b6e95f0b7e 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -501,8 +501,9 @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
>             q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd));
>      q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
>      q->need_kick++;
> +    qemu_mutex_unlock(&q->lock);
> +
>      blk_io_plug_call(nvme_unplug_fn, q);
> -    qemu_mutex_unlock(&q->lock);
>  }
>  
>  static void nvme_admin_cmd_sync_cb(void *opaque, int ret)
diff mbox series

Patch

diff --git a/block/nvme.c b/block/nvme.c
index 7ca85bc44a..b6e95f0b7e 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -501,8 +501,9 @@  static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
            q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd));
     q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
     q->need_kick++;
+    qemu_mutex_unlock(&q->lock);
+
     blk_io_plug_call(nvme_unplug_fn, q);
-    qemu_mutex_unlock(&q->lock);
 }
 
 static void nvme_admin_cmd_sync_cb(void *opaque, int ret)