[RFC,0/9] vhost-nvme: new qemu nvme backend using nvme target

Message ID 1448266667.18175.5.camel@hasee
State New

Commit Message

Ming Lin Nov. 23, 2015, 8:17 a.m. UTC
On Sat, 2015-11-21 at 14:11 +0100, Paolo Bonzini wrote:
> 
> On 20/11/2015 01:20, Ming Lin wrote:
> > One improvement could be to use Google's NVMe vendor extension that
> > I sent in another thread, also here:
> > https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-google-ext
> > 
> > Qemu side:
> > http://www.minggr.net/cgit/cgit.cgi/qemu/log/?h=vhost-nvme.0
> > Kernel side also here:
> > https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=vhost-nvme.0
> 
> How much do you get with vhost-nvme plus vendor extension, compared to
> 190 MB/s for QEMU?

There are still some bugs. I'll update.

> 
> Note that in all likelihood, QEMU can actually do better than 190 MB/s,
> and gain more parallelism too, by moving the processing of the
> ioeventfds to a separate thread.  This is similar to
> hw/block/dataplane/virtio-blk.c.
> 
> It's actually pretty easy to do.  Even though
> hw/block/dataplane/virtio-blk.c is still using some old APIs, all memory
> access in QEMU is now thread-safe.  I have pending patches for 2.6 that
> cut that file down to a mere 200 lines of code, NVMe would probably be
> about the same.

Is there a git tree for your patches?

Did you mean some pseudocode like the below?
1. need an iothread for each cq/sq?
2. need an AioContext for each cq/sq?

 hw/block/nvme.c | 32 ++++++++++++++++++++++++++++++--
 hw/block/nvme.h |  8 ++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)


> 
> Paolo

Comments

Paolo Bonzini Nov. 23, 2015, 2:14 p.m. UTC | #1
On 23/11/2015 09:17, Ming Lin wrote:
> On Sat, 2015-11-21 at 14:11 +0100, Paolo Bonzini wrote:
>>
>> On 20/11/2015 01:20, Ming Lin wrote:
>>> One improvement could be to use Google's NVMe vendor extension that
>>> I sent in another thread, also here:
>>> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-google-ext
>>>
>>> Qemu side:
>>> http://www.minggr.net/cgit/cgit.cgi/qemu/log/?h=vhost-nvme.0
>>> Kernel side also here:
>>> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=vhost-nvme.0
>>
>> How much do you get with vhost-nvme plus vendor extension, compared to
>> 190 MB/s for QEMU?
> 
> There are still some bugs. I'll update.

Sure.

>> Note that in all likelihood, QEMU can actually do better than 190 MB/s,
>> and gain more parallelism too, by moving the processing of the
>> ioeventfds to a separate thread.  This is similar to
>> hw/block/dataplane/virtio-blk.c.
>>
>> It's actually pretty easy to do.  Even though
>> hw/block/dataplane/virtio-blk.c is still using some old APIs, all memory
>> access in QEMU is now thread-safe.  I have pending patches for 2.6 that
>> cut that file down to a mere 200 lines of code, NVMe would probably be
>> about the same.
> 
> Is there a git tree for your patches?

No, not yet.  I'll post them today or tomorrow, will make sure to Cc you.

> Did you mean some pseudocode like the below?
> 1. need an iothread for each cq/sq?
> 2. need an AioContext for each cq/sq?
> 
>  hw/block/nvme.c | 32 ++++++++++++++++++++++++++++++--
>  hw/block/nvme.h |  8 ++++++++
>  2 files changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f27fd35..fed4827 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -28,6 +28,8 @@
>  #include "sysemu/sysemu.h"
>  #include "qapi/visitor.h"
>  #include "sysemu/block-backend.h"
> +#include "sysemu/iothread.h"
> +#include "qom/object_interfaces.h"
>  
>  #include "nvme.h"
>  
> @@ -558,9 +560,22 @@ static void nvme_init_cq_eventfd(NvmeCQueue *cq)
>      uint16_t offset = (cq->cqid*2+1) * (4 << NVME_CAP_DSTRD(n->bar.cap));
>  
>      event_notifier_init(&cq->notifier, 0);
> -    event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
>      memory_region_add_eventfd(&n->iomem,
>          0x1000 + offset, 4, false, 0, &cq->notifier);
> +
> +    object_initialize(&cq->internal_iothread_obj,
> +                      sizeof(cq->internal_iothread_obj),
> +                      TYPE_IOTHREAD);
> +    user_creatable_complete(OBJECT(&cq->internal_iothread_obj), &error_abort);

For now, you have to use one iothread for all cq/sq of a single NVMe
device; multiqueue block layer is planned for 2.7 or 2.8.  Otherwise
yes, it's very close to just these changes.

If you use "-object iothread,id=NN" and a iothread property, you can
also use an N:M model with multiple disks attached to the same iothread.
 Defining the iothread property is like

	object_property_add_link(obj, "iothread", TYPE_IOTHREAD,
				(Object **)&s->conf.iothread,
				qdev_prop_allow_set_link_before_realize,
				OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL);
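
and realize can then resolve the link to an AioContext, roughly like this
(just a sketch; s->ctx and s->conf.iothread are illustrative names that
don't exist in hw/block/nvme.c today):

	/* Sketch: called from the device's realize function.  With
	 * "-object iothread,id=io0 -device nvme,drive=d0,iothread=io0"
	 * (assuming the nvme device grows such a property) the link is
	 * already set here; otherwise fall back to the main loop. */
	static void nvme_attach_aio_context(NvmeCtrl *s)
	{
	    if (s->conf.iothread) {
	        s->ctx = iothread_get_aio_context(s->conf.iothread);
	    } else {
	        s->ctx = qemu_get_aio_context();
	    }
	    blk_set_aio_context(s->conf.blk, s->ctx);
	}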

Thanks,

Paolo

> +    cq->iothread = &cq->internal_iothread_obj;
> +    cq->ctx = iothread_get_aio_context(cq->iothread);
> +    //Question: Need a conf.blk for each cq/sq???
> +    //blk_set_aio_context(cq->conf->conf.blk, cq->ctx);
> +    aio_context_acquire(cq->ctx);
> +    aio_set_event_notifier(cq->ctx, &cq->notifier, true,
> +                           nvme_cq_notifier);
> +    aio_context_release(cq->ctx);
>  }
>  
>  static void nvme_sq_notifier(EventNotifier *e)
> @@ -578,9 +593,22 @@ static void nvme_init_sq_eventfd(NvmeSQueue *sq)
>      uint16_t offset = sq->sqid * 2 * (4 << NVME_CAP_DSTRD(n->bar.cap));
>  
>      event_notifier_init(&sq->notifier, 0);
> -    event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
>      memory_region_add_eventfd(&n->iomem,
>          0x1000 + offset, 4, false, 0, &sq->notifier);
> +
> +    object_initialize(&sq->internal_iothread_obj,
> +                      sizeof(sq->internal_iothread_obj),
> +                      TYPE_IOTHREAD);
> +    user_creatable_complete(OBJECT(&sq->internal_iothread_obj), &error_abort);
> +    sq->iothread = &sq->internal_iothread_obj;
> +    sq->ctx = iothread_get_aio_context(sq->iothread);
> +    //Question: Need a conf.blk for each cq/sq???
> +    //blk_set_aio_context(sq->conf->conf.blk, sq->ctx);
> +
> +    aio_context_acquire(sq->ctx);
> +    aio_set_event_notifier(sq->ctx, &sq->notifier, true,
> +                           nvme_sq_notifier);
> +    aio_context_release(sq->ctx);
>  }
>  
>  static uint16_t nvme_set_db_memory(NvmeCtrl *n, const NvmeCmd *cmd)
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 608f202..171ee0b 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -667,6 +667,10 @@ typedef struct NvmeSQueue {
>       * do not go over this value will not result in MMIO writes (but will
>       * still write the tail pointer to the "db_addr" location above). */
>      uint64_t    eventidx_addr;
> +
> +    IOThread *iothread;
> +    IOThread internal_iothread_obj;
> +    AioContext *ctx;
>      EventNotifier notifier;
>  } NvmeSQueue;
>  
> @@ -690,6 +694,10 @@ typedef struct NvmeCQueue {
>       * do not go over this value will not result in MMIO writes (but will
>       * still write the head pointer to the "db_addr" location above). */
>      uint64_t    eventidx_addr;
> +
> +    IOThread *iothread;
> +    IOThread internal_iothread_obj;
> +    AioContext *ctx;
>      EventNotifier notifier;
>  } NvmeCQueue;
>  
> 
>>
>> Paolo
Ming Lin Nov. 24, 2015, 7:27 a.m. UTC | #2
On Mon, 2015-11-23 at 15:14 +0100, Paolo Bonzini wrote:
> 
> On 23/11/2015 09:17, Ming Lin wrote:
> > On Sat, 2015-11-21 at 14:11 +0100, Paolo Bonzini wrote:
> >>
> >> On 20/11/2015 01:20, Ming Lin wrote:
> >>> One improvement could be to use Google's NVMe vendor extension that
> >>> I sent in another thread, also here:
> >>> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-google-ext
> >>>
> >>> Qemu side:
> >>> http://www.minggr.net/cgit/cgit.cgi/qemu/log/?h=vhost-nvme.0
> >>> Kernel side also here:
> >>> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=vhost-nvme.0
> >>
> >> How much do you get with vhost-nvme plus vendor extension, compared to
> >> 190 MB/s for QEMU?
> > 
> > There are still some bugs. I'll update.
> 
> Sure.
> 
> >> Note that in all likelihood, QEMU can actually do better than 190 MB/s,
> >> and gain more parallelism too, by moving the processing of the
> >> ioeventfds to a separate thread.  This is similar to
> >> hw/block/dataplane/virtio-blk.c.
> >>
> >> It's actually pretty easy to do.  Even though
> >> hw/block/dataplane/virtio-blk.c is still using some old APIs, all memory
> >> access in QEMU is now thread-safe.  I have pending patches for 2.6 that
> >> cut that file down to a mere 200 lines of code, NVMe would probably be
> >> about the same.
> > 
> > Is there a git tree for your patches?
> 
> No, not yet.  I'll post them today or tomorrow, will make sure to Cc you.
> 
> > Did you mean some pseudocode like the below?
> > 1. need an iothread for each cq/sq?
> > 2. need an AioContext for each cq/sq?
> > 
> >  hw/block/nvme.c | 32 ++++++++++++++++++++++++++++++--
> >  hw/block/nvme.h |  8 ++++++++
> >  2 files changed, 38 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index f27fd35..fed4827 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -28,6 +28,8 @@
> >  #include "sysemu/sysemu.h"
> >  #include "qapi/visitor.h"
> >  #include "sysemu/block-backend.h"
> > +#include "sysemu/iothread.h"
> > +#include "qom/object_interfaces.h"
> >  
> >  #include "nvme.h"
> >  
> > @@ -558,9 +560,22 @@ static void nvme_init_cq_eventfd(NvmeCQueue *cq)
> >      uint16_t offset = (cq->cqid*2+1) * (4 << NVME_CAP_DSTRD(n->bar.cap));
> >  
> >      event_notifier_init(&cq->notifier, 0);
> > -    event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
> >      memory_region_add_eventfd(&n->iomem,
> >          0x1000 + offset, 4, false, 0, &cq->notifier);
> > +
> > +    object_initialize(&cq->internal_iothread_obj,
> > +                      sizeof(cq->internal_iothread_obj),
> > +                      TYPE_IOTHREAD);
> > +    user_creatable_complete(OBJECT(&cq->internal_iothread_obj), &error_abort);
> 
> For now, you have to use one iothread for all cq/sq of a single NVMe
> device; multiqueue block layer is planned for 2.7 or 2.8.  Otherwise
> yes, it's very close to just these changes.

Here is the call stack of the iothread for virtio-blk-dataplane:

handle_notify (qemu/hw/block/dataplane/virtio-blk.c:126)
aio_dispatch (qemu/aio-posix.c:329)
aio_poll (qemu/aio-posix.c:474)
iothread_run (qemu/iothread.c:45)
start_thread (pthread_create.c:312)
/lib/x86_64-linux-gnu/libc.so.6(clone+0x6d)

I think I'll have a "nvme_dev_notify" similar to "handle_notify":

static void nvme_dev_notify(EventNotifier *e)
{
    ....
}

But then how can I know whether this notification is for a cq or an sq?
Paolo Bonzini Nov. 24, 2015, 10:51 a.m. UTC | #3
On 24/11/2015 08:27, Ming Lin wrote:
> handle_notify (qemu/hw/block/dataplane/virtio-blk.c:126)
> aio_dispatch (qemu/aio-posix.c:329)
> aio_poll (qemu/aio-posix.c:474)
> iothread_run (qemu/iothread.c:45)
> start_thread (pthread_create.c:312)
> /lib/x86_64-linux-gnu/libc.so.6(clone+0x6d)
> 
> I think I'll have a "nvme_dev_notify" similar to "handle_notify":
> 
> static void nvme_dev_notify(EventNotifier *e)
> {
>     ....
> }
> 
> But then how can I know whether this notification is for a cq or an sq?

virtio-blk has a single queue, so it has a single EventNotifier.  Your
code using multiple EventNotifiers is fine; you can call
aio_set_event_notifier() multiple times on the same iothread's AioContext,
and since each sq/cq has its own notifier and handler, you already know
which queue the notification is for.
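
For instance, a sketch of what I mean (the n->iothread / n->ctx /
n->internal_iothread_obj fields are illustrative names, not something
from your patch; I'm assuming the usual sq->ctrl back-pointer):

    /* One iothread per controller; every sq/cq registers its own
     * EventNotifier on that iothread's AioContext, so each handler still
     * knows its queue via container_of(e, NvmeSQueue, notifier). */
    static void nvme_init_iothread(NvmeCtrl *n)
    {
        object_initialize(&n->internal_iothread_obj,
                          sizeof(n->internal_iothread_obj), TYPE_IOTHREAD);
        user_creatable_complete(OBJECT(&n->internal_iothread_obj),
                                &error_abort);
        n->iothread = &n->internal_iothread_obj;
        n->ctx = iothread_get_aio_context(n->iothread);
        /* One AioContext for the whole device, so the BlockBackend only
         * moves once. */
        blk_set_aio_context(n->conf.blk, n->ctx);
    }

    static void nvme_init_sq_eventfd(NvmeSQueue *sq)
    {
        NvmeCtrl *n = sq->ctrl;
        uint16_t offset = sq->sqid * 2 * (4 << NVME_CAP_DSTRD(n->bar.cap));

        event_notifier_init(&sq->notifier, 0);
        memory_region_add_eventfd(&n->iomem, 0x1000 + offset, 4, false, 0,
                                  &sq->notifier);
        aio_context_acquire(n->ctx);
        aio_set_event_notifier(n->ctx, &sq->notifier, true, nvme_sq_notifier);
        aio_context_release(n->ctx);
    }

nvme_init_cq_eventfd() would be the same, just with the cq doorbell offset
and nvme_cq_notifier.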

Paolo
Ming Lin Nov. 30, 2015, 11:20 p.m. UTC | #4
On Mon, 2015-11-23 at 15:14 +0100, Paolo Bonzini wrote:
> 
> On 23/11/2015 09:17, Ming Lin wrote:
> > On Sat, 2015-11-21 at 14:11 +0100, Paolo Bonzini wrote:
> >>
> >> On 20/11/2015 01:20, Ming Lin wrote:
> >>> One improvement could be to use Google's NVMe vendor extension that
> >>> I sent in another thread, also here:
> >>> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-google-ext
> >>>
> >>> Qemu side:
> >>> http://www.minggr.net/cgit/cgit.cgi/qemu/log/?h=vhost-nvme.0
> >>> Kernel side also here:
> >>> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=vhost-nvme.0
> >>
> >> How much do you get with vhost-nvme plus vendor extension, compared to
> >> 190 MB/s for QEMU?
> > 
> > There are still some bugs. I'll update.
> 
> Sure.

Fixed it after Thanksgiving holiday :)
https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=vhost-nvme.0-ext

Combined with the previous test results (lowest to highest):

qemu-nvme: 148MB/s
vhost-nvme + google-ext: 230MB/s
qemu-nvme + google-ext + eventfd: 294MB/s
virtio-scsi: 296MB/s
virtio-blk: 344MB/s

"vhost-nvme + google-ext" didn't get good enough performance.
Still tuning.
Paolo Bonzini Dec. 1, 2015, 4:02 p.m. UTC | #5
On 01/12/2015 00:20, Ming Lin wrote:
> qemu-nvme: 148MB/s
> vhost-nvme + google-ext: 230MB/s
> qemu-nvme + google-ext + eventfd: 294MB/s
> virtio-scsi: 296MB/s
> virtio-blk: 344MB/s
> 
> "vhost-nvme + google-ext" didn't get good enough performance.

I'd expect it to be on par with qemu-nvme plus ioeventfd, but the question
is: why should it be better?  For vhost-net, the answer is that more
zerocopy can be done if you put the data path in the kernel.

But qemu-nvme is already using io_submit for the data path, perhaps
there's not much to gain from vhost-nvme...

Paolo

> Still tuning.
Ming Lin Dec. 1, 2015, 4:26 p.m. UTC | #6
On Tue, 2015-12-01 at 17:02 +0100, Paolo Bonzini wrote:
> 
> On 01/12/2015 00:20, Ming Lin wrote:
> > qemu-nvme: 148MB/s
> > vhost-nvme + google-ext: 230MB/s
> > qemu-nvme + google-ext + eventfd: 294MB/s
> > virtio-scsi: 296MB/s
> > virtio-blk: 344MB/s
> > 
> > "vhost-nvme + google-ext" didn't get good enough performance.
> 
> I'd expect it to be on par with qemu-nvme plus ioeventfd, but the question
> is: why should it be better?  For vhost-net, the answer is that more
> zerocopy can be done if you put the data path in the kernel.
> 
> But qemu-nvme is already using io_submit for the data path, perhaps
> there's not much to gain from vhost-nvme...

What do you think about virtio-nvme+vhost-nvme?
I also have a patch for virtio-nvme:
https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-split/virtio

Just need to change vhost-nvme to work with it.

> 
> Paolo
> 
> > Still tuning.
Paolo Bonzini Dec. 1, 2015, 4:59 p.m. UTC | #7
> What do you think about virtio-nvme+vhost-nvme?

What would be the advantage over virtio-blk?  Multiqueue is not supported
by QEMU but it's already supported by Linux (commit 6a27b656fc).

To me, the advantage of nvme is that it provides more than decent performance on
unmodified Windows guests, and thanks to your vendor extension can be used
on Linux as well with speeds comparable to virtio-blk.  So it's potentially
a very good choice for a cloud provider that wants to support Windows guests
(together with e.g. a fast SAS emulated controller to replace virtio-scsi,
and emulated igb or ixgbe to replace virtio-net).

Which features are supported by NVMe and not virtio-blk?

Paolo

> I also have a patch for virtio-nvme:
> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-split/virtio
> 
> Just need to change vhost-nvme to work with it.
> 
> > 
> > Paolo
> > 
> > > Still tuning.
> 
> 
>
Ming Lin Dec. 2, 2015, 5:13 a.m. UTC | #8
On Tue, 2015-12-01 at 11:59 -0500, Paolo Bonzini wrote:
> > What do you think about virtio-nvme+vhost-nvme?
> 
> What would be the advantage over virtio-blk?  Multiqueue is not supported
> by QEMU but it's already supported by Linux (commit 6a27b656fc).

I expect performance would be better.

It seems Google Cloud VMs use both nvme and virtio-scsi. Not sure whether
virtio-blk is also used.
https://cloud.google.com/compute/docs/disks/local-ssd#runscript

> 
> To me, the advantage of nvme is that it provides more than decent performance on
> unmodified Windows guests, and thanks to your vendor extension can be used
> on Linux as well with speeds comparable to virtio-blk.  So it's potentially
> a very good choice for a cloud provider that wants to support Windows guests
> (together with e.g. a fast SAS emulated controller to replace virtio-scsi,
> and emulated igb or ixgbe to replace virtio-net).

The vhost-nvme patches are modeled on rts-megasas, which could possibly
serve as a fast emulated SAS controller.
https://github.com/Datera/rts-megasas

> 
> Which features are supported by NVMe and not virtio-blk?

Rob (CCed),

Would you share whether Google uses any NVMe-specific features?

Thanks.
Paolo Bonzini Dec. 2, 2015, 10:07 a.m. UTC | #9
On 02/12/2015 06:13, Ming Lin wrote:
> On Tue, 2015-12-01 at 11:59 -0500, Paolo Bonzini wrote:
>>> What do you think about virtio-nvme+vhost-nvme?
>>
>> What would be the advantage over virtio-blk?  Multiqueue is not supported
>> by QEMU but it's already supported by Linux (commit 6a27b656fc).
> 
> I expect performance would be better.

Why?  nvme and virtio-blk are almost the same, even more so with the
doorbell extension.  virtio is designed to only hit paths that are not
slowed down by virtualization.  It's really hard to do better, except
perhaps with VFIO (and then you don't need your vendor extension).

>> To me, the advantage of nvme is that it provides more than decent performance on
>> unmodified Windows guests, and thanks to your vendor extension can be used
>> on Linux as well with speeds comparable to virtio-blk.  So it's potentially
>> a very good choice for a cloud provider that wants to support Windows guests
>> (together with e.g. a fast SAS emulated controller to replace virtio-scsi,
>> and emulated igb or ixgbe to replace virtio-net).
> 
> The vhost-nvme patches are modeled on rts-megasas, which could possibly
> serve as a fast emulated SAS controller.
> https://github.com/Datera/rts-megasas

Why the hate for userspace? :)

I don't see a reason why vhost-nvme would be faster than a userspace
implementation.  vhost-blk was never committed upstream for similar
reasons: it lost all the userspace features (snapshots, storage
migration, etc.)---which are nice to have and do not cost performance if
you do not use them---without any compelling performance gain.

Without the doorbell extension you'd have to go back to userspace on
every write and ioctl to vhost (see MEGASAS_IOC_FRAME in rts-megasas).
With the doorbell extension you're doing exactly the same work, and then
kernel thread vs. userspace thread shouldn't matter much given similar
optimization effort.  A userspace NVMe, however, will gain all the
optimizations that are done to QEMU's block layer for free.  We have done
a lot and have more planned.

>> Which features are supported by NVMe and not virtio-blk?

Having read the driver, the main improvements of NVMe compared to
virtio-blk are support for discard and FUA.  Discard is easy to add to
virtio-blk.  In the past the idea was "just use virtio-scsi", but it may
be worth adding it now that SSDs are more common.

Thus, FUA is pretty much the only reason for a kernel-based
implementation, because it is not exported in userspace.  However, does
it actually make a difference on real-world workloads?  Local SSDs on
Google Cloud are not even persistent, so you never need to flush to them.

Paolo

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index f27fd35..fed4827 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -28,6 +28,8 @@ 
 #include "sysemu/sysemu.h"
 #include "qapi/visitor.h"
 #include "sysemu/block-backend.h"
+#include "sysemu/iothread.h"
+#include "qom/object_interfaces.h"
 
 #include "nvme.h"
 
@@ -558,9 +560,22 @@  static void nvme_init_cq_eventfd(NvmeCQueue *cq)
     uint16_t offset = (cq->cqid*2+1) * (4 << NVME_CAP_DSTRD(n->bar.cap));
 
     event_notifier_init(&cq->notifier, 0);
-    event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
     memory_region_add_eventfd(&n->iomem,
         0x1000 + offset, 4, false, 0, &cq->notifier);
+
+    object_initialize(&cq->internal_iothread_obj,
+                      sizeof(cq->internal_iothread_obj),
+                      TYPE_IOTHREAD);
+    user_creatable_complete(OBJECT(&cq->internal_iothread_obj), &error_abort);
+    cq->iothread = &cq->internal_iothread_obj;
+    cq->ctx = iothread_get_aio_context(cq->iothread);
+    //Question: Need a conf.blk for each cq/sq???
+    //blk_set_aio_context(cq->conf->conf.blk, cq->ctx);
+
+    aio_context_acquire(cq->ctx);
+    aio_set_event_notifier(cq->ctx, &cq->notifier, true,
+                           nvme_cq_notifier);
+    aio_context_release(cq->ctx);
 }
 
 static void nvme_sq_notifier(EventNotifier *e)
@@ -578,9 +593,22 @@  static void nvme_init_sq_eventfd(NvmeSQueue *sq)
     uint16_t offset = sq->sqid * 2 * (4 << NVME_CAP_DSTRD(n->bar.cap));
 
     event_notifier_init(&sq->notifier, 0);
-    event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
     memory_region_add_eventfd(&n->iomem,
         0x1000 + offset, 4, false, 0, &sq->notifier);
+
+    object_initialize(&sq->internal_iothread_obj,
+                      sizeof(sq->internal_iothread_obj),
+                      TYPE_IOTHREAD);
+    user_creatable_complete(OBJECT(&sq->internal_iothread_obj), &error_abort);
+    sq->iothread = &sq->internal_iothread_obj;
+    sq->ctx = iothread_get_aio_context(sq->iothread);
+    //Question: Need a conf.blk for each cq/sq???
+    //blk_set_aio_context(sq->conf->conf.blk, sq->ctx);
+
+    aio_context_acquire(sq->ctx);
+    aio_set_event_notifier(sq->ctx, &sq->notifier, true,
+                           nvme_sq_notifier);
+    aio_context_release(sq->ctx);
 }
 
 static uint16_t nvme_set_db_memory(NvmeCtrl *n, const NvmeCmd *cmd)
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 608f202..171ee0b 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -667,6 +667,10 @@  typedef struct NvmeSQueue {
      * do not go over this value will not result in MMIO writes (but will
      * still write the tail pointer to the "db_addr" location above). */
     uint64_t    eventidx_addr;
+
+    IOThread *iothread;
+    IOThread internal_iothread_obj;
+    AioContext *ctx;
     EventNotifier notifier;
 } NvmeSQueue;
 
@@ -690,6 +694,10 @@  typedef struct NvmeCQueue {
      * do not go over this value will not result in MMIO writes (but will
      * still write the head pointer to the "db_addr" location above). */
     uint64_t    eventidx_addr;
+
+    IOThread *iothread;
+    IOThread internal_iothread_obj;
+    AioContext *ctx;
     EventNotifier notifier;
 } NvmeCQueue;