diff mbox series

hw/nvme: reenable cqe batching

Message ID 20221020113538.36526-1-its@irrelevant.dk
State New
Headers show
Series hw/nvme: reenable cqe batching | expand

Commit Message

Klaus Jensen Oct. 20, 2022, 11:35 a.m. UTC
From: Klaus Jensen <k.jensen@samsung.com>

Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell
updates") had the unintended effect of disabling batching of CQEs.

This patch changes the sq/cq timers to bottom halfs and instead of
calling nvme_post_cqes() immediately (causing an interrupt per cqe), we
defer the call.

                   | iops
  -----------------+------
    baseline       | 138k
    +cqe batching  | 233k

Fixes: 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell updates")
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/nvme/ctrl.c | 26 +++++++++++---------------
 hw/nvme/nvme.h |  4 ++--
 2 files changed, 13 insertions(+), 17 deletions(-)

Comments

Keith Busch Oct. 20, 2022, 4:37 p.m. UTC | #1
On Thu, Oct 20, 2022 at 01:35:38PM +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell
> updates") had the unintended effect of disabling batching of CQEs.
> 
> This patch changes the sq/cq timers to bottom halfs and instead of
> calling nvme_post_cqes() immediately (causing an interrupt per cqe), we
> defer the call.

Nice change, looks good! Timers never did seem to be the best fit for
this.

Reviewed-by: Keith Busch <kbusch@kernel.org>
Jinhao Fan Oct. 21, 2022, 2:37 a.m. UTC | #2
at 7:35 PM, Klaus Jensen <its@irrelevant.dk> wrote:

> Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell
> updates") had the unintended effect of disabling batching of CQEs.
> 
> This patch changes the sq/cq timers to bottom halfs and instead of
> calling nvme_post_cqes() immediately (causing an interrupt per cqe), we
> defer the call.

This change is definitely desired.

> 
>                   | iops
>  -----------------+------
>    baseline       | 138k
>    +cqe batching  | 233k

What is the queue depth config for this test?
Klaus Jensen Oct. 21, 2022, 5:37 a.m. UTC | #3
On Okt 21 10:37, Jinhao Fan wrote:
> at 7:35 PM, Klaus Jensen <its@irrelevant.dk> wrote:
> 
> > Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell
> > updates") had the unintended effect of disabling batching of CQEs.
> > 
> > This patch changes the sq/cq timers to bottom halfs and instead of
> > calling nvme_post_cqes() immediately (causing an interrupt per cqe), we
> > defer the call.
> 
> This change is definitely desired.
> 
> > 
> >                   | iops
> >  -----------------+------
> >    baseline       | 138k
> >    +cqe batching  | 233k
> 
> What is the queue depth config for this test?
> 

That's 64. My box isnt nearly as beefy as yours ;)
Jinhao Fan Oct. 21, 2022, 6:24 a.m. UTC | #4
at 1:37 PM, Klaus Jensen <its@irrelevant.dk> wrote:

> On Okt 21 10:37, Jinhao Fan wrote:
>> at 7:35 PM, Klaus Jensen <its@irrelevant.dk> wrote:
>> 
>>> Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell
>>> updates") had the unintended effect of disabling batching of CQEs.
>>> 
>>> This patch changes the sq/cq timers to bottom halfs and instead of
>>> calling nvme_post_cqes() immediately (causing an interrupt per cqe), we
>>> defer the call.
>> 
>> This change is definitely desired.
>> 
>>>                  | iops
>>> -----------------+------
>>>   baseline       | 138k
>>>   +cqe batching  | 233k
>> 
>> What is the queue depth config for this test?
> 
> That's 64. My box isnt nearly as beefy as yours ;)

:). The improvement looks great.

Reviewed-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
Klaus Jensen Nov. 1, 2022, 10:35 a.m. UTC | #5
On Oct 20 13:35, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell
> updates") had the unintended effect of disabling batching of CQEs.
> 
> This patch changes the sq/cq timers to bottom halfs and instead of
> calling nvme_post_cqes() immediately (causing an interrupt per cqe), we
> defer the call.
> 
>                    | iops
>   -----------------+------
>     baseline       | 138k
>     +cqe batching  | 233k
> 
> Fixes: 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell updates")
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>

Thanks for the reviews, applied to nvme-next!
diff mbox series

Patch

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 87aeba056499..73c870a42996 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1401,13 +1401,7 @@  static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
     QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
     QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
 
-    if (req->sq->ioeventfd_enabled) {
-        /* Post CQE directly since we are in main loop thread */
-        nvme_post_cqes(cq);
-    } else {
-        /* Schedule the timer to post CQE later since we are in vcpu thread */
-        timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
-    }
+    qemu_bh_schedule(cq->bh);
 }
 
 static void nvme_process_aers(void *opaque)
@@ -4252,7 +4246,7 @@  static void nvme_cq_notifier(EventNotifier *e)
         nvme_irq_deassert(n, cq);
     }
 
-    nvme_post_cqes(cq);
+    qemu_bh_schedule(cq->bh);
 }
 
 static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
@@ -4307,7 +4301,7 @@  static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
     uint16_t offset = sq->sqid << 3;
 
     n->sq[sq->sqid] = NULL;
-    timer_free(sq->timer);
+    qemu_bh_delete(sq->bh);
     if (sq->ioeventfd_enabled) {
         memory_region_del_eventfd(&n->iomem,
                                   0x1000 + offset, 4, false, 0, &sq->notifier);
@@ -4381,7 +4375,8 @@  static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
         sq->io_req[i].sq = sq;
         QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
     }
-    sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);
+
+    sq->bh = qemu_bh_new(nvme_process_sq, sq);
 
     if (n->dbbuf_enabled) {
         sq->db_addr = n->dbbuf_dbs + (sqid << 3);
@@ -4698,7 +4693,7 @@  static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
     uint16_t offset = (cq->cqid << 3) + (1 << 2);
 
     n->cq[cq->cqid] = NULL;
-    timer_free(cq->timer);
+    qemu_bh_delete(cq->bh);
     if (cq->ioeventfd_enabled) {
         memory_region_del_eventfd(&n->iomem,
                                   0x1000 + offset, 4, false, 0, &cq->notifier);
@@ -4771,7 +4766,7 @@  static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
         }
     }
     n->cq[cqid] = cq;
-    cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+    cq->bh = qemu_bh_new(nvme_post_cqes, cq);
 }
 
 static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
@@ -6913,9 +6908,9 @@  static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
         if (start_sqs) {
             NvmeSQueue *sq;
             QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
-                timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+                qemu_bh_schedule(sq->bh);
             }
-            timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+            qemu_bh_schedule(cq->bh);
         }
 
         if (cq->tail == cq->head) {
@@ -6984,7 +6979,8 @@  static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
             pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail,
                           sizeof(sq->tail));
         }
-        timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+
+        qemu_bh_schedule(sq->bh);
     }
 }
 
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 79f5c281c223..7adf042ec3e4 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -375,7 +375,7 @@  typedef struct NvmeSQueue {
     uint64_t    dma_addr;
     uint64_t    db_addr;
     uint64_t    ei_addr;
-    QEMUTimer   *timer;
+    QEMUBH      *bh;
     EventNotifier notifier;
     bool        ioeventfd_enabled;
     NvmeRequest *io_req;
@@ -396,7 +396,7 @@  typedef struct NvmeCQueue {
     uint64_t    dma_addr;
     uint64_t    db_addr;
     uint64_t    ei_addr;
-    QEMUTimer   *timer;
+    QEMUBH      *bh;
     EventNotifier notifier;
     bool        ioeventfd_enabled;
     QTAILQ_HEAD(, NvmeSQueue) sq_list;