Patchwork [09/11] QEMU NVMe: Implement discontiguous queues

Submitter Keith Busch
Date Feb. 27, 2013, 12:47 a.m.
Message ID <1361926034-21824-10-git-send-email-keith.busch@intel.com>
Permalink /patch/223454/
State New

Comments

Keith Busch - Feb. 27, 2013, 12:47 a.m.
This seems like a silly feature, but the spec allows it, so this adds
support for queues that are not physically contiguous.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 hw/nvme.c |  143 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
 hw/nvme.h |    2 +
 2 files changed, 118 insertions(+), 27 deletions(-)
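
For context, a queue created with the PC (physically contiguous) flag clear is described to the controller by a PRP list: one guest-physical page address per queue page instead of a single base address. The sketch below shows the address arithmetic involved in locating entry idx of such a queue; the function name and the example values (4 KiB pages, 64-byte submission queue entries) are illustrative only, not taken from the patch.

#include <stdint.h>
#include <stdio.h>

/* Guest-physical address of entry `idx` of a queue whose backing pages are
 * listed, in order, in prp_list[]. Each page holds page_size / entry_size
 * entries. */
static uint64_t queue_entry_addr(const uint64_t *prp_list, uint32_t page_size,
                                 uint32_t idx, uint32_t entry_size)
{
    uint32_t entries_per_page = page_size / entry_size;
    return prp_list[idx / entries_per_page] +
           (uint64_t)(idx % entries_per_page) * entry_size;
}

int main(void)
{
    /* Hypothetical PRP list: three 4 KiB pages backing a 64-byte-entry SQ. */
    uint64_t prp_list[] = { 0x100000, 0x250000, 0x3a0000 };

    /* 4096 / 64 = 64 entries per page, so index 100 lands in the second
     * page at offset 36 * 64 = 0x900: this prints 0x250900. */
    printf("0x%llx\n",
           (unsigned long long)queue_entry_addr(prp_list, 4096, 100, 64));
    return 0;
}

nvme_discontig() in the patch performs the same calculation, and nvme_setup_discontig() walks the guest's PRP list pages at queue-creation time to build the prp_list[] array it indexes.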
Anthony Liguori - Feb. 27, 2013, 2:16 a.m.
Keith Busch <keith.busch@intel.com> writes:

> This seems like a silly feature, but the spec allows it, so this adds
> support for queues that are not physically contiguous.
>
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> ---
>  hw/nvme.c |  143 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
>  hw/nvme.h |    2 +
>  2 files changed, 118 insertions(+), 27 deletions(-)
>
> diff --git a/hw/nvme.c b/hw/nvme.c
> index 3e5cfc0..2945406 100644
> --- a/hw/nvme.c
> +++ b/hw/nvme.c
> @@ -110,6 +110,42 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
>      }
>  }
>  
> +static uint64_t *nvme_setup_discontig(NvmeCtrl *n, uint64_t prp_addr,
> +    uint16_t queue_depth, uint16_t entry_size)
> +{
> +    int i;
> +    uint16_t prps_per_page = n->page_size / sizeof(uint64_t);
> +    uint64_t prp[prps_per_page];
> +    uint16_t total_prps = DIV_ROUND_UP(queue_depth * entry_size, n->page_size);
> +    uint64_t *prp_list = g_malloc0(total_prps * sizeof(*prp_list));
> +
> +    for (i = 0; i < total_prps; i++) {
> +        if (i % prps_per_page == 0 && i < total_prps - 1) {
> +            if (!prp_addr || prp_addr & (n->page_size - 1)) {
> +                g_free(prp_list);
> +                return NULL;
> +            }
> +            pci_dma_read(&n->dev, prp_addr, (void *)&prp, sizeof(prp));
> +            prp_addr = prp[prps_per_page - 1];

This doesn't appear to be endian safe.

Regards,

Anthony Liguori

> +        }
> +        prp_list[i] = prp[i % prps_per_page];
> +        if (!prp_list[i] || prp_list[i] & (n->page_size - 1)) {
> +            g_free(prp_list);
> +            return NULL;
> +        }
> +    }
> +    return prp_list;
> +}
> +
> +static hwaddr nvme_discontig(uint64_t *dma_addr, uint16_t page_size,
> +    uint16_t queue_idx, uint16_t entry_size)
> +{
> +    uint16_t entries_per_page = page_size / entry_size;
> +    uint16_t prp_index = queue_idx / entries_per_page;
> +    uint16_t index_in_prp = queue_idx % entries_per_page;
> +    return dma_addr[prp_index] + index_in_prp * entry_size;
> +}
> +
>  static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
>      uint32_t len, NvmeCtrl *n)
>  {
> @@ -212,7 +248,12 @@ static void nvme_post_cqes(void *opaque)
>          req->cqe.status |= cq->phase;
>          req->cqe.sq_id = sq->id;
>          req->cqe.sq_head = sq->head;
> -        addr = cq->dma_addr + cq->tail * n->cqe_size;
> +        if (cq->phys_contig) {
> +            addr = cq->dma_addr + cq->tail * n->cqe_size;
> +        } else {
> +            addr = nvme_discontig(cq->prp_list, cq->tail, n->page_size,
> +                n->cqe_size);
> +        }
>          nvme_inc_cq_tail(cq);
>          pci_dma_write(&n->dev, addr, (void *)&req->cqe, sizeof(req->cqe));
>          QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
> @@ -528,7 +569,9 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
>      qemu_del_timer(sq->timer);
>      qemu_free_timer(sq->timer);
>      g_free(sq->io_req);
> -    sq->io_req = NULL;
> +    if (sq->prp_list) {
> +        g_free(sq->prp_list);
> +    }
>      if (sq->id)
>          g_free(sq);
>  }
> @@ -566,20 +609,32 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
>      return NVME_SUCCESS;
>  }
>  
> -static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
> -    uint16_t sqid, uint16_t cqid, uint16_t size, enum QueueFlags prio)
> +static uint16_t nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
> +    uint16_t sqid, uint16_t cqid, uint16_t size, enum QueueFlags prio,
> +    int contig)
>  {
>      int i;
>      NvmeCQueue *cq;
>  
>      sq->ctrl = n;
> -    sq->dma_addr = dma_addr;
>      sq->id = sqid;
>      sq->size = size;
>      sq->cqid = cqid;
>      sq->head = sq->tail = 0;
> +    sq->phys_contig = contig;
>      sq->io_req = g_malloc(sq->size * sizeof(*sq->io_req));
>  
> +    if (sq->phys_contig) {
> +        sq->dma_addr = dma_addr;
> +    }
> +    else {
> +        sq->prp_list = nvme_setup_discontig(n, dma_addr, size,
> +            n->sqe_size);
> +        if (!sq->prp_list) {
> +            return NVME_INVALID_FIELD | NVME_DNR;
> +        }
> +    }
> +
>      QTAILQ_INIT(&sq->req_list);
>      QTAILQ_INIT(&sq->out_req_list);
>      for (i = 0; i < sq->size; i++) {
> @@ -607,6 +662,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
>      cq = n->cq[cqid];
>      QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
>      n->sq[sqid] = sq;
> +
> +    return NVME_SUCCESS;
>  }
>  
>  static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
> @@ -625,12 +682,15 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
>      if (!c->prp1 || c->prp1 & (n->page_size - 1)) {
>          return NVME_INVALID_FIELD | NVME_DNR;
>      }
> -    if (!(NVME_SQ_FLAGS_PC(c->sq_flags))) {
> +    if (!(NVME_SQ_FLAGS_PC(c->sq_flags)) && NVME_CAP_CQR(n->bar.cap)) {
>          return NVME_INVALID_FIELD | NVME_DNR;
>      }
>      sq = g_malloc0(sizeof(*sq));
> -    nvme_init_sq(sq, n, c->prp1, c->sqid, c->cqid, c->qsize + 1,
> -        NVME_SQ_FLAGS_QPRIO(c->sq_flags));
> +    if (nvme_init_sq(sq, n, c->prp1, c->sqid, c->cqid, c->qsize + 1,
> +            NVME_SQ_FLAGS_QPRIO(c->sq_flags), NVME_SQ_FLAGS_PC(c->sq_flags))) {
> +        g_free(sq);
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
>      return NVME_SUCCESS;
>  }
>  
> @@ -640,8 +700,12 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
>      qemu_del_timer(cq->timer);
>      qemu_free_timer(cq->timer);
>      msix_vector_unuse(&n->dev, cq->vector);
> -    if (cq->id)
> +    if (cq->prp_list) {
> +        g_free(cq->prp_list);
> +    }
> +    if (cq->id) {
>          g_free(cq);
> +    }
>  
>  }
>  
> @@ -662,22 +726,32 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
>      return NVME_SUCCESS;
>  }
>  
> -static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
> -    uint16_t cqid, uint16_t vector, uint16_t size)
> +static uint16_t nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
> +    uint16_t cqid, uint16_t vector, uint16_t size, int contig)
>  {
>      cq->ctrl = n;
>      cq->id = cqid;
>      cq->size = size; 
> -    cq->dma_addr = dma_addr;
>      cq->phase = 1;
>      cq->irq_enabled = 1;
>      cq->vector = vector;
>      cq->head = cq->tail = 0;
> +    cq->phys_contig = contig;
> +    if (cq->phys_contig) {
> +        cq->dma_addr = dma_addr;
> +    } else {
> +        cq->prp_list = nvme_setup_discontig(n, dma_addr, size,
> +            n->cqe_size);
> +        if (!cq->prp_list) {
> +            return NVME_INVALID_FIELD | NVME_DNR;
> +        }
> +    }
>      QTAILQ_INIT(&cq->req_list);
>      QTAILQ_INIT(&cq->sq_list);
>      msix_vector_use(&n->dev, cq->vector);
>      n->cq[cqid] = cq;
>      cq->timer = qemu_new_timer_ns(vm_clock, nvme_post_cqes, cq);
> +    return NVME_SUCCESS;
>  }
>  
>  static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
> @@ -696,12 +770,16 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
>      if (c->irq_vector > n->num_queues) {
>          return NVME_INVALID_IRQ_VECTOR;
>      }
> -    if (!(NVME_CQ_FLAGS_PC(c->cq_flags))) {
> +    if (!(NVME_CQ_FLAGS_PC(c->cq_flags)) && NVME_CAP_CQR(n->bar.cap)) {
>          return NVME_INVALID_FIELD | NVME_DNR;
>      }
>  
>      cq = g_malloc0(sizeof(*cq));
> -    nvme_init_cq(cq, n, c->prp1, c->cqid, c->irq_vector, c->qsize + 1);
> +    if (nvme_init_cq(cq, n, c->prp1, c->cqid, c->irq_vector, c->qsize + 1,
> +            NVME_CQ_FLAGS_PC(c->cq_flags))) {
> +        g_free(cq);
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
>      return NVME_SUCCESS;
>  }
>  
> @@ -1001,8 +1079,14 @@ static uint16_t nvme_abort_req(NvmeCtrl *n, NvmeCmd *cmd, uint32_t *result)
>      sq = n->sq[sqid];
>      while ((sq->head + index) % sq->size != sq->tail) {
>          NvmeCmd abort_cmd;
> -        hwaddr addr = sq->dma_addr + ((sq->head + index) % sq->size) *
> -            n->sqe_size;
> +        hwaddr addr;
> +        if (sq->phys_contig) {
> +            addr = sq->dma_addr + ((sq->head + index) % sq->size) *
> +                n->sqe_size;
> +        } else {
> +            addr = nvme_discontig(sq->prp_list, (sq->head + index) % sq->size,
> +                n->page_size, n->sqe_size);
> +        }
>          pci_dma_read(&n->dev, addr, (void *)&abort_cmd, sizeof(abort_cmd));
>          if (abort_cmd.cid == cid) {
>              NvmeRequest *req = QTAILQ_FIRST(&sq->req_list);
> @@ -1150,7 +1234,12 @@ static void nvme_sq_process(void *opaque)
>  
>      while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list)) &&
>              processed++ < sq->arb_burst) {
> -        addr = sq->dma_addr + sq->head * n->sqe_size;
> +        if (sq->phys_contig) {
> +            addr = sq->dma_addr + sq->head * n->sqe_size;
> +        } else {
> +            addr = nvme_discontig(sq->prp_list, sq->head, n->page_size,
> +                n->sqe_size);
> +        }
>          pci_dma_read(&n->dev, addr, (void *)&cmd, sizeof(cmd));
>          nvme_inc_sq_head(sq);
>  
> @@ -1207,7 +1296,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
>  {
>      uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
>      uint32_t page_size = 1 << page_bits;
> -
>      if (n->cq[0] || n->sq[0]) {
>          return -1;
>      }
> @@ -1236,20 +1324,21 @@ static int nvme_start_ctrl(NvmeCtrl *n)
>          return -1;
>      }
>  
> -    n->page_bits = NVME_CC_MPS(n->bar.cc) + 12;
> +    n->page_bits = page_bits;
>      n->page_size = 1 << n->page_bits;
>      n->max_prp_ents = n->page_size / sizeof(uint64_t);
>      n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc);
>      n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc);
> -
> -    nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
> -        NVME_AQA_ACQS(n->bar.aqa) + 1);
> -    nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
> -        NVME_AQA_ASQS(n->bar.aqa) + 1, NVME_Q_PRIO_HIGH);
> -
> +    if (nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
> +            NVME_AQA_ACQS(n->bar.aqa) + 1, 1)) {
> +        return -1;
> +    }
> +    if (nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
> +            NVME_AQA_ASQS(n->bar.aqa) + 1, NVME_Q_PRIO_HIGH, 1)) {
> +        return -1;
> +    }
>      n->aer_timer = qemu_new_timer_ns(vm_clock, nvme_aer_process_cb, n);
>      QSIMPLEQ_INIT(&n->aer_queue);
> -
>      return 0;
>  }
>  
> @@ -1426,7 +1515,7 @@ static int nvme_init(PCIDevice *pci_dev)
>          n->max_sqes < NVME_MIN_SQUEUE_ES || n->max_cqes < NVME_MIN_CQUEUE_ES) {
>          return -1;
>      }
> -    if (n->cqr != 1) {
> +    if (n->cqr > 1) {
>          return -1;
>      }
>      if (n->vwc > 1) {
> diff --git a/hw/nvme.h b/hw/nvme.h
> index b7d6615..926f843 100644
> --- a/hw/nvme.h
> +++ b/hw/nvme.h
> @@ -612,6 +612,7 @@ typedef struct NvmeSQueue {
>      uint32_t    size;
>      uint64_t    dma_addr;
>      uint64_t    completed;
> +    uint64_t    *prp_list;
>      QEMUTimer   *timer;
>      QTAILQ_ENTRY(NvmeSQueue) entry;
>      NvmeRequest *io_req;
> @@ -630,6 +631,7 @@ typedef struct NvmeCQueue {
>      uint32_t    vector;
>      uint32_t    size;
>      uint64_t    dma_addr;
> +    uint64_t    *prp_list;
>      QEMUTimer   *timer;
>      QTAILQ_HEAD(sq_list, NvmeSQueue) sq_list;
>      QTAILQ_HEAD(cq_req_list, NvmeRequest) req_list;
> -- 
> 1.7.0.4
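
On the endianness point raised above: the PRP entries that nvme_setup_discontig() reads with pci_dma_read() sit in guest memory in little-endian byte order, so using them directly is only correct on a little-endian host. QEMU's le64_to_cpu() is the usual way to convert such values; the standalone sketch below just spells out the byte order that conversion has to assume.

#include <stdint.h>
#include <stdio.h>

/* Assemble a 64-bit value from 8 little-endian bytes, independent of the
 * host's own byte order. */
static uint64_t le64_load(const uint8_t *buf)
{
    uint64_t v = 0;
    for (int i = 7; i >= 0; i--) {
        v = (v << 8) | buf[i];
    }
    return v;
}

int main(void)
{
    /* A PRP entry as it would sit in guest memory: 0x250000. */
    uint8_t raw[8] = { 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00 };

    printf("0x%llx\n", (unsigned long long)le64_load(raw)); /* 0x250000 */
    return 0;
}

Applied to the patch, that would mean converting each element of prp[] with le64_to_cpu() before it is used as the next prp_addr or stored into prp_list[].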

Patch

diff --git a/hw/nvme.c b/hw/nvme.c
index 3e5cfc0..2945406 100644
--- a/hw/nvme.c
+++ b/hw/nvme.c
@@ -110,6 +110,42 @@  static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
     }
 }
 
+static uint64_t *nvme_setup_discontig(NvmeCtrl *n, uint64_t prp_addr,
+    uint16_t queue_depth, uint16_t entry_size)
+{
+    int i;
+    uint16_t prps_per_page = n->page_size / sizeof(uint64_t);
+    uint64_t prp[prps_per_page];
+    uint16_t total_prps = DIV_ROUND_UP(queue_depth * entry_size, n->page_size);
+    uint64_t *prp_list = g_malloc0(total_prps * sizeof(*prp_list));
+
+    for (i = 0; i < total_prps; i++) {
+        if (i % prps_per_page == 0 && i < total_prps - 1) {
+            if (!prp_addr || prp_addr & (n->page_size - 1)) {
+                g_free(prp_list);
+                return NULL;
+            }
+            pci_dma_read(&n->dev, prp_addr, (void *)&prp, sizeof(prp));
+            prp_addr = prp[prps_per_page - 1];
+        }
+        prp_list[i] = prp[i % prps_per_page];
+        if (!prp_list[i] || prp_list[i] & (n->page_size - 1)) {
+            g_free(prp_list);
+            return NULL;
+        }
+    }
+    return prp_list;
+}
+
+static hwaddr nvme_discontig(uint64_t *dma_addr, uint16_t page_size,
+    uint16_t queue_idx, uint16_t entry_size)
+{
+    uint16_t entries_per_page = page_size / entry_size;
+    uint16_t prp_index = queue_idx / entries_per_page;
+    uint16_t index_in_prp = queue_idx % entries_per_page;
+    return dma_addr[prp_index] + index_in_prp * entry_size;
+}
+
 static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
     uint32_t len, NvmeCtrl *n)
 {
@@ -212,7 +248,12 @@  static void nvme_post_cqes(void *opaque)
         req->cqe.status |= cq->phase;
         req->cqe.sq_id = sq->id;
         req->cqe.sq_head = sq->head;
-        addr = cq->dma_addr + cq->tail * n->cqe_size;
+        if (cq->phys_contig) {
+            addr = cq->dma_addr + cq->tail * n->cqe_size;
+        } else {
+            addr = nvme_discontig(cq->prp_list, cq->tail, n->page_size,
+                n->cqe_size);
+        }
         nvme_inc_cq_tail(cq);
         pci_dma_write(&n->dev, addr, (void *)&req->cqe, sizeof(req->cqe));
         QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
@@ -528,7 +569,9 @@  static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
     qemu_del_timer(sq->timer);
     qemu_free_timer(sq->timer);
     g_free(sq->io_req);
-    sq->io_req = NULL;
+    if (sq->prp_list) {
+        g_free(sq->prp_list);
+    }
     if (sq->id)
         g_free(sq);
 }
@@ -566,20 +609,32 @@  static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
     return NVME_SUCCESS;
 }
 
-static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
-    uint16_t sqid, uint16_t cqid, uint16_t size, enum QueueFlags prio)
+static uint16_t nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
+    uint16_t sqid, uint16_t cqid, uint16_t size, enum QueueFlags prio,
+    int contig)
 {
     int i;
     NvmeCQueue *cq;
 
     sq->ctrl = n;
-    sq->dma_addr = dma_addr;
     sq->id = sqid;
     sq->size = size;
     sq->cqid = cqid;
     sq->head = sq->tail = 0;
+    sq->phys_contig = contig;
     sq->io_req = g_malloc(sq->size * sizeof(*sq->io_req));
 
+    if (sq->phys_contig) {
+        sq->dma_addr = dma_addr;
+    }
+    else {
+        sq->prp_list = nvme_setup_discontig(n, dma_addr, size,
+            n->sqe_size);
+        if (!sq->prp_list) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+    }
+
     QTAILQ_INIT(&sq->req_list);
     QTAILQ_INIT(&sq->out_req_list);
     for (i = 0; i < sq->size; i++) {
@@ -607,6 +662,8 @@  static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
     cq = n->cq[cqid];
     QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
     n->sq[sqid] = sq;
+
+    return NVME_SUCCESS;
 }
 
 static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -625,12 +682,15 @@  static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
     if (!c->prp1 || c->prp1 & (n->page_size - 1)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-    if (!(NVME_SQ_FLAGS_PC(c->sq_flags))) {
+    if (!(NVME_SQ_FLAGS_PC(c->sq_flags)) && NVME_CAP_CQR(n->bar.cap)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
     sq = g_malloc0(sizeof(*sq));
-    nvme_init_sq(sq, n, c->prp1, c->sqid, c->cqid, c->qsize + 1,
-        NVME_SQ_FLAGS_QPRIO(c->sq_flags));
+    if (nvme_init_sq(sq, n, c->prp1, c->sqid, c->cqid, c->qsize + 1,
+            NVME_SQ_FLAGS_QPRIO(c->sq_flags), NVME_SQ_FLAGS_PC(c->sq_flags))) {
+        g_free(sq);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
     return NVME_SUCCESS;
 }
 
@@ -640,8 +700,12 @@  static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
     qemu_del_timer(cq->timer);
     qemu_free_timer(cq->timer);
     msix_vector_unuse(&n->dev, cq->vector);
-    if (cq->id)
+    if (cq->prp_list) {
+        g_free(cq->prp_list);
+    }
+    if (cq->id) {
         g_free(cq);
+    }
 
 }
 
@@ -662,22 +726,32 @@  static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
     return NVME_SUCCESS;
 }
 
-static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
-    uint16_t cqid, uint16_t vector, uint16_t size)
+static uint16_t nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
+    uint16_t cqid, uint16_t vector, uint16_t size, int contig)
 {
     cq->ctrl = n;
     cq->id = cqid;
     cq->size = size; 
-    cq->dma_addr = dma_addr;
     cq->phase = 1;
     cq->irq_enabled = 1;
     cq->vector = vector;
     cq->head = cq->tail = 0;
+    cq->phys_contig = contig;
+    if (cq->phys_contig) {
+        cq->dma_addr = dma_addr;
+    } else {
+        cq->prp_list = nvme_setup_discontig(n, dma_addr, size,
+            n->cqe_size);
+        if (!cq->prp_list) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+    }
     QTAILQ_INIT(&cq->req_list);
     QTAILQ_INIT(&cq->sq_list);
     msix_vector_use(&n->dev, cq->vector);
     n->cq[cqid] = cq;
     cq->timer = qemu_new_timer_ns(vm_clock, nvme_post_cqes, cq);
+    return NVME_SUCCESS;
 }
 
 static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -696,12 +770,16 @@  static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
     if (c->irq_vector > n->num_queues) {
         return NVME_INVALID_IRQ_VECTOR;
     }
-    if (!(NVME_CQ_FLAGS_PC(c->cq_flags))) {
+    if (!(NVME_CQ_FLAGS_PC(c->cq_flags)) && NVME_CAP_CQR(n->bar.cap)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
 
     cq = g_malloc0(sizeof(*cq));
-    nvme_init_cq(cq, n, c->prp1, c->cqid, c->irq_vector, c->qsize + 1);
+    if (nvme_init_cq(cq, n, c->prp1, c->cqid, c->irq_vector, c->qsize + 1,
+            NVME_CQ_FLAGS_PC(c->cq_flags))) {
+        g_free(cq);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
     return NVME_SUCCESS;
 }
 
@@ -1001,8 +1079,14 @@  static uint16_t nvme_abort_req(NvmeCtrl *n, NvmeCmd *cmd, uint32_t *result)
     sq = n->sq[sqid];
     while ((sq->head + index) % sq->size != sq->tail) {
         NvmeCmd abort_cmd;
-        hwaddr addr = sq->dma_addr + ((sq->head + index) % sq->size) *
-            n->sqe_size;
+        hwaddr addr;
+        if (sq->phys_contig) {
+            addr = sq->dma_addr + ((sq->head + index) % sq->size) *
+                n->sqe_size;
+        } else {
+            addr = nvme_discontig(sq->prp_list, (sq->head + index) % sq->size,
+                n->page_size, n->sqe_size);
+        }
         pci_dma_read(&n->dev, addr, (void *)&abort_cmd, sizeof(abort_cmd));
         if (abort_cmd.cid == cid) {
             NvmeRequest *req = QTAILQ_FIRST(&sq->req_list);
@@ -1150,7 +1234,12 @@  static void nvme_sq_process(void *opaque)
 
     while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list)) &&
             processed++ < sq->arb_burst) {
-        addr = sq->dma_addr + sq->head * n->sqe_size;
+        if (sq->phys_contig) {
+            addr = sq->dma_addr + sq->head * n->sqe_size;
+        } else {
+            addr = nvme_discontig(sq->prp_list, sq->head, n->page_size,
+                n->sqe_size);
+        }
         pci_dma_read(&n->dev, addr, (void *)&cmd, sizeof(cmd));
         nvme_inc_sq_head(sq);
 
@@ -1207,7 +1296,6 @@  static int nvme_start_ctrl(NvmeCtrl *n)
 {
     uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
     uint32_t page_size = 1 << page_bits;
-
     if (n->cq[0] || n->sq[0]) {
         return -1;
     }
@@ -1236,20 +1324,21 @@  static int nvme_start_ctrl(NvmeCtrl *n)
         return -1;
     }
 
-    n->page_bits = NVME_CC_MPS(n->bar.cc) + 12;
+    n->page_bits = page_bits;
     n->page_size = 1 << n->page_bits;
     n->max_prp_ents = n->page_size / sizeof(uint64_t);
     n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc);
     n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc);
-
-    nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
-        NVME_AQA_ACQS(n->bar.aqa) + 1);
-    nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
-        NVME_AQA_ASQS(n->bar.aqa) + 1, NVME_Q_PRIO_HIGH);
-
+    if (nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
+            NVME_AQA_ACQS(n->bar.aqa) + 1, 1)) {
+        return -1;
+    }
+    if (nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
+            NVME_AQA_ASQS(n->bar.aqa) + 1, NVME_Q_PRIO_HIGH, 1)) {
+        return -1;
+    }
     n->aer_timer = qemu_new_timer_ns(vm_clock, nvme_aer_process_cb, n);
     QSIMPLEQ_INIT(&n->aer_queue);
-
     return 0;
 }
 
@@ -1426,7 +1515,7 @@  static int nvme_init(PCIDevice *pci_dev)
         n->max_sqes < NVME_MIN_SQUEUE_ES || n->max_cqes < NVME_MIN_CQUEUE_ES) {
         return -1;
     }
-    if (n->cqr != 1) {
+    if (n->cqr > 1) {
         return -1;
     }
     if (n->vwc > 1) {
diff --git a/hw/nvme.h b/hw/nvme.h
index b7d6615..926f843 100644
--- a/hw/nvme.h
+++ b/hw/nvme.h
@@ -612,6 +612,7 @@  typedef struct NvmeSQueue {
     uint32_t    size;
     uint64_t    dma_addr;
     uint64_t    completed;
+    uint64_t    *prp_list;
     QEMUTimer   *timer;
     QTAILQ_ENTRY(NvmeSQueue) entry;
     NvmeRequest *io_req;
@@ -630,6 +631,7 @@  typedef struct NvmeCQueue {
     uint32_t    vector;
     uint32_t    size;
     uint64_t    dma_addr;
+    uint64_t    *prp_list;
     QEMUTimer   *timer;
     QTAILQ_HEAD(sq_list, NvmeSQueue) sq_list;
     QTAILQ_HEAD(cq_req_list, NvmeRequest) req_list;