
[v5,07/18] nvme: add max_ioqpairs device parameter

Message ID 20200505054840.186586-8-its@irrelevant.dk
State New
Series nvme: refactoring and cleanups

Commit Message

Klaus Jensen May 5, 2020, 5:48 a.m. UTC
From: Klaus Jensen <k.jensen@samsung.com>

The num_queues device parameter has a slightly confusing meaning. First,
it accounts for the admin queue pair, which is not really optional.
Second, it is really a maximum on the number of queues allowed.

Add a new max_ioqpairs parameter that only accounts for I/O queue pairs,
but keep num_queues for compatibility.
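
For compatibility, a user-supplied num_queues value of N maps to
max_ioqpairs = N - 1. A minimal sketch of the mapping (simplified from
the nvme_realize() change below; effective_ioqpairs() is a hypothetical
helper, not part of the patch):

    #include <stdint.h>

    /* Translate the deprecated num_queues (admin + I/O queues) into
     * max_ioqpairs (I/O queue pairs only). */
    static uint32_t effective_ioqpairs(uint32_t num_queues,
                                       uint32_t max_ioqpairs)
    {
        if (num_queues) {          /* deprecated parameter was given */
            return num_queues - 1; /* drop the admin queue pair */
        }
        return max_ioqpairs;       /* defaults to 64 */
    }

Note that under this mapping an explicit num_queues=64 (the old
default) yields 63 I/O queue pairs, while the new default of
max_ioqpairs=64 allows 64.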

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
---
 hw/block/nvme.c | 51 ++++++++++++++++++++++++++++++-------------------
 hw/block/nvme.h |  3 ++-
 2 files changed, 33 insertions(+), 21 deletions(-)

Comments

Klaus Jensen May 5, 2020, 8:52 p.m. UTC | #1
On May  5 07:48, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> The num_queues device parameter has a slightly confusing meaning. First,
> it accounts for the admin queue pair, which is not really optional.
> Second, it is really a maximum on the number of queues allowed.
> 
> Add a new max_ioqpairs parameter that only accounts for I/O queue pairs,
> but keep num_queues for compatibility.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
> Reviewed-by: Keith Busch <kbusch@kernel.org>
> ---
>  hw/block/nvme.c | 51 ++++++++++++++++++++++++++++++-------------------
>  hw/block/nvme.h |  3 ++-
>  2 files changed, 33 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 623a88be93dc..3875a5f3dcbf 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1571,7 +1581,8 @@ static Property nvme_props[] = {
>                       HostMemoryBackend *),
>      DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
>      DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
> -    DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 64),
> +    DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
> +    DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  

I noticed that this default of 64 makes the default configuration
unsafe by allowing the cq->cqid < 64 assert in nvme_irq_{,de}assert() to
trigger if the pin-based interrupt logic is used (under SPDK, for
instance). The assert protects against undefined behavior caused by
shifting by more than 63 bits into the 64-bit irq_status variable.

As far as I can tell, the assert, the shift, and the size of the
irq_status variable are bogus, so I posted a patch for this in
"hw/block/nvme: fixes for interrupt behavior". Preferably, that should
go in before this series.
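
For reference, a minimal sketch of the failure mode in the pin-based
interrupt path (simplified, not the exact nvme.c code;
irq_assert_sketch() is a hypothetical stand-in for nvme_irq_assert()):

    #include <assert.h>
    #include <stdint.h>

    /* With max_ioqpairs defaulting to 64, a guest may create a CQ
     * with cqid == 64, tripping the assert; without the assert, the
     * shift below would be by 64 bits into a 64-bit value, which is
     * undefined behavior in C. */
    static void irq_assert_sketch(uint64_t *irq_status, uint16_t cqid)
    {
        assert(cqid < 64);
        *irq_status |= 1ULL << cqid;
    }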

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 623a88be93dc..3875a5f3dcbf 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -20,7 +20,7 @@ 
  *      -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
  *              cmb_size_mb=<cmb_size_mb[optional]>, \
  *              [pmrdev=<mem_backend_file_id>,] \
- *              num_queues=<N[optional]>
+ *              max_ioqpairs=<N[optional]>
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
@@ -36,6 +36,7 @@ 
 
 #include "qemu/osdep.h"
 #include "qemu/units.h"
+#include "qemu/error-report.h"
 #include "hw/block/block.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci.h"
@@ -86,12 +87,12 @@  static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
 {
-    return sqid < n->params.num_queues && n->sq[sqid] != NULL ? 0 : -1;
+    return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1;
 }
 
 static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid)
 {
-    return cqid < n->params.num_queues && n->cq[cqid] != NULL ? 0 : -1;
+    return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1;
 }
 
 static void nvme_inc_cq_tail(NvmeCQueue *cq)
@@ -653,7 +654,7 @@  static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
         trace_pci_nvme_err_invalid_create_cq_addr(prp1);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-    if (unlikely(vector > n->params.num_queues)) {
+    if (unlikely(vector > n->params.max_ioqpairs + 1)) {
         trace_pci_nvme_err_invalid_create_cq_vector(vector);
         return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
     }
@@ -805,8 +806,8 @@  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
         break;
     case NVME_NUMBER_OF_QUEUES:
-        result = cpu_to_le32((n->params.num_queues - 2) |
-                             ((n->params.num_queues - 2) << 16));
+        result = cpu_to_le32((n->params.max_ioqpairs - 1) |
+                             ((n->params.max_ioqpairs - 1) << 16));
         trace_pci_nvme_getfeat_numq(result);
         break;
     case NVME_TIMESTAMP:
@@ -850,10 +851,10 @@  static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     case NVME_NUMBER_OF_QUEUES:
         trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
                                     ((dw11 >> 16) & 0xFFFF) + 1,
-                                    n->params.num_queues - 1,
-                                    n->params.num_queues - 1);
-        req->cqe.result = cpu_to_le32((n->params.num_queues - 2) |
-                                      ((n->params.num_queues - 2) << 16));
+                                    n->params.max_ioqpairs,
+                                    n->params.max_ioqpairs);
+        req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) |
+                                      ((n->params.max_ioqpairs - 1) << 16));
         break;
     case NVME_TIMESTAMP:
         return nvme_set_feature_timestamp(n, cmd);
@@ -924,12 +925,12 @@  static void nvme_clear_ctrl(NvmeCtrl *n)
 
     blk_drain(n->conf.blk);
 
-    for (i = 0; i < n->params.num_queues; i++) {
+    for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
         if (n->sq[i] != NULL) {
             nvme_free_sq(n->sq[i], n);
         }
     }
-    for (i = 0; i < n->params.num_queues; i++) {
+    for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
         if (n->cq[i] != NULL) {
             nvme_free_cq(n->cq[i], n);
         }
@@ -1360,8 +1361,17 @@  static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     int64_t bs_size;
     uint8_t *pci_conf;
 
-    if (!n->params.num_queues) {
-        error_setg(errp, "num_queues can't be zero");
+    if (n->params.num_queues) {
+        warn_report("num_queues is deprecated; please use max_ioqpairs "
+                    "instead");
+
+        n->params.max_ioqpairs = n->params.num_queues - 1;
+    }
+
+    if (n->params.max_ioqpairs < 1 ||
+        n->params.max_ioqpairs > PCI_MSIX_FLAGS_QSIZE) {
+        error_setg(errp, "max_ioqpairs must be between 1 and %d",
+                   PCI_MSIX_FLAGS_QSIZE);
         return;
     }
 
@@ -1411,21 +1421,21 @@  static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 
     n->num_namespaces = 1;
 
-    /* num_queues is really number of pairs, so each has two doorbells */
+    /* add one to max_ioqpairs to account for the admin queue pair */
     n->reg_size = pow2ceil(NVME_REG_SIZE +
-                           2 * n->params.num_queues * NVME_DB_SIZE);
+                           2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE);
     n->ns_size = bs_size / (uint64_t)n->num_namespaces;
 
     n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
-    n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
-    n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
+    n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
+    n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
 
     memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
                           "nvme", n->reg_size);
     pci_register_bar(pci_dev, 0,
         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
         &n->iomem);
-    msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);
+    msix_init_exclusive_bar(pci_dev, n->params.max_ioqpairs + 1, 4, NULL);
 
     id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
     id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -1571,7 +1581,8 @@  static Property nvme_props[] = {
                      HostMemoryBackend *),
     DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
     DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
-    DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 64),
+    DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
+    DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 9df244c93c02..c4e3edfebe0b 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -5,7 +5,8 @@ 
 
 typedef struct NvmeParams {
     char     *serial;
-    uint32_t num_queues;
+    uint32_t num_queues; /* deprecated since 5.1 */
+    uint32_t max_ioqpairs;
     uint32_t cmb_size_mb;
 } NvmeParams;
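
For reference, the reg_size computation in nvme_realize() above works
out as follows for the new default (a sketch; the NVME_REG_SIZE and
NVME_DB_SIZE values are assumed from hw/block/nvme.h, and bar0_size()
is a hypothetical helper):

    #include <stdint.h>

    /* Stand-in for QEMU's pow2ceil(): round up to a power of two. */
    static uint64_t pow2ceil(uint64_t value)
    {
        uint64_t p = 1;
        while (p < value) {
            p <<= 1;
        }
        return p;
    }

    /* For max_ioqpairs = 64, each of the 65 queue pairs (admin + 64
     * I/O) gets an SQ tail and a CQ head doorbell:
     * 0x1000 + 2 * 65 * 4 = 4616 bytes, rounded up to 8192 for BAR0. */
    static uint64_t bar0_size(uint32_t max_ioqpairs)
    {
        return pow2ceil(0x1000 + 2 * (max_ioqpairs + 1) * 4);
    }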