@@ -16,7 +16,30 @@
*/
/**
- * Usage: add "-drive file=<file>,if=nvme"
+ * Usage:
+ *
+ * Add "-device nvme[,options]" parameters when starting QEMU.
+ * Repeat "-device nvme" for multiple nvme controller pci devices.
+ *
+ * Options:
+ *
+ * drive=<drive_id> : (Required) drive id to use as backing storage
+ * namespaces=<1-256> : Number of namespaces. Default: 1
+ * queues=<1-2047> : Number of controller IO queues. Default:64
+ * entries=<1-65535> : Maximum number of queue entries. Default:2047
+ * aerl=<0-255> : Number of async event requests to accept. Default:3
+ * acl=<0-255> : The abort command limit. Default:3
+ * mdts=<0-255> : Maximum data transfer size, see NVMe spec. Default:5
+ * cqr=<0,1> : Contiguous Queues Required. Default:1
+ * vwc=<0,1> : Volatile write cache enabled. Default:0
+ * max_cqes=<4-0xf> : Max completion queue entry size. Default:0xf
+ * max_sqes=<6-0xf> : Max submission queue entry size. Default:0xf
+ * lba_idx=<0-3> : Initial flbas lba index for namespaces. Default:0
+ * stride=<0-12> : Doorbell stride. Default:0
+ *
+ * Alternate method: to use all the default options, execute as
+ * "-drive file=<file>,if=nvme"
+ *
*/
#include "block-common.h"
@@ -26,7 +49,14 @@
#include "nvme.h"
-#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
+#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
+#define NVME_MAX_QUEUE_ENTRIES 0xffff
+#define NVME_MAX_STRIDE 12
+#define NVME_MAX_NUM_NAMESPACES 256
+#define NVME_MAX_QUEUE_ES 0xf
+#define NVME_MIN_CQUEUE_ES 0x4
+#define NVME_MIN_SQUEUE_ES 0x6
+
static int instance;
static void nvme_sq_process(void *opaque);
@@ -220,6 +250,9 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
if ((rw->slba + rw->nlb) > ns->id_ns.nsze) {
return NVME_LBA_RANGE | NVME_DNR;
}
+ if (n->id_ctrl.mdts && data_size > n->page_size * (1 << n->id_ctrl.mdts)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
ret = nvme_map_prp(&req->qsg, rw->prp1, rw->prp2, data_size, n);
if (ret == NVME_SUCCESS) {
@@ -540,16 +573,32 @@ static int nvme_start_ctrl(NvmeCtrl *n)
{
uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
uint32_t page_size = 1 << page_bits;
- if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq ||
- n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) ||
- NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
- NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) ||
- NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
- NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
- NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.cqes) ||
- NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.cqes) ||
- !NVME_AQA_ASQS(n->bar.aqa) || NVME_AQA_ASQS(n->bar.aqa) > 4095 ||
- !NVME_AQA_ACQS(n->bar.aqa) || NVME_AQA_ACQS(n->bar.aqa) > 4095) {
+
+ if (n->cq[0] || n->sq[0]) {
+ return -1;
+ }
+ if (!n->bar.asq || !n->bar.acq) {
+ return -1;
+ }
+ if (n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1)) {
+ return -1;
+ }
+ if (NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
+ NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap)) {
+ return -1;
+ }
+ if (NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
+ NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes)) {
+ return -1;
+ }
+ if (NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.cqes) ||
+ NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.cqes)) {
+ return -1;
+ }
+ if (NVME_AQA_ASQS(n->bar.aqa) == 0 || NVME_AQA_ASQS(n->bar.aqa) > 4095) {
+ return -1;
+ }
+ if (NVME_AQA_ACQS(n->bar.aqa) == 0 || NVME_AQA_ACQS(n->bar.aqa) > 4095) {
return -1;
}
@@ -633,16 +682,17 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
{
uint32_t qid;
- if (addr & ((1 << 2) - 1)) {
+ if (addr & ((1 << (2 + n->db_stride)) - 1)) {
return;
}
- if (((addr - 0x1000) >> 2) & 1) {
+ if (((addr - 0x1000) >> (2 + n->db_stride)) & 1) {
NvmeCQueue *cq;
uint16_t new_head = val & 0xffff;
int start_sqs;
- qid = (addr - (0x1000 + (1 << 2))) >> 3;
+ qid = (addr - (0x1000 + (1 << (2 + n->db_stride)))) >>
+ (3 + n->db_stride);
if (nvme_check_cqid(n, qid)) {
return;
}
@@ -668,7 +718,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
uint16_t new_tail = val & 0xffff;
NvmeSQueue *sq;
- qid = (addr - 0x1000) >> 3;
+ qid = (addr - 0x1000) >> (3 + n->db_stride);
if (nvme_check_sqid(n, qid)) {
return;
}
@@ -714,6 +764,31 @@ static int nvme_init(PCIDevice *pci_dev)
if (!n->conf.bs) {
return -1;
}
+ if (n->num_namespaces == 0 || n->num_namespaces > NVME_MAX_NUM_NAMESPACES) {
+ return -1;
+ }
+ if (n->num_queues < 1 || n->num_queues > NVME_MAX_QS) {
+ return -1;
+ }
+ if (n->db_stride > NVME_MAX_STRIDE) {
+ return -1;
+ }
+ if (n->max_q_ents < 1 || n->max_q_ents > NVME_MAX_QUEUE_ENTRIES) {
+ return -1;
+ }
+ if (n->max_sqes > NVME_MAX_QUEUE_ES || n->max_cqes > NVME_MAX_QUEUE_ES ||
+ n->max_sqes < NVME_MIN_SQUEUE_ES || n->max_cqes < NVME_MIN_CQUEUE_ES) {
+ return -1;
+ }
+ if (n->cqr != 1) {
+ return -1;
+ }
+ if (n->vwc > 1) {
+ return -1;
+ }
+ if (n->lba_index > 3) {
+ return -1;
+ }
bs_size = bdrv_getlength(n->conf.bs);
if (bs_size <= 0) {
@@ -725,27 +800,34 @@ static int nvme_init(PCIDevice *pci_dev)
pci_config_set_prog_interface(pci_dev->config, 0x2);
pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
- n->num_namespaces = 1;
- n->num_queues = 64;
- n->max_q_ents = 0x7ff;
n->reg_size = 1 << qemu_fls(0x1004 + 2 * (n->num_queues + 1) * 4);
n->ns_size = bs_size / n->num_namespaces;
-
n->instance = instance++;
n->namespaces = g_malloc0(sizeof(*n->namespaces)*n->num_namespaces);
n->sq = g_malloc0(sizeof(*n->sq)*n->num_queues);
n->cq = g_malloc0(sizeof(*n->cq)*n->num_queues);
+ memory_region_init_io(&n->iomem, &nvme_mmio_ops, n, "nvme-mmio",
+ n->reg_size);
+ pci_register_bar(&n->dev, 0,
+ PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
+ &n->iomem);
+ msix_init_exclusive_bar(&n->dev, n->num_queues, 4);
+
id->vid = PCI_VENDOR_ID_INTEL;
id->ssvid = 0x0111;
id->rab = 6;
id->ieee[0] = 0x00;
id->ieee[1] = 0x02;
id->ieee[2] = 0xb3;
- id->sqes = 0xf << 4 | 0x6;
- id->cqes = 0xf << 4 | 0x4;
+ id->mdts = n->mdts;
+ id->acl = n->acl;
+ id->aerl = n->aerl;
+ id->elpe = n->elpe;
+ id->sqes = n->max_sqes << 4 | 0x6;
+ id->cqes = n->max_cqes << 4 | 0x4;
id->nn = n->num_namespaces;
- id->vwc = 1;
+ id->vwc = n->vwc;
snprintf((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl");
snprintf((char *)id->fr, sizeof(id->fr), "1.0");
snprintf((char *)id->sn, sizeof(id->sn), "NVMeQx10%02x", n->instance);
@@ -754,26 +836,23 @@ static int nvme_init(PCIDevice *pci_dev)
id->psd[0].exlat = 0x4;
n->bar.cap = (uint64_t)(n->max_q_ents & CAP_MQES_MASK) << CAP_MQES_SHIFT;
- n->bar.cap |= (uint64_t)(1 & CAP_CQR_MASK) << CAP_CQR_SHIFT;
- n->bar.cap |= (uint64_t)(1 & CAP_AMS_MASK) << CAP_AMS_SHIFT;
- n->bar.cap |= (uint64_t)(0xf & CAP_TO_MASK) << CAP_TO_SHIFT;
- n->bar.cap |= (uint64_t)(1 & CAP_CSS_MASK) << CAP_CSS_SHIFT;
+ n->bar.cap |= (uint64_t)(n->cqr & CAP_CQR_MASK) << CAP_CQR_SHIFT;
+ n->bar.cap |= (uint64_t)(1 & CAP_AMS_MASK) << CAP_AMS_SHIFT;
+ n->bar.cap |= (uint64_t)(0xf & CAP_TO_MASK) << CAP_TO_SHIFT;
+ n->bar.cap |= (uint64_t)(n->db_stride & CAP_DSTRD_MASK) << CAP_DSTRD_SHIFT;
+ n->bar.cap |= (uint64_t)(0 & CAP_NSSRS_MASK) << CAP_NSSRS_SHIFT;
+ n->bar.cap |= (uint64_t)(1 & CAP_CSS_MASK) << CAP_CSS_SHIFT;
+ n->bar.cap |= (uint64_t)(0 & CAP_MPSMIN_MASK) << CAP_MPSMIN_SHIFT;
n->bar.cap |= (uint64_t)(0xf & CAP_MPSMAX_MASK) << CAP_MPSMAX_SHIFT;
n->bar.vs = 0x00010001;
n->bar.intmc = n->bar.intms = 0;
- memory_region_init_io(&n->iomem, &nvme_mmio_ops, n, "nvme-mmio",
- n->reg_size);
- pci_register_bar(&n->dev, 0,
- PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
- &n->iomem);
- msix_init_exclusive_bar(&n->dev, n->num_queues, 4);
-
for (i = 0; i < n->num_namespaces; i++) {
NvmeNamespace *ns = &n->namespaces[i];
NvmeIdNs *id_ns = &ns->id_ns;
id_ns->ncap = id_ns->nsze = (n->ns_size) >> 9;
id_ns->nlbaf = 0x4;
+ id_ns->flbas = n->lba_index;
for (j = 0; j <= id_ns->nlbaf; j++) {
id_ns->lbaf[j].ds = 9 + j;
@@ -804,6 +883,20 @@ static void nvme_reset(DeviceState *dev)
}
static Property nvme_props[] = {
+ DEFINE_PROP_UINT32("namespaces", NvmeCtrl, num_namespaces, 1),
+ DEFINE_PROP_UINT32("queues", NvmeCtrl, num_queues, 64),
+ DEFINE_PROP_UINT32("entries", NvmeCtrl, max_q_ents, 0x7ff),
+ DEFINE_PROP_UINT8("max_cqes", NvmeCtrl, max_cqes, 0xf),
+ DEFINE_PROP_UINT8("max_sqes", NvmeCtrl, max_sqes, 0xf),
+ DEFINE_PROP_UINT8("stride", NvmeCtrl, db_stride, 0),
+ DEFINE_PROP_UINT8("aerl", NvmeCtrl, aerl, 3),
+ DEFINE_PROP_UINT8("acl", NvmeCtrl, acl, 3),
+ DEFINE_PROP_UINT8("elpe", NvmeCtrl, elpe, 3), /* copied to id->elpe in nvme_init */
+ DEFINE_PROP_UINT8("mdts", NvmeCtrl, mdts, 5),
+ DEFINE_PROP_UINT8("cqr", NvmeCtrl, cqr, 1), /* nvme_init rejects any value other than 1 */
+ DEFINE_PROP_UINT8("meta", NvmeCtrl, meta, 0), /* NOTE(review): no use visible in this patch - confirm */
+ DEFINE_PROP_UINT8("vwc", NvmeCtrl, vwc, 0), /* copied to id->vwc; nvme_init rejects values > 1 */
+ DEFINE_PROP_UINT8("lba_idx", NvmeCtrl, lba_index, 0), /* initial flbas; nvme_init rejects values > 3 */
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
DEFINE_PROP_END_OF_LIST(),
};
@@ -654,6 +654,17 @@ typedef struct NvmeCtrl {
uint32_t ns_size;
uint32_t num_queues;
uint32_t max_q_ents;
+ uint8_t db_stride;
+ uint8_t aerl;
+ uint8_t acl;
+ uint8_t elpe;
+ uint8_t mdts;
+ uint8_t cqr;
+ uint8_t max_sqes;
+ uint8_t max_cqes;
+ uint8_t meta;
+ uint8_t vwc;
+ uint8_t lba_index;
NvmeNamespace *namespaces;
NvmeSQueue **sq;
For controller capabilities and features visible to a host driver, add command line parameters to allow them to be any value the spec allows. Useful for verifying a driver against a device with various constraints. Signed-off-by: Keith Busch <keith.busch@intel.com> --- hw/nvme.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++------------- hw/nvme.h | 11 ++++ 2 files changed, 138 insertions(+), 34 deletions(-)