Patchwork [03/11] QEMU NVMe: Implement NVMe features

login
register
mail settings
Submitter Keith Busch
Date Feb. 27, 2013, 12:47 a.m.
Message ID <1361926034-21824-4-git-send-email-keith.busch@intel.com>
Download mbox | patch
Permalink /patch/223440/
State New
Headers show

Comments

Keith Busch - Feb. 27, 2013, 12:47 a.m.
This allows a driver to set and retrieve the various NVMe features defined
in the spec. Where applicable, setting a feature also changes the device's
behavior to reflect the requested value.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 hw/nvme.c |  148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 hw/nvme.h |    1 +
 2 files changed, 137 insertions(+), 12 deletions(-)

Patch

diff --git a/hw/nvme.c b/hw/nvme.c
index 178a495..f1e0792 100644
--- a/hw/nvme.c
+++ b/hw/nvme.c
@@ -192,6 +192,10 @@  static void nvme_post_cqes(void *opaque)
     NvmeCtrl *n = cq->ctrl;
     NvmeRequest *req, *next;
     NvmeSQueue *sq;
+    int processed = 0;
+    int coalesce = (n->features.int_vector_config[cq->vector] >> 16) & 1;
+    int thresh = NVME_INTC_THR(n->features.int_coalescing) + 1;
+
     QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
         hwaddr addr;
         if (nvme_cq_full(cq)) {
@@ -207,8 +211,14 @@  static void nvme_post_cqes(void *opaque)
         nvme_inc_cq_tail(cq);
         pci_dma_write(&n->dev, addr, (void *)&req->cqe, sizeof(req->cqe));
         QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
+        if (coalesce && ++processed % thresh == 0) {
+            nvme_isr_notify(n, cq);
+            processed = 0;
+        }
+    }
+    if (processed) {
+        nvme_isr_notify(n, cq);
     }
-    nvme_isr_notify(n, cq);
 }
 
 static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
@@ -332,7 +342,7 @@  static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
 }
 
 static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
-    uint16_t sqid, uint16_t cqid, uint16_t size)
+    uint16_t sqid, uint16_t cqid, uint16_t size, enum QueueFlags prio)
 {
     int i;
     NvmeCQueue *cq;
@@ -352,6 +362,22 @@  static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
         QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
     }
 
+    switch (prio) {
+    case NVME_Q_PRIO_URGENT:
+        sq->arb_burst = (1 << NVME_ARB_AB(n->features.arbitration));
+        break;
+    case NVME_Q_PRIO_HIGH:
+        sq->arb_burst = NVME_ARB_HPW(n->features.arbitration) + 1;
+        break;
+    case NVME_Q_PRIO_NORMAL:
+        sq->arb_burst = NVME_ARB_MPW(n->features.arbitration) + 1;
+        break;
+    case NVME_Q_PRIO_LOW:
+    default:
+        sq->arb_burst = NVME_ARB_LPW(n->features.arbitration) + 1;
+        break;
+    }
+
     sq->timer = qemu_new_timer_ns(vm_clock, nvme_sq_process, sq);
     cq = n->cq[cqid];
     QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
@@ -378,7 +404,8 @@  static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
         return NVME_INVALID_FIELD | NVME_DNR;
     }
     sq = g_malloc0(sizeof(*sq));
-    nvme_init_sq(sq, n, c->prp1, c->sqid, c->cqid, c->qsize + 1);
+    nvme_init_sq(sq, n, c->prp1, c->sqid, c->cqid, c->qsize + 1,
+        NVME_SQ_FLAGS_QPRIO(c->sq_flags));
     return NVME_SUCCESS;
 }
 
@@ -432,7 +459,6 @@  static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
 {
     NvmeCreateCq *c = (NvmeCreateCq *)cmd;
     NvmeCQueue *cq;
-
     if (!c->cqid || (c->cqid && !nvme_check_cqid(n, c->cqid))) {
         return NVME_INVALID_CQID | NVME_DNR;
     }
@@ -475,14 +501,47 @@  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     uint32_t dw11 = cmd->cdw11;
     switch (cmd->cdw10) {
+    case NVME_ARBITRATION:
+        req->cqe.result = n->features.arbitration;
+        break;
+    case NVME_POWER_MANAGEMENT:
+        req->cqe.result = n->features.power_mgmt;
+        break;
     case NVME_LBA_RANGE_TYPE:
         return nvme_dma_prp(cmd->prp1, cmd->prp2,
-                    MIN(sizeof(n->namespaces[cmd->nsid].lba_range),
-                        (dw11 & 0x3f) * sizeof(NvmeRangeType)),
-                    n, (uint8_t *)n->namespaces[cmd->nsid].lba_range,
-                    DMA_DIRECTION_TO_DEVICE);
+                MIN(sizeof(n->namespaces[cmd->nsid].lba_range),
+                    (dw11 & 0x3f) * sizeof(NvmeRangeType)), n, 
+                (uint8_t *)n->namespaces[cmd->nsid].lba_range,
+                DMA_DIRECTION_FROM_DEVICE);
+    case NVME_TEMPERATURE_THRESHOLD:
+        req->cqe.result = n->features.temp_thresh;
+        break;
+    case NVME_ERROR_RECOVERY:
+        req->cqe.result = n->features.err_rec;
+        break;
+    case NVME_VOLATILE_WRITE_CACHE:
+        req->cqe.result = n->features.volatile_wc;
+        break;
     case NVME_NUMBER_OF_QUEUES:
-        req->cqe.result = n->num_queues;
+        req->cqe.result = n->features.num_queues;
+        break;
+    case NVME_INTERRUPT_COALESCING:
+        req->cqe.result = n->features.int_coalescing;
+        break;
+    case NVME_INTERRUPT_VECTOR_CONF:
+        if ((dw11 & 0xffff) > n->num_queues) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+        req->cqe.result = n->features.int_vector_config[dw11 & 0xffff];
+        break;
+    case NVME_WRITE_ATOMICITY:
+        req->cqe.result = n->features.write_atomicity;
+        break;
+    case NVME_ASYNCHRONOUS_EVENT_CONF:
+        req->cqe.result = n->features.async_config;
+        break;
+    case NVME_SOFTWARE_PROGRESS_MARKER:
+        req->cqe.result = n->features.sw_prog_marker;
         break;
     default:
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -492,9 +551,47 @@  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
 static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
+    uint32_t dw11 = cmd->cdw11;
     switch (cmd->cdw10) {
+    case NVME_ARBITRATION:
+        req->cqe.result = n->features.arbitration;
+        break;
+    case NVME_POWER_MANAGEMENT:
+        n->features.power_mgmt = dw11;
+        break;
+    case NVME_LBA_RANGE_TYPE:
+        return nvme_dma_prp(cmd->prp1, cmd->prp2,
+                MIN(sizeof(n->namespaces[cmd->nsid].lba_range),
+                    (dw11 & 0x3f) * sizeof(NvmeRangeType)), n, 
+                (uint8_t *)n->namespaces[cmd->nsid].lba_range,
+                DMA_DIRECTION_TO_DEVICE);
+    case NVME_TEMPERATURE_THRESHOLD:
+        n->features.temp_thresh = dw11;
+        break;
+    case NVME_ERROR_RECOVERY:
+        n->features.err_rec = dw11;
+        break;
+    case NVME_VOLATILE_WRITE_CACHE:
+        break;
     case NVME_NUMBER_OF_QUEUES:
-        req->cqe.result = n->num_queues;
+        req->cqe.result = n->features.num_queues;
+        break;
+    case NVME_INTERRUPT_COALESCING:
+        break;
+    case NVME_INTERRUPT_VECTOR_CONF:
+        if ((dw11 & 0xffff) > n->num_queues) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+        n->features.int_vector_config[dw11 & 0xffff] = dw11 & 0x1ffff;
+        break;
+    case NVME_WRITE_ATOMICITY:
+        n->features.write_atomicity = dw11;
+        break;
+    case NVME_ASYNCHRONOUS_EVENT_CONF:
+        n->features.async_config = dw11;
+        break;
+    case NVME_SOFTWARE_PROGRESS_MARKER:
+        n->features.sw_prog_marker = dw11;
         break;
     default:
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -533,7 +630,10 @@  static void nvme_sq_process(void *opaque)
     NvmeSQueue *sq = opaque;
     NvmeCtrl *n = sq->ctrl;
     NvmeCQueue *cq = n->cq[sq->cqid];
-    while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
+    int processed = 0;
+
+    while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list)) &&
+            processed++ < sq->arb_burst) {
         addr = sq->dma_addr + sq->head * n->sqe_size;
         pci_dma_read(&n->dev, addr, (void *)&cmd, sizeof(cmd));
         nvme_inc_sq_head(sq);
@@ -551,6 +651,11 @@  static void nvme_sq_process(void *opaque)
             nvme_enqueue_req_completion(cq, req);
         }
     }
+
+    if (!nvme_sq_empty(sq)) {
+        qemu_mod_timer(sq->timer, qemu_get_clock_ns(vm_clock) + 500);
+    }
+
 }
 
 static void nvme_clear_ctrl(NvmeCtrl *n)
@@ -611,7 +716,7 @@  static int nvme_start_ctrl(NvmeCtrl *n)
     nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
         NVME_AQA_ACQS(n->bar.aqa) + 1);
     nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
-        NVME_AQA_ASQS(n->bar.aqa) + 1);
+        NVME_AQA_ASQS(n->bar.aqa) + 1, NVME_Q_PRIO_HIGH);
 
     return 0;
 }
@@ -806,6 +911,8 @@  static int nvme_init(PCIDevice *pci_dev)
     n->namespaces = g_malloc0(sizeof(*n->namespaces)*n->num_namespaces);
     n->sq = g_malloc0(sizeof(*n->sq)*n->num_queues);
     n->cq = g_malloc0(sizeof(*n->cq)*n->num_queues);
+    n->features.int_vector_config = g_malloc(n->num_queues *
+        sizeof(*n->features.int_vector_config));
 
     memory_region_init_io(&n->iomem, &nvme_mmio_ops, n, "nvme-mmio",
         n->reg_size);
@@ -835,6 +942,22 @@  static int nvme_init(PCIDevice *pci_dev)
     id->psd[0].enlat = 0x10;
     id->psd[0].exlat = 0x4;
 
+    n->features.arbitration     = 0x1f0f0706;
+    n->features.power_mgmt      = 0;
+    n->features.temp_thresh     = 0x14d;
+    n->features.err_rec         = 0;
+    n->features.volatile_wc     = 0;
+    n->features.num_queues      = (n->num_queues - 1) |
+                                 ((n->num_queues - 1) << 16);
+    n->features.int_coalescing  = 0;
+    n->features.write_atomicity = 0;
+    n->features.async_config    = 0x0;
+    n->features.sw_prog_marker  = 0;
+
+    for (i = 0; i < n->num_queues; i++) {
+        n->features.int_vector_config[i] = i | (1 << 16);
+    }
+
     n->bar.cap  = (uint64_t)(n->max_q_ents & CAP_MQES_MASK) << CAP_MQES_SHIFT;
     n->bar.cap |= (uint64_t)(n->cqr & CAP_CQR_MASK) << CAP_CQR_SHIFT;
     n->bar.cap |= (uint64_t)(1 & CAP_AMS_MASK) << CAP_AMS_SHIFT;
@@ -872,6 +995,7 @@  static void nvme_exit(PCIDevice *pci_dev)
     g_free(n->namespaces);
     g_free(n->cq);
     g_free(n->sq);
+    g_free(n->features.int_vector_config);
     msix_uninit_exclusive_bar(pci_dev);
     memory_region_destroy(&n->iomem);
 }
diff --git a/hw/nvme.h b/hw/nvme.h
index d292391..6296f04 100644
--- a/hw/nvme.h
+++ b/hw/nvme.h
@@ -669,6 +669,7 @@  typedef struct NvmeCtrl {
     NvmeNamespace   *namespaces;
     NvmeSQueue      **sq;
     NvmeCQueue      **cq;
+    NvmeFeatureVal  features;
     NvmeSQueue      admin_sq;
     NvmeCQueue      admin_cq;
     NvmeIdCtrl      id_ctrl;