diff mbox series

[PULL,v2,36/38] hw/block/nvme: support namespace attachment command

Message ID 20210309114512.536489-37-its@irrelevant.dk
State New
Headers show
Series [PULL,v2,01/38] hw/block/nvme: introduce nvme-subsys device | expand

Commit Message

Klaus Jensen March 9, 2021, 11:45 a.m. UTC
From: Minwoo Im <minwoo.im.dev@gmail.com>

This patch supports Namespace Attachment command for the pre-defined
nvme-ns device nodes.  Of course, attach/detach namespace should only be
supported in case 'subsys' is given.  This is because if we detach a
namespace from a controller, somebody needs to manage the detached, but
allocated namespace in the NVMe subsystem.

As command effect for the namespace attachment command is registered,
the host will be notified that namespace inventory is changed so that
host will rescan the namespace inventory after this command.  For
example, kernel driver manages this command effect via passthru IOCTL.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
Tested-by: Klaus Jensen <k.jensen@samsung.com>
[k.jensen: rebased for dma refactor]
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme-subsys.h | 10 +++++++
 hw/block/nvme.h        |  5 ++++
 include/block/nvme.h   |  6 +++++
 hw/block/nvme.c        | 60 +++++++++++++++++++++++++++++++++++++++++-
 hw/block/trace-events  |  2 ++
 5 files changed, 82 insertions(+), 1 deletion(-)

Comments

Peter Maydell March 12, 2021, 1:12 p.m. UTC | #1
On Tue, 9 Mar 2021 at 11:46, Klaus Jensen <its@irrelevant.dk> wrote:
>
> From: Minwoo Im <minwoo.im.dev@gmail.com>
>
> This patch supports Namespace Attachment command for the pre-defined
> nvme-ns device nodes.  Of course, attach/detach namespace should only be
> supported in case 'subsys' is given.  This is because if we detach a
> namespace from a controller, somebody needs to manage the detached, but
> allocated namespace in the NVMe subsystem.
>
> As command effect for the namespace attachment command is registered,
> the host will be notified that namespace inventory is changed so that
> host will rescan the namespace inventory after this command.  For
> example, kernel driver manages this command effect via passthru IOCTL.

> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 85a7b5a14f4e..1287bc2cd17a 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -235,6 +235,11 @@ static inline void nvme_ns_attach(NvmeCtrl *n, NvmeNamespace *ns)
>      n->namespaces[nvme_nsid(ns) - 1] = ns;
>  }
>
> +static inline void nvme_ns_detach(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    n->namespaces[nvme_nsid(ns) - 1] = NULL;
> +}

Hi; Coverity complains about a possible array overflow both here
in nvme_ns_detach() (CID 1450757) and in nvme_ns_attach() (CID 1450758):
nvme_nsid() can return -1, but this code does not check for that.

If these functions both assume that their ns argument is non-NULL,
then adding an "assert(ns)" would probably placate Coverity and also
would mean that any bugs elsewhere resulting in accidentally passing
a NULL pointer would result in a clean assertion failure rather than
memory corruption. (Or you could directly assert that the array index
is in-bounds, I guess.)

thanks
-- PMM
Klaus Jensen March 12, 2021, 3:10 p.m. UTC | #2
On Mar 12 13:12, Peter Maydell wrote:
> On Tue, 9 Mar 2021 at 11:46, Klaus Jensen <its@irrelevant.dk> wrote:
> >
> > From: Minwoo Im <minwoo.im.dev@gmail.com>
> >
> > This patch supports Namespace Attachment command for the pre-defined
> > nvme-ns device nodes.  Of course, attach/detach namespace should only be
> > supported in case 'subsys' is given.  This is because if we detach a
> > namespace from a controller, somebody needs to manage the detached, but
> > allocated namespace in the NVMe subsystem.
> >
> > As command effect for the namespace attachment command is registered,
> > the host will be notified that namespace inventory is changed so that
> > host will rescan the namespace inventory after this command.  For
> > example, kernel driver manages this command effect via passthru IOCTL.
> 
> > diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> > index 85a7b5a14f4e..1287bc2cd17a 100644
> > --- a/hw/block/nvme.h
> > +++ b/hw/block/nvme.h
> > @@ -235,6 +235,11 @@ static inline void nvme_ns_attach(NvmeCtrl *n, NvmeNamespace *ns)
> >      n->namespaces[nvme_nsid(ns) - 1] = ns;
> >  }
> >
> > +static inline void nvme_ns_detach(NvmeCtrl *n, NvmeNamespace *ns)
> > +{
> > +    n->namespaces[nvme_nsid(ns) - 1] = NULL;
> > +}
> 
> Hi; Coverity complains about a possible array overflow both here
> in nvme_ns_detach() (CID 1450757) and in nvme_ns_attach() (CID 1450758):
> nvme_nsid() can return -1, but this code does not check for that.
> 
> If these functions both assume that their ns argument is non-NULL,
> then adding an "assert(ns)" would probably placate Coverity and also
> would mean that any bugs elsewhere resulting in accidentally passing
> a NULL pointer would result in a clean assertion failure rather than
> memory corruption. (Or you could directly assert that the array index
> is in-bounds, I guess.)
> 

Hi Peter,

Thanks!

As far as I can tell, we never reach this without nsid being a valid
value or ns being NULL, but as you say, future misuse would be bad. I
will post a fix to make sure such misuse does not happen in the future.
diff mbox series

Patch

diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 83a6427b6e9d..fb66ae752ad5 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -34,6 +34,16 @@  typedef struct NvmeSubsystem {
 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
 int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp);
 
+static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
+        uint32_t cntlid)
+{
+    if (!subsys) {
+        return NULL;
+    }
+
+    return subsys->ctrls[cntlid];
+}
+
 /*
  * Return allocated namespace of the specified nsid in the subsystem.
  */
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 85a7b5a14f4e..1287bc2cd17a 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -235,6 +235,11 @@  static inline void nvme_ns_attach(NvmeCtrl *n, NvmeNamespace *ns)
     n->namespaces[nvme_nsid(ns) - 1] = ns;
 }
 
+static inline void nvme_ns_detach(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    n->namespaces[nvme_nsid(ns) - 1] = NULL;
+}
+
 static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
 {
     NvmeSQueue *sq = req->sq;
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 16d8c4c90f7e..03471a4d5abd 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -566,6 +566,7 @@  enum NvmeAdminCommands {
     NVME_ADM_CMD_ASYNC_EV_REQ   = 0x0c,
     NVME_ADM_CMD_ACTIVATE_FW    = 0x10,
     NVME_ADM_CMD_DOWNLOAD_FW    = 0x11,
+    NVME_ADM_CMD_NS_ATTACHMENT  = 0x15,
     NVME_ADM_CMD_FORMAT_NVM     = 0x80,
     NVME_ADM_CMD_SECURITY_SEND  = 0x81,
     NVME_ADM_CMD_SECURITY_RECV  = 0x82,
@@ -836,6 +837,9 @@  enum NvmeStatusCodes {
     NVME_FEAT_NOT_CHANGEABLE    = 0x010e,
     NVME_FEAT_NOT_NS_SPEC       = 0x010f,
     NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
+    NVME_NS_ALREADY_ATTACHED    = 0x0118,
+    NVME_NS_NOT_ATTACHED        = 0x011A,
+    NVME_NS_CTRL_LIST_INVALID   = 0x011C,
     NVME_CONFLICTING_ATTRS      = 0x0180,
     NVME_INVALID_PROT_INFO      = 0x0181,
     NVME_WRITE_TO_RO            = 0x0182,
@@ -951,6 +955,7 @@  typedef struct QEMU_PACKED NvmePSD {
     uint8_t     resv[16];
 } NvmePSD;
 
+#define NVME_CONTROLLER_LIST_SIZE 2048
 #define NVME_IDENTIFY_DATA_SIZE 4096
 
 enum NvmeIdCns {
@@ -1055,6 +1060,7 @@  enum NvmeIdCtrlOacs {
     NVME_OACS_SECURITY  = 1 << 0,
     NVME_OACS_FORMAT    = 1 << 1,
     NVME_OACS_FW        = 1 << 2,
+    NVME_OACS_NS_MGMT   = 1 << 3,
 };
 
 enum NvmeIdCtrlOncs {
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b4843d3bcf5e..86fbab1fc43c 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -196,6 +196,7 @@  static const uint32_t nvme_cse_acs[256] = {
     [NVME_ADM_CMD_SET_FEATURES]     = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_GET_FEATURES]     = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_ASYNC_EV_REQ]     = NVME_CMD_EFF_CSUPP,
+    [NVME_ADM_CMD_NS_ATTACHMENT]    = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
 };
 
 static const uint32_t nvme_cse_iocs_none[256];
@@ -3905,6 +3906,61 @@  static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
     return NVME_NO_COMPLETE;
 }
 
+static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns);
+static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
+{
+    NvmeNamespace *ns;
+    NvmeCtrl *ctrl;
+    uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
+    uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+    uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+    bool attach = !(dw10 & 0xf);
+    uint16_t *nr_ids = &list[0];
+    uint16_t *ids = &list[1];
+    uint16_t ret;
+    int i;
+
+    trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf);
+
+    ns = nvme_subsys_ns(n->subsys, nsid);
+    if (!ns) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    ret = nvme_h2c(n, (uint8_t *)list, 4096, req);
+    if (ret) {
+        return ret;
+    }
+
+    if (!*nr_ids) {
+        return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
+    }
+
+    for (i = 0; i < *nr_ids; i++) {
+        ctrl = nvme_subsys_ctrl(n->subsys, ids[i]);
+        if (!ctrl) {
+            return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
+        }
+
+        if (attach) {
+            if (nvme_ns_is_attached(ctrl, ns)) {
+                return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
+            }
+
+            nvme_ns_attach(ctrl, ns);
+            __nvme_select_ns_iocs(ctrl, ns);
+        } else {
+            if (!nvme_ns_is_attached(ctrl, ns)) {
+                return NVME_NS_NOT_ATTACHED | NVME_DNR;
+            }
+
+            nvme_ns_detach(ctrl, ns);
+        }
+    }
+
+    return NVME_SUCCESS;
+}
+
 static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
     trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
@@ -3941,6 +3997,8 @@  static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
         return nvme_get_feature(n, req);
     case NVME_ADM_CMD_ASYNC_EV_REQ:
         return nvme_aer(n, req);
+    case NVME_ADM_CMD_NS_ATTACHMENT:
+        return nvme_ns_attachment(n, req);
     default:
         assert(false);
     }
@@ -4910,7 +4968,7 @@  static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 
     id->mdts = n->params.mdts;
     id->ver = cpu_to_le32(NVME_SPEC_VER);
-    id->oacs = cpu_to_le16(0);
+    id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT);
     id->cntrltype = 0x1;
 
     /*
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 60a076cea54f..c5dba935a0c1 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -84,6 +84,8 @@  pci_nvme_aer(uint16_t cid) "cid %"PRIu16""
 pci_nvme_aer_aerl_exceeded(void) "aerl exceeded"
 pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
 pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8""
+pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32""
 pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
 pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
 pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""