diff mbox series

[2/2] hw/nvme: fix copy cmd for pi enabled namespaces

Message ID 20220420090336.10124-3-d.tihov@yadro.com
State New
Headers show
Series Fix nvme copy command with pi metadata | expand

Commit Message

Dmitry Tihov April 20, 2022, 9:03 a.m. UTC
Current implementation have two problems:
First in the read part of copy command. Because there is no metadata
mangling before nvme_dif_check invocation, reftag error is thrown for
blocks of namespace that have not been previously written to.
Second in the write part. Reftag in the protection information section
of the source metadata should not be copied as is to the destination.
Source range start lba and destination range start lba could differ so
recalculation of reftag is always needed.

Signed-off-by: Dmitry Tikhov <d.tihov@yadro.com>
---
 hw/nvme/ctrl.c |  5 ++++
 hw/nvme/dif.c  | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/nvme/dif.h  |  2 ++
 3 files changed, 72 insertions(+)

Comments

Klaus Jensen April 20, 2022, 10:04 a.m. UTC | #1
On Apr 20 12:03, Dmitry Tikhov wrote:
> Current implementation have two problems:
> First in the read part of copy command. Because there is no metadata
> mangling before nvme_dif_check invocation, reftag error is thrown for
> blocks of namespace that have not been previously written to.

Yes, this is definitely a bug and the fix is good, thanks!

> Second in the write part. Reftag in the protection information section
> of the source metadata should not be copied as is to the destination.

Hmm, says who?

> Source range start lba and destination range start lba could differ so
> recalculation of reftag is always needed.
> 

If PRACT is 0, we really should not touch the protection information. My
interpretation of the Copy command is that it is simply just screwed if
used with PRACT 0 and Type 1. PRACT bit is specifically to allow the
controller to generate appropriate PI in this case.

On the other hand, I can totally see your interpretation as valid as
well. Let me ask some spec people about this, and I will get back to
you.
Klaus Jensen April 20, 2022, 7:16 p.m. UTC | #2
On Apr 20 12:04, Klaus Jensen wrote:
> On Apr 20 12:03, Dmitry Tikhov wrote:
> > Current implementation have two problems:
> > First in the read part of copy command. Because there is no metadata
> > mangling before nvme_dif_check invocation, reftag error is thrown for
> > blocks of namespace that have not been previously written to.
> 
> Yes, this is definitely a bug and the fix is good, thanks!
> 
> > Second in the write part. Reftag in the protection information section
> > of the source metadata should not be copied as is to the destination.
> 
> Hmm, says who?
> 
> > Source range start lba and destination range start lba could differ so
> > recalculation of reftag is always needed.
> > 
> 
> If PRACT is 0, we really should not touch the protection information. My
> interpretation of the Copy command is that it is simply just screwed if
> used with PRACT 0 and Type 1. PRACT bit is specifically to allow the
> controller to generate appropriate PI in this case.
> 
> On the other hand, I can totally see your interpretation as valid as
> well. Let me ask some spec people about this, and I will get back to
> you.

Discussed this with the TP authors and, no, reftag should not be
re-computed for PRACT 0, regardless of the PI type.
Dmitry Tihov April 21, 2022, 7:41 a.m. UTC | #3
On Wed, Apr 20, 2022 at 21:16:15, Klaus Jensen wrote:
> Discussed this with the TP authors and, no, reftag should not be
> re-computed for PRACT 0, regardless of the PI type.
Ok, should i resend patch with only adding nvme_dif_mangle_mdata in
the read part?
Klaus Jensen April 21, 2022, 10:13 a.m. UTC | #4
On Apr 21 10:41, Dmitry Tikhov wrote:
> On Wed, Apr 20, 2022 at 21:16:15, Klaus Jensen wrote:
> > Discussed this with the TP authors and, no, reftag should not be
> > re-computed for PRACT 0, regardless of the PI type.
> Ok, should i resend patch with only adding nvme_dif_mangle_mdata in
> the read part?

Yes, that is still a bug :)
diff mbox series

Patch

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 74540a03d5..cb493f4506 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -2787,6 +2787,10 @@  static void nvme_copy_in_completed_cb(void *opaque, int ret)
         size_t mlen = nvme_m2b(ns, nlb);
         uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb);
 
+        status = nvme_dif_mangle_mdata(ns, mbounce, mlen, reftag);
+        if (status) {
+            goto invalid;
+        }
         status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor,
                                 slba, apptag, appmask, &reftag);
         if (status) {
@@ -2805,6 +2809,7 @@  static void nvme_copy_in_completed_cb(void *opaque, int ret)
             nvme_dif_pract_generate_dif(ns, iocb->bounce, len, mbounce, mlen,
                                         apptag, &iocb->reftag);
         } else {
+            nvme_dif_restore_reftag(ns, mbounce, mlen, iocb->reftag);
             status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen,
                                     prinfow, iocb->slba, apptag, appmask,
                                     &iocb->reftag);
diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c
index 0f65687396..f29c5893e2 100644
--- a/hw/nvme/dif.c
+++ b/hw/nvme/dif.c
@@ -385,6 +385,71 @@  uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
     return NVME_SUCCESS;
 }
 
+static void nvme_dif_restore_reftag_crc16(NvmeNamespace *ns, uint8_t *mbuf,
+                                          size_t mlen, uint64_t reftag,
+                                          int16_t pil)
+{
+    uint8_t *mbufp, *end = mbuf + mlen;
+
+    for (mbufp = mbuf; mbufp < end; mbufp += ns->lbaf.ms) {
+        NvmeDifTuple *dif = (NvmeDifTuple *)(mbufp + pil);
+
+        if (!nvme_dif_is_disabled_crc16(ns, dif)) {
+            dif->g16.reftag = cpu_to_be32(reftag++);
+        }
+
+    }
+
+    return;
+}
+
+static void nvme_dif_restore_reftag_crc64(NvmeNamespace *ns, uint8_t *mbuf,
+                                          size_t mlen, uint64_t reftag,
+                                          int16_t pil)
+{
+    uint8_t *mbufp, *end = mbuf + mlen;
+
+    for (mbufp = mbuf; mbufp < end; mbufp += ns->lbaf.ms) {
+        NvmeDifTuple *dif = (NvmeDifTuple *)(mbufp + pil);
+
+        if (!nvme_dif_is_disabled_crc64(ns, dif)) {
+            dif->g64.sr[0] = reftag >> 40;
+            dif->g64.sr[1] = reftag >> 32;
+            dif->g64.sr[2] = reftag >> 24;
+            dif->g64.sr[3] = reftag >> 16;
+            dif->g64.sr[4] = reftag >> 8;
+            dif->g64.sr[5] = reftag;
+
+            reftag++;
+        }
+
+    }
+
+    return;
+}
+
+void nvme_dif_restore_reftag(NvmeNamespace *ns, uint8_t *mbuf,
+                             size_t mlen, uint64_t reftag)
+{
+    int16_t pil = 0;
+
+    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
+        pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
+    }
+
+    switch (ns->pif) {
+    case NVME_PI_GUARD_16:
+        nvme_dif_restore_reftag_crc16(ns, mbuf, mlen, reftag, pil);
+        return;
+    case NVME_PI_GUARD_64:
+        nvme_dif_restore_reftag_crc64(ns, mbuf, mlen, reftag, pil);
+        return;
+    default:
+        abort();
+    }
+
+}
+
 uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                                uint64_t slba)
 {
diff --git a/hw/nvme/dif.h b/hw/nvme/dif.h
index fe1e5828d7..3203837658 100644
--- a/hw/nvme/dif.h
+++ b/hw/nvme/dif.h
@@ -186,6 +186,8 @@  uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
                         uint8_t *mbuf, size_t mlen, uint8_t prinfo,
                         uint64_t slba, uint16_t apptag,
                         uint16_t appmask, uint64_t *reftag);
+void nvme_dif_restore_reftag(NvmeNamespace *ns, uint8_t *mbuf,
+                             size_t mlen, uint64_t reftag);
 bool nvme_dif_is_disabled(NvmeNamespace *ns, NvmeDifTuple *dif);
 uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);