[v3,3/6] block/nvme: support larger than 512 bytes sector devices
diff mbox series

Message ID 20190703155944.9637-4-mlevitsk@redhat.com
State New
Headers show
Series
  • Few fixes for userspace NVME driver
Related show

Commit Message

Maxim Levitsky July 3, 2019, 3:59 p.m. UTC
Currently the driver hardcodes the sector size to 512,
and doesn't check the underlying device. Fix that.

Also fail if the underlying NVMe device is formatted with metadata,
as this needs special support.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 block/nvme.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

Comments

Max Reitz July 5, 2019, 11:58 a.m. UTC | #1
On 03.07.19 17:59, Maxim Levitsky wrote:
> Currently the driver hardcodes the sector size to 512,
> and doesn't check the underlying device. Fix that.
> 
> Also fail if underlying nvme device is formatted with metadata
> as this needs special support.
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  block/nvme.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 40 insertions(+), 5 deletions(-)
> 
> diff --git a/block/nvme.c b/block/nvme.c
> index 52798081b2..1f0d09349f 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c

[...]

> @@ -463,7 +467,22 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
>      }
>  
>      s->nsze = le64_to_cpu(idns->nsze);
> +    lbaf = &idns->lbaf[NVME_ID_NS_FLBAS_INDEX(idns->flbas)];
> +
> +    if (lbaf->ms) {
> +        error_setg(errp, "Namespaces with metadata are not yet supported");
> +        goto out;
> +    }
> +
> +    hwsect_size = 1 << lbaf->ds;
> +
> +    if (hwsect_size < BDRV_SECTOR_BITS || hwsect_size > s->page_size) {

s/BDRV_SECTOR_BITS/BDRV_SECTOR_SIZE/

> +        error_setg(errp, "Namespace has unsupported block size (%d)",
> +                hwsect_size);
> +        goto out;
> +    }
>  
> +    s->blkshift = lbaf->ds;
>  out:
>      qemu_vfio_dma_unmap(s->vfio, resp);
>      qemu_vfree(resp);
> @@ -782,8 +801,22 @@ fail:
>  static int64_t nvme_getlength(BlockDriverState *bs)
>  {
>      BDRVNVMeState *s = bs->opaque;
> +    return s->nsze << s->blkshift;
> +}
>  
> -    return s->nsze << BDRV_SECTOR_BITS;
> +static int64_t nvme_get_blocksize(BlockDriverState *bs)
> +{
> +    BDRVNVMeState *s = bs->opaque;
> +    assert(s->blkshift >= 9);

I think BDRV_SECTOR_BITS is more correct here (this is about what the
general block layer code expects).  Also, there’s no pain in doing so,
as you did check against BDRV_SECTOR_SIZE in nvme_identify().

Max

> +    return 1 << s->blkshift;
> +}
> +
> +static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
> +{
> +    int64_t blocksize = nvme_get_blocksize(bs);
> +    bsz->phys = blocksize;
> +    bsz->log = blocksize;
> +    return 0;
>  }
>  
>  /* Called with s->dma_map_lock */
Maxim Levitsky July 7, 2019, 8:51 a.m. UTC | #2
On Fri, 2019-07-05 at 13:58 +0200, Max Reitz wrote:
> On 03.07.19 17:59, Maxim Levitsky wrote:
> > Currently the driver hardcodes the sector size to 512,
> > and doesn't check the underlying device. Fix that.
> > 
> > Also fail if underlying nvme device is formatted with metadata
> > as this needs special support.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  block/nvme.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 40 insertions(+), 5 deletions(-)
> > 
> > diff --git a/block/nvme.c b/block/nvme.c
> > index 52798081b2..1f0d09349f 100644
> > --- a/block/nvme.c
> > +++ b/block/nvme.c
> 
> [...]
> 
> > @@ -463,7 +467,22 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
> >      }
> >  
> >      s->nsze = le64_to_cpu(idns->nsze);
> > +    lbaf = &idns->lbaf[NVME_ID_NS_FLBAS_INDEX(idns->flbas)];
> > +
> > +    if (lbaf->ms) {
> > +        error_setg(errp, "Namespaces with metadata are not yet supported");
> > +        goto out;
> > +    }
> > +
> > +    hwsect_size = 1 << lbaf->ds;
> > +
> > +    if (hwsect_size < BDRV_SECTOR_BITS || hwsect_size > s->page_size) {
> 
> s/BDRV_SECTOR_BITS/BDRV_SECTOR_SIZE/
Oops.

> 
> > +        error_setg(errp, "Namespace has unsupported block size (%d)",
> > +                hwsect_size);
> > +        goto out;
> > +    }
> >  
> > +    s->blkshift = lbaf->ds;
> >  out:
> >      qemu_vfio_dma_unmap(s->vfio, resp);
> >      qemu_vfree(resp);
> > @@ -782,8 +801,22 @@ fail:
> >  static int64_t nvme_getlength(BlockDriverState *bs)
> >  {
> >      BDRVNVMeState *s = bs->opaque;
> > +    return s->nsze << s->blkshift;
> > +}
> >  
> > -    return s->nsze << BDRV_SECTOR_BITS;
> > +static int64_t nvme_get_blocksize(BlockDriverState *bs)
> > +{
> > +    BDRVNVMeState *s = bs->opaque;
> > +    assert(s->blkshift >= 9);
> 
> I think BDRV_SECTOR_BITS is more correct here (this is about what the
> general block layer code expects).  Also, there’s no pain in doing so,
> as you did check against BDRV_SECTOR_SIZE in nvme_identify().
> Max
Of course, thanks!!

> 
> > +    return 1 << s->blkshift;
> > +}
> > +
> > +static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
> > +{
> > +    int64_t blocksize = nvme_get_blocksize(bs);
> > +    bsz->phys = blocksize;
> > +    bsz->log = blocksize;
> > +    return 0;
> >  }
> >  
> >  /* Called with s->dma_map_lock */
> 
> 

Thanks for the review,
	Best regards,
		Maxim Levitsky

Patch
diff mbox series

diff --git a/block/nvme.c b/block/nvme.c
index 52798081b2..1f0d09349f 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -102,8 +102,11 @@  typedef struct {
     size_t doorbell_scale;
     bool write_cache_supported;
     EventNotifier irq_notifier;
+
     uint64_t nsze; /* Namespace size reported by identify command */
     int nsid;      /* The namespace id to read/write data. */
+    size_t blkshift;
+
     uint64_t max_transfer;
     bool plugged;
 
@@ -415,8 +418,9 @@  static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
     BDRVNVMeState *s = bs->opaque;
     NvmeIdCtrl *idctrl;
     NvmeIdNs *idns;
+    NvmeLBAF *lbaf;
     uint8_t *resp;
-    int r;
+    int r, hwsect_size;
     uint64_t iova;
     NvmeCmd cmd = {
         .opcode = NVME_ADM_CMD_IDENTIFY,
@@ -463,7 +467,22 @@  static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
     }
 
     s->nsze = le64_to_cpu(idns->nsze);
+    lbaf = &idns->lbaf[NVME_ID_NS_FLBAS_INDEX(idns->flbas)];
+
+    if (lbaf->ms) {
+        error_setg(errp, "Namespaces with metadata are not yet supported");
+        goto out;
+    }
+
+    hwsect_size = 1 << lbaf->ds;
+
+    if (hwsect_size < BDRV_SECTOR_BITS || hwsect_size > s->page_size) {
+        error_setg(errp, "Namespace has unsupported block size (%d)",
+                hwsect_size);
+        goto out;
+    }
 
+    s->blkshift = lbaf->ds;
 out:
     qemu_vfio_dma_unmap(s->vfio, resp);
     qemu_vfree(resp);
@@ -782,8 +801,22 @@  fail:
 static int64_t nvme_getlength(BlockDriverState *bs)
 {
     BDRVNVMeState *s = bs->opaque;
+    return s->nsze << s->blkshift;
+}
 
-    return s->nsze << BDRV_SECTOR_BITS;
+static int64_t nvme_get_blocksize(BlockDriverState *bs)
+{
+    BDRVNVMeState *s = bs->opaque;
+    assert(s->blkshift >= 9);
+    return 1 << s->blkshift;
+}
+
+static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+    int64_t blocksize = nvme_get_blocksize(bs);
+    bsz->phys = blocksize;
+    bsz->log = blocksize;
+    return 0;
 }
 
 /* Called with s->dma_map_lock */
@@ -914,13 +947,14 @@  static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
     BDRVNVMeState *s = bs->opaque;
     NVMeQueuePair *ioq = s->queues[1];
     NVMeRequest *req;
-    uint32_t cdw12 = (((bytes >> BDRV_SECTOR_BITS) - 1) & 0xFFFF) |
+
+    uint32_t cdw12 = (((bytes >> s->blkshift) - 1) & 0xFFFF) |
                        (flags & BDRV_REQ_FUA ? 1 << 30 : 0);
     NvmeCmd cmd = {
         .opcode = is_write ? NVME_CMD_WRITE : NVME_CMD_READ,
         .nsid = cpu_to_le32(s->nsid),
-        .cdw10 = cpu_to_le32((offset >> BDRV_SECTOR_BITS) & 0xFFFFFFFF),
-        .cdw11 = cpu_to_le32(((offset >> BDRV_SECTOR_BITS) >> 32) & 0xFFFFFFFF),
+        .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF),
+        .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF),
         .cdw12 = cpu_to_le32(cdw12),
     };
     NVMeCoData data = {
@@ -1151,6 +1185,7 @@  static BlockDriver bdrv_nvme = {
     .bdrv_file_open           = nvme_file_open,
     .bdrv_close               = nvme_close,
     .bdrv_getlength           = nvme_getlength,
+    .bdrv_probe_blocksizes    = nvme_probe_blocksizes,
 
     .bdrv_co_preadv           = nvme_co_preadv,
     .bdrv_co_pwritev          = nvme_co_pwritev,