[v3,4/6] block/nvme: add support for image creation
diff mbox series

Message ID 20190703155944.9637-5-mlevitsk@redhat.com
State New
Headers show
Series
  • Few fixes for userspace NVME driver
Related show

Commit Message

Maxim Levitsky July 3, 2019, 3:59 p.m. UTC
Tesed on a nvme device like that:

# create preallocated qcow2 image
$ qemu-img create -f qcow2 nvme://0000:06:00.0/1 10G -o preallocation=metadata
Formatting 'nvme://0000:06:00.0/1', fmt=qcow2 size=10737418240 cluster_size=65536 preallocation=metadata lazy_refcounts=off refcount_bits=16

# create an empty qcow2 image
$ qemu-img create -f qcow2 nvme://0000:06:00.0/1 10G -o preallocation=off
Formatting 'nvme://0000:06:00.0/1', fmt=qcow2 size=10737418240 cluster_size=65536 preallocation=off lazy_refcounts=off refcount_bits=16

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 block/nvme.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)

Comments

Max Reitz July 5, 2019, 12:09 p.m. UTC | #1
On 03.07.19 17:59, Maxim Levitsky wrote:
> Tesed on a nvme device like that:
> 
> # create preallocated qcow2 image
> $ qemu-img create -f qcow2 nvme://0000:06:00.0/1 10G -o preallocation=metadata
> Formatting 'nvme://0000:06:00.0/1', fmt=qcow2 size=10737418240 cluster_size=65536 preallocation=metadata lazy_refcounts=off refcount_bits=16
> 
> # create an empty qcow2 image
> $ qemu-img create -f qcow2 nvme://0000:06:00.0/1 10G -o preallocation=off
> Formatting 'nvme://0000:06:00.0/1', fmt=qcow2 size=10737418240 cluster_size=65536 preallocation=off lazy_refcounts=off refcount_bits=16
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  block/nvme.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 108 insertions(+)

Hm.  I’m not quite sure I like this, because this is not image creation.

What we need is a general interface for formatting existing files.  I
mean, we have that in QMP (blockdev-create), but the problem is that
this doesn’t really translate to qemu-img create.

I wonder whether it’s best to hack something up that makes
bdrv_create_file() a no-op, or whether we should expose blockdev-create
over qemu-img.  I’ll see how difficult the latter is, it sounds fun
(famous last words).

Max
Maxim Levitsky July 7, 2019, 9:03 a.m. UTC | #2
On Fri, 2019-07-05 at 14:09 +0200, Max Reitz wrote:
> On 03.07.19 17:59, Maxim Levitsky wrote:
> > Tesed on a nvme device like that:
> > 
> > # create preallocated qcow2 image
> > $ qemu-img create -f qcow2 nvme://0000:06:00.0/1 10G -o preallocation=metadata
> > Formatting 'nvme://0000:06:00.0/1', fmt=qcow2 size=10737418240 cluster_size=65536 preallocation=metadata lazy_refcounts=off refcount_bits=16
> > 
> > # create an empty qcow2 image
> > $ qemu-img create -f qcow2 nvme://0000:06:00.0/1 10G -o preallocation=off
> > Formatting 'nvme://0000:06:00.0/1', fmt=qcow2 size=10737418240 cluster_size=65536 preallocation=off lazy_refcounts=off refcount_bits=16
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  block/nvme.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 108 insertions(+)
> 
> Hm.  I’m not quite sure I like this, because this is not image creation.

I fully agree with you, and the whole thing did felt kind of wrong.
I kind of think that bdrv_co_create_opts is kind of outdated for the purpose, especially
with the nvme driver.
I think that it would be better if the bdrv_file_open just supported something like 'O_CREAT'.

I done this the mostly the same was as the file-posix does this on the block devices,
including that 'hack' of zeroing the first sector, for which I really don't know if this is the right solution.



> 
> What we need is a general interface for formatting existing files.  I
> mean, we have that in QMP (blockdev-create), but the problem is that
> this doesn’t really translate to qemu-img create.
> 
> I wonder whether it’s best to hack something up that makes
> bdrv_create_file() a no-op, or whether we should expose blockdev-create
> over qemu-img.  I’ll see how difficult the latter is, it sounds fun
> (famous last words).
For existing images, the 'bdrv_create_file' is already kind of a nop, other that zeroing the first sector,
which kind of makes sense, but probably best done on higher level than in each driver.

So these are my thoughts about this, thanks for the review!

Best regards,
	Maxim Levitsky

Patch
diff mbox series

diff --git a/block/nvme.c b/block/nvme.c
index 1f0d09349f..152d27b07f 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -1148,6 +1148,90 @@  static void nvme_aio_unplug(BlockDriverState *bs)
     }
 }
 
+static int coroutine_fn nvme_co_create_opts(const char *filename,
+        QemuOpts *opts, Error **errp)
+{
+
+    int64_t total_size = 0;
+    char *buf = NULL;
+    BlockDriverState *bs;
+    QEMUIOVector local_qiov;
+    int ret = 0;
+    int64_t blocksize;
+    QDict *options;
+    Error *local_err = NULL;
+    PreallocMode prealloc;
+
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                              BDRV_SECTOR_SIZE);
+
+    buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
+    prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
+                                  PREALLOC_MODE_OFF, &local_err);
+    g_free(buf);
+
+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Only prealloc=off is supported");
+        return -EINVAL;
+    }
+
+    options = qdict_new();
+    qdict_put_str(options, "driver", "nvme");
+    nvme_parse_filename(filename, options, &local_err);
+
+    if (local_err) {
+        error_propagate(errp, local_err);
+        qobject_unref(options);
+        return -EINVAL;
+    }
+
+    bs = bdrv_open(NULL, NULL, options,
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    if (bs == NULL) {
+        return -EIO;
+    }
+
+    if (nvme_getlength(bs) < total_size) {
+        error_setg(errp, "Device is too small");
+        bdrv_unref(bs);
+        qobject_unref(options);
+        return -ENOSPC;
+    }
+
+    blocksize = nvme_get_blocksize(bs);
+    buf = qemu_try_blockalign0(bs, blocksize);
+    qemu_iovec_init(&local_qiov, 1);
+    qemu_iovec_add(&local_qiov, buf, blocksize);
+
+    ret = nvme_co_prw_aligned(bs, 0, blocksize,
+            &local_qiov, true, BDRV_REQ_FUA);
+    if (ret) {
+        error_setg(errp, "Write error to sector 0");
+    }
+
+    qemu_vfree(buf);
+    bdrv_unref(bs);
+    return ret;
+}
+
+
+static int coroutine_fn nvme_co_truncate(BlockDriverState *bs, int64_t offset,
+                                        PreallocMode prealloc, Error **errp)
+{
+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Preallocation mode '%s' unsupported nvme devices",
+                PreallocMode_str(prealloc));
+        return -ENOTSUP;
+    }
+
+    if (offset > nvme_getlength(bs)) {
+        error_setg(errp, "Cannot grow nvme devices");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
 {
     int ret;
@@ -1169,6 +1253,7 @@  static void nvme_unregister_buf(BlockDriverState *bs, void *host)
     qemu_vfio_dma_unmap(s->vfio, host);
 }
 
+
 static const char *const nvme_strong_runtime_opts[] = {
     NVME_BLOCK_OPT_DEVICE,
     NVME_BLOCK_OPT_NAMESPACE,
@@ -1176,6 +1261,25 @@  static const char *const nvme_strong_runtime_opts[] = {
     NULL
 };
 
+
+static QemuOptsList nvme_create_opts = {
+    .name = "nvme-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(nvme_create_opts.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size"
+        },
+        {
+            .name = BLOCK_OPT_PREALLOC,
+            .type = QEMU_OPT_STRING,
+            .help = "Preallocation mode (allowed values: off)",
+        },
+        { /* end of list */ }
+    }
+};
+
 static BlockDriver bdrv_nvme = {
     .format_name              = "nvme",
     .protocol_name            = "nvme",
@@ -1187,6 +1291,10 @@  static BlockDriver bdrv_nvme = {
     .bdrv_getlength           = nvme_getlength,
     .bdrv_probe_blocksizes    = nvme_probe_blocksizes,
 
+    .bdrv_co_create_opts      = nvme_co_create_opts,
+    .bdrv_co_truncate         = nvme_co_truncate,
+    .create_opts              = &nvme_create_opts,
+
     .bdrv_co_preadv           = nvme_co_preadv,
     .bdrv_co_pwritev          = nvme_co_pwritev,
     .bdrv_co_flush_to_disk    = nvme_co_flush,