diff mbox

block: add native support for NFS

Message ID 1387208069-9302-1-git-send-email-pl@kamp.de
State New
Headers show

Commit Message

Peter Lieven Dec. 16, 2013, 3:34 p.m. UTC
This patch adds native support for accessing images on NFS shares without
the requirement to actually mount the entire NFS share on the host.

NFS images can simply be specified by a URL of the form:
nfs://<host>/<export>/<filename>

For example:
qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2

You need libnfs from Ronnie Sahlberg available at:
   git://github.com/sahlberg/libnfs.git
for this to work.

During configure, libnfs is probed for automatically and support is
enabled on the fly. You can forbid or enforce libnfs support
with --disable-libnfs or --enable-libnfs, respectively.

Due to NFS restrictions you might need to execute your binaries
as root, allow them to open privileged ports (<1024), or specify the
insecure option on the NFS server.
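
For illustration, enabling non-privileged source ports on the server
side could look like this in /etc/exports (export path and client
network are placeholders):

    /qemu-images 10.0.0.0/24(rw,insecure)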

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 MAINTAINERS         |    5 +
 block/Makefile.objs |    1 +
 block/nfs.c         |  420 +++++++++++++++++++++++++++++++++++++++++++++++++++
 configure           |   38 +++++
 4 files changed, 464 insertions(+)
 create mode 100644 block/nfs.c

Comments

ronnie sahlberg Dec. 16, 2013, 4:33 p.m. UTC | #1
Nice.

+block-obj-$(CONFIG_LIBISCSI) += nfs.o

Should be CONFIG_LIBNFS

ronnie sahlberg Dec. 16, 2013, 5:01 p.m. UTC | #2
On Mon, Dec 16, 2013 at 7:34 AM, Peter Lieven <pl@kamp.de> wrote:
> This patch adds native support for accessing images on NFS shares without
> the requirement to actually mount the entire NFS share on the host.
>
> NFS Images can simply be specified by an url of the form:
> nfs://<host>/<export>/<filename>
>
> For example:
> qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
>
> You need libnfs from Ronnie Sahlberg available at:
>    git://github.com/sahlberg/libnfs.git
> for this to work.
>
> During configure it is automatically probed for libnfs and support
> is enabled on-the-fly. You can forbid or enforce libnfs support
> with --disable-libnfs or --enable-libnfs respectively.
>
> Due to NFS restrictions you might need to execute your binaries
> as root, allow them to open priviledged ports (<1024) or specify
> insecure option on the NFS server.
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  MAINTAINERS         |    5 +
>  block/Makefile.objs |    1 +
>  block/nfs.c         |  420 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  configure           |   38 +++++
>  4 files changed, 464 insertions(+)
>  create mode 100644 block/nfs.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index c19133f..f53d184 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -899,6 +899,11 @@ M: Peter Lieven <pl@kamp.de>
>  S: Supported
>  F: block/iscsi.c
>
> +NFS
> +M: Peter Lieven <pl@kamp.de>
> +S: Maintained
> +F: block/nfs.c
> +
>  SSH
>  M: Richard W.M. Jones <rjones@redhat.com>
>  S: Supported
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index f43ecbc..1bac94e 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>  ifeq ($(CONFIG_POSIX),y)
>  block-obj-y += nbd.o sheepdog.o
>  block-obj-$(CONFIG_LIBISCSI) += iscsi.o
> +block-obj-$(CONFIG_LIBISCSI) += nfs.o

CONFIG_LIBNFS

>  block-obj-$(CONFIG_CURL) += curl.o
>  block-obj-$(CONFIG_RBD) += rbd.o
>  block-obj-$(CONFIG_GLUSTERFS) += gluster.o
> diff --git a/block/nfs.c b/block/nfs.c
> new file mode 100644
> index 0000000..d6cb4c0
> --- /dev/null
> +++ b/block/nfs.c
> @@ -0,0 +1,420 @@
> +/*
> + * QEMU Block driver for native access to files on NFS shares
> + *
> + * Copyright (c) 2013 Peter Lieven <pl@kamp.de>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "config-host.h"
> +
> +#include <poll.h>
> +#include <arpa/inet.h>
> +#include "qemu-common.h"
> +#include "qemu/config-file.h"
> +#include "qemu/error-report.h"
> +#include "block/block_int.h"
> +#include "trace.h"
> +#include "block/scsi.h"
> +#include "qemu/iov.h"
> +#include "sysemu/sysemu.h"
> +#include "qmp-commands.h"
> +
> +#include <nfsc/libnfs-zdr.h>
> +#include <nfsc/libnfs.h>
> +#include <nfsc/libnfs-raw.h>
> +#include <nfsc/libnfs-raw-mount.h>
> +
> +typedef struct nfsclient {
> +    struct nfs_context *context;
> +    struct nfsfh *fh;
> +    int events;
> +    bool has_zero_init;
> +    int64_t allocated_file_size;
> +    QEMUBH *close_bh;
> +} nfsclient;
> +
> +typedef struct NFSTask {
> +    int status;
> +    int complete;
> +    QEMUIOVector *iov;
> +    Coroutine *co;
> +    QEMUBH *bh;
> +} NFSTask;
> +
> +static void nfs_process_read(void *arg);
> +static void nfs_process_write(void *arg);
> +
> +static void nfs_set_events(nfsclient *client)
> +{
> +    int ev;
> +    /* We always register a read handler.  */
> +    ev = POLLIN;

I don't think you should always register a read handler here since
there are no situations where the NFS server
will send unsolicited packets back to the client (contrary to iSCSI
where the target might send NOPs back to ping the client).

When NFS sessions are idle, it is common for servers to tear down
the TCP connection to save resources, relying on the clients to
transparently reconnect the TCP session once there is new I/O to send.
This reconnect is handled automatically and transparently within libnfs.
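
A minimal sketch of what that could look like, reusing the handlers
from this patch (untested, only to illustrate the suggestion):

    static void nfs_set_events(nfsclient *client)
    {
        /* register only the events libnfs actually asks for */
        int ev = nfs_which_events(client->context);
        if (ev != client->events) {
            qemu_aio_set_fd_handler(nfs_get_fd(client->context),
                                    (ev & POLLIN) ? nfs_process_read : NULL,
                                    (ev & POLLOUT) ? nfs_process_write : NULL,
                                    client);
        }
        client->events = ev;
    }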

> +    ev |= nfs_which_events(client->context);
> +    if (ev != client->events) {
> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
> +                      nfs_process_read,
> +                      (ev & POLLOUT) ? nfs_process_write : NULL,
> +                      client);
> +
> +    }
> +    client->events = ev;
> +}
> +
> +static void nfs_process_read(void *arg)
> +{
> +    nfsclient *client = arg;
> +    nfs_service(client->context, POLLIN);
> +    nfs_set_events(client);
> +}
> +
> +static void nfs_process_write(void *arg)
> +{
> +    nfsclient *client = arg;
> +    nfs_service(client->context, POLLOUT);
> +    nfs_set_events(client);
> +}
> +
> +static void nfs_co_init_task(nfsclient *client, NFSTask *Task)
> +{
> +    *Task = (NFSTask) {
> +        .co         = qemu_coroutine_self(),
> +    };
> +}
> +
> +static void nfs_co_generic_bh_cb(void *opaque)
> +{
> +    NFSTask *Task = opaque;
> +    qemu_bh_delete(Task->bh);
> +    qemu_coroutine_enter(Task->co, NULL);
> +}
> +
> +static void nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data, void *private_data)
> +{
> +    NFSTask *Task = private_data;
> +    Task->complete = 1;
> +    Task->status = status;
> +    if (Task->status > 0 && Task->iov) {
> +        if (Task->status == Task->iov->size) {
> +            qemu_iovec_from_buf(Task->iov, 0, data, status);
> +        } else {
> +            Task->status = -1;
> +        }
> +    }
> +    if (Task->co) {
> +        Task->bh = qemu_bh_new(nfs_co_generic_bh_cb, Task);
> +        qemu_bh_schedule(Task->bh);
> +    }
> +}
> +
> +static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
> +                                     int64_t sector_num, int nb_sectors,
> +                                     QEMUIOVector *iov)
> +{
> +    nfsclient *client = bs->opaque;
> +    struct NFSTask Task;
> +
> +    nfs_co_init_task(client, &Task);
> +    Task.iov = iov;
> +
> +    if (nfs_pread_async(client->context, client->fh,
> +                        sector_num * BDRV_SECTOR_SIZE,
> +                        nb_sectors * BDRV_SECTOR_SIZE,
> +                        nfs_co_generic_cb, &Task) != 0) {
> +        return -EIO;
> +    }
> +
> +    while (!Task.complete) {
> +        nfs_set_events(client);
> +        qemu_coroutine_yield();
> +    }
> +
> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
> +        return -EIO;
> +    }
> +
> +    return 0;
> +}
> +
> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
> +                                        int64_t sector_num, int nb_sectors,
> +                                        QEMUIOVector *iov)
> +{
> +    nfsclient *client = bs->opaque;
> +    struct NFSTask Task;
> +    char *buf = NULL;
> +
> +    nfs_co_init_task(client, &Task);
> +
> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
> +
> +    if (nfs_pwrite_async(client->context, client->fh,
> +                         sector_num * BDRV_SECTOR_SIZE,
> +                         nb_sectors * BDRV_SECTOR_SIZE,
> +                         buf, nfs_co_generic_cb, &Task) != 0) {
> +        g_free(buf);
> +        return -EIO;
> +    }
> +
> +    while (!Task.complete) {
> +        nfs_set_events(client);
> +        qemu_coroutine_yield();
> +    }
> +
> +    g_free(buf);
> +
> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
> +        return -EIO;
> +    }
> +
> +    bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
> +    client->allocated_file_size = -ENOTSUP;
> +    return 0;
> +}
> +
> +static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
> +{
> +    nfsclient *client = bs->opaque;
> +    struct NFSTask Task;
> +
> +    nfs_co_init_task(client, &Task);
> +
> +    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb, &Task) != 0) {
> +        return -EIO;
> +    }
> +
> +    while (!Task.complete) {
> +        nfs_set_events(client);
> +        qemu_coroutine_yield();
> +    }
> +
> +    if (Task.status != 0) {
> +        return -EIO;
> +    }
> +
> +    return 0;
> +}
> +
> +static QemuOptsList runtime_opts = {
> +    .name = "nfs",
> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
> +    .desc = {
> +        {
> +            .name = "filename",
> +            .type = QEMU_OPT_STRING,
> +            .help = "URL to the NFS file",
> +        },
> +        { /* end of list */ }
> +    },
> +};
> +
> +static void nfs_file_close(BlockDriverState *bs)
> +{
> +    nfsclient *client = bs->opaque;
> +    if (client->context) {
> +        if (client->fh) {
> +            nfs_close(client->context, client->fh);
> +        }
> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
> +        nfs_destroy_context(client->context);
> +    }
> +    memset(client, 0, sizeof(nfsclient));
> +}
> +
> +
> +static int nfs_file_open_common(BlockDriverState *bs, QDict *options, int flags,
> +                         int open_flags, Error **errp)
> +{
> +    nfsclient *client = bs->opaque;
> +    const char *filename;
> +    int ret = 0;
> +    QemuOpts *opts;
> +    Error *local_err = NULL;
> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
> +    struct stat st;
> +
> +    opts = qemu_opts_create_nofail(&runtime_opts);
> +    qemu_opts_absorb_qdict(opts, options, &local_err);
> +    if (error_is_set(&local_err)) {
> +        qerror_report_err(local_err);
> +        error_free(local_err);
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    filename = qemu_opt_get(opts, "filename");
> +
> +    client->context = nfs_init_context();
> +
> +    if (client->context == NULL) {
> +        error_setg(errp, "Failed to init NFS context");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    server = g_strdup(filename + 6);
> +    if (server[0] == '/' || server[0] == '\0') {
> +        error_setg(errp, "Invalid server in URL");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    strp = strchr(server, '/');
> +    if (strp == NULL) {
> +        error_setg(errp, "Invalid URL specified.\n");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    path = g_strdup(strp);
> +    *strp = 0;
> +    strp = strrchr(path, '/');
> +    if (strp == NULL) {
> +        error_setg(errp, "Invalid URL specified.\n");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    file = g_strdup(strp);
> +    *strp = 0;
> +
> +    if (nfs_mount(client->context, server, path) != 0) {
> +        error_setg(errp, "Failed to mount nfs share: %s",
> +                    nfs_get_error(client->context));
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    if (open_flags & O_CREAT) {
> +        if (nfs_creat(client->context, file, 0600, &client->fh) != 0) {
> +            error_setg(errp, "Failed to create file: %s",
> +                             nfs_get_error(client->context));
> +            ret = -EINVAL;
> +            goto fail;
> +        }
> +    } else {
> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
> +        if (nfs_open(client->context, file, open_flags, &client->fh) != 0) {
> +            error_setg(errp, "Failed to open file : %s",
> +                       nfs_get_error(client->context));
> +            ret = -EINVAL;
> +            goto fail;
> +        }
> +    }
> +
> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
> +        error_setg(errp, "Failed to fstat file: %s",
> +                   nfs_get_error(client->context));
> +        ret = -EIO;
> +        goto fail;
> +    }
> +
> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
> +    client->has_zero_init = S_ISREG(st.st_mode);
> +    client->allocated_file_size = st.st_blocks * st.st_blksize;
> +    goto out;
> +fail:
> +    nfs_file_close(bs);
> +out:
> +    g_free(server);
> +    g_free(path);
> +    g_free(file);
> +    return ret;
> +}
> +
> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
> +                         Error **errp) {
> +    return nfs_file_open_common(bs, options, flags, 0, errp);
> +}
> +
> +static int nfs_file_create(const char *filename, QEMUOptionParameter *options,
> +                         Error **errp)
> +{
> +    int ret = 0;
> +    int64_t total_size = 0;
> +    BlockDriverState *bs;
> +    nfsclient *client = NULL;
> +    QDict *bs_options;
> +
> +    bs = bdrv_new("");
> +
> +    /* Read out options */
> +    while (options && options->name) {
> +        if (!strcmp(options->name, "size")) {
> +            total_size = options->value.n / BDRV_SECTOR_SIZE;
> +        }
> +        options++;
> +    }
> +
> +    bs->opaque = g_malloc0(sizeof(struct nfsclient));
> +    client = bs->opaque;
> +
> +    bs_options = qdict_new();
> +    qdict_put(bs_options, "filename", qstring_from_str(filename));
> +    ret = nfs_file_open_common(bs, bs_options, 0, O_CREAT, NULL);
> +    QDECREF(bs_options);
> +    if (ret != 0) {
> +        goto out;
> +    }
> +    ret = nfs_ftruncate(client->context, client->fh, total_size * BDRV_SECTOR_SIZE);
> +    if (ret != 0) {
> +        ret = -ENOSPC;;
> +    }
> +out:
> +    nfs_file_close(bs);
> +    g_free(bs->opaque);
> +    bs->opaque = NULL;
> +    bdrv_unref(bs);
> +    return ret;
> +}
> +
> +static int nfs_has_zero_init(BlockDriverState *bs)
> +{
> +    nfsclient *client = bs->opaque;
> +    return client->has_zero_init;
> +}
> +
> +static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
> +{
> +    nfsclient *client = bs->opaque;
> +    return client->allocated_file_size;
> +}
> +
> +static BlockDriver bdrv_nfs = {
> +    .format_name     = "nfs",
> +    .protocol_name   = "nfs",
> +
> +    .instance_size   = sizeof(nfsclient),
> +    .bdrv_needs_filename = true,
> +    .bdrv_has_zero_init = nfs_has_zero_init,
> +    .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
> +
> +    .bdrv_file_open  = nfs_file_open,
> +    .bdrv_close      = nfs_file_close,
> +    .bdrv_create     = nfs_file_create,
> +
> +    .bdrv_co_readv         = nfs_co_readv,
> +    .bdrv_co_writev        = nfs_co_writev,
> +    .bdrv_co_flush_to_disk = nfs_co_flush,
> +};
> +
> +static void nfs_block_init(void)
> +{
> +    bdrv_register(&bdrv_nfs);
> +}
> +
> +block_init(nfs_block_init);
> diff --git a/configure b/configure
> index 8144d9f..6c9e47a 100755
> --- a/configure
> +++ b/configure
> @@ -250,6 +250,7 @@ vss_win32_sdk=""
>  win_sdk="no"
>  want_tools="yes"
>  libiscsi=""
> +libnfs=""
>  coroutine=""
>  coroutine_pool=""
>  seccomp=""
> @@ -833,6 +834,10 @@ for opt do
>    ;;
>    --enable-libiscsi) libiscsi="yes"
>    ;;
> +  --disable-libnfs) libnfs="no"
> +  ;;
> +  --enable-libnfs) libnfs="yes"
> +  ;;
>    --enable-profiler) profiler="yes"
>    ;;
>    --disable-cocoa) cocoa="no"
> @@ -1202,6 +1207,8 @@ echo "  --enable-spice           enable spice"
>  echo "  --enable-rbd             enable building the rados block device (rbd)"
>  echo "  --disable-libiscsi       disable iscsi support"
>  echo "  --enable-libiscsi        enable iscsi support"
> +echo "  --disable-libnfs         disable nfs support"
> +echo "  --enable-libnfs          enable nfs support"
>  echo "  --disable-smartcard-nss  disable smartcard nss support"
>  echo "  --enable-smartcard-nss   enable smartcard nss support"
>  echo "  --disable-libusb         disable libusb (for usb passthrough)"
> @@ -3050,6 +3057,32 @@ EOF
>    fi
>  fi
>
> +##########################################
> +# Do we have libnfs
> +if test "$libnfs" != "no" ; then
> +  cat > $TMPC << EOF
> +#include <nfsc/libnfs-zdr.h>
> +#include <nfsc/libnfs.h>
> +#include <nfsc/libnfs-raw.h>
> +#include <nfsc/libnfs-raw-mount.h>
> +int main(void) {
> +    nfs_init_context();
> +    nfs_pread_async(0,0,0,0,0,0);
> +    nfs_pwrite_async(0,0,0,0,0,0,0);
> +    nfs_fstat(0,0,0);
> +    return 0;
> +    }
> +EOF
> +  if compile_prog "-Werror" "-lnfs" ; then
> +    libnfs="yes"
> +    LIBS="$LIBS -lnfs"
> +  else
> +    if test "$libnfs" = "yes" ; then
> +      feature_not_found "libnfs"
> +    fi
> +    libnfs="no"
> +  fi
> +fi
>
>  ##########################################
>  # Do we need libm
> @@ -3777,6 +3810,7 @@ echo "libusb            $libusb"
>  echo "usb net redir     $usb_redir"
>  echo "GLX support       $glx"
>  echo "libiscsi support  $libiscsi"
> +echo "libnfs support    $libnfs"
>  echo "build guest agent $guest_agent"
>  echo "QGA VSS support   $guest_agent_with_vss"
>  echo "seccomp support   $seccomp"
> @@ -4107,6 +4141,10 @@ if test "$libiscsi" = "yes" ; then
>    echo "CONFIG_LIBISCSI=y" >> $config_host_mak
>  fi
>
> +if test "$libnfs" = "yes" ; then
> +  echo "CONFIG_LIBNFS=y" >> $config_host_mak
> +fi
> +
>  if test "$seccomp" = "yes"; then
>    echo "CONFIG_SECCOMP=y" >> $config_host_mak
>  fi
> --
> 1.7.9.5
>
Fam Zheng Dec. 17, 2013, 4:07 a.m. UTC | #3
On Dec. 16, 2013, 23:34, Peter Lieven wrote:
> This patch adds native support for accessing images on NFS shares without
> the requirement to actually mount the entire NFS share on the host.
>
> NFS Images can simply be specified by an url of the form:
> nfs://<host>/<export>/<filename>
>
> For example:
> qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
>
> You need libnfs from Ronnie Sahlberg available at:
>     git://github.com/sahlberg/libnfs.git
> for this to work.
>
> During configure it is automatically probed for libnfs and support
> is enabled on-the-fly. You can forbid or enforce libnfs support
> with --disable-libnfs or --enable-libnfs respectively.
>
> Due to NFS restrictions you might need to execute your binaries
> as root, allow them to open priviledged ports (<1024) or specify
> insecure option on the NFS server.
>
> Signed-off-by: Peter Lieven <pl@kamp.de>

Looks nice! Thanks for the work!

> ---
>   MAINTAINERS         |    5 +
>   block/Makefile.objs |    1 +
>   block/nfs.c         |  420 +++++++++++++++++++++++++++++++++++++++++++++++++++
>   configure           |   38 +++++
>   4 files changed, 464 insertions(+)
>   create mode 100644 block/nfs.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index c19133f..f53d184 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -899,6 +899,11 @@ M: Peter Lieven <pl@kamp.de>
>   S: Supported
>   F: block/iscsi.c
>
> +NFS
> +M: Peter Lieven <pl@kamp.de>
> +S: Maintained
> +F: block/nfs.c
> +
>   SSH
>   M: Richard W.M. Jones <rjones@redhat.com>
>   S: Supported
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index f43ecbc..1bac94e 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>   ifeq ($(CONFIG_POSIX),y)
>   block-obj-y += nbd.o sheepdog.o
>   block-obj-$(CONFIG_LIBISCSI) += iscsi.o
> +block-obj-$(CONFIG_LIBISCSI) += nfs.o
>   block-obj-$(CONFIG_CURL) += curl.o
>   block-obj-$(CONFIG_RBD) += rbd.o
>   block-obj-$(CONFIG_GLUSTERFS) += gluster.o
> diff --git a/block/nfs.c b/block/nfs.c
> new file mode 100644
> index 0000000..d6cb4c0
> --- /dev/null
> +++ b/block/nfs.c
> @@ -0,0 +1,420 @@
> +/*
> + * QEMU Block driver for native access to files on NFS shares
> + *
> + * Copyright (c) 2013 Peter Lieven <pl@kamp.de>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "config-host.h"
> +
> +#include <poll.h>
> +#include <arpa/inet.h>
> +#include "qemu-common.h"
> +#include "qemu/config-file.h"
> +#include "qemu/error-report.h"
> +#include "block/block_int.h"
> +#include "trace.h"
> +#include "block/scsi.h"
> +#include "qemu/iov.h"
> +#include "sysemu/sysemu.h"
> +#include "qmp-commands.h"

Copied from block/iscsi.c? SCSI and QMP are not necessary for this file.
Maybe arpa/inet.h isn't needed either; I'm not sure about that, though.

> +
> +#include <nfsc/libnfs-zdr.h>
> +#include <nfsc/libnfs.h>
> +#include <nfsc/libnfs-raw.h>
> +#include <nfsc/libnfs-raw-mount.h>
> +
> +typedef struct nfsclient {
> +    struct nfs_context *context;
> +    struct nfsfh *fh;
> +    int events;
> +    bool has_zero_init;
> +    int64_t allocated_file_size;
> +    QEMUBH *close_bh;

This is unused.

> +} nfsclient;

Please use CamelCase for type names...

> +
> +typedef struct NFSTask {
> +    int status;
> +    int complete;
> +    QEMUIOVector *iov;
> +    Coroutine *co;
> +    QEMUBH *bh;
> +} NFSTask;

as you do with this.

> +
> +static void nfs_process_read(void *arg);
> +static void nfs_process_write(void *arg);
> +
> +static void nfs_set_events(nfsclient *client)
> +{
> +    int ev;
> +    /* We always register a read handler.  */
> +    ev = POLLIN;
> +    ev |= nfs_which_events(client->context);
> +    if (ev != client->events) {
> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
> +                      nfs_process_read,
> +                      (ev & POLLOUT) ? nfs_process_write : NULL,
> +                      client);
> +
> +    }
> +    client->events = ev;
> +}
> +
> +static void nfs_process_read(void *arg)
> +{
> +    nfsclient *client = arg;
> +    nfs_service(client->context, POLLIN);
> +    nfs_set_events(client);
> +}
> +
> +static void nfs_process_write(void *arg)
> +{
> +    nfsclient *client = arg;
> +    nfs_service(client->context, POLLOUT);
> +    nfs_set_events(client);
> +}
> +
> +static void nfs_co_init_task(nfsclient *client, NFSTask *Task)
> +{
> +    *Task = (NFSTask) {

Please use lower case for variable names.

> +        .co         = qemu_coroutine_self(),
> +    };
> +}
> +
> +static void nfs_co_generic_bh_cb(void *opaque)
> +{
> +    NFSTask *Task = opaque;
> +    qemu_bh_delete(Task->bh);
> +    qemu_coroutine_enter(Task->co, NULL);
> +}
> +
> +static void nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data, void *private_data)

This line is too long. Please use scripts/checkpatch.pl to check the
coding style. (Some other lines have trailing whitespace.)
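
For reference, the patch can be run through the checker before posting,
e.g. (patch file name assumed):

    scripts/checkpatch.pl 0001-block-add-native-support-for-NFS.patch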

> +{
> +    NFSTask *Task = private_data;
> +    Task->complete = 1;
> +    Task->status = status;
> +    if (Task->status > 0 && Task->iov) {
> +        if (Task->status == Task->iov->size) {
> +            qemu_iovec_from_buf(Task->iov, 0, data, status);
> +        } else {
> +            Task->status = -1;
> +        }
> +    }
> +    if (Task->co) {
> +        Task->bh = qemu_bh_new(nfs_co_generic_bh_cb, Task);
> +        qemu_bh_schedule(Task->bh);
> +    }
> +}
> +
> +static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
> +                                     int64_t sector_num, int nb_sectors,
> +                                     QEMUIOVector *iov)
> +{
> +    nfsclient *client = bs->opaque;
> +    struct NFSTask Task;
> +
> +    nfs_co_init_task(client, &Task);
> +    Task.iov = iov;
> +
> +    if (nfs_pread_async(client->context, client->fh,
> +                        sector_num * BDRV_SECTOR_SIZE,
> +                        nb_sectors * BDRV_SECTOR_SIZE,
> +                        nfs_co_generic_cb, &Task) != 0) {
> +        return -EIO;
> +    }
> +
> +    while (!Task.complete) {
> +        nfs_set_events(client);
> +        qemu_coroutine_yield();
> +    }
> +
> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
> +        return -EIO;

In the error case, could Task.status contain an error number other than
-EIO? Would it be useful to return that value?
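
Something along these lines, assuming libnfs reports a negative errno
in status (sketch only):

    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
        return Task.status < 0 ? Task.status : -EIO;
    }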

> +    }
> +
> +    return 0;
> +}
> +
> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
> +                                        int64_t sector_num, int nb_sectors,
> +                                        QEMUIOVector *iov)
> +{
> +    nfsclient *client = bs->opaque;
> +    struct NFSTask Task;
> +    char *buf = NULL;
> +
> +    nfs_co_init_task(client, &Task);
> +
> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
> +
> +    if (nfs_pwrite_async(client->context, client->fh,
> +                         sector_num * BDRV_SECTOR_SIZE,
> +                         nb_sectors * BDRV_SECTOR_SIZE,
> +                         buf, nfs_co_generic_cb, &Task) != 0) {
> +        g_free(buf);
> +        return -EIO;
> +    }
> +
> +    while (!Task.complete) {
> +        nfs_set_events(client);
> +        qemu_coroutine_yield();
> +    }
> +
> +    g_free(buf);
> +
> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
> +        return -EIO;
> +    }
> +
> +    bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
> +    client->allocated_file_size = -ENOTSUP;

Why does allocated_file_size become unsupported (-ENOTSUP) after a write?

> +    return 0;
> +}
> +
> +static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
> +{
> +    nfsclient *client = bs->opaque;
> +    struct NFSTask Task;
> +
> +    nfs_co_init_task(client, &Task);
> +
> +    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb, &Task) != 0) {
> +        return -EIO;
> +    }
> +
> +    while (!Task.complete) {
> +        nfs_set_events(client);
> +        qemu_coroutine_yield();
> +    }
> +
> +    if (Task.status != 0) {
> +        return -EIO;
> +    }
> +
> +    return 0;
> +}
> +
> +static QemuOptsList runtime_opts = {
> +    .name = "nfs",
> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
> +    .desc = {
> +        {
> +            .name = "filename",
> +            .type = QEMU_OPT_STRING,
> +            .help = "URL to the NFS file",
> +        },
> +        { /* end of list */ }
> +    },
> +};
> +
> +static void nfs_file_close(BlockDriverState *bs)
> +{
> +    nfsclient *client = bs->opaque;
> +    if (client->context) {
> +        if (client->fh) {
> +            nfs_close(client->context, client->fh);
> +        }
> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
> +        nfs_destroy_context(client->context);
> +    }
> +    memset(client, 0, sizeof(nfsclient));
> +}
> +
> +
> +static int nfs_file_open_common(BlockDriverState *bs, QDict *options, int flags,
> +                         int open_flags, Error **errp)
> +{
> +    nfsclient *client = bs->opaque;
> +    const char *filename;
> +    int ret = 0;
> +    QemuOpts *opts;
> +    Error *local_err = NULL;
> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
> +    struct stat st;
> +
> +    opts = qemu_opts_create_nofail(&runtime_opts);
> +    qemu_opts_absorb_qdict(opts, options, &local_err);
> +    if (error_is_set(&local_err)) {
> +        qerror_report_err(local_err);
> +        error_free(local_err);
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    filename = qemu_opt_get(opts, "filename");
> +
> +    client->context = nfs_init_context();
> +
> +    if (client->context == NULL) {
> +        error_setg(errp, "Failed to init NFS context");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    server = g_strdup(filename + 6);

Please check that filename is longer than 6 characters before accessing
filename[6].
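
For example (sketch only; this would also verify the scheme prefix):

    if (strncmp(filename, "nfs://", 6) != 0) {
        error_setg(errp, "Invalid URL specified");
        ret = -EINVAL;
        goto fail;
    }
    server = g_strdup(filename + 6);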

> +    if (server[0] == '/' || server[0] == '\0') {
> +        error_setg(errp, "Invalid server in URL");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    strp = strchr(server, '/');
> +    if (strp == NULL) {
> +        error_setg(errp, "Invalid URL specified.\n");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    path = g_strdup(strp);
> +    *strp = 0;
> +    strp = strrchr(path, '/');
> +    if (strp == NULL) {
> +        error_setg(errp, "Invalid URL specified.\n");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    file = g_strdup(strp);
> +    *strp = 0;
> +
> +    if (nfs_mount(client->context, server, path) != 0) {
> +        error_setg(errp, "Failed to mount nfs share: %s",
> +                    nfs_get_error(client->context));
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    if (open_flags & O_CREAT) {
> +        if (nfs_creat(client->context, file, 0600, &client->fh) != 0) {
> +            error_setg(errp, "Failed to create file: %s",
> +                             nfs_get_error(client->context));
> +            ret = -EINVAL;
> +            goto fail;
> +        }
> +    } else {
> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
> +        if (nfs_open(client->context, file, open_flags, &client->fh) != 0) {
> +            error_setg(errp, "Failed to open file : %s",
> +                       nfs_get_error(client->context));
> +            ret = -EINVAL;
> +            goto fail;
> +        }
> +    }
> +
> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
> +        error_setg(errp, "Failed to fstat file: %s",
> +                   nfs_get_error(client->context));
> +        ret = -EIO;
> +        goto fail;
> +    }
> +
> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;

Please use DIV_ROUND_UP(). Otherwise the remainder in the last sector
couldn't be read.
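
I.e. something like:

    bs->total_sectors = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);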

> +    client->has_zero_init = S_ISREG(st.st_mode);
> +    client->allocated_file_size = st.st_blocks * st.st_blksize;
> +    goto out;
> +fail:
> +    nfs_file_close(bs);
> +out:
> +    g_free(server);
> +    g_free(path);
> +    g_free(file);
> +    return ret;
> +}
> +
> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
> +                         Error **errp) {
> +    return nfs_file_open_common(bs, options, flags, 0, errp);
> +}
> +
> +static int nfs_file_create(const char *filename, QEMUOptionParameter *options,
> +                         Error **errp)
> +{
> +    int ret = 0;
> +    int64_t total_size = 0;
> +    BlockDriverState *bs;
> +    nfsclient *client = NULL;
> +    QDict *bs_options;
> +
> +    bs = bdrv_new("");
> +
> +    /* Read out options */
> +    while (options && options->name) {
> +        if (!strcmp(options->name, "size")) {
> +            total_size = options->value.n / BDRV_SECTOR_SIZE;

Why divide by BDRV_SECTOR_SIZE only to ...

> +        }
> +        options++;
> +    }
> +
> +    bs->opaque = g_malloc0(sizeof(struct nfsclient));
> +    client = bs->opaque;
> +
> +    bs_options = qdict_new();
> +    qdict_put(bs_options, "filename", qstring_from_str(filename));
> +    ret = nfs_file_open_common(bs, bs_options, 0, O_CREAT, NULL);
> +    QDECREF(bs_options);
> +    if (ret != 0) {
> +        goto out;
> +    }
> +    ret = nfs_ftruncate(client->context, client->fh, total_size * BDRV_SECTOR_SIZE);

... multiply it back later?
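
In other words, the size could simply be kept in bytes throughout
(sketch only):

    /* keep the size in bytes ... */
    total_size = options->value.n;
    /* ... and use it directly later */
    ret = nfs_ftruncate(client->context, client->fh, total_size);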

> +    if (ret != 0) {
> +        ret = -ENOSPC;;

There is an extra semicolon. And is it right to hard-code ENOSPC here
without checking the value of ret?
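
A sketch of propagating the result instead, assuming nfs_ftruncate()
returns a negative errno on failure:

    ret = nfs_ftruncate(client->context, client->fh,
                        total_size * BDRV_SECTOR_SIZE);
    if (ret < 0) {
        goto out;  /* pass the libnfs error on instead of forcing -ENOSPC */
    }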

> +    }
> +out:
> +    nfs_file_close(bs);
> +    g_free(bs->opaque);
> +    bs->opaque = NULL;
> +    bdrv_unref(bs);
> +    return ret;
> +}
> +

Fam
Peter Lieven Dec. 17, 2013, 7:53 a.m. UTC | #4
Hi Fam,

On 17.12.2013 05:07, Fam Zheng wrote:
> On Dec. 16, 2013, 23:34, Peter Lieven wrote:
>> This patch adds native support for accessing images on NFS shares without
>> the requirement to actually mount the entire NFS share on the host.
>>
>> NFS Images can simply be specified by an url of the form:
>> nfs://<host>/<export>/<filename>
>>
>> For example:
>> qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
>>
>> You need libnfs from Ronnie Sahlberg available at:
>>     git://github.com/sahlberg/libnfs.git
>> for this to work.
>>
>> During configure it is automatically probed for libnfs and support
>> is enabled on-the-fly. You can forbid or enforce libnfs support
>> with --disable-libnfs or --enable-libnfs respectively.
>>
>> Due to NFS restrictions you might need to execute your binaries
>> as root, allow them to open priviledged ports (<1024) or specify
>> insecure option on the NFS server.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>
> Looks nice! Thanks for the work!
Thank you ;-)
>
>> ---
>>   MAINTAINERS         |    5 +
>>   block/Makefile.objs |    1 +
>>   block/nfs.c         |  420 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>   configure           |   38 +++++
>>   4 files changed, 464 insertions(+)
>>   create mode 100644 block/nfs.c
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index c19133f..f53d184 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -899,6 +899,11 @@ M: Peter Lieven <pl@kamp.de>
>>   S: Supported
>>   F: block/iscsi.c
>>
>> +NFS
>> +M: Peter Lieven <pl@kamp.de>
>> +S: Maintained
>> +F: block/nfs.c
>> +
>>   SSH
>>   M: Richard W.M. Jones <rjones@redhat.com>
>>   S: Supported
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index f43ecbc..1bac94e 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>>   ifeq ($(CONFIG_POSIX),y)
>>   block-obj-y += nbd.o sheepdog.o
>>   block-obj-$(CONFIG_LIBISCSI) += iscsi.o
>> +block-obj-$(CONFIG_LIBISCSI) += nfs.o
>>   block-obj-$(CONFIG_CURL) += curl.o
>>   block-obj-$(CONFIG_RBD) += rbd.o
>>   block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>> diff --git a/block/nfs.c b/block/nfs.c
>> new file mode 100644
>> index 0000000..d6cb4c0
>> --- /dev/null
>> +++ b/block/nfs.c
>> @@ -0,0 +1,420 @@
>> +/*
>> + * QEMU Block driver for native access to files on NFS shares
>> + *
>> + * Copyright (c) 2013 Peter Lieven <pl@kamp.de>
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this software and associated documentation files (the "Software"), to deal
>> + * in the Software without restriction, including without limitation the rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "config-host.h"
>> +
>> +#include <poll.h>
>> +#include <arpa/inet.h>
>> +#include "qemu-common.h"
>> +#include "qemu/config-file.h"
>> +#include "qemu/error-report.h"
>> +#include "block/block_int.h"
>> +#include "trace.h"
>> +#include "block/scsi.h"
>> +#include "qemu/iov.h"
>> +#include "sysemu/sysemu.h"
>> +#include "qmp-commands.h"
>
> Copied from block/iscsi.c? SCSI and QMP are not necessary for this file. And maybe also arpa/inet.h, I'm not sure about that though.
Yes, libiscsi and libnfs are quite similar. ;-)
>
>> +
>> +#include <nfsc/libnfs-zdr.h>
>> +#include <nfsc/libnfs.h>
>> +#include <nfsc/libnfs-raw.h>
>> +#include <nfsc/libnfs-raw-mount.h>
>> +
>> +typedef struct nfsclient {
>> +    struct nfs_context *context;
>> +    struct nfsfh *fh;
>> +    int events;
>> +    bool has_zero_init;
>> +    int64_t allocated_file_size;
>> +    QEMUBH *close_bh;
>
> This is unused.
Oops, it's a leftover from a nasty segfault debugging session.
>
>> +} nfsclient;
>
> Please use CamelCase for type names...
Ok
>
>> +
>> +typedef struct NFSTask {
>> +    int status;
>> +    int complete;
>> +    QEMUIOVector *iov;
>> +    Coroutine *co;
>> +    QEMUBH *bh;
>> +} NFSTask;
>
> as you do with this.
>
>> +
>> +static void nfs_process_read(void *arg);
>> +static void nfs_process_write(void *arg);
>> +
>> +static void nfs_set_events(nfsclient *client)
>> +{
>> +    int ev;
>> +    /* We always register a read handler.  */
>> +    ev = POLLIN;
>> +    ev |= nfs_which_events(client->context);
>> +    if (ev != client->events) {
>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
>> +                      nfs_process_read,
>> +                      (ev & POLLOUT) ? nfs_process_write : NULL,
>> +                      client);
>> +
>> +    }
>> +    client->events = ev;
>> +}
>> +
>> +static void nfs_process_read(void *arg)
>> +{
>> +    nfsclient *client = arg;
>> +    nfs_service(client->context, POLLIN);
>> +    nfs_set_events(client);
>> +}
>> +
>> +static void nfs_process_write(void *arg)
>> +{
>> +    nfsclient *client = arg;
>> +    nfs_service(client->context, POLLOUT);
>> +    nfs_set_events(client);
>> +}
>> +
>> +static void nfs_co_init_task(nfsclient *client, NFSTask *Task)
>> +{
>> +    *Task = (NFSTask) {
>
> Please use lower case for variable names.
Ok
>
>> +        .co         = qemu_coroutine_self(),
>> +    };
>> +}
>> +
>> +static void nfs_co_generic_bh_cb(void *opaque)
>> +{
>> +    NFSTask *Task = opaque;
>> +    qemu_bh_delete(Task->bh);
>> +    qemu_coroutine_enter(Task->co, NULL);
>> +}
>> +
>> +static void nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data, void *private_data)
>
> This line is too long. Please use scripts/checkpatch.pl to check the coding style. (Some other lines have trailing whitespaces)
I thought I had done this. Strange...
>
>> +{
>> +    NFSTask *Task = private_data;
>> +    Task->complete = 1;
>> +    Task->status = status;
>> +    if (Task->status > 0 && Task->iov) {
>> +        if (Task->status == Task->iov->size) {
>> +            qemu_iovec_from_buf(Task->iov, 0, data, status);
>> +        } else {
>> +            Task->status = -1;
>> +        }
>> +    }
>> +    if (Task->co) {
>> +        Task->bh = qemu_bh_new(nfs_co_generic_bh_cb, Task);
>> +        qemu_bh_schedule(Task->bh);
>> +    }
>> +}
>> +
>> +static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
>> +                                     int64_t sector_num, int nb_sectors,
>> +                                     QEMUIOVector *iov)
>> +{
>> +    nfsclient *client = bs->opaque;
>> +    struct NFSTask Task;
>> +
>> +    nfs_co_init_task(client, &Task);
>> +    Task.iov = iov;
>> +
>> +    if (nfs_pread_async(client->context, client->fh,
>> +                        sector_num * BDRV_SECTOR_SIZE,
>> +                        nb_sectors * BDRV_SECTOR_SIZE,
>> +                        nfs_co_generic_cb, &Task) != 0) {
>> +        return -EIO;
>> +    }
>> +
>> +    while (!Task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_coroutine_yield();
>> +    }
>> +
>> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>> +        return -EIO;
>
> In error case, does Task.status possibly contain error number other than -EIO? Would it be useful to return the value?
>
You are right. I checked the libnfs code and found the function nfsstat3_to_errno(int error), which maps NFS errors to POSIX errors.
I will pass that error through in case Task.status is < 0.
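i.e. roughly this at the end of nfs_co_readv (untested sketch; it assumes the
status handed to the callback is already a negative POSIX errno, courtesy of
nfsstat3_to_errno()):

    if (Task.status < 0) {
        return Task.status;   /* already -errno, mapped by libnfs */
    }
    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
        return -EIO;
    }
    return 0;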

>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>> +                                        int64_t sector_num, int nb_sectors,
>> +                                        QEMUIOVector *iov)
>> +{
>> +    nfsclient *client = bs->opaque;
>> +    struct NFSTask Task;
>> +    char *buf = NULL;
>> +
>> +    nfs_co_init_task(client, &Task);
>> +
>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>> +
>> +    if (nfs_pwrite_async(client->context, client->fh,
>> +                         sector_num * BDRV_SECTOR_SIZE,
>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>> +                         buf, nfs_co_generic_cb, &Task) != 0) {
>> +        g_free(buf);
>> +        return -EIO;
>> +    }
>> +
>> +    while (!Task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_coroutine_yield();
>> +    }
>> +
>> +    g_free(buf);
>> +
>> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>> +        return -EIO;
>> +    }
>> +
>> +    bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
>> +    client->allocated_file_size = -ENOTSUP;
>
> Why does allocated_file_size become not supported after a write?
I thought that someone would ask this ;-) bdrv_allocated_file_size is only
used in image info. I saved some code here implementing an async call.
On open I fstat anyway and store that value. For qemu-img info this is
sufficient, but the allocated size likely changes after a write. -ENOTSUP
is the default if bdrv_allocated_file_size is not implemented.
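i.e. the getter just returns the cached value, roughly (the name
nfs_get_allocated_file_size is simply what I would call it):

    static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
    {
        nfsclient *client = bs->opaque;
        /* st.st_blocks * st.st_blksize cached from the open-time fstat;
         * set to -ENOTSUP once a write has made the cached value stale */
        return client->allocated_file_size;
    }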
>
>> +    return 0;
>> +}
>> +
>> +static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
>> +{
>> +    nfsclient *client = bs->opaque;
>> +    struct NFSTask Task;
>> +
>> +    nfs_co_init_task(client, &Task);
>> +
>> +    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb, &Task) != 0) {
>> +        return -EIO;
>> +    }
>> +
>> +    while (!Task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_coroutine_yield();
>> +    }
>> +
>> +    if (Task.status != 0) {
>> +        return -EIO;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static QemuOptsList runtime_opts = {
>> +    .name = "nfs",
>> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
>> +    .desc = {
>> +        {
>> +            .name = "filename",
>> +            .type = QEMU_OPT_STRING,
>> +            .help = "URL to the NFS file",
>> +        },
>> +        { /* end of list */ }
>> +    },
>> +};
>> +
>> +static void nfs_file_close(BlockDriverState *bs)
>> +{
>> +    nfsclient *client = bs->opaque;
>> +    if (client->context) {
>> +        if (client->fh) {
>> +            nfs_close(client->context, client->fh);
>> +        }
>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
>> +        nfs_destroy_context(client->context);
>> +    }
>> +    memset(client, 0, sizeof(nfsclient));
>> +}
>> +
>> +
>> +static int nfs_file_open_common(BlockDriverState *bs, QDict *options, int flags,
>> +                         int open_flags, Error **errp)
>> +{
>> +    nfsclient *client = bs->opaque;
>> +    const char *filename;
>> +    int ret = 0;
>> +    QemuOpts *opts;
>> +    Error *local_err = NULL;
>> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
>> +    struct stat st;
>> +
>> +    opts = qemu_opts_create_nofail(&runtime_opts);
>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>> +    if (error_is_set(&local_err)) {
>> +        qerror_report_err(local_err);
>> +        error_free(local_err);
>> +        ret = -EINVAL;
>> +        goto fail;
>> +    }
>> +
>> +    filename = qemu_opt_get(opts, "filename");
>> +
>> +    client->context = nfs_init_context();
>> +
>> +    if (client->context == NULL) {
>> +        error_setg(errp, "Failed to init NFS context");
>> +        ret = -EINVAL;
>> +        goto fail;
>> +    }
>> +
>> +    server = g_strdup(filename + 6);
>
> Please check the length of filename is longer than 6 before accessing filename[6].
Good point. I will check for this, but in fact I think it can't happen because we will
never end up there if filename does not start with nfs://
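Something like this should cover it anyway (untested sketch):

    if (strncmp(filename, "nfs://", 6)) {
        error_setg(errp, "Invalid URL specified.");
        ret = -EINVAL;
        goto fail;
    }
    server = g_strdup(filename + 6);   /* a match guarantees strlen() >= 6 */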
>
>> +    if (server[0] == '/' || server[0] == '\0') {
>> +        error_setg(errp, "Invalid server in URL");
>> +        ret = -EINVAL;
>> +        goto fail;
>> +    }
>> +    strp = strchr(server, '/');
>> +    if (strp == NULL) {
>> +        error_setg(errp, "Invalid URL specified.\n");
>> +        ret = -EINVAL;
>> +        goto fail;
>> +    }
>> +    path = g_strdup(strp);
>> +    *strp = 0;
>> +    strp = strrchr(path, '/');
>> +    if (strp == NULL) {
>> +        error_setg(errp, "Invalid URL specified.\n");
>> +        ret = -EINVAL;
>> +        goto fail;
>> +    }
>> +    file = g_strdup(strp);
>> +    *strp = 0;
>> +
>> +    if (nfs_mount(client->context, server, path) != 0) {
>> +        error_setg(errp, "Failed to mount nfs share: %s",
>> +                    nfs_get_error(client->context));
>> +        ret = -EINVAL;
>> +        goto fail;
>> +    }
>> +
>> +    if (open_flags & O_CREAT) {
>> +        if (nfs_creat(client->context, file, 0600, &client->fh) != 0) {
>> +            error_setg(errp, "Failed to create file: %s",
>> + nfs_get_error(client->context));
>> +            ret = -EINVAL;
>> +            goto fail;
>> +        }
>> +    } else {
>> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
>> +        if (nfs_open(client->context, file, open_flags, &client->fh) != 0) {
>> +            error_setg(errp, "Failed to open file : %s",
>> +                       nfs_get_error(client->context));
>> +            ret = -EINVAL;
>> +            goto fail;
>> +        }
>> +    }
>> +
>> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
>> +        error_setg(errp, "Failed to fstat file: %s",
>> +                   nfs_get_error(client->context));
>> +        ret = -EIO;
>> +        goto fail;
>> +    }
>> +
>> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
>
> Please use DIV_ROUND_UP(). Otherwise the remainder in last sector couldn't be read.
Will do. Can't it happen that we end up reading unallocated sectors?
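For reference, with the QEMU helper macro that would be just:

    bs->total_sectors = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);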
>
>> +    client->has_zero_init = S_ISREG(st.st_mode);
>> +    client->allocated_file_size = st.st_blocks * st.st_blksize;
>> +    goto out;
>> +fail:
>> +    nfs_file_close(bs);
>> +out:
>> +    g_free(server);
>> +    g_free(path);
>> +    g_free(file);
>> +    return ret;
>> +}
>> +
>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
>> +                         Error **errp) {
>> +    return nfs_file_open_common(bs, options, flags, 0, errp);
>> +}
>> +
>> +static int nfs_file_create(const char *filename, QEMUOptionParameter *options,
>> +                         Error **errp)
>> +{
>> +    int ret = 0;
>> +    int64_t total_size = 0;
>> +    BlockDriverState *bs;
>> +    nfsclient *client = NULL;
>> +    QDict *bs_options;
>> +
>> +    bs = bdrv_new("");
>> +
>> +    /* Read out options */
>> +    while (options && options->name) {
>> +        if (!strcmp(options->name, "size")) {
>> +            total_size = options->value.n / BDRV_SECTOR_SIZE;
>
> Why divide by BDRV_SECTOR_SIZE only to ...
copy and paste error.
>
>> +        }
>> +        options++;
>> +    }
>> +
>> +    bs->opaque = g_malloc0(sizeof(struct nfsclient));
>> +    client = bs->opaque;
>> +
>> +    bs_options = qdict_new();
>> +    qdict_put(bs_options, "filename", qstring_from_str(filename));
>> +    ret = nfs_file_open_common(bs, bs_options, 0, O_CREAT, NULL);
>> +    QDECREF(bs_options);
>> +    if (ret != 0) {
>> +        goto out;
>> +    }
>> +    ret = nfs_ftruncate(client->context, client->fh, total_size * BDRV_SECTOR_SIZE);
>
> ... multiply it back later?
>
>> +    if (ret != 0) {
>> +        ret = -ENOSPC;;
>
> There is an extra semicolon. And is it right to hard code ENOSPC here, without checking value of ret?
I will return the real error wrapped by nfsstat3_to_errno().
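i.e. roughly (sketch; assuming the synchronous nfs_ftruncate() already reports
a negative errno on failure):

    ret = nfs_ftruncate(client->context, client->fh,
                        total_size * BDRV_SECTOR_SIZE);
    /* no -ENOSPC override; keep whatever errno libnfs reports */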

>
>> +    }
>> +out:
>> +    nfs_file_close(bs);
>> +    g_free(bs->opaque);
>> +    bs->opaque = NULL;
>> +    bdrv_unref(bs);
>> +    return ret;
>> +}
>> +
>
> Fam
>
Thanks for reviewing!

One wish, as I think you are the VMDK format maintainer: can you rework vmdk_create and possibly
other functions in VMDK to use only bdrv_* functions (like in qcow2_create)? Currently it is not
possible to create a VMDK on an NFS share directly because of the qemu_file_* calls.
This also affects other drivers. QCOW2 and RAW work perfectly.

Peter
Fam Zheng Dec. 17, 2013, 8:29 a.m. UTC | #5
On 2013-12-17 15:53, Peter Lieven wrote:
> Hi Fam,
>
> On 17.12.2013 05:07, Fam Zheng wrote:
>> On 2013年12月16日 23:34, Peter Lieven wrote:
>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>>> +                                        int64_t sector_num, int
>>> nb_sectors,
>>> +                                        QEMUIOVector *iov)
>>> +{
>>> +    nfsclient *client = bs->opaque;
>>> +    struct NFSTask Task;
>>> +    char *buf = NULL;
>>> +
>>> +    nfs_co_init_task(client, &Task);
>>> +
>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>>> +
>>> +    if (nfs_pwrite_async(client->context, client->fh,
>>> +                         sector_num * BDRV_SECTOR_SIZE,
>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>>> +                         buf, nfs_co_generic_cb, &Task) != 0) {
>>> +        g_free(buf);
>>> +        return -EIO;
>>> +    }
>>> +
>>> +    while (!Task.complete) {
>>> +        nfs_set_events(client);
>>> +        qemu_coroutine_yield();
>>> +    }
>>> +
>>> +    g_free(buf);
>>> +
>>> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>>> +        return -EIO;
>>> +    }
>>> +
>>> +    bs->total_sectors = MAX(bs->total_sectors, sector_num +
>>> nb_sectors);
>>> +    client->allocated_file_size = -ENOTSUP;
>>
>> Why does allocated_file_size become not supported after a write?
> I thought that someone would ask this ;-) bdrv_allocated_file_size is only
> used in image info. I saved some code here implementing an async call.
> On open I fstat anyway and store that value. For qemu-img info this is
> sufficient, but the allocated size likely changes after a write. -ENOTSUP
> is the default if bdrv_allocated_file_size is not implemented.

OK. Please add some comment here.
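e.g. something along these lines (just a suggestion, wording to taste):

    /* The allocated file size cached from the open-time fstat is stale after
     * a write; -ENOTSUP is what a driver without the callback would report. */
    client->allocated_file_size = -ENOTSUP;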

>>> +static int nfs_file_open_common(BlockDriverState *bs, QDict
>>> *options, int flags,
>>> +                         int open_flags, Error **errp)
>>> +{
>>> +    nfsclient *client = bs->opaque;
>>> +    const char *filename;
>>> +    int ret = 0;
>>> +    QemuOpts *opts;
>>> +    Error *local_err = NULL;
>>> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
>>> +    struct stat st;
>>> +
>>> +    opts = qemu_opts_create_nofail(&runtime_opts);
>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>> +    if (error_is_set(&local_err)) {
>>> +        qerror_report_err(local_err);
>>> +        error_free(local_err);
>>> +        ret = -EINVAL;
>>> +        goto fail;
>>> +    }
>>> +
>>> +    filename = qemu_opt_get(opts, "filename");
>>> +
>>> +    client->context = nfs_init_context();
>>> +
>>> +    if (client->context == NULL) {
>>> +        error_setg(errp, "Failed to init NFS context");
>>> +        ret = -EINVAL;
>>> +        goto fail;
>>> +    }
>>> +
>>> +    server = g_strdup(filename + 6);
>>
>> Please check the length of filename is longer than 6 before accessing
>> filename[6].
> Good point. I will check for this, but in fact I think it can't happen
> because we will
> never end up there if filename does not start with nfs://

True, at least for now, but it doesn't hurt to be defensive, especially 
with strings.

>>
>>> +    if (server[0] == '/' || server[0] == '\0') {
>>> +        error_setg(errp, "Invalid server in URL");
>>> +        ret = -EINVAL;
>>> +        goto fail;
>>> +    }
>>> +    strp = strchr(server, '/');
>>> +    if (strp == NULL) {
>>> +        error_setg(errp, "Invalid URL specified.\n");
>>> +        ret = -EINVAL;
>>> +        goto fail;
>>> +    }
>>> +    path = g_strdup(strp);
>>> +    *strp = 0;
>>> +    strp = strrchr(path, '/');
>>> +    if (strp == NULL) {
>>> +        error_setg(errp, "Invalid URL specified.\n");
>>> +        ret = -EINVAL;
>>> +        goto fail;
>>> +    }
>>> +    file = g_strdup(strp);
>>> +    *strp = 0;
>>> +
>>> +    if (nfs_mount(client->context, server, path) != 0) {
>>> +        error_setg(errp, "Failed to mount nfs share: %s",
>>> +                    nfs_get_error(client->context));
>>> +        ret = -EINVAL;
>>> +        goto fail;
>>> +    }
>>> +
>>> +    if (open_flags & O_CREAT) {
>>> +        if (nfs_creat(client->context, file, 0600, &client->fh) != 0) {
>>> +            error_setg(errp, "Failed to create file: %s",
>>> + nfs_get_error(client->context));
>>> +            ret = -EINVAL;
>>> +            goto fail;
>>> +        }
>>> +    } else {
>>> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
>>> +        if (nfs_open(client->context, file, open_flags, &client->fh)
>>> != 0) {
>>> +            error_setg(errp, "Failed to open file : %s",
>>> +                       nfs_get_error(client->context));
>>> +            ret = -EINVAL;
>>> +            goto fail;
>>> +        }
>>> +    }
>>> +
>>> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
>>> +        error_setg(errp, "Failed to fstat file: %s",
>>> +                   nfs_get_error(client->context));
>>> +        ret = -EIO;
>>> +        goto fail;
>>> +    }
>>> +
>>> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
>>
>> Please use DIV_ROUND_UP(). Otherwise the remainder in last sector
>> couldn't be read.
> Will do. Can't it happen that we end up reading unallocated sectors?

Hmm, maybe. Not missing the last bytes of an unaligned sector is essential for
the VMDK descriptor file. But you are right, please add check code and make
sure that we don't read beyond EOF as well.

> Thanks for reviewing!
>
> One wish as I think you are the VMDK format maintainer. Can you rework
> vmdk_create and possible
> other fucntions in VMDK to use only bdrv_* functions (like in
> qcow2_create). Currently its not possible to create
> a VMDK on an NFS share directly caused by useing qemu_file_* calls.
> This also affectecs other drivers. QCOW2 and RAW work perfectly.

Yes, this is a good request. I have this on my todo list, thanks for 
reminding.

Fam
Peter Lieven Dec. 17, 2013, 8:46 a.m. UTC | #6
On 17.12.2013 09:29, Fam Zheng wrote:
> On 2013年12月17日 15:53, Peter Lieven wrote:
>> Hi Fam,
>>
>> On 17.12.2013 05:07, Fam Zheng wrote:
>>> On 2013年12月16日 23:34, Peter Lieven wrote:
>>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>>>> +                                        int64_t sector_num, int
>>>> nb_sectors,
>>>> +                                        QEMUIOVector *iov)
>>>> +{
>>>> +    nfsclient *client = bs->opaque;
>>>> +    struct NFSTask Task;
>>>> +    char *buf = NULL;
>>>> +
>>>> +    nfs_co_init_task(client, &Task);
>>>> +
>>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>>>> +
>>>> +    if (nfs_pwrite_async(client->context, client->fh,
>>>> +                         sector_num * BDRV_SECTOR_SIZE,
>>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>>>> +                         buf, nfs_co_generic_cb, &Task) != 0) {
>>>> +        g_free(buf);
>>>> +        return -EIO;
>>>> +    }
>>>> +
>>>> +    while (!Task.complete) {
>>>> +        nfs_set_events(client);
>>>> +        qemu_coroutine_yield();
>>>> +    }
>>>> +
>>>> +    g_free(buf);
>>>> +
>>>> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>>>> +        return -EIO;
>>>> +    }
>>>> +
>>>> +    bs->total_sectors = MAX(bs->total_sectors, sector_num +
>>>> nb_sectors);
>>>> +    client->allocated_file_size = -ENOTSUP;
>>>
>>> Why does allocated_file_size become not supported after a write?
>> I thought that someone would ask this ;-) bdrv_allocated_file_size is only
>> used in image info. I saved some code here implementing an async call.
>> On open I fstat anyway and store that value. For qemu-img info this is
>> sufficient, but the allocated size likely changes after a write. -ENOTSUP
>> is the default if bdrv_allocated_file_size is not implemented.
>
> OK. Please add some comment here.
>
>>>> +static int nfs_file_open_common(BlockDriverState *bs, QDict
>>>> *options, int flags,
>>>> +                         int open_flags, Error **errp)
>>>> +{
>>>> +    nfsclient *client = bs->opaque;
>>>> +    const char *filename;
>>>> +    int ret = 0;
>>>> +    QemuOpts *opts;
>>>> +    Error *local_err = NULL;
>>>> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
>>>> +    struct stat st;
>>>> +
>>>> +    opts = qemu_opts_create_nofail(&runtime_opts);
>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>> +    if (error_is_set(&local_err)) {
>>>> +        qerror_report_err(local_err);
>>>> +        error_free(local_err);
>>>> +        ret = -EINVAL;
>>>> +        goto fail;
>>>> +    }
>>>> +
>>>> +    filename = qemu_opt_get(opts, "filename");
>>>> +
>>>> +    client->context = nfs_init_context();
>>>> +
>>>> +    if (client->context == NULL) {
>>>> +        error_setg(errp, "Failed to init NFS context");
>>>> +        ret = -EINVAL;
>>>> +        goto fail;
>>>> +    }
>>>> +
>>>> +    server = g_strdup(filename + 6);
>>>
>>> Please check the length of filename is longer than 6 before accessing
>>> filename[6].
>> Good point. I will check for this, but in fact I think it can't happen
>> because we will
>> never end up there if filename does not start with nfs://
>
> True, at least for now, but it doesn't hurt to be defensive, especially with strings.
>
>>>
>>>> +    if (server[0] == '/' || server[0] == '\0') {
>>>> +        error_setg(errp, "Invalid server in URL");
>>>> +        ret = -EINVAL;
>>>> +        goto fail;
>>>> +    }
>>>> +    strp = strchr(server, '/');
>>>> +    if (strp == NULL) {
>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>> +        ret = -EINVAL;
>>>> +        goto fail;
>>>> +    }
>>>> +    path = g_strdup(strp);
>>>> +    *strp = 0;
>>>> +    strp = strrchr(path, '/');
>>>> +    if (strp == NULL) {
>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>> +        ret = -EINVAL;
>>>> +        goto fail;
>>>> +    }
>>>> +    file = g_strdup(strp);
>>>> +    *strp = 0;
>>>> +
>>>> +    if (nfs_mount(client->context, server, path) != 0) {
>>>> +        error_setg(errp, "Failed to mount nfs share: %s",
>>>> +                    nfs_get_error(client->context));
>>>> +        ret = -EINVAL;
>>>> +        goto fail;
>>>> +    }
>>>> +
>>>> +    if (open_flags & O_CREAT) {
>>>> +        if (nfs_creat(client->context, file, 0600, &client->fh) != 0) {
>>>> +            error_setg(errp, "Failed to create file: %s",
>>>> + nfs_get_error(client->context));
>>>> +            ret = -EINVAL;
>>>> +            goto fail;
>>>> +        }
>>>> +    } else {
>>>> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
>>>> +        if (nfs_open(client->context, file, open_flags, &client->fh)
>>>> != 0) {
>>>> +            error_setg(errp, "Failed to open file : %s",
>>>> +                       nfs_get_error(client->context));
>>>> +            ret = -EINVAL;
>>>> +            goto fail;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
>>>> +        error_setg(errp, "Failed to fstat file: %s",
>>>> +                   nfs_get_error(client->context));
>>>> +        ret = -EIO;
>>>> +        goto fail;
>>>> +    }
>>>> +
>>>> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
>>>
>>> Please use DIV_ROUND_UP(). Otherwise the remainder in last sector
>>> couldn't be read.
>> Will do. Can't it happen that we end up reading unallocated sectors?
>
> Hmm, maybe. Not missing last bytes of unaligned sector is essential for VMDK description file. But you are right, please add check code and make sure that we don't read beyond EOF as well.
Actually I would like to keep bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE; for now, until I have checked how libnfs and the whole stack react to reads beyond EOF.

>> Thanks for reviewing!
>>
>> One wish as I think you are the VMDK format maintainer. Can you rework
>> vmdk_create and possible
>> other fucntions in VMDK to use only bdrv_* functions (like in
>> qcow2_create). Currently its not possible to create
>> a VMDK on an NFS share directly caused by useing qemu_file_* calls.
>> This also affectecs other drivers. QCOW2 and RAW work perfectly.
>
> Yes, this is a good request. I have this on my todo list, thanks for reminding.
>
> Fam
>

Peter
Fam Zheng Dec. 17, 2013, 8:51 a.m. UTC | #7
On Tuesday, December 17, 2013 at 16:46:50, Peter Lieven wrote:
> On 17.12.2013 09:29, Fam Zheng wrote:
>> On 2013年12月17日 15:53, Peter Lieven wrote:
>>> Hi Fam,
>>>
>>> On 17.12.2013 05:07, Fam Zheng wrote:
>>>> On 2013年12月16日 23:34, Peter Lieven wrote:
>>>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>>>>> +                                        int64_t sector_num, int
>>>>> nb_sectors,
>>>>> +                                        QEMUIOVector *iov)
>>>>> +{
>>>>> +    nfsclient *client = bs->opaque;
>>>>> +    struct NFSTask Task;
>>>>> +    char *buf = NULL;
>>>>> +
>>>>> +    nfs_co_init_task(client, &Task);
>>>>> +
>>>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>>>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>>>>> +
>>>>> +    if (nfs_pwrite_async(client->context, client->fh,
>>>>> +                         sector_num * BDRV_SECTOR_SIZE,
>>>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>>>>> +                         buf, nfs_co_generic_cb, &Task) != 0) {
>>>>> +        g_free(buf);
>>>>> +        return -EIO;
>>>>> +    }
>>>>> +
>>>>> +    while (!Task.complete) {
>>>>> +        nfs_set_events(client);
>>>>> +        qemu_coroutine_yield();
>>>>> +    }
>>>>> +
>>>>> +    g_free(buf);
>>>>> +
>>>>> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>>>>> +        return -EIO;
>>>>> +    }
>>>>> +
>>>>> +    bs->total_sectors = MAX(bs->total_sectors, sector_num +
>>>>> nb_sectors);
>>>>> +    client->allocated_file_size = -ENOTSUP;
>>>>
>>>> Why does allocated_file_size become not supported after a write?
>>> I thought that someone would ask this ;-) bdrv_allocated_file_size
>>> is only
>>> used in image info. I saved some code here implementing an async call.
>>> On open I fstat anyway and store that value. For qemu-img info this is
>>> sufficient, but the allocated size likely changes after a write.
>>> -ENOTSUP
>>> is the default if bdrv_allocated_file_size is not implemented.
>>
>> OK. Please add some comment here.
>>
>>>>> +static int nfs_file_open_common(BlockDriverState *bs, QDict
>>>>> *options, int flags,
>>>>> +                         int open_flags, Error **errp)
>>>>> +{
>>>>> +    nfsclient *client = bs->opaque;
>>>>> +    const char *filename;
>>>>> +    int ret = 0;
>>>>> +    QemuOpts *opts;
>>>>> +    Error *local_err = NULL;
>>>>> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
>>>>> +    struct stat st;
>>>>> +
>>>>> +    opts = qemu_opts_create_nofail(&runtime_opts);
>>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>>> +    if (error_is_set(&local_err)) {
>>>>> +        qerror_report_err(local_err);
>>>>> +        error_free(local_err);
>>>>> +        ret = -EINVAL;
>>>>> +        goto fail;
>>>>> +    }
>>>>> +
>>>>> +    filename = qemu_opt_get(opts, "filename");
>>>>> +
>>>>> +    client->context = nfs_init_context();
>>>>> +
>>>>> +    if (client->context == NULL) {
>>>>> +        error_setg(errp, "Failed to init NFS context");
>>>>> +        ret = -EINVAL;
>>>>> +        goto fail;
>>>>> +    }
>>>>> +
>>>>> +    server = g_strdup(filename + 6);
>>>>
>>>> Please check the length of filename is longer than 6 before accessing
>>>> filename[6].
>>> Good point. I will check for this, but in fact I think it can't happen
>>> because we will
>>> never end up there if filename does not start with nfs://
>>
>> True, at least for now, but it doesn't hurt to be defensive,
>> especially with strings.
>>
>>>>
>>>>> +    if (server[0] == '/' || server[0] == '\0') {
>>>>> +        error_setg(errp, "Invalid server in URL");
>>>>> +        ret = -EINVAL;
>>>>> +        goto fail;
>>>>> +    }
>>>>> +    strp = strchr(server, '/');
>>>>> +    if (strp == NULL) {
>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>> +        ret = -EINVAL;
>>>>> +        goto fail;
>>>>> +    }
>>>>> +    path = g_strdup(strp);
>>>>> +    *strp = 0;
>>>>> +    strp = strrchr(path, '/');
>>>>> +    if (strp == NULL) {
>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>> +        ret = -EINVAL;
>>>>> +        goto fail;
>>>>> +    }
>>>>> +    file = g_strdup(strp);
>>>>> +    *strp = 0;
>>>>> +
>>>>> +    if (nfs_mount(client->context, server, path) != 0) {
>>>>> +        error_setg(errp, "Failed to mount nfs share: %s",
>>>>> +                    nfs_get_error(client->context));
>>>>> +        ret = -EINVAL;
>>>>> +        goto fail;
>>>>> +    }
>>>>> +
>>>>> +    if (open_flags & O_CREAT) {
>>>>> +        if (nfs_creat(client->context, file, 0600, &client->fh)
>>>>> != 0) {
>>>>> +            error_setg(errp, "Failed to create file: %s",
>>>>> + nfs_get_error(client->context));
>>>>> +            ret = -EINVAL;
>>>>> +            goto fail;
>>>>> +        }
>>>>> +    } else {
>>>>> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
>>>>> +        if (nfs_open(client->context, file, open_flags, &client->fh)
>>>>> != 0) {
>>>>> +            error_setg(errp, "Failed to open file : %s",
>>>>> +                       nfs_get_error(client->context));
>>>>> +            ret = -EINVAL;
>>>>> +            goto fail;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
>>>>> +        error_setg(errp, "Failed to fstat file: %s",
>>>>> +                   nfs_get_error(client->context));
>>>>> +        ret = -EIO;
>>>>> +        goto fail;
>>>>> +    }
>>>>> +
>>>>> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
>>>>
>>>> Please use DIV_ROUND_UP(). Otherwise the remainder in last sector
>>>> couldn't be read.
>>> Will do. Can't it happen that we end up reading unallocated sectors?
>>
>> Hmm, maybe. Not missing last bytes of unaligned sector is essential
>> for VMDK description file. But you are right, please add check code
>> and make sure that we don't read beyond EOF as well.
> Actually it would like to keep bs->total_sectors = st.st_size /
> BDRV_SECTOR_SIZE; for now until I have checked how libnfs and the
> whole stack react on beyond EOF reads.
>

You don't need to care about libnfs' EOF behavior; nfs_pread_async works at
byte granularity, so you could just read the last partial sector up to EOF
and zero-pad the buffer.
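A rough sketch of what I mean, for nfs_co_readv (assuming the st.st_size from
the open-time fstat is kept around in a hypothetical client->file_size field):

    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
    uint64_t bytes  = nb_sectors * BDRV_SECTOR_SIZE;

    if (offset + bytes > client->file_size) {
        uint64_t readable = client->file_size > offset ?
                            client->file_size - offset : 0;
        /* zero-fill the part of the buffer that lies beyond EOF ... */
        qemu_iovec_memset(iov, readable, 0, bytes - readable);
        /* ... and only request the part that actually exists */
        bytes = readable;
    }

and then pass offset/bytes to nfs_pread_async() and check the completion
status against bytes instead of nb_sectors * BDRV_SECTOR_SIZE.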

Fam
Peter Lieven Dec. 17, 2013, 8:55 a.m. UTC | #8
On 17.12.2013 09:51, Fam Zheng wrote:
> 于2013年 12月17日 星期二 16时46分50秒,Peter Lieven写到:
>> On 17.12.2013 09:29, Fam Zheng wrote:
>>> On 2013年12月17日 15:53, Peter Lieven wrote:
>>>> Hi Fam,
>>>>
>>>> On 17.12.2013 05:07, Fam Zheng wrote:
>>>>> On 2013年12月16日 23:34, Peter Lieven wrote:
>>>>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>>>>>> +                                        int64_t sector_num, int
>>>>>> nb_sectors,
>>>>>> +                                        QEMUIOVector *iov)
>>>>>> +{
>>>>>> +    nfsclient *client = bs->opaque;
>>>>>> +    struct NFSTask Task;
>>>>>> +    char *buf = NULL;
>>>>>> +
>>>>>> +    nfs_co_init_task(client, &Task);
>>>>>> +
>>>>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>>>>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>>>>>> +
>>>>>> +    if (nfs_pwrite_async(client->context, client->fh,
>>>>>> +                         sector_num * BDRV_SECTOR_SIZE,
>>>>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>>>>>> +                         buf, nfs_co_generic_cb, &Task) != 0) {
>>>>>> +        g_free(buf);
>>>>>> +        return -EIO;
>>>>>> +    }
>>>>>> +
>>>>>> +    while (!Task.complete) {
>>>>>> +        nfs_set_events(client);
>>>>>> +        qemu_coroutine_yield();
>>>>>> +    }
>>>>>> +
>>>>>> +    g_free(buf);
>>>>>> +
>>>>>> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>>>>>> +        return -EIO;
>>>>>> +    }
>>>>>> +
>>>>>> +    bs->total_sectors = MAX(bs->total_sectors, sector_num +
>>>>>> nb_sectors);
>>>>>> +    client->allocated_file_size = -ENOTSUP;
>>>>>
>>>>> Why does allocated_file_size become not supported after a write?
>>>> I thought that someone would ask this ;-) bdrv_allocated_file_size
>>>> is only
>>>> used in image info. I saved some code here implementing an async call.
>>>> On open I fstat anyway and store that value. For qemu-img info this is
>>>> sufficient, but the allocated size likely changes after a write.
>>>> -ENOTSUP
>>>> is the default if bdrv_allocated_file_size is not implemented.
>>>
>>> OK. Please add some comment here.
>>>
>>>>>> +static int nfs_file_open_common(BlockDriverState *bs, QDict
>>>>>> *options, int flags,
>>>>>> +                         int open_flags, Error **errp)
>>>>>> +{
>>>>>> +    nfsclient *client = bs->opaque;
>>>>>> +    const char *filename;
>>>>>> +    int ret = 0;
>>>>>> +    QemuOpts *opts;
>>>>>> +    Error *local_err = NULL;
>>>>>> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
>>>>>> +    struct stat st;
>>>>>> +
>>>>>> +    opts = qemu_opts_create_nofail(&runtime_opts);
>>>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>>>> +    if (error_is_set(&local_err)) {
>>>>>> +        qerror_report_err(local_err);
>>>>>> +        error_free(local_err);
>>>>>> +        ret = -EINVAL;
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +
>>>>>> +    filename = qemu_opt_get(opts, "filename");
>>>>>> +
>>>>>> +    client->context = nfs_init_context();
>>>>>> +
>>>>>> +    if (client->context == NULL) {
>>>>>> +        error_setg(errp, "Failed to init NFS context");
>>>>>> +        ret = -EINVAL;
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +
>>>>>> +    server = g_strdup(filename + 6);
>>>>>
>>>>> Please check the length of filename is longer than 6 before accessing
>>>>> filename[6].
>>>> Good point. I will check for this, but in fact I think it can't happen
>>>> because we will
>>>> never end up there if filename does not start with nfs://
>>>
>>> True, at least for now, but it doesn't hurt to be defensive,
>>> especially with strings.
>>>
>>>>>
>>>>>> +    if (server[0] == '/' || server[0] == '\0') {
>>>>>> +        error_setg(errp, "Invalid server in URL");
>>>>>> +        ret = -EINVAL;
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +    strp = strchr(server, '/');
>>>>>> +    if (strp == NULL) {
>>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>>> +        ret = -EINVAL;
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +    path = g_strdup(strp);
>>>>>> +    *strp = 0;
>>>>>> +    strp = strrchr(path, '/');
>>>>>> +    if (strp == NULL) {
>>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>>> +        ret = -EINVAL;
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +    file = g_strdup(strp);
>>>>>> +    *strp = 0;
>>>>>> +
>>>>>> +    if (nfs_mount(client->context, server, path) != 0) {
>>>>>> +        error_setg(errp, "Failed to mount nfs share: %s",
>>>>>> +                    nfs_get_error(client->context));
>>>>>> +        ret = -EINVAL;
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (open_flags & O_CREAT) {
>>>>>> +        if (nfs_creat(client->context, file, 0600, &client->fh)
>>>>>> != 0) {
>>>>>> +            error_setg(errp, "Failed to create file: %s",
>>>>>> + nfs_get_error(client->context));
>>>>>> +            ret = -EINVAL;
>>>>>> +            goto fail;
>>>>>> +        }
>>>>>> +    } else {
>>>>>> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
>>>>>> +        if (nfs_open(client->context, file, open_flags, &client->fh)
>>>>>> != 0) {
>>>>>> +            error_setg(errp, "Failed to open file : %s",
>>>>>> + nfs_get_error(client->context));
>>>>>> +            ret = -EINVAL;
>>>>>> +            goto fail;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
>>>>>> +        error_setg(errp, "Failed to fstat file: %s",
>>>>>> +                   nfs_get_error(client->context));
>>>>>> +        ret = -EIO;
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +
>>>>>> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
>>>>>
>>>>> Please use DIV_ROUND_UP(). Otherwise the remainder in last sector
>>>>> couldn't be read.
>>>> Will do. Can't it happen that we end up reading unallocated sectors?
>>>
>>> Hmm, maybe. Not missing last bytes of unaligned sector is essential
>>> for VMDK description file. But you are right, please add check code
>>> and make sure that we don't read beyond EOF as well.
>> Actually it would like to keep bs->total_sectors = st.st_size /
>> BDRV_SECTOR_SIZE; for now until I have checked how libnfs and the
>> whole stack react on beyond EOF reads.
>>
>
> You don't need to care about libnfs' EOF behavior, nfs_pread_async is in bytes granularity, you could just read the last partial sector till EOF and zero padding the buffer.
Isn't this something that is already handled in bdrv_co_do_readv?

Do you have an example of a VMDK with a descriptor file for me? I would try the implementation then.

Peter
Fam Zheng Dec. 17, 2013, 9:01 a.m. UTC | #9
On 2013-12-17 16:55, Peter Lieven wrote:
> On 17.12.2013 09:51, Fam Zheng wrote:
>> 于2013年 12月17日 星期二 16时46分50秒,Peter Lieven写到:
>>> On 17.12.2013 09:29, Fam Zheng wrote:
>>>> On 2013年12月17日 15:53, Peter Lieven wrote:
>>>>> Hi Fam,
>>>>>
>>>>> On 17.12.2013 05:07, Fam Zheng wrote:
>>>>>> On 2013年12月16日 23:34, Peter Lieven wrote:
>>>>>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>>>>>>> +                                        int64_t sector_num, int
>>>>>>> nb_sectors,
>>>>>>> +                                        QEMUIOVector *iov)
>>>>>>> +{
>>>>>>> +    nfsclient *client = bs->opaque;
>>>>>>> +    struct NFSTask Task;
>>>>>>> +    char *buf = NULL;
>>>>>>> +
>>>>>>> +    nfs_co_init_task(client, &Task);
>>>>>>> +
>>>>>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>>>>>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>>>>>>> +
>>>>>>> +    if (nfs_pwrite_async(client->context, client->fh,
>>>>>>> +                         sector_num * BDRV_SECTOR_SIZE,
>>>>>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>>>>>>> +                         buf, nfs_co_generic_cb, &Task) != 0) {
>>>>>>> +        g_free(buf);
>>>>>>> +        return -EIO;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    while (!Task.complete) {
>>>>>>> +        nfs_set_events(client);
>>>>>>> +        qemu_coroutine_yield();
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    g_free(buf);
>>>>>>> +
>>>>>>> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>>>>>>> +        return -EIO;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    bs->total_sectors = MAX(bs->total_sectors, sector_num +
>>>>>>> nb_sectors);
>>>>>>> +    client->allocated_file_size = -ENOTSUP;
>>>>>>
>>>>>> Why does allocated_file_size become not supported after a write?
>>>>> I thought that someone would ask this ;-) bdrv_allocated_file_size
>>>>> is only
>>>>> used in image info. I saved some code here implementing an async call.
>>>>> On open I fstat anyway and store that value. For qemu-img info this is
>>>>> sufficient, but the allocated size likely changes after a write.
>>>>> -ENOTSUP
>>>>> is the default if bdrv_allocated_file_size is not implemented.
>>>>
>>>> OK. Please add some comment here.
>>>>
>>>>>>> +static int nfs_file_open_common(BlockDriverState *bs, QDict
>>>>>>> *options, int flags,
>>>>>>> +                         int open_flags, Error **errp)
>>>>>>> +{
>>>>>>> +    nfsclient *client = bs->opaque;
>>>>>>> +    const char *filename;
>>>>>>> +    int ret = 0;
>>>>>>> +    QemuOpts *opts;
>>>>>>> +    Error *local_err = NULL;
>>>>>>> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
>>>>>>> +    struct stat st;
>>>>>>> +
>>>>>>> +    opts = qemu_opts_create_nofail(&runtime_opts);
>>>>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>>>>> +    if (error_is_set(&local_err)) {
>>>>>>> +        qerror_report_err(local_err);
>>>>>>> +        error_free(local_err);
>>>>>>> +        ret = -EINVAL;
>>>>>>> +        goto fail;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    filename = qemu_opt_get(opts, "filename");
>>>>>>> +
>>>>>>> +    client->context = nfs_init_context();
>>>>>>> +
>>>>>>> +    if (client->context == NULL) {
>>>>>>> +        error_setg(errp, "Failed to init NFS context");
>>>>>>> +        ret = -EINVAL;
>>>>>>> +        goto fail;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    server = g_strdup(filename + 6);
>>>>>>
>>>>>> Please check the length of filename is longer than 6 before accessing
>>>>>> filename[6].
>>>>> Good point. I will check for this, but in fact I think it can't happen
>>>>> because we will
>>>>> never end up there if filename does not start with nfs://
>>>>
>>>> True, at least for now, but it doesn't hurt to be defensive,
>>>> especially with strings.
>>>>
>>>>>>
>>>>>>> +    if (server[0] == '/' || server[0] == '\0') {
>>>>>>> +        error_setg(errp, "Invalid server in URL");
>>>>>>> +        ret = -EINVAL;
>>>>>>> +        goto fail;
>>>>>>> +    }
>>>>>>> +    strp = strchr(server, '/');
>>>>>>> +    if (strp == NULL) {
>>>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>>>> +        ret = -EINVAL;
>>>>>>> +        goto fail;
>>>>>>> +    }
>>>>>>> +    path = g_strdup(strp);
>>>>>>> +    *strp = 0;
>>>>>>> +    strp = strrchr(path, '/');
>>>>>>> +    if (strp == NULL) {
>>>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>>>> +        ret = -EINVAL;
>>>>>>> +        goto fail;
>>>>>>> +    }
>>>>>>> +    file = g_strdup(strp);
>>>>>>> +    *strp = 0;
>>>>>>> +
>>>>>>> +    if (nfs_mount(client->context, server, path) != 0) {
>>>>>>> +        error_setg(errp, "Failed to mount nfs share: %s",
>>>>>>> +                    nfs_get_error(client->context));
>>>>>>> +        ret = -EINVAL;
>>>>>>> +        goto fail;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (open_flags & O_CREAT) {
>>>>>>> +        if (nfs_creat(client->context, file, 0600, &client->fh)
>>>>>>> != 0) {
>>>>>>> +            error_setg(errp, "Failed to create file: %s",
>>>>>>> + nfs_get_error(client->context));
>>>>>>> +            ret = -EINVAL;
>>>>>>> +            goto fail;
>>>>>>> +        }
>>>>>>> +    } else {
>>>>>>> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
>>>>>>> +        if (nfs_open(client->context, file, open_flags,
>>>>>>> &client->fh)
>>>>>>> != 0) {
>>>>>>> +            error_setg(errp, "Failed to open file : %s",
>>>>>>> + nfs_get_error(client->context));
>>>>>>> +            ret = -EINVAL;
>>>>>>> +            goto fail;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
>>>>>>> +        error_setg(errp, "Failed to fstat file: %s",
>>>>>>> +                   nfs_get_error(client->context));
>>>>>>> +        ret = -EIO;
>>>>>>> +        goto fail;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
>>>>>>
>>>>>> Please use DIV_ROUND_UP(). Otherwise the remainder in last sector
>>>>>> couldn't be read.
>>>>> Will do. Can't it happen that we end up reading unallocated sectors?
>>>>
>>>> Hmm, maybe. Not missing last bytes of unaligned sector is essential
>>>> for VMDK description file. But you are right, please add check code
>>>> and make sure that we don't read beyond EOF as well.
>>> Actually it would like to keep bs->total_sectors = st.st_size /
>>> BDRV_SECTOR_SIZE; for now until I have checked how libnfs and the
>>> whole stack react on beyond EOF reads.
>>>
>>
>> You don't need to care about libnfs' EOF behavior, nfs_pread_async is
>> in bytes granularity, you could just read the last partial sector till
>> EOF and zero padding the buffer.
> isn't this something that is already in bdrv_co_do_readv?
>
> do you have an example of an VMDK with descriptor file for me. I would
> try the implementation then.
>

You can create a VMDK with a descriptor file like this:

> $ qemu-img create -f vmdk -o subformat=monolithicFlat foo.vmdk 1G
> Formatting 'foo.vmdk', fmt=vmdk size=1073741824 compat6=off subformat='monolithicFlat' zeroed_grain=off

> $ cat foo.vmdk
> # Disk DescriptorFile
> version=1
> CID=52b01245
> parentCID=ffffffff
> createType="monolithicFlat"
>
> # Extent description
> RW 2097152 FLAT "foo-flat.vmdk" 0
>
> # The Disk Data Base
> #DDB
>
> ddb.virtualHWVersion = "4"
> ddb.geometry.cylinders = "2080"
> ddb.geometry.heads = "16"
> ddb.geometry.sectors = "63"
> ddb.adapterType = "ide"

It's a 313-byte text file which I assume might not work with the NFS
backend here. Please have a try.

Thanks :)

Fam
Peter Lieven Dec. 17, 2013, 9:07 a.m. UTC | #10
On 17.12.2013 10:01, Fam Zheng wrote:
> On 2013年12月17日 16:55, Peter Lieven wrote:
>> On 17.12.2013 09:51, Fam Zheng wrote:
>>> 于2013年 12月17日 星期二 16时46分50秒,Peter Lieven写到:
>>>> On 17.12.2013 09:29, Fam Zheng wrote:
>>>>> On 2013年12月17日 15:53, Peter Lieven wrote:
>>>>>> Hi Fam,
>>>>>>
>>>>>> On 17.12.2013 05:07, Fam Zheng wrote:
>>>>>>> On 2013年12月16日 23:34, Peter Lieven wrote:
>>>>>>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>>>>>>>> +                                        int64_t sector_num, int
>>>>>>>> nb_sectors,
>>>>>>>> + QEMUIOVector *iov)
>>>>>>>> +{
>>>>>>>> +    nfsclient *client = bs->opaque;
>>>>>>>> +    struct NFSTask Task;
>>>>>>>> +    char *buf = NULL;
>>>>>>>> +
>>>>>>>> +    nfs_co_init_task(client, &Task);
>>>>>>>> +
>>>>>>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>>>>>>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>>>>>>>> +
>>>>>>>> +    if (nfs_pwrite_async(client->context, client->fh,
>>>>>>>> +                         sector_num * BDRV_SECTOR_SIZE,
>>>>>>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>>>>>>>> +                         buf, nfs_co_generic_cb, &Task) != 0) {
>>>>>>>> +        g_free(buf);
>>>>>>>> +        return -EIO;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    while (!Task.complete) {
>>>>>>>> +        nfs_set_events(client);
>>>>>>>> +        qemu_coroutine_yield();
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    g_free(buf);
>>>>>>>> +
>>>>>>>> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>>>>>>>> +        return -EIO;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    bs->total_sectors = MAX(bs->total_sectors, sector_num +
>>>>>>>> nb_sectors);
>>>>>>>> +    client->allocated_file_size = -ENOTSUP;
>>>>>>>
>>>>>>> Why does allocated_file_size become not supported after a write?
>>>>>> I thought that someone would ask this ;-) bdrv_allocated_file_size
>>>>>> is only
>>>>>> used in image info. I saved some code here implementing an async call.
>>>>>> On open I fstat anyway and store that value. For qemu-img info this is
>>>>>> sufficient, but the allocated size likely changes after a write.
>>>>>> -ENOTSUP
>>>>>> is the default if bdrv_allocated_file_size is not implemented.
>>>>>
>>>>> OK. Please add some comment here.
>>>>>
>>>>>>>> +static int nfs_file_open_common(BlockDriverState *bs, QDict
>>>>>>>> *options, int flags,
>>>>>>>> +                         int open_flags, Error **errp)
>>>>>>>> +{
>>>>>>>> +    nfsclient *client = bs->opaque;
>>>>>>>> +    const char *filename;
>>>>>>>> +    int ret = 0;
>>>>>>>> +    QemuOpts *opts;
>>>>>>>> +    Error *local_err = NULL;
>>>>>>>> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
>>>>>>>> +    struct stat st;
>>>>>>>> +
>>>>>>>> +    opts = qemu_opts_create_nofail(&runtime_opts);
>>>>>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>>>>>> +    if (error_is_set(&local_err)) {
>>>>>>>> +        qerror_report_err(local_err);
>>>>>>>> +        error_free(local_err);
>>>>>>>> +        ret = -EINVAL;
>>>>>>>> +        goto fail;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    filename = qemu_opt_get(opts, "filename");
>>>>>>>> +
>>>>>>>> +    client->context = nfs_init_context();
>>>>>>>> +
>>>>>>>> +    if (client->context == NULL) {
>>>>>>>> +        error_setg(errp, "Failed to init NFS context");
>>>>>>>> +        ret = -EINVAL;
>>>>>>>> +        goto fail;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    server = g_strdup(filename + 6);
>>>>>>>
>>>>>>> Please check the length of filename is longer than 6 before accessing
>>>>>>> filename[6].
>>>>>> Good point. I will check for this, but in fact I think it can't happen
>>>>>> because we will
>>>>>> never end up there if filename does not start with nfs://
>>>>>
>>>>> True, at least for now, but it doesn't hurt to be defensive,
>>>>> especially with strings.
>>>>>
>>>>>>>
>>>>>>>> +    if (server[0] == '/' || server[0] == '\0') {
>>>>>>>> +        error_setg(errp, "Invalid server in URL");
>>>>>>>> +        ret = -EINVAL;
>>>>>>>> +        goto fail;
>>>>>>>> +    }
>>>>>>>> +    strp = strchr(server, '/');
>>>>>>>> +    if (strp == NULL) {
>>>>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>>>>> +        ret = -EINVAL;
>>>>>>>> +        goto fail;
>>>>>>>> +    }
>>>>>>>> +    path = g_strdup(strp);
>>>>>>>> +    *strp = 0;
>>>>>>>> +    strp = strrchr(path, '/');
>>>>>>>> +    if (strp == NULL) {
>>>>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>>>>> +        ret = -EINVAL;
>>>>>>>> +        goto fail;
>>>>>>>> +    }
>>>>>>>> +    file = g_strdup(strp);
>>>>>>>> +    *strp = 0;
>>>>>>>> +
>>>>>>>> +    if (nfs_mount(client->context, server, path) != 0) {
>>>>>>>> +        error_setg(errp, "Failed to mount nfs share: %s",
>>>>>>>> + nfs_get_error(client->context));
>>>>>>>> +        ret = -EINVAL;
>>>>>>>> +        goto fail;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (open_flags & O_CREAT) {
>>>>>>>> +        if (nfs_creat(client->context, file, 0600, &client->fh)
>>>>>>>> != 0) {
>>>>>>>> +            error_setg(errp, "Failed to create file: %s",
>>>>>>>> + nfs_get_error(client->context));
>>>>>>>> +            ret = -EINVAL;
>>>>>>>> +            goto fail;
>>>>>>>> +        }
>>>>>>>> +    } else {
>>>>>>>> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
>>>>>>>> +        if (nfs_open(client->context, file, open_flags,
>>>>>>>> &client->fh)
>>>>>>>> != 0) {
>>>>>>>> +            error_setg(errp, "Failed to open file : %s",
>>>>>>>> + nfs_get_error(client->context));
>>>>>>>> +            ret = -EINVAL;
>>>>>>>> +            goto fail;
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
>>>>>>>> +        error_setg(errp, "Failed to fstat file: %s",
>>>>>>>> + nfs_get_error(client->context));
>>>>>>>> +        ret = -EIO;
>>>>>>>> +        goto fail;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
>>>>>>>
>>>>>>> Please use DIV_ROUND_UP(). Otherwise the remainder in last sector
>>>>>>> couldn't be read.
>>>>>> Will do. Can't it happen that we end up reading unallocated sectors?
>>>>>
>>>>> Hmm, maybe. Not missing last bytes of unaligned sector is essential
>>>>> for VMDK description file. But you are right, please add check code
>>>>> and make sure that we don't read beyond EOF as well.
>>>> Actually it would like to keep bs->total_sectors = st.st_size /
>>>> BDRV_SECTOR_SIZE; for now until I have checked how libnfs and the
>>>> whole stack react on beyond EOF reads.
>>>>
>>>
>>> You don't need to care about libnfs' EOF behavior, nfs_pread_async is
>>> in bytes granularity, you could just read the last partial sector till
>>> EOF and zero padding the buffer.
>> isn't this something that is already in bdrv_co_do_readv?
>>
>> do you have an example of an VMDK with descriptor file for me. I would
>> try the implementation then.
>>
>
> You can create a VMDK with description file with:
>
>> $ qemu-img create -f vmdk -o subformat=monolithicFlat foo.vmdk 1G
>> Formatting 'foo.vmdk', fmt=vmdk size=1073741824 compat6=off subformat='monolithicFlat' zeroed_grain=off
>
>> $ cat foo.vmdk
>> # Disk DescriptorFile
>> version=1
>> CID=52b01245
>> parentCID=ffffffff
>> createType="monolithicFlat"
>>
>> # Extent description
>> RW 2097152 FLAT "foo-flat.vmdk" 0
>>
>> # The Disk Data Base
>> #DDB
>>
>> ddb.virtualHWVersion = "4"
>> ddb.geometry.cylinders = "2080"
>> ddb.geometry.heads = "16"
>> ddb.geometry.sectors = "63"
>> ddb.adapterType = "ide"
>
> It's a 313 bytes text file which I assume might not work with nfs backend here. Please have a try.
>
> Thanks :)
It works better than expected. Just creating does not work for obvious reasons, but if I have those files on NFS it works *g*

image: nfs://172.21.200.61/vcore-dev-cdrom/foo.vmdk
file format: vmdk
virtual size: 1.0G (1073741824 bytes)
disk size: 1.0G
Format specific information:
     cid: 1387271102
     parent cid: 4294967295
     create type: monolithicFlat
     extents:
         [0]:
             virtual size: 1073741824
             filename: nfs://172.21.200.61/vcore-dev-cdrom/foo-flat.vmdk
             format: FLAT

You should prioritize the conversion of vmdk_create ;-)

Peter
Fam Zheng Dec. 17, 2013, 9:46 a.m. UTC | #11
On 2013-12-17 17:07, Peter Lieven wrote:
> On 17.12.2013 10:01, Fam Zheng wrote:
>> On 2013年12月17日 16:55, Peter Lieven wrote:
>>> On 17.12.2013 09:51, Fam Zheng wrote:
>>>> 于2013年 12月17日 星期二 16时46分50秒,Peter Lieven写到:
>>>>> On 17.12.2013 09:29, Fam Zheng wrote:
>>>>>> On 2013年12月17日 15:53, Peter Lieven wrote:
>>>>>>> Hi Fam,
>>>>>>>
>>>>>>> On 17.12.2013 05:07, Fam Zheng wrote:
>>>>>>>> On 2013年12月16日 23:34, Peter Lieven wrote:
>>>>>>>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>>>>>>>>> +                                        int64_t sector_num, int
>>>>>>>>> nb_sectors,
>>>>>>>>> + QEMUIOVector *iov)
>>>>>>>>> +{
>>>>>>>>> +    nfsclient *client = bs->opaque;
>>>>>>>>> +    struct NFSTask Task;
>>>>>>>>> +    char *buf = NULL;
>>>>>>>>> +
>>>>>>>>> +    nfs_co_init_task(client, &Task);
>>>>>>>>> +
>>>>>>>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>>>>>>>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors *
>>>>>>>>> BDRV_SECTOR_SIZE);
>>>>>>>>> +
>>>>>>>>> +    if (nfs_pwrite_async(client->context, client->fh,
>>>>>>>>> +                         sector_num * BDRV_SECTOR_SIZE,
>>>>>>>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>>>>>>>>> +                         buf, nfs_co_generic_cb, &Task) != 0) {
>>>>>>>>> +        g_free(buf);
>>>>>>>>> +        return -EIO;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    while (!Task.complete) {
>>>>>>>>> +        nfs_set_events(client);
>>>>>>>>> +        qemu_coroutine_yield();
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    g_free(buf);
>>>>>>>>> +
>>>>>>>>> +    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>>>>>>>>> +        return -EIO;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    bs->total_sectors = MAX(bs->total_sectors, sector_num +
>>>>>>>>> nb_sectors);
>>>>>>>>> +    client->allocated_file_size = -ENOTSUP;
>>>>>>>>
>>>>>>>> Why does allocated_file_size become not supported after a write?
>>>>>>> I thought that someone would ask this ;-) bdrv_allocated_file_size
>>>>>>> is only
>>>>>>> used in image info. I saved some code here implementing an async
>>>>>>> call.
>>>>>>> On open I fstat anyway and store that value. For qemu-img info
>>>>>>> this is
>>>>>>> sufficient, but the allocated size likely changes after a write.
>>>>>>> -ENOTSUP
>>>>>>> is the default if bdrv_allocated_file_size is not implemented.
>>>>>>
>>>>>> OK. Please add some comment here.
>>>>>>
>>>>>>>>> +static int nfs_file_open_common(BlockDriverState *bs, QDict
>>>>>>>>> *options, int flags,
>>>>>>>>> +                         int open_flags, Error **errp)
>>>>>>>>> +{
>>>>>>>>> +    nfsclient *client = bs->opaque;
>>>>>>>>> +    const char *filename;
>>>>>>>>> +    int ret = 0;
>>>>>>>>> +    QemuOpts *opts;
>>>>>>>>> +    Error *local_err = NULL;
>>>>>>>>> +    char *server = NULL, *path = NULL, *file = NULL, *strp;
>>>>>>>>> +    struct stat st;
>>>>>>>>> +
>>>>>>>>> +    opts = qemu_opts_create_nofail(&runtime_opts);
>>>>>>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>>>>>>> +    if (error_is_set(&local_err)) {
>>>>>>>>> +        qerror_report_err(local_err);
>>>>>>>>> +        error_free(local_err);
>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>> +        goto fail;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    filename = qemu_opt_get(opts, "filename");
>>>>>>>>> +
>>>>>>>>> +    client->context = nfs_init_context();
>>>>>>>>> +
>>>>>>>>> +    if (client->context == NULL) {
>>>>>>>>> +        error_setg(errp, "Failed to init NFS context");
>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>> +        goto fail;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    server = g_strdup(filename + 6);
>>>>>>>>
>>>>>>>> Please check the length of filename is longer than 6 before
>>>>>>>> accessing
>>>>>>>> filename[6].
>>>>>>> Good point. I will check for this, but in fact I think it can't
>>>>>>> happen
>>>>>>> because we will
>>>>>>> never end up there if filename does not start with nfs://
>>>>>>
>>>>>> True, at least for now, but it doesn't hurt to be defensive,
>>>>>> especially with strings.
>>>>>>
>>>>>>>>
>>>>>>>>> +    if (server[0] == '/' || server[0] == '\0') {
>>>>>>>>> +        error_setg(errp, "Invalid server in URL");
>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>> +        goto fail;
>>>>>>>>> +    }
>>>>>>>>> +    strp = strchr(server, '/');
>>>>>>>>> +    if (strp == NULL) {
>>>>>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>> +        goto fail;
>>>>>>>>> +    }
>>>>>>>>> +    path = g_strdup(strp);
>>>>>>>>> +    *strp = 0;
>>>>>>>>> +    strp = strrchr(path, '/');
>>>>>>>>> +    if (strp == NULL) {
>>>>>>>>> +        error_setg(errp, "Invalid URL specified.\n");
>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>> +        goto fail;
>>>>>>>>> +    }
>>>>>>>>> +    file = g_strdup(strp);
>>>>>>>>> +    *strp = 0;
>>>>>>>>> +
>>>>>>>>> +    if (nfs_mount(client->context, server, path) != 0) {
>>>>>>>>> +        error_setg(errp, "Failed to mount nfs share: %s",
>>>>>>>>> + nfs_get_error(client->context));
>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>> +        goto fail;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    if (open_flags & O_CREAT) {
>>>>>>>>> +        if (nfs_creat(client->context, file, 0600, &client->fh)
>>>>>>>>> != 0) {
>>>>>>>>> +            error_setg(errp, "Failed to create file: %s",
>>>>>>>>> + nfs_get_error(client->context));
>>>>>>>>> +            ret = -EINVAL;
>>>>>>>>> +            goto fail;
>>>>>>>>> +        }
>>>>>>>>> +    } else {
>>>>>>>>> +        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
>>>>>>>>> +        if (nfs_open(client->context, file, open_flags,
>>>>>>>>> &client->fh)
>>>>>>>>> != 0) {
>>>>>>>>> +            error_setg(errp, "Failed to open file : %s",
>>>>>>>>> + nfs_get_error(client->context));
>>>>>>>>> +            ret = -EINVAL;
>>>>>>>>> +            goto fail;
>>>>>>>>> +        }
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    if (nfs_fstat(client->context, client->fh, &st) != 0) {
>>>>>>>>> +        error_setg(errp, "Failed to fstat file: %s",
>>>>>>>>> + nfs_get_error(client->context));
>>>>>>>>> +        ret = -EIO;
>>>>>>>>> +        goto fail;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
>>>>>>>>
>>>>>>>> Please use DIV_ROUND_UP(). Otherwise the remainder in the last sector
>>>>>>>> can't be read.
>>>>>>> Will do. Can't it happen that we end up reading unallocated sectors?
>>>>>>
>>>>>> Hmm, maybe. Not missing the last bytes of an unaligned sector is
>>>>>> essential for the VMDK descriptor file. But you are right, please add
>>>>>> check code and make sure that we don't read beyond EOF as well.
>>>>> Actually I would like to keep bs->total_sectors = st.st_size /
>>>>> BDRV_SECTOR_SIZE; for now, until I have checked how libnfs and the
>>>>> whole stack react to reads beyond EOF.
>>>>>
>>>>
>>>> You don't need to care about libnfs' EOF behavior; nfs_pread_async works
>>>> at byte granularity, so you can just read the last partial sector up to
>>>> EOF and zero-pad the rest of the buffer.
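
For illustration, a rough sketch of that clamping in nfs_co_readv, assuming the
byte size from the open-time fstat is kept in a (hypothetical) file_size field:

    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
    uint64_t bytes  = nb_sectors * BDRV_SECTOR_SIZE;

    if (offset + bytes > client->file_size) {      /* hypothetical field */
        uint64_t readable = client->file_size > offset ?
                            client->file_size - offset : 0;
        /* zero-fill the part of the request that lies beyond EOF */
        qemu_iovec_memset(iov, readable, 0, bytes - readable);
        bytes = readable;
    }
    /* issue nfs_pread_async() for 'bytes' and compare Task.status
     * against 'bytes' instead of the full request size */
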
>>> Isn't this something that is already handled in bdrv_co_do_readv?
>>>
>>> Do you have an example of a VMDK with a descriptor file for me? I would
>>> try the implementation then.
>>>
>>
>> You can create a VMDK with a descriptor file like this:
>>
>>> $ qemu-img create -f vmdk -o subformat=monolithicFlat foo.vmdk 1G
>>> Formatting 'foo.vmdk', fmt=vmdk size=1073741824 compat6=off
>>> subformat='monolithicFlat' zeroed_grain=off
>>
>>> $ cat foo.vmdk
>>> # Disk DescriptorFile
>>> version=1
>>> CID=52b01245
>>> parentCID=ffffffff
>>> createType="monolithicFlat"
>>>
>>> # Extent description
>>> RW 2097152 FLAT "foo-flat.vmdk" 0
>>>
>>> # The Disk Data Base
>>> #DDB
>>>
>>> ddb.virtualHWVersion = "4"
>>> ddb.geometry.cylinders = "2080"
>>> ddb.geometry.heads = "16"
>>> ddb.geometry.sectors = "63"
>>> ddb.adapterType = "ide"
>>
>> It's a 313-byte text file which I assume might not work with the nfs
>> backend here. Please have a try.
>>
>> Thanks :)
> It works better than expected. Just creating does not work for obvious
> reasons, but if I have those files on NFS it works *g*
>
> image: nfs://172.21.200.61/vcore-dev-cdrom/foo.vmdk
> file format: vmdk
> virtual size: 1.0G (1073741824 bytes)
> disk size: 1.0G
> Format specific information:
>      cid: 1387271102
>      parent cid: 4294967295
>      create type: monolithicFlat
>      extents:
>          [0]:
>              virtual size: 1073741824
>              filename: nfs://172.21.200.61/vcore-dev-cdrom/foo-flat.vmdk
>              format: FLAT
>

Without DIV_ROUND_UP? That would be conditional on zero_beyond_eof. I 
tried this as well, it's not working if total_sectors is not rounded up:

> $ sudo ./qemu-img info nfs://127.0.0.1/stor/vmdk/foo.vmdk
> image: nfs://127.0.0.1/stor/vmdk/foo.vmdk
> file format: raw
> virtual size: 0 (0 bytes)
> disk size: 0
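
For illustration, the rounding being asked for amounts to a one-line change in
nfs_file_open_common. This is a sketch of the suggested fix, not a quote from
any posted version of the patch:

    /* Round up so a file whose size is not a multiple of BDRV_SECTOR_SIZE
     * still exposes its final, partial sector. */
    bs->total_sectors = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
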

Fam
Peter Lieven Dec. 17, 2013, 10:31 a.m. UTC | #12
On 17.12.2013 10:46, Fam Zheng wrote:
[...]
> Without DIV_ROUND_UP? That would be conditional on zero_beyond_eof. I tried this as well, it's not working if total_sectors is not rounded up:
No, with round-up. I've just sent v2 of the patch.

Peter
diff mbox

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index c19133f..f53d184 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -899,6 +899,11 @@  M: Peter Lieven <pl@kamp.de>
 S: Supported
 F: block/iscsi.c
 
+NFS
+M: Peter Lieven <pl@kamp.de>
+S: Maintained
+F: block/nfs.c
+
 SSH
 M: Richard W.M. Jones <rjones@redhat.com>
 S: Supported
diff --git a/block/Makefile.objs b/block/Makefile.objs
index f43ecbc..1bac94e 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -12,6 +12,7 @@  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
+block-obj-$(CONFIG_LIBISCSI) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
diff --git a/block/nfs.c b/block/nfs.c
new file mode 100644
index 0000000..d6cb4c0
--- /dev/null
+++ b/block/nfs.c
@@ -0,0 +1,420 @@ 
+/*
+ * QEMU Block driver for native access to files on NFS shares
+ *
+ * Copyright (c) 2013 Peter Lieven <pl@kamp.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+
+#include <poll.h>
+#include <arpa/inet.h>
+#include "qemu-common.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "block/block_int.h"
+#include "trace.h"
+#include "block/scsi.h"
+#include "qemu/iov.h"
+#include "sysemu/sysemu.h"
+#include "qmp-commands.h"
+
+#include <nfsc/libnfs-zdr.h>
+#include <nfsc/libnfs.h>
+#include <nfsc/libnfs-raw.h>
+#include <nfsc/libnfs-raw-mount.h>
+
+typedef struct nfsclient {
+    struct nfs_context *context;
+    struct nfsfh *fh;
+    int events;
+    bool has_zero_init;
+    int64_t allocated_file_size;
+    QEMUBH *close_bh;
+} nfsclient;
+
+typedef struct NFSTask {
+    int status;
+    int complete;
+    QEMUIOVector *iov;
+    Coroutine *co;
+    QEMUBH *bh;
+} NFSTask;
+
+static void nfs_process_read(void *arg);
+static void nfs_process_write(void *arg);
+
+static void nfs_set_events(nfsclient *client)
+{
+    int ev;
+    /* We always register a read handler.  */
+    ev = POLLIN;
+    ev |= nfs_which_events(client->context);
+    if (ev != client->events) {
+        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
+                      nfs_process_read,
+                      (ev & POLLOUT) ? nfs_process_write : NULL,
+                      client);
+
+    }
+    client->events = ev;
+}
+
+static void nfs_process_read(void *arg)
+{
+    nfsclient *client = arg;
+    nfs_service(client->context, POLLIN);
+    nfs_set_events(client);
+}
+
+static void nfs_process_write(void *arg)
+{
+    nfsclient *client = arg;
+    nfs_service(client->context, POLLOUT);
+    nfs_set_events(client);
+}
+
+static void nfs_co_init_task(nfsclient *client, NFSTask *Task)
+{
+    *Task = (NFSTask) {
+        .co         = qemu_coroutine_self(),
+    };
+}
+
+static void nfs_co_generic_bh_cb(void *opaque)
+{
+    NFSTask *Task = opaque;
+    qemu_bh_delete(Task->bh);
+    qemu_coroutine_enter(Task->co, NULL);
+}
+
+static void nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data, void *private_data)
+{
+    NFSTask *Task = private_data;
+    Task->complete = 1;
+    Task->status = status;
+    if (Task->status > 0 && Task->iov) {
+        if (Task->status == Task->iov->size) {
+            qemu_iovec_from_buf(Task->iov, 0, data, status);
+        } else {
+            Task->status = -1;
+        }
+    }
+    if (Task->co) {
+        Task->bh = qemu_bh_new(nfs_co_generic_bh_cb, Task);
+        qemu_bh_schedule(Task->bh);
+    }
+}
+
+static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
+                                     int64_t sector_num, int nb_sectors,
+                                     QEMUIOVector *iov)
+{
+    nfsclient *client = bs->opaque;
+    struct NFSTask Task;
+
+    nfs_co_init_task(client, &Task);
+    Task.iov = iov;
+
+    if (nfs_pread_async(client->context, client->fh,
+                        sector_num * BDRV_SECTOR_SIZE,
+                        nb_sectors * BDRV_SECTOR_SIZE,
+                        nfs_co_generic_cb, &Task) != 0) {
+        return -EIO;
+    }
+
+    while (!Task.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+    
+    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
+        return -EIO;
+    }
+
+    return 0;
+}
+
+static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
+                                        int64_t sector_num, int nb_sectors,
+                                        QEMUIOVector *iov)
+{
+    nfsclient *client = bs->opaque;
+    struct NFSTask Task;
+    char *buf = NULL;
+    
+    nfs_co_init_task(client, &Task);
+    
+    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
+    
+    if (nfs_pwrite_async(client->context, client->fh,
+                         sector_num * BDRV_SECTOR_SIZE,
+                         nb_sectors * BDRV_SECTOR_SIZE,
+                         buf, nfs_co_generic_cb, &Task) != 0) {
+        g_free(buf);
+        return -EIO;
+    }
+
+    while (!Task.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+    
+    g_free(buf);
+    
+    if (Task.status != nb_sectors * BDRV_SECTOR_SIZE) {
+        return -EIO;
+    }
+    
+    bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+    client->allocated_file_size = -ENOTSUP;
+    return 0;
+}
+
+static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
+{
+    nfsclient *client = bs->opaque;
+    struct NFSTask Task;
+    
+    nfs_co_init_task(client, &Task);
+    
+    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb, &Task) != 0) {
+        return -EIO;
+    }
+
+    while (!Task.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+    
+    if (Task.status != 0) {
+        return -EIO;
+    }
+
+    return 0;
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "nfs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the NFS file",
+        },
+        { /* end of list */ }
+    },
+};
+
+static void nfs_file_close(BlockDriverState *bs)
+{
+    nfsclient *client = bs->opaque;
+    if (client->context) {
+        if (client->fh) {
+            nfs_close(client->context, client->fh);
+        }
+        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
+        nfs_destroy_context(client->context);
+    }
+    memset(client, 0, sizeof(nfsclient));
+}
+
+
+static int nfs_file_open_common(BlockDriverState *bs, QDict *options, int flags,
+                         int open_flags, Error **errp)
+{
+    nfsclient *client = bs->opaque;
+    const char *filename;
+    int ret = 0;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+    char *server = NULL, *path = NULL, *file = NULL, *strp;
+    struct stat st;
+
+    opts = qemu_opts_create_nofail(&runtime_opts);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (error_is_set(&local_err)) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    filename = qemu_opt_get(opts, "filename");
+    
+    client->context = nfs_init_context();
+    
+    if (client->context == NULL) {
+        error_setg(errp, "Failed to init NFS context");
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    server = g_strdup(filename + 6);
+    if (server[0] == '/' || server[0] == '\0') {
+        error_setg(errp, "Invalid server in URL");
+        ret = -EINVAL;
+        goto fail;
+    }
+    strp = strchr(server, '/');
+    if (strp == NULL) {
+        error_setg(errp, "Invalid URL specified.\n");
+        ret = -EINVAL;
+        goto fail;
+    }
+    path = g_strdup(strp);
+    *strp = 0;
+    strp = strrchr(path, '/');
+    if (strp == NULL) {
+        error_setg(errp, "Invalid URL specified.\n");
+        ret = -EINVAL;
+        goto fail;
+    }
+    file = g_strdup(strp);
+    *strp = 0;
+
+    if (nfs_mount(client->context, server, path) != 0) {
+        error_setg(errp, "Failed to mount nfs share: %s",
+                    nfs_get_error(client->context));
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    if (open_flags & O_CREAT) {
+        if (nfs_creat(client->context, file, 0600, &client->fh) != 0) {
+            error_setg(errp, "Failed to create file: %s",
+                             nfs_get_error(client->context));
+            ret = -EINVAL;
+            goto fail;
+        }
+    } else {
+        open_flags = (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
+        if (nfs_open(client->context, file, open_flags, &client->fh) != 0) {
+            error_setg(errp, "Failed to open file : %s",
+                       nfs_get_error(client->context));
+            ret = -EINVAL;
+            goto fail;
+        }
+    }
+
+    if (nfs_fstat(client->context, client->fh, &st) != 0) {
+        error_setg(errp, "Failed to fstat file: %s",
+                   nfs_get_error(client->context));
+        ret = -EIO;
+        goto fail;
+    }
+    
+    bs->total_sectors = st.st_size / BDRV_SECTOR_SIZE;
+    client->has_zero_init = S_ISREG(st.st_mode);
+    client->allocated_file_size = st.st_blocks * st.st_blksize;
+    goto out;
+fail:
+    nfs_file_close(bs);
+out:
+    g_free(server);
+    g_free(path);
+    g_free(file);
+    return ret;
+}
+
+static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
+                         Error **errp) {
+    return nfs_file_open_common(bs, options, flags, 0, errp);                             
+}
+
+static int nfs_file_create(const char *filename, QEMUOptionParameter *options,
+                         Error **errp)
+{
+    int ret = 0;
+    int64_t total_size = 0;
+    BlockDriverState *bs;
+    nfsclient *client = NULL;
+    QDict *bs_options;
+
+    bs = bdrv_new("");
+
+    /* Read out options */
+    while (options && options->name) {
+        if (!strcmp(options->name, "size")) {
+            total_size = options->value.n / BDRV_SECTOR_SIZE;
+        }
+        options++;
+    }
+
+    bs->opaque = g_malloc0(sizeof(struct nfsclient));
+    client = bs->opaque;
+
+    bs_options = qdict_new();
+    qdict_put(bs_options, "filename", qstring_from_str(filename));
+    ret = nfs_file_open_common(bs, bs_options, 0, O_CREAT, NULL);
+    QDECREF(bs_options);
+    if (ret != 0) {
+        goto out;
+    }
+    ret = nfs_ftruncate(client->context, client->fh, total_size * BDRV_SECTOR_SIZE);
+    if (ret != 0) {
+        ret = -ENOSPC;
+    }
+out:
+    nfs_file_close(bs);
+    g_free(bs->opaque);
+    bs->opaque = NULL;
+    bdrv_unref(bs);
+    return ret;
+}
+
+static int nfs_has_zero_init(BlockDriverState *bs)
+{
+    nfsclient *client = bs->opaque;
+    return client->has_zero_init;
+}
+
+static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
+{
+    nfsclient *client = bs->opaque;
+    return client->allocated_file_size;
+}
+
+static BlockDriver bdrv_nfs = {
+    .format_name     = "nfs",
+    .protocol_name   = "nfs",
+
+    .instance_size   = sizeof(nfsclient),
+    .bdrv_needs_filename = true,
+    .bdrv_has_zero_init = nfs_has_zero_init,
+    .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
+
+    .bdrv_file_open  = nfs_file_open,
+    .bdrv_close      = nfs_file_close,
+    .bdrv_create     = nfs_file_create,
+
+    .bdrv_co_readv         = nfs_co_readv,
+    .bdrv_co_writev        = nfs_co_writev,
+    .bdrv_co_flush_to_disk = nfs_co_flush,
+};
+
+static void nfs_block_init(void)
+{
+    bdrv_register(&bdrv_nfs);
+}
+
+block_init(nfs_block_init);
diff --git a/configure b/configure
index 8144d9f..6c9e47a 100755
--- a/configure
+++ b/configure
@@ -250,6 +250,7 @@  vss_win32_sdk=""
 win_sdk="no"
 want_tools="yes"
 libiscsi=""
+libnfs=""
 coroutine=""
 coroutine_pool=""
 seccomp=""
@@ -833,6 +834,10 @@  for opt do
   ;;
   --enable-libiscsi) libiscsi="yes"
   ;;
+  --disable-libnfs) libnfs="no"
+  ;;
+  --enable-libnfs) libnfs="yes"
+  ;;
   --enable-profiler) profiler="yes"
   ;;
   --disable-cocoa) cocoa="no"
@@ -1202,6 +1207,8 @@  echo "  --enable-spice           enable spice"
 echo "  --enable-rbd             enable building the rados block device (rbd)"
 echo "  --disable-libiscsi       disable iscsi support"
 echo "  --enable-libiscsi        enable iscsi support"
+echo "  --disable-libnfs         disable nfs support"
+echo "  --enable-libnfs          enable nfs support"
 echo "  --disable-smartcard-nss  disable smartcard nss support"
 echo "  --enable-smartcard-nss   enable smartcard nss support"
 echo "  --disable-libusb         disable libusb (for usb passthrough)"
@@ -3050,6 +3057,32 @@  EOF
   fi
 fi
 
+##########################################
+# Do we have libnfs
+if test "$libnfs" != "no" ; then
+  cat > $TMPC << EOF
+#include <nfsc/libnfs-zdr.h>
+#include <nfsc/libnfs.h>
+#include <nfsc/libnfs-raw.h>
+#include <nfsc/libnfs-raw-mount.h>
+int main(void) {
+    nfs_init_context();
+    nfs_pread_async(0,0,0,0,0,0);
+    nfs_pwrite_async(0,0,0,0,0,0,0);
+    nfs_fstat(0,0,0);
+    return 0;
+    }
+EOF
+  if compile_prog "-Werror" "-lnfs" ; then
+    libnfs="yes"
+    LIBS="$LIBS -lnfs"
+  else
+    if test "$libnfs" = "yes" ; then
+      feature_not_found "libnfs"
+    fi
+    libnfs="no"
+  fi
+fi
 
 ##########################################
 # Do we need libm
@@ -3777,6 +3810,7 @@  echo "libusb            $libusb"
 echo "usb net redir     $usb_redir"
 echo "GLX support       $glx"
 echo "libiscsi support  $libiscsi"
+echo "libnfs support    $libnfs"
 echo "build guest agent $guest_agent"
 echo "QGA VSS support   $guest_agent_with_vss"
 echo "seccomp support   $seccomp"
@@ -4107,6 +4141,10 @@  if test "$libiscsi" = "yes" ; then
   echo "CONFIG_LIBISCSI=y" >> $config_host_mak
 fi
 
+if test "$libnfs" = "yes" ; then
+  echo "CONFIG_LIBNFS=y" >> $config_host_mak
+fi
+
 if test "$seccomp" = "yes"; then
   echo "CONFIG_SECCOMP=y" >> $config_host_mak
 fi