diff mbox

[7/7] virtio-blk: add x-data-plane=on|off performance feature

Message ID 1352992746-8767-8-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi Nov. 15, 2012, 3:19 p.m. UTC
The virtio-blk-data-plane feature is easy to integrate into
hw/virtio-blk.c.  The data plane can be started and stopped similar to
vhost-net.

Users can take advantage of the virtio-blk-data-plane feature using the
new -device virtio-blk-pci,x-data-plane=on property.

The x-data-plane name was chosen because at this stage the feature is
experimental and likely to see changes in the future.

If the VM configuration does not support virtio-blk-data-plane an error
message is printed.  Although we could fall back to regular virtio-blk,
I prefer the explicit approach since it prompts the user to fix their
configuration if they want the performance benefit of
virtio-blk-data-plane.

Limitations:
 * Only format=raw is supported
 * Live migration is not supported
 * Block jobs, hot unplug, and other operations fail with -EBUSY
 * I/O throttling limits are ignored
 * Only Linux hosts are supported due to Linux AIO usage

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/virtio-blk.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/virtio-blk.h |  1 +
 hw/virtio-pci.c |  3 +++
 3 files changed, 62 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin Nov. 15, 2012, 6:48 p.m. UTC | #1
On Thu, Nov 15, 2012 at 04:19:06PM +0100, Stefan Hajnoczi wrote:
> The virtio-blk-data-plane feature is easy to integrate into
> hw/virtio-blk.c.  The data plane can be started and stopped similar to
> vhost-net.
> 
> Users can take advantage of the virtio-blk-data-plane feature using the
> new -device virtio-blk-pci,x-data-plane=on property.
> 
> The x-data-plane name was chosen because at this stage the feature is
> experimental and likely to see changes in the future.
> 
> If the VM configuration does not support virtio-blk-data-plane an error
> message is printed.  Although we could fall back to regular virtio-blk,
> I prefer the explicit approach since it prompts the user to fix their
> configuration if they want the performance benefit of
> virtio-blk-data-plane.
> 
> Limitations:
>  * Only format=raw is supported
>  * Live migration is not supported
>  * Block jobs, hot unplug, and other operations fail with -EBUSY
>  * I/O throttling limits are ignored
>  * Only Linux hosts are supported due to Linux AIO usage
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>


Would be very interested in learning about the performance
impact of this. How does this compare to current model and
to vhost-blk?


> ---
>  hw/virtio-blk.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/virtio-blk.h |  1 +
>  hw/virtio-pci.c |  3 +++
>  3 files changed, 62 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
> index e25cc96..7f6004e 100644
> --- a/hw/virtio-blk.c
> +++ b/hw/virtio-blk.c
> @@ -17,6 +17,8 @@
>  #include "hw/block-common.h"
>  #include "blockdev.h"
>  #include "virtio-blk.h"
> +#include "hw/dataplane/virtio-blk.h"
> +#include "migration.h"
>  #include "scsi-defs.h"
>  #ifdef __linux__
>  # include <scsi/sg.h>
> @@ -33,6 +35,8 @@ typedef struct VirtIOBlock
>      VirtIOBlkConf *blk;
>      unsigned short sector_mask;
>      DeviceState *qdev;
> +    VirtIOBlockDataPlane *dataplane;
> +    Error *migration_blocker;
>  } VirtIOBlock;
>  
>  static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
> @@ -407,6 +411,14 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>          .num_writes = 0,
>      };
>  
> +    /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> +     * dataplane here instead of waiting for .set_status().
> +     */
> +    if (s->dataplane) {
> +        virtio_blk_data_plane_start(s->dataplane);
> +        return;
> +    }
> +
>      while ((req = virtio_blk_get_request(s))) {
>          virtio_blk_handle_request(req, &mrb);
>      }
> @@ -446,8 +458,13 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
>  {
>      VirtIOBlock *s = opaque;
>  
> -    if (!running)
> +    if (!running) {
> +        /* qemu_drain_all() doesn't know about data plane, quiesce here */
> +        if (s->dataplane) {
> +            virtio_blk_data_plane_drain(s->dataplane);
> +        }
>          return;
> +    }
>  
>      if (!s->bh) {
>          s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
> @@ -538,6 +555,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
>      VirtIOBlock *s = to_virtio_blk(vdev);
>      uint32_t features;
>  
> +    if (s->dataplane && !(status & VIRTIO_CONFIG_S_DRIVER)) {
> +        virtio_blk_data_plane_stop(s->dataplane);
> +    }
> +
>      if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
>          return;
>      }
> @@ -604,6 +625,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>  {
>      VirtIOBlock *s;
>      static int virtio_blk_id;
> +    int fd = -1;
>  
>      if (!blk->conf.bs) {
>          error_report("drive property not set");
> @@ -619,6 +641,21 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>          return NULL;
>      }
>  
> +    if (blk->data_plane) {
> +        if (blk->scsi) {
> +            error_report("device is incompatible with x-data-plane, "
> +                         "use scsi=off");
> +            return NULL;
> +        }
> +
> +        fd = raw_get_aio_fd(blk->conf.bs);
> +        if (fd < 0) {
> +            error_report("drive is incompatible with x-data-plane, "
> +                         "use format=raw,cache=none,aio=native");
> +            return NULL;
> +        }
> +    }
> +
>      s = (VirtIOBlock *)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK,
>                                            sizeof(struct virtio_blk_config),
>                                            sizeof(VirtIOBlock));
> @@ -636,6 +673,17 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>  
>      s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
>  
> +    if (fd >= 0) {
> +        s->dataplane = virtio_blk_data_plane_create(&s->vdev, fd);
> +
> +        /* Prevent block operations that conflict with data plane thread */
> +        bdrv_set_in_use(s->bs, 1);
> +
> +        error_setg(&s->migration_blocker,
> +                   "x-data-plane does not support migration");
> +        migrate_add_blocker(s->migration_blocker);
> +    }
> +
>      qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
>      s->qdev = dev;
>      register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
> @@ -652,6 +700,15 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>  void virtio_blk_exit(VirtIODevice *vdev)
>  {
>      VirtIOBlock *s = to_virtio_blk(vdev);
> +
> +    if (s->dataplane) {
> +        migrate_del_blocker(s->migration_blocker);
> +        error_free(s->migration_blocker);
> +        bdrv_set_in_use(s->bs, 0);
> +        virtio_blk_data_plane_destroy(s->dataplane);
> +        s->dataplane = NULL;
> +    }
> +
>      unregister_savevm(s->qdev, "virtio-blk", s);
>      blockdev_mark_auto_del(s->bs);
>      virtio_cleanup(vdev);
> diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
> index f0740d0..53d7971 100644
> --- a/hw/virtio-blk.h
> +++ b/hw/virtio-blk.h
> @@ -105,6 +105,7 @@ struct VirtIOBlkConf
>      char *serial;
>      uint32_t scsi;
>      uint32_t config_wce;
> +    uint32_t data_plane;
>  };
>  
>  #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 9603150..401f5ea 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -862,6 +862,9 @@ static Property virtio_blk_properties[] = {
>  #endif
>      DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true),
>      DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
> +#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
> +    DEFINE_PROP_BIT("x-data-plane", VirtIOPCIProxy, blk.data_plane, 0, false),
> +#endif
>      DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
>      DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
>      DEFINE_PROP_END_OF_LIST(),
> -- 
> 1.8.0
Khoa Huynh Nov. 15, 2012, 7:34 p.m. UTC | #2
"Michael S. Tsirkin" <mst@redhat.com> wrote on 11/15/2012 12:48:49 PM:

> From: "Michael S. Tsirkin" <mst@redhat.com>
> To: Stefan Hajnoczi <stefanha@redhat.com>,
> Cc: qemu-devel@nongnu.org, Anthony Liguori/Austin/IBM@IBMUS, Paolo
> Bonzini <pbonzini@redhat.com>, Kevin Wolf <kwolf@redhat.com>, Asias
> He <asias@redhat.com>, Khoa Huynh/Austin/IBM@IBMUS
> Date: 11/15/2012 12:46 PM
> Subject: Re: [PATCH 7/7] virtio-blk: add x-data-plane=on|off
> performance feature
>
> On Thu, Nov 15, 2012 at 04:19:06PM +0100, Stefan Hajnoczi wrote:
> > The virtio-blk-data-plane feature is easy to integrate into
> > hw/virtio-blk.c.  The data plane can be started and stopped similar to
> > vhost-net.
> >
> > Users can take advantage of the virtio-blk-data-plane feature using the
> > new -device virtio-blk-pci,x-data-plane=on property.
> >
> > The x-data-plane name was chosen because at this stage the feature is
> > experimental and likely to see changes in the future.
> >
> > If the VM configuration does not support virtio-blk-data-plane an error
> > message is printed.  Although we could fall back to regular virtio-blk,
> > I prefer the explicit approach since it prompts the user to fix their
> > configuration if they want the performance benefit of
> > virtio-blk-data-plane.
> >
> > Limitations:
> >  * Only format=raw is supported
> >  * Live migration is not supported
> >  * Block jobs, hot unplug, and other operations fail with -EBUSY
> >  * I/O throttling limits are ignored
> >  * Only Linux hosts are supported due to Linux AIO usage
> >
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>
>
> Would be very interested in learning about the performance
> impact of this. How does this compare to current model and
> to vhost-blk?

I plan to do a complete evaluation of this patchset in the coming days.
However, I have done quite a bit of performance testing on earlier versions
of the data-plane and vhost-blk code bits. Here's what I found:

1) The existing kvm/qemu code can only handle up to about 150,000 IOPS for
a single KVM guest.  The bottleneck here is the global qemu mutex.

2) With performance tuning, I was able to achieve 1.33 million IOPS for a
single KVM guest with data-plane. This is very close to the
1.4-million-IOPS
limit of my storage setup.

3) Similarly, I was able to get up to 1.34 million IOPS for a single KVM
guest with an earlier prototype of vhost-blk (using Linux AIO, from Liu
Yuan).

This shows that bypassing the global qemu mutex would allow us to achieve
much higher I/O rates.  In fact, these I/O rates would handily beat claims
from all other competing hypervisors.

I am currently evaluating the latest vhost-blk code bits from Asias He and
will report results soon. I have already got past 1 million IOPS per guest
with Asias' code and is trying to see if I could get even higher I/O rates.
And as I mentioned earlier, I'll also evaluate this latest data-plane code
from Stefan real soon.  Please stay tuned...

-Khoa

>
>
> > ---
> >  hw/virtio-blk.c | 59 ++++++++++++++++++++++++++++++++++++++++++++
> ++++++++++++-
> >  hw/virtio-blk.h |  1 +
> >  hw/virtio-pci.c |  3 +++
> >  3 files changed, 62 insertions(+), 1 deletion(-)
> >
> > diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
> > index e25cc96..7f6004e 100644
> > --- a/hw/virtio-blk.c
> > +++ b/hw/virtio-blk.c
> > @@ -17,6 +17,8 @@
> >  #include "hw/block-common.h"
> >  #include "blockdev.h"
> >  #include "virtio-blk.h"
> > +#include "hw/dataplane/virtio-blk.h"
> > +#include "migration.h"
> >  #include "scsi-defs.h"
> >  #ifdef __linux__
> >  # include <scsi/sg.h>
> > @@ -33,6 +35,8 @@ typedef struct VirtIOBlock
> >      VirtIOBlkConf *blk;
> >      unsigned short sector_mask;
> >      DeviceState *qdev;
> > +    VirtIOBlockDataPlane *dataplane;
> > +    Error *migration_blocker;
> >  } VirtIOBlock;
> >
> >  static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
> > @@ -407,6 +411,14 @@ static void virtio_blk_handle_output
> (VirtIODevice *vdev, VirtQueue *vq)
> >          .num_writes = 0,
> >      };
> >
> > +    /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so
start
> > +     * dataplane here instead of waiting for .set_status().
> > +     */
> > +    if (s->dataplane) {
> > +        virtio_blk_data_plane_start(s->dataplane);
> > +        return;
> > +    }
> > +
> >      while ((req = virtio_blk_get_request(s))) {
> >          virtio_blk_handle_request(req, &mrb);
> >      }
> > @@ -446,8 +458,13 @@ static void virtio_blk_dma_restart_cb(void
> *opaque, int running,
> >  {
> >      VirtIOBlock *s = opaque;
> >
> > -    if (!running)
> > +    if (!running) {
> > +        /* qemu_drain_all() doesn't know about data plane, quiesce
here */
> > +        if (s->dataplane) {
> > +            virtio_blk_data_plane_drain(s->dataplane);
> > +        }
> >          return;
> > +    }
> >
> >      if (!s->bh) {
> >          s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
> > @@ -538,6 +555,10 @@ static void virtio_blk_set_status
> (VirtIODevice *vdev, uint8_t status)
> >      VirtIOBlock *s = to_virtio_blk(vdev);
> >      uint32_t features;
> >
> > +    if (s->dataplane && !(status & VIRTIO_CONFIG_S_DRIVER)) {
> > +        virtio_blk_data_plane_stop(s->dataplane);
> > +    }
> > +
> >      if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
> >          return;
> >      }
> > @@ -604,6 +625,7 @@ VirtIODevice *virtio_blk_init(DeviceState
> *dev, VirtIOBlkConf *blk)
> >  {
> >      VirtIOBlock *s;
> >      static int virtio_blk_id;
> > +    int fd = -1;
> >
> >      if (!blk->conf.bs) {
> >          error_report("drive property not set");
> > @@ -619,6 +641,21 @@ VirtIODevice *virtio_blk_init(DeviceState
> *dev, VirtIOBlkConf *blk)
> >          return NULL;
> >      }
> >
> > +    if (blk->data_plane) {
> > +        if (blk->scsi) {
> > +            error_report("device is incompatible with x-data-plane, "
> > +                         "use scsi=off");
> > +            return NULL;
> > +        }
> > +
> > +        fd = raw_get_aio_fd(blk->conf.bs);
> > +        if (fd < 0) {
> > +            error_report("drive is incompatible with x-data-plane, "
> > +                         "use format=raw,cache=none,aio=native");
> > +            return NULL;
> > +        }
> > +    }
> > +
> >      s = (VirtIOBlock *)virtio_common_init("virtio-blk",
VIRTIO_ID_BLOCK,
> >                                            sizeof(struct
virtio_blk_config),
> >                                            sizeof(VirtIOBlock));
> > @@ -636,6 +673,17 @@ VirtIODevice *virtio_blk_init(DeviceState
> *dev, VirtIOBlkConf *blk)
> >
> >      s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
> >
> > +    if (fd >= 0) {
> > +        s->dataplane = virtio_blk_data_plane_create(&s->vdev, fd);
> > +
> > +        /* Prevent block operations that conflict with data
planethread */
> > +        bdrv_set_in_use(s->bs, 1);
> > +
> > +        error_setg(&s->migration_blocker,
> > +                   "x-data-plane does not support migration");
> > +        migrate_add_blocker(s->migration_blocker);
> > +    }
> > +
> >      qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
> >      s->qdev = dev;
> >      register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
> > @@ -652,6 +700,15 @@ VirtIODevice *virtio_blk_init(DeviceState
> *dev, VirtIOBlkConf *blk)
> >  void virtio_blk_exit(VirtIODevice *vdev)
> >  {
> >      VirtIOBlock *s = to_virtio_blk(vdev);
> > +
> > +    if (s->dataplane) {
> > +        migrate_del_blocker(s->migration_blocker);
> > +        error_free(s->migration_blocker);
> > +        bdrv_set_in_use(s->bs, 0);
> > +        virtio_blk_data_plane_destroy(s->dataplane);
> > +        s->dataplane = NULL;
> > +    }
> > +
> >      unregister_savevm(s->qdev, "virtio-blk", s);
> >      blockdev_mark_auto_del(s->bs);
> >      virtio_cleanup(vdev);
> > diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
> > index f0740d0..53d7971 100644
> > --- a/hw/virtio-blk.h
> > +++ b/hw/virtio-blk.h
> > @@ -105,6 +105,7 @@ struct VirtIOBlkConf
> >      char *serial;
> >      uint32_t scsi;
> >      uint32_t config_wce;
> > +    uint32_t data_plane;
> >  };
> >
> >  #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
> > diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> > index 9603150..401f5ea 100644
> > --- a/hw/virtio-pci.c
> > +++ b/hw/virtio-pci.c
> > @@ -862,6 +862,9 @@ static Property virtio_blk_properties[] = {
> >  #endif
> >      DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce,0,
true),
> >      DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
> VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
> > +#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
> > +    DEFINE_PROP_BIT("x-data-plane", VirtIOPCIProxy,
> blk.data_plane, 0, false),
> > +#endif
> >      DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
> >      DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
> >      DEFINE_PROP_END_OF_LIST(),
> > --
> > 1.8.0
>
Anthony Liguori Nov. 15, 2012, 9:08 p.m. UTC | #3
Stefan Hajnoczi <stefanha@redhat.com> writes:

> The virtio-blk-data-plane feature is easy to integrate into
> hw/virtio-blk.c.  The data plane can be started and stopped similar to
> vhost-net.
>
> Users can take advantage of the virtio-blk-data-plane feature using the
> new -device virtio-blk-pci,x-data-plane=on property.

I don't think this should be a property of virtio-blk-pci but rather a
separate device.

It's an alternative interface with a slightly different guest view.  

Regards,

Anthony Liguori

>
> The x-data-plane name was chosen because at this stage the feature is
> experimental and likely to see changes in the future.
>
> If the VM configuration does not support virtio-blk-data-plane an error
> message is printed.  Although we could fall back to regular virtio-blk,
> I prefer the explicit approach since it prompts the user to fix their
> configuration if they want the performance benefit of
> virtio-blk-data-plane.
>
> Limitations:
>  * Only format=raw is supported
>  * Live migration is not supported
>  * Block jobs, hot unplug, and other operations fail with -EBUSY
>  * I/O throttling limits are ignored
>  * Only Linux hosts are supported due to Linux AIO usage
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/virtio-blk.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/virtio-blk.h |  1 +
>  hw/virtio-pci.c |  3 +++
>  3 files changed, 62 insertions(+), 1 deletion(-)
>
> diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
> index e25cc96..7f6004e 100644
> --- a/hw/virtio-blk.c
> +++ b/hw/virtio-blk.c
> @@ -17,6 +17,8 @@
>  #include "hw/block-common.h"
>  #include "blockdev.h"
>  #include "virtio-blk.h"
> +#include "hw/dataplane/virtio-blk.h"
> +#include "migration.h"
>  #include "scsi-defs.h"
>  #ifdef __linux__
>  # include <scsi/sg.h>
> @@ -33,6 +35,8 @@ typedef struct VirtIOBlock
>      VirtIOBlkConf *blk;
>      unsigned short sector_mask;
>      DeviceState *qdev;
> +    VirtIOBlockDataPlane *dataplane;
> +    Error *migration_blocker;
>  } VirtIOBlock;
>  
>  static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
> @@ -407,6 +411,14 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>          .num_writes = 0,
>      };
>  
> +    /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> +     * dataplane here instead of waiting for .set_status().
> +     */
> +    if (s->dataplane) {
> +        virtio_blk_data_plane_start(s->dataplane);
> +        return;
> +    }
> +
>      while ((req = virtio_blk_get_request(s))) {
>          virtio_blk_handle_request(req, &mrb);
>      }
> @@ -446,8 +458,13 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
>  {
>      VirtIOBlock *s = opaque;
>  
> -    if (!running)
> +    if (!running) {
> +        /* qemu_drain_all() doesn't know about data plane, quiesce here */
> +        if (s->dataplane) {
> +            virtio_blk_data_plane_drain(s->dataplane);
> +        }
>          return;
> +    }
>  
>      if (!s->bh) {
>          s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
> @@ -538,6 +555,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
>      VirtIOBlock *s = to_virtio_blk(vdev);
>      uint32_t features;
>  
> +    if (s->dataplane && !(status & VIRTIO_CONFIG_S_DRIVER)) {
> +        virtio_blk_data_plane_stop(s->dataplane);
> +    }
> +
>      if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
>          return;
>      }
> @@ -604,6 +625,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>  {
>      VirtIOBlock *s;
>      static int virtio_blk_id;
> +    int fd = -1;
>  
>      if (!blk->conf.bs) {
>          error_report("drive property not set");
> @@ -619,6 +641,21 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>          return NULL;
>      }
>  
> +    if (blk->data_plane) {
> +        if (blk->scsi) {
> +            error_report("device is incompatible with x-data-plane, "
> +                         "use scsi=off");
> +            return NULL;
> +        }
> +
> +        fd = raw_get_aio_fd(blk->conf.bs);
> +        if (fd < 0) {
> +            error_report("drive is incompatible with x-data-plane, "
> +                         "use format=raw,cache=none,aio=native");
> +            return NULL;
> +        }
> +    }
> +
>      s = (VirtIOBlock *)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK,
>                                            sizeof(struct virtio_blk_config),
>                                            sizeof(VirtIOBlock));
> @@ -636,6 +673,17 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>  
>      s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
>  
> +    if (fd >= 0) {
> +        s->dataplane = virtio_blk_data_plane_create(&s->vdev, fd);
> +
> +        /* Prevent block operations that conflict with data plane thread */
> +        bdrv_set_in_use(s->bs, 1);
> +
> +        error_setg(&s->migration_blocker,
> +                   "x-data-plane does not support migration");
> +        migrate_add_blocker(s->migration_blocker);
> +    }
> +
>      qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
>      s->qdev = dev;
>      register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
> @@ -652,6 +700,15 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>  void virtio_blk_exit(VirtIODevice *vdev)
>  {
>      VirtIOBlock *s = to_virtio_blk(vdev);
> +
> +    if (s->dataplane) {
> +        migrate_del_blocker(s->migration_blocker);
> +        error_free(s->migration_blocker);
> +        bdrv_set_in_use(s->bs, 0);
> +        virtio_blk_data_plane_destroy(s->dataplane);
> +        s->dataplane = NULL;
> +    }
> +
>      unregister_savevm(s->qdev, "virtio-blk", s);
>      blockdev_mark_auto_del(s->bs);
>      virtio_cleanup(vdev);
> diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
> index f0740d0..53d7971 100644
> --- a/hw/virtio-blk.h
> +++ b/hw/virtio-blk.h
> @@ -105,6 +105,7 @@ struct VirtIOBlkConf
>      char *serial;
>      uint32_t scsi;
>      uint32_t config_wce;
> +    uint32_t data_plane;
>  };
>  
>  #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 9603150..401f5ea 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -862,6 +862,9 @@ static Property virtio_blk_properties[] = {
>  #endif
>      DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true),
>      DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
> +#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
> +    DEFINE_PROP_BIT("x-data-plane", VirtIOPCIProxy, blk.data_plane, 0, false),
> +#endif
>      DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
>      DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
>      DEFINE_PROP_END_OF_LIST(),
> -- 
> 1.8.0
Anthony Liguori Nov. 15, 2012, 9:11 p.m. UTC | #4
Khoa Huynh <khoa@us.ibm.com> writes:

> "Michael S. Tsirkin" <mst@redhat.com> wrote on 11/15/2012 12:48:49 PM:
>
>> From: "Michael S. Tsirkin" <mst@redhat.com>
>> To: Stefan Hajnoczi <stefanha@redhat.com>,
>> Cc: qemu-devel@nongnu.org, Anthony Liguori/Austin/IBM@IBMUS, Paolo
>> Bonzini <pbonzini@redhat.com>, Kevin Wolf <kwolf@redhat.com>, Asias
>> He <asias@redhat.com>, Khoa Huynh/Austin/IBM@IBMUS
>> Date: 11/15/2012 12:46 PM
>> Subject: Re: [PATCH 7/7] virtio-blk: add x-data-plane=on|off
>> performance feature
>>
>> On Thu, Nov 15, 2012 at 04:19:06PM +0100, Stefan Hajnoczi wrote:
>> > The virtio-blk-data-plane feature is easy to integrate into
>> > hw/virtio-blk.c.  The data plane can be started and stopped similar to
>> > vhost-net.
>> >
>> > Users can take advantage of the virtio-blk-data-plane feature using the
>> > new -device virtio-blk-pci,x-data-plane=on property.
>> >
>> > The x-data-plane name was chosen because at this stage the feature is
>> > experimental and likely to see changes in the future.
>> >
>> > If the VM configuration does not support virtio-blk-data-plane an error
>> > message is printed.  Although we could fall back to regular virtio-blk,
>> > I prefer the explicit approach since it prompts the user to fix their
>> > configuration if they want the performance benefit of
>> > virtio-blk-data-plane.
>> >
>> > Limitations:
>> >  * Only format=raw is supported
>> >  * Live migration is not supported
>> >  * Block jobs, hot unplug, and other operations fail with -EBUSY
>> >  * I/O throttling limits are ignored
>> >  * Only Linux hosts are supported due to Linux AIO usage
>> >
>> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>>
>>
>> Would be very interested in learning about the performance
>> impact of this. How does this compare to current model and
>> to vhost-blk?
>
> I plan to do a complete evaluation of this patchset in the coming days.
> However, I have done quite a bit of performance testing on earlier versions
> of the data-plane and vhost-blk code bits. Here's what I found:
>
> 1) The existing kvm/qemu code can only handle up to about 150,000 IOPS for
> a single KVM guest.  The bottleneck here is the global qemu mutex.
>
> 2) With performance tuning, I was able to achieve 1.33 million IOPS for a
> single KVM guest with data-plane. This is very close to the
> 1.4-million-IOPS
> limit of my storage setup.

From my POV, if we can get this close to bare metal with
virtio-blk-dataplane, there's absolutely no reason to merge vhost-blk
support.

We simply lose too much with a kernel-based solution.

I'm sure there's more we can do to improve the userspace implementation
too like a hypercall-based notify and adaptive polling.

Regards,

Anthony Liguori
Stefan Hajnoczi Nov. 16, 2012, 6:22 a.m. UTC | #5
On Thu, Nov 15, 2012 at 10:08 PM, Anthony Liguori <aliguori@us.ibm.com> wrote:
> Stefan Hajnoczi <stefanha@redhat.com> writes:
>
>> The virtio-blk-data-plane feature is easy to integrate into
>> hw/virtio-blk.c.  The data plane can be started and stopped similar to
>> vhost-net.
>>
>> Users can take advantage of the virtio-blk-data-plane feature using the
>> new -device virtio-blk-pci,x-data-plane=on property.
>
> I don't think this should be a property of virtio-blk-pci but rather a
> separate device.

The hw/virtio-blk.c code still needs to be used since
hw/dataplane/virtio-blk.c is only a subset of virtio-blk.

So we're talking about adding a new virtio-blk-data-plane-pci device
type to hw/virtio-pci.c?

Stefan
Paolo Bonzini Nov. 16, 2012, 7:40 a.m. UTC | #6
Il 15/11/2012 22:08, Anthony Liguori ha scritto:
> Stefan Hajnoczi <stefanha@redhat.com> writes:
> 
>> The virtio-blk-data-plane feature is easy to integrate into
>> hw/virtio-blk.c.  The data plane can be started and stopped similar to
>> vhost-net.
>>
>> Users can take advantage of the virtio-blk-data-plane feature using the
>> new -device virtio-blk-pci,x-data-plane=on property.
> 
> I don't think this should be a property of virtio-blk-pci but rather a
> separate device.

Since this is all temporary and we want it to become the default
(perhaps keeping the old code for ioeventfd=false only), I don't think
it really matters.

Paolo
Kevin Wolf Nov. 19, 2012, 10:38 a.m. UTC | #7
Am 16.11.2012 07:22, schrieb Stefan Hajnoczi:
> On Thu, Nov 15, 2012 at 10:08 PM, Anthony Liguori <aliguori@us.ibm.com> wrote:
>> Stefan Hajnoczi <stefanha@redhat.com> writes:
>>
>>> The virtio-blk-data-plane feature is easy to integrate into
>>> hw/virtio-blk.c.  The data plane can be started and stopped similar to
>>> vhost-net.
>>>
>>> Users can take advantage of the virtio-blk-data-plane feature using the
>>> new -device virtio-blk-pci,x-data-plane=on property.
>>
>> I don't think this should be a property of virtio-blk-pci but rather a
>> separate device.
> 
> The hw/virtio-blk.c code still needs to be used since
> hw/dataplane/virtio-blk.c is only a subset of virtio-blk.
> 
> So we're talking about adding a new virtio-blk-data-plane-pci device
> type to hw/virtio-pci.c?

A new device sounds wrong to me, it's the very same thing from a guest
perspective. Which makes me wonder if in the final version it shouldn't
be a -blockdev option rather than a -device one...

Kevin
Paolo Bonzini Nov. 19, 2012, 10:51 a.m. UTC | #8
Il 19/11/2012 11:38, Kevin Wolf ha scritto:
> Am 16.11.2012 07:22, schrieb Stefan Hajnoczi:
>> On Thu, Nov 15, 2012 at 10:08 PM, Anthony Liguori <aliguori@us.ibm.com> wrote:
>>> Stefan Hajnoczi <stefanha@redhat.com> writes:
>>>
>>>> The virtio-blk-data-plane feature is easy to integrate into
>>>> hw/virtio-blk.c.  The data plane can be started and stopped similar to
>>>> vhost-net.
>>>>
>>>> Users can take advantage of the virtio-blk-data-plane feature using the
>>>> new -device virtio-blk-pci,x-data-plane=on property.
>>>
>>> I don't think this should be a property of virtio-blk-pci but rather a
>>> separate device.
>>
>> The hw/virtio-blk.c code still needs to be used since
>> hw/dataplane/virtio-blk.c is only a subset of virtio-blk.
>>
>> So we're talking about adding a new virtio-blk-data-plane-pci device
>> type to hw/virtio-pci.c?
> 
> A new device sounds wrong to me, it's the very same thing from a guest
> perspective. Which makes me wonder if in the final version it shouldn't
> be a -blockdev option rather than a -device one...

In the final version it shouldn't be an option at all. :)

Paolo
diff mbox

Patch

diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index e25cc96..7f6004e 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -17,6 +17,8 @@ 
 #include "hw/block-common.h"
 #include "blockdev.h"
 #include "virtio-blk.h"
+#include "hw/dataplane/virtio-blk.h"
+#include "migration.h"
 #include "scsi-defs.h"
 #ifdef __linux__
 # include <scsi/sg.h>
@@ -33,6 +35,8 @@  typedef struct VirtIOBlock
     VirtIOBlkConf *blk;
     unsigned short sector_mask;
     DeviceState *qdev;
+    VirtIOBlockDataPlane *dataplane;
+    Error *migration_blocker;
 } VirtIOBlock;
 
 static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
@@ -407,6 +411,14 @@  static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         .num_writes = 0,
     };
 
+    /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
+     * dataplane here instead of waiting for .set_status().
+     */
+    if (s->dataplane) {
+        virtio_blk_data_plane_start(s->dataplane);
+        return;
+    }
+
     while ((req = virtio_blk_get_request(s))) {
         virtio_blk_handle_request(req, &mrb);
     }
@@ -446,8 +458,13 @@  static void virtio_blk_dma_restart_cb(void *opaque, int running,
 {
     VirtIOBlock *s = opaque;
 
-    if (!running)
+    if (!running) {
+        /* qemu_drain_all() doesn't know about data plane, quiesce here */
+        if (s->dataplane) {
+            virtio_blk_data_plane_drain(s->dataplane);
+        }
         return;
+    }
 
     if (!s->bh) {
         s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
@@ -538,6 +555,10 @@  static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
     VirtIOBlock *s = to_virtio_blk(vdev);
     uint32_t features;
 
+    if (s->dataplane && !(status & VIRTIO_CONFIG_S_DRIVER)) {
+        virtio_blk_data_plane_stop(s->dataplane);
+    }
+
     if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
         return;
     }
@@ -604,6 +625,7 @@  VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
 {
     VirtIOBlock *s;
     static int virtio_blk_id;
+    int fd = -1;
 
     if (!blk->conf.bs) {
         error_report("drive property not set");
@@ -619,6 +641,21 @@  VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
         return NULL;
     }
 
+    if (blk->data_plane) {
+        if (blk->scsi) {
+            error_report("device is incompatible with x-data-plane, "
+                         "use scsi=off");
+            return NULL;
+        }
+
+        fd = raw_get_aio_fd(blk->conf.bs);
+        if (fd < 0) {
+            error_report("drive is incompatible with x-data-plane, "
+                         "use format=raw,cache=none,aio=native");
+            return NULL;
+        }
+    }
+
     s = (VirtIOBlock *)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK,
                                           sizeof(struct virtio_blk_config),
                                           sizeof(VirtIOBlock));
@@ -636,6 +673,17 @@  VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
 
     s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
 
+    if (fd >= 0) {
+        s->dataplane = virtio_blk_data_plane_create(&s->vdev, fd);
+
+        /* Prevent block operations that conflict with data plane thread */
+        bdrv_set_in_use(s->bs, 1);
+
+        error_setg(&s->migration_blocker,
+                   "x-data-plane does not support migration");
+        migrate_add_blocker(s->migration_blocker);
+    }
+
     qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
     s->qdev = dev;
     register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
@@ -652,6 +700,15 @@  VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
 void virtio_blk_exit(VirtIODevice *vdev)
 {
     VirtIOBlock *s = to_virtio_blk(vdev);
+
+    if (s->dataplane) {
+        migrate_del_blocker(s->migration_blocker);
+        error_free(s->migration_blocker);
+        bdrv_set_in_use(s->bs, 0);
+        virtio_blk_data_plane_destroy(s->dataplane);
+        s->dataplane = NULL;
+    }
+
     unregister_savevm(s->qdev, "virtio-blk", s);
     blockdev_mark_auto_del(s->bs);
     virtio_cleanup(vdev);
diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
index f0740d0..53d7971 100644
--- a/hw/virtio-blk.h
+++ b/hw/virtio-blk.h
@@ -105,6 +105,7 @@  struct VirtIOBlkConf
     char *serial;
     uint32_t scsi;
     uint32_t config_wce;
+    uint32_t data_plane;
 };
 
 #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 9603150..401f5ea 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -862,6 +862,9 @@  static Property virtio_blk_properties[] = {
 #endif
     DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true),
     DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    DEFINE_PROP_BIT("x-data-plane", VirtIOPCIProxy, blk.data_plane, 0, false),
+#endif
     DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
     DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
     DEFINE_PROP_END_OF_LIST(),