diff mbox

[v4,11/11] virtio-blk: add x-data-plane=on|off performance feature

Message ID 1353597412-12232-12-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi Nov. 22, 2012, 3:16 p.m. UTC
The virtio-blk-data-plane feature is easy to integrate into
hw/virtio-blk.c.  The data plane can be started and stopped similarly to
vhost-net.

Users can take advantage of the virtio-blk-data-plane feature using the
new -device virtio-blk-pci,x-data-plane=on property.

The x-data-plane name was chosen because at this stage the feature is
experimental and likely to see changes in the future.

If the VM configuration does not support virtio-blk-data-plane, an error
message is printed.  Although we could fall back to regular virtio-blk,
I prefer the explicit approach since it prompts the user to fix their
configuration if they want the performance benefit of
virtio-blk-data-plane.

Limitations:
 * Only format=raw is supported
 * Live migration is not supported
 * Block jobs, hot unplug, and other operations fail with -EBUSY
 * I/O throttling limits are ignored
 * Only Linux hosts are supported due to Linux AIO usage

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/virtio-blk.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/virtio-blk.h |  1 +
 hw/virtio-pci.c |  3 +++
 3 files changed, 62 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin Nov. 29, 2012, 1:12 p.m. UTC | #1
On Thu, Nov 22, 2012 at 04:16:52PM +0100, Stefan Hajnoczi wrote:
> The virtio-blk-data-plane feature is easy to integrate into
> hw/virtio-blk.c.  The data plane can be started and stopped similar to
> vhost-net.
> 
> Users can take advantage of the virtio-blk-data-plane feature using the
> new -device virtio-blk-pci,x-data-plane=on property.
> 
> The x-data-plane name was chosen because at this stage the feature is
> experimental and likely to see changes in the future.
> 
> If the VM configuration does not support virtio-blk-data-plane an error
> message is printed.  Although we could fall back to regular virtio-blk,
> I prefer the explicit approach since it prompts the user to fix their
> configuration if they want the performance benefit of
> virtio-blk-data-plane.

Not only that, this affects the features exposed to the guest, so it really
can't be transparent.

Which reminds me - shouldn't some features be turned off?
For example, VIRTIO_BLK_F_SCSI?

> Limitations:
>  * Only format=raw is supported
>  * Live migration is not supported

This is probably fixable long term?

>  * Block jobs, hot unplug, and other operations fail with -EBUSY

Hmm, I don't see code to disable PCI unplug in this patch.
I expected no_hotplug to be set.
Where is it?

>  * I/O throttling limits are ignored

And this?
Meanwhile can we have attempts to set them fail?

>  * Only Linux hosts are supported due to Linux AIO usage
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/virtio-blk.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/virtio-blk.h |  1 +
>  hw/virtio-pci.c |  3 +++
>  3 files changed, 62 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
> index e25cc96..7f6004e 100644
> --- a/hw/virtio-blk.c
> +++ b/hw/virtio-blk.c
> @@ -17,6 +17,8 @@
>  #include "hw/block-common.h"
>  #include "blockdev.h"
>  #include "virtio-blk.h"
> +#include "hw/dataplane/virtio-blk.h"
> +#include "migration.h"
>  #include "scsi-defs.h"
>  #ifdef __linux__
>  # include <scsi/sg.h>
> @@ -33,6 +35,8 @@ typedef struct VirtIOBlock
>      VirtIOBlkConf *blk;
>      unsigned short sector_mask;
>      DeviceState *qdev;
> +    VirtIOBlockDataPlane *dataplane;
> +    Error *migration_blocker;

It would be nice to move the migration disabling,
the checking of supported formats,
and all the rest of it out to the dataplane code.

>  } VirtIOBlock;
>  
>  static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
> @@ -407,6 +411,14 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>          .num_writes = 0,
>      };
>  
> +    /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> +     * dataplane here instead of waiting for .set_status().
> +     */
> +    if (s->dataplane) {
> +        virtio_blk_data_plane_start(s->dataplane);
> +        return;
> +    }
> +
>      while ((req = virtio_blk_get_request(s))) {
>          virtio_blk_handle_request(req, &mrb);
>      }
> @@ -446,8 +458,13 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
>  {
>      VirtIOBlock *s = opaque;
>  
> -    if (!running)
> +    if (!running) {
> +        /* qemu_drain_all() doesn't know about data plane, quiesce here */
> +        if (s->dataplane) {
> +            virtio_blk_data_plane_drain(s->dataplane);
> +        }
>          return;
> +    }
>  
>      if (!s->bh) {
>          s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
> @@ -538,6 +555,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
>      VirtIOBlock *s = to_virtio_blk(vdev);
>      uint32_t features;
>  
> +    if (s->dataplane && !(status & VIRTIO_CONFIG_S_DRIVER)) {
> +        virtio_blk_data_plane_stop(s->dataplane);
> +    }
> +
>      if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
>          return;
>      }
> @@ -604,6 +625,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>  {
>      VirtIOBlock *s;
>      static int virtio_blk_id;
> +    int fd = -1;
>  
>      if (!blk->conf.bs) {
>          error_report("drive property not set");
> @@ -619,6 +641,21 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>          return NULL;
>      }
>  
> +    if (blk->data_plane) {
> +        if (blk->scsi) {
> +            error_report("device is incompatible with x-data-plane, "
> +                         "use scsi=off");
> +            return NULL;
> +        }
> +
> +        fd = raw_get_aio_fd(blk->conf.bs);
> +        if (fd < 0) {
> +            error_report("drive is incompatible with x-data-plane, "
> +                         "use format=raw,cache=none,aio=native");
> +            return NULL;
> +        }
> +    }
> +
>      s = (VirtIOBlock *)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK,
>                                            sizeof(struct virtio_blk_config),
>                                            sizeof(VirtIOBlock));
> @@ -636,6 +673,17 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>  
>      s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
>  
> +    if (fd >= 0) {
> +        s->dataplane = virtio_blk_data_plane_create(&s->vdev, fd);
> +
> +        /* Prevent block operations that conflict with data plane thread */
> +        bdrv_set_in_use(s->bs, 1);
> +
> +        error_setg(&s->migration_blocker,
> +                   "x-data-plane does not support migration");
> +        migrate_add_blocker(s->migration_blocker);
> +    }
> +
>      qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
>      s->qdev = dev;
>      register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
> @@ -652,6 +700,15 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
>  void virtio_blk_exit(VirtIODevice *vdev)
>  {
>      VirtIOBlock *s = to_virtio_blk(vdev);
> +
> +    if (s->dataplane) {
> +        migrate_del_blocker(s->migration_blocker);
> +        error_free(s->migration_blocker);
> +        bdrv_set_in_use(s->bs, 0);
> +        virtio_blk_data_plane_destroy(s->dataplane);
> +        s->dataplane = NULL;
> +    }
> +
>      unregister_savevm(s->qdev, "virtio-blk", s);
>      blockdev_mark_auto_del(s->bs);
>      virtio_cleanup(vdev);
> diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
> index f0740d0..53d7971 100644
> --- a/hw/virtio-blk.h
> +++ b/hw/virtio-blk.h
> @@ -105,6 +105,7 @@ struct VirtIOBlkConf
>      char *serial;
>      uint32_t scsi;
>      uint32_t config_wce;
> +    uint32_t data_plane;
>  };
>  
>  #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 71f4fb5..32cc910 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -897,6 +897,9 @@ static Property virtio_blk_properties[] = {
>  #endif
>      DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true),
>      DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
> +#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
> +    DEFINE_PROP_BIT("x-data-plane", VirtIOPCIProxy, blk.data_plane, 0, false),
> +#endif
>      DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
>      DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
>      DEFINE_PROP_END_OF_LIST(),
> -- 
> 1.8.0
Stefan Hajnoczi Nov. 29, 2012, 2:45 p.m. UTC | #2
On Thu, Nov 29, 2012 at 03:12:35PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 22, 2012 at 04:16:52PM +0100, Stefan Hajnoczi wrote:
> > The virtio-blk-data-plane feature is easy to integrate into
> > hw/virtio-blk.c.  The data plane can be started and stopped similar to
> > vhost-net.
> > 
> > Users can take advantage of the virtio-blk-data-plane feature using the
> > new -device virtio-blk-pci,x-data-plane=on property.
> > 
> > The x-data-plane name was chosen because at this stage the feature is
> > experimental and likely to see changes in the future.
> > 
> > If the VM configuration does not support virtio-blk-data-plane an error
> > message is printed.  Although we could fall back to regular virtio-blk,
> > I prefer the explicit approach since it prompts the user to fix their
> > configuration if they want the performance benefit of
> > virtio-blk-data-plane.
> 
> Not only that, this affects the features exposed to the guest, so it really
> can't be transparent.
> 
> Which reminds me - shouldn't some features be turned off?
> For example, VIRTIO_BLK_F_SCSI?

Yes, virtio-blk-data-plane only starts when you give -device
virtio-blk-pci,scsi=off,x-data-plane=on.  If you use scsi=on an error
message is printed.

> > Limitations:
> >  * Only format=raw is supported
> >  * Live migration is not supported
> 
> This is probably fixable long term?

Absolutely.  There are two parts:

1. Marking written memory dirty so live RAM migration can work.  Missing
   today, easy cheat is to switch off virtio-blk-data-plane and silently
   switch to regular virtio-blk emulation while memory dirty logging is
   enabled.  The more long-term solution is to actually communicate the
   dirty information back to the memory API.

2. Synchronizing virtio-blk-data-plane vring state with virtio-blk so
   save/load works.  This should be relatively straightforward.

I don't want to gate this patch series on live migration support but it
is on my TODO list for virtio-blk-data-plane after this initial series
has been merged.

> >  * Block jobs, hot unplug, and other operations fail with -EBUSY
> 
> > Hmm, I don't see code to disable PCI unplug in this patch.
> I expected no_hotplug to be set.
> Where is it?

It uses the bdrv_in_use() mechanism.

> >  * I/O throttling limits are ignored
> 
> And this?
> Meanwhile can we have attempts to set them fail?

This limitation exists because virtio-blk-data-plane today bypasses the
QEMU block layer.  The next step is to get the block layer working
inside the data plane thread.  At that point I/O limits work again.

Adding an error would be a layering violation because I/O throttling
happens in the QEMU block layer and is unaware of the emulated storage
controller (virtio-blk, IDE, SCSI, etc).

I think it's better to document the limitation and continue working on
AioContext so that we can soon support I/O throttling with
virtio-blk-data-plane.  It would be quite ugly to add checks.

> > @@ -33,6 +35,8 @@ typedef struct VirtIOBlock
> >      VirtIOBlkConf *blk;
> >      unsigned short sector_mask;
> >      DeviceState *qdev;
> > +    VirtIOBlockDataPlane *dataplane;
> > +    Error *migration_blocker;
> 
> Would be nice to move the migration disabling
> checking supported formats
> and all the rest of it out to dataplane code.

The reason to do it in virtio-blk.c is that we already have access to
the device configuration.  If we move it to hw/dataplane/virtio-blk.c
then that code needs to reach inside and check data that it doesn't
otherwise access.

IMO it's nice to keep data plane "dumb" and perform these checks where
we already have to deal with the relationship between VirtIOBlkConf and
friends.

Stefan
Michael S. Tsirkin Nov. 29, 2012, 2:55 p.m. UTC | #3
On Thu, Nov 29, 2012 at 03:45:55PM +0100, Stefan Hajnoczi wrote:
> On Thu, Nov 29, 2012 at 03:12:35PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 22, 2012 at 04:16:52PM +0100, Stefan Hajnoczi wrote:
> > > The virtio-blk-data-plane feature is easy to integrate into
> > > hw/virtio-blk.c.  The data plane can be started and stopped similar to
> > > vhost-net.
> > > 
> > > Users can take advantage of the virtio-blk-data-plane feature using the
> > > new -device virtio-blk-pci,x-data-plane=on property.
> > > 
> > > The x-data-plane name was chosen because at this stage the feature is
> > > experimental and likely to see changes in the future.
> > > 
> > > If the VM configuration does not support virtio-blk-data-plane an error
> > > message is printed.  Although we could fall back to regular virtio-blk,
> > > I prefer the explicit approach since it prompts the user to fix their
> > > configuration if they want the performance benefit of
> > > virtio-blk-data-plane.
> > 
> > Not only that, this affects the features exposed to the guest, so it really
> > can't be transparent.
> > 
> > Which reminds me - shouldn't some features be turned off?
> > For example, VIRTIO_BLK_F_SCSI?
> 
> Yes, virtio-blk-data-plane only starts when you give -device
> virtio-blk-pci,scsi=off,x-data-plane=on.  If you use scsi=on an error
> message is printed.
> 
> > > Limitations:
> > >  * Only format=raw is supported
> > >  * Live migration is not supported
> > 
> > This is probably fixable long term?
> 
> Absolutely.  There are two parts:
> 
> 1. Marking written memory dirty so live RAM migration can work.  Missing
>    today, easy cheat is to switch off virtio-blk-data-plane and silently
>    switch to regular virtio-blk emulation while memory dirty logging is
>    enabled.  The more long-term solution is to actually communicate the
>    dirty information back to the memory API.
> 
> 2. Synchronizing virtio-blk-data-plane vring state with virtio-blk so
>    save/load works.  This should be relatively straightforward.
> 
> I don't want to gate this patch series on live migration support but it
> is on my TODO list for virtio-blk-data-plane after this initial series
> has been merged.
> 
> > >  * Block jobs, hot unplug, and other operations fail with -EBUSY
> > 
> > > Hmm, I don't see code to disable PCI unplug in this patch.
> > I expected no_hotplug to be set.
> > Where is it?
> 
> It uses the bdrv_in_use() mechanism.

Hmm, but the PCI device can still go away if
the guest ejects it. Does this work fine?

> > >  * I/O throttling limits are ignored
> > 
> > And this?
> > Meanwhile can we have attempts to set them fail?
> 
> This limitation exists because virtio-blk-data-plane today bypasses the
> QEMU block layer.  The next step is to get the block layer working
> inside the data plane thread.  At that point I/O limits work again.
> 
> Adding an error would be a layering violation because I/O throttling
> happens in the QEMU block layer and is unaware of the emulated storage
> controller (virtio-blk, IDE, SCSI, etc).
> 
> I think it's better to document the limitation and continue working on
> AioContext so that we can soon support I/O throttling with
> virtio-blk-data-plane.  It would be quite ugly to add checks.
> 
> > > @@ -33,6 +35,8 @@ typedef struct VirtIOBlock
> > >      VirtIOBlkConf *blk;
> > >      unsigned short sector_mask;
> > >      DeviceState *qdev;
> > > +    VirtIOBlockDataPlane *dataplane;
> > > +    Error *migration_blocker;
> > 
> > Would be nice to move the migration disabling
> > checking supported formats
> > and all the rest of it out to dataplane code.
> 
> The reason to do it in virtio-blk.c is that we already have access to
> the device configuration.  If we move it to hw/dataplane/virtio-blk.c
> then that code needs to reach inside and check data that it doesn't
> otherwise access.

Not really, just pass it all necessary data.

> IMO it's nice to keep data plane "dumb" and perform these checks where
> we already have to deal with the relationship between VirtIOBlkConf and
> friends.
> 
> Stefan

Yes but then it's not contained.
Michael S. Tsirkin Dec. 4, 2012, 11:20 a.m. UTC | #4
On Thu, Nov 29, 2012 at 04:55:48PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 29, 2012 at 03:45:55PM +0100, Stefan Hajnoczi wrote:
> > On Thu, Nov 29, 2012 at 03:12:35PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Nov 22, 2012 at 04:16:52PM +0100, Stefan Hajnoczi wrote:
> > > > The virtio-blk-data-plane feature is easy to integrate into
> > > > hw/virtio-blk.c.  The data plane can be started and stopped similar to
> > > > vhost-net.
> > > > 
> > > > Users can take advantage of the virtio-blk-data-plane feature using the
> > > > new -device virtio-blk-pci,x-data-plane=on property.
> > > > 
> > > > The x-data-plane name was chosen because at this stage the feature is
> > > > experimental and likely to see changes in the future.
> > > > 
> > > > If the VM configuration does not support virtio-blk-data-plane an error
> > > > message is printed.  Although we could fall back to regular virtio-blk,
> > > > I prefer the explicit approach since it prompts the user to fix their
> > > > configuration if they want the performance benefit of
> > > > virtio-blk-data-plane.
> > > 
> > > Not only that, this affects the features exposed to the guest, so it really
> > > can't be transparent.
> > > 
> > > Which reminds me - shouldn't some features be turned off?
> > > For example, VIRTIO_BLK_F_SCSI?
> > 
> > Yes, virtio-blk-data-plane only starts when you give -device
> > virtio-blk-pci,scsi=off,x-data-plane=on.  If you use scsi=on an error
> > message is printed.
> > 
> > > > Limitations:
> > > >  * Only format=raw is supported
> > > >  * Live migration is not supported
> > > 
> > > This is probably fixable long term?
> > 
> > Absolutely.  There are two parts:
> > 
> > 1. Marking written memory dirty so live RAM migration can work.  Missing
> >    today, easy cheat is to switch off virtio-blk-data-plane and silently
> >    switch to regular virtio-blk emulation while memory dirty logging is
> >    enabled.  The more long-term solution is to actually communicate the
> >    dirty information back to the memory API.
> > 
> > 2. Synchronizing virtio-blk-data-plane vring state with virtio-blk so
> >    save/load works.  This should be relatively straightforward.
> > 
> > I don't want to gate this patch series on live migration support but it
> > is on my TODO list for virtio-blk-data-plane after this initial series
> > has been merged.
> > 
> > > >  * Block jobs, hot unplug, and other operations fail with -EBUSY
> > > 
> > > > Hmm, I don't see code to disable PCI unplug in this patch.
> > > I expected no_hotplug to be set.
> > > Where is it?
> > 
> > It uses the bdrv_in_use() mechanism.
> 
> Hmm but PCI device can still go away if
> guest ejects it. Does this work fine?

Any comment?
Stefan Hajnoczi Dec. 4, 2012, 2:19 p.m. UTC | #5
On Tue, Dec 04, 2012 at 01:20:20PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 29, 2012 at 04:55:48PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 29, 2012 at 03:45:55PM +0100, Stefan Hajnoczi wrote:
> > > On Thu, Nov 29, 2012 at 03:12:35PM +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Nov 22, 2012 at 04:16:52PM +0100, Stefan Hajnoczi wrote:
> > > > > The virtio-blk-data-plane feature is easy to integrate into
> > > > > hw/virtio-blk.c.  The data plane can be started and stopped similar to
> > > > > vhost-net.
> > > > > 
> > > > > Users can take advantage of the virtio-blk-data-plane feature using the
> > > > > new -device virtio-blk-pci,x-data-plane=on property.
> > > > > 
> > > > > The x-data-plane name was chosen because at this stage the feature is
> > > > > experimental and likely to see changes in the future.
> > > > > 
> > > > > If the VM configuration does not support virtio-blk-data-plane an error
> > > > > message is printed.  Although we could fall back to regular virtio-blk,
> > > > > I prefer the explicit approach since it prompts the user to fix their
> > > > > configuration if they want the performance benefit of
> > > > > virtio-blk-data-plane.
> > > > 
> > > > Not only that, this affects the features exposed to the guest, so it really
> > > > can't be transparent.
> > > > 
> > > > Which reminds me - shouldn't some features be turned off?
> > > > For example, VIRTIO_BLK_F_SCSI?
> > > 
> > > Yes, virtio-blk-data-plane only starts when you give -device
> > > virtio-blk-pci,scsi=off,x-data-plane=on.  If you use scsi=on an error
> > > message is printed.
> > > 
> > > > > Limitations:
> > > > >  * Only format=raw is supported
> > > > >  * Live migration is not supported
> > > > 
> > > > This is probably fixable long term?
> > > 
> > > Absolutely.  There are two parts:
> > > 
> > > 1. Marking written memory dirty so live RAM migration can work.  Missing
> > >    today, easy cheat is to switch off virtio-blk-data-plane and silently
> > >    switch to regular virtio-blk emulation while memory dirty logging is
> > >    enabled.  The more long-term solution is to actually communicate the
> > >    dirty information back to the memory API.
> > > 
> > > 2. Synchronizing virtio-blk-data-plane vring state with virtio-blk so
> > >    save/load works.  This should be relatively straightforward.
> > > 
> > > I don't want to gate this patch series on live migration support but it
> > > is on my TODO list for virtio-blk-data-plane after this initial series
> > > has been merged.
> > > 
> > > > >  * Block jobs, hot unplug, and other operations fail with -EBUSY
> > > > 
> > > > > Hmm, I don't see code to disable PCI unplug in this patch.
> > > > I expected no_hotplug to be set.
> > > > Where is it?
> > > 
> > > It uses the bdrv_in_use() mechanism.
> > 
> > Hmm but PCI device can still go away if
> > guest ejects it. Does this work fine?
> 
> Any comment?

Sorry for the delay.

virtio_blk_exit() is called when the device is freed.  The code destroys
the data plane thread - this includes draining requests and then
terminating the thread.

I tested with pci_del so the guest is cooperating but virtio_blk_exit()
does not assume that the data plane thread is already stopped.

Is this what you were asking?

Stefan
diff mbox

Patch

diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index e25cc96..7f6004e 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -17,6 +17,8 @@ 
 #include "hw/block-common.h"
 #include "blockdev.h"
 #include "virtio-blk.h"
+#include "hw/dataplane/virtio-blk.h"
+#include "migration.h"
 #include "scsi-defs.h"
 #ifdef __linux__
 # include <scsi/sg.h>
@@ -33,6 +35,8 @@  typedef struct VirtIOBlock
     VirtIOBlkConf *blk;
     unsigned short sector_mask;
     DeviceState *qdev;
+    VirtIOBlockDataPlane *dataplane;
+    Error *migration_blocker;
 } VirtIOBlock;
 
 static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
@@ -407,6 +411,14 @@  static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         .num_writes = 0,
     };
 
+    /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
+     * dataplane here instead of waiting for .set_status().
+     */
+    if (s->dataplane) {
+        virtio_blk_data_plane_start(s->dataplane);
+        return;
+    }
+
     while ((req = virtio_blk_get_request(s))) {
         virtio_blk_handle_request(req, &mrb);
     }
@@ -446,8 +458,13 @@  static void virtio_blk_dma_restart_cb(void *opaque, int running,
 {
     VirtIOBlock *s = opaque;
 
-    if (!running)
+    if (!running) {
+        /* qemu_drain_all() doesn't know about data plane, quiesce here */
+        if (s->dataplane) {
+            virtio_blk_data_plane_drain(s->dataplane);
+        }
         return;
+    }
 
     if (!s->bh) {
         s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
@@ -538,6 +555,10 @@  static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
     VirtIOBlock *s = to_virtio_blk(vdev);
     uint32_t features;
 
+    if (s->dataplane && !(status & VIRTIO_CONFIG_S_DRIVER)) {
+        virtio_blk_data_plane_stop(s->dataplane);
+    }
+
     if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
         return;
     }
@@ -604,6 +625,7 @@  VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
 {
     VirtIOBlock *s;
     static int virtio_blk_id;
+    int fd = -1;
 
     if (!blk->conf.bs) {
         error_report("drive property not set");
@@ -619,6 +641,21 @@  VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
         return NULL;
     }
 
+    if (blk->data_plane) {
+        if (blk->scsi) {
+            error_report("device is incompatible with x-data-plane, "
+                         "use scsi=off");
+            return NULL;
+        }
+
+        fd = raw_get_aio_fd(blk->conf.bs);
+        if (fd < 0) {
+            error_report("drive is incompatible with x-data-plane, "
+                         "use format=raw,cache=none,aio=native");
+            return NULL;
+        }
+    }
+
     s = (VirtIOBlock *)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK,
                                           sizeof(struct virtio_blk_config),
                                           sizeof(VirtIOBlock));
@@ -636,6 +673,17 @@  VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
 
     s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
 
+    if (fd >= 0) {
+        s->dataplane = virtio_blk_data_plane_create(&s->vdev, fd);
+
+        /* Prevent block operations that conflict with data plane thread */
+        bdrv_set_in_use(s->bs, 1);
+
+        error_setg(&s->migration_blocker,
+                   "x-data-plane does not support migration");
+        migrate_add_blocker(s->migration_blocker);
+    }
+
     qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
     s->qdev = dev;
     register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
@@ -652,6 +700,15 @@  VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
 void virtio_blk_exit(VirtIODevice *vdev)
 {
     VirtIOBlock *s = to_virtio_blk(vdev);
+
+    if (s->dataplane) {
+        migrate_del_blocker(s->migration_blocker);
+        error_free(s->migration_blocker);
+        bdrv_set_in_use(s->bs, 0);
+        virtio_blk_data_plane_destroy(s->dataplane);
+        s->dataplane = NULL;
+    }
+
     unregister_savevm(s->qdev, "virtio-blk", s);
     blockdev_mark_auto_del(s->bs);
     virtio_cleanup(vdev);
diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
index f0740d0..53d7971 100644
--- a/hw/virtio-blk.h
+++ b/hw/virtio-blk.h
@@ -105,6 +105,7 @@  struct VirtIOBlkConf
     char *serial;
     uint32_t scsi;
     uint32_t config_wce;
+    uint32_t data_plane;
 };
 
 #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 71f4fb5..32cc910 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -897,6 +897,9 @@  static Property virtio_blk_properties[] = {
 #endif
     DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true),
     DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    DEFINE_PROP_BIT("x-data-plane", VirtIOPCIProxy, blk.data_plane, 0, false),
+#endif
     DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
     DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
     DEFINE_PROP_END_OF_LIST(),