Patchwork [STABLE] Fix virtio-blk hot add after remove

login
register
mail settings
Submitter Anthony Liguori
Date Oct. 14, 2009, 4:59 p.m.
Message ID <1255539554-7956-1-git-send-email-aliguori@us.ibm.com>
Download mbox | patch
Permalink /patch/35996/
State New
Headers show

Comments

Anthony Liguori - Oct. 14, 2009, 4:59 p.m.
qdev_init_bdrv() expects that each drive added is the next logical unit for
the given interface type.  However, when dealing with hotplug, there may
be holes in the units.  drive_init reclaims holes in units but qdev_init_bdrv()
is not smart enough to do this.

Fortunately, in master, this has all been rewritten so for stable, let's hack
around this a bit.  To fix this, we need to tell qdev that a device has been
removed so that it can keep track of which units are allocated.  The only way
we can get a hook for this though is to attach to the pci uninit callback.  In
order for virtio-blk to get this, another uninit callback needs to be added to
the virtio layer.

Suggestions for a less ugly solution are appreciated.

This fixes https://bugs.launchpad.net/bugs/419590

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 hw/qdev.c       |   33 ++++++++++++++++++++++++++++++---
 hw/virtio-blk.c |    8 ++++++++
 hw/virtio-pci.c |   19 ++++++++++++++++++-
 hw/virtio.h     |    1 +
 sysemu.h        |    2 ++
 5 files changed, 59 insertions(+), 4 deletions(-)
Dustin Kirkland - Oct. 14, 2009, 6:22 p.m.
On Wed, 2009-10-14 at 11:59 -0500, Anthony Liguori wrote:
> qdev_init_bdrv() expects that each drive added is the next logical unit for
> the given interface type.  However, when dealing with hotplug, there may
> be holes in the units.  drive_init reclaims holes in units but qdev_init_bdrv()
> is not smart enough to do this.
> 
> Fortunately, in master, this has all been rewritten so for stable, let's hack
> around this a bit.  To fix this, we need to tell qdev that a device has been
> removed so that it can keep track of which units are allocated.  The only way
> we can get a hook for this though is to attach to the pci uninit callback.  In
> order for virtio-blk to get this, another uninit callback needs to be added to
> the virtio layer.
> 
> Suggestions for a less ugly solution are appreciated.
> 
> This fixes https://bugs.launchpad.net/bugs/419590
> 
> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
> ---
>  hw/qdev.c       |   33 ++++++++++++++++++++++++++++++---
>  hw/virtio-blk.c |    8 ++++++++
>  hw/virtio-pci.c |   19 ++++++++++++++++++-
>  hw/virtio.h     |    1 +
>  sysemu.h        |    2 ++
>  5 files changed, 59 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/qdev.c b/hw/qdev.c
> index faecc76..af60e3e 100644
> --- a/hw/qdev.c
> +++ b/hw/qdev.c
> @@ -181,21 +181,48 @@ void qdev_get_macaddr(DeviceState *dev, uint8_t *macaddr)
>      memcpy(macaddr, dev->nd->macaddr, 6);
>  }
>  
> -static int next_block_unit[IF_COUNT];
> +typedef struct BlockUnitState
> +{
> +    BlockDriverState *bs[32];
> +} BlockUnitState;
> +static BlockUnitState next_block_unit[IF_COUNT];
>  
>  /* Get a block device.  This should only be used for single-drive devices
>     (e.g. SD/Floppy/MTD).  Multi-disk devices (scsi/ide) should use the
>     appropriate bus.  */
>  BlockDriverState *qdev_init_bdrv(DeviceState *dev, BlockInterfaceType type)
>  {
> -    int unit = next_block_unit[type]++;
> +    BlockDriverState *bs;
> +    int unit;
>      int index;
>  
> +    unit = 0;
> +    while (next_block_unit[type].bs[unit]) {
> +        unit++;
> +    }
> +
>      index = drive_get_index(type, 0, unit);
>      if (index == -1) {
>          return NULL;
>      }
> -    return drives_table[index].bdrv;
> +
> +    bs = drives_table[index].bdrv;
> +    next_block_unit[type].bs[unit] = bs;
> +
> +    return bs;
> +}
> +
> +void qdev_uninit_bdrv(BlockDriverState *bs, BlockInterfaceType type)
> +{
> +    int unit;
> +
> +    for (unit = 0; unit < 32; unit++) {
> +        if (next_block_unit[type].bs[unit] == bs) {
> +            break;
> +        }
> +    }
> +
> +    next_block_unit[type].bs[unit] = NULL;
>  }
>  
>  BusState *qdev_get_child_bus(DeviceState *dev, const char *name)
> diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
> index 5036b5b..4bc11e6 100644
> --- a/hw/virtio-blk.c
> +++ b/hw/virtio-blk.c
> @@ -414,6 +414,13 @@ static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
>      return 0;
>  }
>  
> +static void virtio_blk_uninit(VirtIODevice *vdev)
> +{
> +    VirtIOBlock *s = to_virtio_blk(vdev);
> +
> +    qdev_uninit_bdrv(s->bs, IF_VIRTIO);
> +}
> +
>  VirtIODevice *virtio_blk_init(DeviceState *dev)
>  {
>      VirtIOBlock *s;
> @@ -430,6 +437,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev)
>      s->vdev.get_config = virtio_blk_update_config;
>      s->vdev.get_features = virtio_blk_get_features;
>      s->vdev.reset = virtio_blk_reset;
> +    s->vdev.uninit = virtio_blk_uninit;
>      s->bs = bs;
>      s->rq = NULL;
>      if (strlen(ps = (char *)drive_get_serial(bs)))
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 703f4fe..00b3998 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -375,6 +375,22 @@ static const VirtIOBindings virtio_pci_bindings = {
>      .load_queue = virtio_pci_load_queue,
>  };
>  
> +static int virtio_pci_uninit(PCIDevice *pci_dev)
> +{
> +    VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
> +    VirtIODevice *vdev = proxy->vdev;
> +
> +    if (vdev->uninit) {
> +        vdev->uninit(vdev);
> +    }
> +
> +    if (vdev->nvectors) {
> +        return msix_uninit(pci_dev);
> +    }
> +
> +    return 0;
> +}
> +
>  static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
>                              uint16_t vendor, uint16_t device,
>                              uint16_t class_code, uint8_t pif)
> @@ -407,10 +423,11 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
>                           PCI_ADDRESS_SPACE_MEM,
>                           msix_mmio_map);
>          proxy->pci_dev.config_write = virtio_write_config;
> -        proxy->pci_dev.unregister = msix_uninit;
>      } else
>          vdev->nvectors = 0;
>  
> +    proxy->pci_dev.unregister = virtio_pci_uninit;
> +
>      size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
>      if (size & (size-1))
>          size = 1 << qemu_fls(size);
> diff --git a/hw/virtio.h b/hw/virtio.h
> index aa55677..330b317 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -103,6 +103,7 @@ struct VirtIODevice
>      void (*get_config)(VirtIODevice *vdev, uint8_t *config);
>      void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>      void (*reset)(VirtIODevice *vdev);
> +    void (*uninit)(VirtIODevice *vdev);
>      VirtQueue *vq;
>      const VirtIOBindings *binding;
>      void *binding_opaque;
> diff --git a/sysemu.h b/sysemu.h
> index ce25109..18f610b 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -193,6 +193,8 @@ extern const char *drive_get_serial(BlockDriverState *bdrv);
>  extern BlockInterfaceErrorAction drive_get_onerror(BlockDriverState *bdrv);
>  
>  BlockDriverState *qdev_init_bdrv(DeviceState *dev, BlockInterfaceType type);
> +void qdev_uninit_bdrv(BlockDriverState *bs, BlockInterfaceType type);
> +
>  
>  struct drive_opt {
>      const char *file;

Hi Anthony-

Thanks for the patch.  I've tested this against qemu-kvm-0.11.0 and it's
working well.  I can add/remove/add/remove virtio disks without
segfaulting qemu.  We're going to carry this patch in Ubuntu.

Tested-by: Dustin Kirkland <kirkland@canonical.com>

Patch

diff --git a/hw/qdev.c b/hw/qdev.c
index faecc76..af60e3e 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -181,21 +181,48 @@  void qdev_get_macaddr(DeviceState *dev, uint8_t *macaddr)
     memcpy(macaddr, dev->nd->macaddr, 6);
 }
 
-static int next_block_unit[IF_COUNT];
+typedef struct BlockUnitState
+{
+    BlockDriverState *bs[32];
+} BlockUnitState;
+static BlockUnitState next_block_unit[IF_COUNT];
 
 /* Get a block device.  This should only be used for single-drive devices
    (e.g. SD/Floppy/MTD).  Multi-disk devices (scsi/ide) should use the
    appropriate bus.  */
 BlockDriverState *qdev_init_bdrv(DeviceState *dev, BlockInterfaceType type)
 {
-    int unit = next_block_unit[type]++;
+    BlockDriverState *bs;
+    int unit;
     int index;
 
+    unit = 0;
+    while (next_block_unit[type].bs[unit]) {
+        unit++;
+    }
+
     index = drive_get_index(type, 0, unit);
     if (index == -1) {
         return NULL;
     }
-    return drives_table[index].bdrv;
+
+    bs = drives_table[index].bdrv;
+    next_block_unit[type].bs[unit] = bs;
+
+    return bs;
+}
+
+void qdev_uninit_bdrv(BlockDriverState *bs, BlockInterfaceType type)
+{
+    int unit;
+
+    for (unit = 0; unit < 32; unit++) {
+        if (next_block_unit[type].bs[unit] == bs) {
+            break;
+        }
+    }
+
+    next_block_unit[type].bs[unit] = NULL;
 }
 
 BusState *qdev_get_child_bus(DeviceState *dev, const char *name)
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 5036b5b..4bc11e6 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -414,6 +414,13 @@  static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
+static void virtio_blk_uninit(VirtIODevice *vdev)
+{
+    VirtIOBlock *s = to_virtio_blk(vdev);
+
+    qdev_uninit_bdrv(s->bs, IF_VIRTIO);
+}
+
 VirtIODevice *virtio_blk_init(DeviceState *dev)
 {
     VirtIOBlock *s;
@@ -430,6 +437,7 @@  VirtIODevice *virtio_blk_init(DeviceState *dev)
     s->vdev.get_config = virtio_blk_update_config;
     s->vdev.get_features = virtio_blk_get_features;
     s->vdev.reset = virtio_blk_reset;
+    s->vdev.uninit = virtio_blk_uninit;
     s->bs = bs;
     s->rq = NULL;
     if (strlen(ps = (char *)drive_get_serial(bs)))
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 703f4fe..00b3998 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -375,6 +375,22 @@  static const VirtIOBindings virtio_pci_bindings = {
     .load_queue = virtio_pci_load_queue,
 };
 
+static int virtio_pci_uninit(PCIDevice *pci_dev)
+{
+    VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
+    VirtIODevice *vdev = proxy->vdev;
+
+    if (vdev->uninit) {
+        vdev->uninit(vdev);
+    }
+
+    if (vdev->nvectors) {
+        return msix_uninit(pci_dev);
+    }
+
+    return 0;
+}
+
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
                             uint16_t vendor, uint16_t device,
                             uint16_t class_code, uint8_t pif)
@@ -407,10 +423,11 @@  static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
                          PCI_ADDRESS_SPACE_MEM,
                          msix_mmio_map);
         proxy->pci_dev.config_write = virtio_write_config;
-        proxy->pci_dev.unregister = msix_uninit;
     } else
         vdev->nvectors = 0;
 
+    proxy->pci_dev.unregister = virtio_pci_uninit;
+
     size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
     if (size & (size-1))
         size = 1 << qemu_fls(size);
diff --git a/hw/virtio.h b/hw/virtio.h
index aa55677..330b317 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -103,6 +103,7 @@  struct VirtIODevice
     void (*get_config)(VirtIODevice *vdev, uint8_t *config);
     void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
     void (*reset)(VirtIODevice *vdev);
+    void (*uninit)(VirtIODevice *vdev);
     VirtQueue *vq;
     const VirtIOBindings *binding;
     void *binding_opaque;
diff --git a/sysemu.h b/sysemu.h
index ce25109..18f610b 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -193,6 +193,8 @@  extern const char *drive_get_serial(BlockDriverState *bdrv);
 extern BlockInterfaceErrorAction drive_get_onerror(BlockDriverState *bdrv);
 
 BlockDriverState *qdev_init_bdrv(DeviceState *dev, BlockInterfaceType type);
+void qdev_uninit_bdrv(BlockDriverState *bs, BlockInterfaceType type);
+
 
 struct drive_opt {
     const char *file;