Patchwork msix: Drop tracking of used vectors

login
register
mail settings
Submitter Jan Kiszka
Date July 5, 2012, 9:42 a.m.
Message ID <4FF56176.5000100@siemens.com>
Download mbox | patch
Permalink /patch/169106/
State New
Headers show

Comments

Jan Kiszka - July 5, 2012, 9:42 a.m.
This optimization was once used in qemu-kvm to keep KVM route usage low.
But now we solved that problem via lazy updates. It also tried to handle
the case of vectors shared between different sources of the same device.
However, this never really worked and will have to be addressed
differently anyway. So drop this obsolete interface.

We still need interfaces to clear pending vectors though. Provide
msix_clear_vector and msix_clear_all_vectors for this.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 hw/ivshmem.c    |   20 -------------------
 hw/msix.c       |   57 ++++++++++++------------------------------------------
 hw/msix.h       |    5 +--
 hw/pci.h        |    2 -
 hw/virtio-pci.c |   20 +++++++-----------
 5 files changed, 23 insertions(+), 81 deletions(-)
Michael S. Tsirkin - July 5, 2012, 1:17 p.m.
On Thu, Jul 05, 2012 at 11:42:14AM +0200, Jan Kiszka wrote:
> This optimization was once used in qemu-kvm to keep KVM route usage low.
> But now we solved that problem via lazy updates.

What if we are using vhost which AFAIK can't use the lazy path?

> It also tried to handle
> the case of vectors shared between different sources of the same device.
> However, this never really worked

Interesting. Guests actually have support and it seems to work for me.
What's the issue?

> and will have to be addressed
> differently anyway.

what's the plan to address this?

> So drop this obsolete interface.
> 
> We still need interfaces to clear pending vectors though. Provide
> msix_clear_vector and msix_clear_all_vectors for this.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>

Looks like if guest tries to use too many vectors it will have to switch
to userspace virtio where previously we would report failure to guest
and it would configure less vectors, which seems better.

I'd like to understand whether this patch is a pre-requisite for
anything or just a cleanup?


> ---
>  hw/ivshmem.c    |   20 -------------------
>  hw/msix.c       |   57 ++++++++++++------------------------------------------
>  hw/msix.h       |    5 +--
>  hw/pci.h        |    2 -
>  hw/virtio-pci.c |   20 +++++++-----------
>  5 files changed, 23 insertions(+), 81 deletions(-)
> 
> diff --git a/hw/ivshmem.c b/hw/ivshmem.c
> index 7d4123c..9afc4a2 100644
> --- a/hw/ivshmem.c
> +++ b/hw/ivshmem.c
> @@ -508,28 +508,11 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
>      return;
>  }
>  
> -/* Select the MSI-X vectors used by device.
> - * ivshmem maps events to vectors statically, so
> - * we just enable all vectors on init and after reset. */
> -static void ivshmem_use_msix(IVShmemState * s)
> -{
> -    int i;
> -
> -    if (!msix_present(&s->dev)) {
> -        return;
> -    }
> -
> -    for (i = 0; i < s->vectors; i++) {
> -        msix_vector_use(&s->dev, i);
> -    }
> -}
> -
>  static void ivshmem_reset(DeviceState *d)
>  {
>      IVShmemState *s = DO_UPCAST(IVShmemState, dev.qdev, d);
>  
>      s->intrstatus = 0;
> -    ivshmem_use_msix(s);
>      return;
>  }
>  
> @@ -571,8 +554,6 @@ static void ivshmem_setup_msi(IVShmemState * s)
>  
>      /* allocate QEMU char devices for receiving interrupts */
>      s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
> -
> -    ivshmem_use_msix(s);
>  }
>  
>  static void ivshmem_save(QEMUFile* f, void *opaque)
> @@ -614,7 +595,6 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
>  
>      if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
>          msix_load(&proxy->dev, f);
> -	ivshmem_use_msix(proxy);
>      } else {
>          proxy->intrstatus = qemu_get_be32(f);
>          proxy->intrmask = qemu_get_be32(f);
> diff --git a/hw/msix.c b/hw/msix.c
> index fd9ea95..e38927c 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -260,7 +260,6 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
>  
>      dev->msix_table = g_malloc0(table_size);
>      dev->msix_pba = g_malloc0(pba_size);
> -    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
>  
>      msix_mask_all(dev, nentries);
>  
> @@ -317,16 +316,6 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
>      return 0;
>  }
>  
> -static void msix_free_irq_entries(PCIDevice *dev)
> -{
> -    int vector;
> -
> -    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
> -        dev->msix_entry_used[vector] = 0;
> -        msix_clr_pending(dev, vector);
> -    }
> -}
> -
>  /* Clean up resources for the device. */
>  void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
>  {
> @@ -335,7 +324,6 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
>      }
>      pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
>      dev->msix_cap = 0;
> -    msix_free_irq_entries(dev);
>      dev->msix_entries_nr = 0;
>      memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
>      memory_region_destroy(&dev->msix_pba_mmio);
> @@ -345,8 +333,6 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
>      memory_region_destroy(&dev->msix_table_mmio);
>      g_free(dev->msix_table);
>      dev->msix_table = NULL;
> -    g_free(dev->msix_entry_used);
> -    dev->msix_entry_used = NULL;
>      dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
>      return;
>  }
> @@ -381,7 +367,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
>          return;
>      }
>  
> -    msix_free_irq_entries(dev);
>      qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
>      qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8);
>      msix_update_function_masked(dev);
> @@ -410,8 +395,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
>  {
>      MSIMessage msg;
>  
> -    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
> +    if (vector >= dev->msix_entries_nr) {
>          return;
> +    }
>      if (msix_is_masked(dev, vector)) {
>          msix_set_pending(dev, vector);
>          return;
> @@ -427,7 +413,7 @@ void msix_reset(PCIDevice *dev)
>      if (!msix_present(dev)) {
>          return;
>      }
> -    msix_free_irq_entries(dev);
> +    msix_clear_all_vectors(dev);
>      dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
>  	    ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
>      memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
> @@ -435,41 +421,24 @@ void msix_reset(PCIDevice *dev)
>      msix_mask_all(dev, dev->msix_entries_nr);
>  }
>  
> -/* PCI spec suggests that devices make it possible for software to configure
> - * less vectors than supported by the device, but does not specify a standard
> - * mechanism for devices to do so.
> - *
> - * We support this by asking devices to declare vectors software is going to
> - * actually use, and checking this on the notification path. Devices that
> - * don't want to follow the spec suggestion can declare all vectors as used. */
> -
> -/* Mark vector as used. */
> -int msix_vector_use(PCIDevice *dev, unsigned vector)
> +/* Clear pending vector. */
> +void msix_clear_vector(PCIDevice *dev, unsigned vector)
>  {
> -    if (vector >= dev->msix_entries_nr)
> -        return -EINVAL;
> -    dev->msix_entry_used[vector]++;
> -    return 0;
> -}
> -
> -/* Mark vector as unused. */
> -void msix_vector_unuse(PCIDevice *dev, unsigned vector)
> -{
> -    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
> -        return;
> -    }
> -    if (--dev->msix_entry_used[vector]) {
> -        return;
> +    if (msix_present(dev) && vector < dev->msix_entries_nr) {
> +        msix_clr_pending(dev, vector);
>      }
> -    msix_clr_pending(dev, vector);
>  }
>  
> -void msix_unuse_all_vectors(PCIDevice *dev)
> +void msix_clear_all_vectors(PCIDevice *dev)
>  {
> +    unsigned int vector;
> +
>      if (!msix_present(dev)) {
>          return;
>      }
> -    msix_free_irq_entries(dev);
> +    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
> +        msix_clr_pending(dev, vector);
> +    }
>  }
>  
>  unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
> diff --git a/hw/msix.h b/hw/msix.h
> index 1786e27..f235e32 100644
> --- a/hw/msix.h
> +++ b/hw/msix.h
> @@ -25,9 +25,8 @@ void msix_load(PCIDevice *dev, QEMUFile *f);
>  int msix_enabled(PCIDevice *dev);
>  int msix_present(PCIDevice *dev);
>  
> -int msix_vector_use(PCIDevice *dev, unsigned vector);
> -void msix_vector_unuse(PCIDevice *dev, unsigned vector);
> -void msix_unuse_all_vectors(PCIDevice *dev);
> +void msix_clear_vector(PCIDevice *dev, unsigned vector);
> +void msix_clear_all_vectors(PCIDevice *dev);
>  
>  void msix_notify(PCIDevice *dev, unsigned vector);
>  
> diff --git a/hw/pci.h b/hw/pci.h
> index 6c80442..e4a2603 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -238,8 +238,6 @@ struct PCIDevice {
>      /* Memory Regions for MSIX table and pending bit entries. */
>      MemoryRegion msix_table_mmio;
>      MemoryRegion msix_pba_mmio;
> -    /* Reference-count for entries actually in use by driver. */
> -    unsigned *msix_entry_used;
>      /* MSIX function mask set or MSIX disabled */
>      bool msix_function_masked;
>      /* Version id needed for VMState */
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 6ed21b7..34a936e 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -137,9 +137,6 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
>      } else {
>          proxy->vdev->config_vector = VIRTIO_NO_VECTOR;
>      }
> -    if (proxy->vdev->config_vector != VIRTIO_NO_VECTOR) {
> -        return msix_vector_use(&proxy->pci_dev, proxy->vdev->config_vector);
> -    }
>      return 0;
>  }
>  
> @@ -153,9 +150,6 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
>          vector = VIRTIO_NO_VECTOR;
>      }
>      virtio_queue_set_vector(proxy->vdev, n, vector);
> -    if (vector != VIRTIO_NO_VECTOR) {
> -        return msix_vector_use(&proxy->pci_dev, vector);
> -    }
>      return 0;
>  }
>  
> @@ -300,7 +294,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
>          if (pa == 0) {
>              virtio_pci_stop_ioeventfd(proxy);
>              virtio_reset(proxy->vdev);
> -            msix_unuse_all_vectors(&proxy->pci_dev);
> +            msix_clear_all_vectors(&proxy->pci_dev);
>          }
>          else
>              virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
> @@ -327,7 +321,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
>  
>          if (vdev->status == 0) {
>              virtio_reset(proxy->vdev);
> -            msix_unuse_all_vectors(&proxy->pci_dev);
> +            msix_clear_all_vectors(&proxy->pci_dev);
>          }
>  
>          /* Linux before 2.6.34 sets the device as OK without enabling
> @@ -339,18 +333,20 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
>          }
>          break;
>      case VIRTIO_MSI_CONFIG_VECTOR:
> -        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
> +        msix_clear_vector(&proxy->pci_dev, vdev->config_vector);
>          /* Make it possible for guest to discover an error took place. */
> -        if (msix_vector_use(&proxy->pci_dev, val) < 0)
> +        if (val >= vdev->nvectors) {
>              val = VIRTIO_NO_VECTOR;
> +        }
>          vdev->config_vector = val;
>          break;
>      case VIRTIO_MSI_QUEUE_VECTOR:
> -        msix_vector_unuse(&proxy->pci_dev,
> +        msix_clear_vector(&proxy->pci_dev,
>                            virtio_queue_vector(vdev, vdev->queue_sel));
>          /* Make it possible for guest to discover an error took place. */
> -        if (msix_vector_use(&proxy->pci_dev, val) < 0)
> +        if (val >= vdev->nvectors) {
>              val = VIRTIO_NO_VECTOR;
> +        }
>          virtio_queue_set_vector(vdev, vdev->queue_sel, val);
>          break;
>      default:
> -- 
> 1.7.3.4
Jan Kiszka - July 5, 2012, 2:27 p.m.
On 2012-07-05 15:17, Michael S. Tsirkin wrote:
> On Thu, Jul 05, 2012 at 11:42:14AM +0200, Jan Kiszka wrote:
>> This optimization was once used in qemu-kvm to keep KVM route usage low.
>> But now we solved that problem via lazy updates.
> 
> What if we are using vhost which AFAIK can't use the lazy path?

It uses static vIRQs with irqfd, just as before.

> 
>> It also tried to handle
>> the case of vectors shared between different sources of the same device.
>> However, this never really worked
> 
> Interesting. Guests actually have support and it seems to work for me.
> What's the issue?

The state of shared vectors is not tracked. Consider a device has two
sources for the same vector and one source which raised the "line"
before is deregistered, then we will leave the vector set.

What we rather need is to track the sharing at device level and report
the resulting state to the MSI-X layer. That's what the reduced API will
allow. I don't think that vector sharing belongs to the generic layer.

> 
>> and will have to be addressed
>> differently anyway.
> 
> what's the plan to address this?

As said above: Extend virtio to track their queue states, let it
calculate the vector state and report that to the generic layer.

If we really once identify another device model - besides virtio - that
does vector sharing, we can still try to model an optional extension to
the MSI-X layer for such devices. But the majority of device models
should not be bothered with such special cases.

> 
>> So drop this obsolete interface.
>>
>> We still need interfaces to clear pending vectors though. Provide
>> msix_clear_vector and msix_clear_all_vectors for this.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> 
> Looks like if guest tries to use too many vectors it will have to switch
> to userspace virtio where previously we would report failure to guest
> and it would configure less vectors, which seems better.

There is nothing in the current approach that tracks the availability of
vector. That's because it depends on the device if it considers a vector
used when a single event source grabbed it or if it is able to share it
internally.

> 
> I'd like to understand whether this patch is a pre-requisite for
> anything or just a cleanup?

It avoids spreading of redundant msix_vector_use/unuse to more devices
(next ones will be VFIO and kvm device assignment).

Jan
Michael S. Tsirkin - July 5, 2012, 8:47 p.m.
On Thu, Jul 05, 2012 at 04:27:33PM +0200, Jan Kiszka wrote:
> On 2012-07-05 15:17, Michael S. Tsirkin wrote:
> > On Thu, Jul 05, 2012 at 11:42:14AM +0200, Jan Kiszka wrote:
> >> This optimization was once used in qemu-kvm to keep KVM route usage low.
> >> But now we solved that problem via lazy updates.
> > 
> > What if we are using vhost which AFAIK can't use the lazy path?
> 
> It uses static vIRQs with irqfd, just as before.

Exactly. And the API is helpful in that case.

> > 
> >> It also tried to handle
> >> the case of vectors shared between different sources of the same device.
> >> However, this never really worked
> > 
> > Interesting. Guests actually have support and it seems to work for me.
> > What's the issue?
> 
> The state of shared vectors is not tracked. Consider a device has two
> sources for the same vector and one source which raised the "line"
> before is deregistered, then we will leave the vector set.

In virtio vectors are registered/deregistered during setup
(before driver-ok), so this does not happen.
Further, even in theory this might not be an issue since anyone sharing
interrupts needs to deal with spurious events anyway.

So is or is there not a bug in virtio when it shares vectors?

> What we rather need is to track the sharing at device level and report
> the resulting state to the MSI-X layer. That's what the reduced API will
> allow. I don't think that vector sharing belongs to the generic layer.
> > 
> >> and will have to be addressed
> >> differently anyway.
> > 
> > what's the plan to address this?
> 
> As said above: Extend virtio to track their queue states, let it
> calculate the vector state and report that to the generic layer.
> 
> If we really once identify another device model - besides virtio - that
> does vector sharing, we can still try to model an optional extension to
> the MSI-X layer for such devices. But the majority of device models
> should not be bothered with such special cases.
> 
> > 
> >> So drop this obsolete interface.
> >>
> >> We still need interfaces to clear pending vectors though. Provide
> >> msix_clear_vector and msix_clear_all_vectors for this.
> >>
> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> > 
> > Looks like if guest tries to use too many vectors it will have to switch
> > to userspace virtio where previously we would report failure to guest
> > and it would configure less vectors, which seems better.
> 
> There is nothing in the current approach that tracks the availability of
> vector. That's because it depends on the device if it considers a vector
> used when a single event source grabbed it or if it is able to share it
> internally.

So I'd like to see some code that shows how we avoid the regression in the
above scenario, before we drop the API.

> > 
> > I'd like to understand whether this patch is a pre-requisite for
> > anything or just a cleanup?
> 
> It avoids spreading of redundant msix_vector_use/unuse to more devices
> (next ones will be VFIO and kvm device assignment).
> 
> Jan

Another less intrusive way is to add a flag that say
"device always uses all vectors".

> -- 
> Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
> Corporate Competence Center Embedded Linux
Jan Kiszka - July 5, 2012, 10:56 p.m.
On 2012-07-05 22:47, Michael S. Tsirkin wrote:
> On Thu, Jul 05, 2012 at 04:27:33PM +0200, Jan Kiszka wrote:
>> On 2012-07-05 15:17, Michael S. Tsirkin wrote:
>>> On Thu, Jul 05, 2012 at 11:42:14AM +0200, Jan Kiszka wrote:
>>>> This optimization was once used in qemu-kvm to keep KVM route usage low.
>>>> But now we solved that problem via lazy updates.
>>>
>>> What if we are using vhost which AFAIK can't use the lazy path?
>>
>> It uses static vIRQs with irqfd, just as before.
> 
> Exactly. And the API is helpful in that case.

Look at the code: this API serves no practical purpose anymore, not even
for the irqfd scenario. I'm not touching the irqfd code while removing
msix_entry_used.

> 
>>>
>>>> It also tried to handle
>>>> the case of vectors shared between different sources of the same device.
>>>> However, this never really worked
>>>
>>> Interesting. Guests actually have support and it seems to work for me.
>>> What's the issue?
>>
>> The state of shared vectors is not tracked. Consider a device has two
>> sources for the same vector and one source which raised the "line"
>> before is deregistered, then we will leave the vector set.
> 
> In virtio vectors are registered/deregistered during setup
> (before driver-ok), so this does not happen.

From that point of view, the current final msix_clr_pending in
msix_vector_unuse is also useless. Then what is the remaining purpose of
msix_entry_used? Also, we are not discussing a virtio API here but a
generic MSI-X feature that affects every MSI-X using device.

> Further, even in theory this might not be an issue since anyone sharing
> interrupts needs to deal with spurious events anyway.

This is not about spurious interrupts, it's about the proper state of
the PBA, something that should not contain any spurious states.

> 
> So is or is there not a bug in virtio when it shares vectors?

There are deficits in virtio about how it handles potential vector
sharing internally. But the use/unuse API will not be involved in making
this more spec conforming. Nor are there other users behind the horizon.
That's my point.

> 
>> What we rather need is to track the sharing at device level and report
>> the resulting state to the MSI-X layer. That's what the reduced API will
>> allow. I don't think that vector sharing belongs to the generic layer.
>>>
>>>> and will have to be addressed
>>>> differently anyway.
>>>
>>> what's the plan to address this?
>>
>> As said above: Extend virtio to track their queue states, let it
>> calculate the vector state and report that to the generic layer.
>>
>> If we really once identify another device model - besides virtio - that
>> does vector sharing, we can still try to model an optional extension to
>> the MSI-X layer for such devices. But the majority of device models
>> should not be bothered with such special cases.
>>
>>>
>>>> So drop this obsolete interface.
>>>>
>>>> We still need interfaces to clear pending vectors though. Provide
>>>> msix_clear_vector and msix_clear_all_vectors for this.
>>>>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>
>>> Looks like if guest tries to use too many vectors it will have to switch
>>> to userspace virtio where previously we would report failure to guest
>>> and it would configure less vectors, which seems better.
>>
>> There is nothing in the current approach that tracks the availability of
>> vector. That's because it depends on the device if it considers a vector
>> used when a single event source grabbed it or if it is able to share it
>> internally.
> 
> So I'd like to see some code that shows how we avoid the regression in the
> above scenario, before we drop the API.

Sorry, what regression? This patch does not change the behavior of
virtio. Please review it and point out the contrary.

> 
>>>
>>> I'd like to understand whether this patch is a pre-requisite for
>>> anything or just a cleanup?
>>
>> It avoids spreading of redundant msix_vector_use/unuse to more devices
>> (next ones will be VFIO and kvm device assignment).
>>
>> Jan
> 
> Another less intrusive way is to add a flag that say
> "device always uses all vectors".

The existing API is the intrusive one, that's why I'm trying to remove
it for a while now.

Can you sketch a scenario where it helps to track vector usage via a
central counter like it's done now? I simply don't see this. Therefore I
see no point in keeping this infrastructure around - or even adding a
flag to prolong its life.

Jan

Patch

diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 7d4123c..9afc4a2 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -508,28 +508,11 @@  static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
     return;
 }
 
-/* Select the MSI-X vectors used by device.
- * ivshmem maps events to vectors statically, so
- * we just enable all vectors on init and after reset. */
-static void ivshmem_use_msix(IVShmemState * s)
-{
-    int i;
-
-    if (!msix_present(&s->dev)) {
-        return;
-    }
-
-    for (i = 0; i < s->vectors; i++) {
-        msix_vector_use(&s->dev, i);
-    }
-}
-
 static void ivshmem_reset(DeviceState *d)
 {
     IVShmemState *s = DO_UPCAST(IVShmemState, dev.qdev, d);
 
     s->intrstatus = 0;
-    ivshmem_use_msix(s);
     return;
 }
 
@@ -571,8 +554,6 @@  static void ivshmem_setup_msi(IVShmemState * s)
 
     /* allocate QEMU char devices for receiving interrupts */
     s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
-
-    ivshmem_use_msix(s);
 }
 
 static void ivshmem_save(QEMUFile* f, void *opaque)
@@ -614,7 +595,6 @@  static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
 
     if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
         msix_load(&proxy->dev, f);
-	ivshmem_use_msix(proxy);
     } else {
         proxy->intrstatus = qemu_get_be32(f);
         proxy->intrmask = qemu_get_be32(f);
diff --git a/hw/msix.c b/hw/msix.c
index fd9ea95..e38927c 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -260,7 +260,6 @@  int msix_init(struct PCIDevice *dev, unsigned short nentries,
 
     dev->msix_table = g_malloc0(table_size);
     dev->msix_pba = g_malloc0(pba_size);
-    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
 
     msix_mask_all(dev, nentries);
 
@@ -317,16 +316,6 @@  int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
     return 0;
 }
 
-static void msix_free_irq_entries(PCIDevice *dev)
-{
-    int vector;
-
-    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
-        dev->msix_entry_used[vector] = 0;
-        msix_clr_pending(dev, vector);
-    }
-}
-
 /* Clean up resources for the device. */
 void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
 {
@@ -335,7 +324,6 @@  void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
     }
     pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
     dev->msix_cap = 0;
-    msix_free_irq_entries(dev);
     dev->msix_entries_nr = 0;
     memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
     memory_region_destroy(&dev->msix_pba_mmio);
@@ -345,8 +333,6 @@  void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
     memory_region_destroy(&dev->msix_table_mmio);
     g_free(dev->msix_table);
     dev->msix_table = NULL;
-    g_free(dev->msix_entry_used);
-    dev->msix_entry_used = NULL;
     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
     return;
 }
@@ -381,7 +367,6 @@  void msix_load(PCIDevice *dev, QEMUFile *f)
         return;
     }
 
-    msix_free_irq_entries(dev);
     qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
     qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8);
     msix_update_function_masked(dev);
@@ -410,8 +395,9 @@  void msix_notify(PCIDevice *dev, unsigned vector)
 {
     MSIMessage msg;
 
-    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+    if (vector >= dev->msix_entries_nr) {
         return;
+    }
     if (msix_is_masked(dev, vector)) {
         msix_set_pending(dev, vector);
         return;
@@ -427,7 +413,7 @@  void msix_reset(PCIDevice *dev)
     if (!msix_present(dev)) {
         return;
     }
-    msix_free_irq_entries(dev);
+    msix_clear_all_vectors(dev);
     dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
 	    ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
     memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
@@ -435,41 +421,24 @@  void msix_reset(PCIDevice *dev)
     msix_mask_all(dev, dev->msix_entries_nr);
 }
 
-/* PCI spec suggests that devices make it possible for software to configure
- * less vectors than supported by the device, but does not specify a standard
- * mechanism for devices to do so.
- *
- * We support this by asking devices to declare vectors software is going to
- * actually use, and checking this on the notification path. Devices that
- * don't want to follow the spec suggestion can declare all vectors as used. */
-
-/* Mark vector as used. */
-int msix_vector_use(PCIDevice *dev, unsigned vector)
+/* Clear pending vector. */
+void msix_clear_vector(PCIDevice *dev, unsigned vector)
 {
-    if (vector >= dev->msix_entries_nr)
-        return -EINVAL;
-    dev->msix_entry_used[vector]++;
-    return 0;
-}
-
-/* Mark vector as unused. */
-void msix_vector_unuse(PCIDevice *dev, unsigned vector)
-{
-    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
-        return;
-    }
-    if (--dev->msix_entry_used[vector]) {
-        return;
+    if (msix_present(dev) && vector < dev->msix_entries_nr) {
+        msix_clr_pending(dev, vector);
     }
-    msix_clr_pending(dev, vector);
 }
 
-void msix_unuse_all_vectors(PCIDevice *dev)
+void msix_clear_all_vectors(PCIDevice *dev)
 {
+    unsigned int vector;
+
     if (!msix_present(dev)) {
         return;
     }
-    msix_free_irq_entries(dev);
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
+        msix_clr_pending(dev, vector);
+    }
 }
 
 unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
diff --git a/hw/msix.h b/hw/msix.h
index 1786e27..f235e32 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -25,9 +25,8 @@  void msix_load(PCIDevice *dev, QEMUFile *f);
 int msix_enabled(PCIDevice *dev);
 int msix_present(PCIDevice *dev);
 
-int msix_vector_use(PCIDevice *dev, unsigned vector);
-void msix_vector_unuse(PCIDevice *dev, unsigned vector);
-void msix_unuse_all_vectors(PCIDevice *dev);
+void msix_clear_vector(PCIDevice *dev, unsigned vector);
+void msix_clear_all_vectors(PCIDevice *dev);
 
 void msix_notify(PCIDevice *dev, unsigned vector);
 
diff --git a/hw/pci.h b/hw/pci.h
index 6c80442..e4a2603 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -238,8 +238,6 @@  struct PCIDevice {
     /* Memory Regions for MSIX table and pending bit entries. */
     MemoryRegion msix_table_mmio;
     MemoryRegion msix_pba_mmio;
-    /* Reference-count for entries actually in use by driver. */
-    unsigned *msix_entry_used;
     /* MSIX function mask set or MSIX disabled */
     bool msix_function_masked;
     /* Version id needed for VMState */
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 6ed21b7..34a936e 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -137,9 +137,6 @@  static int virtio_pci_load_config(void * opaque, QEMUFile *f)
     } else {
         proxy->vdev->config_vector = VIRTIO_NO_VECTOR;
     }
-    if (proxy->vdev->config_vector != VIRTIO_NO_VECTOR) {
-        return msix_vector_use(&proxy->pci_dev, proxy->vdev->config_vector);
-    }
     return 0;
 }
 
@@ -153,9 +150,6 @@  static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
         vector = VIRTIO_NO_VECTOR;
     }
     virtio_queue_set_vector(proxy->vdev, n, vector);
-    if (vector != VIRTIO_NO_VECTOR) {
-        return msix_vector_use(&proxy->pci_dev, vector);
-    }
     return 0;
 }
 
@@ -300,7 +294,7 @@  static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         if (pa == 0) {
             virtio_pci_stop_ioeventfd(proxy);
             virtio_reset(proxy->vdev);
-            msix_unuse_all_vectors(&proxy->pci_dev);
+            msix_clear_all_vectors(&proxy->pci_dev);
         }
         else
             virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
@@ -327,7 +321,7 @@  static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 
         if (vdev->status == 0) {
             virtio_reset(proxy->vdev);
-            msix_unuse_all_vectors(&proxy->pci_dev);
+            msix_clear_all_vectors(&proxy->pci_dev);
         }
 
         /* Linux before 2.6.34 sets the device as OK without enabling
@@ -339,18 +333,20 @@  static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         }
         break;
     case VIRTIO_MSI_CONFIG_VECTOR:
-        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        msix_clear_vector(&proxy->pci_dev, vdev->config_vector);
         /* Make it possible for guest to discover an error took place. */
-        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+        if (val >= vdev->nvectors) {
             val = VIRTIO_NO_VECTOR;
+        }
         vdev->config_vector = val;
         break;
     case VIRTIO_MSI_QUEUE_VECTOR:
-        msix_vector_unuse(&proxy->pci_dev,
+        msix_clear_vector(&proxy->pci_dev,
                           virtio_queue_vector(vdev, vdev->queue_sel));
         /* Make it possible for guest to discover an error took place. */
-        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+        if (val >= vdev->nvectors) {
             val = VIRTIO_NO_VECTOR;
+        }
         virtio_queue_set_vector(vdev, vdev->queue_sel, val);
         break;
     default: