Patchwork [v2,5/6] msix: Allow full specification of MSIX layout

login
register
mail settings
Submitter Alex Williamson
Date June 14, 2012, 4:51 a.m.
Message ID <20120614045155.11034.91392.stgit@bling.home>
Download mbox | patch
Permalink /patch/164814/
State New
Headers show

Comments

Alex Williamson - June 14, 2012, 4:51 a.m.
Finally, complete the fully specified interface.  msix_add_config()
gets moved to be closer to the setup functions where it's actually
used.  msix_mmio_setup() gets folded into msix_init().  And
msix_uninit() gets reworked a bit so we can call it as cleanup
from msix_init().

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/msix.c |  217 +++++++++++++++++++++++++++----------------------------------
 hw/msix.h |   10 ++-
 2 files changed, 101 insertions(+), 126 deletions(-)
Jan Kiszka - June 14, 2012, 6:17 a.m.
On 2012-06-14 06:51, Alex Williamson wrote:
> Finally, complete the fully specified interface.  msix_add_config()
> gets moved to be closer to the setup functions where it's actually
> used.  msix_mmio_setup() gets folded into msix_init().  And
> msix_uninit() gets reworked a bit so we can call it as cleanup
> from msix_init().
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
> 
>  hw/msix.c |  217 +++++++++++++++++++++++++++----------------------------------
>  hw/msix.h |   10 ++-
>  2 files changed, 101 insertions(+), 126 deletions(-)
> 
> diff --git a/hw/msix.c b/hw/msix.c
> index d476d07..047646a 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -27,14 +27,6 @@
>  #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
>  #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
>  
> -/* How much space does an MSIX table need. */
> -/* The spec requires giving the table structure
> - * a 4K aligned region all by itself. */
> -#define MSIX_PAGE_SIZE 0x1000
> -/* Reserve second half of the page for pending bits */
> -#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> -#define MSIX_MAX_ENTRIES 32
> -
>  static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>  {
>      uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
> @@ -45,47 +37,6 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>      return msg;
>  }
>  
> -/* Add MSI-X capability to the config space for the device. */
> -/* Given a bar and its size, add MSI-X table on top of it
> - * and fill MSI-X capability in the config space.
> - * Original bar size must be a power of 2 or 0.
> - * New bar size is returned. */
> -static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> -                           unsigned bar_nr, unsigned bar_size)
> -{
> -    int config_offset;
> -    uint8_t *config;
> -
> -    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> -        return -EINVAL;
> -    if (bar_size > 0x80000000)
> -        return -ENOSPC;
> -
> -    /* Require aligned offset for MSI-X structures */
> -    if (bar_size & ~(MSIX_PAGE_SIZE - 1)) {
> -        return -EINVAL;
> -    }
> -
> -    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
> -                                       0, MSIX_CAP_LENGTH);
> -    if (config_offset < 0)
> -        return config_offset;
> -    config = pdev->config + config_offset;
> -
> -    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> -    /* Table on top of BAR */
> -    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
> -    /* Pending bits on top of that */
> -    pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
> -                 bar_nr);
> -    pdev->msix_cap = config_offset;
> -    /* Make flags bit writable. */
> -    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
> -	    MSIX_MASKALL_MASK;
> -    pdev->msix_function_masked = true;
> -    return 0;
> -}
> -
>  static uint8_t msix_pending_mask(int vector)
>  {
>      return 1 << (vector % 8);
> @@ -240,20 +191,6 @@ static const MemoryRegionOps msix_pba_mmio_ops = {
>      },
>  };
>  
> -static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
> -{
> -    uint8_t *config = d->config + d->msix_cap;
> -    uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
> -    uint32_t table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
> -    uint32_t pba = pci_get_long(config + PCI_MSIX_PBA);
> -    uint32_t pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
> -    /* TODO: for assigned devices, we'll want to make it possible to map
> -     * pending bits separately in case they are in a separate bar. */
> -
> -    memory_region_add_subregion(bar, table_offset, &d->msix_table_mmio);
> -    memory_region_add_subregion(bar, pba_offset, &d->msix_pba_mmio);
> -}
> -
>  static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
>  {
>      int vector;
> @@ -268,11 +205,72 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
>      }
>  }
>  
> -/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> - * modified, it should be retrieved with msix_bar_size. */
> +/* Add MSI-X capability to the config space for the device. */
> +static int msix_add_config(struct PCIDevice *dev, unsigned short nentries,
> +                           uint8_t table_bar, unsigned table_offset,
> +                           uint8_t pba_bar, unsigned pba_offset, uint8_t pos)

Why not fold msix_add_config into msix_init and move sanity checks to
the beginning, i.e. before resource allocations? Then you also do not
need to call msix_uninit from msix_init. Likely a matter of taste, so
not a must-have.

> +{
> +    int config_offset;
> +    uint8_t *config;
> +
> +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
> +        return -EINVAL;
> +    }
> +
> +    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSIX,
> +                                       pos, MSIX_CAP_LENGTH);
> +    if (config_offset < 0) {
> +        return config_offset;
> +    }
> +
> +    config = dev->config + config_offset;
> +
> +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> +    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar);
> +    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar);
> +
> +    dev->msix_cap = config_offset;
> +
> +    /* Make flags bit writable. */
> +    dev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
> +                                                       MSIX_MASKALL_MASK;
> +
> +    dev->msix_function_masked = true;
> +
> +    return 0;
> +}
> +
> +/* Clean up resources for the device. */
> +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
> +{
> +    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {

msix_present()

> +        return;
> +    }
> +
> +    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> +    dev->msix_cap = 0;
> +    dev->msix_entries_nr = 0;
> +
> +    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
> +    memory_region_destroy(&dev->msix_pba_mmio);
> +    g_free(dev->msix_pba);
> +    dev->msix_pba = NULL;
> +
> +    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
> +    memory_region_destroy(&dev->msix_table_mmio);
> +    g_free(dev->msix_table);
> +    dev->msix_table = NULL;
> +
> +    g_free(dev->msix_entry_used);
> +    dev->msix_entry_used = NULL;
> +    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> +}
> +
> +/* Initialize the MSI-X structures */
>  int msix_init(struct PCIDevice *dev, unsigned short nentries,
> -              MemoryRegion *bar,
> -              unsigned bar_nr, unsigned bar_size)
> +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> +              unsigned table_offset, MemoryRegion *pba_bar,
> +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
>  {
>      int ret;
>      unsigned table_size, pba_size;
> @@ -281,43 +279,41 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
>      if (!msi_supported) {
>          return -ENOTSUP;
>      }
> -    if (nentries > MSIX_MAX_ENTRIES)
> -        return -EINVAL;
>  
>      table_size = nentries * PCI_MSIX_ENTRY_SIZE;
>      pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
>  
> -    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
> -                                        sizeof *dev->msix_entry_used);
> +    /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
> +    if ((table_bar_nr == pba_bar_nr &&
> +         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
> +        table_offset + table_size > memory_region_size(table_bar) ||
> +        pba_offset + pba_size > memory_region_size(pba_bar) ||
> +        (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
> +        return -EINVAL;
> +    }
>  
>      dev->msix_table = g_malloc0(table_size);
>      dev->msix_pba = g_malloc0(pba_size);
> +    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
> +    dev->msix_entries_nr = nentries;
> +    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> +
>      msix_mask_all(dev, nentries);
>  
>      memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
>                            "msix", table_size);
> +    memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
> +
>      memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
>                            "msix-pba", pba_size);
> +    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
>  
> -    dev->msix_entries_nr = nentries;
> -    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
> -    if (ret)
> -        goto err_config;
> -
> -    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> -    msix_mmio_setup(dev, bar);
> -    return 0;
> +    ret = msix_add_config(dev, nentries, table_bar_nr, table_offset,
> +                          pba_bar_nr, pba_offset, cap_pos);
> +    if (ret) {
> +        msix_uninit(dev, table_bar, pba_bar);
> +    }
>  
> -err_config:
> -    dev->msix_entries_nr = 0;
> -    memory_region_destroy(&dev->msix_pba_mmio);
> -    g_free(dev->msix_pba);
> -    dev->msix_pba = NULL;
> -    memory_region_destroy(&dev->msix_table_mmio);
> -    g_free(dev->msix_table);
> -    dev->msix_table = NULL;
> -    g_free(dev->msix_entry_used);
> -    dev->msix_entry_used = NULL;
>      return ret;
>  }
>  
> @@ -344,7 +340,8 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
>  
>      free(name);
>  
> -    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 4096);
> +    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 0,
> +                    &dev->msix_exclusive_bar, bar_nr, 2048, 0);

Doesn't this have to be msix_table_mmi and msix_pba_mmio?

>      if (ret) {
>          memory_region_destroy(&dev->msix_exclusive_bar);
>          return ret;
> @@ -356,6 +353,14 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
>      return 0;
>  }
>  
> +void msix_uninit_exclusive_bar(PCIDevice *dev)
> +{
> +    if (msix_present(dev)) {
> +        msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);

Same here.

> +        memory_region_destroy(&dev->msix_exclusive_bar);
> +    }
> +}
> +
>  static void msix_free_irq_entries(PCIDevice *dev)
>  {
>      int vector;
> @@ -366,38 +371,6 @@ static void msix_free_irq_entries(PCIDevice *dev)
>      }
>  }
>  
> -/* Clean up resources for the device. */
> -int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
> -{
> -    if (!msix_present(dev)) {
> -        return 0;
> -    }
> -    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> -    dev->msix_cap = 0;
> -    msix_free_irq_entries(dev);
> -    dev->msix_entries_nr = 0;
> -    memory_region_del_subregion(bar, &dev->msix_pba_mmio);
> -    memory_region_destroy(&dev->msix_pba_mmio);
> -    g_free(dev->msix_pba);
> -    dev->msix_pba = NULL;
> -    memory_region_del_subregion(bar, &dev->msix_table_mmio);
> -    memory_region_destroy(&dev->msix_table_mmio);
> -    g_free(dev->msix_table);
> -    dev->msix_table = NULL;
> -    g_free(dev->msix_entry_used);
> -    dev->msix_entry_used = NULL;
> -    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> -    return 0;
> -}
> -
> -void msix_uninit_exclusive_bar(PCIDevice *dev)
> -{
> -    if (msix_present(dev)) {
> -        msix_uninit(dev, &dev->msix_exclusive_bar);
> -        memory_region_destroy(&dev->msix_exclusive_bar);
> -    }
> -}
> -
>  void msix_save(PCIDevice *dev, QEMUFile *f)
>  {
>      unsigned n = dev->msix_entries_nr;
> diff --git a/hw/msix.h b/hw/msix.h
> index bed6bfb..14b1a2e 100644
> --- a/hw/msix.h
> +++ b/hw/msix.h
> @@ -4,16 +4,18 @@
>  #include "qemu-common.h"
>  #include "pci.h"
>  
> -int msix_init(PCIDevice *pdev, unsigned short nentries,
> -              MemoryRegion *bar,
> -              unsigned bar_nr, unsigned bar_size);
> +int msix_init(PCIDevice *dev, unsigned short nentries,
> +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> +              unsigned table_offset, MemoryRegion *pba_bar,
> +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos);
>  int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
>                              uint8_t bar_nr);
>  
>  void msix_write_config(PCIDevice *pci_dev, uint32_t address,
>                         uint32_t val, int len);
>  
> -int msix_uninit(PCIDevice *d, MemoryRegion *bar);
> +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
> +                 MemoryRegion *pba_bar);
>  void msix_uninit_exclusive_bar(PCIDevice *dev);
>  
>  unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
> 

Interfaces look good to me, the logic as well - except for the few remarks.

Jan
Michael S. Tsirkin - June 14, 2012, 8:10 a.m.
On Wed, Jun 13, 2012 at 10:51:56PM -0600, Alex Williamson wrote:
> Finally, complete the fully specified interface.  msix_add_config()
> gets moved to be closer to the setup functions where it's actually
> used.  msix_mmio_setup() gets folded into msix_init().  And
> msix_uninit() gets reworked a bit so we can call it as cleanup
> from msix_init().
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

Some review comments seem to have been ignored: this patch -
uses hardcoded constants, still has duplicated parameters,
move functions in the same patch as adding functionality.
Do you intend to send another revision?

> ---
> 
>  hw/msix.c |  217 +++++++++++++++++++++++++++----------------------------------
>  hw/msix.h |   10 ++-
>  2 files changed, 101 insertions(+), 126 deletions(-)
> 
> diff --git a/hw/msix.c b/hw/msix.c
> index d476d07..047646a 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -27,14 +27,6 @@
>  #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
>  #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
>  
> -/* How much space does an MSIX table need. */
> -/* The spec requires giving the table structure
> - * a 4K aligned region all by itself. */
> -#define MSIX_PAGE_SIZE 0x1000
> -/* Reserve second half of the page for pending bits */
> -#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> -#define MSIX_MAX_ENTRIES 32
> -
>  static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>  {
>      uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
> @@ -45,47 +37,6 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>      return msg;
>  }
>  
> -/* Add MSI-X capability to the config space for the device. */
> -/* Given a bar and its size, add MSI-X table on top of it
> - * and fill MSI-X capability in the config space.
> - * Original bar size must be a power of 2 or 0.
> - * New bar size is returned. */
> -static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> -                           unsigned bar_nr, unsigned bar_size)
> -{
> -    int config_offset;
> -    uint8_t *config;
> -
> -    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> -        return -EINVAL;
> -    if (bar_size > 0x80000000)
> -        return -ENOSPC;
> -
> -    /* Require aligned offset for MSI-X structures */
> -    if (bar_size & ~(MSIX_PAGE_SIZE - 1)) {
> -        return -EINVAL;
> -    }
> -
> -    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
> -                                       0, MSIX_CAP_LENGTH);
> -    if (config_offset < 0)
> -        return config_offset;
> -    config = pdev->config + config_offset;
> -
> -    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> -    /* Table on top of BAR */
> -    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
> -    /* Pending bits on top of that */
> -    pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
> -                 bar_nr);
> -    pdev->msix_cap = config_offset;
> -    /* Make flags bit writable. */
> -    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
> -	    MSIX_MASKALL_MASK;
> -    pdev->msix_function_masked = true;
> -    return 0;
> -}
> -
>  static uint8_t msix_pending_mask(int vector)
>  {
>      return 1 << (vector % 8);
> @@ -240,20 +191,6 @@ static const MemoryRegionOps msix_pba_mmio_ops = {
>      },
>  };
>  
> -static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
> -{
> -    uint8_t *config = d->config + d->msix_cap;
> -    uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
> -    uint32_t table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
> -    uint32_t pba = pci_get_long(config + PCI_MSIX_PBA);
> -    uint32_t pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
> -    /* TODO: for assigned devices, we'll want to make it possible to map
> -     * pending bits separately in case they are in a separate bar. */
> -
> -    memory_region_add_subregion(bar, table_offset, &d->msix_table_mmio);
> -    memory_region_add_subregion(bar, pba_offset, &d->msix_pba_mmio);
> -}
> -
>  static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
>  {
>      int vector;
> @@ -268,11 +205,72 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
>      }
>  }
>  
> -/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> - * modified, it should be retrieved with msix_bar_size. */
> +/* Add MSI-X capability to the config space for the device. */
> +static int msix_add_config(struct PCIDevice *dev, unsigned short nentries,
> +                           uint8_t table_bar, unsigned table_offset,
> +                           uint8_t pba_bar, unsigned pba_offset, uint8_t pos)
> +{
> +    int config_offset;
> +    uint8_t *config;
> +
> +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
> +        return -EINVAL;
> +    }
> +
> +    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSIX,
> +                                       pos, MSIX_CAP_LENGTH);
> +    if (config_offset < 0) {
> +        return config_offset;
> +    }
> +
> +    config = dev->config + config_offset;
> +
> +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> +    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar);
> +    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar);
> +
> +    dev->msix_cap = config_offset;
> +
> +    /* Make flags bit writable. */
> +    dev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
> +                                                       MSIX_MASKALL_MASK;
> +
> +    dev->msix_function_masked = true;
> +
> +    return 0;
> +}
> +
> +/* Clean up resources for the device. */
> +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
> +{
> +    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
> +        return;
> +    }
> +
> +    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> +    dev->msix_cap = 0;
> +    dev->msix_entries_nr = 0;
> +
> +    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
> +    memory_region_destroy(&dev->msix_pba_mmio);
> +    g_free(dev->msix_pba);
> +    dev->msix_pba = NULL;
> +
> +    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
> +    memory_region_destroy(&dev->msix_table_mmio);
> +    g_free(dev->msix_table);
> +    dev->msix_table = NULL;
> +
> +    g_free(dev->msix_entry_used);
> +    dev->msix_entry_used = NULL;
> +    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> +}
> +
> +/* Initialize the MSI-X structures */
>  int msix_init(struct PCIDevice *dev, unsigned short nentries,
> -              MemoryRegion *bar,
> -              unsigned bar_nr, unsigned bar_size)
> +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> +              unsigned table_offset, MemoryRegion *pba_bar,
> +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
>  {
>      int ret;
>      unsigned table_size, pba_size;
> @@ -281,43 +279,41 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
>      if (!msi_supported) {
>          return -ENOTSUP;
>      }
> -    if (nentries > MSIX_MAX_ENTRIES)
> -        return -EINVAL;
>  
>      table_size = nentries * PCI_MSIX_ENTRY_SIZE;
>      pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
>  
> -    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
> -                                        sizeof *dev->msix_entry_used);
> +    /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
> +    if ((table_bar_nr == pba_bar_nr &&
> +         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
> +        table_offset + table_size > memory_region_size(table_bar) ||
> +        pba_offset + pba_size > memory_region_size(pba_bar) ||
> +        (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
> +        return -EINVAL;
> +    }
>  
>      dev->msix_table = g_malloc0(table_size);
>      dev->msix_pba = g_malloc0(pba_size);
> +    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
> +    dev->msix_entries_nr = nentries;
> +    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> +
>      msix_mask_all(dev, nentries);
>  
>      memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
>                            "msix", table_size);
> +    memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
> +
>      memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
>                            "msix-pba", pba_size);
> +    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
>  
> -    dev->msix_entries_nr = nentries;
> -    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
> -    if (ret)
> -        goto err_config;
> -
> -    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> -    msix_mmio_setup(dev, bar);
> -    return 0;
> +    ret = msix_add_config(dev, nentries, table_bar_nr, table_offset,
> +                          pba_bar_nr, pba_offset, cap_pos);
> +    if (ret) {
> +        msix_uninit(dev, table_bar, pba_bar);
> +    }
>  
> -err_config:
> -    dev->msix_entries_nr = 0;
> -    memory_region_destroy(&dev->msix_pba_mmio);
> -    g_free(dev->msix_pba);
> -    dev->msix_pba = NULL;
> -    memory_region_destroy(&dev->msix_table_mmio);
> -    g_free(dev->msix_table);
> -    dev->msix_table = NULL;
> -    g_free(dev->msix_entry_used);
> -    dev->msix_entry_used = NULL;
>      return ret;
>  }
>  
> @@ -344,7 +340,8 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
>  
>      free(name);
>  
> -    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 4096);
> +    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 0,
> +                    &dev->msix_exclusive_bar, bar_nr, 2048, 0);
>      if (ret) {
>          memory_region_destroy(&dev->msix_exclusive_bar);
>          return ret;
> @@ -356,6 +353,14 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
>      return 0;
>  }
>  
> +void msix_uninit_exclusive_bar(PCIDevice *dev)
> +{
> +    if (msix_present(dev)) {
> +        msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
> +        memory_region_destroy(&dev->msix_exclusive_bar);
> +    }
> +}
> +
>  static void msix_free_irq_entries(PCIDevice *dev)
>  {
>      int vector;
> @@ -366,38 +371,6 @@ static void msix_free_irq_entries(PCIDevice *dev)
>      }
>  }
>  
> -/* Clean up resources for the device. */
> -int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
> -{
> -    if (!msix_present(dev)) {
> -        return 0;
> -    }
> -    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> -    dev->msix_cap = 0;
> -    msix_free_irq_entries(dev);
> -    dev->msix_entries_nr = 0;
> -    memory_region_del_subregion(bar, &dev->msix_pba_mmio);
> -    memory_region_destroy(&dev->msix_pba_mmio);
> -    g_free(dev->msix_pba);
> -    dev->msix_pba = NULL;
> -    memory_region_del_subregion(bar, &dev->msix_table_mmio);
> -    memory_region_destroy(&dev->msix_table_mmio);
> -    g_free(dev->msix_table);
> -    dev->msix_table = NULL;
> -    g_free(dev->msix_entry_used);
> -    dev->msix_entry_used = NULL;
> -    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> -    return 0;
> -}
> -
> -void msix_uninit_exclusive_bar(PCIDevice *dev)
> -{
> -    if (msix_present(dev)) {
> -        msix_uninit(dev, &dev->msix_exclusive_bar);
> -        memory_region_destroy(&dev->msix_exclusive_bar);
> -    }
> -}
> -
>  void msix_save(PCIDevice *dev, QEMUFile *f)
>  {
>      unsigned n = dev->msix_entries_nr;
> diff --git a/hw/msix.h b/hw/msix.h
> index bed6bfb..14b1a2e 100644
> --- a/hw/msix.h
> +++ b/hw/msix.h
> @@ -4,16 +4,18 @@
>  #include "qemu-common.h"
>  #include "pci.h"
>  
> -int msix_init(PCIDevice *pdev, unsigned short nentries,
> -              MemoryRegion *bar,
> -              unsigned bar_nr, unsigned bar_size);
> +int msix_init(PCIDevice *dev, unsigned short nentries,
> +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> +              unsigned table_offset, MemoryRegion *pba_bar,
> +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos);
>  int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
>                              uint8_t bar_nr);
>  
>  void msix_write_config(PCIDevice *pci_dev, uint32_t address,
>                         uint32_t val, int len);
>  
> -int msix_uninit(PCIDevice *d, MemoryRegion *bar);
> +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
> +                 MemoryRegion *pba_bar);
>  void msix_uninit_exclusive_bar(PCIDevice *dev);
>  
>  unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
Michael S. Tsirkin - June 14, 2012, 8:12 a.m.
On Thu, Jun 14, 2012 at 08:17:22AM +0200, Jan Kiszka wrote:
> On 2012-06-14 06:51, Alex Williamson wrote:
> > Finally, complete the fully specified interface.  msix_add_config()
> > gets moved to be closer to the setup functions where it's actually
> > used.  msix_mmio_setup() gets folded into msix_init().  And
> > msix_uninit() gets reworked a bit so we can call it as cleanup
> > from msix_init().
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> > 
> >  hw/msix.c |  217 +++++++++++++++++++++++++++----------------------------------
> >  hw/msix.h |   10 ++-
> >  2 files changed, 101 insertions(+), 126 deletions(-)
> > 
> > diff --git a/hw/msix.c b/hw/msix.c
> > index d476d07..047646a 100644
> > --- a/hw/msix.c
> > +++ b/hw/msix.c
> > @@ -27,14 +27,6 @@
> >  #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> >  #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
> >  
> > -/* How much space does an MSIX table need. */
> > -/* The spec requires giving the table structure
> > - * a 4K aligned region all by itself. */
> > -#define MSIX_PAGE_SIZE 0x1000
> > -/* Reserve second half of the page for pending bits */
> > -#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> > -#define MSIX_MAX_ENTRIES 32
> > -
> >  static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
> >  {
> >      uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
> > @@ -45,47 +37,6 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
> >      return msg;
> >  }
> >  
> > -/* Add MSI-X capability to the config space for the device. */
> > -/* Given a bar and its size, add MSI-X table on top of it
> > - * and fill MSI-X capability in the config space.
> > - * Original bar size must be a power of 2 or 0.
> > - * New bar size is returned. */
> > -static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> > -                           unsigned bar_nr, unsigned bar_size)
> > -{
> > -    int config_offset;
> > -    uint8_t *config;
> > -
> > -    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> > -        return -EINVAL;
> > -    if (bar_size > 0x80000000)
> > -        return -ENOSPC;
> > -
> > -    /* Require aligned offset for MSI-X structures */
> > -    if (bar_size & ~(MSIX_PAGE_SIZE - 1)) {
> > -        return -EINVAL;
> > -    }
> > -
> > -    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
> > -                                       0, MSIX_CAP_LENGTH);
> > -    if (config_offset < 0)
> > -        return config_offset;
> > -    config = pdev->config + config_offset;
> > -
> > -    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> > -    /* Table on top of BAR */
> > -    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
> > -    /* Pending bits on top of that */
> > -    pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
> > -                 bar_nr);
> > -    pdev->msix_cap = config_offset;
> > -    /* Make flags bit writable. */
> > -    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
> > -	    MSIX_MASKALL_MASK;
> > -    pdev->msix_function_masked = true;
> > -    return 0;
> > -}
> > -
> >  static uint8_t msix_pending_mask(int vector)
> >  {
> >      return 1 << (vector % 8);
> > @@ -240,20 +191,6 @@ static const MemoryRegionOps msix_pba_mmio_ops = {
> >      },
> >  };
> >  
> > -static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
> > -{
> > -    uint8_t *config = d->config + d->msix_cap;
> > -    uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
> > -    uint32_t table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
> > -    uint32_t pba = pci_get_long(config + PCI_MSIX_PBA);
> > -    uint32_t pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
> > -    /* TODO: for assigned devices, we'll want to make it possible to map
> > -     * pending bits separately in case they are in a separate bar. */
> > -
> > -    memory_region_add_subregion(bar, table_offset, &d->msix_table_mmio);
> > -    memory_region_add_subregion(bar, pba_offset, &d->msix_pba_mmio);
> > -}
> > -
> >  static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
> >  {
> >      int vector;
> > @@ -268,11 +205,72 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
> >      }
> >  }
> >  
> > -/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> > - * modified, it should be retrieved with msix_bar_size. */
> > +/* Add MSI-X capability to the config space for the device. */
> > +static int msix_add_config(struct PCIDevice *dev, unsigned short nentries,
> > +                           uint8_t table_bar, unsigned table_offset,
> > +                           uint8_t pba_bar, unsigned pba_offset, uint8_t pos)
> 
> Why not fold msix_add_config into msix_init and move sanity checks to
> the beginning, i.e. before resource allocations? Then you also do not
> need to call msix_uninit from msix_init. Likely a matter of taste, so
> not a must-have.

More importantly pls try to split code reorgs and adding functionality
so diff is readable.

> > +{
> > +    int config_offset;
> > +    uint8_t *config;
> > +
> > +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
> > +        return -EINVAL;
> > +    }
> > +
> > +    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSIX,
> > +                                       pos, MSIX_CAP_LENGTH);
> > +    if (config_offset < 0) {
> > +        return config_offset;
> > +    }
> > +
> > +    config = dev->config + config_offset;
> > +
> > +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> > +    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar);
> > +    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar);
> > +
> > +    dev->msix_cap = config_offset;
> > +
> > +    /* Make flags bit writable. */
> > +    dev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
> > +                                                       MSIX_MASKALL_MASK;
> > +
> > +    dev->msix_function_masked = true;
> > +
> > +    return 0;
> > +}
> > +
> > +/* Clean up resources for the device. */
> > +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
> > +{
> > +    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
> 
> msix_present()
> 
> > +        return;
> > +    }
> > +
> > +    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > +    dev->msix_cap = 0;
> > +    dev->msix_entries_nr = 0;
> > +
> > +    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
> > +    memory_region_destroy(&dev->msix_pba_mmio);
> > +    g_free(dev->msix_pba);
> > +    dev->msix_pba = NULL;
> > +
> > +    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
> > +    memory_region_destroy(&dev->msix_table_mmio);
> > +    g_free(dev->msix_table);
> > +    dev->msix_table = NULL;
> > +
> > +    g_free(dev->msix_entry_used);
> > +    dev->msix_entry_used = NULL;
> > +    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> > +}
> > +
> > +/* Initialize the MSI-X structures */
> >  int msix_init(struct PCIDevice *dev, unsigned short nentries,
> > -              MemoryRegion *bar,
> > -              unsigned bar_nr, unsigned bar_size)
> > +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> > +              unsigned table_offset, MemoryRegion *pba_bar,
> > +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
> >  {
> >      int ret;
> >      unsigned table_size, pba_size;
> > @@ -281,43 +279,41 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
> >      if (!msi_supported) {
> >          return -ENOTSUP;
> >      }
> > -    if (nentries > MSIX_MAX_ENTRIES)
> > -        return -EINVAL;
> >  
> >      table_size = nentries * PCI_MSIX_ENTRY_SIZE;
> >      pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
> >  
> > -    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
> > -                                        sizeof *dev->msix_entry_used);
> > +    /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
> > +    if ((table_bar_nr == pba_bar_nr &&
> > +         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
> > +        table_offset + table_size > memory_region_size(table_bar) ||
> > +        pba_offset + pba_size > memory_region_size(pba_bar) ||
> > +        (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
> > +        return -EINVAL;
> > +    }
> >  
> >      dev->msix_table = g_malloc0(table_size);
> >      dev->msix_pba = g_malloc0(pba_size);
> > +    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
> > +    dev->msix_entries_nr = nentries;
> > +    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> > +
> >      msix_mask_all(dev, nentries);
> >  
> >      memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
> >                            "msix", table_size);
> > +    memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
> > +
> >      memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
> >                            "msix-pba", pba_size);
> > +    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
> >  
> > -    dev->msix_entries_nr = nentries;
> > -    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
> > -    if (ret)
> > -        goto err_config;
> > -
> > -    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> > -    msix_mmio_setup(dev, bar);
> > -    return 0;
> > +    ret = msix_add_config(dev, nentries, table_bar_nr, table_offset,
> > +                          pba_bar_nr, pba_offset, cap_pos);
> > +    if (ret) {
> > +        msix_uninit(dev, table_bar, pba_bar);
> > +    }
> >  
> > -err_config:
> > -    dev->msix_entries_nr = 0;
> > -    memory_region_destroy(&dev->msix_pba_mmio);
> > -    g_free(dev->msix_pba);
> > -    dev->msix_pba = NULL;
> > -    memory_region_destroy(&dev->msix_table_mmio);
> > -    g_free(dev->msix_table);
> > -    dev->msix_table = NULL;
> > -    g_free(dev->msix_entry_used);
> > -    dev->msix_entry_used = NULL;
> >      return ret;
> >  }
> >  
> > @@ -344,7 +340,8 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
> >  
> >      free(name);
> >  
> > -    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 4096);
> > +    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 0,
> > +                    &dev->msix_exclusive_bar, bar_nr, 2048, 0);
> 
> Doesn't this have to be msix_table_mmi and msix_pba_mmio?
> 
> >      if (ret) {
> >          memory_region_destroy(&dev->msix_exclusive_bar);
> >          return ret;
> > @@ -356,6 +353,14 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
> >      return 0;
> >  }
> >  
> > +void msix_uninit_exclusive_bar(PCIDevice *dev)
> > +{
> > +    if (msix_present(dev)) {
> > +        msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
> 
> Same here.
> 
> > +        memory_region_destroy(&dev->msix_exclusive_bar);
> > +    }
> > +}
> > +
> >  static void msix_free_irq_entries(PCIDevice *dev)
> >  {
> >      int vector;
> > @@ -366,38 +371,6 @@ static void msix_free_irq_entries(PCIDevice *dev)
> >      }
> >  }
> >  
> > -/* Clean up resources for the device. */
> > -int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
> > -{
> > -    if (!msix_present(dev)) {
> > -        return 0;
> > -    }
> > -    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > -    dev->msix_cap = 0;
> > -    msix_free_irq_entries(dev);
> > -    dev->msix_entries_nr = 0;
> > -    memory_region_del_subregion(bar, &dev->msix_pba_mmio);
> > -    memory_region_destroy(&dev->msix_pba_mmio);
> > -    g_free(dev->msix_pba);
> > -    dev->msix_pba = NULL;
> > -    memory_region_del_subregion(bar, &dev->msix_table_mmio);
> > -    memory_region_destroy(&dev->msix_table_mmio);
> > -    g_free(dev->msix_table);
> > -    dev->msix_table = NULL;
> > -    g_free(dev->msix_entry_used);
> > -    dev->msix_entry_used = NULL;
> > -    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> > -    return 0;
> > -}
> > -
> > -void msix_uninit_exclusive_bar(PCIDevice *dev)
> > -{
> > -    if (msix_present(dev)) {
> > -        msix_uninit(dev, &dev->msix_exclusive_bar);
> > -        memory_region_destroy(&dev->msix_exclusive_bar);
> > -    }
> > -}
> > -
> >  void msix_save(PCIDevice *dev, QEMUFile *f)
> >  {
> >      unsigned n = dev->msix_entries_nr;
> > diff --git a/hw/msix.h b/hw/msix.h
> > index bed6bfb..14b1a2e 100644
> > --- a/hw/msix.h
> > +++ b/hw/msix.h
> > @@ -4,16 +4,18 @@
> >  #include "qemu-common.h"
> >  #include "pci.h"
> >  
> > -int msix_init(PCIDevice *pdev, unsigned short nentries,
> > -              MemoryRegion *bar,
> > -              unsigned bar_nr, unsigned bar_size);
> > +int msix_init(PCIDevice *dev, unsigned short nentries,
> > +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> > +              unsigned table_offset, MemoryRegion *pba_bar,
> > +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos);
> >  int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
> >                              uint8_t bar_nr);
> >  
> >  void msix_write_config(PCIDevice *pci_dev, uint32_t address,
> >                         uint32_t val, int len);
> >  
> > -int msix_uninit(PCIDevice *d, MemoryRegion *bar);
> > +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
> > +                 MemoryRegion *pba_bar);
> >  void msix_uninit_exclusive_bar(PCIDevice *dev);
> >  
> >  unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
> > 
> 
> Interfaces look good to me, the logic as well - except for the few remarks.
> 
> Jan
> 
> -- 
> Siemens AG, Corporate Technology, CT T DE IT 1
> Corporate Competence Center Embedded Linux
Alex Williamson - June 14, 2012, 2:12 p.m.
On Thu, 2012-06-14 at 08:17 +0200, Jan Kiszka wrote:
> On 2012-06-14 06:51, Alex Williamson wrote:
>   
> > -/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> > - * modified, it should be retrieved with msix_bar_size. */
> > +/* Add MSI-X capability to the config space for the device. */
> > +static int msix_add_config(struct PCIDevice *dev, unsigned short nentries,
> > +                           uint8_t table_bar, unsigned table_offset,
> > +                           uint8_t pba_bar, unsigned pba_offset, uint8_t pos)
> 
> Why not fold msix_add_config into msix_init and move sanity checks to
> the beginning, i.e. before resource allocations? Then you also do not
> need to call msix_uninit from msix_init. Likely a matter of taste, so
> not a must-have.

Yep, that works nicely.  fixed.

> > +{
> > +    int config_offset;
> > +    uint8_t *config;
> > +
> > +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
> > +        return -EINVAL;
> > +    }
> > +
> > +    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSIX,
> > +                                       pos, MSIX_CAP_LENGTH);
> > +    if (config_offset < 0) {
> > +        return config_offset;
> > +    }
> > +
> > +    config = dev->config + config_offset;
> > +
> > +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> > +    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar);
> > +    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar);
> > +
> > +    dev->msix_cap = config_offset;
> > +
> > +    /* Make flags bit writable. */
> > +    dev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
> > +                                                       MSIX_MASKALL_MASK;
> > +
> > +    dev->msix_function_masked = true;
> > +
> > +    return 0;
> > +}
> > +
> > +/* Clean up resources for the device. */
> > +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
> > +{
> > +    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
> 
> msix_present()

Fixed.

> > +        return;
> > +    }
> > +
> > +    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > +    dev->msix_cap = 0;
> > +    dev->msix_entries_nr = 0;
> > +
> > +    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
> > +    memory_region_destroy(&dev->msix_pba_mmio);
> > +    g_free(dev->msix_pba);
> > +    dev->msix_pba = NULL;
> > +
> > +    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
> > +    memory_region_destroy(&dev->msix_table_mmio);
> > +    g_free(dev->msix_table);
> > +    dev->msix_table = NULL;
> > +
> > +    g_free(dev->msix_entry_used);
> > +    dev->msix_entry_used = NULL;
> > +    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> > +}
> > +
> > +/* Initialize the MSI-X structures */
> >  int msix_init(struct PCIDevice *dev, unsigned short nentries,
> > -              MemoryRegion *bar,
> > -              unsigned bar_nr, unsigned bar_size)
> > +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> > +              unsigned table_offset, MemoryRegion *pba_bar,
> > +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
> >  {
> >      int ret;
> >      unsigned table_size, pba_size;
> > @@ -281,43 +279,41 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
> >      if (!msi_supported) {
> >          return -ENOTSUP;
> >      }
> > -    if (nentries > MSIX_MAX_ENTRIES)
> > -        return -EINVAL;
> >  
> >      table_size = nentries * PCI_MSIX_ENTRY_SIZE;
> >      pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
> >  
> > -    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
> > -                                        sizeof *dev->msix_entry_used);
> > +    /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
> > +    if ((table_bar_nr == pba_bar_nr &&
> > +         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
> > +        table_offset + table_size > memory_region_size(table_bar) ||
> > +        pba_offset + pba_size > memory_region_size(pba_bar) ||
> > +        (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
> > +        return -EINVAL;
> > +    }
> >  
> >      dev->msix_table = g_malloc0(table_size);
> >      dev->msix_pba = g_malloc0(pba_size);
> > +    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
> > +    dev->msix_entries_nr = nentries;
> > +    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> > +
> >      msix_mask_all(dev, nentries);
> >  
> >      memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
> >                            "msix", table_size);
> > +    memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
> > +
> >      memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
> >                            "msix-pba", pba_size);
> > +    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
> >  
> > -    dev->msix_entries_nr = nentries;
> > -    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
> > -    if (ret)
> > -        goto err_config;
> > -
> > -    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> > -    msix_mmio_setup(dev, bar);
> > -    return 0;
> > +    ret = msix_add_config(dev, nentries, table_bar_nr, table_offset,
> > +                          pba_bar_nr, pba_offset, cap_pos);
> > +    if (ret) {
> > +        msix_uninit(dev, table_bar, pba_bar);
> > +    }
> >  
> > -err_config:
> > -    dev->msix_entries_nr = 0;
> > -    memory_region_destroy(&dev->msix_pba_mmio);
> > -    g_free(dev->msix_pba);
> > -    dev->msix_pba = NULL;
> > -    memory_region_destroy(&dev->msix_table_mmio);
> > -    g_free(dev->msix_table);
> > -    dev->msix_table = NULL;
> > -    g_free(dev->msix_entry_used);
> > -    dev->msix_entry_used = NULL;
> >      return ret;
> >  }
> >  
> > @@ -344,7 +340,8 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
> >  
> >      free(name);
> >  
> > -    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 4096);
> > +    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 0,
> > +                    &dev->msix_exclusive_bar, bar_nr, 2048, 0);
> 
> Doesn't this have to be msix_table_mmi and msix_pba_mmio?

Nope, msix_exclusive_bar is the container, msix_table_mmio and
msix_pba_mmio are the subregions for their respective types.

> >      if (ret) {
> >          memory_region_destroy(&dev->msix_exclusive_bar);
> >          return ret;
> > @@ -356,6 +353,14 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
> >      return 0;
> >  }
> >  
> > +void msix_uninit_exclusive_bar(PCIDevice *dev)
> > +{
> > +    if (msix_present(dev)) {
> > +        msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
> 
> Same here.

Same as above.

> > +        memory_region_destroy(&dev->msix_exclusive_bar);
> > +    }
> > +}
> > +
> >  static void msix_free_irq_entries(PCIDevice *dev)
> >  {
> >      int vector;
> > @@ -366,38 +371,6 @@ static void msix_free_irq_entries(PCIDevice *dev)
> >      }
> >  }
> >  
> > -/* Clean up resources for the device. */
> > -int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
> > -{
> > -    if (!msix_present(dev)) {
> > -        return 0;
> > -    }
> > -    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > -    dev->msix_cap = 0;
> > -    msix_free_irq_entries(dev);
> > -    dev->msix_entries_nr = 0;
> > -    memory_region_del_subregion(bar, &dev->msix_pba_mmio);
> > -    memory_region_destroy(&dev->msix_pba_mmio);
> > -    g_free(dev->msix_pba);
> > -    dev->msix_pba = NULL;
> > -    memory_region_del_subregion(bar, &dev->msix_table_mmio);
> > -    memory_region_destroy(&dev->msix_table_mmio);
> > -    g_free(dev->msix_table);
> > -    dev->msix_table = NULL;
> > -    g_free(dev->msix_entry_used);
> > -    dev->msix_entry_used = NULL;
> > -    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> > -    return 0;
> > -}
> > -
> > -void msix_uninit_exclusive_bar(PCIDevice *dev)
> > -{
> > -    if (msix_present(dev)) {
> > -        msix_uninit(dev, &dev->msix_exclusive_bar);
> > -        memory_region_destroy(&dev->msix_exclusive_bar);
> > -    }
> > -}
> > -
> >  void msix_save(PCIDevice *dev, QEMUFile *f)
> >  {
> >      unsigned n = dev->msix_entries_nr;
> > diff --git a/hw/msix.h b/hw/msix.h
> > index bed6bfb..14b1a2e 100644
> > --- a/hw/msix.h
> > +++ b/hw/msix.h
> > @@ -4,16 +4,18 @@
> >  #include "qemu-common.h"
> >  #include "pci.h"
> >  
> > -int msix_init(PCIDevice *pdev, unsigned short nentries,
> > -              MemoryRegion *bar,
> > -              unsigned bar_nr, unsigned bar_size);
> > +int msix_init(PCIDevice *dev, unsigned short nentries,
> > +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> > +              unsigned table_offset, MemoryRegion *pba_bar,
> > +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos);
> >  int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
> >                              uint8_t bar_nr);
> >  
> >  void msix_write_config(PCIDevice *pci_dev, uint32_t address,
> >                         uint32_t val, int len);
> >  
> > -int msix_uninit(PCIDevice *d, MemoryRegion *bar);
> > +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
> > +                 MemoryRegion *pba_bar);
> >  void msix_uninit_exclusive_bar(PCIDevice *dev);
> >  
> >  unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
> > 
> 
> Interfaces look good to me, the logic as well - except for the few remarks.

Thanks!

Alex
Alex Williamson - June 14, 2012, 2:15 p.m.
On Thu, 2012-06-14 at 11:10 +0300, Michael S. Tsirkin wrote:
> On Wed, Jun 13, 2012 at 10:51:56PM -0600, Alex Williamson wrote:
> > Finally, complete the fully specified interface.  msix_add_config()
> > gets moved to be closer to the setup functions where it's actually
> > used.  msix_mmio_setup() gets folded into msix_init().  And
> > msix_uninit() gets reworked a bit so we can call it as cleanup
> > from msix_init().
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> 
> Some review comments seem to have been ignored: this patch -
> uses hardcoded constants, still has duplicated parameters,
> move functions in the same patch as adding functionality.
> Do you intend to send another revision?

The comments weren't ignored, I replied to them.  Please check your
mail.  In the interim, I sent a new series.  Thanks,

Alex

> > ---
> > 
> >  hw/msix.c |  217 +++++++++++++++++++++++++++----------------------------------
> >  hw/msix.h |   10 ++-
> >  2 files changed, 101 insertions(+), 126 deletions(-)
> > 
> > diff --git a/hw/msix.c b/hw/msix.c
> > index d476d07..047646a 100644
> > --- a/hw/msix.c
> > +++ b/hw/msix.c
> > @@ -27,14 +27,6 @@
> >  #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
> >  #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
> >  
> > -/* How much space does an MSIX table need. */
> > -/* The spec requires giving the table structure
> > - * a 4K aligned region all by itself. */
> > -#define MSIX_PAGE_SIZE 0x1000
> > -/* Reserve second half of the page for pending bits */
> > -#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
> > -#define MSIX_MAX_ENTRIES 32
> > -
> >  static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
> >  {
> >      uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
> > @@ -45,47 +37,6 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
> >      return msg;
> >  }
> >  
> > -/* Add MSI-X capability to the config space for the device. */
> > -/* Given a bar and its size, add MSI-X table on top of it
> > - * and fill MSI-X capability in the config space.
> > - * Original bar size must be a power of 2 or 0.
> > - * New bar size is returned. */
> > -static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
> > -                           unsigned bar_nr, unsigned bar_size)
> > -{
> > -    int config_offset;
> > -    uint8_t *config;
> > -
> > -    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
> > -        return -EINVAL;
> > -    if (bar_size > 0x80000000)
> > -        return -ENOSPC;
> > -
> > -    /* Require aligned offset for MSI-X structures */
> > -    if (bar_size & ~(MSIX_PAGE_SIZE - 1)) {
> > -        return -EINVAL;
> > -    }
> > -
> > -    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
> > -                                       0, MSIX_CAP_LENGTH);
> > -    if (config_offset < 0)
> > -        return config_offset;
> > -    config = pdev->config + config_offset;
> > -
> > -    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> > -    /* Table on top of BAR */
> > -    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
> > -    /* Pending bits on top of that */
> > -    pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
> > -                 bar_nr);
> > -    pdev->msix_cap = config_offset;
> > -    /* Make flags bit writable. */
> > -    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
> > -	    MSIX_MASKALL_MASK;
> > -    pdev->msix_function_masked = true;
> > -    return 0;
> > -}
> > -
> >  static uint8_t msix_pending_mask(int vector)
> >  {
> >      return 1 << (vector % 8);
> > @@ -240,20 +191,6 @@ static const MemoryRegionOps msix_pba_mmio_ops = {
> >      },
> >  };
> >  
> > -static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
> > -{
> > -    uint8_t *config = d->config + d->msix_cap;
> > -    uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
> > -    uint32_t table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
> > -    uint32_t pba = pci_get_long(config + PCI_MSIX_PBA);
> > -    uint32_t pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
> > -    /* TODO: for assigned devices, we'll want to make it possible to map
> > -     * pending bits separately in case they are in a separate bar. */
> > -
> > -    memory_region_add_subregion(bar, table_offset, &d->msix_table_mmio);
> > -    memory_region_add_subregion(bar, pba_offset, &d->msix_pba_mmio);
> > -}
> > -
> >  static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
> >  {
> >      int vector;
> > @@ -268,11 +205,72 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
> >      }
> >  }
> >  
> > -/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
> > - * modified, it should be retrieved with msix_bar_size. */
> > +/* Add MSI-X capability to the config space for the device. */
> > +static int msix_add_config(struct PCIDevice *dev, unsigned short nentries,
> > +                           uint8_t table_bar, unsigned table_offset,
> > +                           uint8_t pba_bar, unsigned pba_offset, uint8_t pos)
> > +{
> > +    int config_offset;
> > +    uint8_t *config;
> > +
> > +    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
> > +        return -EINVAL;
> > +    }
> > +
> > +    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSIX,
> > +                                       pos, MSIX_CAP_LENGTH);
> > +    if (config_offset < 0) {
> > +        return config_offset;
> > +    }
> > +
> > +    config = dev->config + config_offset;
> > +
> > +    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
> > +    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar);
> > +    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar);
> > +
> > +    dev->msix_cap = config_offset;
> > +
> > +    /* Make flags bit writable. */
> > +    dev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
> > +                                                       MSIX_MASKALL_MASK;
> > +
> > +    dev->msix_function_masked = true;
> > +
> > +    return 0;
> > +}
> > +
> > +/* Clean up resources for the device. */
> > +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
> > +{
> > +    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
> > +        return;
> > +    }
> > +
> > +    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > +    dev->msix_cap = 0;
> > +    dev->msix_entries_nr = 0;
> > +
> > +    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
> > +    memory_region_destroy(&dev->msix_pba_mmio);
> > +    g_free(dev->msix_pba);
> > +    dev->msix_pba = NULL;
> > +
> > +    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
> > +    memory_region_destroy(&dev->msix_table_mmio);
> > +    g_free(dev->msix_table);
> > +    dev->msix_table = NULL;
> > +
> > +    g_free(dev->msix_entry_used);
> > +    dev->msix_entry_used = NULL;
> > +    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> > +}
> > +
> > +/* Initialize the MSI-X structures */
> >  int msix_init(struct PCIDevice *dev, unsigned short nentries,
> > -              MemoryRegion *bar,
> > -              unsigned bar_nr, unsigned bar_size)
> > +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> > +              unsigned table_offset, MemoryRegion *pba_bar,
> > +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
> >  {
> >      int ret;
> >      unsigned table_size, pba_size;
> > @@ -281,43 +279,41 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
> >      if (!msi_supported) {
> >          return -ENOTSUP;
> >      }
> > -    if (nentries > MSIX_MAX_ENTRIES)
> > -        return -EINVAL;
> >  
> >      table_size = nentries * PCI_MSIX_ENTRY_SIZE;
> >      pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
> >  
> > -    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
> > -                                        sizeof *dev->msix_entry_used);
> > +    /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
> > +    if ((table_bar_nr == pba_bar_nr &&
> > +         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
> > +        table_offset + table_size > memory_region_size(table_bar) ||
> > +        pba_offset + pba_size > memory_region_size(pba_bar) ||
> > +        (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
> > +        return -EINVAL;
> > +    }
> >  
> >      dev->msix_table = g_malloc0(table_size);
> >      dev->msix_pba = g_malloc0(pba_size);
> > +    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
> > +    dev->msix_entries_nr = nentries;
> > +    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> > +
> >      msix_mask_all(dev, nentries);
> >  
> >      memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
> >                            "msix", table_size);
> > +    memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
> > +
> >      memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
> >                            "msix-pba", pba_size);
> > +    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
> >  
> > -    dev->msix_entries_nr = nentries;
> > -    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
> > -    if (ret)
> > -        goto err_config;
> > -
> > -    dev->cap_present |= QEMU_PCI_CAP_MSIX;
> > -    msix_mmio_setup(dev, bar);
> > -    return 0;
> > +    ret = msix_add_config(dev, nentries, table_bar_nr, table_offset,
> > +                          pba_bar_nr, pba_offset, cap_pos);
> > +    if (ret) {
> > +        msix_uninit(dev, table_bar, pba_bar);
> > +    }
> >  
> > -err_config:
> > -    dev->msix_entries_nr = 0;
> > -    memory_region_destroy(&dev->msix_pba_mmio);
> > -    g_free(dev->msix_pba);
> > -    dev->msix_pba = NULL;
> > -    memory_region_destroy(&dev->msix_table_mmio);
> > -    g_free(dev->msix_table);
> > -    dev->msix_table = NULL;
> > -    g_free(dev->msix_entry_used);
> > -    dev->msix_entry_used = NULL;
> >      return ret;
> >  }
> >  
> > @@ -344,7 +340,8 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
> >  
> >      free(name);
> >  
> > -    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 4096);
> > +    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 0,
> > +                    &dev->msix_exclusive_bar, bar_nr, 2048, 0);
> >      if (ret) {
> >          memory_region_destroy(&dev->msix_exclusive_bar);
> >          return ret;
> > @@ -356,6 +353,14 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
> >      return 0;
> >  }
> >  
> > +void msix_uninit_exclusive_bar(PCIDevice *dev)
> > +{
> > +    if (msix_present(dev)) {
> > +        msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
> > +        memory_region_destroy(&dev->msix_exclusive_bar);
> > +    }
> > +}
> > +
> >  static void msix_free_irq_entries(PCIDevice *dev)
> >  {
> >      int vector;
> > @@ -366,38 +371,6 @@ static void msix_free_irq_entries(PCIDevice *dev)
> >      }
> >  }
> >  
> > -/* Clean up resources for the device. */
> > -int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
> > -{
> > -    if (!msix_present(dev)) {
> > -        return 0;
> > -    }
> > -    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
> > -    dev->msix_cap = 0;
> > -    msix_free_irq_entries(dev);
> > -    dev->msix_entries_nr = 0;
> > -    memory_region_del_subregion(bar, &dev->msix_pba_mmio);
> > -    memory_region_destroy(&dev->msix_pba_mmio);
> > -    g_free(dev->msix_pba);
> > -    dev->msix_pba = NULL;
> > -    memory_region_del_subregion(bar, &dev->msix_table_mmio);
> > -    memory_region_destroy(&dev->msix_table_mmio);
> > -    g_free(dev->msix_table);
> > -    dev->msix_table = NULL;
> > -    g_free(dev->msix_entry_used);
> > -    dev->msix_entry_used = NULL;
> > -    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
> > -    return 0;
> > -}
> > -
> > -void msix_uninit_exclusive_bar(PCIDevice *dev)
> > -{
> > -    if (msix_present(dev)) {
> > -        msix_uninit(dev, &dev->msix_exclusive_bar);
> > -        memory_region_destroy(&dev->msix_exclusive_bar);
> > -    }
> > -}
> > -
> >  void msix_save(PCIDevice *dev, QEMUFile *f)
> >  {
> >      unsigned n = dev->msix_entries_nr;
> > diff --git a/hw/msix.h b/hw/msix.h
> > index bed6bfb..14b1a2e 100644
> > --- a/hw/msix.h
> > +++ b/hw/msix.h
> > @@ -4,16 +4,18 @@
> >  #include "qemu-common.h"
> >  #include "pci.h"
> >  
> > -int msix_init(PCIDevice *pdev, unsigned short nentries,
> > -              MemoryRegion *bar,
> > -              unsigned bar_nr, unsigned bar_size);
> > +int msix_init(PCIDevice *dev, unsigned short nentries,
> > +              MemoryRegion *table_bar, uint8_t table_bar_nr,
> > +              unsigned table_offset, MemoryRegion *pba_bar,
> > +              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos);
> >  int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
> >                              uint8_t bar_nr);
> >  
> >  void msix_write_config(PCIDevice *pci_dev, uint32_t address,
> >                         uint32_t val, int len);
> >  
> > -int msix_uninit(PCIDevice *d, MemoryRegion *bar);
> > +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
> > +                 MemoryRegion *pba_bar);
> >  void msix_uninit_exclusive_bar(PCIDevice *dev);
> >  
> >  unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);

Patch

diff --git a/hw/msix.c b/hw/msix.c
index d476d07..047646a 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -27,14 +27,6 @@ 
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
 
-/* How much space does an MSIX table need. */
-/* The spec requires giving the table structure
- * a 4K aligned region all by itself. */
-#define MSIX_PAGE_SIZE 0x1000
-/* Reserve second half of the page for pending bits */
-#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
-#define MSIX_MAX_ENTRIES 32
-
 static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
 {
     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
@@ -45,47 +37,6 @@  static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
     return msg;
 }
 
-/* Add MSI-X capability to the config space for the device. */
-/* Given a bar and its size, add MSI-X table on top of it
- * and fill MSI-X capability in the config space.
- * Original bar size must be a power of 2 or 0.
- * New bar size is returned. */
-static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
-                           unsigned bar_nr, unsigned bar_size)
-{
-    int config_offset;
-    uint8_t *config;
-
-    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
-        return -EINVAL;
-    if (bar_size > 0x80000000)
-        return -ENOSPC;
-
-    /* Require aligned offset for MSI-X structures */
-    if (bar_size & ~(MSIX_PAGE_SIZE - 1)) {
-        return -EINVAL;
-    }
-
-    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
-                                       0, MSIX_CAP_LENGTH);
-    if (config_offset < 0)
-        return config_offset;
-    config = pdev->config + config_offset;
-
-    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
-    /* Table on top of BAR */
-    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
-    /* Pending bits on top of that */
-    pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
-                 bar_nr);
-    pdev->msix_cap = config_offset;
-    /* Make flags bit writable. */
-    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
-	    MSIX_MASKALL_MASK;
-    pdev->msix_function_masked = true;
-    return 0;
-}
-
 static uint8_t msix_pending_mask(int vector)
 {
     return 1 << (vector % 8);
@@ -240,20 +191,6 @@  static const MemoryRegionOps msix_pba_mmio_ops = {
     },
 };
 
-static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
-{
-    uint8_t *config = d->config + d->msix_cap;
-    uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
-    uint32_t table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
-    uint32_t pba = pci_get_long(config + PCI_MSIX_PBA);
-    uint32_t pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
-    /* TODO: for assigned devices, we'll want to make it possible to map
-     * pending bits separately in case they are in a separate bar. */
-
-    memory_region_add_subregion(bar, table_offset, &d->msix_table_mmio);
-    memory_region_add_subregion(bar, pba_offset, &d->msix_pba_mmio);
-}
-
 static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
 {
     int vector;
@@ -268,11 +205,72 @@  static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
     }
 }
 
-/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
- * modified, it should be retrieved with msix_bar_size. */
+/* Add MSI-X capability to the config space for the device. */
+static int msix_add_config(struct PCIDevice *dev, unsigned short nentries,
+                           uint8_t table_bar, unsigned table_offset,
+                           uint8_t pba_bar, unsigned pba_offset, uint8_t pos)
+{
+    int config_offset;
+    uint8_t *config;
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
+        return -EINVAL;
+    }
+
+    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSIX,
+                                       pos, MSIX_CAP_LENGTH);
+    if (config_offset < 0) {
+        return config_offset;
+    }
+
+    config = dev->config + config_offset;
+
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar);
+    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar);
+
+    dev->msix_cap = config_offset;
+
+    /* Make flags bit writable. */
+    dev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
+                                                       MSIX_MASKALL_MASK;
+
+    dev->msix_function_masked = true;
+
+    return 0;
+}
+
+/* Clean up resources for the device. */
+void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
+        return;
+    }
+
+    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    dev->msix_cap = 0;
+    dev->msix_entries_nr = 0;
+
+    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
+    memory_region_destroy(&dev->msix_pba_mmio);
+    g_free(dev->msix_pba);
+    dev->msix_pba = NULL;
+
+    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
+    memory_region_destroy(&dev->msix_table_mmio);
+    g_free(dev->msix_table);
+    dev->msix_table = NULL;
+
+    g_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+}
+
+/* Initialize the MSI-X structures */
 int msix_init(struct PCIDevice *dev, unsigned short nentries,
-              MemoryRegion *bar,
-              unsigned bar_nr, unsigned bar_size)
+              MemoryRegion *table_bar, uint8_t table_bar_nr,
+              unsigned table_offset, MemoryRegion *pba_bar,
+              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
 {
     int ret;
     unsigned table_size, pba_size;
@@ -281,43 +279,41 @@  int msix_init(struct PCIDevice *dev, unsigned short nentries,
     if (!msi_supported) {
         return -ENOTSUP;
     }
-    if (nentries > MSIX_MAX_ENTRIES)
-        return -EINVAL;
 
     table_size = nentries * PCI_MSIX_ENTRY_SIZE;
     pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
 
-    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
-                                        sizeof *dev->msix_entry_used);
+    /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
+    if ((table_bar_nr == pba_bar_nr &&
+         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
+        table_offset + table_size > memory_region_size(table_bar) ||
+        pba_offset + pba_size > memory_region_size(pba_bar) ||
+        (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
+        return -EINVAL;
+    }
 
     dev->msix_table = g_malloc0(table_size);
     dev->msix_pba = g_malloc0(pba_size);
+    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
+    dev->msix_entries_nr = nentries;
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+
     msix_mask_all(dev, nentries);
 
     memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
                           "msix", table_size);
+    memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
+
     memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
                           "msix-pba", pba_size);
+    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
 
-    dev->msix_entries_nr = nentries;
-    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
-    if (ret)
-        goto err_config;
-
-    dev->cap_present |= QEMU_PCI_CAP_MSIX;
-    msix_mmio_setup(dev, bar);
-    return 0;
+    ret = msix_add_config(dev, nentries, table_bar_nr, table_offset,
+                          pba_bar_nr, pba_offset, cap_pos);
+    if (ret) {
+        msix_uninit(dev, table_bar, pba_bar);
+    }
 
-err_config:
-    dev->msix_entries_nr = 0;
-    memory_region_destroy(&dev->msix_pba_mmio);
-    g_free(dev->msix_pba);
-    dev->msix_pba = NULL;
-    memory_region_destroy(&dev->msix_table_mmio);
-    g_free(dev->msix_table);
-    dev->msix_table = NULL;
-    g_free(dev->msix_entry_used);
-    dev->msix_entry_used = NULL;
     return ret;
 }
 
@@ -344,7 +340,8 @@  int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
 
     free(name);
 
-    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 4096);
+    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 0,
+                    &dev->msix_exclusive_bar, bar_nr, 2048, 0);
     if (ret) {
         memory_region_destroy(&dev->msix_exclusive_bar);
         return ret;
@@ -356,6 +353,14 @@  int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
     return 0;
 }
 
+void msix_uninit_exclusive_bar(PCIDevice *dev)
+{
+    if (msix_present(dev)) {
+        msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
+        memory_region_destroy(&dev->msix_exclusive_bar);
+    }
+}
+
 static void msix_free_irq_entries(PCIDevice *dev)
 {
     int vector;
@@ -366,38 +371,6 @@  static void msix_free_irq_entries(PCIDevice *dev)
     }
 }
 
-/* Clean up resources for the device. */
-int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
-{
-    if (!msix_present(dev)) {
-        return 0;
-    }
-    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
-    dev->msix_cap = 0;
-    msix_free_irq_entries(dev);
-    dev->msix_entries_nr = 0;
-    memory_region_del_subregion(bar, &dev->msix_pba_mmio);
-    memory_region_destroy(&dev->msix_pba_mmio);
-    g_free(dev->msix_pba);
-    dev->msix_pba = NULL;
-    memory_region_del_subregion(bar, &dev->msix_table_mmio);
-    memory_region_destroy(&dev->msix_table_mmio);
-    g_free(dev->msix_table);
-    dev->msix_table = NULL;
-    g_free(dev->msix_entry_used);
-    dev->msix_entry_used = NULL;
-    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
-    return 0;
-}
-
-void msix_uninit_exclusive_bar(PCIDevice *dev)
-{
-    if (msix_present(dev)) {
-        msix_uninit(dev, &dev->msix_exclusive_bar);
-        memory_region_destroy(&dev->msix_exclusive_bar);
-    }
-}
-
 void msix_save(PCIDevice *dev, QEMUFile *f)
 {
     unsigned n = dev->msix_entries_nr;
diff --git a/hw/msix.h b/hw/msix.h
index bed6bfb..14b1a2e 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -4,16 +4,18 @@ 
 #include "qemu-common.h"
 #include "pci.h"
 
-int msix_init(PCIDevice *pdev, unsigned short nentries,
-              MemoryRegion *bar,
-              unsigned bar_nr, unsigned bar_size);
+int msix_init(PCIDevice *dev, unsigned short nentries,
+              MemoryRegion *table_bar, uint8_t table_bar_nr,
+              unsigned table_offset, MemoryRegion *pba_bar,
+              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos);
 int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
                             uint8_t bar_nr);
 
 void msix_write_config(PCIDevice *pci_dev, uint32_t address,
                        uint32_t val, int len);
 
-int msix_uninit(PCIDevice *d, MemoryRegion *bar);
+void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
+                 MemoryRegion *pba_bar);
 void msix_uninit_exclusive_bar(PCIDevice *dev);
 
 unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);