diff mbox

[19/19] spapr-pci: rework MSI/MSIX

Message ID 1373118856-30171-20-git-send-email-aik@ozlabs.ru
State New
Headers show

Commit Message

Alexey Kardashevskiy July 6, 2013, 1:54 p.m. UTC
The specific of sPAPR platform is that the guest allocates MSI/MSIX
vectors via RTAS hypercalls and only operates with global IRQ numbers.
In the real hardware, PHB is expected to convert MSIMessage to an IRQ
number. So it is up to the host kernel to setup correct MSIMessage in
a real device and a PHB where a device sits on.

Therefore MSIMessage handling is completely hidden in QEMU.

Previously every PCI host bridge implemented its own MSI memory window
to catch msi_notify()/msix_notify() calls from QEMU devices (virtio-pci
or vfio) and redirect them to the guest via qemu_pulse_irq().

MSIMessage encoding was:
* .addr - address within the PHB MSI window;
* .data - the device index on PHB plus vector number.

The MSI MR write function translated this MSIMessage to a global VIRQ
number and called qemu_pulse_irq().

However the total number of IRQs is not really big (at the moment it is
1024 IRQs starting from 4096) and even 16bit data field of MSIMessage
seems to be enough to store a VIRQ number there so no decoding will be
needed.

The patch does:

1. remove MSI windows from a PHB;
2. add a single memory region for all MSIs in the guest;
3. encode MSIMessage as:
    * .addr - a fixed address of SPAPR_PCI_MSI_WINDOW==0x40000000000ULL;
    * .data as a IRQ number.
4. change IRQ allocator to align first IRQ number for MSI as it uses
lowest .data bits to put a vector number; this is not required for MSI-X
though as it has a per vector .data field.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/ppc/spapr.c              | 29 ++++++++++++--
 hw/ppc/spapr_pci.c          | 94 +++++++++++++++++++--------------------------
 include/hw/pci-host/spapr.h |  8 ++--
 include/hw/ppc/spapr.h      |  4 +-
 4 files changed, 73 insertions(+), 62 deletions(-)
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 5ecd81b..29d2be5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -89,6 +89,9 @@  int spapr_allocate_irq(int hint, bool lsi)
 
     if (hint) {
         irq = hint;
+        if (hint >= spapr->next_irq) {
+            spapr->next_irq = hint + 1;
+        }
         /* FIXME: we should probably check for collisions somehow */
     } else {
         irq = spapr->next_irq++;
@@ -104,22 +107,39 @@  int spapr_allocate_irq(int hint, bool lsi)
     return irq;
 }
 
-/* Allocate block of consequtive IRQs, returns a number of the first */
-int spapr_allocate_irq_block(int num, bool lsi)
+/*
+ * Allocate block of consequtive IRQs, returns a number of the first.
+ * If msi==true, aligns the first IRQ number to num.
+ */
+int spapr_allocate_irq_block(int num, bool lsi, bool msi)
 {
     int first = -1;
-    int i;
+    int i, hint = 0;
+
+    /*
+     * MSIMesage::data is used for storing VIRQ so
+     * it has to be aligned to num to support multiple
+     * MSI vectors. MSI-X is not affected by this.
+     * The hint is used for the first IRQ, the rest should
+     * be allocated continously.
+     */
+    if (msi) {
+        assert((num == 1) || (num == 2) || (num == 4) ||
+               (num == 8) || (num == 16) || (num == 32));
+        hint = (spapr->next_irq + num - 1) & ~(num - 1);
+    }
 
     for (i = 0; i < num; ++i) {
         int irq;
 
-        irq = spapr_allocate_irq(0, lsi);
+        irq = spapr_allocate_irq(hint, lsi);
         if (!irq) {
             return -1;
         }
 
         if (0 == i) {
             first = irq;
+            hint = 0;
         }
 
         /* If the above doesn't create a consecutive block then that's
@@ -1256,6 +1276,7 @@  static void ppc_spapr_init(QEMUMachineInitArgs *args)
     spapr_create_nvram(spapr);
 
     /* Set up PCI */
+    spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
     spapr_pci_rtas_init();
 
     phb = spapr_create_phb(spapr, 0);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 4d8e3cd..23dbc0e 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -253,30 +253,6 @@  static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
     return -1;
 }
 
-/*
- * Set MSI/MSIX message data.
- * This is required for msi_notify()/msix_notify() which
- * will write at the addresses via spapr_msi_write().
- */
-static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr,
-                             bool msix, unsigned req_num)
-{
-    unsigned i;
-    MSIMessage msg = { .address = addr, .data = 0 };
-
-    if (!msix) {
-        msi_set_message(pdev, msg);
-        trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
-        return;
-    }
-
-    for (i = 0; i < req_num; ++i) {
-        msg.address = addr | (i << 2);
-        msix_set_message(pdev, i, msg);
-        trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
-    }
-}
-
 static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                 uint32_t token, uint32_t nargs,
                                 target_ulong args, uint32_t nret,
@@ -288,9 +264,10 @@  static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
     unsigned int seq_num = rtas_ld(args, 5);
     unsigned int ret_intr_type;
-    int ndev, irq;
+    int i, ndev, irq;
     sPAPRPHBState *phb = NULL;
     PCIDevice *pdev = NULL;
+    MSIMessage msg;
 
     switch (func) {
     case RTAS_CHANGE_MSI_FN:
@@ -351,7 +328,8 @@  static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
     /* There is no cached config, allocate MSIs */
     if (!phb->msi_table[ndev].nvec) {
-        irq = spapr_allocate_irq_block(req_num, false);
+        irq = spapr_allocate_irq_block(req_num, false,
+                                       ret_intr_type == RTAS_TYPE_MSI);
         if (irq < 0) {
             fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
             rtas_st(rets, 0, -1); /* Hardware error */
@@ -362,9 +340,23 @@  static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
         phb->msi_table[ndev].config_addr = config_addr;
     }
 
-    /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
-    spapr_msi_setmsg(pdev, phb->msi_win_addr | (ndev << 16),
-                     ret_intr_type == RTAS_TYPE_MSIX, req_num);
+    /*
+     * Set MSI/MSIX message data.
+     * This is required for msi_notify()/msix_notify() which
+     * will write at the addresses via spapr_msi_write().
+     */
+    msg.address = spapr->msi_win_addr;
+    if (ret_intr_type == RTAS_TYPE_MSI) {
+        msg.data = phb->msi_table[ndev].irq;
+        msi_set_message(pdev, msg);
+        trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
+    } else {
+        for (i = 0; i < phb->msi_table[ndev].nvec; ++i) {
+            msg.data = phb->msi_table[ndev].irq + i;
+            msix_set_message(pdev, i, msg);
+            trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+        }
+    }
 
     rtas_st(rets, 0, 0);
     rtas_st(rets, 1, req_num);
@@ -487,10 +479,7 @@  static const MemoryRegionOps spapr_io_ops = {
 static void spapr_msi_write(void *opaque, hwaddr addr,
                             uint64_t data, unsigned size)
 {
-    sPAPRPHBState *phb = opaque;
-    int ndev = addr >> 16;
-    int vec = ((addr & 0xFFFF) >> 2) | data;
-    uint32_t irq = phb->msi_table[ndev].irq + vec;
+    uint32_t irq = data;
 
     trace_spapr_pci_msi_write(addr, data, irq);
 
@@ -504,6 +493,23 @@  static const MemoryRegionOps spapr_msi_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN
 };
 
+void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
+{
+    /*
+     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
+     * we need to allocate some memory to catch those writes coming
+     * from msi_notify()/msix_notify().
+     * As MSIMessage:addr is going to be the same and MSIMessage:data
+     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
+     * be used.
+     */
+    spapr->msi_win_addr = addr;
+    memory_region_init_io(&spapr->msiwindow, &spapr_msi_ops, spapr,
+                          "msi", getpagesize());
+    memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
+                                &spapr->msiwindow);
+}
+
 /*
  * PHB PCI device
  */
@@ -528,8 +534,7 @@  static int spapr_phb_init(SysBusDevice *s)
 
         if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
             || (sphb->mem_win_addr != -1)
-            || (sphb->io_win_addr != -1)
-            || (sphb->msi_win_addr != -1)) {
+            || (sphb->io_win_addr != -1)) {
             fprintf(stderr, "Either \"index\" or other parameters must"
                     " be specified for PAPR PHB, not both\n");
             return -1;
@@ -542,7 +547,6 @@  static int spapr_phb_init(SysBusDevice *s)
             + sphb->index * SPAPR_PCI_WINDOW_SPACING;
         sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
         sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
-        sphb->msi_win_addr = windows_base + SPAPR_PCI_MSI_WIN_OFF;
     }
 
     if (sphb->buid == -1) {
@@ -565,11 +569,6 @@  static int spapr_phb_init(SysBusDevice *s)
         return -1;
     }
 
-    if (sphb->msi_win_addr == -1) {
-        fprintf(stderr, "MSI window address not specified for PHB\n");
-        return -1;
-    }
-
     if (find_phb(spapr, sphb->buid)) {
         fprintf(stderr, "PCI host bridges must have unique BUIDs\n");
         return -1;
@@ -608,17 +607,6 @@  static int spapr_phb_init(SysBusDevice *s)
     memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
                                 &sphb->iowindow);
 
-    /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
-     * we need to allocate some memory to catch those writes coming
-     * from msi_notify()/msix_notify() */
-    if (msi_supported) {
-        sprintf(namebuf, "%s.msi", sphb->dtbusname);
-        memory_region_init_io(&sphb->msiwindow, &spapr_msi_ops, sphb,
-                              namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000);
-        memory_region_add_subregion(get_system_memory(), sphb->msi_win_addr,
-                                    &sphb->msiwindow);
-    }
-
     /*
      * Selecting a busname is more complex than you'd think, due to
      * interacting constraints.  If the user has specified an id
@@ -692,7 +680,6 @@  static Property spapr_phb_properties[] = {
     DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
     DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size,
                       SPAPR_PCI_IO_WIN_SIZE),
-    DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, -1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -734,7 +721,6 @@  static const VMStateDescription vmstate_spapr_pci = {
         VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
         VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
         VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
-        VMSTATE_UINT64_EQUAL(msi_win_addr, sPAPRPHBState),
         VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
                              vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
         VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 93f9511..970b4a9 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -43,8 +43,7 @@  typedef struct sPAPRPHBState {
 
     MemoryRegion memspace, iospace;
     hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
-    hwaddr msi_win_addr;
-    MemoryRegion memwindow, iowindow, msiwindow;
+    MemoryRegion memwindow, iowindow;
 
     uint32_t dma_liobn;
     uint64_t dma_window_start;
@@ -73,7 +72,8 @@  typedef struct sPAPRPHBState {
 #define SPAPR_PCI_MMIO_WIN_SIZE      0x20000000
 #define SPAPR_PCI_IO_WIN_OFF         0x80000000
 #define SPAPR_PCI_IO_WIN_SIZE        0x10000
-#define SPAPR_PCI_MSI_WIN_OFF        0x90000000
+
+#define SPAPR_PCI_MSI_WINDOW         0x40000000000ULL
 
 #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
 
@@ -88,6 +88,8 @@  int spapr_populate_pci_dt(sPAPRPHBState *phb,
                           uint32_t xics_phandle,
                           void *fdt);
 
+void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr);
+
 void spapr_pci_rtas_init(void);
 
 #endif /* __HW_SPAPR_PCI_H__ */
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 3da31f0..f0129f4 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -14,6 +14,8 @@  struct icp_state;
 typedef struct sPAPREnvironment {
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
+    hwaddr msi_win_addr;
+    MemoryRegion msiwindow;
     struct sPAPRNVRAM *nvram;
     struct icp_state *icp;
 
@@ -304,7 +306,7 @@  target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
                              target_ulong *args);
 
 int spapr_allocate_irq(int hint, bool lsi);
-int spapr_allocate_irq_block(int num, bool lsi);
+int spapr_allocate_irq_block(int num, bool lsi, bool msi);
 
 static inline int spapr_allocate_msi(int hint)
 {