diff mbox

[21/24] pseries: Add PCI MSI/MSI-X support

Message ID 1345024742-18394-22-git-send-email-agraf@suse.de
State New
Headers show

Commit Message

Alexander Graf Aug. 15, 2012, 9:58 a.m. UTC
From: Alexey Kardashevskiy <aik@ozlabs.ru>

This patch implements MSI and MSI-X support for the pseries PCI host
bridge.  To do this it adds:

 * A "config_space_address to msi_table" map, since the MSI RTAS calls
take a PCI config space address as an identifier.

 * A MSIX memory region to catch msi_notify()/msix_notiry() from
virtio-pci and pass them to the guest via qemu_irq_pulse().

 * RTAS call "ibm,change-msi" which sets up MSI vectors for a
device. Note that this call may configure and return lesser number of
vectors than requested.

 * RTAS call "ibm,query-interrupt-source-number" which translates MSI
vector to interrupt controller (XICS) IRQ number.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr.c     |    7 ++-
 hw/spapr_pci.c |  245 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/spapr_pci.h |   15 +++-
 trace-events   |    5 +
 4 files changed, 268 insertions(+), 4 deletions(-)

Comments

Andreas Färber Aug. 15, 2012, 5:19 p.m. UTC | #1
Am 15.08.2012 11:58, schrieb Alexander Graf:
> From: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> This patch implements MSI and MSI-X support for the pseries PCI host
> bridge.  To do this it adds:
> 
>  * A "config_space_address to msi_table" map, since the MSI RTAS calls
> take a PCI config space address as an identifier.
> 
>  * A MSIX memory region to catch msi_notify()/msix_notiry() from
> virtio-pci and pass them to the guest via qemu_irq_pulse().
> 
>  * RTAS call "ibm,change-msi" which sets up MSI vectors for a
> device. Note that this call may configure and return lesser number of
> vectors than requested.
> 
>  * RTAS call "ibm,query-interrupt-source-number" which translates MSI
> vector to interrupt controller (XICS) IRQ number.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  hw/spapr.c     |    7 ++-
>  hw/spapr_pci.c |  245 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/spapr_pci.h |   15 +++-
>  trace-events   |    5 +
>  4 files changed, 268 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/spapr.c b/hw/spapr.c
> index afbdbc5..5178721 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -41,6 +41,7 @@
>  #include "hw/spapr_vio.h"
>  #include "hw/spapr_pci.h"
>  #include "hw/xics.h"
> +#include "hw/msi.h"
>  
>  #include "kvm.h"
>  #include "kvm_ppc.h"
> @@ -79,6 +80,7 @@
>  #define SPAPR_PCI_MEM_WIN_ADDR  (0x10000000000ULL + 0xA0000000)
>  #define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
>  #define SPAPR_PCI_IO_WIN_ADDR   (0x10000000000ULL + 0x80000000)
> +#define SPAPR_PCI_MSI_WIN_ADDR  (0x10000000000ULL + 0x90000000)
>  
>  #define PHANDLE_XICP            0x00001111
>  
> @@ -619,6 +621,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>      long pteg_shift = 17;
>      char *filename;
>  
> +    msi_supported = true;
> +
>      spapr = g_malloc0(sizeof(*spapr));
>      QLIST_INIT(&spapr->phbs);
>  
> @@ -735,7 +739,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>      spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
>                       SPAPR_PCI_MEM_WIN_ADDR,
>                       SPAPR_PCI_MEM_WIN_SIZE,
> -                     SPAPR_PCI_IO_WIN_ADDR);
> +                     SPAPR_PCI_IO_WIN_ADDR,
> +                     SPAPR_PCI_MSI_WIN_ADDR);
>  
>      for (i = 0; i < nb_nics; i++) {
>          NICInfo *nd = &nd_table[i];
> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
> index 1eb1a7e..1108eea 100644
> --- a/hw/spapr_pci.c
> +++ b/hw/spapr_pci.c
> @@ -24,6 +24,8 @@
>   */
>  #include "hw.h"
>  #include "pci.h"
> +#include "msi.h"
> +#include "msix.h"
>  #include "pci_host.h"
>  #include "hw/spapr.h"
>  #include "hw/spapr_pci.h"
> @@ -33,6 +35,17 @@
>  
>  #include "hw/pci_internals.h"
>  
> +/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
> +#define RTAS_QUERY_FN           0
> +#define RTAS_CHANGE_FN          1
> +#define RTAS_RESET_FN           2
> +#define RTAS_CHANGE_MSI_FN      3
> +#define RTAS_CHANGE_MSIX_FN     4
> +
> +/* Interrupt types to return on RTAS_CHANGE_* */
> +#define RTAS_TYPE_MSI           1
> +#define RTAS_TYPE_MSIX          2
> +
>  static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
>  {
>      sPAPRPHBState *phb;
> @@ -211,6 +224,191 @@ static void rtas_write_pci_config(sPAPREnvironment *spapr,
>      finish_write_pci_config(spapr, 0, addr, size, val, rets);
>  }
>  
> +/*
> + * Find an entry with config_addr or returns the empty one if not found AND
> + * alloc_new is set.
> + * At the moment the msi_table entries are never released so there is
> + * no point to look till the end of the list if we need to find the free entry.
> + */
> +static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
> +                             bool alloc_new)
> +{
> +    int i;
> +
> +    for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
> +        if (!phb->msi_table[i].nvec) {
> +            break;
> +        }
> +        if (phb->msi_table[i].config_addr == config_addr) {
> +            return i;
> +        }
> +    }
> +    if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
> +        trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
> +        return i;
> +    }
> +
> +    return -1;
> +}
> +
> +/*
> + * Set MSI/MSIX message data.
> + * This is required for msi_notify()/msix_notify() which
> + * will write at the addresses via spapr_msi_write().
> + */
> +static void spapr_msi_setmsg(PCIDevice *pdev, target_phys_addr_t addr,
> +                             bool msix, unsigned req_num)
> +{
> +    unsigned i;
> +    MSIMessage msg = { .address = addr, .data = 0 };
> +
> +    if (!msix) {
> +        msi_set_message(pdev, msg);
> +        trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
> +        return;
> +    }
> +
> +    for (i = 0; i < req_num; ++i) {
> +        msg.address = addr | (i << 2);
> +        msix_set_message(pdev, i, msg);
> +        trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
> +    }
> +}
> +
> +static void rtas_ibm_change_msi(sPAPREnvironment *spapr,
> +                                uint32_t token, uint32_t nargs,
> +                                target_ulong args, uint32_t nret,
> +                                target_ulong rets)
> +{
> +    uint32_t config_addr = rtas_ld(args, 0);
> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
> +    unsigned int func = rtas_ld(args, 3);
> +    unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
> +    unsigned int seq_num = rtas_ld(args, 5);
> +    unsigned int ret_intr_type;
> +    int ndev, irq;
> +    sPAPRPHBState *phb = NULL;
> +    PCIDevice *pdev = NULL;
> +
> +    switch (func) {
> +    case RTAS_CHANGE_MSI_FN:
> +    case RTAS_CHANGE_FN:
> +        ret_intr_type = RTAS_TYPE_MSI;
> +        break;
> +    case RTAS_CHANGE_MSIX_FN:
> +        ret_intr_type = RTAS_TYPE_MSIX;
> +        break;
> +    default:
> +        fprintf(stderr, "rtas_ibm_change_msi(%u) is not implemented\n", func);
> +        rtas_st(rets, 0, -3); /* Parameter error */
> +        return;
> +    }
> +
> +    /* Fins sPAPRPHBState */
> +    phb = find_phb(spapr, buid);
> +    if (phb) {
> +        pdev = find_dev(spapr, buid, config_addr);
> +    }
> +    if (!phb || !pdev) {
> +        rtas_st(rets, 0, -3); /* Parameter error */
> +        return;
> +    }
> +
> +    /* Releasing MSIs */
> +    if (!req_num) {
> +        ndev = spapr_msicfg_find(phb, config_addr, false);
> +        if (ndev < 0) {
> +            trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
> +            rtas_st(rets, 0, -1); /* Hardware error */
> +            return;
> +        }
> +        trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
> +        rtas_st(rets, 0, 0);
> +        rtas_st(rets, 1, 0);
> +        return;
> +    }
> +
> +    /* Enabling MSI */
> +
> +    /* Find a device number in the map to add or reuse the existing one */
> +    ndev = spapr_msicfg_find(phb, config_addr, true);
> +    if (ndev >= SPAPR_MSIX_MAX_DEVS) {
> +        fprintf(stderr, "No free entry for a new MSI device\n");
> +        rtas_st(rets, 0, -1); /* Hardware error */
> +        return;
> +    }
> +    trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
> +
> +    /* Check if there is an old config and MSI number has not changed */
> +    if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {


Breaks the build on ppc-next using openSUSE's gcc 4.6.2:

  CC    ppc64-softmmu/hw/ppc/../spapr_pci.o
/home/andreas/QEMU/qemu-ppc/hw/ppc/../spapr_pci.c: In function
‘rtas_ibm_change_msi’:
/home/andreas/QEMU/qemu-ppc/hw/ppc/../spapr_pci.c:343:23: error: array
subscript is below array bounds [-Werror=array-bounds]
cc1: all warnings being treated as errors

ndev is declared as int above...

Andreas


> +        /* Unexpected behaviour */
> +        fprintf(stderr, "Cannot reuse MSI config for device#%d", ndev);
> +        rtas_st(rets, 0, -1); /* Hardware error */
> +        return;
> +    }
> +
> +    /* There is no cached config, allocate MSIs */
> +    if (!phb->msi_table[ndev].nvec) {
> +        irq = spapr_allocate_irq_block(req_num, XICS_MSI);
> +        if (irq < 0) {
> +            fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
> +            rtas_st(rets, 0, -1); /* Hardware error */
> +            return;
> +        }
> +        phb->msi_table[ndev].irq = irq;
> +        phb->msi_table[ndev].nvec = req_num;
> +        phb->msi_table[ndev].config_addr = config_addr;
> +    }
> +
> +    /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
> +    spapr_msi_setmsg(pdev, phb->msi_win_addr | (ndev << 16),
> +                     ret_intr_type == RTAS_TYPE_MSIX, req_num);
> +
> +    rtas_st(rets, 0, 0);
> +    rtas_st(rets, 1, req_num);
> +    rtas_st(rets, 2, ++seq_num);
> +    rtas_st(rets, 3, ret_intr_type);
> +
> +    trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
> +}
> +
> +static void rtas_ibm_query_interrupt_source_number(sPAPREnvironment *spapr,
> +                                                   uint32_t token,
> +                                                   uint32_t nargs,
> +                                                   target_ulong args,
> +                                                   uint32_t nret,
> +                                                   target_ulong rets)
> +{
> +    uint32_t config_addr = rtas_ld(args, 0);
> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
> +    unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
> +    int ndev;
> +    sPAPRPHBState *phb = NULL;
> +
> +    /* Fins sPAPRPHBState */
> +    phb = find_phb(spapr, buid);
> +    if (!phb) {
> +        rtas_st(rets, 0, -3); /* Parameter error */
> +        return;
> +    }
> +
> +    /* Find device descriptor and start IRQ */
> +    ndev = spapr_msicfg_find(phb, config_addr, false);
> +    if (ndev < 0) {
> +        trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
> +        rtas_st(rets, 0, -1); /* Hardware error */
> +        return;
> +    }
> +
> +    intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
> +    trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
> +                                                           intr_src_num);
> +
> +    rtas_st(rets, 0, 0);
> +    rtas_st(rets, 1, intr_src_num);
> +    rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
> +}
> +
>  static int pci_spapr_swizzle(int slot, int pin)
>  {
>      return (slot + pin) % PCI_NUM_PINS;
> @@ -277,6 +475,33 @@ static const MemoryRegionOps spapr_io_ops = {
>  };
>  
>  /*
> + * MSI/MSIX memory region implementation.
> + * The handler handles both MSI and MSIX.
> + * For MSI-X, the vector number is encoded as a part of the address,
> + * data is set to 0.
> + * For MSI, the vector number is encoded in least bits in data.
> + */
> +static void spapr_msi_write(void *opaque, target_phys_addr_t addr,
> +                            uint64_t data, unsigned size)
> +{
> +    sPAPRPHBState *phb = opaque;
> +    int ndev = addr >> 16;
> +    int vec = ((addr & 0xFFFF) >> 2) | data;
> +    uint32_t irq = phb->msi_table[ndev].irq + vec;
> +
> +    trace_spapr_pci_msi_write(addr, data, irq);
> +
> +    qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
> +}
> +
> +static const MemoryRegionOps spapr_msi_ops = {
> +    /* There is no .read as the read result is undefined by PCI spec */
> +    .read = NULL,
> +    .write = spapr_msi_write,
> +    .endianness = DEVICE_LITTLE_ENDIAN
> +};
> +
> +/*
>   * PHB PCI device
>   */
>  static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
> @@ -327,6 +552,17 @@ static int spapr_phb_init(SysBusDevice *s)
>      memory_region_add_subregion(get_system_memory(), phb->io_win_addr,
>                                  &phb->iowindow);
>  
> +    /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
> +     * we need to allocate some memory to catch those writes coming
> +     * from msi_notify()/msix_notify() */
> +    if (msi_supported) {
> +        sprintf(namebuf, "%s.msi", phb->dtbusname);
> +        memory_region_init_io(&phb->msiwindow, &spapr_msi_ops, phb,
> +                              namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000);
> +        memory_region_add_subregion(get_system_memory(), phb->msi_win_addr,
> +                                    &phb->msiwindow);
> +    }
> +
>      bus = pci_register_bus(&phb->host_state.busdev.qdev,
>                             phb->busname ? phb->busname : phb->dtbusname,
>                             pci_spapr_set_irq, pci_spapr_map_irq, phb,
> @@ -362,6 +598,7 @@ static Property spapr_phb_properties[] = {
>      DEFINE_PROP_HEX64("mem_win_size", sPAPRPHBState, mem_win_size, 0x20000000),
>      DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
>      DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
> +    DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -384,7 +621,7 @@ static TypeInfo spapr_phb_info = {
>  void spapr_create_phb(sPAPREnvironment *spapr,
>                        const char *busname, uint64_t buid,
>                        uint64_t mem_win_addr, uint64_t mem_win_size,
> -                      uint64_t io_win_addr)
> +                      uint64_t io_win_addr, uint64_t msi_win_addr)
>  {
>      DeviceState *dev;
>  
> @@ -397,6 +634,7 @@ void spapr_create_phb(sPAPREnvironment *spapr,
>      qdev_prop_set_uint64(dev, "mem_win_addr", mem_win_addr);
>      qdev_prop_set_uint64(dev, "mem_win_size", mem_win_size);
>      qdev_prop_set_uint64(dev, "io_win_addr", io_win_addr);
> +    qdev_prop_set_uint64(dev, "msi_win_addr", msi_win_addr);
>  
>      qdev_init_nofail(dev);
>  }
> @@ -502,6 +740,11 @@ void spapr_pci_rtas_init(void)
>      spapr_rtas_register("write-pci-config", rtas_write_pci_config);
>      spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
>      spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
> +    if (msi_supported) {
> +        spapr_rtas_register("ibm,query-interrupt-source-number",
> +                            rtas_ibm_query_interrupt_source_number);
> +        spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
> +    }
>  }
>  
>  static void register_types(void)
diff mbox

Patch

diff --git a/hw/spapr.c b/hw/spapr.c
index afbdbc5..5178721 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -41,6 +41,7 @@ 
 #include "hw/spapr_vio.h"
 #include "hw/spapr_pci.h"
 #include "hw/xics.h"
+#include "hw/msi.h"
 
 #include "kvm.h"
 #include "kvm_ppc.h"
@@ -79,6 +80,7 @@ 
 #define SPAPR_PCI_MEM_WIN_ADDR  (0x10000000000ULL + 0xA0000000)
 #define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
 #define SPAPR_PCI_IO_WIN_ADDR   (0x10000000000ULL + 0x80000000)
+#define SPAPR_PCI_MSI_WIN_ADDR  (0x10000000000ULL + 0x90000000)
 
 #define PHANDLE_XICP            0x00001111
 
@@ -619,6 +621,8 @@  static void ppc_spapr_init(ram_addr_t ram_size,
     long pteg_shift = 17;
     char *filename;
 
+    msi_supported = true;
+
     spapr = g_malloc0(sizeof(*spapr));
     QLIST_INIT(&spapr->phbs);
 
@@ -735,7 +739,8 @@  static void ppc_spapr_init(ram_addr_t ram_size,
     spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
                      SPAPR_PCI_MEM_WIN_ADDR,
                      SPAPR_PCI_MEM_WIN_SIZE,
-                     SPAPR_PCI_IO_WIN_ADDR);
+                     SPAPR_PCI_IO_WIN_ADDR,
+                     SPAPR_PCI_MSI_WIN_ADDR);
 
     for (i = 0; i < nb_nics; i++) {
         NICInfo *nd = &nd_table[i];
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 1eb1a7e..1108eea 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -24,6 +24,8 @@ 
  */
 #include "hw.h"
 #include "pci.h"
+#include "msi.h"
+#include "msix.h"
 #include "pci_host.h"
 #include "hw/spapr.h"
 #include "hw/spapr_pci.h"
@@ -33,6 +35,17 @@ 
 
 #include "hw/pci_internals.h"
 
+/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
+#define RTAS_QUERY_FN           0
+#define RTAS_CHANGE_FN          1
+#define RTAS_RESET_FN           2
+#define RTAS_CHANGE_MSI_FN      3
+#define RTAS_CHANGE_MSIX_FN     4
+
+/* Interrupt types to return on RTAS_CHANGE_* */
+#define RTAS_TYPE_MSI           1
+#define RTAS_TYPE_MSIX          2
+
 static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
 {
     sPAPRPHBState *phb;
@@ -211,6 +224,191 @@  static void rtas_write_pci_config(sPAPREnvironment *spapr,
     finish_write_pci_config(spapr, 0, addr, size, val, rets);
 }
 
+/*
+ * Find an entry with config_addr or returns the empty one if not found AND
+ * alloc_new is set.
+ * At the moment the msi_table entries are never released so there is
+ * no point to look till the end of the list if we need to find the free entry.
+ */
+static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
+                             bool alloc_new)
+{
+    int i;
+
+    for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
+        if (!phb->msi_table[i].nvec) {
+            break;
+        }
+        if (phb->msi_table[i].config_addr == config_addr) {
+            return i;
+        }
+    }
+    if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
+        trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
+        return i;
+    }
+
+    return -1;
+}
+
+/*
+ * Set MSI/MSIX message data.
+ * This is required for msi_notify()/msix_notify() which
+ * will write at the addresses via spapr_msi_write().
+ */
+static void spapr_msi_setmsg(PCIDevice *pdev, target_phys_addr_t addr,
+                             bool msix, unsigned req_num)
+{
+    unsigned i;
+    MSIMessage msg = { .address = addr, .data = 0 };
+
+    if (!msix) {
+        msi_set_message(pdev, msg);
+        trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
+        return;
+    }
+
+    for (i = 0; i < req_num; ++i) {
+        msg.address = addr | (i << 2);
+        msix_set_message(pdev, i, msg);
+        trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+    }
+}
+
+static void rtas_ibm_change_msi(sPAPREnvironment *spapr,
+                                uint32_t token, uint32_t nargs,
+                                target_ulong args, uint32_t nret,
+                                target_ulong rets)
+{
+    uint32_t config_addr = rtas_ld(args, 0);
+    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    unsigned int func = rtas_ld(args, 3);
+    unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
+    unsigned int seq_num = rtas_ld(args, 5);
+    unsigned int ret_intr_type;
+    int ndev, irq;
+    sPAPRPHBState *phb = NULL;
+    PCIDevice *pdev = NULL;
+
+    switch (func) {
+    case RTAS_CHANGE_MSI_FN:
+    case RTAS_CHANGE_FN:
+        ret_intr_type = RTAS_TYPE_MSI;
+        break;
+    case RTAS_CHANGE_MSIX_FN:
+        ret_intr_type = RTAS_TYPE_MSIX;
+        break;
+    default:
+        fprintf(stderr, "rtas_ibm_change_msi(%u) is not implemented\n", func);
+        rtas_st(rets, 0, -3); /* Parameter error */
+        return;
+    }
+
+    /* Fins sPAPRPHBState */
+    phb = find_phb(spapr, buid);
+    if (phb) {
+        pdev = find_dev(spapr, buid, config_addr);
+    }
+    if (!phb || !pdev) {
+        rtas_st(rets, 0, -3); /* Parameter error */
+        return;
+    }
+
+    /* Releasing MSIs */
+    if (!req_num) {
+        ndev = spapr_msicfg_find(phb, config_addr, false);
+        if (ndev < 0) {
+            trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+            rtas_st(rets, 0, -1); /* Hardware error */
+            return;
+        }
+        trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
+        rtas_st(rets, 0, 0);
+        rtas_st(rets, 1, 0);
+        return;
+    }
+
+    /* Enabling MSI */
+
+    /* Find a device number in the map to add or reuse the existing one */
+    ndev = spapr_msicfg_find(phb, config_addr, true);
+    if (ndev >= SPAPR_MSIX_MAX_DEVS) {
+        fprintf(stderr, "No free entry for a new MSI device\n");
+        rtas_st(rets, 0, -1); /* Hardware error */
+        return;
+    }
+    trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
+
+    /* Check if there is an old config and MSI number has not changed */
+    if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
+        /* Unexpected behaviour */
+        fprintf(stderr, "Cannot reuse MSI config for device#%d", ndev);
+        rtas_st(rets, 0, -1); /* Hardware error */
+        return;
+    }
+
+    /* There is no cached config, allocate MSIs */
+    if (!phb->msi_table[ndev].nvec) {
+        irq = spapr_allocate_irq_block(req_num, XICS_MSI);
+        if (irq < 0) {
+            fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
+            rtas_st(rets, 0, -1); /* Hardware error */
+            return;
+        }
+        phb->msi_table[ndev].irq = irq;
+        phb->msi_table[ndev].nvec = req_num;
+        phb->msi_table[ndev].config_addr = config_addr;
+    }
+
+    /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
+    spapr_msi_setmsg(pdev, phb->msi_win_addr | (ndev << 16),
+                     ret_intr_type == RTAS_TYPE_MSIX, req_num);
+
+    rtas_st(rets, 0, 0);
+    rtas_st(rets, 1, req_num);
+    rtas_st(rets, 2, ++seq_num);
+    rtas_st(rets, 3, ret_intr_type);
+
+    trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
+}
+
+static void rtas_ibm_query_interrupt_source_number(sPAPREnvironment *spapr,
+                                                   uint32_t token,
+                                                   uint32_t nargs,
+                                                   target_ulong args,
+                                                   uint32_t nret,
+                                                   target_ulong rets)
+{
+    uint32_t config_addr = rtas_ld(args, 0);
+    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
+    int ndev;
+    sPAPRPHBState *phb = NULL;
+
+    /* Fins sPAPRPHBState */
+    phb = find_phb(spapr, buid);
+    if (!phb) {
+        rtas_st(rets, 0, -3); /* Parameter error */
+        return;
+    }
+
+    /* Find device descriptor and start IRQ */
+    ndev = spapr_msicfg_find(phb, config_addr, false);
+    if (ndev < 0) {
+        trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+        rtas_st(rets, 0, -1); /* Hardware error */
+        return;
+    }
+
+    intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
+    trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
+                                                           intr_src_num);
+
+    rtas_st(rets, 0, 0);
+    rtas_st(rets, 1, intr_src_num);
+    rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
+}
+
 static int pci_spapr_swizzle(int slot, int pin)
 {
     return (slot + pin) % PCI_NUM_PINS;
@@ -277,6 +475,33 @@  static const MemoryRegionOps spapr_io_ops = {
 };
 
 /*
+ * MSI/MSIX memory region implementation.
+ * The handler handles both MSI and MSIX.
+ * For MSI-X, the vector number is encoded as a part of the address,
+ * data is set to 0.
+ * For MSI, the vector number is encoded in least bits in data.
+ */
+static void spapr_msi_write(void *opaque, target_phys_addr_t addr,
+                            uint64_t data, unsigned size)
+{
+    sPAPRPHBState *phb = opaque;
+    int ndev = addr >> 16;
+    int vec = ((addr & 0xFFFF) >> 2) | data;
+    uint32_t irq = phb->msi_table[ndev].irq + vec;
+
+    trace_spapr_pci_msi_write(addr, data, irq);
+
+    qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
+}
+
+static const MemoryRegionOps spapr_msi_ops = {
+    /* There is no .read as the read result is undefined by PCI spec */
+    .read = NULL,
+    .write = spapr_msi_write,
+    .endianness = DEVICE_LITTLE_ENDIAN
+};
+
+/*
  * PHB PCI device
  */
 static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
@@ -327,6 +552,17 @@  static int spapr_phb_init(SysBusDevice *s)
     memory_region_add_subregion(get_system_memory(), phb->io_win_addr,
                                 &phb->iowindow);
 
+    /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
+     * we need to allocate some memory to catch those writes coming
+     * from msi_notify()/msix_notify() */
+    if (msi_supported) {
+        sprintf(namebuf, "%s.msi", phb->dtbusname);
+        memory_region_init_io(&phb->msiwindow, &spapr_msi_ops, phb,
+                              namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000);
+        memory_region_add_subregion(get_system_memory(), phb->msi_win_addr,
+                                    &phb->msiwindow);
+    }
+
     bus = pci_register_bus(&phb->host_state.busdev.qdev,
                            phb->busname ? phb->busname : phb->dtbusname,
                            pci_spapr_set_irq, pci_spapr_map_irq, phb,
@@ -362,6 +598,7 @@  static Property spapr_phb_properties[] = {
     DEFINE_PROP_HEX64("mem_win_size", sPAPRPHBState, mem_win_size, 0x20000000),
     DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
     DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
+    DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -384,7 +621,7 @@  static TypeInfo spapr_phb_info = {
 void spapr_create_phb(sPAPREnvironment *spapr,
                       const char *busname, uint64_t buid,
                       uint64_t mem_win_addr, uint64_t mem_win_size,
-                      uint64_t io_win_addr)
+                      uint64_t io_win_addr, uint64_t msi_win_addr)
 {
     DeviceState *dev;
 
@@ -397,6 +634,7 @@  void spapr_create_phb(sPAPREnvironment *spapr,
     qdev_prop_set_uint64(dev, "mem_win_addr", mem_win_addr);
     qdev_prop_set_uint64(dev, "mem_win_size", mem_win_size);
     qdev_prop_set_uint64(dev, "io_win_addr", io_win_addr);
+    qdev_prop_set_uint64(dev, "msi_win_addr", msi_win_addr);
 
     qdev_init_nofail(dev);
 }
@@ -502,6 +740,11 @@  void spapr_pci_rtas_init(void)
     spapr_rtas_register("write-pci-config", rtas_write_pci_config);
     spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
     spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
+    if (msi_supported) {
+        spapr_rtas_register("ibm,query-interrupt-source-number",
+                            rtas_ibm_query_interrupt_source_number);
+        spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
+    }
 }
 
 static void register_types(void)
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index 2aee67f..6892e4f 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -27,6 +27,8 @@ 
 #include "hw/pci_host.h"
 #include "hw/xics.h"
 
+#define SPAPR_MSIX_MAX_DEVS 32
+
 typedef struct sPAPRPHBState {
     PCIHostState host_state;
 
@@ -36,13 +38,22 @@  typedef struct sPAPRPHBState {
 
     MemoryRegion memspace, iospace;
     target_phys_addr_t mem_win_addr, mem_win_size, io_win_addr, io_win_size;
-    MemoryRegion memwindow, iowindow;
+    target_phys_addr_t msi_win_addr;
+    MemoryRegion memwindow, iowindow, msiwindow;
+
+
     DMAContext *dma;
 
     struct {
         uint32_t irq;
     } lsi_table[PCI_NUM_PINS];
 
+    struct {
+        uint32_t config_addr;
+        uint32_t irq;
+        int nvec;
+    } msi_table[SPAPR_MSIX_MAX_DEVS];
+
     QLIST_ENTRY(sPAPRPHBState) list;
 } sPAPRPHBState;
 
@@ -57,7 +68,7 @@  static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
 void spapr_create_phb(sPAPREnvironment *spapr,
                       const char *busname, uint64_t buid,
                       uint64_t mem_win_addr, uint64_t mem_win_size,
-                      uint64_t io_win_addr);
+                      uint64_t io_win_addr, uint64_t msi_win_addr);
 
 int spapr_populate_pci_dt(sPAPRPHBState *phb,
                           uint32_t xics_phandle,
diff --git a/trace-events b/trace-events
index 191b39e..04b0723 100644
--- a/trace-events
+++ b/trace-events
@@ -972,4 +972,9 @@  qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride,
 qxl_render_update_area_done(void *cookie) "%p"
 
 # hw/spapr_pci.c
+spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)"
+spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64
+spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u"
+spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u"
+spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u"
 spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"