Patchwork RFCv3 kvm irqfd: support msimessage to irq translation in PHB

login
register
mail settings
Submitter Alexey Kardashevskiy
Date June 29, 2013, 1:45 p.m.
Message ID <1372513521-10050-1-git-send-email-aik@ozlabs.ru>
Download mbox | patch
Permalink /patch/255737/
State New
Headers show

Comments

Alexey Kardashevskiy - June 29, 2013, 1:45 p.m.
On PPC64 systems MSI Messages are translated to system IRQ in a PCI
host bridge. This is already supported for emulated MSI/MSIX but
not for irqfd where the current QEMU allocates IRQ numbers from
irqchip and maps MSIMessages to those IRQ in the host kernel.

The patch extends irqfd support in order to avoid unnecessary
mapping and reuse the one which already exists in a PCI host bridge.

Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
to PCI API. The latter returns -1 if a specific PHB does not provide
with any trsnslation so the existing code will work.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

---

Looks like we agreed that in general PHB is the right place for this,
not KVM, so I am trying again.

Probably something should be done to kvm_irqchip_update_msi_route()
as well but I do not really understand what exactly. Any suggestions?


---
 hw/misc/vfio.c           |    7 +++++--
 hw/pci/pci.c             |   13 +++++++++++++
 hw/ppc/spapr_pci.c       |    6 ++++++
 hw/virtio/virtio-pci.c   |    2 +-
 include/hw/pci/pci.h     |    4 ++++
 include/hw/pci/pci_bus.h |    1 +
 include/sysemu/kvm.h     |    2 +-
 kvm-all.c                |    7 ++++++-
 8 files changed, 37 insertions(+), 5 deletions(-)
Andreas Färber - June 29, 2013, 2:06 p.m.
Am 29.06.2013 15:45, schrieb Alexey Kardashevskiy:
> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
> host bridge. This is already supported for emulated MSI/MSIX but
> not for irqfd where the current QEMU allocates IRQ numbers from
> irqchip and maps MSIMessages to those IRQ in the host kernel.
> 
> The patch extends irqfd support in order to avoid unnecessary
> mapping and reuse the one which already exists in a PCI host bridge.
> 
> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
> to PCI API. The latter returns -1 if a specific PHB does not provide
> with any trsnslation so the existing code will work.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> ---
> 
> Looks like we agreed that in general PHB is the right place for this,
> not KVM, so I am trying again.
> 
> Probably something should be done to kvm_irqchip_update_msi_route()
> as well but I do not really understand what exactly. Any suggestions?
> 
> 
> ---
>  hw/misc/vfio.c           |    7 +++++--
>  hw/pci/pci.c             |   13 +++++++++++++
>  hw/ppc/spapr_pci.c       |    6 ++++++
>  hw/virtio/virtio-pci.c   |    2 +-
>  include/hw/pci/pci.h     |    4 ++++
>  include/hw/pci/pci_bus.h |    1 +
>  include/sysemu/kvm.h     |    2 +-
>  kvm-all.c                |    7 ++++++-
>  8 files changed, 37 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> index 52fb036..59911bb 100644
> --- a/hw/misc/vfio.c
> +++ b/hw/misc/vfio.c
> @@ -624,7 +624,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>       * Attempt to enable route through KVM irqchip,
>       * default to userspace handling if unavailable.
>       */
> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
> +
> +    vector->virq = msg ?
> +            kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus, *msg) : -1;

Please don't access device-internal parent fields like ->pdev. To obtain
a PCIDevice you can use PCI_DEVICE(vdev). But in this case isn't that
just a complicated way to access the function's argument pdev?

>      if (vector->virq < 0 ||
>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>                                         vector->virq) < 0) {
> @@ -792,7 +794,8 @@ retry:
>           * Attempt to enable route through KVM irqchip,
>           * default to userspace handling if unavailable.
>           */
> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        vector->virq = kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus,
> +                                                 msg);

Ditto.

>          if (vector->virq < 0 ||
>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>                                             vector->virq) < 0) {
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 61b681a..543f172 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -1240,6 +1240,19 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>      dev->intx_routing_notifier = notifier;
>  }
>  
> +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn)
> +{
> +    bus->map_msi = map_msi_fn;
> +}
> +
> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg)
> +{
> +    if (bus->map_msi) {
> +        return bus->map_msi(bus, msg);
> +    }
> +    return -1;
> +}
> +
>  /*
>   * PCI-to-PCI bridge specification
>   * 9.1: Interrupt routing. Table 9-1
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 23dbc0e..bae4faf 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -486,6 +486,11 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
>      qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
>  }
>  
> +static int spapr_msi_get_irq(PCIBus *bus, MSIMessage msg)
> +{
> +    return msg.data;
> +}
> +
>  static const MemoryRegionOps spapr_msi_ops = {
>      /* There is no .read as the read result is undefined by PCI spec */
>      .read = NULL,
> @@ -657,6 +662,7 @@ static int spapr_phb_init(SysBusDevice *s)
>  
>          sphb->lsi_table[i].irq = irq;
>      }
> +    pci_bus_set_map_msi_fn(bus, spapr_msi_get_irq);
>  
>      return 0;
>  }
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index b070b64..06a4e13 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
>      int ret;
>  
>      if (irqfd->users == 0) {
> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        ret = kvm_irqchip_add_msi_route(kvm_state, proxy->pci_dev.bus, msg);

PCIDevice *pci_dev = PCI_DEVICE(proxy); and pci_dev->bus please.

Andreas

>          if (ret < 0) {
>              return ret;
>          }
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 6ef1f97..8c1edd6 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
> +typedef int (*pci_map_msi_fn)(PCIBus *bus, MSIMessage msg);
>  
>  typedef enum {
>      PCI_HOTPLUG_DISABLED,
> @@ -375,6 +376,9 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>                                            PCIINTxRoutingNotifier notifier);
> +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn);
> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg);
> +
>  void pci_device_reset(PCIDevice *dev);
>  void pci_bus_reset(PCIBus *bus);
>  
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 66762f6..81efd2b 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -16,6 +16,7 @@ struct PCIBus {
>      pci_set_irq_fn set_irq;
>      pci_map_irq_fn map_irq;
>      pci_route_irq_fn route_intx_to_irq;
> +    pci_map_msi_fn map_msi;
>      pci_hotplug_fn hotplug;
>      DeviceState *hotplug_qdev;
>      void *irq_opaque;
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index f404d16..1bf2abe 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -305,8 +305,8 @@ static inline void cpu_synchronize_post_init(CPUState *cpu)
>      }
>  }
>  
> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
>  int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg);
> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg);
>  void kvm_irqchip_release_virq(KVMState *s, int virq);
>  
>  int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
> diff --git a/kvm-all.c b/kvm-all.c
> index 1f81cca..3b7710d 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1180,11 +1180,16 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
>      return kvm_set_irq(s, route->kroute.gsi, 1);
>  }
>  
> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg)
>  {
>      struct kvm_irq_routing_entry kroute;
>      int virq;
>  
> +    virq = pci_bus_map_msi(pbus, msg);
> +    if (virq >= 0) {
> +        return virq;
> +    }
> +
>      if (!kvm_gsi_routing_enabled()) {
>          return -ENOSYS;
>      }
>
Anthony Liguori - June 29, 2013, 2:28 p.m.
On Sat, Jun 29, 2013 at 8:45 AM, Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
> host bridge. This is already supported for emulated MSI/MSIX but
> not for irqfd where the current QEMU allocates IRQ numbers from
> irqchip and maps MSIMessages to those IRQ in the host kernel.
>
> The patch extends irqfd support in order to avoid unnecessary
> mapping and reuse the one which already exists in a PCI host bridge.
>
> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
> to PCI API. The latter returns -1 if a specific PHB does not provide
> with any trsnslation so the existing code will work.

I think there's a bit of confusion here.  The kernel needs a "virq"
number to create an eventfd.  virq is just a KVM concept, it doesn't
correspond to anything useful in hardware.

On pseries, there is a 1-1 mapping between XICS IRQs and VIRQs and MSI
can be trivially mapped to a virq.

On x86, we need to call a special kernel function which essentially
creates an apic message->virq mapping such that we can deliver the
irqfd.

So what this should look like is:

1) A PCI bus function to do the MSI -> virq mapping
2) On x86 (and e500), this is implemented by calling kvm_irqchip_add_msi_route()
3) On pseries, this just returns msi->data

Perhaps (2) can just be the default PCI bus implementation to simplify things.

> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>
> ---
>
> Looks like we agreed that in general PHB is the right place for this,
> not KVM, so I am trying again.
>
> Probably something should be done to kvm_irqchip_update_msi_route()
> as well but I do not really understand what exactly. Any suggestions?
>
>
> ---
>  hw/misc/vfio.c           |    7 +++++--
>  hw/pci/pci.c             |   13 +++++++++++++
>  hw/ppc/spapr_pci.c       |    6 ++++++
>  hw/virtio/virtio-pci.c   |    2 +-
>  include/hw/pci/pci.h     |    4 ++++
>  include/hw/pci/pci_bus.h |    1 +
>  include/sysemu/kvm.h     |    2 +-
>  kvm-all.c                |    7 ++++++-
>  8 files changed, 37 insertions(+), 5 deletions(-)
>
> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> index 52fb036..59911bb 100644
> --- a/hw/misc/vfio.c
> +++ b/hw/misc/vfio.c
> @@ -624,7 +624,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>       * Attempt to enable route through KVM irqchip,
>       * default to userspace handling if unavailable.
>       */
> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
> +
> +    vector->virq = msg ?
> +            kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus, *msg) : -1;

This is wrong.  You could call the bus function to map an MSI message
to a virq here.

>      if (vector->virq < 0 ||
>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>                                         vector->virq) < 0) {
> @@ -792,7 +794,8 @@ retry:
>           * Attempt to enable route through KVM irqchip,
>           * default to userspace handling if unavailable.
>           */
> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        vector->virq = kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus,
> +                                                 msg);

And here.

>          if (vector->virq < 0 ||
>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>                                             vector->virq) < 0) {
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 61b681a..543f172 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -1240,6 +1240,19 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>      dev->intx_routing_notifier = notifier;
>  }
>
> +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn)
> +{
> +    bus->map_msi = map_msi_fn;
> +}

You don't need this function.  You can do this overloading as part of
the PCI bus initialization in spapr_pci.c

Regards,

Anthony Liguori

> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg)
> +{
> +    if (bus->map_msi) {
> +        return bus->map_msi(bus, msg);
> +    }
> +    return -1;
> +}
> +
>  /*
>   * PCI-to-PCI bridge specification
>   * 9.1: Interrupt routing. Table 9-1
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 23dbc0e..bae4faf 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -486,6 +486,11 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
>      qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
>  }
>
> +static int spapr_msi_get_irq(PCIBus *bus, MSIMessage msg)
> +{
> +    return msg.data;
> +}
> +
>  static const MemoryRegionOps spapr_msi_ops = {
>      /* There is no .read as the read result is undefined by PCI spec */
>      .read = NULL,
> @@ -657,6 +662,7 @@ static int spapr_phb_init(SysBusDevice *s)
>
>          sphb->lsi_table[i].irq = irq;
>      }
> +    pci_bus_set_map_msi_fn(bus, spapr_msi_get_irq);
>
>      return 0;
>  }
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index b070b64..06a4e13 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
>      int ret;
>
>      if (irqfd->users == 0) {
> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        ret = kvm_irqchip_add_msi_route(kvm_state, proxy->pci_dev.bus, msg);
>          if (ret < 0) {
>              return ret;
>          }
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 6ef1f97..8c1edd6 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
> +typedef int (*pci_map_msi_fn)(PCIBus *bus, MSIMessage msg);
>
>  typedef enum {
>      PCI_HOTPLUG_DISABLED,
> @@ -375,6 +376,9 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>                                            PCIINTxRoutingNotifier notifier);
> +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn);
> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg);
> +
>  void pci_device_reset(PCIDevice *dev);
>  void pci_bus_reset(PCIBus *bus);
>
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 66762f6..81efd2b 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -16,6 +16,7 @@ struct PCIBus {
>      pci_set_irq_fn set_irq;
>      pci_map_irq_fn map_irq;
>      pci_route_irq_fn route_intx_to_irq;
> +    pci_map_msi_fn map_msi;
>      pci_hotplug_fn hotplug;
>      DeviceState *hotplug_qdev;
>      void *irq_opaque;
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index f404d16..1bf2abe 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -305,8 +305,8 @@ static inline void cpu_synchronize_post_init(CPUState *cpu)
>      }
>  }
>
> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
>  int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg);
> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg);
>  void kvm_irqchip_release_virq(KVMState *s, int virq);
>
>  int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
> diff --git a/kvm-all.c b/kvm-all.c
> index 1f81cca..3b7710d 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1180,11 +1180,16 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
>      return kvm_set_irq(s, route->kroute.gsi, 1);
>  }
>
> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg)
>  {
>      struct kvm_irq_routing_entry kroute;
>      int virq;
>
> +    virq = pci_bus_map_msi(pbus, msg);
> +    if (virq >= 0) {
> +        return virq;
> +    }
> +
>      if (!kvm_gsi_routing_enabled()) {
>          return -ENOSYS;
>      }
> --
> 1.7.10.4
>
>
Alexey Kardashevskiy - June 30, 2013, 12:59 a.m.
On 06/30/2013 12:28 AM, Anthony Liguori wrote:
> On Sat, Jun 29, 2013 at 8:45 AM, Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
>> host bridge. This is already supported for emulated MSI/MSIX but
>> not for irqfd where the current QEMU allocates IRQ numbers from
>> irqchip and maps MSIMessages to those IRQ in the host kernel.
>>
>> The patch extends irqfd support in order to avoid unnecessary
>> mapping and reuse the one which already exists in a PCI host bridge.
>>
>> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
>> to PCI API. The latter returns -1 if a specific PHB does not provide
>> with any trsnslation so the existing code will work.
> 
> I think there's a bit of confusion here.  The kernel needs a "virq"
> number to create an eventfd.  virq is just a KVM concept, it doesn't
> correspond to anything useful in hardware.


Yes, it does not. But... There is a global IRQ number space and PHBs
convert MSIMessage to global IRQ, this is what our real hardware does on a
ppc64-pseries host (if I do not confuse things again). And I am trying to
follow the same principle here too.


> On pseries, there is a 1-1 mapping between XICS IRQs and VIRQs and MSI
> can be trivially mapped to a virq.


> On x86, we need to call a special kernel function which essentially
> creates an apic message->virq mapping such that we can deliver the
> irqfd.
> 
> So what this should look like is:
> 
> 1) A PCI bus function to do the MSI -> virq mapping
> 2) On x86 (and e500), this is implemented by calling kvm_irqchip_add_msi_route()
> 3) On pseries, this just returns msi->data
> 
> Perhaps (2) can just be the default PCI bus implementation to simplify things.


hw/pci/pci.c does not have any kvm code yet and I would like not to be the
first person who tries adding this there :)
But ok, I'll do it.


>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>
>> ---
>>
>> Looks like we agreed that in general PHB is the right place for this,
>> not KVM, so I am trying again.
>>
>> Probably something should be done to kvm_irqchip_update_msi_route()
>> as well but I do not really understand what exactly. Any suggestions?
>>
>>
>> ---
>>  hw/misc/vfio.c           |    7 +++++--
>>  hw/pci/pci.c             |   13 +++++++++++++
>>  hw/ppc/spapr_pci.c       |    6 ++++++
>>  hw/virtio/virtio-pci.c   |    2 +-
>>  include/hw/pci/pci.h     |    4 ++++
>>  include/hw/pci/pci_bus.h |    1 +
>>  include/sysemu/kvm.h     |    2 +-
>>  kvm-all.c                |    7 ++++++-
>>  8 files changed, 37 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
>> index 52fb036..59911bb 100644
>> --- a/hw/misc/vfio.c
>> +++ b/hw/misc/vfio.c
>> @@ -624,7 +624,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>       * Attempt to enable route through KVM irqchip,
>>       * default to userspace handling if unavailable.
>>       */
>> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
>> +
>> +    vector->virq = msg ?
>> +            kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus, *msg) : -1;
> 
> This is wrong.  You could call the bus function to map an MSI message
> to a virq here.
> 
>>      if (vector->virq < 0 ||
>>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>                                         vector->virq) < 0) {
>> @@ -792,7 +794,8 @@ retry:
>>           * Attempt to enable route through KVM irqchip,
>>           * default to userspace handling if unavailable.
>>           */
>> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
>> +        vector->virq = kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus,
>> +                                                 msg);
> 
> And here.
> 
>>          if (vector->virq < 0 ||
>>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>                                             vector->virq) < 0) {
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 61b681a..543f172 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -1240,6 +1240,19 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>      dev->intx_routing_notifier = notifier;
>>  }
>>
>> +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn)
>> +{
>> +    bus->map_msi = map_msi_fn;
>> +}
> 
> You don't need this function.  You can do this overloading as part of
> the PCI bus initialization in spapr_pci.c


pci_bus_set_route_irq_fn is there already and I tried to follow the
existing pattern (yeah, missed assert though). Or this is different?


> 
> Regards,
> 
> Anthony Liguori
> 
>> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg)
>> +{
>> +    if (bus->map_msi) {
>> +        return bus->map_msi(bus, msg);
>> +    }
>> +    return -1;
>> +}
>> +
>>  /*
>>   * PCI-to-PCI bridge specification
>>   * 9.1: Interrupt routing. Table 9-1
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index 23dbc0e..bae4faf 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -486,6 +486,11 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
>>      qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
>>  }
>>
>> +static int spapr_msi_get_irq(PCIBus *bus, MSIMessage msg)
>> +{
>> +    return msg.data;
>> +}
>> +
>>  static const MemoryRegionOps spapr_msi_ops = {
>>      /* There is no .read as the read result is undefined by PCI spec */
>>      .read = NULL,
>> @@ -657,6 +662,7 @@ static int spapr_phb_init(SysBusDevice *s)
>>
>>          sphb->lsi_table[i].irq = irq;
>>      }
>> +    pci_bus_set_map_msi_fn(bus, spapr_msi_get_irq);
>>
>>      return 0;
>>  }
>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>> index b070b64..06a4e13 100644
>> --- a/hw/virtio/virtio-pci.c
>> +++ b/hw/virtio/virtio-pci.c
>> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
>>      int ret;
>>
>>      if (irqfd->users == 0) {
>> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
>> +        ret = kvm_irqchip_add_msi_route(kvm_state, proxy->pci_dev.bus, msg);
>>          if (ret < 0) {
>>              return ret;
>>          }
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index 6ef1f97..8c1edd6 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
>>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
>>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
>>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
>> +typedef int (*pci_map_msi_fn)(PCIBus *bus, MSIMessage msg);
>>
>>  typedef enum {
>>      PCI_HOTPLUG_DISABLED,
>> @@ -375,6 +376,9 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>                                            PCIINTxRoutingNotifier notifier);
>> +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn);
>> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg);
>> +
>>  void pci_device_reset(PCIDevice *dev);
>>  void pci_bus_reset(PCIBus *bus);
>>
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 66762f6..81efd2b 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -16,6 +16,7 @@ struct PCIBus {
>>      pci_set_irq_fn set_irq;
>>      pci_map_irq_fn map_irq;
>>      pci_route_irq_fn route_intx_to_irq;
>> +    pci_map_msi_fn map_msi;
>>      pci_hotplug_fn hotplug;
>>      DeviceState *hotplug_qdev;
>>      void *irq_opaque;
>> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
>> index f404d16..1bf2abe 100644
>> --- a/include/sysemu/kvm.h
>> +++ b/include/sysemu/kvm.h
>> @@ -305,8 +305,8 @@ static inline void cpu_synchronize_post_init(CPUState *cpu)
>>      }
>>  }
>>
>> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
>>  int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg);
>> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg);
>>  void kvm_irqchip_release_virq(KVMState *s, int virq);
>>
>>  int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
>> diff --git a/kvm-all.c b/kvm-all.c
>> index 1f81cca..3b7710d 100644
>> --- a/kvm-all.c
>> +++ b/kvm-all.c
>> @@ -1180,11 +1180,16 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
>>      return kvm_set_irq(s, route->kroute.gsi, 1);
>>  }
>>
>> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
>> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg)
>>  {
>>      struct kvm_irq_routing_entry kroute;
>>      int virq;
>>
>> +    virq = pci_bus_map_msi(pbus, msg);
>> +    if (virq >= 0) {
>> +        return virq;
>> +    }
>> +
>>      if (!kvm_gsi_routing_enabled()) {
>>          return -ENOSYS;
>>      }
>> --
>> 1.7.10.4
>>
>>
Benjamin Herrenschmidt - June 30, 2013, 6:32 a.m.
On Sun, 2013-06-30 at 10:59 +1000, Alexey Kardashevskiy wrote:
> > 1) A PCI bus function to do the MSI -> virq mapping
> > 2) On x86 (and e500), this is implemented by calling kvm_irqchip_add_msi_route()
> > 3) On pseries, this just returns msi->data
> > 
> > Perhaps (2) can just be the default PCI bus implementation to simplify things.
> 
> 
> hw/pci/pci.c does not have any kvm code yet and I would like not to be the
> first person who tries adding this there :)
> But ok, I'll do it.

Unless I'm confused (which is very possible) I seem to remember that there was
duplication of that MSI / KVM mapping between virtio-pci and vfio as well,
so it makes sense to move it to the PCI code.

Cheers,
Ben.
Alexey Kardashevskiy - June 30, 2013, 7:09 a.m.
On 06/30/2013 04:32 PM, Benjamin Herrenschmidt wrote:
> On Sun, 2013-06-30 at 10:59 +1000, Alexey Kardashevskiy wrote:
>>> 1) A PCI bus function to do the MSI -> virq mapping
>>> 2) On x86 (and e500), this is implemented by calling kvm_irqchip_add_msi_route()
>>> 3) On pseries, this just returns msi->data
>>>
>>> Perhaps (2) can just be the default PCI bus implementation to simplify things.
>>
>>
>> hw/pci/pci.c does not have any kvm code yet and I would like not to be the
>> first person who tries adding this there :)
>> But ok, I'll do it.
> 
> Unless I'm confused (which is very possible) I seem to remember that there was
> duplication of that MSI / KVM mapping between virtio-pci and vfio as well,
> so it makes sense to move it to the PCI code.

No, you are right and this is what Anthony is telling me to do.
Alexey Kardashevskiy - June 30, 2013, 7:37 a.m.
On 06/30/2013 12:28 AM, Anthony Liguori wrote:
> On Sat, Jun 29, 2013 at 8:45 AM, Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
>> host bridge. This is already supported for emulated MSI/MSIX but
>> not for irqfd where the current QEMU allocates IRQ numbers from
>> irqchip and maps MSIMessages to those IRQ in the host kernel.
>>
>> The patch extends irqfd support in order to avoid unnecessary
>> mapping and reuse the one which already exists in a PCI host bridge.
>>
>> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
>> to PCI API. The latter returns -1 if a specific PHB does not provide
>> with any trsnslation so the existing code will work.
> 
> I think there's a bit of confusion here.  The kernel needs a "virq"
> number to create an eventfd.  virq is just a KVM concept, it doesn't
> correspond to anything useful in hardware.
> 
> On pseries, there is a 1-1 mapping between XICS IRQs and VIRQs and MSI
> can be trivially mapped to a virq.
> 
> On x86, we need to call a special kernel function which essentially
> creates an apic message->virq mapping such that we can deliver the
> irqfd.
> 
> So what this should look like is:
> 
> 1) A PCI bus function to do the MSI -> virq mapping
> 2) On x86 (and e500), this is implemented by calling kvm_irqchip_add_msi_route()
> 3) On pseries, this just returns msi->data
> 
> Perhaps (2) can just be the default PCI bus implementation to simplify things.
> 
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>
>> ---
>>
>> Looks like we agreed that in general PHB is the right place for this,
>> not KVM, so I am trying again.
>>
>> Probably something should be done to kvm_irqchip_update_msi_route()
>> as well but I do not really understand what exactly. Any suggestions?


Ah. Everybody ignored, I'll try asking again :)

kvm_irqchip_update_msi_route() - where should it go? What is it for?
virtio-pci and pci device assignment use it but vfio does not - is it a bug
of vfio? Thanks.
Anthony Liguori - June 30, 2013, 2:20 p.m.
On Sun, Jun 30, 2013 at 2:37 AM, Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
> On 06/30/2013 12:28 AM, Anthony Liguori wrote:
>
> Ah. Everybody ignored, I'll try asking again :)
>
> kvm_irqchip_update_msi_route() - where should it go? What is it for?
> virtio-pci and pci device assignment use it but vfio does not - is it a bug
> of vfio? Thanks.

The data part of an MSI message contains information about how to
deliver the interrupt.  But when we add a virq, it has essentially a
fixed data value.

So if the guest sends a data message that contains anything but the
fixed data value, we need to swizzle the routing table to use the new
data value.

Michael can probably describe why this is needed in practice and Alex
why this isn't used in VFIO.

Regards,

Anthony Liguori

>
>
>
> --
> Alexey
Anthony Liguori - June 30, 2013, 2:23 p.m.
On Sat, Jun 29, 2013 at 7:59 PM, Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
> On 06/30/2013 12:28 AM, Anthony Liguori wrote:
>> Perhaps (2) can just be the default PCI bus implementation to simplify things.
>
>
> hw/pci/pci.c does not have any kvm code yet and I would like not to be the
> first person who tries adding this there :)
> But ok, I'll do it.

More often than not, the PHB is part of the northbridge and has direct
interactions with the CPUs.   Since KVM is really modeling the CPUs,
it makes sense that the PHB would need to have some knowledge of KVM.

It's a bit silly for a PCI device like virtio-pci to have direct
knowledge of KVM...

>> You don't need this function.  You can do this overloading as part of
>> the PCI bus initialization in spapr_pci.c
>
>
> pci_bus_set_route_irq_fn is there already and I tried to follow the
> existing pattern (yeah, missed assert though). Or this is different?

Yeah, the existing pattern sucks.  PCI is long overdue for a
significant refactoring.

Regards,

Anthony Liguori

>
>>
>> Regards,
>>
>> Anthony Liguori
>>
>>> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg)
>>> +{
>>> +    if (bus->map_msi) {
>>> +        return bus->map_msi(bus, msg);
>>> +    }
>>> +    return -1;
>>> +}
>>> +
>>>  /*
>>>   * PCI-to-PCI bridge specification
>>>   * 9.1: Interrupt routing. Table 9-1
>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>> index 23dbc0e..bae4faf 100644
>>> --- a/hw/ppc/spapr_pci.c
>>> +++ b/hw/ppc/spapr_pci.c
>>> @@ -486,6 +486,11 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
>>>      qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
>>>  }
>>>
>>> +static int spapr_msi_get_irq(PCIBus *bus, MSIMessage msg)
>>> +{
>>> +    return msg.data;
>>> +}
>>> +
>>>  static const MemoryRegionOps spapr_msi_ops = {
>>>      /* There is no .read as the read result is undefined by PCI spec */
>>>      .read = NULL,
>>> @@ -657,6 +662,7 @@ static int spapr_phb_init(SysBusDevice *s)
>>>
>>>          sphb->lsi_table[i].irq = irq;
>>>      }
>>> +    pci_bus_set_map_msi_fn(bus, spapr_msi_get_irq);
>>>
>>>      return 0;
>>>  }
>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>>> index b070b64..06a4e13 100644
>>> --- a/hw/virtio/virtio-pci.c
>>> +++ b/hw/virtio/virtio-pci.c
>>> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
>>>      int ret;
>>>
>>>      if (irqfd->users == 0) {
>>> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
>>> +        ret = kvm_irqchip_add_msi_route(kvm_state, proxy->pci_dev.bus, msg);
>>>          if (ret < 0) {
>>>              return ret;
>>>          }
>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>> index 6ef1f97..8c1edd6 100644
>>> --- a/include/hw/pci/pci.h
>>> +++ b/include/hw/pci/pci.h
>>> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
>>>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
>>>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
>>>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
>>> +typedef int (*pci_map_msi_fn)(PCIBus *bus, MSIMessage msg);
>>>
>>>  typedef enum {
>>>      PCI_HOTPLUG_DISABLED,
>>> @@ -375,6 +376,9 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>>>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>>>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>>                                            PCIINTxRoutingNotifier notifier);
>>> +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn);
>>> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg);
>>> +
>>>  void pci_device_reset(PCIDevice *dev);
>>>  void pci_bus_reset(PCIBus *bus);
>>>
>>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>>> index 66762f6..81efd2b 100644
>>> --- a/include/hw/pci/pci_bus.h
>>> +++ b/include/hw/pci/pci_bus.h
>>> @@ -16,6 +16,7 @@ struct PCIBus {
>>>      pci_set_irq_fn set_irq;
>>>      pci_map_irq_fn map_irq;
>>>      pci_route_irq_fn route_intx_to_irq;
>>> +    pci_map_msi_fn map_msi;
>>>      pci_hotplug_fn hotplug;
>>>      DeviceState *hotplug_qdev;
>>>      void *irq_opaque;
>>> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
>>> index f404d16..1bf2abe 100644
>>> --- a/include/sysemu/kvm.h
>>> +++ b/include/sysemu/kvm.h
>>> @@ -305,8 +305,8 @@ static inline void cpu_synchronize_post_init(CPUState *cpu)
>>>      }
>>>  }
>>>
>>> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
>>>  int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg);
>>> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg);
>>>  void kvm_irqchip_release_virq(KVMState *s, int virq);
>>>
>>>  int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
>>> diff --git a/kvm-all.c b/kvm-all.c
>>> index 1f81cca..3b7710d 100644
>>> --- a/kvm-all.c
>>> +++ b/kvm-all.c
>>> @@ -1180,11 +1180,16 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
>>>      return kvm_set_irq(s, route->kroute.gsi, 1);
>>>  }
>>>
>>> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
>>> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg)
>>>  {
>>>      struct kvm_irq_routing_entry kroute;
>>>      int virq;
>>>
>>> +    virq = pci_bus_map_msi(pbus, msg);
>>> +    if (virq >= 0) {
>>> +        return virq;
>>> +    }
>>> +
>>>      if (!kvm_gsi_routing_enabled()) {
>>>          return -ENOSYS;
>>>      }
>>> --
>>> 1.7.10.4
>>>
>>>
>
>
> --
> Alexey
Alexey Kardashevskiy - Aug. 19, 2013, 8:10 a.m.
Bring it up to support discussion in
[PATCH 0/2 v4] kvm irqfd: support msimessage to irq translation in PHB


On 06/30/2013 12:28 AM, Anthony Liguori wrote:
> On Sat, Jun 29, 2013 at 8:45 AM, Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
>> host bridge. This is already supported for emulated MSI/MSIX but
>> not for irqfd where the current QEMU allocates IRQ numbers from
>> irqchip and maps MSIMessages to those IRQ in the host kernel.
>>
>> The patch extends irqfd support in order to avoid unnecessary
>> mapping and reuse the one which already exists in a PCI host bridge.
>>
>> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
>> to PCI API. The latter returns -1 if a specific PHB does not provide
>> with any trsnslation so the existing code will work.
> 
> I think there's a bit of confusion here.  The kernel needs a "virq"
> number to create an eventfd.  virq is just a KVM concept, it doesn't
> correspond to anything useful in hardware.
> 
> On pseries, there is a 1-1 mapping between XICS IRQs and VIRQs and MSI
> can be trivially mapped to a virq.
> 
> On x86, we need to call a special kernel function which essentially
> creates an apic message->virq mapping such that we can deliver the
> irqfd.
> 
> So what this should look like is:
> 
> 1) A PCI bus function to do the MSI -> virq mapping
> 2) On x86 (and e500), this is implemented by calling kvm_irqchip_add_msi_route()
> 3) On pseries, this just returns msi->data
> 
> Perhaps (2) can just be the default PCI bus implementation to simplify things.
> 
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>
>> ---
>>
>> Looks like we agreed that in general PHB is the right place for this,
>> not KVM, so I am trying again.
>>
>> Probably something should be done to kvm_irqchip_update_msi_route()
>> as well but I do not really understand what exactly. Any suggestions?
>>
>>
>> ---
>>  hw/misc/vfio.c           |    7 +++++--
>>  hw/pci/pci.c             |   13 +++++++++++++
>>  hw/ppc/spapr_pci.c       |    6 ++++++
>>  hw/virtio/virtio-pci.c   |    2 +-
>>  include/hw/pci/pci.h     |    4 ++++
>>  include/hw/pci/pci_bus.h |    1 +
>>  include/sysemu/kvm.h     |    2 +-
>>  kvm-all.c                |    7 ++++++-
>>  8 files changed, 37 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
>> index 52fb036..59911bb 100644
>> --- a/hw/misc/vfio.c
>> +++ b/hw/misc/vfio.c
>> @@ -624,7 +624,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>       * Attempt to enable route through KVM irqchip,
>>       * default to userspace handling if unavailable.
>>       */
>> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
>> +
>> +    vector->virq = msg ?
>> +            kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus, *msg) : -1;
> 
> This is wrong.  You could call the bus function to map an MSI message
> to a virq here.
> 
>>      if (vector->virq < 0 ||
>>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>                                         vector->virq) < 0) {
>> @@ -792,7 +794,8 @@ retry:
>>           * Attempt to enable route through KVM irqchip,
>>           * default to userspace handling if unavailable.
>>           */
>> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
>> +        vector->virq = kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus,
>> +                                                 msg);
> 
> And here.
> 
>>          if (vector->virq < 0 ||
>>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>                                             vector->virq) < 0) {
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 61b681a..543f172 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -1240,6 +1240,19 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>      dev->intx_routing_notifier = notifier;
>>  }
>>
>> +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn)
>> +{
>> +    bus->map_msi = map_msi_fn;
>> +}
> 
> You don't need this function.  You can do this overloading as part of
> the PCI bus initialization in spapr_pci.c
> 
> Regards,
> 
> Anthony Liguori
> 
>> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg)
>> +{
>> +    if (bus->map_msi) {
>> +        return bus->map_msi(bus, msg);
>> +    }
>> +    return -1;
>> +}
>> +
>>  /*
>>   * PCI-to-PCI bridge specification
>>   * 9.1: Interrupt routing. Table 9-1
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index 23dbc0e..bae4faf 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -486,6 +486,11 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
>>      qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
>>  }
>>
>> +static int spapr_msi_get_irq(PCIBus *bus, MSIMessage msg)
>> +{
>> +    return msg.data;
>> +}
>> +
>>  static const MemoryRegionOps spapr_msi_ops = {
>>      /* There is no .read as the read result is undefined by PCI spec */
>>      .read = NULL,
>> @@ -657,6 +662,7 @@ static int spapr_phb_init(SysBusDevice *s)
>>
>>          sphb->lsi_table[i].irq = irq;
>>      }
>> +    pci_bus_set_map_msi_fn(bus, spapr_msi_get_irq);
>>
>>      return 0;
>>  }
>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>> index b070b64..06a4e13 100644
>> --- a/hw/virtio/virtio-pci.c
>> +++ b/hw/virtio/virtio-pci.c
>> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
>>      int ret;
>>
>>      if (irqfd->users == 0) {
>> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
>> +        ret = kvm_irqchip_add_msi_route(kvm_state, proxy->pci_dev.bus, msg);
>>          if (ret < 0) {
>>              return ret;
>>          }
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index 6ef1f97..8c1edd6 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
>>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
>>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
>>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
>> +typedef int (*pci_map_msi_fn)(PCIBus *bus, MSIMessage msg);
>>
>>  typedef enum {
>>      PCI_HOTPLUG_DISABLED,
>> @@ -375,6 +376,9 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>                                            PCIINTxRoutingNotifier notifier);
>> +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn);
>> +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg);
>> +
>>  void pci_device_reset(PCIDevice *dev);
>>  void pci_bus_reset(PCIBus *bus);
>>
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 66762f6..81efd2b 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -16,6 +16,7 @@ struct PCIBus {
>>      pci_set_irq_fn set_irq;
>>      pci_map_irq_fn map_irq;
>>      pci_route_irq_fn route_intx_to_irq;
>> +    pci_map_msi_fn map_msi;
>>      pci_hotplug_fn hotplug;
>>      DeviceState *hotplug_qdev;
>>      void *irq_opaque;
>> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
>> index f404d16..1bf2abe 100644
>> --- a/include/sysemu/kvm.h
>> +++ b/include/sysemu/kvm.h
>> @@ -305,8 +305,8 @@ static inline void cpu_synchronize_post_init(CPUState *cpu)
>>      }
>>  }
>>
>> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
>>  int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg);
>> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg);
>>  void kvm_irqchip_release_virq(KVMState *s, int virq);
>>
>>  int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
>> diff --git a/kvm-all.c b/kvm-all.c
>> index 1f81cca..3b7710d 100644
>> --- a/kvm-all.c
>> +++ b/kvm-all.c
>> @@ -1180,11 +1180,16 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
>>      return kvm_set_irq(s, route->kroute.gsi, 1);
>>  }
>>
>> -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
>> +int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg)
>>  {
>>      struct kvm_irq_routing_entry kroute;
>>      int virq;
>>
>> +    virq = pci_bus_map_msi(pbus, msg);
>> +    if (virq >= 0) {
>> +        return virq;
>> +    }
>> +
>>      if (!kvm_gsi_routing_enabled()) {
>>          return -ENOSYS;
>>      }
>> --
>> 1.7.10.4
>>
>>

Patch

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 52fb036..59911bb 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -624,7 +624,9 @@  static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
      * Attempt to enable route through KVM irqchip,
      * default to userspace handling if unavailable.
      */
-    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
+
+    vector->virq = msg ?
+            kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus, *msg) : -1;
     if (vector->virq < 0 ||
         kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
                                        vector->virq) < 0) {
@@ -792,7 +794,8 @@  retry:
          * Attempt to enable route through KVM irqchip,
          * default to userspace handling if unavailable.
          */
-        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
+        vector->virq = kvm_irqchip_add_msi_route(kvm_state, vdev->pdev.bus,
+                                                 msg);
         if (vector->virq < 0 ||
             kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
                                            vector->virq) < 0) {
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 61b681a..543f172 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1240,6 +1240,19 @@  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
     dev->intx_routing_notifier = notifier;
 }
 
+void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn)
+{
+    bus->map_msi = map_msi_fn;
+}
+
+int pci_bus_map_msi(PCIBus *bus, MSIMessage msg)
+{
+    if (bus->map_msi) {
+        return bus->map_msi(bus, msg);
+    }
+    return -1;
+}
+
 /*
  * PCI-to-PCI bridge specification
  * 9.1: Interrupt routing. Table 9-1
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 23dbc0e..bae4faf 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -486,6 +486,11 @@  static void spapr_msi_write(void *opaque, hwaddr addr,
     qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
 }
 
+static int spapr_msi_get_irq(PCIBus *bus, MSIMessage msg)
+{
+    return msg.data;
+}
+
 static const MemoryRegionOps spapr_msi_ops = {
     /* There is no .read as the read result is undefined by PCI spec */
     .read = NULL,
@@ -657,6 +662,7 @@  static int spapr_phb_init(SysBusDevice *s)
 
         sphb->lsi_table[i].irq = irq;
     }
+    pci_bus_set_map_msi_fn(bus, spapr_msi_get_irq);
 
     return 0;
 }
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index b070b64..06a4e13 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -481,7 +481,7 @@  static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
     int ret;
 
     if (irqfd->users == 0) {
-        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
+        ret = kvm_irqchip_add_msi_route(kvm_state, proxy->pci_dev.bus, msg);
         if (ret < 0) {
             return ret;
         }
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 6ef1f97..8c1edd6 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -332,6 +332,7 @@  MemoryRegion *pci_address_space_io(PCIDevice *dev);
 typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
 typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
 typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
+typedef int (*pci_map_msi_fn)(PCIBus *bus, MSIMessage msg);
 
 typedef enum {
     PCI_HOTPLUG_DISABLED,
@@ -375,6 +376,9 @@  bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
 void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
 void pci_device_set_intx_routing_notifier(PCIDevice *dev,
                                           PCIINTxRoutingNotifier notifier);
+void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn);
+int pci_bus_map_msi(PCIBus *bus, MSIMessage msg);
+
 void pci_device_reset(PCIDevice *dev);
 void pci_bus_reset(PCIBus *bus);
 
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 66762f6..81efd2b 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -16,6 +16,7 @@  struct PCIBus {
     pci_set_irq_fn set_irq;
     pci_map_irq_fn map_irq;
     pci_route_irq_fn route_intx_to_irq;
+    pci_map_msi_fn map_msi;
     pci_hotplug_fn hotplug;
     DeviceState *hotplug_qdev;
     void *irq_opaque;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index f404d16..1bf2abe 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -305,8 +305,8 @@  static inline void cpu_synchronize_post_init(CPUState *cpu)
     }
 }
 
-int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
 int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg);
+int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg);
 void kvm_irqchip_release_virq(KVMState *s, int virq);
 
 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
diff --git a/kvm-all.c b/kvm-all.c
index 1f81cca..3b7710d 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1180,11 +1180,16 @@  int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
     return kvm_set_irq(s, route->kroute.gsi, 1);
 }
 
-int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
+int kvm_irqchip_add_msi_route(KVMState *s, PCIBus *pbus, MSIMessage msg)
 {
     struct kvm_irq_routing_entry kroute;
     int virq;
 
+    virq = pci_bus_map_msi(pbus, msg);
+    if (virq >= 0) {
+        return virq;
+    }
+
     if (!kvm_gsi_routing_enabled()) {
         return -ENOSYS;
     }