Patchwork [1/2] kvm irqfd: support msimessage to irq translation in PHB

login
register
mail settings
Submitter Alexey Kardashevskiy
Date Aug. 7, 2013, 8:51 a.m.
Message ID <1375865493-19143-2-git-send-email-aik@ozlabs.ru>
Download mbox | patch
Permalink /patch/265413/
State New
Headers show

Comments

Alexey Kardashevskiy - Aug. 7, 2013, 8:51 a.m.
On PPC64 systems MSI Messages are translated to system IRQ in a PCI
host bridge. This is already supported for emulated MSI/MSIX but
not for irqfd where the current QEMU allocates IRQ numbers from
irqchip and maps MSIMessages to those IRQ in the host kernel.

The patch extends irqfd support in order to avoid unnecessary
mapping and reuse the one which already exists in a PCI host bridge.

Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
to PCI API. The latter tries a bus specific map_msi and falls back to
the default kvm_irqchip_add_msi_route() if none set.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

---
Changes:
2013/08/07 v5:
* pci_bus_map_msi now has default behaviour which is to call
kvm_irqchip_add_msi_route
* kvm_irqchip_release_virq fixed not crash when there is no routes
---
 hw/i386/kvm/pci-assign.c | 4 ++--
 hw/misc/vfio.c           | 4 ++--
 hw/pci/pci.c             | 9 +++++++++
 hw/virtio/virtio-pci.c   | 2 +-
 include/hw/pci/pci.h     | 2 ++
 include/hw/pci/pci_bus.h | 1 +
 kvm-all.c                | 3 +++
 7 files changed, 20 insertions(+), 5 deletions(-)
Michael S. Tsirkin - Aug. 19, 2013, 7:35 a.m.
On Wed, Aug 07, 2013 at 06:51:32PM +1000, Alexey Kardashevskiy wrote:
> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
> host bridge. This is already supported for emulated MSI/MSIX but
> not for irqfd where the current QEMU allocates IRQ numbers from
> irqchip and maps MSIMessages to those IRQ in the host kernel.
> 
> The patch extends irqfd support in order to avoid unnecessary
> mapping and reuse the one which already exists in a PCI host bridge.
> 
> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
> to PCI API. The latter tries a bus specific map_msi and falls back to
> the default kvm_irqchip_add_msi_route() if none set.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

The mapping (irq # within data) is really KVM on PPC64 specific,
isn't it?

So why not implement
kvm_irqchip_add_msi_route(kvm_state, msg);
to simply return msg.data on PPC64?

Then you won't have to change all the rest of the code.

> ---
> Changes:
> 2013/08/07 v5:
> * pci_bus_map_msi now has default behaviour which is to call
> kvm_irqchip_add_msi_route
> * kvm_irqchip_release_virq fixed not crash when there is no routes
> ---
>  hw/i386/kvm/pci-assign.c | 4 ++--
>  hw/misc/vfio.c           | 4 ++--
>  hw/pci/pci.c             | 9 +++++++++
>  hw/virtio/virtio-pci.c   | 2 +-
>  include/hw/pci/pci.h     | 2 ++
>  include/hw/pci/pci_bus.h | 1 +
>  kvm-all.c                | 3 +++
>  7 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
> index 5618173..fb59982 100644
> --- a/hw/i386/kvm/pci-assign.c
> +++ b/hw/i386/kvm/pci-assign.c
> @@ -1008,9 +1008,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
>          MSIMessage msg = msi_get_message(pci_dev, 0);
>          int virq;
>  
> -        virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        virq = pci_bus_map_msi(kvm_state, pci_dev->bus, msg);
>          if (virq < 0) {
> -            perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
> +            perror("assigned_dev_update_msi: pci_bus_map_msi");
>              return;
>          }
>

This really confuses me. Why are you changing i386 code?

  
> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> index 017e693..adcd23d 100644
> --- a/hw/misc/vfio.c
> +++ b/hw/misc/vfio.c
> @@ -643,7 +643,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>       * Attempt to enable route through KVM irqchip,
>       * default to userspace handling if unavailable.
>       */
> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
> +    vector->virq = msg ? pci_bus_map_msi(kvm_state, vdev->pdev.bus, *msg) : -1;
>      if (vector->virq < 0 ||
>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>                                         vector->virq) < 0) {
> @@ -811,7 +811,7 @@ retry:
>           * Attempt to enable route through KVM irqchip,
>           * default to userspace handling if unavailable.
>           */
> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        vector->virq = pci_bus_map_msi(kvm_state, vdev->pdev.bus, msg);
>          if (vector->virq < 0 ||
>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>                                             vector->virq) < 0) {
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 4c004f5..4bce3e7 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -1249,6 +1249,15 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>      dev->intx_routing_notifier = notifier;
>  }
>  
> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg)
> +{
> +    if (bus->map_msi) {
> +        return bus->map_msi(s, bus, msg);
> +    }
> +
> +    return kvm_irqchip_add_msi_route(s, msg);
> +}
> +
>  /*
>   * PCI-to-PCI bridge specification
>   * 9.1: Interrupt routing. Table 9-1
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index d37037e..21180d2 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
>      int ret;
>  
>      if (irqfd->users == 0) {
> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
> +        ret = pci_bus_map_msi(kvm_state, proxy->pci_dev.bus, msg);
>          if (ret < 0) {
>              return ret;
>          }
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index ccec2ba..b6ad9e4 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
> +typedef int (*pci_map_msi_fn)(KVMState *s, PCIBus *bus, MSIMessage msg);
>  
>  typedef enum {
>      PCI_HOTPLUG_DISABLED,
> @@ -375,6 +376,7 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>                                            PCIINTxRoutingNotifier notifier);
> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg);
>  void pci_device_reset(PCIDevice *dev);
>  void pci_bus_reset(PCIBus *bus);
>  
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 9df1788..5bf7994 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -16,6 +16,7 @@ struct PCIBus {
>      pci_set_irq_fn set_irq;
>      pci_map_irq_fn map_irq;
>      pci_route_irq_fn route_intx_to_irq;
> +    pci_map_msi_fn map_msi;
>      pci_hotplug_fn hotplug;
>      DeviceState *hotplug_qdev;
>      void *irq_opaque;
> diff --git a/kvm-all.c b/kvm-all.c
> index 716860f..3ae5274 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1069,6 +1069,9 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
>      struct kvm_irq_routing_entry *e;
>      int i;
>  
> +    if (!s->irq_routes) {
> +        return;
> +    }
>      for (i = 0; i < s->irq_routes->nr; i++) {
>          e = &s->irq_routes->entries[i];
>          if (e->gsi == virq) {
> -- 
> 1.8.3.2
Alexey Kardashevskiy - Aug. 19, 2013, 7:44 a.m.
On 08/19/2013 05:35 PM, Michael S. Tsirkin wrote:
> On Wed, Aug 07, 2013 at 06:51:32PM +1000, Alexey Kardashevskiy wrote:
>> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
>> host bridge. This is already supported for emulated MSI/MSIX but
>> not for irqfd where the current QEMU allocates IRQ numbers from
>> irqchip and maps MSIMessages to those IRQ in the host kernel.
>>
>> The patch extends irqfd support in order to avoid unnecessary
>> mapping and reuse the one which already exists in a PCI host bridge.
>>
>> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
>> to PCI API. The latter tries a bus specific map_msi and falls back to
>> the default kvm_irqchip_add_msi_route() if none set.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> The mapping (irq # within data) is really KVM on PPC64 specific,
> isn't it?
> 
> So why not implement
> kvm_irqchip_add_msi_route(kvm_state, msg);
> to simply return msg.data on PPC64?

How exactly? Please give some details. kvm_irqchip_add_msi_route() is
implemented in kvm-all.c once for all platforms so hack it right there?

I thought we discussed all of this already and decided that this thing
should go to PCI host bridge and by default PHB's map_msi() callback should
just call kvm_irqchip_add_msi_route(). This is why I touched i386.

Things have changed since then?



> Then you won't have to change all the rest of the code.
> 
>> ---
>> Changes:
>> 2013/08/07 v5:
>> * pci_bus_map_msi now has default behaviour which is to call
>> kvm_irqchip_add_msi_route
>> * kvm_irqchip_release_virq fixed not crash when there is no routes
>> ---
>>  hw/i386/kvm/pci-assign.c | 4 ++--
>>  hw/misc/vfio.c           | 4 ++--
>>  hw/pci/pci.c             | 9 +++++++++
>>  hw/virtio/virtio-pci.c   | 2 +-
>>  include/hw/pci/pci.h     | 2 ++
>>  include/hw/pci/pci_bus.h | 1 +
>>  kvm-all.c                | 3 +++
>>  7 files changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
>> index 5618173..fb59982 100644
>> --- a/hw/i386/kvm/pci-assign.c
>> +++ b/hw/i386/kvm/pci-assign.c
>> @@ -1008,9 +1008,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
>>          MSIMessage msg = msi_get_message(pci_dev, 0);
>>          int virq;
>>  
>> -        virq = kvm_irqchip_add_msi_route(kvm_state, msg);
>> +        virq = pci_bus_map_msi(kvm_state, pci_dev->bus, msg);
>>          if (virq < 0) {
>> -            perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
>> +            perror("assigned_dev_update_msi: pci_bus_map_msi");
>>              return;
>>          }
>>
> 
> This really confuses me. Why are you changing i386 code?
> 
>   
>> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
>> index 017e693..adcd23d 100644
>> --- a/hw/misc/vfio.c
>> +++ b/hw/misc/vfio.c
>> @@ -643,7 +643,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>       * Attempt to enable route through KVM irqchip,
>>       * default to userspace handling if unavailable.
>>       */
>> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
>> +    vector->virq = msg ? pci_bus_map_msi(kvm_state, vdev->pdev.bus, *msg) : -1;
>>      if (vector->virq < 0 ||
>>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>                                         vector->virq) < 0) {
>> @@ -811,7 +811,7 @@ retry:
>>           * Attempt to enable route through KVM irqchip,
>>           * default to userspace handling if unavailable.
>>           */
>> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
>> +        vector->virq = pci_bus_map_msi(kvm_state, vdev->pdev.bus, msg);
>>          if (vector->virq < 0 ||
>>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>                                             vector->virq) < 0) {
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 4c004f5..4bce3e7 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -1249,6 +1249,15 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>      dev->intx_routing_notifier = notifier;
>>  }
>>  
>> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg)
>> +{
>> +    if (bus->map_msi) {
>> +        return bus->map_msi(s, bus, msg);
>> +    }
>> +
>> +    return kvm_irqchip_add_msi_route(s, msg);
>> +}
>> +
>>  /*
>>   * PCI-to-PCI bridge specification
>>   * 9.1: Interrupt routing. Table 9-1
>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>> index d37037e..21180d2 100644
>> --- a/hw/virtio/virtio-pci.c
>> +++ b/hw/virtio/virtio-pci.c
>> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
>>      int ret;
>>  
>>      if (irqfd->users == 0) {
>> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
>> +        ret = pci_bus_map_msi(kvm_state, proxy->pci_dev.bus, msg);
>>          if (ret < 0) {
>>              return ret;
>>          }
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index ccec2ba..b6ad9e4 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
>>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
>>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
>>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
>> +typedef int (*pci_map_msi_fn)(KVMState *s, PCIBus *bus, MSIMessage msg);
>>  
>>  typedef enum {
>>      PCI_HOTPLUG_DISABLED,
>> @@ -375,6 +376,7 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>                                            PCIINTxRoutingNotifier notifier);
>> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg);
>>  void pci_device_reset(PCIDevice *dev);
>>  void pci_bus_reset(PCIBus *bus);
>>  
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 9df1788..5bf7994 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -16,6 +16,7 @@ struct PCIBus {
>>      pci_set_irq_fn set_irq;
>>      pci_map_irq_fn map_irq;
>>      pci_route_irq_fn route_intx_to_irq;
>> +    pci_map_msi_fn map_msi;
>>      pci_hotplug_fn hotplug;
>>      DeviceState *hotplug_qdev;
>>      void *irq_opaque;
>> diff --git a/kvm-all.c b/kvm-all.c
>> index 716860f..3ae5274 100644
>> --- a/kvm-all.c
>> +++ b/kvm-all.c
>> @@ -1069,6 +1069,9 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
>>      struct kvm_irq_routing_entry *e;
>>      int i;
>>  
>> +    if (!s->irq_routes) {
>> +        return;
>> +    }
>>      for (i = 0; i < s->irq_routes->nr; i++) {
>>          e = &s->irq_routes->entries[i];
>>          if (e->gsi == virq) {
>> -- 
>> 1.8.3.2
Michael S. Tsirkin - Aug. 19, 2013, 7:54 a.m.
On Mon, Aug 19, 2013 at 05:44:04PM +1000, Alexey Kardashevskiy wrote:
> On 08/19/2013 05:35 PM, Michael S. Tsirkin wrote:
> > On Wed, Aug 07, 2013 at 06:51:32PM +1000, Alexey Kardashevskiy wrote:
> >> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
> >> host bridge. This is already supported for emulated MSI/MSIX but
> >> not for irqfd where the current QEMU allocates IRQ numbers from
> >> irqchip and maps MSIMessages to those IRQ in the host kernel.
> >>
> >> The patch extends irqfd support in order to avoid unnecessary
> >> mapping and reuse the one which already exists in a PCI host bridge.
> >>
> >> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
> >> to PCI API. The latter tries a bus specific map_msi and falls back to
> >> the default kvm_irqchip_add_msi_route() if none set.
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> > 
> > The mapping (irq # within data) is really KVM on PPC64 specific,
> > isn't it?
> > 
> > So why not implement
> > kvm_irqchip_add_msi_route(kvm_state, msg);
> > to simply return msg.data on PPC64?
> 
> How exactly? Please give some details. kvm_irqchip_add_msi_route() is
> implemented in kvm-all.c once for all platforms so hack it right there?

You can add the map_msi callback in kvm state,
then just call it if it's set, right?

> I thought we discussed all of this already and decided that this thing
> should go to PCI host bridge and by default PHB's map_msi() callback should
> just call kvm_irqchip_add_msi_route(). This is why I touched i386.
> 
> Things have changed since then?


I think pci_bus_map_msi was there since day 1, it's not like
we are going back and forth on it, right?

I always felt it's best to have irqfd isolated to kvm somehow
rather than have hooks in pci code, I just don't know enough
about kvm on ppc to figure out the right way to do this.
With your latest patches I think this is becoming clearer ...

> 
> > Then you won't have to change all the rest of the code.
> > 
> >> ---
> >> Changes:
> >> 2013/08/07 v5:
> >> * pci_bus_map_msi now has default behaviour which is to call
> >> kvm_irqchip_add_msi_route
> >> * kvm_irqchip_release_virq fixed not crash when there is no routes
> >> ---
> >>  hw/i386/kvm/pci-assign.c | 4 ++--
> >>  hw/misc/vfio.c           | 4 ++--
> >>  hw/pci/pci.c             | 9 +++++++++
> >>  hw/virtio/virtio-pci.c   | 2 +-
> >>  include/hw/pci/pci.h     | 2 ++
> >>  include/hw/pci/pci_bus.h | 1 +
> >>  kvm-all.c                | 3 +++
> >>  7 files changed, 20 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
> >> index 5618173..fb59982 100644
> >> --- a/hw/i386/kvm/pci-assign.c
> >> +++ b/hw/i386/kvm/pci-assign.c
> >> @@ -1008,9 +1008,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
> >>          MSIMessage msg = msi_get_message(pci_dev, 0);
> >>          int virq;
> >>  
> >> -        virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> >> +        virq = pci_bus_map_msi(kvm_state, pci_dev->bus, msg);
> >>          if (virq < 0) {
> >> -            perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
> >> +            perror("assigned_dev_update_msi: pci_bus_map_msi");
> >>              return;
> >>          }
> >>
> > 
> > This really confuses me. Why are you changing i386 code?
> > 
> >   
> >> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> >> index 017e693..adcd23d 100644
> >> --- a/hw/misc/vfio.c
> >> +++ b/hw/misc/vfio.c
> >> @@ -643,7 +643,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
> >>       * Attempt to enable route through KVM irqchip,
> >>       * default to userspace handling if unavailable.
> >>       */
> >> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
> >> +    vector->virq = msg ? pci_bus_map_msi(kvm_state, vdev->pdev.bus, *msg) : -1;
> >>      if (vector->virq < 0 ||
> >>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
> >>                                         vector->virq) < 0) {
> >> @@ -811,7 +811,7 @@ retry:
> >>           * Attempt to enable route through KVM irqchip,
> >>           * default to userspace handling if unavailable.
> >>           */
> >> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> >> +        vector->virq = pci_bus_map_msi(kvm_state, vdev->pdev.bus, msg);
> >>          if (vector->virq < 0 ||
> >>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
> >>                                             vector->virq) < 0) {
> >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >> index 4c004f5..4bce3e7 100644
> >> --- a/hw/pci/pci.c
> >> +++ b/hw/pci/pci.c
> >> @@ -1249,6 +1249,15 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
> >>      dev->intx_routing_notifier = notifier;
> >>  }
> >>  
> >> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg)
> >> +{
> >> +    if (bus->map_msi) {
> >> +        return bus->map_msi(s, bus, msg);
> >> +    }
> >> +
> >> +    return kvm_irqchip_add_msi_route(s, msg);
> >> +}
> >> +
> >>  /*
> >>   * PCI-to-PCI bridge specification
> >>   * 9.1: Interrupt routing. Table 9-1
> >> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> >> index d37037e..21180d2 100644
> >> --- a/hw/virtio/virtio-pci.c
> >> +++ b/hw/virtio/virtio-pci.c
> >> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
> >>      int ret;
> >>  
> >>      if (irqfd->users == 0) {
> >> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
> >> +        ret = pci_bus_map_msi(kvm_state, proxy->pci_dev.bus, msg);
> >>          if (ret < 0) {
> >>              return ret;
> >>          }
> >> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> >> index ccec2ba..b6ad9e4 100644
> >> --- a/include/hw/pci/pci.h
> >> +++ b/include/hw/pci/pci.h
> >> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
> >>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
> >>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
> >>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
> >> +typedef int (*pci_map_msi_fn)(KVMState *s, PCIBus *bus, MSIMessage msg);
> >>  
> >>  typedef enum {
> >>      PCI_HOTPLUG_DISABLED,
> >> @@ -375,6 +376,7 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
> >>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
> >>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
> >>                                            PCIINTxRoutingNotifier notifier);
> >> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg);
> >>  void pci_device_reset(PCIDevice *dev);
> >>  void pci_bus_reset(PCIBus *bus);
> >>  
> >> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> >> index 9df1788..5bf7994 100644
> >> --- a/include/hw/pci/pci_bus.h
> >> +++ b/include/hw/pci/pci_bus.h
> >> @@ -16,6 +16,7 @@ struct PCIBus {
> >>      pci_set_irq_fn set_irq;
> >>      pci_map_irq_fn map_irq;
> >>      pci_route_irq_fn route_intx_to_irq;
> >> +    pci_map_msi_fn map_msi;
> >>      pci_hotplug_fn hotplug;
> >>      DeviceState *hotplug_qdev;
> >>      void *irq_opaque;
> >> diff --git a/kvm-all.c b/kvm-all.c
> >> index 716860f..3ae5274 100644
> >> --- a/kvm-all.c
> >> +++ b/kvm-all.c
> >> @@ -1069,6 +1069,9 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
> >>      struct kvm_irq_routing_entry *e;
> >>      int i;
> >>  
> >> +    if (!s->irq_routes) {
> >> +        return;
> >> +    }
> >>      for (i = 0; i < s->irq_routes->nr; i++) {
> >>          e = &s->irq_routes->entries[i];
> >>          if (e->gsi == virq) {
> >> -- 
> >> 1.8.3.2
> 
> 
> -- 
> Alexey
Alexey Kardashevskiy - Aug. 19, 2013, 8:10 a.m.
On 08/19/2013 05:54 PM, Michael S. Tsirkin wrote:
> On Mon, Aug 19, 2013 at 05:44:04PM +1000, Alexey Kardashevskiy wrote:
>> On 08/19/2013 05:35 PM, Michael S. Tsirkin wrote:
>>> On Wed, Aug 07, 2013 at 06:51:32PM +1000, Alexey Kardashevskiy wrote:
>>>> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
>>>> host bridge. This is already supported for emulated MSI/MSIX but
>>>> not for irqfd where the current QEMU allocates IRQ numbers from
>>>> irqchip and maps MSIMessages to those IRQ in the host kernel.
>>>>
>>>> The patch extends irqfd support in order to avoid unnecessary
>>>> mapping and reuse the one which already exists in a PCI host bridge.
>>>>
>>>> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
>>>> to PCI API. The latter tries a bus specific map_msi and falls back to
>>>> the default kvm_irqchip_add_msi_route() if none set.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>
>>> The mapping (irq # within data) is really KVM on PPC64 specific,
>>> isn't it?
>>>
>>> So why not implement
>>> kvm_irqchip_add_msi_route(kvm_state, msg);
>>> to simply return msg.data on PPC64?
>>
>> How exactly? Please give some details. kvm_irqchip_add_msi_route() is
>> implemented in kvm-all.c once for all platforms so hack it right there?
> 
> You can add the map_msi callback in kvm state,
> then just call it if it's set, right?
> 
>> I thought we discussed all of this already and decided that this thing
>> should go to PCI host bridge and by default PHB's map_msi() callback should
>> just call kvm_irqchip_add_msi_route(). This is why I touched i386.
>>
>> Things have changed since then?
> 
> 
> I think pci_bus_map_msi was there since day 1, it's not like
> we are going back and forth on it, right?


Sorry, I am not following you. pci_bus_map_msi() is added by this patch.
Where was it from day 1?


> I always felt it's best to have irqfd isolated to kvm somehow
> rather than have hooks in pci code, I just don't know enough
> about kvm on ppc to figure out the right way to do this.
> With your latest patches I think this is becoming clearer ...


Well... This encoding (irq# as msg.data) is used in spapr_pci.c in any
case, whether it is KVM or TCG.

I am confused.
1.5 month ago Anthony suggested (I'll answer to that mail to bring that
discussion up) to do this thing as:

> So what this should look like is:
>
> 1) A PCI bus function to do the MSI -> virq mapping
> 2) On x86 (and e500), this is implemented by calling
kvm_irqchip_add_msi_route()
> 3) On pseries, this just returns msi->data
>
> Perhaps (2) can just be the default PCI bus implementation to simplify
things.


Now it is not right. Anthony, please help.



>>> Then you won't have to change all the rest of the code.
>>>
>>>> ---
>>>> Changes:
>>>> 2013/08/07 v5:
>>>> * pci_bus_map_msi now has default behaviour which is to call
>>>> kvm_irqchip_add_msi_route
>>>> * kvm_irqchip_release_virq fixed not crash when there is no routes
>>>> ---
>>>>  hw/i386/kvm/pci-assign.c | 4 ++--
>>>>  hw/misc/vfio.c           | 4 ++--
>>>>  hw/pci/pci.c             | 9 +++++++++
>>>>  hw/virtio/virtio-pci.c   | 2 +-
>>>>  include/hw/pci/pci.h     | 2 ++
>>>>  include/hw/pci/pci_bus.h | 1 +
>>>>  kvm-all.c                | 3 +++
>>>>  7 files changed, 20 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
>>>> index 5618173..fb59982 100644
>>>> --- a/hw/i386/kvm/pci-assign.c
>>>> +++ b/hw/i386/kvm/pci-assign.c
>>>> @@ -1008,9 +1008,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
>>>>          MSIMessage msg = msi_get_message(pci_dev, 0);
>>>>          int virq;
>>>>  
>>>> -        virq = kvm_irqchip_add_msi_route(kvm_state, msg);
>>>> +        virq = pci_bus_map_msi(kvm_state, pci_dev->bus, msg);
>>>>          if (virq < 0) {
>>>> -            perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
>>>> +            perror("assigned_dev_update_msi: pci_bus_map_msi");
>>>>              return;
>>>>          }
>>>>
>>>
>>> This really confuses me. Why are you changing i386 code?
>>>
>>>   
>>>> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
>>>> index 017e693..adcd23d 100644
>>>> --- a/hw/misc/vfio.c
>>>> +++ b/hw/misc/vfio.c
>>>> @@ -643,7 +643,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>>>       * Attempt to enable route through KVM irqchip,
>>>>       * default to userspace handling if unavailable.
>>>>       */
>>>> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
>>>> +    vector->virq = msg ? pci_bus_map_msi(kvm_state, vdev->pdev.bus, *msg) : -1;
>>>>      if (vector->virq < 0 ||
>>>>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>>>                                         vector->virq) < 0) {
>>>> @@ -811,7 +811,7 @@ retry:
>>>>           * Attempt to enable route through KVM irqchip,
>>>>           * default to userspace handling if unavailable.
>>>>           */
>>>> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
>>>> +        vector->virq = pci_bus_map_msi(kvm_state, vdev->pdev.bus, msg);
>>>>          if (vector->virq < 0 ||
>>>>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>>>                                             vector->virq) < 0) {
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index 4c004f5..4bce3e7 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -1249,6 +1249,15 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>>>      dev->intx_routing_notifier = notifier;
>>>>  }
>>>>  
>>>> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg)
>>>> +{
>>>> +    if (bus->map_msi) {
>>>> +        return bus->map_msi(s, bus, msg);
>>>> +    }
>>>> +
>>>> +    return kvm_irqchip_add_msi_route(s, msg);
>>>> +}
>>>> +
>>>>  /*
>>>>   * PCI-to-PCI bridge specification
>>>>   * 9.1: Interrupt routing. Table 9-1
>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>>>> index d37037e..21180d2 100644
>>>> --- a/hw/virtio/virtio-pci.c
>>>> +++ b/hw/virtio/virtio-pci.c
>>>> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
>>>>      int ret;
>>>>  
>>>>      if (irqfd->users == 0) {
>>>> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
>>>> +        ret = pci_bus_map_msi(kvm_state, proxy->pci_dev.bus, msg);
>>>>          if (ret < 0) {
>>>>              return ret;
>>>>          }
>>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>>> index ccec2ba..b6ad9e4 100644
>>>> --- a/include/hw/pci/pci.h
>>>> +++ b/include/hw/pci/pci.h
>>>> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
>>>>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
>>>>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
>>>>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
>>>> +typedef int (*pci_map_msi_fn)(KVMState *s, PCIBus *bus, MSIMessage msg);
>>>>  
>>>>  typedef enum {
>>>>      PCI_HOTPLUG_DISABLED,
>>>> @@ -375,6 +376,7 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>>>>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>>>>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>>>                                            PCIINTxRoutingNotifier notifier);
>>>> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg);
>>>>  void pci_device_reset(PCIDevice *dev);
>>>>  void pci_bus_reset(PCIBus *bus);
>>>>  
>>>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>>>> index 9df1788..5bf7994 100644
>>>> --- a/include/hw/pci/pci_bus.h
>>>> +++ b/include/hw/pci/pci_bus.h
>>>> @@ -16,6 +16,7 @@ struct PCIBus {
>>>>      pci_set_irq_fn set_irq;
>>>>      pci_map_irq_fn map_irq;
>>>>      pci_route_irq_fn route_intx_to_irq;
>>>> +    pci_map_msi_fn map_msi;
>>>>      pci_hotplug_fn hotplug;
>>>>      DeviceState *hotplug_qdev;
>>>>      void *irq_opaque;
>>>> diff --git a/kvm-all.c b/kvm-all.c
>>>> index 716860f..3ae5274 100644
>>>> --- a/kvm-all.c
>>>> +++ b/kvm-all.c
>>>> @@ -1069,6 +1069,9 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
>>>>      struct kvm_irq_routing_entry *e;
>>>>      int i;
>>>>  
>>>> +    if (!s->irq_routes) {
>>>> +        return;
>>>> +    }
>>>>      for (i = 0; i < s->irq_routes->nr; i++) {
>>>>          e = &s->irq_routes->entries[i];
>>>>          if (e->gsi == virq) {
>>>> -- 
>>>> 1.8.3.2
>>
>>
>> -- 
>> Alexey
Michael S. Tsirkin - Aug. 19, 2013, 9:01 a.m.
On Mon, Aug 19, 2013 at 06:10:01PM +1000, Alexey Kardashevskiy wrote:
> On 08/19/2013 05:54 PM, Michael S. Tsirkin wrote:
> > On Mon, Aug 19, 2013 at 05:44:04PM +1000, Alexey Kardashevskiy wrote:
> >> On 08/19/2013 05:35 PM, Michael S. Tsirkin wrote:
> >>> On Wed, Aug 07, 2013 at 06:51:32PM +1000, Alexey Kardashevskiy wrote:
> >>>> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
> >>>> host bridge. This is already supported for emulated MSI/MSIX but
> >>>> not for irqfd where the current QEMU allocates IRQ numbers from
> >>>> irqchip and maps MSIMessages to those IRQ in the host kernel.
> >>>>
> >>>> The patch extends irqfd support in order to avoid unnecessary
> >>>> mapping and reuse the one which already exists in a PCI host bridge.
> >>>>
> >>>> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
> >>>> to PCI API. The latter tries a bus specific map_msi and falls back to
> >>>> the default kvm_irqchip_add_msi_route() if none set.
> >>>>
> >>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>
> >>> The mapping (irq # within data) is really KVM on PPC64 specific,
> >>> isn't it?
> >>>
> >>> So why not implement
> >>> kvm_irqchip_add_msi_route(kvm_state, msg);
> >>> to simply return msg.data on PPC64?
> >>
> >> How exactly? Please give some details. kvm_irqchip_add_msi_route() is
> >> implemented in kvm-all.c once for all platforms so hack it right there?
> > 
> > You can add the map_msi callback in kvm state,
> > then just call it if it's set, right?
> > 
> >> I thought we discussed all of this already and decided that this thing
> >> should go to PCI host bridge and by default PHB's map_msi() callback should
> >> just call kvm_irqchip_add_msi_route(). This is why I touched i386.
> >>
> >> Things have changed since then?
> > 
> > 
> > I think pci_bus_map_msi was there since day 1, it's not like
> > we are going back and forth on it, right?
> 
> 
> Sorry, I am not following you. pci_bus_map_msi() is added by this patch.
> Where was it from day 1?

I'm sorry. I merely meant that it's there in v1 of this patch.

> 
> > I always felt it's best to have irqfd isolated to kvm somehow
> > rather than have hooks in pci code, I just don't know enough
> > about kvm on ppc to figure out the right way to do this.
> > With your latest patches I think this is becoming clearer ...
> 
> 
> Well... This encoding (irq# as msg.data) is used in spapr_pci.c in any
> case, whether it is KVM or TCG.

Not only on TCG. All emulated devices merely do a write to send an MSI,
this is exactly what happens with real hardware.  If this happens to
land in the MSI region, you get an interrupt.  The concept of mapping
msi to irq normally doesn't belong in devices/pci core, it's something
done by APIC or pci host.

For KVM, we have this special hook where devices allocate a route and
then Linux can send an interrupt to guest quickly bypassing QEMU.  It
was originally designed for paravirtualization, so it doesn't support
strange cases such as guest programming MSI to write into guest memory,
which is possible with real PCI devices. However, no one seems to do
these hacks in practice, so in practice this works for
some other cases, such as device assignment.

That's why we have kvm_irqchip_add_msi_route in some places
in code - it's so specific devices implemented by
Linux can take this shortcut (it does not make sense for
devices implemented by qemu).
So the way I see it, it's not a PCI thing at all, it's
a KVM specific implementation, so it seems cleaner if
it's isolated there.

Now, we have this allocate/free API for reasons that
have to do with the API of kvm on x86. spapr doesn't need to
allocate/free resources, so just shortcut free and
do map when we allocate.

Doesn't this sound reasonable?

> 
> I am confused.
> 1.5 month ago Anthony suggested (I'll answer to that mail to bring that
> discussion up) to do this thing as:
> 
> > So what this should look like is:
> >
> > 1) A PCI bus function to do the MSI -> virq mapping
> > 2) On x86 (and e500), this is implemented by calling
> kvm_irqchip_add_msi_route()
> > 3) On pseries, this just returns msi->data
> >
> > Perhaps (2) can just be the default PCI bus implementation to simplify
> things.
> 
> 
> Now it is not right. Anthony, please help.

That's right, you implemented exactly what Anthony suggested.  Now that
you did, I think I see an even better, more contained way to do this.
And it's not much of a change as compared to what you have, is it?

I'm sorry if this looks like you are given a run-around,
not everyone understands how spapr works, sometimes
it takes a full implementation to make everyone understand
the issues.

But I agree, let's see what Anthony thinks, maybe I
misunderstood how spapr works.

> 
> >>> Then you won't have to change all the rest of the code.
> >>>
> >>>> ---
> >>>> Changes:
> >>>> 2013/08/07 v5:
> >>>> * pci_bus_map_msi now has default behaviour which is to call
> >>>> kvm_irqchip_add_msi_route
> >>>> * kvm_irqchip_release_virq fixed not crash when there is no routes
> >>>> ---
> >>>>  hw/i386/kvm/pci-assign.c | 4 ++--
> >>>>  hw/misc/vfio.c           | 4 ++--
> >>>>  hw/pci/pci.c             | 9 +++++++++
> >>>>  hw/virtio/virtio-pci.c   | 2 +-
> >>>>  include/hw/pci/pci.h     | 2 ++
> >>>>  include/hw/pci/pci_bus.h | 1 +
> >>>>  kvm-all.c                | 3 +++
> >>>>  7 files changed, 20 insertions(+), 5 deletions(-)
> >>>>
> >>>> diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
> >>>> index 5618173..fb59982 100644
> >>>> --- a/hw/i386/kvm/pci-assign.c
> >>>> +++ b/hw/i386/kvm/pci-assign.c
> >>>> @@ -1008,9 +1008,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
> >>>>          MSIMessage msg = msi_get_message(pci_dev, 0);
> >>>>          int virq;
> >>>>  
> >>>> -        virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> >>>> +        virq = pci_bus_map_msi(kvm_state, pci_dev->bus, msg);
> >>>>          if (virq < 0) {
> >>>> -            perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
> >>>> +            perror("assigned_dev_update_msi: pci_bus_map_msi");
> >>>>              return;
> >>>>          }
> >>>>
> >>>
> >>> This really confuses me. Why are you changing i386 code?
> >>>
> >>>   
> >>>> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> >>>> index 017e693..adcd23d 100644
> >>>> --- a/hw/misc/vfio.c
> >>>> +++ b/hw/misc/vfio.c
> >>>> @@ -643,7 +643,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
> >>>>       * Attempt to enable route through KVM irqchip,
> >>>>       * default to userspace handling if unavailable.
> >>>>       */
> >>>> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
> >>>> +    vector->virq = msg ? pci_bus_map_msi(kvm_state, vdev->pdev.bus, *msg) : -1;
> >>>>      if (vector->virq < 0 ||
> >>>>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
> >>>>                                         vector->virq) < 0) {
> >>>> @@ -811,7 +811,7 @@ retry:
> >>>>           * Attempt to enable route through KVM irqchip,
> >>>>           * default to userspace handling if unavailable.
> >>>>           */
> >>>> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
> >>>> +        vector->virq = pci_bus_map_msi(kvm_state, vdev->pdev.bus, msg);
> >>>>          if (vector->virq < 0 ||
> >>>>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
> >>>>                                             vector->virq) < 0) {
> >>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>>> index 4c004f5..4bce3e7 100644
> >>>> --- a/hw/pci/pci.c
> >>>> +++ b/hw/pci/pci.c
> >>>> @@ -1249,6 +1249,15 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
> >>>>      dev->intx_routing_notifier = notifier;
> >>>>  }
> >>>>  
> >>>> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg)
> >>>> +{
> >>>> +    if (bus->map_msi) {
> >>>> +        return bus->map_msi(s, bus, msg);
> >>>> +    }
> >>>> +
> >>>> +    return kvm_irqchip_add_msi_route(s, msg);
> >>>> +}
> >>>> +
> >>>>  /*
> >>>>   * PCI-to-PCI bridge specification
> >>>>   * 9.1: Interrupt routing. Table 9-1
> >>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> >>>> index d37037e..21180d2 100644
> >>>> --- a/hw/virtio/virtio-pci.c
> >>>> +++ b/hw/virtio/virtio-pci.c
> >>>> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
> >>>>      int ret;
> >>>>  
> >>>>      if (irqfd->users == 0) {
> >>>> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
> >>>> +        ret = pci_bus_map_msi(kvm_state, proxy->pci_dev.bus, msg);
> >>>>          if (ret < 0) {
> >>>>              return ret;
> >>>>          }
> >>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> >>>> index ccec2ba..b6ad9e4 100644
> >>>> --- a/include/hw/pci/pci.h
> >>>> +++ b/include/hw/pci/pci.h
> >>>> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
> >>>>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
> >>>>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
> >>>>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
> >>>> +typedef int (*pci_map_msi_fn)(KVMState *s, PCIBus *bus, MSIMessage msg);
> >>>>  
> >>>>  typedef enum {
> >>>>      PCI_HOTPLUG_DISABLED,
> >>>> @@ -375,6 +376,7 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
> >>>>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
> >>>>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
> >>>>                                            PCIINTxRoutingNotifier notifier);
> >>>> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg);
> >>>>  void pci_device_reset(PCIDevice *dev);
> >>>>  void pci_bus_reset(PCIBus *bus);
> >>>>  
> >>>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> >>>> index 9df1788..5bf7994 100644
> >>>> --- a/include/hw/pci/pci_bus.h
> >>>> +++ b/include/hw/pci/pci_bus.h
> >>>> @@ -16,6 +16,7 @@ struct PCIBus {
> >>>>      pci_set_irq_fn set_irq;
> >>>>      pci_map_irq_fn map_irq;
> >>>>      pci_route_irq_fn route_intx_to_irq;
> >>>> +    pci_map_msi_fn map_msi;
> >>>>      pci_hotplug_fn hotplug;
> >>>>      DeviceState *hotplug_qdev;
> >>>>      void *irq_opaque;
> >>>> diff --git a/kvm-all.c b/kvm-all.c
> >>>> index 716860f..3ae5274 100644
> >>>> --- a/kvm-all.c
> >>>> +++ b/kvm-all.c
> >>>> @@ -1069,6 +1069,9 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
> >>>>      struct kvm_irq_routing_entry *e;
> >>>>      int i;
> >>>>  
> >>>> +    if (!s->irq_routes) {
> >>>> +        return;
> >>>> +    }
> >>>>      for (i = 0; i < s->irq_routes->nr; i++) {
> >>>>          e = &s->irq_routes->entries[i];
> >>>>          if (e->gsi == virq) {
> >>>> -- 
> >>>> 1.8.3.2
> >>
> >>
> >> -- 
> >> Alexey
> 
> 
> -- 
> Alexey
Alexey Kardashevskiy - Aug. 23, 2013, 4:29 a.m.
On 08/19/2013 07:01 PM, Michael S. Tsirkin wrote:
> On Mon, Aug 19, 2013 at 06:10:01PM +1000, Alexey Kardashevskiy wrote:
>> On 08/19/2013 05:54 PM, Michael S. Tsirkin wrote:
>>> On Mon, Aug 19, 2013 at 05:44:04PM +1000, Alexey Kardashevskiy wrote:
>>>> On 08/19/2013 05:35 PM, Michael S. Tsirkin wrote:
>>>>> On Wed, Aug 07, 2013 at 06:51:32PM +1000, Alexey Kardashevskiy wrote:
>>>>>> On PPC64 systems MSI Messages are translated to system IRQ in a PCI
>>>>>> host bridge. This is already supported for emulated MSI/MSIX but
>>>>>> not for irqfd where the current QEMU allocates IRQ numbers from
>>>>>> irqchip and maps MSIMessages to those IRQ in the host kernel.
>>>>>>
>>>>>> The patch extends irqfd support in order to avoid unnecessary
>>>>>> mapping and reuse the one which already exists in a PCI host bridge.
>>>>>>
>>>>>> Specifically, a map_msi callback is added to PCIBus and pci_bus_map_msi()
>>>>>> to PCI API. The latter tries a bus specific map_msi and falls back to
>>>>>> the default kvm_irqchip_add_msi_route() if none set.
>>>>>>
>>>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>>>
>>>>> The mapping (irq # within data) is really KVM on PPC64 specific,
>>>>> isn't it?
>>>>>
>>>>> So why not implement
>>>>> kvm_irqchip_add_msi_route(kvm_state, msg);
>>>>> to simply return msg.data on PPC64?
>>>>
>>>> How exactly? Please give some details. kvm_irqchip_add_msi_route() is
>>>> implemented in kvm-all.c once for all platforms so hack it right there?
>>>
>>> You can add the map_msi callback in kvm state,
>>> then just call it if it's set, right?
>>>
>>>> I thought we discussed all of this already and decided that this thing
>>>> should go to PCI host bridge and by default PHB's map_msi() callback should
>>>> just call kvm_irqchip_add_msi_route(). This is why I touched i386.
>>>>
>>>> Things have changed since then?
>>>
>>>
>>> I think pci_bus_map_msi was there since day 1, it's not like
>>> we are going back and forth on it, right?
>>
>>
>> Sorry, I am not following you. pci_bus_map_msi() is added by this patch.
>> Where was it from day 1?
> 
> I'm sorry. I merely meant that it's there in v1 of this patch.
> 
>>
>>> I always felt it's best to have irqfd isolated to kvm somehow
>>> rather than have hooks in pci code, I just don't know enough
>>> about kvm on ppc to figure out the right way to do this.
>>> With your latest patches I think this is becoming clearer ...
>>
>>
>> Well... This encoding (irq# as msg.data) is used in spapr_pci.c in any
>> case, whether it is KVM or TCG.
> 
> Not only on TCG. All emulated devices merely do a write to send an MSI,
> this is exactly what happens with real hardware.  If this happens to
> land in the MSI region, you get an interrupt.  The concept of mapping
> msi to irq normally doesn't belong in devices/pci core, it's something
> done by APIC or pci host.
> 
> For KVM, we have this special hook where devices allocate a route and
> then Linux can send an interrupt to guest quickly bypassing QEMU.  It
> was originally designed for paravirtualization, so it doesn't support
> strange cases such as guest programming MSI to write into guest memory,
> which is possible with real PCI devices. However, no one seems to do
> these hacks in practice, so in practice this works for
> some other cases, such as device assignment.
> 
> That's why we have kvm_irqchip_add_msi_route in some places
> in code - it's so specific devices implemented by
> Linux can take this shortcut (it does not make sense for
> devices implemented by qemu).
> So the way I see it, it's not a PCI thing at all, it's
> a KVM specific implementation, so it seems cleaner if
> it's isolated there.

The only PCI thing here (at least for spapr) is the way we translate
MSIMessage to IRQ number. Besides that, yeah, there is no good reason to
poison pci.c or spapr_pci.c with KVM.


> Now, we have this allocate/free API for reasons that
> have to do with the API of kvm on x86. spapr doesn't need to
> allocate/free resources, so just shortcut free and
> do map when we allocate.
> 
> Doesn't this sound reasonable?


Yes, pretty much. But it did not help to get this patch accepted at the
first place and my vote costs a little here :)


>> I am confused.
>> 1.5 month ago Anthony suggested (I'll answer to that mail to bring that
>> discussion up) to do this thing as:
>>
>>> So what this should look like is:
>>>
>>> 1) A PCI bus function to do the MSI -> virq mapping
>>> 2) On x86 (and e500), this is implemented by calling
>> kvm_irqchip_add_msi_route()
>>> 3) On pseries, this just returns msi->data
>>>
>>> Perhaps (2) can just be the default PCI bus implementation to simplify
>> things.
>>
>>
>> Now it is not right. Anthony, please help.
> 
> That's right, you implemented exactly what Anthony suggested.  Now that
> you did, I think I see an even better, more contained way to do this.
> And it's not much of a change as compared to what you have, is it?
> 
> I'm sorry if this looks like you are given a run-around,
> not everyone understands how spapr works, sometimes
> it takes a full implementation to make everyone understand
> the issues.
> 
> But I agree, let's see what Anthony thinks, maybe I
> misunderstood how spapr works.


Anthony, Alex (Graf), ping, please?



>>>>> Then you won't have to change all the rest of the code.
>>>>>
>>>>>> ---
>>>>>> Changes:
>>>>>> 2013/08/07 v5:
>>>>>> * pci_bus_map_msi now has default behaviour which is to call
>>>>>> kvm_irqchip_add_msi_route
>>>>>> * kvm_irqchip_release_virq fixed not crash when there is no routes
>>>>>> ---
>>>>>>  hw/i386/kvm/pci-assign.c | 4 ++--
>>>>>>  hw/misc/vfio.c           | 4 ++--
>>>>>>  hw/pci/pci.c             | 9 +++++++++
>>>>>>  hw/virtio/virtio-pci.c   | 2 +-
>>>>>>  include/hw/pci/pci.h     | 2 ++
>>>>>>  include/hw/pci/pci_bus.h | 1 +
>>>>>>  kvm-all.c                | 3 +++
>>>>>>  7 files changed, 20 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
>>>>>> index 5618173..fb59982 100644
>>>>>> --- a/hw/i386/kvm/pci-assign.c
>>>>>> +++ b/hw/i386/kvm/pci-assign.c
>>>>>> @@ -1008,9 +1008,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
>>>>>>          MSIMessage msg = msi_get_message(pci_dev, 0);
>>>>>>          int virq;
>>>>>>  
>>>>>> -        virq = kvm_irqchip_add_msi_route(kvm_state, msg);
>>>>>> +        virq = pci_bus_map_msi(kvm_state, pci_dev->bus, msg);
>>>>>>          if (virq < 0) {
>>>>>> -            perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
>>>>>> +            perror("assigned_dev_update_msi: pci_bus_map_msi");
>>>>>>              return;
>>>>>>          }
>>>>>>
>>>>>
>>>>> This really confuses me. Why are you changing i386 code?
>>>>>
>>>>>   
>>>>>> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
>>>>>> index 017e693..adcd23d 100644
>>>>>> --- a/hw/misc/vfio.c
>>>>>> +++ b/hw/misc/vfio.c
>>>>>> @@ -643,7 +643,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>>>>>>       * Attempt to enable route through KVM irqchip,
>>>>>>       * default to userspace handling if unavailable.
>>>>>>       */
>>>>>> -    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
>>>>>> +    vector->virq = msg ? pci_bus_map_msi(kvm_state, vdev->pdev.bus, *msg) : -1;
>>>>>>      if (vector->virq < 0 ||
>>>>>>          kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>>>>>                                         vector->virq) < 0) {
>>>>>> @@ -811,7 +811,7 @@ retry:
>>>>>>           * Attempt to enable route through KVM irqchip,
>>>>>>           * default to userspace handling if unavailable.
>>>>>>           */
>>>>>> -        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
>>>>>> +        vector->virq = pci_bus_map_msi(kvm_state, vdev->pdev.bus, msg);
>>>>>>          if (vector->virq < 0 ||
>>>>>>              kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
>>>>>>                                             vector->virq) < 0) {
>>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>>> index 4c004f5..4bce3e7 100644
>>>>>> --- a/hw/pci/pci.c
>>>>>> +++ b/hw/pci/pci.c
>>>>>> @@ -1249,6 +1249,15 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>>>>>      dev->intx_routing_notifier = notifier;
>>>>>>  }
>>>>>>  
>>>>>> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg)
>>>>>> +{
>>>>>> +    if (bus->map_msi) {
>>>>>> +        return bus->map_msi(s, bus, msg);
>>>>>> +    }
>>>>>> +
>>>>>> +    return kvm_irqchip_add_msi_route(s, msg);
>>>>>> +}
>>>>>> +
>>>>>>  /*
>>>>>>   * PCI-to-PCI bridge specification
>>>>>>   * 9.1: Interrupt routing. Table 9-1
>>>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>>>>>> index d37037e..21180d2 100644
>>>>>> --- a/hw/virtio/virtio-pci.c
>>>>>> +++ b/hw/virtio/virtio-pci.c
>>>>>> @@ -481,7 +481,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
>>>>>>      int ret;
>>>>>>  
>>>>>>      if (irqfd->users == 0) {
>>>>>> -        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
>>>>>> +        ret = pci_bus_map_msi(kvm_state, proxy->pci_dev.bus, msg);
>>>>>>          if (ret < 0) {
>>>>>>              return ret;
>>>>>>          }
>>>>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>>>>> index ccec2ba..b6ad9e4 100644
>>>>>> --- a/include/hw/pci/pci.h
>>>>>> +++ b/include/hw/pci/pci.h
>>>>>> @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev);
>>>>>>  typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
>>>>>>  typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
>>>>>>  typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
>>>>>> +typedef int (*pci_map_msi_fn)(KVMState *s, PCIBus *bus, MSIMessage msg);
>>>>>>  
>>>>>>  typedef enum {
>>>>>>      PCI_HOTPLUG_DISABLED,
>>>>>> @@ -375,6 +376,7 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>>>>>>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>>>>>>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>>>>>                                            PCIINTxRoutingNotifier notifier);
>>>>>> +int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg);
>>>>>>  void pci_device_reset(PCIDevice *dev);
>>>>>>  void pci_bus_reset(PCIBus *bus);
>>>>>>  
>>>>>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>>>>>> index 9df1788..5bf7994 100644
>>>>>> --- a/include/hw/pci/pci_bus.h
>>>>>> +++ b/include/hw/pci/pci_bus.h
>>>>>> @@ -16,6 +16,7 @@ struct PCIBus {
>>>>>>      pci_set_irq_fn set_irq;
>>>>>>      pci_map_irq_fn map_irq;
>>>>>>      pci_route_irq_fn route_intx_to_irq;
>>>>>> +    pci_map_msi_fn map_msi;
>>>>>>      pci_hotplug_fn hotplug;
>>>>>>      DeviceState *hotplug_qdev;
>>>>>>      void *irq_opaque;
>>>>>> diff --git a/kvm-all.c b/kvm-all.c
>>>>>> index 716860f..3ae5274 100644
>>>>>> --- a/kvm-all.c
>>>>>> +++ b/kvm-all.c
>>>>>> @@ -1069,6 +1069,9 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
>>>>>>      struct kvm_irq_routing_entry *e;
>>>>>>      int i;
>>>>>>  
>>>>>> +    if (!s->irq_routes) {
>>>>>> +        return;
>>>>>> +    }
>>>>>>      for (i = 0; i < s->irq_routes->nr; i++) {
>>>>>>          e = &s->irq_routes->entries[i];
>>>>>>          if (e->gsi == virq) {

Patch

diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 5618173..fb59982 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1008,9 +1008,9 @@  static void assigned_dev_update_msi(PCIDevice *pci_dev)
         MSIMessage msg = msi_get_message(pci_dev, 0);
         int virq;
 
-        virq = kvm_irqchip_add_msi_route(kvm_state, msg);
+        virq = pci_bus_map_msi(kvm_state, pci_dev->bus, msg);
         if (virq < 0) {
-            perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
+            perror("assigned_dev_update_msi: pci_bus_map_msi");
             return;
         }
 
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 017e693..adcd23d 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -643,7 +643,7 @@  static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
      * Attempt to enable route through KVM irqchip,
      * default to userspace handling if unavailable.
      */
-    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
+    vector->virq = msg ? pci_bus_map_msi(kvm_state, vdev->pdev.bus, *msg) : -1;
     if (vector->virq < 0 ||
         kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
                                        vector->virq) < 0) {
@@ -811,7 +811,7 @@  retry:
          * Attempt to enable route through KVM irqchip,
          * default to userspace handling if unavailable.
          */
-        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
+        vector->virq = pci_bus_map_msi(kvm_state, vdev->pdev.bus, msg);
         if (vector->virq < 0 ||
             kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
                                            vector->virq) < 0) {
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 4c004f5..4bce3e7 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1249,6 +1249,15 @@  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
     dev->intx_routing_notifier = notifier;
 }
 
+int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg)
+{
+    if (bus->map_msi) {
+        return bus->map_msi(s, bus, msg);
+    }
+
+    return kvm_irqchip_add_msi_route(s, msg);
+}
+
 /*
  * PCI-to-PCI bridge specification
  * 9.1: Interrupt routing. Table 9-1
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index d37037e..21180d2 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -481,7 +481,7 @@  static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
     int ret;
 
     if (irqfd->users == 0) {
-        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
+        ret = pci_bus_map_msi(kvm_state, proxy->pci_dev.bus, msg);
         if (ret < 0) {
             return ret;
         }
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index ccec2ba..b6ad9e4 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -332,6 +332,7 @@  MemoryRegion *pci_address_space_io(PCIDevice *dev);
 typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
 typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
 typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
+typedef int (*pci_map_msi_fn)(KVMState *s, PCIBus *bus, MSIMessage msg);
 
 typedef enum {
     PCI_HOTPLUG_DISABLED,
@@ -375,6 +376,7 @@  bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
 void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
 void pci_device_set_intx_routing_notifier(PCIDevice *dev,
                                           PCIINTxRoutingNotifier notifier);
+int pci_bus_map_msi(KVMState *s, PCIBus *bus, MSIMessage msg);
 void pci_device_reset(PCIDevice *dev);
 void pci_bus_reset(PCIBus *bus);
 
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 9df1788..5bf7994 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -16,6 +16,7 @@  struct PCIBus {
     pci_set_irq_fn set_irq;
     pci_map_irq_fn map_irq;
     pci_route_irq_fn route_intx_to_irq;
+    pci_map_msi_fn map_msi;
     pci_hotplug_fn hotplug;
     DeviceState *hotplug_qdev;
     void *irq_opaque;
diff --git a/kvm-all.c b/kvm-all.c
index 716860f..3ae5274 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1069,6 +1069,9 @@  void kvm_irqchip_release_virq(KVMState *s, int virq)
     struct kvm_irq_routing_entry *e;
     int i;
 
+    if (!s->irq_routes) {
+        return;
+    }
     for (i = 0; i < s->irq_routes->nr; i++) {
         e = &s->irq_routes->entries[i];
         if (e->gsi == virq) {