diff mbox

[05/13] pci: Add pci_device_route_intx_to_irq

Message ID ae30bba4024251d46e1d139d509c817fc4002b7e.1338799935.git.jan.kiszka@siemens.com
State New
Headers show

Commit Message

Jan Kiszka June 4, 2012, 8:52 a.m. UTC
Add a PCI IRQ path discovery function that walks from a given device to
the host bridge, returning the mode (enabled/inverted/disabled) and the
IRQ number that is reported to the attached interrupt controller. For
this purpose, another host bridge callback function is introduced:
route_intx_to_irq. It is so far only implemented by the PIIX3, other
host bridges can be added later on as required.

Will be used for KVM PCI device assignment and VFIO.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 hw/alpha_typhoon.c |    2 +-
 hw/apb_pci.c       |    2 +-
 hw/bonito.c        |    2 +-
 hw/grackle_pci.c   |    1 +
 hw/gt64xxx.c       |    1 +
 hw/pci.c           |   14 +++++++++++++-
 hw/pci.h           |   13 +++++++++++++
 hw/pci_internals.h |    1 +
 hw/piix_pci.c      |   23 ++++++++++++++++++++---
 hw/ppc4xx_pci.c    |    2 +-
 hw/ppce500_pci.c   |    2 +-
 hw/prep_pci.c      |    2 +-
 hw/sh_pci.c        |    2 +-
 hw/spapr_pci.c     |    2 +-
 hw/unin_pci.c      |    4 ++--
 hw/versatile_pci.c |    2 +-
 16 files changed, 60 insertions(+), 15 deletions(-)

Comments

Michael S. Tsirkin June 7, 2012, 2:32 p.m. UTC | #1
On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
>      pci_change_irq_level(pci_dev, irq_num, change);
>  }
>  
> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> +{
> +    PCIBus *bus = dev->host_bus;
> +
> +    assert(bus->route_intx_to_irq);
> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> +}
> +
>  /***********************************************************/
>  /* monitor info on PCI */
>  

Just an idea: can devices cache this result, bypassing the
intx to irq lookup on data path?

> diff --git a/hw/pci.h b/hw/pci.h
> index 5b54e2d..bbba01e 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -141,6 +141,15 @@ enum {
>  #define PCI_DEVICE_GET_CLASS(obj) \
>       OBJECT_GET_CLASS(PCIDeviceClass, (obj), TYPE_PCI_DEVICE)
>  
> +typedef struct PCIINTxRoute {
> +    enum {
> +        PCI_INTX_ENABLED,
> +        PCI_INTX_INVERTED,
> +        PCI_INTX_DISABLED,
> +    } mode;
> +    int irq;
> +} PCIINTxRoute;

Is this INTX route or IRQ route?
Is the INTX enabled/disabled/inverted or the IRQ?

I have the impression it's the IRQ, in the apic.
PCI INTX are never inverted they are always active low.
Jan Kiszka June 7, 2012, 3:10 p.m. UTC | #2
On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
>>      pci_change_irq_level(pci_dev, irq_num, change);
>>  }
>>  
>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
>> +{
>> +    PCIBus *bus = dev->host_bus;
>> +
>> +    assert(bus->route_intx_to_irq);
>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
>> +}
>> +
>>  /***********************************************************/
>>  /* monitor info on PCI */
>>  
> 
> Just an idea: can devices cache this result, bypassing the
> intx to irq lookup on data path?

That lookup is part of set_irq which we don't bypass so far and where
this is generally trivial. If we want to cache the effects of set_irq as
well, I guess things would become pretty complex (e.g. due to vmstate
compatibility), and I'm unsure if it would buy us much.

> 
>> diff --git a/hw/pci.h b/hw/pci.h
>> index 5b54e2d..bbba01e 100644
>> --- a/hw/pci.h
>> +++ b/hw/pci.h
>> @@ -141,6 +141,15 @@ enum {
>>  #define PCI_DEVICE_GET_CLASS(obj) \
>>       OBJECT_GET_CLASS(PCIDeviceClass, (obj), TYPE_PCI_DEVICE)
>>  
>> +typedef struct PCIINTxRoute {
>> +    enum {
>> +        PCI_INTX_ENABLED,
>> +        PCI_INTX_INVERTED,
>> +        PCI_INTX_DISABLED,
>> +    } mode;
>> +    int irq;
>> +} PCIINTxRoute;
> 
> Is this INTX route or IRQ route?
> Is the INTX enabled/disabled/inverted or the IRQ?
> 
> I have the impression it's the IRQ, in the apic.
> PCI INTX are never inverted they are always active low.

This should be considered as "the route *of* an INTx", not "to some
IRQ". I could call it PCIINTxToIRQRoute if you prefer, but it's a bit
lengthy.

Jan
Michael S. Tsirkin June 7, 2012, 4:28 p.m. UTC | #3
On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> > On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> >> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
> >>      pci_change_irq_level(pci_dev, irq_num, change);
> >>  }
> >>  
> >> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> >> +{
> >> +    PCIBus *bus = dev->host_bus;
> >> +
> >> +    assert(bus->route_intx_to_irq);
> >> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> >> +}
> >> +
> >>  /***********************************************************/
> >>  /* monitor info on PCI */
> >>  
> > 
> > Just an idea: can devices cache this result, bypassing the
> > intx to irq lookup on data path?
> 
> That lookup is part of set_irq which we don't bypass so far and where
> this is generally trivial. If we want to cache the effects of set_irq as
> well, I guess things would become pretty complex (e.g. due to vmstate
> compatibility), and I'm unsure if it would buy us much.

This is less for performance but more for making
everyone use the same infrastructure rather than
assigned devices being the weird case.

> > 
> >> diff --git a/hw/pci.h b/hw/pci.h
> >> index 5b54e2d..bbba01e 100644
> >> --- a/hw/pci.h
> >> +++ b/hw/pci.h
> >> @@ -141,6 +141,15 @@ enum {
> >>  #define PCI_DEVICE_GET_CLASS(obj) \
> >>       OBJECT_GET_CLASS(PCIDeviceClass, (obj), TYPE_PCI_DEVICE)
> >>  
> >> +typedef struct PCIINTxRoute {
> >> +    enum {
> >> +        PCI_INTX_ENABLED,
> >> +        PCI_INTX_INVERTED,
> >> +        PCI_INTX_DISABLED,
> >> +    } mode;
> >> +    int irq;
> >> +} PCIINTxRoute;
> > 
> > Is this INTX route or IRQ route?
> > Is the INTX enabled/disabled/inverted or the IRQ?
> > 
> > I have the impression it's the IRQ, in the apic.
> > PCI INTX are never inverted they are always active low.
> 
> This should be considered as "the route *of* an INTx", not "to some
> IRQ". I could call it PCIINTxToIRQRoute if you prefer, but it's a bit
> lengthy.
> 
> Jan

Yes but the polarity is in apic? Or is it in host bridge?
Jan Kiszka June 7, 2012, 4:46 p.m. UTC | #4
On 2012-06-07 18:28, Michael S. Tsirkin wrote:
> On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
>> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
>>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
>>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
>>>>      pci_change_irq_level(pci_dev, irq_num, change);
>>>>  }
>>>>  
>>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
>>>> +{
>>>> +    PCIBus *bus = dev->host_bus;
>>>> +
>>>> +    assert(bus->route_intx_to_irq);
>>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
>>>> +}
>>>> +
>>>>  /***********************************************************/
>>>>  /* monitor info on PCI */
>>>>  
>>>
>>> Just an idea: can devices cache this result, bypassing the
>>> intx to irq lookup on data path?
>>
>> That lookup is part of set_irq which we don't bypass so far and where
>> this is generally trivial. If we want to cache the effects of set_irq as
>> well, I guess things would become pretty complex (e.g. due to vmstate
>> compatibility), and I'm unsure if it would buy us much.
> 
> This is less for performance but more for making
> everyone use the same infrastructure rather than
> assigned devices being the weird case.

Device assignment is weird. It bypasses all state updates as it does not
have to bother about migratability.

Well, of course we could cache the host bridge routing result as well,
for every device. It would have to be in addition to host_intx_pin. But
the result would look pretty strange to me.

In any case, I would prefer to do this, if at all, on top of this
series, specifically as it will require to touch all host bridges.

> 
>>>
>>>> diff --git a/hw/pci.h b/hw/pci.h
>>>> index 5b54e2d..bbba01e 100644
>>>> --- a/hw/pci.h
>>>> +++ b/hw/pci.h
>>>> @@ -141,6 +141,15 @@ enum {
>>>>  #define PCI_DEVICE_GET_CLASS(obj) \
>>>>       OBJECT_GET_CLASS(PCIDeviceClass, (obj), TYPE_PCI_DEVICE)
>>>>  
>>>> +typedef struct PCIINTxRoute {
>>>> +    enum {
>>>> +        PCI_INTX_ENABLED,
>>>> +        PCI_INTX_INVERTED,
>>>> +        PCI_INTX_DISABLED,
>>>> +    } mode;
>>>> +    int irq;
>>>> +} PCIINTxRoute;
>>>
>>> Is this INTX route or IRQ route?
>>> Is the INTX enabled/disabled/inverted or the IRQ?
>>>
>>> I have the impression it's the IRQ, in the apic.
>>> PCI INTX are never inverted they are always active low.
>>
>> This should be considered as "the route *of* an INTx", not "to some
>> IRQ". I could call it PCIINTxToIRQRoute if you prefer, but it's a bit
>> lengthy.
>>
>> Jan
> 
> Yes but the polarity is in apic? Or is it in host bridge?
> 

Nope (then we would not have to bother). At least one host bridge
(bonito) is apparently able to invert the polarity.

Jan
Michael S. Tsirkin June 7, 2012, 4:55 p.m. UTC | #5
On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
> > On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
> >> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> >>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> >>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
> >>>>      pci_change_irq_level(pci_dev, irq_num, change);
> >>>>  }
> >>>>  
> >>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> >>>> +{
> >>>> +    PCIBus *bus = dev->host_bus;
> >>>> +
> >>>> +    assert(bus->route_intx_to_irq);
> >>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> >>>> +}
> >>>> +
> >>>>  /***********************************************************/
> >>>>  /* monitor info on PCI */
> >>>>  
> >>>
> >>> Just an idea: can devices cache this result, bypassing the
> >>> intx to irq lookup on data path?
> >>
> >> That lookup is part of set_irq which we don't bypass so far and where
> >> this is generally trivial. If we want to cache the effects of set_irq as
> >> well, I guess things would become pretty complex (e.g. due to vmstate
> >> compatibility), and I'm unsure if it would buy us much.
> > 
> > This is less for performance but more for making
> > everyone use the same infrastructure rather than
> > assigned devices being the weird case.
> 
> Device assignment is weird. It bypasses all state updates as it does not
> have to bother about migratability.
> 
> Well, of course we could cache the host bridge routing result as well,
> for every device. It would have to be in addition to host_intx_pin. But
> the result would look pretty strange to me.
> 
> In any case, I would prefer to do this, if at all, on top of this
> series, specifically as it will require to touch all host bridges.

Yes that's fine.

> > 
> >>>
> >>>> diff --git a/hw/pci.h b/hw/pci.h
> >>>> index 5b54e2d..bbba01e 100644
> >>>> --- a/hw/pci.h
> >>>> +++ b/hw/pci.h
> >>>> @@ -141,6 +141,15 @@ enum {
> >>>>  #define PCI_DEVICE_GET_CLASS(obj) \
> >>>>       OBJECT_GET_CLASS(PCIDeviceClass, (obj), TYPE_PCI_DEVICE)
> >>>>  
> >>>> +typedef struct PCIINTxRoute {
> >>>> +    enum {
> >>>> +        PCI_INTX_ENABLED,
> >>>> +        PCI_INTX_INVERTED,
> >>>> +        PCI_INTX_DISABLED,
> >>>> +    } mode;
> >>>> +    int irq;
> >>>> +} PCIINTxRoute;
> >>>
> >>> Is this INTX route or IRQ route?
> >>> Is the INTX enabled/disabled/inverted or the IRQ?
> >>>
> >>> I have the impression it's the IRQ, in the apic.
> >>> PCI INTX are never inverted they are always active low.
> >>
> >> This should be considered as "the route *of* an INTx", not "to some
> >> IRQ". I could call it PCIINTxToIRQRoute if you prefer, but it's a bit
> >> lengthy.
> >>
> >> Jan
> > 
> > Yes but the polarity is in apic? Or is it in host bridge?
> > 
> 
> Nope (then we would not have to bother). At least one host bridge
> (bonito) is apparently able to invert the polarity.
> 
> Jan
>
Michael S. Tsirkin June 10, 2012, 9:55 a.m. UTC | #6
On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
> > On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
> >> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> >>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> >>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
> >>>>      pci_change_irq_level(pci_dev, irq_num, change);
> >>>>  }
> >>>>  
> >>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> >>>> +{
> >>>> +    PCIBus *bus = dev->host_bus;
> >>>> +
> >>>> +    assert(bus->route_intx_to_irq);
> >>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> >>>> +}
> >>>> +
> >>>>  /***********************************************************/
> >>>>  /* monitor info on PCI */
> >>>>  
> >>>
> >>> Just an idea: can devices cache this result, bypassing the
> >>> intx to irq lookup on data path?
> >>
> >> That lookup is part of set_irq which we don't bypass so far and where
> >> this is generally trivial. If we want to cache the effects of set_irq as
> >> well, I guess things would become pretty complex (e.g. due to vmstate
> >> compatibility), and I'm unsure if it would buy us much.
> > 
> > This is less for performance but more for making
> > everyone use the same infrastructure rather than
> > assigned devices being the weird case.
> 
> Device assignment is weird. It bypasses all state updates as it does not
> have to bother about migratability.
> 
> Well, of course we could cache the host bridge routing result as well,
> for every device. It would have to be in addition to host_intx_pin. But
> the result would look pretty strange to me.
> 
> In any case, I would prefer to do this, if at all, on top of this
> series, specifically as it will require to touch all host bridges.

I'd like to ponder this a bit more then.

If the claim is that device assignment is only needed for
piix anyway, then why not make it depend on piix *explicitly*?
Yes ugly but this will make it very easy to find and
address any missing pieces.

As it is you are adding APIs that in theory address
non PIIX issues but in practice don't implement for non
PIIX. So we never really know.

> > 
> >>>
> >>>> diff --git a/hw/pci.h b/hw/pci.h
> >>>> index 5b54e2d..bbba01e 100644
> >>>> --- a/hw/pci.h
> >>>> +++ b/hw/pci.h
> >>>> @@ -141,6 +141,15 @@ enum {
> >>>>  #define PCI_DEVICE_GET_CLASS(obj) \
> >>>>       OBJECT_GET_CLASS(PCIDeviceClass, (obj), TYPE_PCI_DEVICE)
> >>>>  
> >>>> +typedef struct PCIINTxRoute {
> >>>> +    enum {
> >>>> +        PCI_INTX_ENABLED,
> >>>> +        PCI_INTX_INVERTED,
> >>>> +        PCI_INTX_DISABLED,
> >>>> +    } mode;
> >>>> +    int irq;
> >>>> +} PCIINTxRoute;
> >>>
> >>> Is this INTX route or IRQ route?
> >>> Is the INTX enabled/disabled/inverted or the IRQ?
> >>>
> >>> I have the impression it's the IRQ, in the apic.
> >>> PCI INTX are never inverted they are always active low.
> >>
> >> This should be considered as "the route *of* an INTx", not "to some
> >> IRQ". I could call it PCIINTxToIRQRoute if you prefer, but it's a bit
> >> lengthy.
> >>
> >> Jan
> > 
> > Yes but the polarity is in apic? Or is it in host bridge?
> > 
> 
> Nope (then we would not have to bother). At least one host bridge
> (bonito) is apparently able to invert the polarity.
> 
> Jan
>
Jan Kiszka June 10, 2012, 10:08 a.m. UTC | #7
On 2012-06-10 11:55, Michael S. Tsirkin wrote:
> On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
>> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
>>> On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
>>>> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
>>>>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
>>>>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
>>>>>>      pci_change_irq_level(pci_dev, irq_num, change);
>>>>>>  }
>>>>>>  
>>>>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
>>>>>> +{
>>>>>> +    PCIBus *bus = dev->host_bus;
>>>>>> +
>>>>>> +    assert(bus->route_intx_to_irq);
>>>>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
>>>>>> +}
>>>>>> +
>>>>>>  /***********************************************************/
>>>>>>  /* monitor info on PCI */
>>>>>>  
>>>>>
>>>>> Just an idea: can devices cache this result, bypassing the
>>>>> intx to irq lookup on data path?
>>>>
>>>> That lookup is part of set_irq which we don't bypass so far and where
>>>> this is generally trivial. If we want to cache the effects of set_irq as
>>>> well, I guess things would become pretty complex (e.g. due to vmstate
>>>> compatibility), and I'm unsure if it would buy us much.
>>>
>>> This is less for performance but more for making
>>> everyone use the same infrastructure rather than
>>> assigned devices being the weird case.
>>
>> Device assignment is weird. It bypasses all state updates as it does not
>> have to bother about migratability.
>>
>> Well, of course we could cache the host bridge routing result as well,
>> for every device. It would have to be in addition to host_intx_pin. But
>> the result would look pretty strange to me.
>>
>> In any case, I would prefer to do this, if at all, on top of this
>> series, specifically as it will require to touch all host bridges.
> 
> I'd like to ponder this a bit more then.
> 
> If the claim is that device assignment is only needed for
> piix anyway, then why not make it depend on piix *explicitly*?
> Yes ugly but this will make it very easy to find and
> address any missing pieces.

Because it is conceptually independent of the PIIX, we will need it for
successors of that x86 chipset as well, and I won't add the ugly hack of
qemu-kvm upstream

> 
> As it is you are adding APIs that in theory address
> non PIIX issues but in practice don't implement for non
> PIIX. So we never really know.

I once hacked q35 to make it work with device assignment. This really
requires something like this. It actually requires something generic,
independent of PCI, but that's too much for this round.

Jan
Michael S. Tsirkin June 10, 2012, 10:41 a.m. UTC | #8
On Sun, Jun 10, 2012 at 12:08:23PM +0200, Jan Kiszka wrote:
> On 2012-06-10 11:55, Michael S. Tsirkin wrote:
> > On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
> >> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
> >>> On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
> >>>> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> >>>>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> >>>>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
> >>>>>>      pci_change_irq_level(pci_dev, irq_num, change);
> >>>>>>  }
> >>>>>>  
> >>>>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> >>>>>> +{
> >>>>>> +    PCIBus *bus = dev->host_bus;
> >>>>>> +
> >>>>>> +    assert(bus->route_intx_to_irq);
> >>>>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> >>>>>> +}
> >>>>>> +
> >>>>>>  /***********************************************************/
> >>>>>>  /* monitor info on PCI */
> >>>>>>  
> >>>>>
> >>>>> Just an idea: can devices cache this result, bypassing the
> >>>>> intx to irq lookup on data path?
> >>>>
> >>>> That lookup is part of set_irq which we don't bypass so far and where
> >>>> this is generally trivial. If we want to cache the effects of set_irq as
> >>>> well, I guess things would become pretty complex (e.g. due to vmstate
> >>>> compatibility), and I'm unsure if it would buy us much.
> >>>
> >>> This is less for performance but more for making
> >>> everyone use the same infrastructure rather than
> >>> assigned devices being the weird case.
> >>
> >> Device assignment is weird. It bypasses all state updates as it does not
> >> have to bother about migratability.
> >>
> >> Well, of course we could cache the host bridge routing result as well,
> >> for every device. It would have to be in addition to host_intx_pin. But
> >> the result would look pretty strange to me.
> >>
> >> In any case, I would prefer to do this, if at all, on top of this
> >> series, specifically as it will require to touch all host bridges.
> > 
> > I'd like to ponder this a bit more then.
> > 
> > If the claim is that device assignment is only needed for
> > piix anyway, then why not make it depend on piix *explicitly*?
> > Yes ugly but this will make it very easy to find and
> > address any missing pieces.
> 
> Because it is conceptually independent of the PIIX, we will need it for
> successors of that x86 chipset as well, and I won't add the ugly hack of
> qemu-kvm upstream

So you look at an API and see it requires a route
callback. And you ask "why doesn't chipset X implement it"?
And the answer is "because it's only used by device assignment".
Which you will only know if you read this thread. So it's
a hack. And I'd rather have the hacks in device-assignment.c
than in pci.c even if the former are nastier.

> > 
> > As it is you are adding APIs that in theory address
> > non PIIX issues but in practice don't implement for non
> > PIIX. So we never really know.
> 
> I once hacked q35 to make it work with device assignment. This really
> requires something like this.
 
Yes I'm aware of this motivation. This does not do much to
address the concerns though.

> It actually requires something generic,
> independent of PCI, but that's too much for this round.
> 
> Jan

And this just makes the concerns worse :(
Jan Kiszka June 10, 2012, 10:49 a.m. UTC | #9
On 2012-06-10 12:41, Michael S. Tsirkin wrote:
> On Sun, Jun 10, 2012 at 12:08:23PM +0200, Jan Kiszka wrote:
>> On 2012-06-10 11:55, Michael S. Tsirkin wrote:
>>> On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
>>>> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
>>>>> On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
>>>>>> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
>>>>>>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
>>>>>>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
>>>>>>>>      pci_change_irq_level(pci_dev, irq_num, change);
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
>>>>>>>> +{
>>>>>>>> +    PCIBus *bus = dev->host_bus;
>>>>>>>> +
>>>>>>>> +    assert(bus->route_intx_to_irq);
>>>>>>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  /***********************************************************/
>>>>>>>>  /* monitor info on PCI */
>>>>>>>>  
>>>>>>>
>>>>>>> Just an idea: can devices cache this result, bypassing the
>>>>>>> intx to irq lookup on data path?
>>>>>>
>>>>>> That lookup is part of set_irq which we don't bypass so far and where
>>>>>> this is generally trivial. If we want to cache the effects of set_irq as
>>>>>> well, I guess things would become pretty complex (e.g. due to vmstate
>>>>>> compatibility), and I'm unsure if it would buy us much.
>>>>>
>>>>> This is less for performance but more for making
>>>>> everyone use the same infrastructure rather than
>>>>> assigned devices being the weird case.
>>>>
>>>> Device assignment is weird. It bypasses all state updates as it does not
>>>> have to bother about migratability.
>>>>
>>>> Well, of course we could cache the host bridge routing result as well,
>>>> for every device. It would have to be in addition to host_intx_pin. But
>>>> the result would look pretty strange to me.
>>>>
>>>> In any case, I would prefer to do this, if at all, on top of this
>>>> series, specifically as it will require to touch all host bridges.
>>>
>>> I'd like to ponder this a bit more then.
>>>
>>> If the claim is that device assignment is only needed for
>>> piix anyway, then why not make it depend on piix *explicitly*?
>>> Yes ugly but this will make it very easy to find and
>>> address any missing pieces.
>>
>> Because it is conceptually independent of the PIIX, we will need it for
>> successors of that x86 chipset as well, and I won't add the ugly hack of
>> qemu-kvm upstream
> 
> So you look at an API and see it requires a route
> callback. And you ask "why doesn't chipset X implement it"?
> And the answer is "because it's only used by device assignment".
> Which you will only know if you read this thread. So it's
> a hack. And I'd rather have the hacks in device-assignment.c
> than in pci.c even if the former are nastier.

I don't share your view on this. It is surely _not_ a hack, specifically
when compared to what we have so far and what could be done otherwise,
e.g. hacking device-assignment and PIIX to make them cooperate (sorry, I
would vote against such an attempt). This is just a partially used
generic API. Any chipset not providing the required routing function
will cause an assert once some tries to make use of it.

So, what can I do to make this API more acceptable for you?

Jan
Michael S. Tsirkin June 10, 2012, 10:53 a.m. UTC | #10
On Sun, Jun 10, 2012 at 12:49:11PM +0200, Jan Kiszka wrote:
> On 2012-06-10 12:41, Michael S. Tsirkin wrote:
> > On Sun, Jun 10, 2012 at 12:08:23PM +0200, Jan Kiszka wrote:
> >> On 2012-06-10 11:55, Michael S. Tsirkin wrote:
> >>> On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
> >>>> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
> >>>>> On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
> >>>>>> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> >>>>>>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> >>>>>>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
> >>>>>>>>      pci_change_irq_level(pci_dev, irq_num, change);
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> >>>>>>>> +{
> >>>>>>>> +    PCIBus *bus = dev->host_bus;
> >>>>>>>> +
> >>>>>>>> +    assert(bus->route_intx_to_irq);
> >>>>>>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>  /***********************************************************/
> >>>>>>>>  /* monitor info on PCI */
> >>>>>>>>  
> >>>>>>>
> >>>>>>> Just an idea: can devices cache this result, bypassing the
> >>>>>>> intx to irq lookup on data path?
> >>>>>>
> >>>>>> That lookup is part of set_irq which we don't bypass so far and where
> >>>>>> this is generally trivial. If we want to cache the effects of set_irq as
> >>>>>> well, I guess things would become pretty complex (e.g. due to vmstate
> >>>>>> compatibility), and I'm unsure if it would buy us much.
> >>>>>
> >>>>> This is less for performance but more for making
> >>>>> everyone use the same infrastructure rather than
> >>>>> assigned devices being the weird case.
> >>>>
> >>>> Device assignment is weird. It bypasses all state updates as it does not
> >>>> have to bother about migratability.
> >>>>
> >>>> Well, of course we could cache the host bridge routing result as well,
> >>>> for every device. It would have to be in addition to host_intx_pin. But
> >>>> the result would look pretty strange to me.
> >>>>
> >>>> In any case, I would prefer to do this, if at all, on top of this
> >>>> series, specifically as it will require to touch all host bridges.
> >>>
> >>> I'd like to ponder this a bit more then.
> >>>
> >>> If the claim is that device assignment is only needed for
> >>> piix anyway, then why not make it depend on piix *explicitly*?
> >>> Yes ugly but this will make it very easy to find and
> >>> address any missing pieces.
> >>
> >> Because it is conceptually independent of the PIIX, we will need it for
> >> successors of that x86 chipset as well, and I won't add the ugly hack of
> >> qemu-kvm upstream
> > 
> > So you look at an API and see it requires a route
> > callback. And you ask "why doesn't chipset X implement it"?
> > And the answer is "because it's only used by device assignment".
> > Which you will only know if you read this thread. So it's
> > a hack. And I'd rather have the hacks in device-assignment.c
> > than in pci.c even if the former are nastier.
> 
> I don't share your view on this. It is surely _not_ a hack, specifically
> when compared to what we have so far and what could be done otherwise,
> e.g. hacking device-assignment and PIIX to make them cooperate (sorry, I
> would vote against such an attempt). This is just a partially used
> generic API. Any chipset not providing the required routing function
> will cause an assert once some tries to make use of it.
> 
> So, what can I do to make this API more acceptable for you?
> 
> Jan
> 

The idea that I suggested above would address this concern.
Or wait a bit, maybe I'll come around when I ponder
the alternatives.
Alex Williamson June 10, 2012, 2:19 p.m. UTC | #11
On Sun, 2012-06-10 at 12:49 +0200, Jan Kiszka wrote:
> On 2012-06-10 12:41, Michael S. Tsirkin wrote:
> > On Sun, Jun 10, 2012 at 12:08:23PM +0200, Jan Kiszka wrote:
> >> On 2012-06-10 11:55, Michael S. Tsirkin wrote:
> >>> On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
> >>>> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
> >>>>> On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
> >>>>>> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> >>>>>>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> >>>>>>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
> >>>>>>>>      pci_change_irq_level(pci_dev, irq_num, change);
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> >>>>>>>> +{
> >>>>>>>> +    PCIBus *bus = dev->host_bus;
> >>>>>>>> +
> >>>>>>>> +    assert(bus->route_intx_to_irq);
> >>>>>>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>  /***********************************************************/
> >>>>>>>>  /* monitor info on PCI */
> >>>>>>>>  
> >>>>>>>
> >>>>>>> Just an idea: can devices cache this result, bypassing the
> >>>>>>> intx to irq lookup on data path?
> >>>>>>
> >>>>>> That lookup is part of set_irq which we don't bypass so far and where
> >>>>>> this is generally trivial. If we want to cache the effects of set_irq as
> >>>>>> well, I guess things would become pretty complex (e.g. due to vmstate
> >>>>>> compatibility), and I'm unsure if it would buy us much.
> >>>>>
> >>>>> This is less for performance but more for making
> >>>>> everyone use the same infrastructure rather than
> >>>>> assigned devices being the weird case.
> >>>>
> >>>> Device assignment is weird. It bypasses all state updates as it does not
> >>>> have to bother about migratability.
> >>>>
> >>>> Well, of course we could cache the host bridge routing result as well,
> >>>> for every device. It would have to be in addition to host_intx_pin. But
> >>>> the result would look pretty strange to me.
> >>>>
> >>>> In any case, I would prefer to do this, if at all, on top of this
> >>>> series, specifically as it will require to touch all host bridges.
> >>>
> >>> I'd like to ponder this a bit more then.
> >>>
> >>> If the claim is that device assignment is only needed for
> >>> piix anyway, then why not make it depend on piix *explicitly*?
> >>> Yes ugly but this will make it very easy to find and
> >>> address any missing pieces.
> >>
> >> Because it is conceptually independent of the PIIX, we will need it for
> >> successors of that x86 chipset as well, and I won't add the ugly hack of
> >> qemu-kvm upstream
> > 
> > So you look at an API and see it requires a route
> > callback. And you ask "why doesn't chipset X implement it"?
> > And the answer is "because it's only used by device assignment".
> > Which you will only know if you read this thread. So it's
> > a hack. And I'd rather have the hacks in device-assignment.c
> > than in pci.c even if the former are nastier.
> 
> I don't share your view on this. It is surely _not_ a hack, specifically
> when compared to what we have so far and what could be done otherwise,
> e.g. hacking device-assignment and PIIX to make them cooperate (sorry, I
> would vote against such an attempt). This is just a partially used
> generic API. Any chipset not providing the required routing function
> will cause an assert once some tries to make use of it.

I agree, and frankly it sounds like a rather biased attitude against
device assignment.  piix and q35 may be the only initial chipsets to
implement this, but VFIO is designed to be architecture neutral.  It's
already working on POWER.  Interrupt routing is one of the most
intrusive parts because we cannot know from inside the device assignment
code how the chipset exposes intx out to an irq.  Jan's interface makes
it easy for a chipset to add this, versus hacks in device assignment
code, even if they were possible.  If it needs to be tweaked for other
chipsets, we'll do that when they come.  Please don't roadblock device
assignment or VFIO support by not allowing us a well architected,
generic interface to track interrupts.  Thanks,

Alex
Michael S. Tsirkin June 10, 2012, 2:43 p.m. UTC | #12
On Sun, Jun 10, 2012 at 08:19:28AM -0600, Alex Williamson wrote:
> On Sun, 2012-06-10 at 12:49 +0200, Jan Kiszka wrote:
> > On 2012-06-10 12:41, Michael S. Tsirkin wrote:
> > > On Sun, Jun 10, 2012 at 12:08:23PM +0200, Jan Kiszka wrote:
> > >> On 2012-06-10 11:55, Michael S. Tsirkin wrote:
> > >>> On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
> > >>>> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
> > >>>>> On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
> > >>>>>> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> > >>>>>>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> > >>>>>>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
> > >>>>>>>>      pci_change_irq_level(pci_dev, irq_num, change);
> > >>>>>>>>  }
> > >>>>>>>>  
> > >>>>>>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> > >>>>>>>> +{
> > >>>>>>>> +    PCIBus *bus = dev->host_bus;
> > >>>>>>>> +
> > >>>>>>>> +    assert(bus->route_intx_to_irq);
> > >>>>>>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> > >>>>>>>> +}
> > >>>>>>>> +
> > >>>>>>>>  /***********************************************************/
> > >>>>>>>>  /* monitor info on PCI */
> > >>>>>>>>  
> > >>>>>>>
> > >>>>>>> Just an idea: can devices cache this result, bypassing the
> > >>>>>>> intx to irq lookup on data path?
> > >>>>>>
> > >>>>>> That lookup is part of set_irq which we don't bypass so far and where
> > >>>>>> this is generally trivial. If we want to cache the effects of set_irq as
> > >>>>>> well, I guess things would become pretty complex (e.g. due to vmstate
> > >>>>>> compatibility), and I'm unsure if it would buy us much.
> > >>>>>
> > >>>>> This is less for performance but more for making
> > >>>>> everyone use the same infrastructure rather than
> > >>>>> assigned devices being the weird case.
> > >>>>
> > >>>> Device assignment is weird. It bypasses all state updates as it does not
> > >>>> have to bother about migratability.
> > >>>>
> > >>>> Well, of course we could cache the host bridge routing result as well,
> > >>>> for every device. It would have to be in addition to host_intx_pin. But
> > >>>> the result would look pretty strange to me.
> > >>>>
> > >>>> In any case, I would prefer to do this, if at all, on top of this
> > >>>> series, specifically as it will require to touch all host bridges.
> > >>>
> > >>> I'd like to ponder this a bit more then.
> > >>>
> > >>> If the claim is that device assignment is only needed for
> > >>> piix anyway, then why not make it depend on piix *explicitly*?
> > >>> Yes ugly but this will make it very easy to find and
> > >>> address any missing pieces.
> > >>
> > >> Because it is conceptually independent of the PIIX, we will need it for
> > >> successors of that x86 chipset as well, and I won't add the ugly hack of
> > >> qemu-kvm upstream
> > > 
> > > So you look at an API and see it requires a route
> > > callback. And you ask "why doesn't chipset X implement it"?
> > > And the answer is "because it's only used by device assignment".
> > > Which you will only know if you read this thread. So it's
> > > a hack. And I'd rather have the hacks in device-assignment.c
> > > than in pci.c even if the former are nastier.
> > 
> > I don't share your view on this. It is surely _not_ a hack, specifically
> > when compared to what we have so far and what could be done otherwise,
> > e.g. hacking device-assignment and PIIX to make them cooperate (sorry, I
> > would vote against such an attempt). This is just a partially used
> > generic API. Any chipset not providing the required routing function
> > will cause an assert once some tries to make use of it.
> 
> I agree, and frankly it sounds like a rather biased attitude against
> device assignment.
> piix and q35 may be the only initial chipsets to
> implement this, but VFIO is designed to be architecture neutral.  It's
> already working on POWER.

Not with this patch it doesn't.

>  Interrupt routing is one of the most
> intrusive parts because we cannot know from inside the device assignment
> code how the chipset exposes intx out to an irq.  Jan's interface makes
> it easy for a chipset to add this, versus hacks in device assignment
> code, even if they were possible.  If it needs to be tweaked for other
> chipsets, we'll do that when they come.  Please don't roadblock device
> assignment or VFIO support by not allowing us a well architected,
> generic interface to track interrupts.  Thanks,
> 
> Alex

Problems are:
1. This sticks NULL in all chipsets except one. This means it's
   hard to find and fill out.
2. Adds a function, in every chipset, that is only used by assignment.
   This means many maintainers have no way to test it.

Ways to handle this that came up so far, in order of my preference:
1. Cache all of route in devices.
2. Some ugly hack in device assignment.
3. Merge as is and fix it when it gets broken.

So if you expect me to merge this work, then either Jan does (1), or
gives up and does (2), or I find some time and do (1), or I fail to do
(1) and get convinced that we need to do (3). Or someone else gets
involved.
Alex Williamson June 10, 2012, 3:25 p.m. UTC | #13
On Sun, 2012-06-10 at 17:43 +0300, Michael S. Tsirkin wrote:
> On Sun, Jun 10, 2012 at 08:19:28AM -0600, Alex Williamson wrote:
> > On Sun, 2012-06-10 at 12:49 +0200, Jan Kiszka wrote:
> > > On 2012-06-10 12:41, Michael S. Tsirkin wrote:
> > > > On Sun, Jun 10, 2012 at 12:08:23PM +0200, Jan Kiszka wrote:
> > > >> On 2012-06-10 11:55, Michael S. Tsirkin wrote:
> > > >>> On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
> > > >>>> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
> > > >>>>> On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
> > > >>>>>> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> > > >>>>>>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> > > >>>>>>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
> > > >>>>>>>>      pci_change_irq_level(pci_dev, irq_num, change);
> > > >>>>>>>>  }
> > > >>>>>>>>  
> > > >>>>>>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> > > >>>>>>>> +{
> > > >>>>>>>> +    PCIBus *bus = dev->host_bus;
> > > >>>>>>>> +
> > > >>>>>>>> +    assert(bus->route_intx_to_irq);
> > > >>>>>>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> > > >>>>>>>> +}
> > > >>>>>>>> +
> > > >>>>>>>>  /***********************************************************/
> > > >>>>>>>>  /* monitor info on PCI */
> > > >>>>>>>>  
> > > >>>>>>>
> > > >>>>>>> Just an idea: can devices cache this result, bypassing the
> > > >>>>>>> intx to irq lookup on data path?
> > > >>>>>>
> > > >>>>>> That lookup is part of set_irq which we don't bypass so far and where
> > > >>>>>> this is generally trivial. If we want to cache the effects of set_irq as
> > > >>>>>> well, I guess things would become pretty complex (e.g. due to vmstate
> > > >>>>>> compatibility), and I'm unsure if it would buy us much.
> > > >>>>>
> > > >>>>> This is less for performance but more for making
> > > >>>>> everyone use the same infrastructure rather than
> > > >>>>> assigned devices being the weird case.
> > > >>>>
> > > >>>> Device assignment is weird. It bypasses all state updates as it does not
> > > >>>> have to bother about migratability.
> > > >>>>
> > > >>>> Well, of course we could cache the host bridge routing result as well,
> > > >>>> for every device. It would have to be in addition to host_intx_pin. But
> > > >>>> the result would look pretty strange to me.
> > > >>>>
> > > >>>> In any case, I would prefer to do this, if at all, on top of this
> > > >>>> series, specifically as it will require to touch all host bridges.
> > > >>>
> > > >>> I'd like to ponder this a bit more then.
> > > >>>
> > > >>> If the claim is that device assignment is only needed for
> > > >>> piix anyway, then why not make it depend on piix *explicitly*?
> > > >>> Yes ugly but this will make it very easy to find and
> > > >>> address any missing pieces.
> > > >>
> > > >> Because it is conceptually independent of the PIIX, we will need it for
> > > >> successors of that x86 chipset as well, and I won't add the ugly hack of
> > > >> qemu-kvm upstream
> > > > 
> > > > So you look at an API and see it requires a route
> > > > callback. And you ask "why doesn't chipset X implement it"?
> > > > And the answer is "because it's only used by device assignment".
> > > > Which you will only know if you read this thread. So it's
> > > > a hack. And I'd rather have the hacks in device-assignment.c
> > > > than in pci.c even if the former are nastier.
> > > 
> > > I don't share your view on this. It is surely _not_ a hack, specifically
> > > when compared to what we have so far and what could be done otherwise,
> > > e.g. hacking device-assignment and PIIX to make them cooperate (sorry, I
> > > would vote against such an attempt). This is just a partially used
> > > generic API. Any chipset not providing the required routing function
> > > will cause an assert once some tries to make use of it.
> > 
> > I agree, and frankly it sounds like a rather biased attitude against
> > device assignment.
> > piix and q35 may be the only initial chipsets to
> > implement this, but VFIO is designed to be architecture neutral.  It's
> > already working on POWER.
> 
> Not with this patch it doesn't.

Alexey, how are you handling INTx routing and EOI on POWER?

> >  Interrupt routing is one of the most
> > intrusive parts because we cannot know from inside the device assignment
> > code how the chipset exposes intx out to an irq.  Jan's interface makes
> > it easy for a chipset to add this, versus hacks in device assignment
> > code, even if they were possible.  If it needs to be tweaked for other
> > chipsets, we'll do that when they come.  Please don't roadblock device
> > assignment or VFIO support by not allowing us a well architected,
> > generic interface to track interrupts.  Thanks,
> > 
> > Alex
> 
> Problems are:
> 1. This sticks NULL in all chipsets except one. This means it's
>    hard to find and fill out.

I don't buy that.  Look for all the callers, find the one that's not
NULL, look at what it does...  How's that a problem?  This can be solved
by documentation.

> 2. Adds a function, in every chipset, that is only used by assignment.
>    This means many maintainers have no way to test it.

It's effectively optional, so they don't have to implement it.  They can
wait to care about it until they want device assignment.  This can also
be solved by documentation so the maintainer knows when and how this is
used and can choose to implement it or not.

> Ways to handle this that came up so far, in order of my preference:
> 1. Cache all of route in devices.

How do we get the route to cache it?

> 2. Some ugly hack in device assignment.

We still have the problem of how do we get the route to start with?

> 3. Merge as is and fix it when it gets broken.

Isn't that how open source works? ;^)

> So if you expect me to merge this work, then either Jan does (1), or
> gives up and does (2), or I find some time and do (1), or I fail to do
> (1) and get convinced that we need to do (3). Or someone else gets
> involved.

I still have trouble seeing how (3) is a problem.  The translation of an
INTx to an irq is chipset specific.  We need a chipset function to
perform that transform.  We don't know how to do this for every chipset
in the tree, nor do many of those chipset care at the moment about
device assignment.  Jan has created an arrangement where chipsets can
easily opt-in to this support.  Aside from asking us to go spend a month
digging up specs for every chipset in tree and trying to implement this
ourselves, what kind of enhancement to the interface are you looking
for?  Thanks,

Alex
Michael S. Tsirkin June 10, 2012, 3:55 p.m. UTC | #14
On Sun, Jun 10, 2012 at 09:25:06AM -0600, Alex Williamson wrote:
> On Sun, 2012-06-10 at 17:43 +0300, Michael S. Tsirkin wrote:
> > On Sun, Jun 10, 2012 at 08:19:28AM -0600, Alex Williamson wrote:
> > > On Sun, 2012-06-10 at 12:49 +0200, Jan Kiszka wrote:
> > > > On 2012-06-10 12:41, Michael S. Tsirkin wrote:
> > > > > On Sun, Jun 10, 2012 at 12:08:23PM +0200, Jan Kiszka wrote:
> > > > >> On 2012-06-10 11:55, Michael S. Tsirkin wrote:
> > > > >>> On Thu, Jun 07, 2012 at 06:46:38PM +0200, Jan Kiszka wrote:
> > > > >>>> On 2012-06-07 18:28, Michael S. Tsirkin wrote:
> > > > >>>>> On Thu, Jun 07, 2012 at 05:10:17PM +0200, Jan Kiszka wrote:
> > > > >>>>>> On 2012-06-07 16:32, Michael S. Tsirkin wrote:
> > > > >>>>>>> On Mon, Jun 04, 2012 at 10:52:13AM +0200, Jan Kiszka wrote:
> > > > >>>>>>>> @@ -1089,6 +1093,14 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
> > > > >>>>>>>>      pci_change_irq_level(pci_dev, irq_num, change);
> > > > >>>>>>>>  }
> > > > >>>>>>>>  
> > > > >>>>>>>> +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
> > > > >>>>>>>> +{
> > > > >>>>>>>> +    PCIBus *bus = dev->host_bus;
> > > > >>>>>>>> +
> > > > >>>>>>>> +    assert(bus->route_intx_to_irq);
> > > > >>>>>>>> +    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
> > > > >>>>>>>> +}
> > > > >>>>>>>> +
> > > > >>>>>>>>  /***********************************************************/
> > > > >>>>>>>>  /* monitor info on PCI */
> > > > >>>>>>>>  
> > > > >>>>>>>
> > > > >>>>>>> Just an idea: can devices cache this result, bypassing the
> > > > >>>>>>> intx to irq lookup on data path?
> > > > >>>>>>
> > > > >>>>>> That lookup is part of set_irq which we don't bypass so far and where
> > > > >>>>>> this is generally trivial. If we want to cache the effects of set_irq as
> > > > >>>>>> well, I guess things would become pretty complex (e.g. due to vmstate
> > > > >>>>>> compatibility), and I'm unsure if it would buy us much.
> > > > >>>>>
> > > > >>>>> This is less for performance but more for making
> > > > >>>>> everyone use the same infrastructure rather than
> > > > >>>>> assigned devices being the weird case.
> > > > >>>>
> > > > >>>> Device assignment is weird. It bypasses all state updates as it does not
> > > > >>>> have to bother about migratability.
> > > > >>>>
> > > > >>>> Well, of course we could cache the host bridge routing result as well,
> > > > >>>> for every device. It would have to be in addition to host_intx_pin. But
> > > > >>>> the result would look pretty strange to me.
> > > > >>>>
> > > > >>>> In any case, I would prefer to do this, if at all, on top of this
> > > > >>>> series, specifically as it will require to touch all host bridges.
> > > > >>>
> > > > >>> I'd like to ponder this a bit more then.
> > > > >>>
> > > > >>> If the claim is that device assignment is only needed for
> > > > >>> piix anyway, then why not make it depend on piix *explicitly*?
> > > > >>> Yes ugly but this will make it very easy to find and
> > > > >>> address any missing pieces.
> > > > >>
> > > > >> Because it is conceptually independent of the PIIX, we will need it for
> > > > >> successors of that x86 chipset as well, and I won't add the ugly hack of
> > > > >> qemu-kvm upstream
> > > > > 
> > > > > So you look at an API and see it requires a route
> > > > > callback. And you ask "why doesn't chipset X implement it"?
> > > > > And the answer is "because it's only used by device assignment".
> > > > > Which you will only know if you read this thread. So it's
> > > > > a hack. And I'd rather have the hacks in device-assignment.c
> > > > > than in pci.c even if the former are nastier.
> > > > 
> > > > I don't share your view on this. It is surely _not_ a hack, specifically
> > > > when compared to what we have so far and what could be done otherwise,
> > > > e.g. hacking device-assignment and PIIX to make them cooperate (sorry, I
> > > > would vote against such an attempt). This is just a partially used
> > > > generic API. Any chipset not providing the required routing function
> > > > will cause an assert once some tries to make use of it.
> > > 
> > > I agree, and frankly it sounds like a rather biased attitude against
> > > device assignment.
> > > piix and q35 may be the only initial chipsets to
> > > implement this, but VFIO is designed to be architecture neutral.  It's
> > > already working on POWER.
> > 
> > Not with this patch it doesn't.
> 
> Alexey, how are you handling INTx routing and EOI on POWER?
> 
> > >  Interrupt routing is one of the most
> > > intrusive parts because we cannot know from inside the device assignment
> > > code how the chipset exposes intx out to an irq.  Jan's interface makes
> > > it easy for a chipset to add this, versus hacks in device assignment
> > > code, even if they were possible.  If it needs to be tweaked for other
> > > chipsets, we'll do that when they come.  Please don't roadblock device
> > > assignment or VFIO support by not allowing us a well architected,
> > > generic interface to track interrupts.  Thanks,
> > > 
> > > Alex
> > 
> > Problems are:
> > 1. This sticks NULL in all chipsets except one. This means it's
> >    hard to find and fill out.
> 
> I don't buy that.  Look for all the callers, find the one that's not
> NULL, look at what it does...  How's that a problem?  This can be solved
> by documentation.

Yes but it's not there.

> > 2. Adds a function, in every chipset, that is only used by assignment.
> >    This means many maintainers have no way to test it.
> 
> It's effectively optional, so they don't have to implement it.  They can
> wait to care about it until they want device assignment.  This can also
> be solved by documentation so the maintainer knows when and how this is
> used and can choose to implement it or not.

No, what I meant is it's hard to test on systems where it's implemented.

> > Ways to handle this that came up so far, in order of my preference:
> > 1. Cache all of route in devices.
> 
> How do we get the route to cache it?

Move routing logic from piix3_set_irq_level
to a route_intx callback, or equivalent.
Then you just look it up in device->root_route.

> > 2. Some ugly hack in device assignment.
> 
> We still have the problem of how do we get the route to start with?

You are asking how an ugly hack might work?
Think it up yourself, I'm not gonnu mention hacks at
this point yet.

> > 3. Merge as is and fix it when it gets broken.
> 
> Isn't that how open source works? ;^)

It works by darwinism. This is exactly what's going on here.

> > So if you expect me to merge this work, then either Jan does (1), or
> > gives up and does (2), or I find some time and do (1), or I fail to do
> > (1) and get convinced that we need to do (3). Or someone else gets
> > involved.
> 
> I still have trouble seeing how (3) is a problem.  The translation of an
> INTx to an irq is chipset specific.  We need a chipset function to
> perform that transform.  We don't know how to do this for every chipset
> in the tree, nor do many of those chipset care at the moment about
> device assignment.  Jan has created an arrangement where chipsets can
> easily opt-in to this support.  Aside from asking us to go spend a month
> digging up specs for every chipset in tree and trying to implement this
> ourselves, what kind of enhancement to the interface are you looking
> for?  Thanks,
> 
> Alex

Sorry I don't understand what you are talking about.
It's better to have one way to determine interrupt routing than two.
It can be done, and IIUC Jan doesn't disagree.
There are gnarly issues related to migration compatibility
with old qemu sorrounding the solution which makes Jan think it's too complex,
but nothing remotely related to digging up any specs.
Jan Kiszka June 10, 2012, 4:30 p.m. UTC | #15
On 2012-06-10 17:55, Michael S. Tsirkin wrote:
>>> So if you expect me to merge this work, then either Jan does (1), or
>>> gives up and does (2), or I find some time and do (1), or I fail to do
>>> (1) and get convinced that we need to do (3). Or someone else gets
>>> involved.
>>
>> I still have trouble seeing how (3) is a problem.  The translation of an
>> INTx to an irq is chipset specific.  We need a chipset function to
>> perform that transform.  We don't know how to do this for every chipset
>> in the tree, nor do many of those chipset care at the moment about
>> device assignment.  Jan has created an arrangement where chipsets can
>> easily opt-in to this support.  Aside from asking us to go spend a month
>> digging up specs for every chipset in tree and trying to implement this
>> ourselves, what kind of enhancement to the interface are you looking
>> for?  Thanks,
>>
>> Alex
> 
> Sorry I don't understand what you are talking about.
> It's better to have one way to determine interrupt routing than two.
> It can be done, and IIUC Jan doesn't disagree.
> There are gnarly issues related to migration compatibility
> with old qemu sorrounding the solution which makes Jan think it's too complex,
> but nothing remotely related to digging up any specs.

Caching the host bridge generically means changing all chipsets and,
well, also testing that they still work afterward. As explained before,
I'd really like to avoid doing this in a single step.

And there are still migration issues - as explained based on the PIIX3.
Until it is officially stated that we give up on backward migratability,
it will be quite some effort (ie. lots of additional code) to provide
compatibility on top of generic intx-to-irq routing.

This interface here is surely not the perfect solution. But it is better
than a) hacking device assignment and VFIO into a specific chipset and
b) starting a huge refactoring now. We can surely tweak some aspects of
this approach before merging, e.g. documentation. But lets try to move
forward in small, well testable steps. I really don't see the need for
touching all chipsets and migration formats for this purpose ATM.

Jan
Michael S. Tsirkin June 10, 2012, 4:50 p.m. UTC | #16
On Sun, Jun 10, 2012 at 06:30:59PM +0200, Jan Kiszka wrote:
> Caching the host bridge generically means changing all chipsets and,
> well, also testing that they still work afterward. As explained before,
> I'd really like to avoid doing this in a single step.

Surely it is not hard to find a way to switch chipsets gradually.
Michael S. Tsirkin June 10, 2012, 5:04 p.m. UTC | #17
On Sun, Jun 10, 2012 at 07:50:07PM +0300, Michael S. Tsirkin wrote:
> On Sun, Jun 10, 2012 at 06:30:59PM +0200, Jan Kiszka wrote:
> > Caching the host bridge generically means changing all chipsets and,
> > well, also testing that they still work afterward. As explained before,
> > I'd really like to avoid doing this in a single step.
> 
> Surely it is not hard to find a way to switch chipsets gradually.

Just to stress: the problem is that with this patch there
are 2 ways to get routing even for piix.

So instead of your patch which caches just intx route
but always, and which I applied, you could have one that only caches
if the root supplies a new route callback.

Then to set irq, you check cache and if not valid,
fallback on the legacy interface.

With time we convert everyone and drop the legacy interface.

> -- 
> MST
diff mbox

Patch

diff --git a/hw/alpha_typhoon.c b/hw/alpha_typhoon.c
index d5193bb..1056b50 100644
--- a/hw/alpha_typhoon.c
+++ b/hw/alpha_typhoon.c
@@ -764,7 +764,7 @@  PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
                                 &s->pchip.reg_io);
 
     b = pci_register_bus(&s->host.busdev.qdev, "pci",
-                         typhoon_set_irq, sys_map_irq, s,
+                         typhoon_set_irq, sys_map_irq, NULL, s,
                          &s->pchip.reg_mem, addr_space_io, 0, 64);
     s->host.bus = b;
 
diff --git a/hw/apb_pci.c b/hw/apb_pci.c
index 57ead09..270a785 100644
--- a/hw/apb_pci.c
+++ b/hw/apb_pci.c
@@ -378,7 +378,7 @@  PCIBus *pci_apb_init(target_phys_addr_t special_base,
     memory_region_add_subregion(get_system_memory(), mem_base, &d->pci_mmio);
 
     d->bus = pci_register_bus(&d->busdev.qdev, "pci",
-                              pci_apb_set_irq, pci_pbm_map_irq, d,
+                              pci_apb_set_irq, pci_pbm_map_irq, NULL, d,
                               &d->pci_mmio,
                               get_system_io(),
                               0, 32);
diff --git a/hw/bonito.c b/hw/bonito.c
index 77786f8..7ce5993 100644
--- a/hw/bonito.c
+++ b/hw/bonito.c
@@ -750,7 +750,7 @@  PCIBus *bonito_init(qemu_irq *pic)
     dev = qdev_create(NULL, "Bonito-pcihost");
     pcihost = FROM_SYSBUS(BonitoState, sysbus_from_qdev(dev));
     b = pci_register_bus(&pcihost->busdev.qdev, "pci", pci_bonito_set_irq,
-                         pci_bonito_map_irq, pic, get_system_memory(),
+                         pci_bonito_map_irq, NULL, pic, get_system_memory(),
                          get_system_io(),
                          0x28, 32);
     pcihost->bus = b;
diff --git a/hw/grackle_pci.c b/hw/grackle_pci.c
index 81ff3a3..f47d9fe 100644
--- a/hw/grackle_pci.c
+++ b/hw/grackle_pci.c
@@ -85,6 +85,7 @@  PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic,
     d->host_state.bus = pci_register_bus(&d->busdev.qdev, "pci",
                                          pci_grackle_set_irq,
                                          pci_grackle_map_irq,
+                                         NULL,
                                          pic,
                                          &d->pci_mmio,
                                          address_space_io,
diff --git a/hw/gt64xxx.c b/hw/gt64xxx.c
index a2d0e5a..2418238 100644
--- a/hw/gt64xxx.c
+++ b/hw/gt64xxx.c
@@ -1093,6 +1093,7 @@  PCIBus *gt64120_register(qemu_irq *pic)
     d = FROM_SYSBUS(GT64120State, s);
     d->pci.bus = pci_register_bus(&d->busdev.qdev, "pci",
                                   gt64120_pci_set_irq, gt64120_pci_map_irq,
+                                  NULL,
                                   pic,
                                   get_system_memory(),
                                   get_system_io(),
diff --git a/hw/pci.c b/hw/pci.c
index 9a2b4a3..8878a11 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -320,10 +320,12 @@  PCIBus *pci_bus_new(DeviceState *parent, const char *name,
 
 void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq,
                   pci_route_pin_fn route_intx_pin,
+                  pci_route_irq_fn route_intx_to_irq,
                   void *irq_opaque, int nirq)
 {
     bus->set_irq = set_irq;
     bus->route_intx_pin = route_intx_pin;
+    bus->route_intx_to_irq = route_intx_to_irq;
     bus->irq_opaque = irq_opaque;
     bus->nirq = nirq;
     bus->irq_count = g_malloc0(nirq * sizeof(bus->irq_count[0]));
@@ -340,6 +342,7 @@  void pci_bus_hotplug(PCIBus *bus, pci_hotplug_fn hotplug, DeviceState *qdev)
 PCIBus *pci_register_bus(DeviceState *parent, const char *name,
                          pci_set_irq_fn set_irq,
                          pci_route_pin_fn route_intx_pin,
+                         pci_route_irq_fn route_intx_to_irq,
                          void *irq_opaque,
                          MemoryRegion *address_space_mem,
                          MemoryRegion *address_space_io,
@@ -349,7 +352,8 @@  PCIBus *pci_register_bus(DeviceState *parent, const char *name,
 
     bus = pci_bus_new(parent, name, address_space_mem,
                       address_space_io, devfn_min);
-    pci_bus_irqs(bus, set_irq, route_intx_pin, irq_opaque, nirq);
+    pci_bus_irqs(bus, set_irq, route_intx_pin, route_intx_to_irq, irq_opaque,
+                 nirq);
     return bus;
 }
 
@@ -1089,6 +1093,14 @@  static void pci_set_irq(void *opaque, int irq_num, int level)
     pci_change_irq_level(pci_dev, irq_num, change);
 }
 
+PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
+{
+    PCIBus *bus = dev->host_bus;
+
+    assert(bus->route_intx_to_irq);
+    return bus->route_intx_to_irq(bus->irq_opaque, dev->host_intx_pin[pin]);
+}
+
 /***********************************************************/
 /* monitor info on PCI */
 
diff --git a/hw/pci.h b/hw/pci.h
index 5b54e2d..bbba01e 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -141,6 +141,15 @@  enum {
 #define PCI_DEVICE_GET_CLASS(obj) \
      OBJECT_GET_CLASS(PCIDeviceClass, (obj), TYPE_PCI_DEVICE)
 
+typedef struct PCIINTxRoute {
+    enum {
+        PCI_INTX_ENABLED,
+        PCI_INTX_INVERTED,
+        PCI_INTX_DISABLED,
+    } mode;
+    int irq;
+} PCIINTxRoute;
+
 typedef struct PCIDeviceClass {
     DeviceClass parent_class;
 
@@ -280,6 +289,7 @@  MemoryRegion *pci_address_space_io(PCIDevice *dev);
 
 typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
 typedef int (*pci_route_pin_fn)(PCIDevice *pci_dev, int pin);
+typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
 
 typedef enum {
     PCI_HOTPLUG_DISABLED,
@@ -295,16 +305,19 @@  PCIBus *pci_bus_new(DeviceState *parent, const char *name,
                     uint8_t devfn_min);
 void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq,
                   pci_route_pin_fn route_intx_pin,
+                  pci_route_irq_fn route_intx_to_irq,
                   void *irq_opaque, int nirq);
 int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
 void pci_bus_hotplug(PCIBus *bus, pci_hotplug_fn hotplug, DeviceState *dev);
 PCIBus *pci_register_bus(DeviceState *parent, const char *name,
                          pci_set_irq_fn set_irq,
                          pci_route_pin_fn route_intx_pin,
+                         pci_route_irq_fn route_intx_to_irq,
                          void *irq_opaque,
                          MemoryRegion *address_space_mem,
                          MemoryRegion *address_space_io,
                          uint8_t devfn_min, int nirq);
+PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin);
 void pci_device_reset(PCIDevice *dev);
 void pci_bus_reset(PCIBus *bus);
 
diff --git a/hw/pci_internals.h b/hw/pci_internals.h
index be5594b..6639232 100644
--- a/hw/pci_internals.h
+++ b/hw/pci_internals.h
@@ -19,6 +19,7 @@  struct PCIBus {
     uint8_t devfn_min;
     pci_set_irq_fn set_irq;
     pci_route_pin_fn route_intx_pin;
+    pci_route_irq_fn route_intx_to_irq;
     pci_hotplug_fn hotplug;
     DeviceState *hotplug_qdev;
     void *irq_opaque;
diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 09e84f5..347177f 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -89,6 +89,7 @@  struct PCII440FXState {
 #define I440FX_SMRAM    0x72
 
 static void piix3_set_irq(void *opaque, int pirq, int level);
+static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pci_intx);
 static void piix3_write_config_xen(PCIDevice *dev,
                                uint32_t address, uint32_t val, int len);
 
@@ -308,13 +309,13 @@  static PCIBus *i440fx_common_init(const char *device_name,
     if (xen_enabled()) {
         piix3 = DO_UPCAST(PIIX3State, dev,
                 pci_create_simple_multifunction(b, -1, true, "PIIX3-xen"));
-        pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
+        pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq, NULL,
                 piix3, XEN_PIIX_NUM_PIRQS);
     } else {
         piix3 = DO_UPCAST(PIIX3State, dev,
                 pci_create_simple_multifunction(b, -1, true, "PIIX3"));
-        pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, piix3,
-                PIIX_NUM_PIRQS);
+        pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq,
+                     piix3_route_intx_pin_to_irq, piix3, PIIX_NUM_PIRQS);
     }
     piix3->pic = pic;
     *isa_bus = DO_UPCAST(ISABus, qbus,
@@ -386,6 +387,22 @@  static void piix3_set_irq(void *opaque, int pirq, int level)
     piix3_set_irq_level(piix3, pirq, level);
 }
 
+static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin)
+{
+    PIIX3State *piix3 = opaque;
+    int irq = piix3->dev.config[PIIX_PIRQC + pin];
+    PCIINTxRoute route;
+
+    if (irq < PIIX_NUM_PIC_IRQS) {
+        route.mode = PCI_INTX_ENABLED;
+        route.irq = irq;
+    } else {
+        route.mode = PCI_INTX_DISABLED;
+        route.irq = -1;
+    }
+    return route;
+}
+
 /* irq routing is changed. so rebuild bitmap */
 static void piix3_update_irq_levels(PIIX3State *piix3)
 {
diff --git a/hw/ppc4xx_pci.c b/hw/ppc4xx_pci.c
index 203c3cd..224c4a0 100644
--- a/hw/ppc4xx_pci.c
+++ b/hw/ppc4xx_pci.c
@@ -343,7 +343,7 @@  static int ppc4xx_pcihost_initfn(SysBusDevice *dev)
     }
 
     b = pci_register_bus(&s->pci_state.busdev.qdev, NULL, ppc4xx_pci_set_irq,
-                         ppc4xx_pci_map_irq, s->irq, get_system_memory(),
+                         ppc4xx_pci_map_irq, NULL, s->irq, get_system_memory(),
                          get_system_io(), 0, 4);
     s->pci_state.bus = b;
 
diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index 0f60b24..dd924ae 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -318,7 +318,7 @@  static int e500_pcihost_initfn(SysBusDevice *dev)
     }
 
     b = pci_register_bus(&s->pci_state.busdev.qdev, NULL, mpc85xx_pci_set_irq,
-                         mpc85xx_pci_map_irq, s->irq, address_space_mem,
+                         mpc85xx_pci_map_irq, NULL, s->irq, address_space_mem,
                          address_space_io, PCI_DEVFN(0x11, 0), 4);
     s->pci_state.bus = b;
 
diff --git a/hw/prep_pci.c b/hw/prep_pci.c
index 38dbff4..9d7bec7 100644
--- a/hw/prep_pci.c
+++ b/hw/prep_pci.c
@@ -108,7 +108,7 @@  static int raven_pcihost_init(SysBusDevice *dev)
     }
 
     bus = pci_register_bus(&h->busdev.qdev, NULL,
-                           prep_set_irq, prep_map_irq, s->irq,
+                           prep_set_irq, prep_map_irq, NULL, s->irq,
                            address_space_mem, address_space_io, 0, 4);
     h->bus = bus;
 
diff --git a/hw/sh_pci.c b/hw/sh_pci.c
index 0cfac46..1cea12b 100644
--- a/hw/sh_pci.c
+++ b/hw/sh_pci.c
@@ -120,7 +120,7 @@  static int sh_pci_device_init(SysBusDevice *dev)
         sysbus_init_irq(dev, &s->irq[i]);
     }
     s->bus = pci_register_bus(&s->busdev.qdev, "pci",
-                              sh_pci_set_irq, sh_pci_map_irq,
+                              sh_pci_set_irq, sh_pci_map_irq, NULL,
                               s->irq,
                               get_system_memory(),
                               get_system_io(),
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 25b400a..4a43062 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -306,7 +306,7 @@  static int spapr_phb_init(SysBusDevice *s)
 
     bus = pci_register_bus(&phb->busdev.qdev,
                            phb->busname ? phb->busname : phb->dtbusname,
-                           pci_spapr_set_irq, pci_spapr_map_irq, phb,
+                           pci_spapr_set_irq, pci_spapr_map_irq, NULL, phb,
                            &phb->memspace, &phb->iospace,
                            PCI_DEVFN(0, 0), PCI_NUM_PINS);
     phb->host_state.bus = bus;
diff --git a/hw/unin_pci.c b/hw/unin_pci.c
index 409bcd4..056e3bc 100644
--- a/hw/unin_pci.c
+++ b/hw/unin_pci.c
@@ -227,7 +227,7 @@  PCIBus *pci_pmac_init(qemu_irq *pic,
 
     d->host_state.bus = pci_register_bus(dev, "pci",
                                          pci_unin_set_irq, pci_unin_map_irq,
-                                         pic,
+                                         NULL, pic,
                                          &d->pci_mmio,
                                          address_space_io,
                                          PCI_DEVFN(11, 0), 4);
@@ -293,7 +293,7 @@  PCIBus *pci_pmac_u3_init(qemu_irq *pic,
 
     d->host_state.bus = pci_register_bus(dev, "pci",
                                          pci_unin_set_irq, pci_unin_map_irq,
-                                         pic,
+                                         NULL, pic,
                                          &d->pci_mmio,
                                          address_space_io,
                                          PCI_DEVFN(11, 0), 4);
diff --git a/hw/versatile_pci.c b/hw/versatile_pci.c
index ae53a8b..90c400e 100644
--- a/hw/versatile_pci.c
+++ b/hw/versatile_pci.c
@@ -68,7 +68,7 @@  static int pci_vpb_init(SysBusDevice *dev)
         sysbus_init_irq(dev, &s->irq[i]);
     }
     bus = pci_register_bus(&dev->qdev, "pci",
-                           pci_vpb_set_irq, pci_vpb_map_irq, s->irq,
+                           pci_vpb_set_irq, pci_vpb_map_irq, NULL, s->irq,
                            get_system_memory(), get_system_io(),
                            PCI_DEVFN(11, 0), 4);