diff mbox

xen: fix interrupt routing

Message ID alpine.DEB.2.00.1105261645560.12963@kaball-desktop
State New
Headers show

Commit Message

Stefano Stabellini May 26, 2011, 3:48 p.m. UTC
xen: fix interrupt routing

- remove i440FX-xen and i440fx_write_config_xen
we don't need to intercept pci config writes to i440FX anymore;

- introduce PIIX3-xen and piix3_write_config_xen
we do need to intercept pci config write to the PCI-ISA bridge to update
the PCI link routing;

- set the number of PIIX3-xen interrupts lines to 128;

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

Comments

Alexander Graf May 27, 2011, 11:17 p.m. UTC | #1
On 26.05.2011, at 17:48, Stefano Stabellini wrote:

> xen: fix interrupt routing
> 
> - remove i440FX-xen and i440fx_write_config_xen
> we don't need to intercept pci config writes to i440FX anymore;

Why not? In which version? Did anything below change? What about compat code? Older hypervisor versions?

> - introduce PIIX3-xen and piix3_write_config_xen
> we do need to intercept pci config write to the PCI-ISA bridge to update
> the PCI link routing;
> 
> - set the number of PIIX3-xen interrupts lines to 128;
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> 
> diff --git a/hw/pc.h b/hw/pc.h
> index 0dcbee7..6d5730b 100644
> --- a/hw/pc.h
> +++ b/hw/pc.h
> @@ -176,7 +176,6 @@ struct PCII440FXState;
> typedef struct PCII440FXState PCII440FXState;
> 
> PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, qemu_irq *pic, ram_addr_t ram_size);
> -PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size);
> void i440fx_init_memory_mappings(PCII440FXState *d);
> 
> /* piix4.c */
> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> index 9a22a8a..ba198de 100644
> --- a/hw/pc_piix.c
> +++ b/hw/pc_piix.c
> @@ -124,11 +124,7 @@ static void pc_init1(ram_addr_t ram_size,
>     isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
> 
>     if (pci_enabled) {
> -        if (!xen_enabled()) {
> -            pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
> -        } else {
> -            pci_bus = i440fx_xen_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
> -        }
> +        pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>     } else {
>         pci_bus = NULL;
>         i440fx_state = NULL;
> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> index 7f1c4cc..3d73a42 100644
> --- a/hw/piix_pci.c
> +++ b/hw/piix_pci.c
> @@ -40,6 +40,7 @@ typedef PCIHostState I440FXState;
> 
> #define PIIX_NUM_PIC_IRQS       16      /* i8259 * 2 */
> #define PIIX_NUM_PIRQS          4ULL    /* PIRQ[A-D] */
> +#define XEN_PIIX_NUM_PIRQS      128ULL
> #define PIIX_PIRQC              0x60
> 
> typedef struct PIIX3State {
> @@ -78,6 +79,8 @@ struct PCII440FXState {
> #define I440FX_SMRAM    0x72
> 
> static void piix3_set_irq(void *opaque, int pirq, int level);
> +static void piix3_write_config_xen(PCIDevice *dev,
> +                               uint32_t address, uint32_t val, int len);
> 
> /* return the global irq number corresponding to a given device irq
>    pin. We could also use the bus number to have a more precise
> @@ -173,13 +176,6 @@ static void i440fx_write_config(PCIDevice *dev,
>     }
> }
> 
> -static void i440fx_write_config_xen(PCIDevice *dev,
> -                                    uint32_t address, uint32_t val, int len)
> -{
> -    xen_piix_pci_write_config_client(address, val, len);
> -    i440fx_write_config(dev, address, val, len);
> -}
> -
> static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
> {
>     PCII440FXState *d = opaque;
> @@ -267,8 +263,17 @@ static PCIBus *i440fx_common_init(const char *device_name,
>     d = pci_create_simple(b, 0, device_name);
>     *pi440fx_state = DO_UPCAST(PCII440FXState, dev, d);
> 
> -    piix3 = DO_UPCAST(PIIX3State, dev,
> -                      pci_create_simple_multifunction(b, -1, true, "PIIX3"));
> +    if (xen_enabled()) {
> +        piix3 = DO_UPCAST(PIIX3State, dev,
> +                pci_create_simple_multifunction(b, -1, true, "PIIX3-xen"));
> +        pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
> +                piix3, XEN_PIIX_NUM_PIRQS);

But with XEN_PIIX_NUM_PIRQS it's not a piix3 anymore, no? What's the reason behind this change?


Alex
Stefano Stabellini May 31, 2011, 11:05 a.m. UTC | #2
On Sat, 28 May 2011, Alexander Graf wrote:
> 
> On 26.05.2011, at 17:48, Stefano Stabellini wrote:
> 
> > xen: fix interrupt routing
> > 
> > - remove i440FX-xen and i440fx_write_config_xen
> > we don't need to intercept pci config writes to i440FX anymore;
> 
> Why not? In which version? Did anything below change? What about compat code? Older hypervisor versions?

Nothing changed, I think it was a genuine mistake in the original patch
series: the intention was to catch the pci config writes to 0x60-0x63 to
reprogram the xen pci link routes accordingly (see
xen_piix_pci_write_config_client).  If I am not mistaken a similar thing
is done by non-Xen Qemu in piix3_update_irq_levels.


> > - introduce PIIX3-xen and piix3_write_config_xen
> > we do need to intercept pci config write to the PCI-ISA bridge to update
> > the PCI link routing;
> > 
> > - set the number of PIIX3-xen interrupts lines to 128;
> > 
> > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> > 
> > diff --git a/hw/pc.h b/hw/pc.h
> > index 0dcbee7..6d5730b 100644
> > --- a/hw/pc.h
> > +++ b/hw/pc.h
> > @@ -176,7 +176,6 @@ struct PCII440FXState;
> > typedef struct PCII440FXState PCII440FXState;
> > 
> > PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, qemu_irq *pic, ram_addr_t ram_size);
> > -PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size);
> > void i440fx_init_memory_mappings(PCII440FXState *d);
> > 
> > /* piix4.c */
> > diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> > index 9a22a8a..ba198de 100644
> > --- a/hw/pc_piix.c
> > +++ b/hw/pc_piix.c
> > @@ -124,11 +124,7 @@ static void pc_init1(ram_addr_t ram_size,
> >     isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
> > 
> >     if (pci_enabled) {
> > -        if (!xen_enabled()) {
> > -            pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
> > -        } else {
> > -            pci_bus = i440fx_xen_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
> > -        }
> > +        pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
> >     } else {
> >         pci_bus = NULL;
> >         i440fx_state = NULL;
> > diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> > index 7f1c4cc..3d73a42 100644
> > --- a/hw/piix_pci.c
> > +++ b/hw/piix_pci.c
> > @@ -40,6 +40,7 @@ typedef PCIHostState I440FXState;
> > 
> > #define PIIX_NUM_PIC_IRQS       16      /* i8259 * 2 */
> > #define PIIX_NUM_PIRQS          4ULL    /* PIRQ[A-D] */
> > +#define XEN_PIIX_NUM_PIRQS      128ULL
> > #define PIIX_PIRQC              0x60
> > 
> > typedef struct PIIX3State {
> > @@ -78,6 +79,8 @@ struct PCII440FXState {
> > #define I440FX_SMRAM    0x72
> > 
> > static void piix3_set_irq(void *opaque, int pirq, int level);
> > +static void piix3_write_config_xen(PCIDevice *dev,
> > +                               uint32_t address, uint32_t val, int len);
> > 
> > /* return the global irq number corresponding to a given device irq
> >    pin. We could also use the bus number to have a more precise
> > @@ -173,13 +176,6 @@ static void i440fx_write_config(PCIDevice *dev,
> >     }
> > }
> > 
> > -static void i440fx_write_config_xen(PCIDevice *dev,
> > -                                    uint32_t address, uint32_t val, int len)
> > -{
> > -    xen_piix_pci_write_config_client(address, val, len);
> > -    i440fx_write_config(dev, address, val, len);
> > -}
> > -
> > static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
> > {
> >     PCII440FXState *d = opaque;
> > @@ -267,8 +263,17 @@ static PCIBus *i440fx_common_init(const char *device_name,
> >     d = pci_create_simple(b, 0, device_name);
> >     *pi440fx_state = DO_UPCAST(PCII440FXState, dev, d);
> > 
> > -    piix3 = DO_UPCAST(PIIX3State, dev,
> > -                      pci_create_simple_multifunction(b, -1, true, "PIIX3"));
> > +    if (xen_enabled()) {
> > +        piix3 = DO_UPCAST(PIIX3State, dev,
> > +                pci_create_simple_multifunction(b, -1, true, "PIIX3-xen"));
> > +        pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
> > +                piix3, XEN_PIIX_NUM_PIRQS);
> 
> But with XEN_PIIX_NUM_PIRQS it's not a piix3 anymore, no? What's the reason behind this change?
 
It is still a piix3, but also provides non-legacy interrupt links to the
IO-APIC.
The four pins of each PCI device on the bus not only are routed to the
normal four pirqs (programmed writing to 0x60-0x63, see above) but also
they are connected to the IO-APIC directly.
These additional routes can only be discovered through ACPI, so you need
matching ACPI tables. We used to build the old ACPI tables like this:

/* PRTA: APIC routing table (via non-legacy IOAPIC GSIs). */
printf("Name(PRTA, Package() {\n");
for ( dev = 1; dev < 32; dev++ )
    for ( intx = 0; intx < 4; intx++ ) /* INTA-D */
        printf("Package(){0x%04xffff, %u, 0, %u},\n",
               dev, intx, ((dev*4+dev/8+intx)&31)+16);
printf("})\n");
Alexander Graf June 14, 2011, 9:25 a.m. UTC | #3
On 31.05.2011, at 13:05, Stefano Stabellini wrote:

> On Sat, 28 May 2011, Alexander Graf wrote:
>> 
>> On 26.05.2011, at 17:48, Stefano Stabellini wrote:
>> 
>>> xen: fix interrupt routing
>>> 
>>> - remove i440FX-xen and i440fx_write_config_xen
>>> we don't need to intercept pci config writes to i440FX anymore;
>> 
>> Why not? In which version? Did anything below change? What about compat code? Older hypervisor versions?
> 
> Nothing changed, I think it was a genuine mistake in the original patch
> series: the intention was to catch the pci config writes to 0x60-0x63 to
> reprogram the xen pci link routes accordingly (see
> xen_piix_pci_write_config_client).  If I am not mistaken a similar thing
> is done by non-Xen Qemu in piix3_update_irq_levels.
> 
> 
>>> - introduce PIIX3-xen and piix3_write_config_xen
>>> we do need to intercept pci config write to the PCI-ISA bridge to update
>>> the PCI link routing;
>>> 
>>> - set the number of PIIX3-xen interrupts lines to 128;
>>> 
>>> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
>>> 
>>> diff --git a/hw/pc.h b/hw/pc.h
>>> index 0dcbee7..6d5730b 100644
>>> --- a/hw/pc.h
>>> +++ b/hw/pc.h
>>> @@ -176,7 +176,6 @@ struct PCII440FXState;
>>> typedef struct PCII440FXState PCII440FXState;
>>> 
>>> PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, qemu_irq *pic, ram_addr_t ram_size);
>>> -PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size);
>>> void i440fx_init_memory_mappings(PCII440FXState *d);
>>> 
>>> /* piix4.c */
>>> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
>>> index 9a22a8a..ba198de 100644
>>> --- a/hw/pc_piix.c
>>> +++ b/hw/pc_piix.c
>>> @@ -124,11 +124,7 @@ static void pc_init1(ram_addr_t ram_size,
>>>    isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
>>> 
>>>    if (pci_enabled) {
>>> -        if (!xen_enabled()) {
>>> -            pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>>> -        } else {
>>> -            pci_bus = i440fx_xen_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>>> -        }
>>> +        pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>>>    } else {
>>>        pci_bus = NULL;
>>>        i440fx_state = NULL;
>>> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
>>> index 7f1c4cc..3d73a42 100644
>>> --- a/hw/piix_pci.c
>>> +++ b/hw/piix_pci.c
>>> @@ -40,6 +40,7 @@ typedef PCIHostState I440FXState;
>>> 
>>> #define PIIX_NUM_PIC_IRQS       16      /* i8259 * 2 */
>>> #define PIIX_NUM_PIRQS          4ULL    /* PIRQ[A-D] */
>>> +#define XEN_PIIX_NUM_PIRQS      128ULL
>>> #define PIIX_PIRQC              0x60
>>> 
>>> typedef struct PIIX3State {
>>> @@ -78,6 +79,8 @@ struct PCII440FXState {
>>> #define I440FX_SMRAM    0x72
>>> 
>>> static void piix3_set_irq(void *opaque, int pirq, int level);
>>> +static void piix3_write_config_xen(PCIDevice *dev,
>>> +                               uint32_t address, uint32_t val, int len);
>>> 
>>> /* return the global irq number corresponding to a given device irq
>>>   pin. We could also use the bus number to have a more precise
>>> @@ -173,13 +176,6 @@ static void i440fx_write_config(PCIDevice *dev,
>>>    }
>>> }
>>> 
>>> -static void i440fx_write_config_xen(PCIDevice *dev,
>>> -                                    uint32_t address, uint32_t val, int len)
>>> -{
>>> -    xen_piix_pci_write_config_client(address, val, len);
>>> -    i440fx_write_config(dev, address, val, len);
>>> -}
>>> -
>>> static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
>>> {
>>>    PCII440FXState *d = opaque;
>>> @@ -267,8 +263,17 @@ static PCIBus *i440fx_common_init(const char *device_name,
>>>    d = pci_create_simple(b, 0, device_name);
>>>    *pi440fx_state = DO_UPCAST(PCII440FXState, dev, d);
>>> 
>>> -    piix3 = DO_UPCAST(PIIX3State, dev,
>>> -                      pci_create_simple_multifunction(b, -1, true, "PIIX3"));
>>> +    if (xen_enabled()) {
>>> +        piix3 = DO_UPCAST(PIIX3State, dev,
>>> +                pci_create_simple_multifunction(b, -1, true, "PIIX3-xen"));
>>> +        pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
>>> +                piix3, XEN_PIIX_NUM_PIRQS);
>> 
>> But with XEN_PIIX_NUM_PIRQS it's not a piix3 anymore, no? What's the reason behind this change?
> 
> It is still a piix3, but also provides non-legacy interrupt links to the
> IO-APIC.
> The four pins of each PCI device on the bus not only are routed to the
> normal four pirqs (programmed writing to 0x60-0x63, see above) but also
> they are connected to the IO-APIC directly.
> These additional routes can only be discovered through ACPI, so you need
> matching ACPI tables. We used to build the old ACPI tables like this:
> 
> /* PRTA: APIC routing table (via non-legacy IOAPIC GSIs). */
> printf("Name(PRTA, Package() {\n");
> for ( dev = 1; dev < 32; dev++ )
>    for ( intx = 0; intx < 4; intx++ ) /* INTA-D */
>        printf("Package(){0x%04xffff, %u, 0, %u},\n",
>               dev, intx, ((dev*4+dev/8+intx)&31)+16);
> printf("})\n");
> 

Interesting concept, but completely non-standard and very much different from real hardware. Please at least add a comment there to show readers that Xen is doing a hack which is not at all related to how the PIIX really works.

Also, do you really need this? For KVM, we simply share interrupts or use MSI where we need the device separate.


Alex
Ian Campbell June 14, 2011, 12:17 p.m. UTC | #4
On Tue, 2011-06-14 at 10:25 +0100, Alexander Graf wrote:
> On 31.05.2011, at 13:05, Stefano Stabellini wrote:
> 
> > On Sat, 28 May 2011, Alexander Graf wrote:
> >> 
> >> On 26.05.2011, at 17:48, Stefano Stabellini wrote:
> >> 
> >>> xen: fix interrupt routing
> >>> 
> >>> - remove i440FX-xen and i440fx_write_config_xen
> >>> we don't need to intercept pci config writes to i440FX anymore;
> >> 
> >> Why not? In which version? Did anything below change? What about compat code? Older hypervisor versions?
> > 
> > Nothing changed, I think it was a genuine mistake in the original patch
> > series: the intention was to catch the pci config writes to 0x60-0x63 to
> > reprogram the xen pci link routes accordingly (see
> > xen_piix_pci_write_config_client).  If I am not mistaken a similar thing
> > is done by non-Xen Qemu in piix3_update_irq_levels.
> > 
> > 
> >>> - introduce PIIX3-xen and piix3_write_config_xen
> >>> we do need to intercept pci config write to the PCI-ISA bridge to update
> >>> the PCI link routing;
> >>> 
> >>> - set the number of PIIX3-xen interrupts lines to 128;
> >>> 
> >>> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> >>> 
> >>> diff --git a/hw/pc.h b/hw/pc.h
> >>> index 0dcbee7..6d5730b 100644
> >>> --- a/hw/pc.h
> >>> +++ b/hw/pc.h
> >>> @@ -176,7 +176,6 @@ struct PCII440FXState;
> >>> typedef struct PCII440FXState PCII440FXState;
> >>> 
> >>> PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, qemu_irq *pic, ram_addr_t ram_size);
> >>> -PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size);
> >>> void i440fx_init_memory_mappings(PCII440FXState *d);
> >>> 
> >>> /* piix4.c */
> >>> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> >>> index 9a22a8a..ba198de 100644
> >>> --- a/hw/pc_piix.c
> >>> +++ b/hw/pc_piix.c
> >>> @@ -124,11 +124,7 @@ static void pc_init1(ram_addr_t ram_size,
> >>>    isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
> >>> 
> >>>    if (pci_enabled) {
> >>> -        if (!xen_enabled()) {
> >>> -            pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
> >>> -        } else {
> >>> -            pci_bus = i440fx_xen_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
> >>> -        }
> >>> +        pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
> >>>    } else {
> >>>        pci_bus = NULL;
> >>>        i440fx_state = NULL;
> >>> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> >>> index 7f1c4cc..3d73a42 100644
> >>> --- a/hw/piix_pci.c
> >>> +++ b/hw/piix_pci.c
> >>> @@ -40,6 +40,7 @@ typedef PCIHostState I440FXState;
> >>> 
> >>> #define PIIX_NUM_PIC_IRQS       16      /* i8259 * 2 */
> >>> #define PIIX_NUM_PIRQS          4ULL    /* PIRQ[A-D] */
> >>> +#define XEN_PIIX_NUM_PIRQS      128ULL
> >>> #define PIIX_PIRQC              0x60
> >>> 
> >>> typedef struct PIIX3State {
> >>> @@ -78,6 +79,8 @@ struct PCII440FXState {
> >>> #define I440FX_SMRAM    0x72
> >>> 
> >>> static void piix3_set_irq(void *opaque, int pirq, int level);
> >>> +static void piix3_write_config_xen(PCIDevice *dev,
> >>> +                               uint32_t address, uint32_t val, int len);
> >>> 
> >>> /* return the global irq number corresponding to a given device irq
> >>>   pin. We could also use the bus number to have a more precise
> >>> @@ -173,13 +176,6 @@ static void i440fx_write_config(PCIDevice *dev,
> >>>    }
> >>> }
> >>> 
> >>> -static void i440fx_write_config_xen(PCIDevice *dev,
> >>> -                                    uint32_t address, uint32_t val, int len)
> >>> -{
> >>> -    xen_piix_pci_write_config_client(address, val, len);
> >>> -    i440fx_write_config(dev, address, val, len);
> >>> -}
> >>> -
> >>> static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
> >>> {
> >>>    PCII440FXState *d = opaque;
> >>> @@ -267,8 +263,17 @@ static PCIBus *i440fx_common_init(const char *device_name,
> >>>    d = pci_create_simple(b, 0, device_name);
> >>>    *pi440fx_state = DO_UPCAST(PCII440FXState, dev, d);
> >>> 
> >>> -    piix3 = DO_UPCAST(PIIX3State, dev,
> >>> -                      pci_create_simple_multifunction(b, -1, true, "PIIX3"));
> >>> +    if (xen_enabled()) {
> >>> +        piix3 = DO_UPCAST(PIIX3State, dev,
> >>> +                pci_create_simple_multifunction(b, -1, true, "PIIX3-xen"));
> >>> +        pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
> >>> +                piix3, XEN_PIIX_NUM_PIRQS);
> >> 
> >> But with XEN_PIIX_NUM_PIRQS it's not a piix3 anymore, no? What's the reason behind this change?
> > 
> > It is still a piix3, but also provides non-legacy interrupt links to the
> > IO-APIC.
> > The four pins of each PCI device on the bus not only are routed to the
> > normal four pirqs (programmed writing to 0x60-0x63, see above) but also
> > they are connected to the IO-APIC directly.
> > These additional routes can only be discovered through ACPI, so you need
> > matching ACPI tables. We used to build the old ACPI tables like this:
> > 
> > /* PRTA: APIC routing table (via non-legacy IOAPIC GSIs). */
> > printf("Name(PRTA, Package() {\n");
> > for ( dev = 1; dev < 32; dev++ )
> >    for ( intx = 0; intx < 4; intx++ ) /* INTA-D */
> >        printf("Package(){0x%04xffff, %u, 0, %u},\n",
> >               dev, intx, ((dev*4+dev/8+intx)&31)+16);
> > printf("})\n");
> > 
> 
> Interesting concept, but completely non-standard and very much
> different from real hardware. Please at least add a comment there to
> show readers that Xen is doing a hack which is not at all related to
> how the PIIX really works.

Isn't this more a function of the "wires" on the motherboard than the
PIIX specifically? i.e. this just encodes the permutation of the wires
from the PCI slots into the IO-APIC input pins (bypassing the PIIX,
which is only used for legacy ISA IRQs i.e. by non-APIC aware OSes)?

IOW I guess strictly speaking Xen differs from the i440fx rather than
from the PIIX?

So XEN_PIIX_NUM_PIRQS is a bit misnamed -- it's more like
XEN_I440FX_NUM_PIRQS (or maybe even XEN_IOAPIC_NUM_...?)

Ian.
Alexander Graf June 14, 2011, 12:30 p.m. UTC | #5
Am 14.06.2011 um 14:17 schrieb Ian Campbell <Ian.Campbell@citrix.com>:

> On Tue, 2011-06-14 at 10:25 +0100, Alexander Graf wrote:
>> On 31.05.2011, at 13:05, Stefano Stabellini wrote:
>> 
>>> On Sat, 28 May 2011, Alexander Graf wrote:
>>>> 
>>>> On 26.05.2011, at 17:48, Stefano Stabellini wrote:
>>>> 
>>>>> xen: fix interrupt routing
>>>>> 
>>>>> - remove i440FX-xen and i440fx_write_config_xen
>>>>> we don't need to intercept pci config writes to i440FX anymore;
>>>> 
>>>> Why not? In which version? Did anything below change? What about compat code? Older hypervisor versions?
>>> 
>>> Nothing changed, I think it was a genuine mistake in the original patch
>>> series: the intention was to catch the pci config writes to 0x60-0x63 to
>>> reprogram the xen pci link routes accordingly (see
>>> xen_piix_pci_write_config_client).  If I am not mistaken a similar thing
>>> is done by non-Xen Qemu in piix3_update_irq_levels.
>>> 
>>> 
>>>>> - introduce PIIX3-xen and piix3_write_config_xen
>>>>> we do need to intercept pci config write to the PCI-ISA bridge to update
>>>>> the PCI link routing;
>>>>> 
>>>>> - set the number of PIIX3-xen interrupts lines to 128;
>>>>> 
>>>>> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
>>>>> 
>>>>> diff --git a/hw/pc.h b/hw/pc.h
>>>>> index 0dcbee7..6d5730b 100644
>>>>> --- a/hw/pc.h
>>>>> +++ b/hw/pc.h
>>>>> @@ -176,7 +176,6 @@ struct PCII440FXState;
>>>>> typedef struct PCII440FXState PCII440FXState;
>>>>> 
>>>>> PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, qemu_irq *pic, ram_addr_t ram_size);
>>>>> -PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size);
>>>>> void i440fx_init_memory_mappings(PCII440FXState *d);
>>>>> 
>>>>> /* piix4.c */
>>>>> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
>>>>> index 9a22a8a..ba198de 100644
>>>>> --- a/hw/pc_piix.c
>>>>> +++ b/hw/pc_piix.c
>>>>> @@ -124,11 +124,7 @@ static void pc_init1(ram_addr_t ram_size,
>>>>>   isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
>>>>> 
>>>>>   if (pci_enabled) {
>>>>> -        if (!xen_enabled()) {
>>>>> -            pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>>>>> -        } else {
>>>>> -            pci_bus = i440fx_xen_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>>>>> -        }
>>>>> +        pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>>>>>   } else {
>>>>>       pci_bus = NULL;
>>>>>       i440fx_state = NULL;
>>>>> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
>>>>> index 7f1c4cc..3d73a42 100644
>>>>> --- a/hw/piix_pci.c
>>>>> +++ b/hw/piix_pci.c
>>>>> @@ -40,6 +40,7 @@ typedef PCIHostState I440FXState;
>>>>> 
>>>>> #define PIIX_NUM_PIC_IRQS       16      /* i8259 * 2 */
>>>>> #define PIIX_NUM_PIRQS          4ULL    /* PIRQ[A-D] */
>>>>> +#define XEN_PIIX_NUM_PIRQS      128ULL
>>>>> #define PIIX_PIRQC              0x60
>>>>> 
>>>>> typedef struct PIIX3State {
>>>>> @@ -78,6 +79,8 @@ struct PCII440FXState {
>>>>> #define I440FX_SMRAM    0x72
>>>>> 
>>>>> static void piix3_set_irq(void *opaque, int pirq, int level);
>>>>> +static void piix3_write_config_xen(PCIDevice *dev,
>>>>> +                               uint32_t address, uint32_t val, int len);
>>>>> 
>>>>> /* return the global irq number corresponding to a given device irq
>>>>>  pin. We could also use the bus number to have a more precise
>>>>> @@ -173,13 +176,6 @@ static void i440fx_write_config(PCIDevice *dev,
>>>>>   }
>>>>> }
>>>>> 
>>>>> -static void i440fx_write_config_xen(PCIDevice *dev,
>>>>> -                                    uint32_t address, uint32_t val, int len)
>>>>> -{
>>>>> -    xen_piix_pci_write_config_client(address, val, len);
>>>>> -    i440fx_write_config(dev, address, val, len);
>>>>> -}
>>>>> -
>>>>> static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
>>>>> {
>>>>>   PCII440FXState *d = opaque;
>>>>> @@ -267,8 +263,17 @@ static PCIBus *i440fx_common_init(const char *device_name,
>>>>>   d = pci_create_simple(b, 0, device_name);
>>>>>   *pi440fx_state = DO_UPCAST(PCII440FXState, dev, d);
>>>>> 
>>>>> -    piix3 = DO_UPCAST(PIIX3State, dev,
>>>>> -                      pci_create_simple_multifunction(b, -1, true, "PIIX3"));
>>>>> +    if (xen_enabled()) {
>>>>> +        piix3 = DO_UPCAST(PIIX3State, dev,
>>>>> +                pci_create_simple_multifunction(b, -1, true, "PIIX3-xen"));
>>>>> +        pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
>>>>> +                piix3, XEN_PIIX_NUM_PIRQS);
>>>> 
>>>> But with XEN_PIIX_NUM_PIRQS it's not a piix3 anymore, no? What's the reason behind this change?
>>> 
>>> It is still a piix3, but also provides non-legacy interrupt links to the
>>> IO-APIC.
>>> The four pins of each PCI device on the bus not only are routed to the
>>> normal four pirqs (programmed writing to 0x60-0x63, see above) but also
>>> they are connected to the IO-APIC directly.
>>> These additional routes can only be discovered through ACPI, so you need
>>> matching ACPI tables. We used to build the old ACPI tables like this:
>>> 
>>> /* PRTA: APIC routing table (via non-legacy IOAPIC GSIs). */
>>> printf("Name(PRTA, Package() {\n");
>>> for ( dev = 1; dev < 32; dev++ )
>>>   for ( intx = 0; intx < 4; intx++ ) /* INTA-D */
>>>       printf("Package(){0x%04xffff, %u, 0, %u},\n",
>>>              dev, intx, ((dev*4+dev/8+intx)&31)+16);
>>> printf("})\n");
>>> 
>> 
>> Interesting concept, but completely non-standard and very much
>> different from real hardware. Please at least add a comment there to
>> show readers that Xen is doing a hack which is not at all related to
>> how the PIIX really works.
> 
> Isn't this more a function of the "wires" on the motherboard than the
> PIIX specifically? i.e. this just encodes the permutation of the wires
> from the PCI slots into the IO-APIC input pins (bypassing the PIIX,
> which is only used for legacy ISA IRQs i.e. by non-APIC aware OSes)?

Interrupts with PCI work slightly different. PCI devices can map (themselves or by software) to one of 4 interrupt lines: INTA, INTB, INTC, INTD. These get converted using PCI host controller specific logic to 4 interrupt lines which then go into the IO-APIC.

The IO-APIC is a chip with a limited number of pins. IIRC it was 24, could be 26 though.

I haven't seen a single case where PCI devices have a direct link to the IO-APIC. I also have not seen any PCI host controller that exports more than 4 interrupts. Giving each PCI device its own line, on top of that more than ever could be in real hardware, is a plain hack IMHO.

Did this really give you actual performance/latency/scalability gains? I still think for devices that matter, we should go with MSI rather than deriving from real hw.


Alex

>
Jan Kiszka June 14, 2011, 1:08 p.m. UTC | #6
On 2011-06-14 14:30, Alexander Graf wrote:
> 
> Am 14.06.2011 um 14:17 schrieb Ian Campbell <Ian.Campbell@citrix.com>:
> 
>> On Tue, 2011-06-14 at 10:25 +0100, Alexander Graf wrote:
>>> On 31.05.2011, at 13:05, Stefano Stabellini wrote:
>>>
>>>> On Sat, 28 May 2011, Alexander Graf wrote:
>>>>>
>>>>> On 26.05.2011, at 17:48, Stefano Stabellini wrote:
>>>>>
>>>>>> xen: fix interrupt routing
>>>>>>
>>>>>> - remove i440FX-xen and i440fx_write_config_xen
>>>>>> we don't need to intercept pci config writes to i440FX anymore;
>>>>>
>>>>> Why not? In which version? Did anything below change? What about compat code? Older hypervisor versions?
>>>>
>>>> Nothing changed, I think it was a genuine mistake in the original patch
>>>> series: the intention was to catch the pci config writes to 0x60-0x63 to
>>>> reprogram the xen pci link routes accordingly (see
>>>> xen_piix_pci_write_config_client).  If I am not mistaken a similar thing
>>>> is done by non-Xen Qemu in piix3_update_irq_levels.
>>>>
>>>>
>>>>>> - introduce PIIX3-xen and piix3_write_config_xen
>>>>>> we do need to intercept pci config write to the PCI-ISA bridge to update
>>>>>> the PCI link routing;
>>>>>>
>>>>>> - set the number of PIIX3-xen interrupts lines to 128;
>>>>>>
>>>>>> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
>>>>>>
>>>>>> diff --git a/hw/pc.h b/hw/pc.h
>>>>>> index 0dcbee7..6d5730b 100644
>>>>>> --- a/hw/pc.h
>>>>>> +++ b/hw/pc.h
>>>>>> @@ -176,7 +176,6 @@ struct PCII440FXState;
>>>>>> typedef struct PCII440FXState PCII440FXState;
>>>>>>
>>>>>> PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, qemu_irq *pic, ram_addr_t ram_size);
>>>>>> -PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size);
>>>>>> void i440fx_init_memory_mappings(PCII440FXState *d);
>>>>>>
>>>>>> /* piix4.c */
>>>>>> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
>>>>>> index 9a22a8a..ba198de 100644
>>>>>> --- a/hw/pc_piix.c
>>>>>> +++ b/hw/pc_piix.c
>>>>>> @@ -124,11 +124,7 @@ static void pc_init1(ram_addr_t ram_size,
>>>>>>   isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
>>>>>>
>>>>>>   if (pci_enabled) {
>>>>>> -        if (!xen_enabled()) {
>>>>>> -            pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>>>>>> -        } else {
>>>>>> -            pci_bus = i440fx_xen_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>>>>>> -        }
>>>>>> +        pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
>>>>>>   } else {
>>>>>>       pci_bus = NULL;
>>>>>>       i440fx_state = NULL;
>>>>>> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
>>>>>> index 7f1c4cc..3d73a42 100644
>>>>>> --- a/hw/piix_pci.c
>>>>>> +++ b/hw/piix_pci.c
>>>>>> @@ -40,6 +40,7 @@ typedef PCIHostState I440FXState;
>>>>>>
>>>>>> #define PIIX_NUM_PIC_IRQS       16      /* i8259 * 2 */
>>>>>> #define PIIX_NUM_PIRQS          4ULL    /* PIRQ[A-D] */
>>>>>> +#define XEN_PIIX_NUM_PIRQS      128ULL
>>>>>> #define PIIX_PIRQC              0x60
>>>>>>
>>>>>> typedef struct PIIX3State {
>>>>>> @@ -78,6 +79,8 @@ struct PCII440FXState {
>>>>>> #define I440FX_SMRAM    0x72
>>>>>>
>>>>>> static void piix3_set_irq(void *opaque, int pirq, int level);
>>>>>> +static void piix3_write_config_xen(PCIDevice *dev,
>>>>>> +                               uint32_t address, uint32_t val, int len);
>>>>>>
>>>>>> /* return the global irq number corresponding to a given device irq
>>>>>>  pin. We could also use the bus number to have a more precise
>>>>>> @@ -173,13 +176,6 @@ static void i440fx_write_config(PCIDevice *dev,
>>>>>>   }
>>>>>> }
>>>>>>
>>>>>> -static void i440fx_write_config_xen(PCIDevice *dev,
>>>>>> -                                    uint32_t address, uint32_t val, int len)
>>>>>> -{
>>>>>> -    xen_piix_pci_write_config_client(address, val, len);
>>>>>> -    i440fx_write_config(dev, address, val, len);
>>>>>> -}
>>>>>> -
>>>>>> static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
>>>>>> {
>>>>>>   PCII440FXState *d = opaque;
>>>>>> @@ -267,8 +263,17 @@ static PCIBus *i440fx_common_init(const char *device_name,
>>>>>>   d = pci_create_simple(b, 0, device_name);
>>>>>>   *pi440fx_state = DO_UPCAST(PCII440FXState, dev, d);
>>>>>>
>>>>>> -    piix3 = DO_UPCAST(PIIX3State, dev,
>>>>>> -                      pci_create_simple_multifunction(b, -1, true, "PIIX3"));
>>>>>> +    if (xen_enabled()) {
>>>>>> +        piix3 = DO_UPCAST(PIIX3State, dev,
>>>>>> +                pci_create_simple_multifunction(b, -1, true, "PIIX3-xen"));
>>>>>> +        pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
>>>>>> +                piix3, XEN_PIIX_NUM_PIRQS);
>>>>>
>>>>> But with XEN_PIIX_NUM_PIRQS it's not a piix3 anymore, no? What's the reason behind this change?
>>>>
>>>> It is still a piix3, but also provides non-legacy interrupt links to the
>>>> IO-APIC.
>>>> The four pins of each PCI device on the bus not only are routed to the
>>>> normal four pirqs (programmed writing to 0x60-0x63, see above) but also
>>>> they are connected to the IO-APIC directly.
>>>> These additional routes can only be discovered through ACPI, so you need
>>>> matching ACPI tables. We used to build the old ACPI tables like this:
>>>>
>>>> /* PRTA: APIC routing table (via non-legacy IOAPIC GSIs). */
>>>> printf("Name(PRTA, Package() {\n");
>>>> for ( dev = 1; dev < 32; dev++ )
>>>>   for ( intx = 0; intx < 4; intx++ ) /* INTA-D */
>>>>       printf("Package(){0x%04xffff, %u, 0, %u},\n",
>>>>              dev, intx, ((dev*4+dev/8+intx)&31)+16);
>>>> printf("})\n");
>>>>
>>>
>>> Interesting concept, but completely non-standard and very much
>>> different from real hardware. Please at least add a comment there to
>>> show readers that Xen is doing a hack which is not at all related to
>>> how the PIIX really works.
>>
>> Isn't this more a function of the "wires" on the motherboard than the
>> PIIX specifically? i.e. this just encodes the permutation of the wires
>> from the PCI slots into the IO-APIC input pins (bypassing the PIIX,
>> which is only used for legacy ISA IRQs i.e. by non-APIC aware OSes)?
> 
> Interrupts with PCI work slightly different. PCI devices can map (themselves or by software) to one of 4 interrupt lines: INTA, INTB, INTC, INTD. These get converted using PCI host controller specific logic to 4 interrupt lines which then go into the IO-APIC.
> 
> The IO-APIC is a chip with a limited number of pins. IIRC it was 24, could be 26 though.
> 
> I haven't seen a single case where PCI devices have a direct link to the IO-APIC. I also have not seen any PCI host controller that exports more than 4 interrupts. Giving each PCI device its own line, on top of that more than ever could be in real hardware, is a plain hack IMHO.
> 
> Did this really give you actual performance/latency/scalability gains? I still think for devices that matter, we should go with MSI rather than deriving from real hw.

I bet the motivation is to have an IRQ route that is independent of
QEMU, thus can be discovered inside the Xen kernel and then remains
stable - or is simply hard-wired. Device assignment? Direct legacy IRQ
injection from the kernel?

KVM (qemu-kvm) has that issue as well and uses a simple hack to get a
notification on routing changes. That works as long as there is only one
hub (the PIIX3) involved. We need something smarter on the long term,
though.

Jan
Stefano Stabellini June 14, 2011, 1:31 p.m. UTC | #7
On Tue, 14 Jun 2011, Jan Kiszka wrote:
> I bet the motivation is to have an IRQ route that is independent of
> QEMU, thus can be discovered inside the Xen kernel and then remains
> stable - or is simply hard-wired. Device assignment? Direct legacy IRQ
> injection from the kernel?
> 

This code predates me, so Keir might have more insights on the original
motivation, but you are correct, they are hard-wired and can be used for
device-assignment as well.
diff mbox

Patch

diff --git a/hw/pc.h b/hw/pc.h
index 0dcbee7..6d5730b 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -176,7 +176,6 @@  struct PCII440FXState;
 typedef struct PCII440FXState PCII440FXState;
 
 PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, qemu_irq *pic, ram_addr_t ram_size);
-PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size);
 void i440fx_init_memory_mappings(PCII440FXState *d);
 
 /* piix4.c */
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 9a22a8a..ba198de 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -124,11 +124,7 @@  static void pc_init1(ram_addr_t ram_size,
     isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
 
     if (pci_enabled) {
-        if (!xen_enabled()) {
-            pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
-        } else {
-            pci_bus = i440fx_xen_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
-        }
+        pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
     } else {
         pci_bus = NULL;
         i440fx_state = NULL;
diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 7f1c4cc..3d73a42 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -40,6 +40,7 @@  typedef PCIHostState I440FXState;
 
 #define PIIX_NUM_PIC_IRQS       16      /* i8259 * 2 */
 #define PIIX_NUM_PIRQS          4ULL    /* PIRQ[A-D] */
+#define XEN_PIIX_NUM_PIRQS      128ULL
 #define PIIX_PIRQC              0x60
 
 typedef struct PIIX3State {
@@ -78,6 +79,8 @@  struct PCII440FXState {
 #define I440FX_SMRAM    0x72
 
 static void piix3_set_irq(void *opaque, int pirq, int level);
+static void piix3_write_config_xen(PCIDevice *dev,
+                               uint32_t address, uint32_t val, int len);
 
 /* return the global irq number corresponding to a given device irq
    pin. We could also use the bus number to have a more precise
@@ -173,13 +176,6 @@  static void i440fx_write_config(PCIDevice *dev,
     }
 }
 
-static void i440fx_write_config_xen(PCIDevice *dev,
-                                    uint32_t address, uint32_t val, int len)
-{
-    xen_piix_pci_write_config_client(address, val, len);
-    i440fx_write_config(dev, address, val, len);
-}
-
 static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
 {
     PCII440FXState *d = opaque;
@@ -267,8 +263,17 @@  static PCIBus *i440fx_common_init(const char *device_name,
     d = pci_create_simple(b, 0, device_name);
     *pi440fx_state = DO_UPCAST(PCII440FXState, dev, d);
 
-    piix3 = DO_UPCAST(PIIX3State, dev,
-                      pci_create_simple_multifunction(b, -1, true, "PIIX3"));
+    if (xen_enabled()) {
+        piix3 = DO_UPCAST(PIIX3State, dev,
+                pci_create_simple_multifunction(b, -1, true, "PIIX3-xen"));
+        pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
+                piix3, XEN_PIIX_NUM_PIRQS);
+    } else {
+        piix3 = DO_UPCAST(PIIX3State, dev,
+                pci_create_simple_multifunction(b, -1, true, "PIIX3"));
+        pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, piix3,
+                PIIX_NUM_PIRQS);
+    }
     piix3->pic = pic;
 
     (*pi440fx_state)->piix3 = piix3;
@@ -289,21 +294,6 @@  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn,
     PCIBus *b;
 
     b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, pic, ram_size);
-    pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, (*pi440fx_state)->piix3,
-                 PIIX_NUM_PIRQS);
-
-    return b;
-}
-
-PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn,
-                        qemu_irq *pic, ram_addr_t ram_size)
-{
-    PCIBus *b;
-
-    b = i440fx_common_init("i440FX-xen", pi440fx_state, piix3_devfn, pic, ram_size);
-    pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
-                 (*pi440fx_state)->piix3, PIIX_NUM_PIRQS);
-
     return b;
 }
 
@@ -365,6 +355,13 @@  static void piix3_write_config(PCIDevice *dev,
     }
 }
 
+static void piix3_write_config_xen(PCIDevice *dev,
+                               uint32_t address, uint32_t val, int len)
+{
+    xen_piix_pci_write_config_client(address, val, len);
+    piix3_write_config(dev, address, val, len);
+}
+
 static void piix3_reset(void *opaque)
 {
     PIIX3State *d = opaque;
@@ -465,14 +462,6 @@  static PCIDeviceInfo i440fx_info[] = {
         .init         = i440fx_initfn,
         .config_write = i440fx_write_config,
     },{
-        .qdev.name    = "i440FX-xen",
-        .qdev.desc    = "Host bridge",
-        .qdev.size    = sizeof(PCII440FXState),
-        .qdev.vmsd    = &vmstate_i440fx,
-        .qdev.no_user = 1,
-        .init         = i440fx_initfn,
-        .config_write = i440fx_write_config_xen,
-    },{
         .qdev.name    = "PIIX3",
         .qdev.desc    = "ISA bridge",
         .qdev.size    = sizeof(PIIX3State),
@@ -482,6 +471,15 @@  static PCIDeviceInfo i440fx_info[] = {
         .init         = piix3_initfn,
         .config_write = piix3_write_config,
     },{
+        .qdev.name    = "PIIX3-xen",
+        .qdev.desc    = "ISA bridge",
+        .qdev.size    = sizeof(PIIX3State),
+        .qdev.vmsd    = &vmstate_piix3,
+        .qdev.no_user = 1,
+        .no_hotplug   = 1,
+        .init         = piix3_initfn,
+        .config_write = piix3_write_config_xen,
+    },{
         /* end of list */
     }
 };