Patchwork xen: fix interrupt routing

login
register
mail settings
Submitter Stefano Stabellini
Date May 18, 2011, 5:53 p.m.
Message ID <1305741220-4990-1-git-send-email-stefano.stabellini@eu.citrix.com>
Download mbox | patch
Permalink /patch/96210/
State New
Headers show

Comments

Stefano Stabellini - May 18, 2011, 5:53 p.m.
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

Match the routing informations built by seabios:

- remove i440fx_write_config_xen
we don't need to intercept pci config writes to i440FX;

- introduce piix3_write_config_xen
we do need to intercept pci config write to the PCI-ISA bridge to update
the PCI link routing;

- remove xen_pci_slot_get_pirq
we are now using the same link routing as seabios and qemu so we don't
need a diffirent get_pirq function;

- fix xen_piix3_set_irq
we always inject one of the 4 pci intx, so we can use
xc_hvm_set_isa_irq_level to inject the interrupt. Use pci_irqs as
initialized by seabios to map a pirq into an ISA irq. This has the
benefit of removing all the calls to xc_hvm_set_pci_intx_level that
doesn't work correctly anymore because from the same device number and
intx Xen calculates a different PCI link compared to Qemu and Seabios.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 hw/piix_pci.c |   21 ++++++++++++---------
 xen-all.c     |   16 ++++++++--------
 2 files changed, 20 insertions(+), 17 deletions(-)
Isaku Yamahata - May 19, 2011, 2:14 a.m.
On Wed, May 18, 2011 at 06:53:40PM +0100, stefano.stabellini@eu.citrix.com wrote:
> From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> 
> Match the routing informations built by seabios:
> 
> - remove i440fx_write_config_xen
> we don't need to intercept pci config writes to i440FX;
> 
> - introduce piix3_write_config_xen
> we do need to intercept pci config write to the PCI-ISA bridge to update
> the PCI link routing;

So "i440FX-xen" isn't needed anymore, and something like PIIX3-xen
is necessary instead of overwriting piix3->dev.config_write.

thanks,

> 
> - remove xen_pci_slot_get_pirq
> we are now using the same link routing as seabios and qemu so we don't
> need a diffirent get_pirq function;
> 
> - fix xen_piix3_set_irq
> we always inject one of the 4 pci intx, so we can use
> xc_hvm_set_isa_irq_level to inject the interrupt. Use pci_irqs as
> initialized by seabios to map a pirq into an ISA irq. This has the
> benefit of removing all the calls to xc_hvm_set_pci_intx_level that
> doesn't work correctly anymore because from the same device number and
> intx Xen calculates a different PCI link compared to Qemu and Seabios.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
>  hw/piix_pci.c |   21 ++++++++++++---------
>  xen-all.c     |   16 ++++++++--------
>  2 files changed, 20 insertions(+), 17 deletions(-)
> 
> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> index 7f1c4cc..aae6a48 100644
> --- a/hw/piix_pci.c
> +++ b/hw/piix_pci.c
> @@ -78,6 +78,8 @@ struct PCII440FXState {
>  #define I440FX_SMRAM    0x72
>  
>  static void piix3_set_irq(void *opaque, int pirq, int level);
> +static void piix3_write_config_xen(PCIDevice *dev,
> +                               uint32_t address, uint32_t val, int len);
>  
>  /* return the global irq number corresponding to a given device irq
>     pin. We could also use the bus number to have a more precise
> @@ -173,13 +175,6 @@ static void i440fx_write_config(PCIDevice *dev,
>      }
>  }
>  
> -static void i440fx_write_config_xen(PCIDevice *dev,
> -                                    uint32_t address, uint32_t val, int len)
> -{
> -    xen_piix_pci_write_config_client(address, val, len);
> -    i440fx_write_config(dev, address, val, len);
> -}
> -
>  static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
>  {
>      PCII440FXState *d = opaque;
> @@ -301,7 +296,8 @@ PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn,
>      PCIBus *b;
>  
>      b = i440fx_common_init("i440FX-xen", pi440fx_state, piix3_devfn, pic, ram_size);
> -    pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
> +    (*pi440fx_state)->piix3->dev.config_write = piix3_write_config_xen;
> +    pci_bus_irqs(b, xen_piix3_set_irq, pci_slot_get_pirq,
>                   (*pi440fx_state)->piix3, PIIX_NUM_PIRQS);
>  
>      return b;
> @@ -365,6 +361,13 @@ static void piix3_write_config(PCIDevice *dev,
>      }
>  }
>  
> +static void piix3_write_config_xen(PCIDevice *dev,
> +                               uint32_t address, uint32_t val, int len)
> +{
> +    xen_piix_pci_write_config_client(address, val, len);
> +    piix3_write_config(dev, address, val, len);
> +}
> +
>  static void piix3_reset(void *opaque)
>  {
>      PIIX3State *d = opaque;
> @@ -471,7 +474,7 @@ static PCIDeviceInfo i440fx_info[] = {
>          .qdev.vmsd    = &vmstate_i440fx,
>          .qdev.no_user = 1,
>          .init         = i440fx_initfn,
> -        .config_write = i440fx_write_config_xen,
> +        .config_write = i440fx_write_config,
>      },{
>          .qdev.name    = "PIIX3",
>          .qdev.desc    = "ISA bridge",
> diff --git a/xen-all.c b/xen-all.c
> index 0eac202..7d7863f 100644
> --- a/xen-all.c
> +++ b/xen-all.c
> @@ -70,17 +70,16 @@ typedef struct XenIOState {
>      Notifier exit;
>  } XenIOState;
>  
> -/* Xen specific function for piix pci */
> +/* pci_irqs as initialized by seabios */
> +uint8_t pci_irqs[4] = {
> +    10, 10, 11, 11
> +};
>  
> -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
> -{
> -    return irq_num + ((pci_dev->devfn >> 3) << 2);
> -}
> +/* Xen specific function for piix pci */
>  
> -void xen_piix3_set_irq(void *opaque, int irq_num, int level)
> +void xen_piix3_set_irq(void *opaque, int pirq, int level)
>  {
> -    xc_hvm_set_pci_intx_level(xen_xc, xen_domid, 0, 0, irq_num >> 2,
> -                              irq_num & 3, level);
> +    xc_hvm_set_isa_irq_level(xen_xc, xen_domid, pci_irqs[pirq], level);
>  }
>  
>  void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
> @@ -95,6 +94,7 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
>          }
>          v &= 0xf;
>          if (((address + i) >= 0x60) && ((address + i) <= 0x63)) {
> +            pci_irqs[address + i - 0x60] = v;
>              xc_hvm_set_pci_link_route(xen_xc, xen_domid, address + i - 0x60, v);
>          }
>      }
> -- 
> 1.7.2.3
> 
>
Ian Campbell - May 19, 2011, 7:55 a.m.
On Wed, 2011-05-18 at 18:53 +0100, Stefano Stabellini wrote:
> From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> 
> Match the routing informations built by seabios:
> 
> - remove i440fx_write_config_xen
> we don't need to intercept pci config writes to i440FX;
> 
> - introduce piix3_write_config_xen
> we do need to intercept pci config write to the PCI-ISA bridge to update
> the PCI link routing;
> 
> - remove xen_pci_slot_get_pirq
> we are now using the same link routing as seabios and qemu so we don't
> need a diffirent get_pirq function;
> 
> - fix xen_piix3_set_irq
> we always inject one of the 4 pci intx, so we can use
> xc_hvm_set_isa_irq_level to inject the interrupt. Use pci_irqs as
> initialized by seabios to map a pirq into an ISA irq. This has the
> benefit of removing all the calls to xc_hvm_set_pci_intx_level that
> doesn't work correctly anymore because from the same device number and
> intx Xen calculates a different PCI link compared to Qemu and Seabios.

Why does Xen calculate a different PCI link? What are the other
downsides of this disparity?

For example, in the hypervisor I see that hvm_pci_intx_link is used in
the passthrough code as well as in the HVMOP_set_pci_intx_level, is that
safe/sane in conjunction with this change?

IOW -- are we hiding other problems for later by doing an end run around
the interface instead of addressing the disconnect directly? It seems
like it would be a relatively contained change on the SeaBIOS end to
allow it to use the routing which Xen expects.

Ian.
Stefano Stabellini - May 19, 2011, 7:16 p.m.
On Thu, 19 May 2011, Ian Campbell wrote:
> On Wed, 2011-05-18 at 18:53 +0100, Stefano Stabellini wrote:
> > From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> > 
> > Match the routing informations built by seabios:
> > 
> > - remove i440fx_write_config_xen
> > we don't need to intercept pci config writes to i440FX;
> > 
> > - introduce piix3_write_config_xen
> > we do need to intercept pci config write to the PCI-ISA bridge to update
> > the PCI link routing;
> > 
> > - remove xen_pci_slot_get_pirq
> > we are now using the same link routing as seabios and qemu so we don't
> > need a diffirent get_pirq function;
> > 
> > - fix xen_piix3_set_irq
> > we always inject one of the 4 pci intx, so we can use
> > xc_hvm_set_isa_irq_level to inject the interrupt. Use pci_irqs as
> > initialized by seabios to map a pirq into an ISA irq. This has the
> > benefit of removing all the calls to xc_hvm_set_pci_intx_level that
> > doesn't work correctly anymore because from the same device number and
> > intx Xen calculates a different PCI link compared to Qemu and Seabios.
> 
> Why does Xen calculate a different PCI link? What are the other
> downsides of this disparity?
>
> For example, in the hypervisor I see that hvm_pci_intx_link is used in
> the passthrough code as well as in the HVMOP_set_pci_intx_level, is that
> safe/sane in conjunction with this change?
> 
> IOW -- are we hiding other problems for later by doing an end run around
> the interface instead of addressing the disconnect directly? It seems
> like it would be a relatively contained change on the SeaBIOS end to
> allow it to use the routing which Xen expects.


If you give a look at __hvm_pci_intx_assert in xen, it uses
hvm_pci_intx_link to calculate the link from device number and intx:

#define hvm_pci_intx_link(dev, intx) \
    (((dev) + (intx)) & 3)

while qemu and seabios calculate the link as follow:

static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
{
    int slot_addend;
    slot_addend = (pci_dev->devfn >> 3) - 1;
    return (pci_intx + slot_addend) & 3;
}

they are similar but unfortunately not indentical (the "- 1" is the
critical difference there).
That means that the routing for ISA irqs is different but if we always
use xc_hvm_set_isa_irq_level we avoid the issue because it takes an ISA
irq as parameter so Xen won't notice.

Unfortunately like you wrote, we would still have a problem with PCI
passthrough because Xen calls hvm_pci_intx_link internally.
We would need to add an option in Xen to switch to the new interrupt
routing.

I think we need to figure out if we want to keep the old MP tables and
the old ACPI _PRT entries or we want to start using Seabios' MP tables
and ACPI _PRT entries.
In both cases the impact on Qemu is pretty limited: it is just a matter
of changing the implementation of piix3_set_irq and pci_slot_get_pirq
(both have a xen specific implementation already).

It is interesting that the old PIIX3 in qemu_xen had 128 interrupt
lines, the routing informations of which are present in the ACPI PRTA
routing table.
The PIIX3 in the new qemu has only 4 lines so there is no need for
xc_hvm_set_pci_intx_level.
Considering that the next update of this patch will add PIIX3-xen there
is scope for increasing the interrupt lines again if we want to.
Stefano Stabellini - May 19, 2011, 7:16 p.m.
On Thu, 19 May 2011, Isaku Yamahata wrote:
> On Wed, May 18, 2011 at 06:53:40PM +0100, stefano.stabellini@eu.citrix.com wrote:
> > From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> > 
> > Match the routing informations built by seabios:
> > 
> > - remove i440fx_write_config_xen
> > we don't need to intercept pci config writes to i440FX;
> > 
> > - introduce piix3_write_config_xen
> > we do need to intercept pci config write to the PCI-ISA bridge to update
> > the PCI link routing;
> 
> So "i440FX-xen" isn't needed anymore, and something like PIIX3-xen
> is necessary instead of overwriting piix3->dev.config_write.

That is a good idea. I am also going to remove i440fx_xen_init to get rid of
a 'if (xen_enabled())' but to do that I have to move the call to
pci_bus_irqs in i440fx_common_init.

I'll send an update of the patch as soon as we figure out what to do
with the BIOS tables.

Patch

diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 7f1c4cc..aae6a48 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -78,6 +78,8 @@  struct PCII440FXState {
 #define I440FX_SMRAM    0x72
 
 static void piix3_set_irq(void *opaque, int pirq, int level);
+static void piix3_write_config_xen(PCIDevice *dev,
+                               uint32_t address, uint32_t val, int len);
 
 /* return the global irq number corresponding to a given device irq
    pin. We could also use the bus number to have a more precise
@@ -173,13 +175,6 @@  static void i440fx_write_config(PCIDevice *dev,
     }
 }
 
-static void i440fx_write_config_xen(PCIDevice *dev,
-                                    uint32_t address, uint32_t val, int len)
-{
-    xen_piix_pci_write_config_client(address, val, len);
-    i440fx_write_config(dev, address, val, len);
-}
-
 static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
 {
     PCII440FXState *d = opaque;
@@ -301,7 +296,8 @@  PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn,
     PCIBus *b;
 
     b = i440fx_common_init("i440FX-xen", pi440fx_state, piix3_devfn, pic, ram_size);
-    pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
+    (*pi440fx_state)->piix3->dev.config_write = piix3_write_config_xen;
+    pci_bus_irqs(b, xen_piix3_set_irq, pci_slot_get_pirq,
                  (*pi440fx_state)->piix3, PIIX_NUM_PIRQS);
 
     return b;
@@ -365,6 +361,13 @@  static void piix3_write_config(PCIDevice *dev,
     }
 }
 
+static void piix3_write_config_xen(PCIDevice *dev,
+                               uint32_t address, uint32_t val, int len)
+{
+    xen_piix_pci_write_config_client(address, val, len);
+    piix3_write_config(dev, address, val, len);
+}
+
 static void piix3_reset(void *opaque)
 {
     PIIX3State *d = opaque;
@@ -471,7 +474,7 @@  static PCIDeviceInfo i440fx_info[] = {
         .qdev.vmsd    = &vmstate_i440fx,
         .qdev.no_user = 1,
         .init         = i440fx_initfn,
-        .config_write = i440fx_write_config_xen,
+        .config_write = i440fx_write_config,
     },{
         .qdev.name    = "PIIX3",
         .qdev.desc    = "ISA bridge",
diff --git a/xen-all.c b/xen-all.c
index 0eac202..7d7863f 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -70,17 +70,16 @@  typedef struct XenIOState {
     Notifier exit;
 } XenIOState;
 
-/* Xen specific function for piix pci */
+/* pci_irqs as initialized by seabios */
+uint8_t pci_irqs[4] = {
+    10, 10, 11, 11
+};
 
-int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
-{
-    return irq_num + ((pci_dev->devfn >> 3) << 2);
-}
+/* Xen specific function for piix pci */
 
-void xen_piix3_set_irq(void *opaque, int irq_num, int level)
+void xen_piix3_set_irq(void *opaque, int pirq, int level)
 {
-    xc_hvm_set_pci_intx_level(xen_xc, xen_domid, 0, 0, irq_num >> 2,
-                              irq_num & 3, level);
+    xc_hvm_set_isa_irq_level(xen_xc, xen_domid, pci_irqs[pirq], level);
 }
 
 void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
@@ -95,6 +94,7 @@  void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
         }
         v &= 0xf;
         if (((address + i) >= 0x60) && ((address + i) <= 0x63)) {
+            pci_irqs[address + i - 0x60] = v;
             xc_hvm_set_pci_link_route(xen_xc, xen_domid, address + i - 0x60, v);
         }
     }