Patchwork [RFC] pci: Differentiate PCI Express bus

login
register
mail settings
Submitter Alex Williamson
Date March 11, 2013, 9:18 p.m.
Message ID <20130311210254.5171.78669.stgit@bling.home>
Download mbox | patch
Permalink /patch/226705/
State New
Headers show

Comments

Alex Williamson - March 11, 2013, 9:18 p.m.
When creating capabilities devices need to know what kind of bus
they're on.  If we're on an express bus without a parent_dev, then
we're on the root complex and need to use integrated endpoints
rather than standard endpoints.  When we're on an express bus with
a parent_dev we need to negotiate link parameters so that the
endpoint doesn't claim it's running x16, 8GT/s while the root port
above it claims x1, 2.5GT/s capability.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

This feels a bit kludgy so I'm sending this out as an RFC looking for
suggestions.  I played a little with creating a PCIBusClass, putting
is_express on the class, and instantiating a TYPE_PCIE_BUS that uses
TYPE_PCI_BUS as it's parent, but that gets overly complicated and
means that any time we instantiate a bus we need to figure out whether
to use legacy or express.  Any better ideas?  Thanks,

Alex

 hw/ioh3420.c            |    2 ++
 hw/pci/pci_bus.h        |    2 ++
 hw/q35.c                |    1 +
 hw/xio3130_downstream.c |    2 ++
 hw/xio3130_upstream.c   |    2 ++
 5 files changed, 9 insertions(+)
Michael S. Tsirkin - March 12, 2013, 2:46 p.m.
On Mon, Mar 11, 2013 at 03:18:49PM -0600, Alex Williamson wrote:
> When creating capabilities devices need to know what kind of bus
> they're on.  If we're on an express bus without a parent_dev, then
> we're on the root complex and need to use integrated endpoints
> rather than standard endpoints.  When we're on an express bus with
> a parent_dev we need to negotiate link parameters so that the
> endpoint doesn't claim it's running x16, 8GT/s while the root port
> above it claims x1, 2.5GT/s capability.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
> 
> This feels a bit kludgy so I'm sending this out as an RFC looking for
> suggestions.  I played a little with creating a PCIBusClass, putting
> is_express on the class,

You actually don't even need is_express if you do this, just
add a wrapper that checks the type.

> and instantiating a TYPE_PCIE_BUS that uses
> TYPE_PCI_BUS as it's parent, but that gets overly complicated and
> means that any time we instantiate a bus we need to figure out whether
> to use legacy or express.

This last is probably a plus, not a minus.

> Any better ideas?  Thanks,
> 
> Alex

If I understand correctly, the issue is that the root bus does not
have a parent device?

>  hw/ioh3420.c            |    2 ++
>  hw/pci/pci_bus.h        |    2 ++
>  hw/q35.c                |    1 +
>  hw/xio3130_downstream.c |    2 ++
>  hw/xio3130_upstream.c   |    2 ++
>  5 files changed, 9 insertions(+)

This isn't too bad though I'd prefer accessing is_express
through an API. E.g.
pci_bus_set_type()

> diff --git a/hw/ioh3420.c b/hw/ioh3420.c
> index 95bceb5..186a46f 100644
> --- a/hw/ioh3420.c
> +++ b/hw/ioh3420.c
> @@ -102,6 +102,8 @@ static int ioh3420_initfn(PCIDevice *d)
>          return rc;
>      }
>  
> +    br->sec_bus->is_express = true;
> +
>      pcie_port_init_reg(d);
>  
>      rc = pci_bridge_ssvid_init(d, IOH_EP_SSVID_OFFSET,
> diff --git a/hw/pci/pci_bus.h b/hw/pci/pci_bus.h
> index aef559a..a325f46 100644
> --- a/hw/pci/pci_bus.h
> +++ b/hw/pci/pci_bus.h
> @@ -34,6 +34,8 @@ struct PCIBus {
>         Keep a count of the number of devices with raised IRQs.  */
>      int nirq;
>      int *irq_count;
> +
> +    bool is_express; /* PCI Express bus or Legacy bus? */
>  };
>  
>  typedef struct PCIBridgeWindows PCIBridgeWindows;
> diff --git a/hw/q35.c b/hw/q35.c
> index efebc27..f5fdcb0 100644
> --- a/hw/q35.c
> +++ b/hw/q35.c
> @@ -55,6 +55,7 @@ static int q35_host_init(SysBusDevice *dev)
>      }
>      b = pci_bus_new(&s->host.pci.busdev.qdev, "pcie.0",
>                      s->mch.pci_address_space, s->mch.address_space_io, 0);
> +    b->is_express = true;
>      s->host.pci.bus = b;
>      qdev_set_parent_bus(DEVICE(&s->mch), BUS(b));
>      qdev_init_nofail(DEVICE(&s->mch));
> diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c
> index 7f00bc8..600ec06 100644
> --- a/hw/xio3130_downstream.c
> +++ b/hw/xio3130_downstream.c
> @@ -66,6 +66,8 @@ static int xio3130_downstream_initfn(PCIDevice *d)
>          return rc;
>      }
>  
> +    br->sec_bus->is_express = true;
> +
>      pcie_port_init_reg(d);
>  
>      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
> diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c
> index 70b15d3..b6fea60 100644
> --- a/hw/xio3130_upstream.c
> +++ b/hw/xio3130_upstream.c
> @@ -62,6 +62,8 @@ static int xio3130_upstream_initfn(PCIDevice *d)
>          return rc;
>      }
>  
> +    br->sec_bus->is_express = true;
> +
>      pcie_port_init_reg(d);
>  
>      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
Alex Williamson - March 12, 2013, 3:36 p.m.
On Tue, 2013-03-12 at 16:46 +0200, Michael S. Tsirkin wrote:
> On Mon, Mar 11, 2013 at 03:18:49PM -0600, Alex Williamson wrote:
> > When creating capabilities devices need to know what kind of bus
> > they're on.  If we're on an express bus without a parent_dev, then
> > we're on the root complex and need to use integrated endpoints
> > rather than standard endpoints.  When we're on an express bus with
> > a parent_dev we need to negotiate link parameters so that the
> > endpoint doesn't claim it's running x16, 8GT/s while the root port
> > above it claims x1, 2.5GT/s capability.
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> > 
> > This feels a bit kludgy so I'm sending this out as an RFC looking for
> > suggestions.  I played a little with creating a PCIBusClass, putting
> > is_express on the class,
> 
> You actually don't even need is_express if you do this, just
> add a wrapper that checks the type.

Yeah, that's true.

> > and instantiating a TYPE_PCIE_BUS that uses
> > TYPE_PCI_BUS as it's parent, but that gets overly complicated and
> > means that any time we instantiate a bus we need to figure out whether
> > to use legacy or express.
> 
> This last is probably a plus, not a minus.

Ok, it just means either duplicating a lot of interfaces to make legacy
vs express functions or adding ugly is_express options to the functions
(ex. pci_bus_new).  Preference?

> > Any better ideas?  Thanks,
> > 
> > Alex
> 
> If I understand correctly, the issue is that the root bus does not
> have a parent device?

No, that's actually a feature that lets us determine if the express bus
is a root complex bus or normal express bus.  I'm worried about things
like xhci which calls pcie_cap_init() to give itself an endpoint
capability.  If it's connected to the root complex, that actually needs
to be an integrated endpoint or else windows won't use it.  Devices
probably don't want to care whether they're an endpoint or integrated
endpoint, so pcie_cap_init() should probably transparently change types
and drop the link capabilities.

We also need to add pseudo link negotiation.  Root ports and switches
should report maximum width and transfer rate capability and downstream
devices should call a function to negotiate link to the device
capabilities.  I don't think we can just look for is_express on
bus->parent_dev because the parent_dev could be a PCIe-to-PCI bridge.
So there seem to be a number of points where it would be convenient to
test express vs legacy on the bus.

> >  hw/ioh3420.c            |    2 ++
> >  hw/pci/pci_bus.h        |    2 ++
> >  hw/q35.c                |    1 +
> >  hw/xio3130_downstream.c |    2 ++
> >  hw/xio3130_upstream.c   |    2 ++
> >  5 files changed, 9 insertions(+)
> 
> This isn't too bad though I'd prefer accessing is_express
> through an API. E.g.
> pci_bus_set_type()

Yep, this is just an example of the minimum footprint for such a change
pushing it as far out from the core as we can.  If you're onboard that
we need a way to differentiate the bus type I can make it more
integrated into the core.  Thanks,

Alex

> > diff --git a/hw/ioh3420.c b/hw/ioh3420.c
> > index 95bceb5..186a46f 100644
> > --- a/hw/ioh3420.c
> > +++ b/hw/ioh3420.c
> > @@ -102,6 +102,8 @@ static int ioh3420_initfn(PCIDevice *d)
> >          return rc;
> >      }
> >  
> > +    br->sec_bus->is_express = true;
> > +
> >      pcie_port_init_reg(d);
> >  
> >      rc = pci_bridge_ssvid_init(d, IOH_EP_SSVID_OFFSET,
> > diff --git a/hw/pci/pci_bus.h b/hw/pci/pci_bus.h
> > index aef559a..a325f46 100644
> > --- a/hw/pci/pci_bus.h
> > +++ b/hw/pci/pci_bus.h
> > @@ -34,6 +34,8 @@ struct PCIBus {
> >         Keep a count of the number of devices with raised IRQs.  */
> >      int nirq;
> >      int *irq_count;
> > +
> > +    bool is_express; /* PCI Express bus or Legacy bus? */
> >  };
> >  
> >  typedef struct PCIBridgeWindows PCIBridgeWindows;
> > diff --git a/hw/q35.c b/hw/q35.c
> > index efebc27..f5fdcb0 100644
> > --- a/hw/q35.c
> > +++ b/hw/q35.c
> > @@ -55,6 +55,7 @@ static int q35_host_init(SysBusDevice *dev)
> >      }
> >      b = pci_bus_new(&s->host.pci.busdev.qdev, "pcie.0",
> >                      s->mch.pci_address_space, s->mch.address_space_io, 0);
> > +    b->is_express = true;
> >      s->host.pci.bus = b;
> >      qdev_set_parent_bus(DEVICE(&s->mch), BUS(b));
> >      qdev_init_nofail(DEVICE(&s->mch));
> > diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c
> > index 7f00bc8..600ec06 100644
> > --- a/hw/xio3130_downstream.c
> > +++ b/hw/xio3130_downstream.c
> > @@ -66,6 +66,8 @@ static int xio3130_downstream_initfn(PCIDevice *d)
> >          return rc;
> >      }
> >  
> > +    br->sec_bus->is_express = true;
> > +
> >      pcie_port_init_reg(d);
> >  
> >      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
> > diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c
> > index 70b15d3..b6fea60 100644
> > --- a/hw/xio3130_upstream.c
> > +++ b/hw/xio3130_upstream.c
> > @@ -62,6 +62,8 @@ static int xio3130_upstream_initfn(PCIDevice *d)
> >          return rc;
> >      }
> >  
> > +    br->sec_bus->is_express = true;
> > +
> >      pcie_port_init_reg(d);
> >  
> >      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
Michael S. Tsirkin - March 12, 2013, 3:50 p.m.
On Tue, Mar 12, 2013 at 09:36:49AM -0600, Alex Williamson wrote:
> On Tue, 2013-03-12 at 16:46 +0200, Michael S. Tsirkin wrote:
> > On Mon, Mar 11, 2013 at 03:18:49PM -0600, Alex Williamson wrote:
> > > When creating capabilities devices need to know what kind of bus
> > > they're on.  If we're on an express bus without a parent_dev, then
> > > we're on the root complex and need to use integrated endpoints
> > > rather than standard endpoints.  When we're on an express bus with
> > > a parent_dev we need to negotiate link parameters so that the
> > > endpoint doesn't claim it's running x16, 8GT/s while the root port
> > > above it claims x1, 2.5GT/s capability.
> > > 
> > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > ---
> > > 
> > > This feels a bit kludgy so I'm sending this out as an RFC looking for
> > > suggestions.  I played a little with creating a PCIBusClass, putting
> > > is_express on the class,
> > 
> > You actually don't even need is_express if you do this, just
> > add a wrapper that checks the type.
> 
> Yeah, that's true.
> 
> > > and instantiating a TYPE_PCIE_BUS that uses
> > > TYPE_PCI_BUS as it's parent, but that gets overly complicated and
> > > means that any time we instantiate a bus we need to figure out whether
> > > to use legacy or express.
> > 
> > This last is probably a plus, not a minus.
> 
> Ok, it just means either duplicating a lot of interfaces to make legacy
> vs express functions or adding ugly is_express options to the functions
> (ex. pci_bus_new).  Preference?

If we do this I'd prefer a type flag I think. But see below.

> > > Any better ideas?  Thanks,
> > > 
> > > Alex
> > 
> > If I understand correctly, the issue is that the root bus does not
> > have a parent device?
> 
> No, that's actually a feature that lets us determine if the express bus
> is a root complex bus or normal express bus.  I'm worried about things
> like xhci which calls pcie_cap_init() to give itself an endpoint
> capability.  If it's connected to the root complex, that actually needs
> to be an integrated endpoint or else windows won't use it.  Devices
> probably don't want to care whether they're an endpoint or integrated
> endpoint, so pcie_cap_init() should probably transparently change types
> and drop the link capabilities.

Confused. Root complex bus doesn't have a parent, isn't this
enough?

> We also need to add pseudo link negotiation.  Root ports and switches
> should report maximum width and transfer rate capability and downstream
> devices should call a function to negotiate link to the device
> capabilities.  I don't think we can just look for is_express on
> bus->parent_dev because the parent_dev could be a PCIe-to-PCI bridge.

That's easy to detect though, right?

> So there seem to be a number of points where it would be convenient to
> test express vs legacy on the bus.
> 
> > >  hw/ioh3420.c            |    2 ++
> > >  hw/pci/pci_bus.h        |    2 ++
> > >  hw/q35.c                |    1 +
> > >  hw/xio3130_downstream.c |    2 ++
> > >  hw/xio3130_upstream.c   |    2 ++
> > >  5 files changed, 9 insertions(+)
> > 
> > This isn't too bad though I'd prefer accessing is_express
> > through an API. E.g.
> > pci_bus_set_type()
> 
> Yep, this is just an example of the minimum footprint for such a change
> pushing it as far out from the core as we can.  If you're onboard that
> we need a way to differentiate the bus type I can make it more
> integrated into the core.  Thanks,
> 
> Alex
> 
> > > diff --git a/hw/ioh3420.c b/hw/ioh3420.c
> > > index 95bceb5..186a46f 100644
> > > --- a/hw/ioh3420.c
> > > +++ b/hw/ioh3420.c
> > > @@ -102,6 +102,8 @@ static int ioh3420_initfn(PCIDevice *d)
> > >          return rc;
> > >      }
> > >  
> > > +    br->sec_bus->is_express = true;
> > > +
> > >      pcie_port_init_reg(d);
> > >  
> > >      rc = pci_bridge_ssvid_init(d, IOH_EP_SSVID_OFFSET,
> > > diff --git a/hw/pci/pci_bus.h b/hw/pci/pci_bus.h
> > > index aef559a..a325f46 100644
> > > --- a/hw/pci/pci_bus.h
> > > +++ b/hw/pci/pci_bus.h
> > > @@ -34,6 +34,8 @@ struct PCIBus {
> > >         Keep a count of the number of devices with raised IRQs.  */
> > >      int nirq;
> > >      int *irq_count;
> > > +
> > > +    bool is_express; /* PCI Express bus or Legacy bus? */
> > >  };
> > >  
> > >  typedef struct PCIBridgeWindows PCIBridgeWindows;
> > > diff --git a/hw/q35.c b/hw/q35.c
> > > index efebc27..f5fdcb0 100644
> > > --- a/hw/q35.c
> > > +++ b/hw/q35.c
> > > @@ -55,6 +55,7 @@ static int q35_host_init(SysBusDevice *dev)
> > >      }
> > >      b = pci_bus_new(&s->host.pci.busdev.qdev, "pcie.0",
> > >                      s->mch.pci_address_space, s->mch.address_space_io, 0);
> > > +    b->is_express = true;
> > >      s->host.pci.bus = b;
> > >      qdev_set_parent_bus(DEVICE(&s->mch), BUS(b));
> > >      qdev_init_nofail(DEVICE(&s->mch));
> > > diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c
> > > index 7f00bc8..600ec06 100644
> > > --- a/hw/xio3130_downstream.c
> > > +++ b/hw/xio3130_downstream.c
> > > @@ -66,6 +66,8 @@ static int xio3130_downstream_initfn(PCIDevice *d)
> > >          return rc;
> > >      }
> > >  
> > > +    br->sec_bus->is_express = true;
> > > +
> > >      pcie_port_init_reg(d);
> > >  
> > >      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
> > > diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c
> > > index 70b15d3..b6fea60 100644
> > > --- a/hw/xio3130_upstream.c
> > > +++ b/hw/xio3130_upstream.c
> > > @@ -62,6 +62,8 @@ static int xio3130_upstream_initfn(PCIDevice *d)
> > >          return rc;
> > >      }
> > >  
> > > +    br->sec_bus->is_express = true;
> > > +
> > >      pcie_port_init_reg(d);
> > >  
> > >      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
> 
>
Alex Williamson - March 12, 2013, 4:25 p.m.
On Tue, 2013-03-12 at 17:50 +0200, Michael S. Tsirkin wrote:
> On Tue, Mar 12, 2013 at 09:36:49AM -0600, Alex Williamson wrote:
> > On Tue, 2013-03-12 at 16:46 +0200, Michael S. Tsirkin wrote:
> > > On Mon, Mar 11, 2013 at 03:18:49PM -0600, Alex Williamson wrote:
> > > > When creating capabilities devices need to know what kind of bus
> > > > they're on.  If we're on an express bus without a parent_dev, then
> > > > we're on the root complex and need to use integrated endpoints
> > > > rather than standard endpoints.  When we're on an express bus with
> > > > a parent_dev we need to negotiate link parameters so that the
> > > > endpoint doesn't claim it's running x16, 8GT/s while the root port
> > > > above it claims x1, 2.5GT/s capability.
> > > > 
> > > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > > ---
> > > > 
> > > > This feels a bit kludgy so I'm sending this out as an RFC looking for
> > > > suggestions.  I played a little with creating a PCIBusClass, putting
> > > > is_express on the class,
> > > 
> > > You actually don't even need is_express if you do this, just
> > > add a wrapper that checks the type.
> > 
> > Yeah, that's true.
> > 
> > > > and instantiating a TYPE_PCIE_BUS that uses
> > > > TYPE_PCI_BUS as it's parent, but that gets overly complicated and
> > > > means that any time we instantiate a bus we need to figure out whether
> > > > to use legacy or express.
> > > 
> > > This last is probably a plus, not a minus.
> > 
> > Ok, it just means either duplicating a lot of interfaces to make legacy
> > vs express functions or adding ugly is_express options to the functions
> > (ex. pci_bus_new).  Preference?
> 
> If we do this I'd prefer a type flag I think. But see below.
> 
> > > > Any better ideas?  Thanks,
> > > > 
> > > > Alex
> > > 
> > > If I understand correctly, the issue is that the root bus does not
> > > have a parent device?
> > 
> > No, that's actually a feature that lets us determine if the express bus
> > is a root complex bus or normal express bus.  I'm worried about things
> > like xhci which calls pcie_cap_init() to give itself an endpoint
> > capability.  If it's connected to the root complex, that actually needs
> > to be an integrated endpoint or else windows won't use it.  Devices
> > probably don't want to care whether they're an endpoint or integrated
> > endpoint, so pcie_cap_init() should probably transparently change types
> > and drop the link capabilities.
> 
> Confused. Root complex bus doesn't have a parent, isn't this
> enough?

If we only used (!bus->parent_dev), how do we determine a root complex
bus on q35 vs a root bus on 440fx?

> > We also need to add pseudo link negotiation.  Root ports and switches
> > should report maximum width and transfer rate capability and downstream
> > devices should call a function to negotiate link to the device
> > capabilities.  I don't think we can just look for is_express on
> > bus->parent_dev because the parent_dev could be a PCIe-to-PCI bridge.
> 
> That's easy to detect though, right?

Yeah, maybe it's not so bad.  We'd have to look at is_express on
bus->parent_dev.  If it's not express we know we're on a legacy bus.  If
it is_express then we have to read the type from the PCIe capability.
If type is PCI_EXP_TYPE_PCI_BRIDGE it's legacy, otherwise express.  So
maybe the root bus is the only place where we can't tell.

I'm not sure if there's a globally correct right answer for conversion
of PCIe capabilities though.  On a root complex bus we should always
convert endpoints to integrated endpoints.  On a legacy PCI root bus we
probably want to leave them alone or in some cases drop them.  Device
assignment has shown us that some drivers depend on having an express
capability regardless of the bus, but we don't know what would happen if
those became integrated endpoints on 440fx.

We could special case (!bus->parent_dev) and look at the name of the
class providing the bus, but then we're into and ugly string matching
maintenance issue.  Is there another way we can tell q35 vs 440fx?
Thanks,

Alex

> > So there seem to be a number of points where it would be convenient to
> > test express vs legacy on the bus.
> > 
> > > >  hw/ioh3420.c            |    2 ++
> > > >  hw/pci/pci_bus.h        |    2 ++
> > > >  hw/q35.c                |    1 +
> > > >  hw/xio3130_downstream.c |    2 ++
> > > >  hw/xio3130_upstream.c   |    2 ++
> > > >  5 files changed, 9 insertions(+)
> > > 
> > > This isn't too bad though I'd prefer accessing is_express
> > > through an API. E.g.
> > > pci_bus_set_type()
> > 
> > Yep, this is just an example of the minimum footprint for such a change
> > pushing it as far out from the core as we can.  If you're onboard that
> > we need a way to differentiate the bus type I can make it more
> > integrated into the core.  Thanks,
> > 
> > Alex
> > 
> > > > diff --git a/hw/ioh3420.c b/hw/ioh3420.c
> > > > index 95bceb5..186a46f 100644
> > > > --- a/hw/ioh3420.c
> > > > +++ b/hw/ioh3420.c
> > > > @@ -102,6 +102,8 @@ static int ioh3420_initfn(PCIDevice *d)
> > > >          return rc;
> > > >      }
> > > >  
> > > > +    br->sec_bus->is_express = true;
> > > > +
> > > >      pcie_port_init_reg(d);
> > > >  
> > > >      rc = pci_bridge_ssvid_init(d, IOH_EP_SSVID_OFFSET,
> > > > diff --git a/hw/pci/pci_bus.h b/hw/pci/pci_bus.h
> > > > index aef559a..a325f46 100644
> > > > --- a/hw/pci/pci_bus.h
> > > > +++ b/hw/pci/pci_bus.h
> > > > @@ -34,6 +34,8 @@ struct PCIBus {
> > > >         Keep a count of the number of devices with raised IRQs.  */
> > > >      int nirq;
> > > >      int *irq_count;
> > > > +
> > > > +    bool is_express; /* PCI Express bus or Legacy bus? */
> > > >  };
> > > >  
> > > >  typedef struct PCIBridgeWindows PCIBridgeWindows;
> > > > diff --git a/hw/q35.c b/hw/q35.c
> > > > index efebc27..f5fdcb0 100644
> > > > --- a/hw/q35.c
> > > > +++ b/hw/q35.c
> > > > @@ -55,6 +55,7 @@ static int q35_host_init(SysBusDevice *dev)
> > > >      }
> > > >      b = pci_bus_new(&s->host.pci.busdev.qdev, "pcie.0",
> > > >                      s->mch.pci_address_space, s->mch.address_space_io, 0);
> > > > +    b->is_express = true;
> > > >      s->host.pci.bus = b;
> > > >      qdev_set_parent_bus(DEVICE(&s->mch), BUS(b));
> > > >      qdev_init_nofail(DEVICE(&s->mch));
> > > > diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c
> > > > index 7f00bc8..600ec06 100644
> > > > --- a/hw/xio3130_downstream.c
> > > > +++ b/hw/xio3130_downstream.c
> > > > @@ -66,6 +66,8 @@ static int xio3130_downstream_initfn(PCIDevice *d)
> > > >          return rc;
> > > >      }
> > > >  
> > > > +    br->sec_bus->is_express = true;
> > > > +
> > > >      pcie_port_init_reg(d);
> > > >  
> > > >      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
> > > > diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c
> > > > index 70b15d3..b6fea60 100644
> > > > --- a/hw/xio3130_upstream.c
> > > > +++ b/hw/xio3130_upstream.c
> > > > @@ -62,6 +62,8 @@ static int xio3130_upstream_initfn(PCIDevice *d)
> > > >          return rc;
> > > >      }
> > > >  
> > > > +    br->sec_bus->is_express = true;
> > > > +
> > > >      pcie_port_init_reg(d);
> > > >  
> > > >      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
> > 
> >
Michael S. Tsirkin - March 12, 2013, 4:30 p.m.
On Tue, Mar 12, 2013 at 10:25:20AM -0600, Alex Williamson wrote:
> On Tue, 2013-03-12 at 17:50 +0200, Michael S. Tsirkin wrote:
> > On Tue, Mar 12, 2013 at 09:36:49AM -0600, Alex Williamson wrote:
> > > On Tue, 2013-03-12 at 16:46 +0200, Michael S. Tsirkin wrote:
> > > > On Mon, Mar 11, 2013 at 03:18:49PM -0600, Alex Williamson wrote:
> > > > > When creating capabilities devices need to know what kind of bus
> > > > > they're on.  If we're on an express bus without a parent_dev, then
> > > > > we're on the root complex and need to use integrated endpoints
> > > > > rather than standard endpoints.  When we're on an express bus with
> > > > > a parent_dev we need to negotiate link parameters so that the
> > > > > endpoint doesn't claim it's running x16, 8GT/s while the root port
> > > > > above it claims x1, 2.5GT/s capability.
> > > > > 
> > > > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > > > ---
> > > > > 
> > > > > This feels a bit kludgy so I'm sending this out as an RFC looking for
> > > > > suggestions.  I played a little with creating a PCIBusClass, putting
> > > > > is_express on the class,
> > > > 
> > > > You actually don't even need is_express if you do this, just
> > > > add a wrapper that checks the type.
> > > 
> > > Yeah, that's true.
> > > 
> > > > > and instantiating a TYPE_PCIE_BUS that uses
> > > > > TYPE_PCI_BUS as it's parent, but that gets overly complicated and
> > > > > means that any time we instantiate a bus we need to figure out whether
> > > > > to use legacy or express.
> > > > 
> > > > This last is probably a plus, not a minus.
> > > 
> > > Ok, it just means either duplicating a lot of interfaces to make legacy
> > > vs express functions or adding ugly is_express options to the functions
> > > (ex. pci_bus_new).  Preference?
> > 
> > If we do this I'd prefer a type flag I think. But see below.
> > 
> > > > > Any better ideas?  Thanks,
> > > > > 
> > > > > Alex
> > > > 
> > > > If I understand correctly, the issue is that the root bus does not
> > > > have a parent device?
> > > 
> > > No, that's actually a feature that lets us determine if the express bus
> > > is a root complex bus or normal express bus.  I'm worried about things
> > > like xhci which calls pcie_cap_init() to give itself an endpoint
> > > capability.  If it's connected to the root complex, that actually needs
> > > to be an integrated endpoint or else windows won't use it.  Devices
> > > probably don't want to care whether they're an endpoint or integrated
> > > endpoint, so pcie_cap_init() should probably transparently change types
> > > and drop the link capabilities.
> > 
> > Confused. Root complex bus doesn't have a parent, isn't this
> > enough?
> 
> If we only used (!bus->parent_dev), how do we determine a root complex
> bus on q35 vs a root bus on 440fx?

Right, that's the issue as I see it.

> > > We also need to add pseudo link negotiation.  Root ports and switches
> > > should report maximum width and transfer rate capability and downstream
> > > devices should call a function to negotiate link to the device
> > > capabilities.  I don't think we can just look for is_express on
> > > bus->parent_dev because the parent_dev could be a PCIe-to-PCI bridge.
> > 
> > That's easy to detect though, right?
> 
> Yeah, maybe it's not so bad.  We'd have to look at is_express on
> bus->parent_dev.  If it's not express we know we're on a legacy bus.  If
> it is_express then we have to read the type from the PCIe capability.
> If type is PCI_EXP_TYPE_PCI_BRIDGE it's legacy, otherwise express.  So
> maybe the root bus is the only place where we can't tell.
> 
> I'm not sure if there's a globally correct right answer for conversion
> of PCIe capabilities though.  On a root complex bus we should always
> convert endpoints to integrated endpoints.  On a legacy PCI root bus we
> probably want to leave them alone or in some cases drop them.  Device
> assignment has shown us that some drivers depend on having an express
> capability regardless of the bus, but we don't know what would happen if
> those became integrated endpoints on 440fx.
> 
> We could special case (!bus->parent_dev) and look at the name of the
> class providing the bus, but then we're into and ugly string matching
> maintenance issue.  Is there another way we can tell q35 vs 440fx?
> Thanks,
> 
> Alex
> 
> > > So there seem to be a number of points where it would be convenient to
> > > test express vs legacy on the bus.
> > > 
> > > > >  hw/ioh3420.c            |    2 ++
> > > > >  hw/pci/pci_bus.h        |    2 ++
> > > > >  hw/q35.c                |    1 +
> > > > >  hw/xio3130_downstream.c |    2 ++
> > > > >  hw/xio3130_upstream.c   |    2 ++
> > > > >  5 files changed, 9 insertions(+)
> > > > 
> > > > This isn't too bad though I'd prefer accessing is_express
> > > > through an API. E.g.
> > > > pci_bus_set_type()
> > > 
> > > Yep, this is just an example of the minimum footprint for such a change
> > > pushing it as far out from the core as we can.  If you're onboard that
> > > we need a way to differentiate the bus type I can make it more
> > > integrated into the core.  Thanks,
> > > 
> > > Alex
> > > 
> > > > > diff --git a/hw/ioh3420.c b/hw/ioh3420.c
> > > > > index 95bceb5..186a46f 100644
> > > > > --- a/hw/ioh3420.c
> > > > > +++ b/hw/ioh3420.c
> > > > > @@ -102,6 +102,8 @@ static int ioh3420_initfn(PCIDevice *d)
> > > > >          return rc;
> > > > >      }
> > > > >  
> > > > > +    br->sec_bus->is_express = true;
> > > > > +
> > > > >      pcie_port_init_reg(d);
> > > > >  
> > > > >      rc = pci_bridge_ssvid_init(d, IOH_EP_SSVID_OFFSET,
> > > > > diff --git a/hw/pci/pci_bus.h b/hw/pci/pci_bus.h
> > > > > index aef559a..a325f46 100644
> > > > > --- a/hw/pci/pci_bus.h
> > > > > +++ b/hw/pci/pci_bus.h
> > > > > @@ -34,6 +34,8 @@ struct PCIBus {
> > > > >         Keep a count of the number of devices with raised IRQs.  */
> > > > >      int nirq;
> > > > >      int *irq_count;
> > > > > +
> > > > > +    bool is_express; /* PCI Express bus or Legacy bus? */
> > > > >  };
> > > > >  
> > > > >  typedef struct PCIBridgeWindows PCIBridgeWindows;
> > > > > diff --git a/hw/q35.c b/hw/q35.c
> > > > > index efebc27..f5fdcb0 100644
> > > > > --- a/hw/q35.c
> > > > > +++ b/hw/q35.c
> > > > > @@ -55,6 +55,7 @@ static int q35_host_init(SysBusDevice *dev)
> > > > >      }
> > > > >      b = pci_bus_new(&s->host.pci.busdev.qdev, "pcie.0",
> > > > >                      s->mch.pci_address_space, s->mch.address_space_io, 0);
> > > > > +    b->is_express = true;
> > > > >      s->host.pci.bus = b;
> > > > >      qdev_set_parent_bus(DEVICE(&s->mch), BUS(b));
> > > > >      qdev_init_nofail(DEVICE(&s->mch));
> > > > > diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c
> > > > > index 7f00bc8..600ec06 100644
> > > > > --- a/hw/xio3130_downstream.c
> > > > > +++ b/hw/xio3130_downstream.c
> > > > > @@ -66,6 +66,8 @@ static int xio3130_downstream_initfn(PCIDevice *d)
> > > > >          return rc;
> > > > >      }
> > > > >  
> > > > > +    br->sec_bus->is_express = true;
> > > > > +
> > > > >      pcie_port_init_reg(d);
> > > > >  
> > > > >      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
> > > > > diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c
> > > > > index 70b15d3..b6fea60 100644
> > > > > --- a/hw/xio3130_upstream.c
> > > > > +++ b/hw/xio3130_upstream.c
> > > > > @@ -62,6 +62,8 @@ static int xio3130_upstream_initfn(PCIDevice *d)
> > > > >          return rc;
> > > > >      }
> > > > >  
> > > > > +    br->sec_bus->is_express = true;
> > > > > +
> > > > >      pcie_port_init_reg(d);
> > > > >  
> > > > >      rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
> > > 
> > > 
> 
>

Patch

diff --git a/hw/ioh3420.c b/hw/ioh3420.c
index 95bceb5..186a46f 100644
--- a/hw/ioh3420.c
+++ b/hw/ioh3420.c
@@ -102,6 +102,8 @@  static int ioh3420_initfn(PCIDevice *d)
         return rc;
     }
 
+    br->sec_bus->is_express = true;
+
     pcie_port_init_reg(d);
 
     rc = pci_bridge_ssvid_init(d, IOH_EP_SSVID_OFFSET,
diff --git a/hw/pci/pci_bus.h b/hw/pci/pci_bus.h
index aef559a..a325f46 100644
--- a/hw/pci/pci_bus.h
+++ b/hw/pci/pci_bus.h
@@ -34,6 +34,8 @@  struct PCIBus {
        Keep a count of the number of devices with raised IRQs.  */
     int nirq;
     int *irq_count;
+
+    bool is_express; /* PCI Express bus or Legacy bus? */
 };
 
 typedef struct PCIBridgeWindows PCIBridgeWindows;
diff --git a/hw/q35.c b/hw/q35.c
index efebc27..f5fdcb0 100644
--- a/hw/q35.c
+++ b/hw/q35.c
@@ -55,6 +55,7 @@  static int q35_host_init(SysBusDevice *dev)
     }
     b = pci_bus_new(&s->host.pci.busdev.qdev, "pcie.0",
                     s->mch.pci_address_space, s->mch.address_space_io, 0);
+    b->is_express = true;
     s->host.pci.bus = b;
     qdev_set_parent_bus(DEVICE(&s->mch), BUS(b));
     qdev_init_nofail(DEVICE(&s->mch));
diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c
index 7f00bc8..600ec06 100644
--- a/hw/xio3130_downstream.c
+++ b/hw/xio3130_downstream.c
@@ -66,6 +66,8 @@  static int xio3130_downstream_initfn(PCIDevice *d)
         return rc;
     }
 
+    br->sec_bus->is_express = true;
+
     pcie_port_init_reg(d);
 
     rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,
diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c
index 70b15d3..b6fea60 100644
--- a/hw/xio3130_upstream.c
+++ b/hw/xio3130_upstream.c
@@ -62,6 +62,8 @@  static int xio3130_upstream_initfn(PCIDevice *d)
         return rc;
     }
 
+    br->sec_bus->is_express = true;
+
     pcie_port_init_reg(d);
 
     rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR,