Patchwork [PATCHv2,1/3] pci: added pcie_get_speed_cap_mask function

login
register
mail settings
Submitter Lucas Kannebley Tavares
Date March 20, 2013, 5:24 a.m.
Message ID <1363757079-23550-2-git-send-email-lucaskt@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/229252/
State Changes Requested, archived
Headers show

Comments

Lucas Kannebley Tavares - March 20, 2013, 5:24 a.m.
Added function to gather the speed cap for a device and return a mask to
supported speeds. The function is divided into an interface and a weak
implementation so that architecture-specific functions can be called.

This is the first step in moving function drm_pcie_get_speed_cap_mask
from the drm subsystem to the pci one.

Signed-off-by: Lucas Kannebley Tavares <lucaskt@linux.vnet.ibm.com>
---
 drivers/pci/pci.c   |   44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h |    6 ++++++
 2 files changed, 50 insertions(+), 0 deletions(-)
Bjorn Helgaas - March 26, 2013, 6:39 p.m.
On Tue, Mar 19, 2013 at 11:24 PM, Lucas Kannebley Tavares
<lucaskt@linux.vnet.ibm.com> wrote:
> Added function to gather the speed cap for a device and return a mask to
> supported speeds. The function is divided into an interface and a weak
> implementation so that architecture-specific functions can be called.
>
> This is the first step in moving function drm_pcie_get_speed_cap_mask
> from the drm subsystem to the pci one.

This still doesn't feel right to me.

I'm definitely not a hardware guy, but my understanding based on the
PCIe spec r3.0, sec 6.11, is that the hardware will automatically
maintain the link at the highest speed supported by both ends of the
link, unless software sets a lower Target Link Speed.  The only users
of this function are some Radeon drivers, and it looks like they only
use it because the hardware doesn't conform to this aspect of the spec
and requires device-specific tweaking.

We already have bus->max_bus_speed, which should tell you the max
speed supported by the upstream component.  Is that enough
information?  Maybe the radeon drivers could simply do something like
this:

    speed = rdev->ddev->pdev->bus->max_bus_speed;
    if (speed == PCI_SPEED_UNKNOWN || speed < PCIE_SPEED_5_0GT)
        return;

In your original email [1], you mentioned a null pointer dereference,
presumably when reading the PCIe capability for dev->pdev->bus->self,
with "self" being NULL.  This only happens for a bus with no upstream
P2P bridge, i.e., self will be NULL only if pdev is on a root bus.
But we also know pdev is a PCIe device, and I think a PCIe device on a
root bus must be a "Root Complex Integrated Endpoint" (PCIe spec sec
1.3.2.3).  Such a device does not have a link at all, so there's no
point in fiddling with its link speed.

I don't see how a radeon device could be an integrated endpoint, but
in your hypervisor environment, maybe it isn't quite spec-compliant in
terms of its attachment.  Can you collect the output of "lspci -vv"?
Maybe that will make things clearer.

In any case, if a radeon device is on a root bus, I think the root bus
max_bus_speed will be PCI_SPEED_UNKNOWN, and if it's not on a root
bus, max_bus_speed should be set correctly based on the upstream PCIe
port.

Bjorn

[1] http://lkml.kernel.org/r/50819140.8030806@linux.vnet.ibm.com

> Signed-off-by: Lucas Kannebley Tavares <lucaskt@linux.vnet.ibm.com>
> ---
>  drivers/pci/pci.c   |   44 ++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/pci.h |    6 ++++++
>  2 files changed, 50 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index b099e00..d94ab79 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -3931,6 +3931,50 @@ static int __init pci_setup(char *str)
>  }
>  early_param("pci", pci_setup);
>
> +int __weak pcibios_get_speed_cap_mask(struct pci_dev *dev, u32 *mask)
> +{
> +       struct pci_dev *root;
> +       u32 lnkcap, lnkcap2;
> +
> +       *mask = 0;
> +       if (!dev)
> +               return -EINVAL;
> +
> +       root = dev->bus->self;
> +
> +       /* we've been informed via and serverworks don't make the cut */
> +       if (root->vendor == PCI_VENDOR_ID_VIA ||
> +           root->vendor == PCI_VENDOR_ID_SERVERWORKS)
> +               return -EINVAL;
> +
> +       pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
> +       pcie_capability_read_dword(root, PCI_EXP_LNKCAP2, &lnkcap2);
> +
> +       if (lnkcap2) {  /* PCIe r3.0-compliant */
> +               if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
> +                       *mask |= PCIE_SPEED_25;
> +               if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
> +                       *mask |= PCIE_SPEED_50;
> +               if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
> +                       *mask |= PCIE_SPEED_80;
> +       } else {        /* pre-r3.0 */
> +               if (lnkcap & PCI_EXP_LNKCAP_SLS_2_5GB)
> +                       *mask |= PCIE_SPEED_25;
> +               if (lnkcap & PCI_EXP_LNKCAP_SLS_5_0GB)
> +                       *mask |= (PCIE_SPEED_25 | PCIE_SPEED_50);
> +       }
> +
> +       dev_info(&dev->dev, "probing gen 2 caps for device %x:%x = %x/%x\n",
> +               root->vendor, root->device, lnkcap, lnkcap2);
> +       return 0;
> +}
> +
> +int pcie_get_speed_cap_mask(struct pci_dev *dev, u32 *mask)
> +{
> +       return pcibios_get_speed_cap_mask(dev, mask);
> +}
> +EXPORT_SYMBOL(pcie_get_speed_cap_mask);
> +
>  EXPORT_SYMBOL(pci_reenable_device);
>  EXPORT_SYMBOL(pci_enable_device_io);
>  EXPORT_SYMBOL(pci_enable_device_mem);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 2461033a..24a2f63 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1861,4 +1861,10 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
>   */
>  struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
>
> +#define PCIE_SPEED_25 1
> +#define PCIE_SPEED_50 2
> +#define PCIE_SPEED_80 4
> +
> +extern int pcie_get_speed_cap_mask(struct pci_dev *dev, u32 *speed_mask);
> +
>  #endif /* LINUX_PCI_H */
> --
> 1.7.4.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt - March 27, 2013, 3:25 p.m.
On Tue, 2013-03-26 at 12:39 -0600, Bjorn Helgaas wrote:
> But we also know pdev is a PCIe device, and I think a PCIe device on a
> root bus must be a "Root Complex Integrated Endpoint" (PCIe spec sec
> 1.3.2.3).  Such a device does not have a link at all, so there's no
> point in fiddling with its link speed.

This is where our IBM hypervisor makes things murky. It doesn't expose
the PCIe parents (basically somewhat makes PCIe look like PCI except we
still have the PCIe caps on the child devices, just no access to the
parent device).

It's garbage but can't be fixed (would break AIX :-)

However we might be able to populate the bus->max_bus_speed from some
architecture specific quirk and have radeon use that.

Cheers,
Ben.
Bjorn Helgaas - March 27, 2013, 4:58 p.m.
On Wed, Mar 27, 2013 at 9:25 AM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Tue, 2013-03-26 at 12:39 -0600, Bjorn Helgaas wrote:
>> But we also know pdev is a PCIe device, and I think a PCIe device on a
>> root bus must be a "Root Complex Integrated Endpoint" (PCIe spec sec
>> 1.3.2.3).  Such a device does not have a link at all, so there's no
>> point in fiddling with its link speed.
>
> This is where our IBM hypervisor makes things murky. It doesn't expose
> the PCIe parents (basically somewhat makes PCIe look like PCI except we
> still have the PCIe caps on the child devices, just no access to the
> parent device).

Interesting.  I wonder if we'll trip over this anywhere else, e.g.,
ASPM or other link-related things.  I guess we'll just have to see if
anything else breaks.

> It's garbage but can't be fixed (would break AIX :-)
>
> However we might be able to populate the bus->max_bus_speed from some
> architecture specific quirk and have radeon use that.

That sounds like a good solution to me.  It seems like it's really an
arch-specific deviation from the spec, so it'd be nice to have an
arch-specific solution for it.

Bjorn

Patch

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b099e00..d94ab79 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3931,6 +3931,50 @@  static int __init pci_setup(char *str)
 }
 early_param("pci", pci_setup);
 
+int __weak pcibios_get_speed_cap_mask(struct pci_dev *dev, u32 *mask)
+{
+	struct pci_dev *root;
+	u32 lnkcap, lnkcap2;
+
+	*mask = 0;
+	if (!dev)
+		return -EINVAL;
+
+	root = dev->bus->self;
+
+	/* we've been informed via and serverworks don't make the cut */
+	if (root->vendor == PCI_VENDOR_ID_VIA ||
+	    root->vendor == PCI_VENDOR_ID_SERVERWORKS)
+		return -EINVAL;
+
+	pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+	pcie_capability_read_dword(root, PCI_EXP_LNKCAP2, &lnkcap2);
+
+	if (lnkcap2) {	/* PCIe r3.0-compliant */
+		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
+			*mask |= PCIE_SPEED_25;
+		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
+			*mask |= PCIE_SPEED_50;
+		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
+			*mask |= PCIE_SPEED_80;
+	} else {	/* pre-r3.0 */
+		if (lnkcap & PCI_EXP_LNKCAP_SLS_2_5GB)
+			*mask |= PCIE_SPEED_25;
+		if (lnkcap & PCI_EXP_LNKCAP_SLS_5_0GB)
+			*mask |= (PCIE_SPEED_25 | PCIE_SPEED_50);
+	}
+
+	dev_info(&dev->dev, "probing gen 2 caps for device %x:%x = %x/%x\n",
+		root->vendor, root->device, lnkcap, lnkcap2);
+	return 0;
+}
+
+int pcie_get_speed_cap_mask(struct pci_dev *dev, u32 *mask)
+{
+	return pcibios_get_speed_cap_mask(dev, mask);
+}
+EXPORT_SYMBOL(pcie_get_speed_cap_mask);
+
 EXPORT_SYMBOL(pci_reenable_device);
 EXPORT_SYMBOL(pci_enable_device_io);
 EXPORT_SYMBOL(pci_enable_device_mem);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2461033a..24a2f63 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1861,4 +1861,10 @@  static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
  */
 struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
 
+#define PCIE_SPEED_25 1
+#define PCIE_SPEED_50 2
+#define PCIE_SPEED_80 4
+
+extern int pcie_get_speed_cap_mask(struct pci_dev *dev, u32 *speed_mask);
+
 #endif /* LINUX_PCI_H */