diff mbox series

PCI: pciehp: Report degraded links via link bandwidth notification

Message ID 20181129000829.14751-1-mr.nuke.me@gmail.com
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series PCI: pciehp: Report degraded links via link bandwidth notification | expand

Commit Message

Alex G. Nov. 29, 2018, 12:08 a.m. UTC
A warning is generated when a PCIe device is probed with a degraded
link, but there was no similar mechanism to warn when the link becomes
degraded after probing. The Link Bandwidth Notification provides this
mechanism.

Use the link bandwidth notification interrupt to detect bandwidth
changes, and rescan the bandwidth, looking for the weakest point. This
is the same logic used in probe().

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/pci/hotplug/pciehp_hpc.c | 35 +++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

Comments

Mika Westerberg Nov. 29, 2018, 4:06 p.m. UTC | #1
Hi Alexandru,

On Wed, Nov 28, 2018 at 06:08:24PM -0600, Alexandru Gagniuc wrote:
> A warning is generated when a PCIe device is probed with a degraded
> link, but there was no similar mechanism to warn when the link becomes
> degraded after probing. The Link Bandwidth Notification provides this
> mechanism.
> 
> Use the link bandwidth notification interrupt to detect bandwidth
> changes, and rescan the bandwidth, looking for the weakest point. This
> is the same logic used in probe().
> 
> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> ---
>  drivers/pci/hotplug/pciehp_hpc.c | 35 +++++++++++++++++++++++++++++++-
>  1 file changed, 34 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
> index 7dd443aea5a5..834672000b59 100644
> --- a/drivers/pci/hotplug/pciehp_hpc.c
> +++ b/drivers/pci/hotplug/pciehp_hpc.c
> @@ -515,7 +515,8 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id)
>  	struct controller *ctrl = (struct controller *)dev_id;
>  	struct pci_dev *pdev = ctrl_dev(ctrl);
>  	struct device *parent = pdev->dev.parent;
> -	u16 status, events;
> +	struct pci_dev *endpoint;
> +	u16 status, events, link_status;

Looks better if you write them in opposite order (reverse xmas-tree):

	u16 status, events, link_status;
	struct pci_dev *endpoint;

>  	/*
>  	 * Interrupts only occur in D3hot or shallower and only if enabled
> @@ -525,6 +526,17 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id)
>  	    (!(ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE) && !pciehp_poll_mode))
>  		return IRQ_NONE;
>  
> +	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &link_status);
> +

Unnecessary empty line.

> +	if (link_status & PCI_EXP_LNKSTA_LBMS) {
> +		if (pdev->subordinate && pdev->subordinate->self)
> +			endpoint = pdev->subordinate->self;

Hmm, I thought pdev->subordinate->self == pdev, no?

> +		else
> +			endpoint = pdev;
> +		__pcie_print_link_status(endpoint, false);
> +		pcie_capability_write_word(pdev, PCI_EXP_LNKSTA, link_status);
> +	}
> +
>  	/*
>  	 * Keep the port accessible by holding a runtime PM ref on its parent.
>  	 * Defer resume of the parent to the IRQ thread if it's suspended.
> @@ -677,6 +689,24 @@ static int pciehp_poll(void *data)
>  	return 0;
>  }
>  
> +static bool pcie_link_bandwidth_notification_supported(struct controller *ctrl)
> +{
> +	int ret;
> +	u32 cap;
> +
> +	ret = pcie_capability_read_dword(ctrl_dev(ctrl), PCI_EXP_LNKCAP, &cap);
> +	return (ret == PCIBIOS_SUCCESSFUL) && (cap & PCI_EXP_LNKCAP_LBNC);
> +}
> +
> +static void pcie_enable_link_bandwidth_notification(struct controller *ctrl)
> +{
> +	u16 lnk_ctl;
> +
> +	pcie_capability_read_word(ctrl_dev(ctrl), PCI_EXP_LNKCTL, &lnk_ctl);
> +	lnk_ctl |= PCI_EXP_LNKCTL_LBMIE;
> +	pcie_capability_write_word(ctrl_dev(ctrl), PCI_EXP_LNKCTL, lnk_ctl);
> +}
> +
>  static void pcie_enable_notification(struct controller *ctrl)
>  {
>  	u16 cmd, mask;
> @@ -713,6 +743,9 @@ static void pcie_enable_notification(struct controller *ctrl)
>  	pcie_write_cmd_nowait(ctrl, cmd, mask);
>  	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
>  		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
> +
> +	if (pcie_link_bandwidth_notification_supported(ctrl))
> +		pcie_enable_link_bandwidth_notification(ctrl);

Do we ever need to disable it?

>  }
>  
>  static void pcie_disable_notification(struct controller *ctrl)
> -- 
> 2.17.1
Bjorn Helgaas Nov. 29, 2018, 5:35 p.m. UTC | #2
On Wed, Nov 28, 2018 at 06:08:24PM -0600, Alexandru Gagniuc wrote:
> A warning is generated when a PCIe device is probed with a degraded
> link, but there was no similar mechanism to warn when the link becomes
> degraded after probing. The Link Bandwidth Notification provides this
> mechanism.
> 
> Use the link bandwidth notification interrupt to detect bandwidth
> changes, and rescan the bandwidth, looking for the weakest point. This
> is the same logic used in probe().

I like the concept of this.  What I don't like is the fact that it's
tied to pciehp, since I don't think the concept of Link Bandwidth
Notification is related to hotplug.  So I think we'll only notice this
for ports that support hotplug.  Maybe it's worth doing it this way
anyway, even if it could be generalized in the future?

> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> ---
>  drivers/pci/hotplug/pciehp_hpc.c | 35 +++++++++++++++++++++++++++++++-
>  1 file changed, 34 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
> index 7dd443aea5a5..834672000b59 100644
> --- a/drivers/pci/hotplug/pciehp_hpc.c
> +++ b/drivers/pci/hotplug/pciehp_hpc.c
> @@ -515,7 +515,8 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id)
>  	struct controller *ctrl = (struct controller *)dev_id;
>  	struct pci_dev *pdev = ctrl_dev(ctrl);
>  	struct device *parent = pdev->dev.parent;
> -	u16 status, events;
> +	struct pci_dev *endpoint;
> +	u16 status, events, link_status;
>  
>  	/*
>  	 * Interrupts only occur in D3hot or shallower and only if enabled
> @@ -525,6 +526,17 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id)
>  	    (!(ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE) && !pciehp_poll_mode))
>  		return IRQ_NONE;
>  
> +	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &link_status);
> +
> +	if (link_status & PCI_EXP_LNKSTA_LBMS) {
> +		if (pdev->subordinate && pdev->subordinate->self)
> +			endpoint = pdev->subordinate->self;
> +		else
> +			endpoint = pdev;
> +		__pcie_print_link_status(endpoint, false);
> +		pcie_capability_write_word(pdev, PCI_EXP_LNKSTA, link_status);
> +	}
> +
>  	/*
>  	 * Keep the port accessible by holding a runtime PM ref on its parent.
>  	 * Defer resume of the parent to the IRQ thread if it's suspended.
> @@ -677,6 +689,24 @@ static int pciehp_poll(void *data)
>  	return 0;
>  }
>  
> +static bool pcie_link_bandwidth_notification_supported(struct controller *ctrl)
> +{
> +	int ret;
> +	u32 cap;
> +
> +	ret = pcie_capability_read_dword(ctrl_dev(ctrl), PCI_EXP_LNKCAP, &cap);
> +	return (ret == PCIBIOS_SUCCESSFUL) && (cap & PCI_EXP_LNKCAP_LBNC);
> +}
> +
> +static void pcie_enable_link_bandwidth_notification(struct controller *ctrl)
> +{
> +	u16 lnk_ctl;
> +
> +	pcie_capability_read_word(ctrl_dev(ctrl), PCI_EXP_LNKCTL, &lnk_ctl);
> +	lnk_ctl |= PCI_EXP_LNKCTL_LBMIE;
> +	pcie_capability_write_word(ctrl_dev(ctrl), PCI_EXP_LNKCTL, lnk_ctl);
> +}
> +
>  static void pcie_enable_notification(struct controller *ctrl)
>  {
>  	u16 cmd, mask;
> @@ -713,6 +743,9 @@ static void pcie_enable_notification(struct controller *ctrl)
>  	pcie_write_cmd_nowait(ctrl, cmd, mask);
>  	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
>  		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
> +
> +	if (pcie_link_bandwidth_notification_supported(ctrl))
> +		pcie_enable_link_bandwidth_notification(ctrl);
>  }
>  
>  static void pcie_disable_notification(struct controller *ctrl)
> -- 
> 2.17.1
>
Alex_Gagniuc@Dellteam.com Nov. 29, 2018, 6:57 p.m. UTC | #3
On 11/29/2018 11:36 AM, Bjorn Helgaas wrote:
> On Wed, Nov 28, 2018 at 06:08:24PM -0600, Alexandru Gagniuc wrote:
>> A warning is generated when a PCIe device is probed with a degraded
>> link, but there was no similar mechanism to warn when the link becomes
>> degraded after probing. The Link Bandwidth Notification provides this
>> mechanism.
>>
>> Use the link bandwidth notification interrupt to detect bandwidth
>> changes, and rescan the bandwidth, looking for the weakest point. This
>> is the same logic used in probe().
> 
> I like the concept of this.  What I don't like is the fact that it's
> tied to pciehp, since I don't think the concept of Link Bandwidth
> Notification is related to hotplug.  So I think we'll only notice this
> for ports that support hotplug.  Maybe it's worth doing it this way
> anyway, even if it could be generalized in the future?

That makes sense. At first, I thought that BW notification was tied to 
hotplug, but our PCIe spec writer disagreed with that assertion. I'm 
just not sure where to handle the interrupt otherwise.

Alex
Alex_Gagniuc@Dellteam.com Nov. 29, 2018, 7 p.m. UTC | #4
On 11/29/2018 10:06 AM, Mika Westerberg wrote:
>> @@ -515,7 +515,8 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id)
>>   	struct controller *ctrl = (struct controller *)dev_id;
>>   	struct pci_dev *pdev = ctrl_dev(ctrl);
>>   	struct device *parent = pdev->dev.parent;
>> -	u16 status, events;
>> +	struct pci_dev *endpoint;
>> +	u16 status, events, link_status;
> 
> Looks better if you write them in opposite order (reverse xmas-tree):
> 
> 	u16 status, events, link_status;
> 	struct pci_dev *endpoint;
> 

I don't decorate in November :p

>> +	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &link_status);
>> +
> 
> Unnecessary empty line.

However Bjorn wants it, though I don't like the crowded look with this 
line removed.

>> +	if (link_status & PCI_EXP_LNKSTA_LBMS) {
>> +		if (pdev->subordinate && pdev->subordinate->self)
>> +			endpoint = pdev->subordinate->self;
> 
> Hmm, I thought pdev->subordinate->self == pdev, no?

That makes no sense, but I think you're right. I'm trying to get to the 
other end of the PCIe link. Is there a simple way to do that? (other 
than convoluted logic that all leads to the same mistake)

>>   static void pcie_enable_notification(struct controller *ctrl)
>>   {
>>   	u16 cmd, mask;
>> @@ -713,6 +743,9 @@ static void pcie_enable_notification(struct controller *ctrl)
>>   	pcie_write_cmd_nowait(ctrl, cmd, mask);
>>   	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
>>   		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
>> +
>> +	if (pcie_link_bandwidth_notification_supported(ctrl))
>> +		pcie_enable_link_bandwidth_notification(ctrl);
> 
> Do we ever need to disable it?

I can't think of a case where that would be needed.

Alex
Lukas Wunner Nov. 29, 2018, 7:13 p.m. UTC | #5
On Thu, Nov 29, 2018 at 06:57:37PM +0000, Alex_Gagniuc@Dellteam.com wrote:
> On 11/29/2018 11:36 AM, Bjorn Helgaas wrote:
> > On Wed, Nov 28, 2018 at 06:08:24PM -0600, Alexandru Gagniuc wrote:
> >> A warning is generated when a PCIe device is probed with a degraded
> >> link, but there was no similar mechanism to warn when the link becomes
> >> degraded after probing. The Link Bandwidth Notification provides this
> >> mechanism.
> >>
> >> Use the link bandwidth notification interrupt to detect bandwidth
> >> changes, and rescan the bandwidth, looking for the weakest point. This
> >> is the same logic used in probe().
> > 
> > I like the concept of this.  What I don't like is the fact that it's
> > tied to pciehp, since I don't think the concept of Link Bandwidth
> > Notification is related to hotplug.  So I think we'll only notice this
> > for ports that support hotplug.  Maybe it's worth doing it this way
> > anyway, even if it could be generalized in the future?
> 
> That makes sense. At first, I thought that BW notification was tied to 
> hotplug, but our PCIe spec writer disagreed with that assertion. I'm 
> just not sure where to handle the interrupt otherwise.

I guess the interrupt is shared with hotplug and PME?  In that case write
a separate pcie_port_service_driver and request the interrupt with
IRQF_SHARED.  Define a new service type in drivers/pci/pcie/portdrv.h.
Amend get_port_device_capability() to check for PCI_EXP_LNKCAP_LBNC.

Thanks,

Lukas
Mika Westerberg Nov. 29, 2018, 7:30 p.m. UTC | #6
On Thu, Nov 29, 2018 at 07:00:58PM +0000, Alex_Gagniuc@Dellteam.com wrote:
> >> +	if (link_status & PCI_EXP_LNKSTA_LBMS) {
> >> +		if (pdev->subordinate && pdev->subordinate->self)
> >> +			endpoint = pdev->subordinate->self;
> > 
> > Hmm, I thought pdev->subordinate->self == pdev, no?
> 
> That makes no sense, but I think you're right. I'm trying to get to the 
> other end of the PCIe link. Is there a simple way to do that? (other 
> than convoluted logic that all leads to the same mistake)

AFAIK you should be able to find the other end by looking at the
pdev->subordinate->devices list. Not sure if there is a simpler way,
though.
Bjorn Helgaas Nov. 29, 2018, 11:04 p.m. UTC | #7
On Thu, Nov 29, 2018 at 08:13:12PM +0100, Lukas Wunner wrote:
> On Thu, Nov 29, 2018 at 06:57:37PM +0000, Alex_Gagniuc@Dellteam.com wrote:
> > On 11/29/2018 11:36 AM, Bjorn Helgaas wrote:
> > > On Wed, Nov 28, 2018 at 06:08:24PM -0600, Alexandru Gagniuc wrote:
> > >> A warning is generated when a PCIe device is probed with a degraded
> > >> link, but there was no similar mechanism to warn when the link becomes
> > >> degraded after probing. The Link Bandwidth Notification provides this
> > >> mechanism.
> > >>
> > >> Use the link bandwidth notification interrupt to detect bandwidth
> > >> changes, and rescan the bandwidth, looking for the weakest point. This
> > >> is the same logic used in probe().
> > > 
> > > I like the concept of this.  What I don't like is the fact that it's
> > > tied to pciehp, since I don't think the concept of Link Bandwidth
> > > Notification is related to hotplug.  So I think we'll only notice this
> > > for ports that support hotplug.  Maybe it's worth doing it this way
> > > anyway, even if it could be generalized in the future?
> > 
> > That makes sense. At first, I thought that BW notification was tied to 
> > hotplug, but our PCIe spec writer disagreed with that assertion. I'm 
> > just not sure where to handle the interrupt otherwise.
> 
> I guess the interrupt is shared with hotplug and PME?  In that case write
> a separate pcie_port_service_driver and request the interrupt with
> IRQF_SHARED.  Define a new service type in drivers/pci/pcie/portdrv.h.
> Amend get_port_device_capability() to check for PCI_EXP_LNKCAP_LBNC.

I really don't like the port driver design.  I'd rather integrate
those services more tightly into the PCI core.  But realistically
that's wishful thinking and may never happen, so this might be the
most expedient approach.

Bjorn
Alex_Gagniuc@Dellteam.com Nov. 29, 2018, 11:24 p.m. UTC | #8
On 11/29/2018 5:05 PM, Bjorn Helgaas wrote:
> On Thu, Nov 29, 2018 at 08:13:12PM +0100, Lukas Wunner wrote:
>> I guess the interrupt is shared with hotplug and PME?  In that case write
>> a separate pcie_port_service_driver and request the interrupt with
>> IRQF_SHARED.  Define a new service type in drivers/pci/pcie/portdrv.h.
>> Amend get_port_device_capability() to check for PCI_EXP_LNKCAP_LBNC.
> 
> I really don't like the port driver design.  I'd rather integrate
> those services more tightly into the PCI core.  But realistically
> that's wishful thinking and may never happen, so this might be the
> most expedient approach.

So, how would it get integrated? I don't like the port service driver 
either. It's too dicky on how it creates some new devices that other 
drivers bind to. If we could have a 1:1 mapping between service drivers 
and PCI capabilities, then it might make better sense.

So, do I go the new service driver route?

Alex
diff mbox series

Patch

diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 7dd443aea5a5..834672000b59 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -515,7 +515,8 @@  static irqreturn_t pciehp_isr(int irq, void *dev_id)
 	struct controller *ctrl = (struct controller *)dev_id;
 	struct pci_dev *pdev = ctrl_dev(ctrl);
 	struct device *parent = pdev->dev.parent;
-	u16 status, events;
+	struct pci_dev *endpoint;
+	u16 status, events, link_status;
 
 	/*
 	 * Interrupts only occur in D3hot or shallower and only if enabled
@@ -525,6 +526,17 @@  static irqreturn_t pciehp_isr(int irq, void *dev_id)
 	    (!(ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE) && !pciehp_poll_mode))
 		return IRQ_NONE;
 
+	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &link_status);
+
+	if (link_status & PCI_EXP_LNKSTA_LBMS) {
+		if (pdev->subordinate && pdev->subordinate->self)
+			endpoint = pdev->subordinate->self;
+		else
+			endpoint = pdev;
+		__pcie_print_link_status(endpoint, false);
+		pcie_capability_write_word(pdev, PCI_EXP_LNKSTA, link_status);
+	}
+
 	/*
 	 * Keep the port accessible by holding a runtime PM ref on its parent.
 	 * Defer resume of the parent to the IRQ thread if it's suspended.
@@ -677,6 +689,24 @@  static int pciehp_poll(void *data)
 	return 0;
 }
 
+static bool pcie_link_bandwidth_notification_supported(struct controller *ctrl)
+{
+	int ret;
+	u32 cap;
+
+	ret = pcie_capability_read_dword(ctrl_dev(ctrl), PCI_EXP_LNKCAP, &cap);
+	return (ret == PCIBIOS_SUCCESSFUL) && (cap & PCI_EXP_LNKCAP_LBNC);
+}
+
+static void pcie_enable_link_bandwidth_notification(struct controller *ctrl)
+{
+	u16 lnk_ctl;
+
+	pcie_capability_read_word(ctrl_dev(ctrl), PCI_EXP_LNKCTL, &lnk_ctl);
+	lnk_ctl |= PCI_EXP_LNKCTL_LBMIE;
+	pcie_capability_write_word(ctrl_dev(ctrl), PCI_EXP_LNKCTL, lnk_ctl);
+}
+
 static void pcie_enable_notification(struct controller *ctrl)
 {
 	u16 cmd, mask;
@@ -713,6 +743,9 @@  static void pcie_enable_notification(struct controller *ctrl)
 	pcie_write_cmd_nowait(ctrl, cmd, mask);
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
 		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
+
+	if (pcie_link_bandwidth_notification_supported(ctrl))
+		pcie_enable_link_bandwidth_notification(ctrl);
 }
 
 static void pcie_disable_notification(struct controller *ctrl)