diff mbox series

[wireless-drivers,1/2] mt76: mt76x2e: disable pcie_aspm by default

Message ID fec60f066bab1936d58b2e69bae3f20e645d1304.1571868221.git.lorenzo@kernel.org
State Awaiting Upstream
Delegated to: David Miller
Headers show
Series fix mt76x2e hangs on U7612E mini-pcie | expand

Commit Message

Lorenzo Bianconi Oct. 23, 2019, 10:23 p.m. UTC
On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
instability and so let's disable PCIE_ASPM by default. This patch has
been successfully tested on U7612E-H1 mini-pice card

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/wireless/mediatek/mt76/mmio.c     | 47 +++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
 .../net/wireless/mediatek/mt76/mt76x2/pci.c   |  2 +
 3 files changed, 50 insertions(+)

Comments

Kalle Valo Oct. 24, 2019, 5:03 a.m. UTC | #1
Lorenzo Bianconi <lorenzo@kernel.org> writes:

> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> instability and so let's disable PCIE_ASPM by default. This patch has
> been successfully tested on U7612E-H1 mini-pice card
>
> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>

[...]

> +void mt76_mmio_disable_aspm(struct pci_dev *pdev)
> +{
> +	struct pci_dev *parent = pdev->bus->self;
> +	u16 aspm_conf, parent_aspm_conf = 0;
> +
> +	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> +	aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> +	if (parent) {
> +		pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
> +					  &parent_aspm_conf);
> +		parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> +	}
> +
> +	if (!aspm_conf && (!parent || !parent_aspm_conf)) {
> +		/* aspm already disabled */
> +		return;
> +	}
> +
> +	dev_info(&pdev->dev, "disabling ASPM %s %s\n",
> +		 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
> +		 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
> +
> +#ifdef CONFIG_PCIEASPM
> +	pci_disable_link_state(pdev, aspm_conf);
> +
> +	/* Double-check ASPM control.  If not disabled by the above, the
> +	 * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
> +	 * not enabled); override by writing PCI config space directly.
> +	 */
> +	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> +	if (!(aspm_conf & PCI_EXP_LNKCTL_ASPMC))
> +		return;
> +#endif /* CONFIG_PCIEASPM */

A minor comment, but 'if IS_ENABLED(CONFIG_PCIEASPM)' is preferred over
#ifdef. Better compiler coverage and so on.
Lorenzo Bianconi Oct. 24, 2019, 8:08 a.m. UTC | #2
> Lorenzo Bianconi <lorenzo@kernel.org> writes:
> 
> > On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> > instability and so let's disable PCIE_ASPM by default. This patch has
> > been successfully tested on U7612E-H1 mini-pice card
> >
> > Signed-off-by: Felix Fietkau <nbd@nbd.name>
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> 
> [...]
> 
> > +void mt76_mmio_disable_aspm(struct pci_dev *pdev)
> > +{
> > +	struct pci_dev *parent = pdev->bus->self;
> > +	u16 aspm_conf, parent_aspm_conf = 0;
> > +
> > +	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> > +	aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> > +	if (parent) {
> > +		pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
> > +					  &parent_aspm_conf);
> > +		parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> > +	}
> > +
> > +	if (!aspm_conf && (!parent || !parent_aspm_conf)) {
> > +		/* aspm already disabled */
> > +		return;
> > +	}
> > +
> > +	dev_info(&pdev->dev, "disabling ASPM %s %s\n",
> > +		 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
> > +		 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
> > +
> > +#ifdef CONFIG_PCIEASPM
> > +	pci_disable_link_state(pdev, aspm_conf);
> > +
> > +	/* Double-check ASPM control.  If not disabled by the above, the
> > +	 * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
> > +	 * not enabled); override by writing PCI config space directly.
> > +	 */
> > +	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> > +	if (!(aspm_conf & PCI_EXP_LNKCTL_ASPMC))
> > +		return;
> > +#endif /* CONFIG_PCIEASPM */
> 
> A minor comment, but 'if IS_ENABLED(CONFIG_PCIEASPM)' is preferred over
> #ifdef. Better compiler coverage and so on.

Hi Kalle,

ack, I will fix it in v2.

Regards,
Lorenzo

> 
> -- 
> https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches
Heiner Kallweit Oct. 24, 2019, 7:41 p.m. UTC | #3
On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> instability and so let's disable PCIE_ASPM by default. This patch has
> been successfully tested on U7612E-H1 mini-pice card
> 
> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  drivers/net/wireless/mediatek/mt76/mmio.c     | 47 +++++++++++++++++++
>  drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
>  .../net/wireless/mediatek/mt76/mt76x2/pci.c   |  2 +
>  3 files changed, 50 insertions(+)
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/mmio.c b/drivers/net/wireless/mediatek/mt76/mmio.c
> index 1c974df1fe25..f991a8f1c42a 100644
> --- a/drivers/net/wireless/mediatek/mt76/mmio.c
> +++ b/drivers/net/wireless/mediatek/mt76/mmio.c
> @@ -3,6 +3,8 @@
>   * Copyright (C) 2016 Felix Fietkau <nbd@nbd.name>
>   */
>  
> +#include <linux/pci.h>
> +
>  #include "mt76.h"
>  #include "trace.h"
>  
> @@ -78,6 +80,51 @@ void mt76_set_irq_mask(struct mt76_dev *dev, u32 addr,
>  }
>  EXPORT_SYMBOL_GPL(mt76_set_irq_mask);
>  
> +void mt76_mmio_disable_aspm(struct pci_dev *pdev)
> +{
> +	struct pci_dev *parent = pdev->bus->self;
> +	u16 aspm_conf, parent_aspm_conf = 0;
> +
> +	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> +	aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> +	if (parent) {
> +		pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
> +					  &parent_aspm_conf);
> +		parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> +	}
> +
> +	if (!aspm_conf && (!parent || !parent_aspm_conf)) {
> +		/* aspm already disabled */
> +		return;
> +	}
> +
> +	dev_info(&pdev->dev, "disabling ASPM %s %s\n",
> +		 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
> +		 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
> +
> +#ifdef CONFIG_PCIEASPM
> +	pci_disable_link_state(pdev, aspm_conf);
> +
> +	/* Double-check ASPM control.  If not disabled by the above, the
> +	 * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
> +	 * not enabled); override by writing PCI config space directly.
> +	 */
> +	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> +	if (!(aspm_conf & PCI_EXP_LNKCTL_ASPMC))
> +		return;
> +#endif /* CONFIG_PCIEASPM */
> +
> +	/* Both device and parent should have the same ASPM setting.
> +	 * Disable ASPM in downstream component first and then upstream.
> +	 */
> +	pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_conf);
> +
> +	if (parent)
> +		pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> +					   aspm_conf);

+ linux-pci mailing list

All this seems to be legacy code copied from e1000e.
Fiddling with the low-level PCI(e) registers should be left to the
PCI core. It shouldn't be needed here, a simple call to
pci_disable_link_state() should be sufficient. Note that this function
has a return value meanwhile that you can check instead of reading
back low-level registers.
If BIOS forbids that OS changes ASPM settings, then this should be
respected (like PCI core does). Instead the network chip may provide
the option to configure whether it activates certain ASPM (sub-)states
or not. We went through a similar exercise with the r8169 driver,
you can check how it's done there.

> +}
> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> +
>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
>  {
>  	static const struct mt76_bus_ops mt76_mmio_ops = {
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 570c159515a0..962812b6247d 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
>  #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
>  
>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
>  
>  static inline u16 mt76_chip(struct mt76_dev *dev)
>  {
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> index 73c3104f8858..264bef87e5c7 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
>  	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>  
> +	mt76_mmio_disable_aspm(pdev);
> +
>  	return 0;
>  
>  error:
>
Lorenzo Bianconi Oct. 24, 2019, 9:54 p.m. UTC | #4
> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> > On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> > instability and so let's disable PCIE_ASPM by default. This patch has
> > been successfully tested on U7612E-H1 mini-pice card
> > 
> > Signed-off-by: Felix Fietkau <nbd@nbd.name>
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  drivers/net/wireless/mediatek/mt76/mmio.c     | 47 +++++++++++++++++++
> >  drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
> >  .../net/wireless/mediatek/mt76/mt76x2/pci.c   |  2 +
> >  3 files changed, 50 insertions(+)
> > 

[...]

> > +
> > +	if (parent)
> > +		pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> > +					   aspm_conf);
> 
> + linux-pci mailing list

Hi Heiner,

> 
> All this seems to be legacy code copied from e1000e.
> Fiddling with the low-level PCI(e) registers should be left to the
> PCI core. It shouldn't be needed here, a simple call to
> pci_disable_link_state() should be sufficient. Note that this function
> has a return value meanwhile that you can check instead of reading
> back low-level registers.

ack, I will add it to v2

> If BIOS forbids that OS changes ASPM settings, then this should be
> respected (like PCI core does). Instead the network chip may provide
> the option to configure whether it activates certain ASPM (sub-)states
> or not. We went through a similar exercise with the r8169 driver,
> you can check how it's done there.

looking at the vendor sdk (at least in the version I currently have) there are
no particular ASPM configurations, it just optionally disables it writing directly
in pci registers.
Moreover there are multiple drivers that are currently using this approach:
- ath9k in ath_pci_aspm_init()
- tg3 in tg3_chip_reset()
- e1000e in __e1000e_disable_aspm()
- r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()

Is disabling the ASPM for the system the only option to make this minipcie
work?

Regards,
Lorenzo

> 
> > +}
> > +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> > +
> >  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
> >  {
> >  	static const struct mt76_bus_ops mt76_mmio_ops = {
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> > index 570c159515a0..962812b6247d 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> > +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> > @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
> >  #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
> >  
> >  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> > +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
> >  
> >  static inline u16 mt76_chip(struct mt76_dev *dev)
> >  {
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> > index 73c3104f8858..264bef87e5c7 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> > +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> > @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >  	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> >  	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
> >  
> > +	mt76_mmio_disable_aspm(pdev);
> > +
> >  	return 0;
> >  
> >  error:
> > 
>
Heiner Kallweit Oct. 24, 2019, 10:19 p.m. UTC | #5
On 24.10.2019 23:54, Lorenzo Bianconi wrote:
>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
>>> instability and so let's disable PCIE_ASPM by default. This patch has
>>> been successfully tested on U7612E-H1 mini-pice card
>>>
>>> Signed-off-by: Felix Fietkau <nbd@nbd.name>
>>> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>>> ---
>>>  drivers/net/wireless/mediatek/mt76/mmio.c     | 47 +++++++++++++++++++
>>>  drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
>>>  .../net/wireless/mediatek/mt76/mt76x2/pci.c   |  2 +
>>>  3 files changed, 50 insertions(+)
>>>
> 
> [...]
> 
>>> +
>>> +	if (parent)
>>> +		pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
>>> +					   aspm_conf);
>>
>> + linux-pci mailing list
> 
> Hi Heiner,
> 
>>
>> All this seems to be legacy code copied from e1000e.
>> Fiddling with the low-level PCI(e) registers should be left to the
>> PCI core. It shouldn't be needed here, a simple call to
>> pci_disable_link_state() should be sufficient. Note that this function
>> has a return value meanwhile that you can check instead of reading
>> back low-level registers.
> 
> ack, I will add it to v2
> 
>> If BIOS forbids that OS changes ASPM settings, then this should be
>> respected (like PCI core does). Instead the network chip may provide
>> the option to configure whether it activates certain ASPM (sub-)states
>> or not. We went through a similar exercise with the r8169 driver,
>> you can check how it's done there.
> 
> looking at the vendor sdk (at least in the version I currently have) there are
> no particular ASPM configurations, it just optionally disables it writing directly
> in pci registers.
> Moreover there are multiple drivers that are currently using this approach:
> - ath9k in ath_pci_aspm_init()
> - tg3 in tg3_chip_reset()
> - e1000e in __e1000e_disable_aspm()
> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
> 
All these drivers include quite some legacy code. I can mainly speak for r8169:
First versions of the driver are almost as old as Linux. And even though I
refactored most of the driver still some legacy code for older chip versions
(like the two functions you mentioned) is included.

> Is disabling the ASPM for the system the only option to make this minipcie
> work?
> 

No. What we do in r8169:

- call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
- If it returns 0, then ASPM (including the L1 sub-states) is disabled.
- If it returns an errno, then disabling ASPM failed (most likely due to
  BIOS forbidding ASPM changes - pci_disable_link_state will spit out
  a related warning). In this case r8169 configures the chip to not initiate
  transitions to L0s/L1 (the other end of the link may still try to enter
  ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
  to avoid the ASPM-related problems with certain versions of this chip.
  Maybe your HW provides similar functionality.

> Regards,
> Lorenzo
> 
Heiner

>>
>>> +}
>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
>>> +
>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
>>>  {
>>>  	static const struct mt76_bus_ops mt76_mmio_ops = {
>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
>>> index 570c159515a0..962812b6247d 100644
>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
>>>  #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
>>>  
>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
>>>  
>>>  static inline u16 mt76_chip(struct mt76_dev *dev)
>>>  {
>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>> index 73c3104f8858..264bef87e5c7 100644
>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>  	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
>>>  	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>>>  
>>> +	mt76_mmio_disable_aspm(pdev);
>>> +
>>>  	return 0;
>>>  
>>>  error:
>>>
>>
Lorenzo Bianconi Oct. 24, 2019, 11:07 p.m. UTC | #6
> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
> >> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> >>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> >>> instability and so let's disable PCIE_ASPM by default. This patch has
> >>> been successfully tested on U7612E-H1 mini-pice card
> >>>
> >>> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> >>> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> >>> ---
> >>>  drivers/net/wireless/mediatek/mt76/mmio.c     | 47 +++++++++++++++++++
> >>>  drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
> >>>  .../net/wireless/mediatek/mt76/mt76x2/pci.c   |  2 +
> >>>  3 files changed, 50 insertions(+)
> >>>
> > 
> > [...]
> > 
> >>> +
> >>> +	if (parent)
> >>> +		pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> >>> +					   aspm_conf);
> >>
> >> + linux-pci mailing list
> > 
> > Hi Heiner,
> > 
> >>
> >> All this seems to be legacy code copied from e1000e.
> >> Fiddling with the low-level PCI(e) registers should be left to the
> >> PCI core. It shouldn't be needed here, a simple call to
> >> pci_disable_link_state() should be sufficient. Note that this function
> >> has a return value meanwhile that you can check instead of reading
> >> back low-level registers.
> > 
> > ack, I will add it to v2
> > 
> >> If BIOS forbids that OS changes ASPM settings, then this should be
> >> respected (like PCI core does). Instead the network chip may provide
> >> the option to configure whether it activates certain ASPM (sub-)states
> >> or not. We went through a similar exercise with the r8169 driver,
> >> you can check how it's done there.
> > 
> > looking at the vendor sdk (at least in the version I currently have) there are
> > no particular ASPM configurations, it just optionally disables it writing directly
> > in pci registers.
> > Moreover there are multiple drivers that are currently using this approach:
> > - ath9k in ath_pci_aspm_init()
> > - tg3 in tg3_chip_reset()
> > - e1000e in __e1000e_disable_aspm()
> > - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
> > 
> All these drivers include quite some legacy code. I can mainly speak for r8169:
> First versions of the driver are almost as old as Linux. And even though I
> refactored most of the driver still some legacy code for older chip versions
> (like the two functions you mentioned) is included.
> 
> > Is disabling the ASPM for the system the only option to make this minipcie
> > work?
> > 
> 
> No. What we do in r8169:
> 
> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
> - If it returns an errno, then disabling ASPM failed (most likely due to
>   BIOS forbidding ASPM changes - pci_disable_link_state will spit out
>   a related warning). In this case r8169 configures the chip to not initiate
>   transitions to L0s/L1 (the other end of the link may still try to enter
>   ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
>   to avoid the ASPM-related problems with certain versions of this chip.
>   Maybe your HW provides similar functionality.

yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
unfortunately there is no specific code or documentation I can use for mt76x2e.
So as last chance I decided to disable ASPM directly (in this way the chip is
working fine).
Do you think a kernel parameter to disable ASPM directly would be acceptable?

Regards,
Lorenzo

> 
> > Regards,
> > Lorenzo
> > 
> Heiner
> 
> >>
> >>> +}
> >>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> >>> +
> >>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
> >>>  {
> >>>  	static const struct mt76_bus_ops mt76_mmio_ops = {
> >>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>> index 570c159515a0..962812b6247d 100644
> >>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> >>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
> >>>  #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
> >>>  
> >>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> >>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
> >>>  
> >>>  static inline u16 mt76_chip(struct mt76_dev *dev)
> >>>  {
> >>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>> index 73c3104f8858..264bef87e5c7 100644
> >>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >>>  	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> >>>  	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
> >>>  
> >>> +	mt76_mmio_disable_aspm(pdev);
> >>> +
> >>>  	return 0;
> >>>  
> >>>  error:
> >>>
> >>
>
Heiner Kallweit Oct. 25, 2019, 10:56 a.m. UTC | #7
On 25.10.2019 01:07, Lorenzo Bianconi wrote:
>> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
>>>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
>>>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
>>>>> instability and so let's disable PCIE_ASPM by default. This patch has
>>>>> been successfully tested on U7612E-H1 mini-pice card
>>>>>
>>>>> Signed-off-by: Felix Fietkau <nbd@nbd.name>
>>>>> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>>>>> ---
>>>>>  drivers/net/wireless/mediatek/mt76/mmio.c     | 47 +++++++++++++++++++
>>>>>  drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
>>>>>  .../net/wireless/mediatek/mt76/mt76x2/pci.c   |  2 +
>>>>>  3 files changed, 50 insertions(+)
>>>>>
>>>
>>> [...]
>>>
>>>>> +
>>>>> +	if (parent)
>>>>> +		pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
>>>>> +					   aspm_conf);
>>>>
>>>> + linux-pci mailing list
>>>
>>> Hi Heiner,
>>>
>>>>
>>>> All this seems to be legacy code copied from e1000e.
>>>> Fiddling with the low-level PCI(e) registers should be left to the
>>>> PCI core. It shouldn't be needed here, a simple call to
>>>> pci_disable_link_state() should be sufficient. Note that this function
>>>> has a return value meanwhile that you can check instead of reading
>>>> back low-level registers.
>>>
>>> ack, I will add it to v2
>>>
>>>> If BIOS forbids that OS changes ASPM settings, then this should be
>>>> respected (like PCI core does). Instead the network chip may provide
>>>> the option to configure whether it activates certain ASPM (sub-)states
>>>> or not. We went through a similar exercise with the r8169 driver,
>>>> you can check how it's done there.
>>>
>>> looking at the vendor sdk (at least in the version I currently have) there are
>>> no particular ASPM configurations, it just optionally disables it writing directly
>>> in pci registers.
>>> Moreover there are multiple drivers that are currently using this approach:
>>> - ath9k in ath_pci_aspm_init()
>>> - tg3 in tg3_chip_reset()
>>> - e1000e in __e1000e_disable_aspm()
>>> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
>>>
>> All these drivers include quite some legacy code. I can mainly speak for r8169:
>> First versions of the driver are almost as old as Linux. And even though I
>> refactored most of the driver still some legacy code for older chip versions
>> (like the two functions you mentioned) is included.
>>
>>> Is disabling the ASPM for the system the only option to make this minipcie
>>> work?
>>>
>>
>> No. What we do in r8169:
>>
>> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
>> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
>> - If it returns an errno, then disabling ASPM failed (most likely due to
>>   BIOS forbidding ASPM changes - pci_disable_link_state will spit out
>>   a related warning). In this case r8169 configures the chip to not initiate
>>   transitions to L0s/L1 (the other end of the link may still try to enter
>>   ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
>>   to avoid the ASPM-related problems with certain versions of this chip.
>>   Maybe your HW provides similar functionality.
> 
> yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
> unfortunately there is no specific code or documentation I can use for mt76x2e.
> So as last chance I decided to disable ASPM directly (in this way the chip is
> working fine).
> Do you think a kernel parameter to disable ASPM directly would be acceptable?
> 
Module parameters are not the preferred approach, even though some maintainers
may consider it acceptable. I think it should be ok if you disable ASPM per
default. Who wants ASPM can enable the individual states via brand-new
sysfs attributes (provided BIOS allows OS to control ASPM).
However changing ASPM settings via direct register writes may cause
inconsistencies between PCI core and actual settings.
I'm not sure whether there's any general best practice how to deal with the
scenario that a device misbehaves with ASPM enabled and OS isn't allowed to
change ASPM settings. 
Maybe the PCI guys can advise on these points.

> Regards,
> Lorenzo
> 
Heiner

>>
>>> Regards,
>>> Lorenzo
>>>
>> Heiner
>>
>>>>
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
>>>>> +
>>>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
>>>>>  {
>>>>>  	static const struct mt76_bus_ops mt76_mmio_ops = {
>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>> index 570c159515a0..962812b6247d 100644
>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
>>>>>  #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
>>>>>  
>>>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
>>>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
>>>>>  
>>>>>  static inline u16 mt76_chip(struct mt76_dev *dev)
>>>>>  {
>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>> index 73c3104f8858..264bef87e5c7 100644
>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>>>  	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
>>>>>  	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>>>>>  
>>>>> +	mt76_mmio_disable_aspm(pdev);
>>>>> +
>>>>>  	return 0;
>>>>>  
>>>>>  error:
>>>>>
>>>>
>>
Lorenzo Bianconi Oct. 25, 2019, 11:46 a.m. UTC | #8
> On 25.10.2019 01:07, Lorenzo Bianconi wrote:
> >> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
> >>>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> >>>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> >>>>> instability and so let's disable PCIE_ASPM by default. This patch has
> >>>>> been successfully tested on U7612E-H1 mini-pice card
> >>>>>
> >>>>> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> >>>>> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> >>>>> ---
> >>>>>  drivers/net/wireless/mediatek/mt76/mmio.c     | 47 +++++++++++++++++++
> >>>>>  drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
> >>>>>  .../net/wireless/mediatek/mt76/mt76x2/pci.c   |  2 +
> >>>>>  3 files changed, 50 insertions(+)
> >>>>>
> >>>
> >>> [...]
> >>>
> >>>>> +
> >>>>> +	if (parent)
> >>>>> +		pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> >>>>> +					   aspm_conf);
> >>>>
> >>>> + linux-pci mailing list
> >>>
> >>> Hi Heiner,
> >>>
> >>>>
> >>>> All this seems to be legacy code copied from e1000e.
> >>>> Fiddling with the low-level PCI(e) registers should be left to the
> >>>> PCI core. It shouldn't be needed here, a simple call to
> >>>> pci_disable_link_state() should be sufficient. Note that this function
> >>>> has a return value meanwhile that you can check instead of reading
> >>>> back low-level registers.
> >>>
> >>> ack, I will add it to v2
> >>>
> >>>> If BIOS forbids that OS changes ASPM settings, then this should be
> >>>> respected (like PCI core does). Instead the network chip may provide
> >>>> the option to configure whether it activates certain ASPM (sub-)states
> >>>> or not. We went through a similar exercise with the r8169 driver,
> >>>> you can check how it's done there.
> >>>
> >>> looking at the vendor sdk (at least in the version I currently have) there are
> >>> no particular ASPM configurations, it just optionally disables it writing directly
> >>> in pci registers.
> >>> Moreover there are multiple drivers that are currently using this approach:
> >>> - ath9k in ath_pci_aspm_init()
> >>> - tg3 in tg3_chip_reset()
> >>> - e1000e in __e1000e_disable_aspm()
> >>> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
> >>>
> >> All these drivers include quite some legacy code. I can mainly speak for r8169:
> >> First versions of the driver are almost as old as Linux. And even though I
> >> refactored most of the driver still some legacy code for older chip versions
> >> (like the two functions you mentioned) is included.
> >>
> >>> Is disabling the ASPM for the system the only option to make this minipcie
> >>> work?
> >>>
> >>
> >> No. What we do in r8169:
> >>
> >> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
> >> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
> >> - If it returns an errno, then disabling ASPM failed (most likely due to
> >>   BIOS forbidding ASPM changes - pci_disable_link_state will spit out
> >>   a related warning). In this case r8169 configures the chip to not initiate
> >>   transitions to L0s/L1 (the other end of the link may still try to enter
> >>   ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
> >>   to avoid the ASPM-related problems with certain versions of this chip.
> >>   Maybe your HW provides similar functionality.
> > 
> > yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
> > unfortunately there is no specific code or documentation I can use for mt76x2e.
> > So as last chance I decided to disable ASPM directly (in this way the chip is
> > working fine).
> > Do you think a kernel parameter to disable ASPM directly would be acceptable?
> > 
> Module parameters are not the preferred approach, even though some maintainers
> may consider it acceptable. I think it should be ok if you disable ASPM per
> default. Who wants ASPM can enable the individual states via brand-new
> sysfs attributes (provided BIOS allows OS to control ASPM).
> However changing ASPM settings via direct register writes may cause
> inconsistencies between PCI core and actual settings.
> I'm not sure whether there's any general best practice how to deal with the
> scenario that a device misbehaves with ASPM enabled and OS isn't allowed to
> change ASPM settings. 
> Maybe the PCI guys can advise on these points.

Hi Heiner,

I reviewed the mtk sdk and it seems mt7662/mt7612/mt7602 series does not
have hw pcie ps support (not sure if it just not implemented or so). In my
scenario without disabling ASPM the card does not work at all, so I guess we
can proceed with current approach and then try to understand if we can do
something better. What do you think?

@Ryder, Sean: do you have any hint on this topic?

Regards,
Lorenzo

> 
> > Regards,
> > Lorenzo
> > 
> Heiner
> 
> >>
> >>> Regards,
> >>> Lorenzo
> >>>
> >> Heiner
> >>
> >>>>
> >>>>> +}
> >>>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> >>>>> +
> >>>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
> >>>>>  {
> >>>>>  	static const struct mt76_bus_ops mt76_mmio_ops = {
> >>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>> index 570c159515a0..962812b6247d 100644
> >>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
> >>>>>  #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
> >>>>>  
> >>>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> >>>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
> >>>>>  
> >>>>>  static inline u16 mt76_chip(struct mt76_dev *dev)
> >>>>>  {
> >>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>> index 73c3104f8858..264bef87e5c7 100644
> >>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >>>>>  	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> >>>>>  	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
> >>>>>  
> >>>>> +	mt76_mmio_disable_aspm(pdev);
> >>>>> +
> >>>>>  	return 0;
> >>>>>  
> >>>>>  error:
> >>>>>
> >>>>
> >>
>
Heiner Kallweit Oct. 25, 2019, 5 p.m. UTC | #9
On 25.10.2019 13:46, Lorenzo Bianconi wrote:
>> On 25.10.2019 01:07, Lorenzo Bianconi wrote:
>>>> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
>>>>>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
>>>>>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
>>>>>>> instability and so let's disable PCIE_ASPM by default. This patch has
>>>>>>> been successfully tested on U7612E-H1 mini-pice card
>>>>>>>
>>>>>>> Signed-off-by: Felix Fietkau <nbd@nbd.name>
>>>>>>> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>>>>>>> ---
>>>>>>>  drivers/net/wireless/mediatek/mt76/mmio.c     | 47 +++++++++++++++++++
>>>>>>>  drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
>>>>>>>  .../net/wireless/mediatek/mt76/mt76x2/pci.c   |  2 +
>>>>>>>  3 files changed, 50 insertions(+)
>>>>>>>
>>>>>
>>>>> [...]
>>>>>
>>>>>>> +
>>>>>>> +	if (parent)
>>>>>>> +		pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
>>>>>>> +					   aspm_conf);
>>>>>>
>>>>>> + linux-pci mailing list
>>>>>
>>>>> Hi Heiner,
>>>>>
>>>>>>
>>>>>> All this seems to be legacy code copied from e1000e.
>>>>>> Fiddling with the low-level PCI(e) registers should be left to the
>>>>>> PCI core. It shouldn't be needed here, a simple call to
>>>>>> pci_disable_link_state() should be sufficient. Note that this function
>>>>>> has a return value meanwhile that you can check instead of reading
>>>>>> back low-level registers.
>>>>>
>>>>> ack, I will add it to v2
>>>>>
>>>>>> If BIOS forbids that OS changes ASPM settings, then this should be
>>>>>> respected (like PCI core does). Instead the network chip may provide
>>>>>> the option to configure whether it activates certain ASPM (sub-)states
>>>>>> or not. We went through a similar exercise with the r8169 driver,
>>>>>> you can check how it's done there.
>>>>>
>>>>> looking at the vendor sdk (at least in the version I currently have) there are
>>>>> no particular ASPM configurations, it just optionally disables it writing directly
>>>>> in pci registers.
>>>>> Moreover there are multiple drivers that are currently using this approach:
>>>>> - ath9k in ath_pci_aspm_init()
>>>>> - tg3 in tg3_chip_reset()
>>>>> - e1000e in __e1000e_disable_aspm()
>>>>> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
>>>>>
>>>> All these drivers include quite some legacy code. I can mainly speak for r8169:
>>>> First versions of the driver are almost as old as Linux. And even though I
>>>> refactored most of the driver still some legacy code for older chip versions
>>>> (like the two functions you mentioned) is included.
>>>>
>>>>> Is disabling the ASPM for the system the only option to make this minipcie
>>>>> work?
>>>>>
>>>>
>>>> No. What we do in r8169:
>>>>
>>>> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
>>>> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
>>>> - If it returns an errno, then disabling ASPM failed (most likely due to
>>>>   BIOS forbidding ASPM changes - pci_disable_link_state will spit out
>>>>   a related warning). In this case r8169 configures the chip to not initiate
>>>>   transitions to L0s/L1 (the other end of the link may still try to enter
>>>>   ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
>>>>   to avoid the ASPM-related problems with certain versions of this chip.
>>>>   Maybe your HW provides similar functionality.
>>>
>>> yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
>>> unfortunately there is no specific code or documentation I can use for mt76x2e.
>>> So as last chance I decided to disable ASPM directly (in this way the chip is
>>> working fine).
>>> Do you think a kernel parameter to disable ASPM directly would be acceptable?
>>>
>> Module parameters are not the preferred approach, even though some maintainers
>> may consider it acceptable. I think it should be ok if you disable ASPM per
>> default. Who wants ASPM can enable the individual states via brand-new
>> sysfs attributes (provided BIOS allows OS to control ASPM).
>> However changing ASPM settings via direct register writes may cause
>> inconsistencies between PCI core and actual settings.
>> I'm not sure whether there's any general best practice how to deal with the
>> scenario that a device misbehaves with ASPM enabled and OS isn't allowed to
>> change ASPM settings. 
>> Maybe the PCI guys can advise on these points.
> 
> Hi Heiner,
> 
> I reviewed the mtk sdk and it seems mt7662/mt7612/mt7602 series does not
> have hw pcie ps support (not sure if it just not implemented or so). In my
> scenario without disabling ASPM the card does not work at all, so I guess we
> can proceed with current approach and then try to understand if we can do
> something better. What do you think?
> 

If there's no proper way to deal with the issue, then you may have to go with
the current approach. Just what you could do is calling pci_disable_link_state()
first and do the direct register modification limbo only as a fallback if
function returns an errno or CONFIG_PCIEASPM isn't defined.

It may make sense to change the return value of the pci_disable_link_state
dummy if CONFIG_PCIEASPM isn't defined: from 0 to e.g. -EOPNOTSUPP
Then in the driver it wouldn't be needed to check CONFIG_PCIEASPM.

Interestingly in mt76x2/pci.c there's the following:

/* Fix up ASPM configuration */

	/* RG_SSUSB_G1_CDR_BIR_LTR = 0x9 */
	mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9);

	/* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */
	mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf);

	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);

Would be helpful to know what this does and whether it may need to be adjusted.
At least in your case this fix attempt doesn't seem to be sufficient.


> @Ryder, Sean: do you have any hint on this topic?
> 
> Regards,
> Lorenzo
> 
>>
>>> Regards,
>>> Lorenzo
>>>
>> Heiner
>>
>>>>
>>>>> Regards,
>>>>> Lorenzo
>>>>>
>>>> Heiner
>>>>
>>>>>>
>>>>>>> +}
>>>>>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
>>>>>>> +
>>>>>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
>>>>>>>  {
>>>>>>>  	static const struct mt76_bus_ops mt76_mmio_ops = {
>>>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>>>> index 570c159515a0..962812b6247d 100644
>>>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
>>>>>>>  #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
>>>>>>>  
>>>>>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
>>>>>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
>>>>>>>  
>>>>>>>  static inline u16 mt76_chip(struct mt76_dev *dev)
>>>>>>>  {
>>>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>>>> index 73c3104f8858..264bef87e5c7 100644
>>>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>>>>>  	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
>>>>>>>  	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>>>>>>>  
>>>>>>> +	mt76_mmio_disable_aspm(pdev);
>>>>>>> +
>>>>>>>  	return 0;
>>>>>>>  
>>>>>>>  error:
>>>>>>>
>>>>>>
>>>>
>>
Lorenzo Bianconi Oct. 25, 2019, 5:35 p.m. UTC | #10
>
> On 25.10.2019 13:46, Lorenzo Bianconi wrote:
> >> On 25.10.2019 01:07, Lorenzo Bianconi wrote:
> >>>> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
> >>>>>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> >>>>>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> >>>>>>> instability and so let's disable PCIE_ASPM by default. This patch has
> >>>>>>> been successfully tested on U7612E-H1 mini-pice card
> >>>>>>>
> >>>>>>> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> >>>>>>> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> >>>>>>> ---
> >>>>>>>  drivers/net/wireless/mediatek/mt76/mmio.c     | 47 +++++++++++++++++++
> >>>>>>>  drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
> >>>>>>>  .../net/wireless/mediatek/mt76/mt76x2/pci.c   |  2 +
> >>>>>>>  3 files changed, 50 insertions(+)
> >>>>>>>
> >>>>>
> >>>>> [...]
> >>>>>
> >>>>>>> +
> >>>>>>> +       if (parent)
> >>>>>>> +               pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> >>>>>>> +                                          aspm_conf);
> >>>>>>
> >>>>>> + linux-pci mailing list
> >>>>>
> >>>>> Hi Heiner,
> >>>>>
> >>>>>>
> >>>>>> All this seems to be legacy code copied from e1000e.
> >>>>>> Fiddling with the low-level PCI(e) registers should be left to the
> >>>>>> PCI core. It shouldn't be needed here, a simple call to
> >>>>>> pci_disable_link_state() should be sufficient. Note that this function
> >>>>>> has a return value meanwhile that you can check instead of reading
> >>>>>> back low-level registers.
> >>>>>
> >>>>> ack, I will add it to v2
> >>>>>
> >>>>>> If BIOS forbids that OS changes ASPM settings, then this should be
> >>>>>> respected (like PCI core does). Instead the network chip may provide
> >>>>>> the option to configure whether it activates certain ASPM (sub-)states
> >>>>>> or not. We went through a similar exercise with the r8169 driver,
> >>>>>> you can check how it's done there.
> >>>>>
> >>>>> looking at the vendor sdk (at least in the version I currently have) there are
> >>>>> no particular ASPM configurations, it just optionally disables it writing directly
> >>>>> in pci registers.
> >>>>> Moreover there are multiple drivers that are currently using this approach:
> >>>>> - ath9k in ath_pci_aspm_init()
> >>>>> - tg3 in tg3_chip_reset()
> >>>>> - e1000e in __e1000e_disable_aspm()
> >>>>> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
> >>>>>
> >>>> All these drivers include quite some legacy code. I can mainly speak for r8169:
> >>>> First versions of the driver are almost as old as Linux. And even though I
> >>>> refactored most of the driver still some legacy code for older chip versions
> >>>> (like the two functions you mentioned) is included.
> >>>>
> >>>>> Is disabling the ASPM for the system the only option to make this minipcie
> >>>>> work?
> >>>>>
> >>>>
> >>>> No. What we do in r8169:
> >>>>
> >>>> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
> >>>> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
> >>>> - If it returns an errno, then disabling ASPM failed (most likely due to
> >>>>   BIOS forbidding ASPM changes - pci_disable_link_state will spit out
> >>>>   a related warning). In this case r8169 configures the chip to not initiate
> >>>>   transitions to L0s/L1 (the other end of the link may still try to enter
> >>>>   ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
> >>>>   to avoid the ASPM-related problems with certain versions of this chip.
> >>>>   Maybe your HW provides similar functionality.
> >>>
> >>> yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
> >>> unfortunately there is no specific code or documentation I can use for mt76x2e.
> >>> So as last chance I decided to disable ASPM directly (in this way the chip is
> >>> working fine).
> >>> Do you think a kernel parameter to disable ASPM directly would be acceptable?
> >>>
> >> Module parameters are not the preferred approach, even though some maintainers
> >> may consider it acceptable. I think it should be ok if you disable ASPM per
> >> default. Who wants ASPM can enable the individual states via brand-new
> >> sysfs attributes (provided BIOS allows OS to control ASPM).
> >> However changing ASPM settings via direct register writes may cause
> >> inconsistencies between PCI core and actual settings.
> >> I'm not sure whether there's any general best practice how to deal with the
> >> scenario that a device misbehaves with ASPM enabled and OS isn't allowed to
> >> change ASPM settings.
> >> Maybe the PCI guys can advise on these points.
> >
> > Hi Heiner,
> >
> > I reviewed the mtk sdk and it seems mt7662/mt7612/mt7602 series does not
> > have hw pcie ps support (not sure if it just not implemented or so). In my
> > scenario without disabling ASPM the card does not work at all, so I guess we
> > can proceed with current approach and then try to understand if we can do
> > something better. What do you think?
> >
>
> If there's no proper way to deal with the issue, then you may have to go with
> the current approach. Just what you could do is calling pci_disable_link_state()
> first and do the direct register modification limbo only as a fallback if
> function returns an errno or CONFIG_PCIEASPM isn't defined.

Hi Heiner,

ack, will do in v2

>
> It may make sense to change the return value of the pci_disable_link_state
> dummy if CONFIG_PCIEASPM isn't defined: from 0 to e.g. -EOPNOTSUPP
> Then in the driver it wouldn't be needed to check CONFIG_PCIEASPM.
>
> Interestingly in mt76x2/pci.c there's the following:
>
> /* Fix up ASPM configuration */
>
>         /* RG_SSUSB_G1_CDR_BIR_LTR = 0x9 */
>         mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9);
>
>         /* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */
>         mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf);
>
>         /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
>         mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>
> Would be helpful to know what this does and whether it may need to be adjusted.
> At least in your case this fix attempt doesn't seem to be sufficient.

unfortunately not.

Regards,
Lorenzo

>
>
> > @Ryder, Sean: do you have any hint on this topic?
> >
> > Regards,
> > Lorenzo
> >
> >>
> >>> Regards,
> >>> Lorenzo
> >>>
> >> Heiner
> >>
> >>>>
> >>>>> Regards,
> >>>>> Lorenzo
> >>>>>
> >>>> Heiner
> >>>>
> >>>>>>
> >>>>>>> +}
> >>>>>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> >>>>>>> +
> >>>>>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
> >>>>>>>  {
> >>>>>>>         static const struct mt76_bus_ops mt76_mmio_ops = {
> >>>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>>>> index 570c159515a0..962812b6247d 100644
> >>>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
> >>>>>>>  #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
> >>>>>>>
> >>>>>>>  void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> >>>>>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
> >>>>>>>
> >>>>>>>  static inline u16 mt76_chip(struct mt76_dev *dev)
> >>>>>>>  {
> >>>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>>>> index 73c3104f8858..264bef87e5c7 100644
> >>>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >>>>>>>         /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> >>>>>>>         mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
> >>>>>>>
> >>>>>>> +       mt76_mmio_disable_aspm(pdev);
> >>>>>>> +
> >>>>>>>         return 0;
> >>>>>>>
> >>>>>>>  error:
> >>>>>>>
> >>>>>>
> >>>>
> >>
>
diff mbox series

Patch

diff --git a/drivers/net/wireless/mediatek/mt76/mmio.c b/drivers/net/wireless/mediatek/mt76/mmio.c
index 1c974df1fe25..f991a8f1c42a 100644
--- a/drivers/net/wireless/mediatek/mt76/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mmio.c
@@ -3,6 +3,8 @@ 
  * Copyright (C) 2016 Felix Fietkau <nbd@nbd.name>
  */
 
+#include <linux/pci.h>
+
 #include "mt76.h"
 #include "trace.h"
 
@@ -78,6 +80,51 @@  void mt76_set_irq_mask(struct mt76_dev *dev, u32 addr,
 }
 EXPORT_SYMBOL_GPL(mt76_set_irq_mask);
 
+void mt76_mmio_disable_aspm(struct pci_dev *pdev)
+{
+	struct pci_dev *parent = pdev->bus->self;
+	u16 aspm_conf, parent_aspm_conf = 0;
+
+	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
+	aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
+	if (parent) {
+		pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
+					  &parent_aspm_conf);
+		parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
+	}
+
+	if (!aspm_conf && (!parent || !parent_aspm_conf)) {
+		/* aspm already disabled */
+		return;
+	}
+
+	dev_info(&pdev->dev, "disabling ASPM %s %s\n",
+		 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
+		 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
+
+#ifdef CONFIG_PCIEASPM
+	pci_disable_link_state(pdev, aspm_conf);
+
+	/* Double-check ASPM control.  If not disabled by the above, the
+	 * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
+	 * not enabled); override by writing PCI config space directly.
+	 */
+	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
+	if (!(aspm_conf & PCI_EXP_LNKCTL_ASPMC))
+		return;
+#endif /* CONFIG_PCIEASPM */
+
+	/* Both device and parent should have the same ASPM setting.
+	 * Disable ASPM in downstream component first and then upstream.
+	 */
+	pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_conf);
+
+	if (parent)
+		pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
+					   aspm_conf);
+}
+EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
+
 void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
 {
 	static const struct mt76_bus_ops mt76_mmio_ops = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 570c159515a0..962812b6247d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -578,6 +578,7 @@  bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
 #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
 
 void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
+void mt76_mmio_disable_aspm(struct pci_dev *pdev);
 
 static inline u16 mt76_chip(struct mt76_dev *dev)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
index 73c3104f8858..264bef87e5c7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
@@ -81,6 +81,8 @@  mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
 	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
 
+	mt76_mmio_disable_aspm(pdev);
+
 	return 0;
 
 error: