diff mbox

PCI: Remove not needed check in disable aspm link

Message ID 1363628226-6679-1-git-send-email-yinghai@kernel.org
State Rejected
Headers show

Commit Message

Yinghai Lu March 18, 2013, 5:37 p.m. UTC
Roman reported ath5k does not work anymore on 3.8.
Bisected to
| commit 8c33f51df406e1a1f7fa4e9b244845b7ebd61fa6
| Author: Taku Izumi <izumi.taku@jp.fujitsu.com>
| Date:   Tue Oct 30 15:27:13 2012 +0900
|
|    PCI/ACPI: Request _OSC control before scanning PCI root bus
|
|    This patch moves up the code block to request _OSC control in order to
|    separate ACPI work and PCI work in acpi_pci_root_add().

It make pci_disable_link_state does not work anymore as acpi_disabled
is set before pci root bus scanning.
It will skip that in quirks and pcie_aspm_sanity_check.

We could revert to old logic, but that will make booting path and hotplug
path with different aspm_disabled again.

Acctually we don't need to check aspm_disabled in disable link, as
we already have protection about link state following.

https://bugzilla.kernel.org/show_bug.cgi?id=55211
http://article.gmane.org/gmane.linux.kernel.pci/20640

Need it for 3.8 stable.

Reported-by: Roman Yepishev <roman.yepishev@gmail.com>
Bisected-by: Roman Yepishev <roman.yepishev@gmail.com>
Tested-by: Roman Yepishev <roman.yepishev@gmail.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Cc: stable@kernel.org

---
 drivers/pci/pcie/aspm.c |   21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Bjorn Helgaas March 27, 2013, 10:56 p.m. UTC | #1
On Mon, Mar 18, 2013 at 11:37 AM, Yinghai Lu <yinghai@kernel.org> wrote:
> Roman reported ath5k does not work anymore on 3.8.
> Bisected to
> | commit 8c33f51df406e1a1f7fa4e9b244845b7ebd61fa6
> | Author: Taku Izumi <izumi.taku@jp.fujitsu.com>
> | Date:   Tue Oct 30 15:27:13 2012 +0900
> |
> |    PCI/ACPI: Request _OSC control before scanning PCI root bus
> |
> |    This patch moves up the code block to request _OSC control in order to
> |    separate ACPI work and PCI work in acpi_pci_root_add().
>
> It make pci_disable_link_state does not work anymore as acpi_disabled
> is set before pci root bus scanning.
> It will skip that in quirks and pcie_aspm_sanity_check.
>
> We could revert to old logic, but that will make booting path and hotplug
> path with different aspm_disabled again.
>
> Acctually we don't need to check aspm_disabled in disable link, as
> we already have protection about link state following.
>
> https://bugzilla.kernel.org/show_bug.cgi?id=55211
> http://article.gmane.org/gmane.linux.kernel.pci/20640
>
> Need it for 3.8 stable.
>
> Reported-by: Roman Yepishev <roman.yepishev@gmail.com>
> Bisected-by: Roman Yepishev <roman.yepishev@gmail.com>
> Tested-by: Roman Yepishev <roman.yepishev@gmail.com>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
> Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
> Cc: stable@kernel.org
>
> ---
>  drivers/pci/pcie/aspm.c |   21 ++++-----------------
>  1 file changed, 4 insertions(+), 17 deletions(-)
>
> Index: linux-2.6/drivers/pci/pcie/aspm.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/pcie/aspm.c
> +++ linux-2.6/drivers/pci/pcie/aspm.c
> @@ -493,15 +493,6 @@ static int pcie_aspm_sanity_check(struct
>                         return -EINVAL;
>
>                 /*
> -                * If ASPM is disabled then we're not going to change
> -                * the BIOS state. It's safe to continue even if it's a
> -                * pre-1.1 device
> -                */
> -
> -               if (aspm_disabled)
> -                       continue;
> -
> -               /*
>                  * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
>                  * RBER bit to determine if a function is 1.1 version device
>                  */
> @@ -718,15 +709,11 @@ void pcie_aspm_powersave_config_link(str
>   * pci_disable_link_state - disable pci device's link state, so the link will
>   * never enter specific states
>   */
> -static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
> -                                    bool force)
> +static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
>  {
>         struct pci_dev *parent = pdev->bus->self;
>         struct pcie_link_state *link;
>
> -       if (aspm_disabled && !force)
> -               return;
> -
>         if (!pci_is_pcie(pdev))
>                 return;
>
> @@ -757,13 +744,13 @@ static void __pci_disable_link_state(str
>
>  void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
>  {
> -       __pci_disable_link_state(pdev, state, false, false);
> +       __pci_disable_link_state(pdev, state, false);
>  }
>  EXPORT_SYMBOL(pci_disable_link_state_locked);
>
>  void pci_disable_link_state(struct pci_dev *pdev, int state)
>  {
> -       __pci_disable_link_state(pdev, state, true, false);
> +       __pci_disable_link_state(pdev, state, true);
>  }
>  EXPORT_SYMBOL(pci_disable_link_state);
>
> @@ -781,7 +768,7 @@ void pcie_clear_aspm(struct pci_bus *bus
>                 __pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
>                                          PCIE_LINK_STATE_L1 |
>                                          PCIE_LINK_STATE_CLKPM,
> -                                        false, true);
> +                                        false);
>         }
>  }
>

This might fix the problem, but the code is still a mess.  In
acpi_pci_root_add(), why do we have the following?

    acpi_pci_root_add

      acpi_pci_osc_support
      if (flags != base_flags)
        pcie_no_aspm
      if (...)
        acpi_pci_osc_control_set
        if (ACPI_SUCCESS)
          is_osc_granted = true

      pci_acpi_scan_root

      if (is_osc_granted)
        if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM)
          pcie_clear_aspm
      else
        pcie_no_aspm

Why can't we set all the ASPM flags *first*, before calling
pci_acpi_scan_root()?  That way we could just do the correct ASPM
setup as we discover devices during enumeration, rather than trying to
fix things up afterwards.  I suspect pcie_clear_aspm() is broken
anyway, because it looks like it only touches one level of the
hierarchy, without recursively descending it.

But Taku went to some trouble in 8c33f51d to introduce is_osc_granted
to remember this until after pci_acpi_scan_root(), so presumably
there's some reason for this.  Do you remember why, Taku?

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu March 28, 2013, 7:41 a.m. UTC | #2
On Wed, Mar 27, 2013 at 3:56 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>
> Why can't we set all the ASPM flags *first*, before calling
> pci_acpi_scan_root()?  That way we could just do the correct ASPM
> setup as we discover devices during enumeration, rather than trying to
> fix things up afterwards.  I suspect pcie_clear_aspm() is broken
> anyway, because it looks like it only touches one level of the
> hierarchy, without recursively descending it.

Yes, we can clean up aspm stop/clear up.
and that should be for 3.10 right?

But this patch should be safe for 3.9 and stable.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas March 28, 2013, 12:46 p.m. UTC | #3
On Thu, Mar 28, 2013 at 1:41 AM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Wed, Mar 27, 2013 at 3:56 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>
>> Why can't we set all the ASPM flags *first*, before calling
>> pci_acpi_scan_root()?  That way we could just do the correct ASPM
>> setup as we discover devices during enumeration, rather than trying to
>> fix things up afterwards.  I suspect pcie_clear_aspm() is broken
>> anyway, because it looks like it only touches one level of the
>> hierarchy, without recursively descending it.
>
> Yes, we can clean up aspm stop/clear up.
> and that should be for 3.10 right?
>
> But this patch should be safe for 3.9 and stable.

This patch might be *safe*, but it (and the changelog) are completely
unintelligible.

The problem with applying an unintelligible stop-gap patch is that it
becomes forever part of the changelog, and it's a huge waste of time
to everybody who tries to understand the history later.  That's why I
think it's worth spending some time to make a good patch now.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu March 28, 2013, 8:21 p.m. UTC | #4
On Thu, Mar 28, 2013 at 5:46 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:

> This patch might be *safe*, but it (and the changelog) are completely
> unintelligible.
>
> The problem with applying an unintelligible stop-gap patch is that it
> becomes forever part of the changelog, and it's a huge waste of time
> to everybody who tries to understand the history later.  That's why I
> think it's worth spending some time to make a good patch now.

Ok, Please check if attached is doing what you want.

Thanks

Yinghai
Yinghai Lu March 28, 2013, 8:24 p.m. UTC | #5
resending with adding To Roman.

On Thu, Mar 28, 2013 at 5:46 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> This patch might be *safe*, but it (and the changelog) are completely
> unintelligible.
>
> The problem with applying an unintelligible stop-gap patch is that it
> becomes forever part of the changelog, and it's a huge waste of time
> to everybody who tries to understand the history later.  That's why I
> think it's worth spending some time to make a good patch now.

Please check if attached patch is doing what you want.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu March 28, 2013, 8:24 p.m. UTC | #6
patch for Roman

On Thu, Mar 28, 2013 at 1:24 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> resending with adding To Roman.
>
> On Thu, Mar 28, 2013 at 5:46 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>> This patch might be *safe*, but it (and the changelog) are completely
>> unintelligible.
>>
>> The problem with applying an unintelligible stop-gap patch is that it
>> becomes forever part of the changelog, and it's a huge waste of time
>> to everybody who tries to understand the history later.  That's why I
>> think it's worth spending some time to make a good patch now.
>
> Please check if attached patch is doing what you want.
>
> Thanks
>
> Yinghai
Bjorn Helgaas March 29, 2013, 3:22 a.m. UTC | #7
[+cc Matthew]
[+cc e1000-devel@lists.sourceforge.net for suspected 82575/82598 regression]

On Thu, Mar 28, 2013 at 01:24:55PM -0700, Yinghai Lu wrote:
> patch for Roman
> 
> On Thu, Mar 28, 2013 at 1:24 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> > resending with adding To Roman.
> >
> > On Thu, Mar 28, 2013 at 5:46 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> >> This patch might be *safe*, but it (and the changelog) are completely
> >> unintelligible.
> >>
> >> The problem with applying an unintelligible stop-gap patch is that it
> >> becomes forever part of the changelog, and it's a huge waste of time
> >> to everybody who tries to understand the history later.  That's why I
> >> think it's worth spending some time to make a good patch now.
> >
> > Please check if attached patch is doing what you want.

Patch inlined below for convenience.

> Subject: [PATCH] PCI: Remove not needed check in disable aspm link
> 
> Roman reported ath5k does not work anymore on 3.8.
> Bisected to
> | commit 8c33f51df406e1a1f7fa4e9b244845b7ebd61fa6
> | Author: Taku Izumi <izumi.taku@jp.fujitsu.com>
> | Date:   Tue Oct 30 15:27:13 2012 +0900
> |
> |    PCI/ACPI: Request _OSC control before scanning PCI root bus
> |
> |    This patch moves up the code block to request _OSC control in order to
> |    separate ACPI work and PCI work in acpi_pci_root_add().
> 
> It make pci_disable_link_state does not work anymore as acpi_disabled
> is set before pci root bus scanning.
> It will skip that in quirks and pcie_aspm_sanity_check.

I think this regression has nothing to do with pci_disable_link_state().

When aspm_disabled is set, pci_disable_link_state() doesn't do anything.

In both 3.7 and 3.8, aspm_disabled is set in acpi_pci_root_add() before
any driver probe routines are run, so it looks like calling
pci_disable_link_state() from a driver had no effect even in 3.7.  This
is a problem, of course, but not the one Roman is seeing, because ath5k
calls pci_disable_link_state() from the driver probe routine.

There are also PCI_FIXUP_FINAL quirks for 82575 and 82598 NICs that call
pci_disable_link_state().  In 3.7, these quirks are run before
aspm_disabled is set, but 8c33f51d moved the pcie_no_aspm() call up
before we start scanning the bus, so in 3.8, aspm_disabled is set
*before* we run them.  I think that means 8c33f51d broke all these
quirks.  That's also a problem, of course, but this isn't the one Roman
is seeing either.

I think the problem Roman is seeing happens when
pcie_aspm_init_link_state() calls pcie_aspm_sanity_check() during device
enumeration.  In 3.8, the fact that aspm_disabled is already set by the
time we get here means we skip the check for pre-1.1 PCIe devices, and
I think *this* is what Roman is seeing.

I suspect the following hunk of your patch is enough to fix things for
Roman:

> --- linux-2.6.orig/drivers/pci/pcie/aspm.c
> +++ linux-2.6/drivers/pci/pcie/aspm.c
> @@ -493,15 +492,6 @@ static int pcie_aspm_sanity_check(struct
>  			return -EINVAL;
>  
>  		/*
> -		 * If ASPM is disabled then we're not going to change
> -		 * the BIOS state. It's safe to continue even if it's a
> -		 * pre-1.1 device
> -		 */
> -
> -		if (aspm_disabled)
> -			continue;
> -
> -		/*
>  		 * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
>  		 * RBER bit to determine if a function is 1.1 version device
>  		 */

However, this test was added by Matthew in c9651e70, and I can't remove
it unless we have an explanation of why removing it will not reintroduce
the bug he was fixing.

This code is such a terrible mess that it's not surprising at all that
we have all these issues.  But there's too much to untangle in v3.9; all
we can hope for is to fix the regressions in v3.9 and clean it up later.

> We could revert to old logic, but that will make booting path and hotplug
> path with different aspm_disabled again.
> 
> Acctually we don't need to check aspm_disabled in disable link, as
> we already have protection about link state following.
> 
> https://bugzilla.kernel.org/show_bug.cgi?id=55211
> http://article.gmane.org/gmane.linux.kernel.pci/20640
> 
> Need it for 3.8 stable.
> 
> -v2: more cleanup
> 	1. remove aspm_support_enabled, as if it compiled in, support is there
> 		so even user pass aspm=off, link_state still get allocated,
> 		then we will have chance to disable aspm on devices from
> 		buggy setting of BIOS.
> 	2. move pcie_no_aspm() calling for fadt disabling before scanning
> 		requested by Bjorn.
> 
> Reported-by: Roman Yepishev <roman.yepishev@gmail.com>
> Bisected-by: Roman Yepishev <roman.yepishev@gmail.com>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
> Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
> 
> ---
>  drivers/acpi/pci_root.c  |   25 +++++++++---------------
>  drivers/pci/pcie/aspm.c  |   48 ++---------------------------------------------
>  include/linux/pci-aspm.h |    4 ---
>  include/linux/pci.h      |    2 -
>  4 files changed, 14 insertions(+), 65 deletions(-)
> 
> Index: linux-2.6/drivers/acpi/pci_root.c
> ===================================================================
> --- linux-2.6.orig/drivers/acpi/pci_root.c
> +++ linux-2.6/drivers/acpi/pci_root.c
> @@ -415,7 +415,6 @@ static int acpi_pci_root_add(struct acpi
>  	struct acpi_pci_root *root;
>  	struct acpi_pci_driver *driver;
>  	u32 flags, base_flags;
> -	bool is_osc_granted = false;
>  
>  	root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
>  	if (!root)
> @@ -494,6 +493,11 @@ static int acpi_pci_root_add(struct acpi
>  			flags = base_flags;
>  		}
>  	}
> +
> +	/* ASPM setting */
> +	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM)
> +		pcie_no_aspm();
> +
>  	if (!pcie_ports_disabled
>  	    && (flags & ACPI_PCIE_REQ_SUPPORT) == ACPI_PCIE_REQ_SUPPORT) {
>  		flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL
> @@ -513,16 +517,17 @@ static int acpi_pci_root_add(struct acpi
>  
>  		status = acpi_pci_osc_control_set(device->handle, &flags,
>  				       OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
> -		if (ACPI_SUCCESS(status)) {
> -			is_osc_granted = true;
> +		if (ACPI_SUCCESS(status))
>  			dev_info(&device->dev,
>  				"ACPI _OSC control (0x%02x) granted\n", flags);
> -		} else {
> -			is_osc_granted = false;
> +		else {
>  			dev_info(&device->dev,
>  				"ACPI _OSC request failed (%s), "
>  				"returned control mask: 0x%02x\n",
>  				acpi_format_exception(status), flags);
> +			pr_info("ACPI _OSC control for PCIe not granted, "
> +				"disabling ASPM\n");
> +			pcie_no_aspm();
>  		}
>  	} else {
>  		dev_info(&device->dev,
> @@ -554,16 +559,6 @@ static int acpi_pci_root_add(struct acpi
>  		goto out_del_root;
>  	}
>  
> -	/* ASPM setting */
> -	if (is_osc_granted) {
> -		if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM)
> -			pcie_clear_aspm(root->bus);
> -	} else {
> -		pr_info("ACPI _OSC control for PCIe not granted, "
> -			"disabling ASPM\n");
> -		pcie_no_aspm();
> -	}
> -
>  	pci_acpi_add_bus_pm_notifier(device, root->bus);
>  	if (device->wakeup.flags.run_wake)
>  		device_set_run_wake(root->bus->bridge, true);
> Index: linux-2.6/drivers/pci/pcie/aspm.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/pcie/aspm.c
> +++ linux-2.6/drivers/pci/pcie/aspm.c
> @@ -69,7 +69,6 @@ struct pcie_link_state {
>  };
>  
>  static int aspm_disabled, aspm_force;
> -static bool aspm_support_enabled = true;
>  static DEFINE_MUTEX(aspm_lock);
>  static LIST_HEAD(link_list);
>  
> @@ -493,15 +492,6 @@ static int pcie_aspm_sanity_check(struct
>  			return -EINVAL;
>  
>  		/*
> -		 * If ASPM is disabled then we're not going to change
> -		 * the BIOS state. It's safe to continue even if it's a
> -		 * pre-1.1 device
> -		 */
> -
> -		if (aspm_disabled)
> -			continue;
> -
> -		/*
>  		 * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
>  		 * RBER bit to determine if a function is 1.1 version device
>  		 */
> @@ -556,9 +546,6 @@ void pcie_aspm_init_link_state(struct pc
>  	struct pcie_link_state *link;
>  	int blacklist = !!pcie_aspm_sanity_check(pdev);
>  
> -	if (!aspm_support_enabled)
> -		return;
> -
>  	if (!pci_is_pcie(pdev) || pdev->link_state)
>  		return;
>  	if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT &&
> @@ -718,15 +705,11 @@ void pcie_aspm_powersave_config_link(str
>   * pci_disable_link_state - disable pci device's link state, so the link will
>   * never enter specific states
>   */
> -static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
> -				     bool force)
> +static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
>  {
>  	struct pci_dev *parent = pdev->bus->self;
>  	struct pcie_link_state *link;
>  
> -	if (aspm_disabled && !force)
> -		return;
> -
>  	if (!pci_is_pcie(pdev))
>  		return;
>  
> @@ -757,34 +740,16 @@ static void __pci_disable_link_state(str
>  
>  void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
>  {
> -	__pci_disable_link_state(pdev, state, false, false);
> +	__pci_disable_link_state(pdev, state, false);
>  }
>  EXPORT_SYMBOL(pci_disable_link_state_locked);
>  
>  void pci_disable_link_state(struct pci_dev *pdev, int state)
>  {
> -	__pci_disable_link_state(pdev, state, true, false);
> +	__pci_disable_link_state(pdev, state, true);
>  }
>  EXPORT_SYMBOL(pci_disable_link_state);
>  
> -void pcie_clear_aspm(struct pci_bus *bus)
> -{
> -	struct pci_dev *child;
> -
> -	if (aspm_force)
> -		return;
> -
> -	/*
> -	 * Clear any ASPM setup that the firmware has carried out on this bus
> -	 */
> -	list_for_each_entry(child, &bus->devices, bus_list) {
> -		__pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
> -					 PCIE_LINK_STATE_L1 |
> -					 PCIE_LINK_STATE_CLKPM,
> -					 false, true);
> -	}
> -}
> -
>  static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
>  {
>  	int i;
> @@ -944,7 +909,6 @@ static int __init pcie_aspm_disable(char
>  	if (!strcmp(str, "off")) {
>  		aspm_policy = POLICY_DEFAULT;
>  		aspm_disabled = 1;
> -		aspm_support_enabled = false;
>  		printk(KERN_INFO "PCIe ASPM is disabled\n");
>  	} else if (!strcmp(str, "force")) {
>  		aspm_force = 1;
> @@ -980,9 +944,3 @@ int pcie_aspm_enabled(void)
>         return !aspm_disabled;
>  }
>  EXPORT_SYMBOL(pcie_aspm_enabled);
> -
> -bool pcie_aspm_support_enabled(void)
> -{
> -	return aspm_support_enabled;
> -}
> -EXPORT_SYMBOL(pcie_aspm_support_enabled);
> Index: linux-2.6/include/linux/pci-aspm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/pci-aspm.h
> +++ linux-2.6/include/linux/pci-aspm.h
> @@ -29,7 +29,6 @@ extern void pcie_aspm_pm_state_change(st
>  extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
>  extern void pci_disable_link_state(struct pci_dev *pdev, int state);
>  extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state);
> -extern void pcie_clear_aspm(struct pci_bus *bus);
>  extern void pcie_no_aspm(void);
>  #else
>  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
> @@ -47,9 +46,6 @@ static inline void pcie_aspm_powersave_c
>  static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
>  {
>  }
> -static inline void pcie_clear_aspm(struct pci_bus *bus)
> -{
> -}
>  static inline void pcie_no_aspm(void)
>  {
>  }
> Index: linux-2.6/include/linux/pci.h
> ===================================================================
> --- linux-2.6.orig/include/linux/pci.h
> +++ linux-2.6/include/linux/pci.h
> @@ -1168,7 +1168,7 @@ static inline int pcie_aspm_enabled(void
>  static inline bool pcie_aspm_support_enabled(void) { return false; }
>  #else
>  extern int pcie_aspm_enabled(void);
> -extern bool pcie_aspm_support_enabled(void);
> +static inline bool pcie_aspm_support_enabled(void) { return true; }
>  #endif
>  
>  #ifdef CONFIG_PCIEAER
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas March 29, 2013, 12:24 p.m. UTC | #8
On Thu, Mar 28, 2013 at 11:59 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>
> On Thu, Mar 28, 2013 at 8:22 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>
>> [+cc Matthew]
>> [+cc e1000-devel@lists.sourceforge.net for suspected 82575/82598
>> regression]
>>
>> On Thu, Mar 28, 2013 at 01:24:55PM -0700, Yinghai Lu wrote:
>> > patch for Roman
>> >
>> > On Thu, Mar 28, 2013 at 1:24 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> > > resending with adding To Roman.
>> > >
>> > > On Thu, Mar 28, 2013 at 5:46 AM, Bjorn Helgaas <bhelgaas@google.com>
>> > > wrote:
>> > >> This patch might be *safe*, but it (and the changelog) are completely
>> > >> unintelligible.
>> > >>
>> > >> The problem with applying an unintelligible stop-gap patch is that it
>> > >> becomes forever part of the changelog, and it's a huge waste of time
>> > >> to everybody who tries to understand the history later.  That's why I
>> > >> think it's worth spending some time to make a good patch now.
>> > >
>> > > Please check if attached patch is doing what you want.
>>
>> Patch inlined below for convenience.
>>
>> > Subject: [PATCH] PCI: Remove not needed check in disable aspm link
>> >
>> > Roman reported ath5k does not work anymore on 3.8.
>> > Bisected to
>> > | commit 8c33f51df406e1a1f7fa4e9b244845b7ebd61fa6
>> > | Author: Taku Izumi <izumi.taku@jp.fujitsu.com>
>> > | Date:   Tue Oct 30 15:27:13 2012 +0900
>> > |
>> > |    PCI/ACPI: Request _OSC control before scanning PCI root bus
>> > |
>> > |    This patch moves up the code block to request _OSC control in order
>> > to
>> > |    separate ACPI work and PCI work in acpi_pci_root_add().
>> >
>> > It make pci_disable_link_state does not work anymore as acpi_disabled
>> > is set before pci root bus scanning.
>> > It will skip that in quirks and pcie_aspm_sanity_check.
>>
>> I think this regression has nothing to do with pci_disable_link_state().
>>
>> When aspm_disabled is set, pci_disable_link_state() doesn't do anything.
>>
>> In both 3.7 and 3.8, aspm_disabled is set in acpi_pci_root_add() before
>> any driver probe routines are run, so it looks like calling
>> pci_disable_link_state() from a driver had no effect even in 3.7.  This
>> is a problem, of course, but not the one Roman is seeing, because ath5k
>> calls pci_disable_link_state() from the driver probe routine.
>>
>> There are also PCI_FIXUP_FINAL quirks for 82575 and 82598 NICs that call
>> pci_disable_link_state().  In 3.7, these quirks are run before
>> aspm_disabled is set, but 8c33f51d moved the pcie_no_aspm() call up
>> before we start scanning the bus, so in 3.8, aspm_disabled is set
>> *before* we run them.  I think that means 8c33f51d broke all these
>> quirks.  That's also a problem, of course, but this isn't the one Roman
>> is seeing either.
>>
>> I think the problem Roman is seeing happens when
>> pcie_aspm_init_link_state() calls pcie_aspm_sanity_check() during device
>> enumeration.  In 3.8, the fact that aspm_disabled is already set by the
>> time we get here means we skip the check for pre-1.1 PCIe devices, and
>> I think *this* is what Roman is seeing.
>>
>> I suspect the following hunk of your patch is enough to fix things for
>> Roman:
>>
>> > --- linux-2.6.orig/drivers/pci/pcie/aspm.c
>> > +++ linux-2.6/drivers/pci/pcie/aspm.c
>> > @@ -493,15 +492,6 @@ static int pcie_aspm_sanity_check(struct
>> >                       return -EINVAL;
>> >
>> >               /*
>> > -              * If ASPM is disabled then we're not going to change
>> > -              * the BIOS state. It's safe to continue even if it's a
>> > -              * pre-1.1 device
>> > -              */
>> > -
>> > -             if (aspm_disabled)
>> > -                     continue;
>> > -
>> > -             /*
>> >                * Disable ASPM for pre-1.1 PCIe device, we follow MS to
>> > use
>> >                * RBER bit to determine if a function is 1.1 version
>> > device
>> >                */
>>
>> However, this test was added by Matthew in c9651e70, and I can't remove
>> it unless we have an explanation of why removing it will not reintroduce
>> the bug he was fixing.
>>
>> This code is such a terrible mess that it's not surprising at all that
>> we have all these issues.  But there's too much to untangle in v3.9; all
>> we can hope for is to fix the regressions in v3.9 and clean it up later.
>
>
> v1 will fix quirks and pcie_aspm_sanity_check path.
> v2. will go further even user pass "aspm=off", those quirks and disable aspm
> in driver
> will still work, and also call pcie_no_aspm for disable aspm for FADT path
> early.
>
> So now you want half of v1, and not want to fix quirk path.
> Is my understanding right?

What I want is a patch that fixes the regression and doesn't break
anything else, along with a changelog that makes it obvious that we're
doing the right thing.  I don't know what that looks like yet.  None
of your patches so far is even close.

Half of your v1 patch (removing the pcie_aspm_sanity_check() test)
*might* be the right thing, but only if you can clearly explain why
that will not reintroduce the bug Matthew fixed with c9651e70.

I think we also need to fix the PCI_FIXUP_FINAL quirk regression, but
that's a separate issue and should be a separate patch.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu March 29, 2013, 6:02 p.m. UTC | #9
On Fri, Mar 29, 2013 at 5:24 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>
> Half of your v1 patch (removing the pcie_aspm_sanity_check() test)
> *might* be the right thing, but only if you can clearly explain why
> that will not reintroduce the bug Matthew fixed with c9651e70.
>
> I think we also need to fix the PCI_FIXUP_FINAL quirk regression, but
> that's a separate issue and should be a separate patch.


First commit from Matthew
 0ae5eaf10     PCI: ignore pre-1.1 ASPM quirking when ASPM is disabled
    Right now we won't touch ASPM state if ASPM is disabled, except in the case
    where we find a device that appears to be too old to reliably support ASPM.
    Right now we'll clear it in that case, which is almost certainly the wrong
    thing to do

Try to not touch pre-1.1 ASPM for all, and it causes lots of regression.

So second commit

cdb0f9a1ad2e ASPM: Fix pcie devices with non-pcie children
    Since 3.2.12 and 3.3, some systems are failing to boot with a BUG_ON.
    Some other systems using the pata_jmicron driver fail to boot because no
    disks are detected.  Passing pcie_aspm=force on the kernel command line
    works around it.

move the check aspm_disabled down.

but ath5 and etc (pre-1.1) really need to aspm_disable to change their
hw setting.

So the right solution would be dropping pcie_aspm_sanity_check()
change -in v2 should make all both happy, as quirk and disable that in
driver for ath5 are calling
pcie_disable_aspm_state explicitly.

In v2, we already removed pcie_clear_aspm() that is calling
pcie_disable_aspm_state.


Please check attached -v3.


Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu March 29, 2013, 6:04 p.m. UTC | #10
attatched -v3 again

On Fri, Mar 29, 2013 at 11:02 AM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Fri, Mar 29, 2013 at 5:24 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>
>> Half of your v1 patch (removing the pcie_aspm_sanity_check() test)
>> *might* be the right thing, but only if you can clearly explain why
>> that will not reintroduce the bug Matthew fixed with c9651e70.
>>
>> I think we also need to fix the PCI_FIXUP_FINAL quirk regression, but
>> that's a separate issue and should be a separate patch.
>
>
> First commit from Matthew
>  0ae5eaf10     PCI: ignore pre-1.1 ASPM quirking when ASPM is disabled
>     Right now we won't touch ASPM state if ASPM is disabled, except in the case
>     where we find a device that appears to be too old to reliably support ASPM.
>     Right now we'll clear it in that case, which is almost certainly the wrong
>     thing to do
>
> Try to not touch pre-1.1 ASPM for all, and it causes lots of regression.
>
> So second commit
>
> cdb0f9a1ad2e ASPM: Fix pcie devices with non-pcie children
>     Since 3.2.12 and 3.3, some systems are failing to boot with a BUG_ON.
>     Some other systems using the pata_jmicron driver fail to boot because no
>     disks are detected.  Passing pcie_aspm=force on the kernel command line
>     works around it.
>
> move the check aspm_disabled down.
>
> but ath5 and etc (pre-1.1) really need to aspm_disable to change their
> hw setting.
>
> So the right solution would be dropping pcie_aspm_sanity_check()
> change -in v2 should make all both happy, as quirk and disable that in
> driver for ath5 are calling
> pcie_disable_aspm_state explicitly.
>
> In v2, we already removed pcie_clear_aspm() that is calling
> pcie_disable_aspm_state.
>
>
> Please check attached -v3.
>
>
> Thanks
>
> Yinghai
Roman Yepishev March 29, 2013, 6:11 p.m. UTC | #11
On Fri, Mar 29, 2013 at 5:22 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> [+cc Matthew]
> [+cc e1000-devel@lists.sourceforge.net for suspected 82575/82598 regression]
>
> I think this regression has nothing to do with pci_disable_link_state().
>
> ...
>
> There are also PCI_FIXUP_FINAL quirks for 82575 and 82598 NICs that call
> pci_disable_link_state().  In 3.7, these quirks are run before
> aspm_disabled is set, but 8c33f51d moved the pcie_no_aspm() call up
> before we start scanning the bus, so in 3.8, aspm_disabled is set
> *before* we run them.  I think that means 8c33f51d broke all these
> quirks.  That's also a problem, of course, but this isn't the one Roman
> is seeing either.
I have to say that my iwlwifi device (Intel Corporation Centrino
Wireless-N 1000 [Condor Peak] [8086:0084] also appears to be affected
- it calls pci_disable_link_state too and in current 3.8 that does not
do anything. It looks like it affects something when the system is
resumed from suspend, but I was not able to confirm that yet (but
there's a thread about removing the code since it did not appear to
work - http://thread.gmane.org/gmane.linux.kernel.pci/20628/focus=20640)

> I think the problem Roman is seeing happens when
> pcie_aspm_init_link_state() calls pcie_aspm_sanity_check() during device
> enumeration.  In 3.8, the fact that aspm_disabled is already set by the
> time we get here means we skip the check for pre-1.1 PCIe devices, and
> I think *this* is what Roman is seeing.
>
> I suspect the following hunk of your patch is enough to fix things for
> Roman:
>
>> --- linux-2.6.orig/drivers/pci/pcie/aspm.c
>> +++ linux-2.6/drivers/pci/pcie/aspm.c
>> @@ -493,15 +492,6 @@ static int pcie_aspm_sanity_check(struct
>>                       return -EINVAL;
>>
>>               /*
>> -              * If ASPM is disabled then we're not going to change
>> -              * the BIOS state. It's safe to continue even if it's a
>> -              * pre-1.1 device
>> -              */
>> -
>> -             if (aspm_disabled)
>> -                     continue;
>> -
>> -             /*
>>                * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
>>                * RBER bit to determine if a function is 1.1 version device
>>                */
>
> However, this test was added by Matthew in c9651e70, and I can't remove
> it unless we have an explanation of why removing it will not reintroduce
> the bug he was fixing.
>
> This code is such a terrible mess that it's not surprising at all that
> we have all these issues.  But there's too much to untangle in v3.9; all
> we can hope for is to fix the regressions in v3.9 and clean it up later.
>
I have removed the check and indeed it allowed ASPM to become disabled
during the ath5k driver load.

--
Regards, Roman Yepishev
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: linux-2.6/drivers/pci/pcie/aspm.c
===================================================================
--- linux-2.6.orig/drivers/pci/pcie/aspm.c
+++ linux-2.6/drivers/pci/pcie/aspm.c
@@ -493,15 +493,6 @@  static int pcie_aspm_sanity_check(struct
 			return -EINVAL;
 
 		/*
-		 * If ASPM is disabled then we're not going to change
-		 * the BIOS state. It's safe to continue even if it's a
-		 * pre-1.1 device
-		 */
-
-		if (aspm_disabled)
-			continue;
-
-		/*
 		 * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
 		 * RBER bit to determine if a function is 1.1 version device
 		 */
@@ -718,15 +709,11 @@  void pcie_aspm_powersave_config_link(str
  * pci_disable_link_state - disable pci device's link state, so the link will
  * never enter specific states
  */
-static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
-				     bool force)
+static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
 {
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link;
 
-	if (aspm_disabled && !force)
-		return;
-
 	if (!pci_is_pcie(pdev))
 		return;
 
@@ -757,13 +744,13 @@  static void __pci_disable_link_state(str
 
 void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, false, false);
+	__pci_disable_link_state(pdev, state, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state_locked);
 
 void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, true, false);
+	__pci_disable_link_state(pdev, state, true);
 }
 EXPORT_SYMBOL(pci_disable_link_state);
 
@@ -781,7 +768,7 @@  void pcie_clear_aspm(struct pci_bus *bus
 		__pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
 					 PCIE_LINK_STATE_L1 |
 					 PCIE_LINK_STATE_CLKPM,
-					 false, true);
+					 false);
 	}
 }