diff mbox series

[v5,3/4] PCI/ASPM: add sysfs attributes for controlling ASPM link states

Message ID 8783b887-2e30-43f0-d462-96f8fbb18ae2@gmail.com
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series PCI/ASPM: add sysfs attributes for controlling ASPM | expand

Commit Message

Heiner Kallweit Aug. 31, 2019, 8:20 p.m. UTC
Background of this extension is a problem with the r8169 network driver.
Several combinations of board chipsets and network chip versions have
problems if ASPM is enabled, therefore we have to disable ASPM by default.
However especially on notebooks ASPM can provide significant power-saving,
therefore we want to give users the option to enable ASPM. With the new
sysfs attributes users can control which ASPM link-states are
enabled/disabled.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
v2:
- use a dedicated sysfs attribute per link state
- allow separate control of ASPM and PCI PM L1 sub-states
v3:
- statically allocate the attribute group
- replace snprintf with sprintf
- base on top of "PCI: Make pcie_downstream_port() available outside of access.c"
v4:
- add call to sysfs_update_group because the is_visible callback always
  returns false at file creation time
- simplify code a little
v5:
- rebased to latest pci/next
---
 Documentation/ABI/testing/sysfs-bus-pci |  13 ++
 drivers/pci/pci-sysfs.c                 |   7 +
 drivers/pci/pci.h                       |   4 +
 drivers/pci/pcie/aspm.c                 | 184 ++++++++++++++++++++++++
 4 files changed, 208 insertions(+)

Comments

Bjorn Helgaas Sept. 7, 2019, 8:32 p.m. UTC | #1
On Sat, Aug 31, 2019 at 10:20:47PM +0200, Heiner Kallweit wrote:
> Background of this extension is a problem with the r8169 network driver.
> Several combinations of board chipsets and network chip versions have
> problems if ASPM is enabled, therefore we have to disable ASPM per default.
> However especially on notebooks ASPM can provide significant power-saving,
> therefore we want to give users the option to enable ASPM. With the new
> sysfs attributes users can control which ASPM link-states are
> enabled/disabled.
> 
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> ---
> v2:
> - use a dedicated sysfs attribute per link state
> - allow separate control of ASPM and PCI PM L1 sub-states
> v3:
> - statically allocate the attribute group
> - replace snprintf with printf
> - base on top of "PCI: Make pcie_downstream_port() available outside of access.c"
> v4:
> - add call to sysfs_update_group because is_visible callback returns false
>   always at file creation time
> - simplify code a little
> v5:
> - rebased to latest pci/next
> ---
>  Documentation/ABI/testing/sysfs-bus-pci |  13 ++
>  drivers/pci/pci-sysfs.c                 |   7 +
>  drivers/pci/pci.h                       |   4 +
>  drivers/pci/pcie/aspm.c                 | 184 ++++++++++++++++++++++++
>  4 files changed, 208 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> index 8bfee557e..49249a165 100644
> --- a/Documentation/ABI/testing/sysfs-bus-pci
> +++ b/Documentation/ABI/testing/sysfs-bus-pci
> @@ -347,3 +347,16 @@ Description:
>  		If the device has any Peer-to-Peer memory registered, this
>  	        file contains a '1' if the memory has been published for
>  		use outside the driver that owns the device.
> +
> +What		/sys/bus/pci/devices/.../aspm/aspm_l0s
> +What		/sys/bus/pci/devices/.../aspm/aspm_l1
> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_1
> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_2
> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_1_pcipm
> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_2_pcipm
> +What		/sys/bus/pci/devices/.../aspm/aspm_clkpm
> +date:		August 2019

Other entries use "What:" and "Date:" (add colon and capitalize).

There are no examples in *this* file, but in
Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd,
the "What:" is not repeated for each file in the group.

> +Contact:	Heiner Kallweit <hkallweit1@gmail.com>
> +Description:	If ASPM is supported for an endpoint, then these files
> +		can be used to disable or enable the individual
> +		power management states.

Please mention the specific details here, e.g., "write 1 to enable, 0
to disable".

> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index 868e35109..687240f55 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -1315,6 +1315,10 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev)
>  
>  	pcie_vpd_create_sysfs_dev_files(dev);
>  	pcie_aspm_create_sysfs_dev_files(dev);
> +#ifdef CONFIG_PCIEASPM
> +	/* update visibility of attributes in this group */
> +	sysfs_update_group(&dev->dev.kobj, &aspm_ctrl_attr_group);
> +#endif

Isn't there a way to do this in drivers/pci/pcie/aspm.c somehow,
without using sysfs_update_group()?  There are only three callers of
it in the tree, and I'd be surprised if ASPM is unique enough to have
to be the fourth.

>  	if (dev->reset_fn) {
>  		retval = device_create_file(&dev->dev, &dev_attr_reset);
> @@ -1571,6 +1575,9 @@ static const struct attribute_group *pci_dev_attr_groups[] = {
>  	&pcie_dev_attr_group,
>  #ifdef CONFIG_PCIEAER
>  	&aer_stats_attr_group,
> +#endif
> +#ifdef CONFIG_PCIEASPM
> +	&aspm_ctrl_attr_group,
>  #endif
>  	NULL,
>  };
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 44b80186d..9dc3e3673 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -659,4 +659,8 @@ static inline int pci_acpi_program_hp_params(struct pci_dev *dev)
>  }
>  #endif
>  
> +#ifdef CONFIG_PCIEASPM
> +extern const struct attribute_group aspm_ctrl_attr_group;
> +#endif
> +
>  #endif /* DRIVERS_PCI_H */
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index f044ae4d1..ce3425125 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -1287,6 +1287,190 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
>  }
>  #endif
>  
> +static struct pcie_link_state *aspm_get_parent_link(struct pci_dev *pdev)

I know the ASPM code is pretty confused, but I don't think "parent
link" really makes sense.  "Parent" implies a parent/child
relationship, but a link doesn't have a parent or a child; it only has
an upstream end and a downstream end.

Anyway, any given PCIe device has either zero or one link associated
with it, so something like "aspm_get_link()" would be unambiguous all
by itself.

> +{
> +	struct pci_dev *parent = pdev->bus->self;
> +
> +	if (pcie_downstream_port(pdev))
> +		parent = pdev;
> +
> +	return parent ? parent->link_state : NULL;
> +}
> +
> +static bool pcie_check_valid_aspm_endpoint(struct pci_dev *pdev)

Maybe "pcie_is_aspm_dev()" or similar?  I think we may want to include
more than just endpoints (see below).  "Check" in function names is a
pet peeve of mine because it doesn't tell us whether it's a pure
function (as this is) or it has side effects, and it doesn't give a
hint about what the sense of the return value is.

> +{
> +	struct pcie_link_state *link;
> +
> +	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)

Do you intend to exclude other Upstream Ports like Legacy Endpoints,
Upstream Switch Ports, and PCIe-to-PCI/PCI-X Bridges?  They also have
a link leading to them, so we might want them to have knobs as well.
Or if we don't want the knobs, a comment about why not would be
useful.

> +		return false;
> +
> +	link = aspm_get_parent_link(pdev);
> +
> +	return link && link->aspm_capable;
> +}
> +
> +static ssize_t aspm_attr_show_common(struct device *dev,
> +				     struct device_attribute *attr,
> +				     char *buf, int state)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct pcie_link_state *link;
> +	int val;
> +
> +	link = aspm_get_parent_link(pdev);
> +	if (!link)
> +		return -EOPNOTSUPP;
> +
> +	mutex_lock(&aspm_lock);
> +	val = !!(link->aspm_enabled & state);

I'm not a huge fan of "!!".  There are several uses in this file, but
I think this:

  enabled = link->aspm_enabled & state;
  ...
  return sprintf(buf, "%d\n", enabled ? 1 : 0);

is clearer.

> +	mutex_unlock(&aspm_lock);
> +
> +	return sprintf(buf, "%d\n", val);
> +}
> +
> +static ssize_t aspm_attr_store_common(struct device *dev,
> +				      struct device_attribute *attr,
> +				      const char *buf, size_t len, int state)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct pcie_link_state *link;
> +	bool state_enable;
> +
> +	if (aspm_disabled)
> +		return -EPERM;
> +
> +	link = aspm_get_parent_link(pdev);
> +	if (!link)
> +		return -EOPNOTSUPP;
> +
> +	if (!(link->aspm_capable & state))
> +		return -EOPNOTSUPP;
> +
> +	if (strtobool(buf, &state_enable) < 0)
> +		return -EINVAL;
> +
> +	down_read(&pci_bus_sem);
> +	mutex_lock(&aspm_lock);
> +
> +	if (state_enable) {
> +		link->aspm_disable &= ~state;
> +		/* need to enable L1 for sub-states */
> +		if (state & ASPM_STATE_L1SS)
> +			link->aspm_disable &= ~ASPM_STATE_L1;
> +	} else {
> +		link->aspm_disable |= state;
> +	}
> +
> +	pcie_config_aspm_link(link, policy_to_aspm_state(link));
> +
> +	mutex_unlock(&aspm_lock);
> +	up_read(&pci_bus_sem);
> +
> +	return len;
> +}
> +
> +#define ASPM_ATTR(_f, _s)						\
> +static ssize_t aspm_##_f##_show(struct device *dev,			\
> +			struct device_attribute *attr, char *buf)	\
> +{ return aspm_attr_show_common(dev, attr, buf, ASPM_STATE_##_s); }	\
> +									\
> +static ssize_t aspm_##_f##_store(struct device *dev,			\
> +				 struct device_attribute *attr,		\
> +				 const char *buf, size_t len)		\
> +{ return aspm_attr_store_common(dev, attr, buf, len, ASPM_STATE_##_s); }
> +
> +ASPM_ATTR(l0s, L0S)
> +ASPM_ATTR(l1, L1)
> +ASPM_ATTR(l1_1, L1_1)
> +ASPM_ATTR(l1_2, L1_2)
> +ASPM_ATTR(l1_1_pcipm, L1_1_PCIPM)
> +ASPM_ATTR(l1_2_pcipm, L1_2_PCIPM)
> +
> +static ssize_t aspm_clkpm_show(struct device *dev,
> +			       struct device_attribute *attr, char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct pcie_link_state *link;
> +	int val;
> +
> +	link = aspm_get_parent_link(pdev);
> +	if (!link)
> +		return -EOPNOTSUPP;
> +
> +	mutex_lock(&aspm_lock);
> +	val = link->clkpm_enabled;
> +	mutex_unlock(&aspm_lock);
> +
> +	return sprintf(buf, "%d\n", val);
> +}
> +
> +static ssize_t aspm_clkpm_store(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t len)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct pcie_link_state *link;
> +	bool state_enable;
> +
> +	if (aspm_disabled)
> +		return -EPERM;
> +
> +	link = aspm_get_parent_link(pdev);
> +	if (!link)
> +		return -EOPNOTSUPP;
> +
> +	if (!link->clkpm_capable)
> +		return -EOPNOTSUPP;
> +
> +	if (strtobool(buf, &state_enable) < 0)
> +		return -EINVAL;
> +
> +	down_read(&pci_bus_sem);
> +	mutex_lock(&aspm_lock);
> +
> +	link->clkpm_disable = !state_enable;
> +	pcie_set_clkpm(link, policy_to_clkpm_state(link));
> +
> +	mutex_unlock(&aspm_lock);
> +	up_read(&pci_bus_sem);
> +
> +	return len;
> +}
> +
> +static DEVICE_ATTR_RW(aspm_l0s);
> +static DEVICE_ATTR_RW(aspm_l1);
> +static DEVICE_ATTR_RW(aspm_l1_1);
> +static DEVICE_ATTR_RW(aspm_l1_2);
> +static DEVICE_ATTR_RW(aspm_l1_1_pcipm);
> +static DEVICE_ATTR_RW(aspm_l1_2_pcipm);
> +static DEVICE_ATTR_RW(aspm_clkpm);
> +
> +static struct attribute *aspm_ctrl_attrs[] = {
> +	&dev_attr_aspm_l0s.attr,
> +	&dev_attr_aspm_l1.attr,
> +	&dev_attr_aspm_l1_1.attr,
> +	&dev_attr_aspm_l1_2.attr,
> +	&dev_attr_aspm_l1_1_pcipm.attr,
> +	&dev_attr_aspm_l1_2_pcipm.attr,
> +	&dev_attr_aspm_clkpm.attr,
> +	NULL
> +};
> +
> +static umode_t aspm_ctrl_attrs_are_visible(struct kobject *kobj,
> +					   struct attribute *a, int n)
> +{
> +	struct device *dev = kobj_to_dev(kobj);
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +
> +	return pcie_check_valid_aspm_endpoint(pdev) ? a->mode : 0;
> +}
> +
> +const struct attribute_group aspm_ctrl_attr_group = {
> +	.name = "aspm",
> +	.attrs = aspm_ctrl_attrs,
> +	.is_visible = aspm_ctrl_attrs_are_visible,
> +};
> +
>  static int __init pcie_aspm_disable(char *str)
>  {
>  	if (!strcmp(str, "off")) {
> -- 
> 2.23.0
> 
> 
>
Heiner Kallweit Sept. 29, 2019, 5:15 p.m. UTC | #2
On 07.09.2019 22:32, Bjorn Helgaas wrote:
> On Sat, Aug 31, 2019 at 10:20:47PM +0200, Heiner Kallweit wrote:
>> Background of this extension is a problem with the r8169 network driver.
>> Several combinations of board chipsets and network chip versions have
>> problems if ASPM is enabled, therefore we have to disable ASPM per default.
>> However especially on notebooks ASPM can provide significant power-saving,
>> therefore we want to give users the option to enable ASPM. With the new
>> sysfs attributes users can control which ASPM link-states are
>> enabled/disabled.
>>
>> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
>> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>> ---
>> v2:
>> - use a dedicated sysfs attribute per link state
>> - allow separate control of ASPM and PCI PM L1 sub-states
>> v3:
>> - statically allocate the attribute group
>> - replace snprintf with printf
>> - base on top of "PCI: Make pcie_downstream_port() available outside of access.c"
>> v4:
>> - add call to sysfs_update_group because is_visible callback returns false
>>   always at file creation time
>> - simplify code a little
>> v5:
>> - rebased to latest pci/next
>> ---
>>  Documentation/ABI/testing/sysfs-bus-pci |  13 ++
>>  drivers/pci/pci-sysfs.c                 |   7 +
>>  drivers/pci/pci.h                       |   4 +
>>  drivers/pci/pcie/aspm.c                 | 184 ++++++++++++++++++++++++
>>  4 files changed, 208 insertions(+)
>>
>> diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
>> index 8bfee557e..49249a165 100644
>> --- a/Documentation/ABI/testing/sysfs-bus-pci
>> +++ b/Documentation/ABI/testing/sysfs-bus-pci
>> @@ -347,3 +347,16 @@ Description:
>>  		If the device has any Peer-to-Peer memory registered, this
>>  	        file contains a '1' if the memory has been published for
>>  		use outside the driver that owns the device.
>> +
>> +What		/sys/bus/pci/devices/.../aspm/aspm_l0s
>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1
>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_1
>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_2
>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_1_pcipm
>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_2_pcipm
>> +What		/sys/bus/pci/devices/.../aspm/aspm_clkpm
>> +date:		August 2019
> 
> Other entries use "What:" and "Date:" (add colon and capitalize).
> 
> There are no examples in *this* file, but in
> Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd,
> the "What:" is not repeated for each file in the group.
> 
>> +Contact:	Heiner Kallweit <hkallweit1@gmail.com>
>> +Description:	If ASPM is supported for an endpoint, then these files
>> +		can be used to disable or enable the individual
>> +		power management states.
> 
> Please mention the specific details here, e.g., "write 1 to enable, 0
> to disable".
> 
>> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
>> index 868e35109..687240f55 100644
>> --- a/drivers/pci/pci-sysfs.c
>> +++ b/drivers/pci/pci-sysfs.c
>> @@ -1315,6 +1315,10 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev)
>>  
>>  	pcie_vpd_create_sysfs_dev_files(dev);
>>  	pcie_aspm_create_sysfs_dev_files(dev);
>> +#ifdef CONFIG_PCIEASPM
>> +	/* update visibility of attributes in this group */
>> +	sysfs_update_group(&dev->dev.kobj, &aspm_ctrl_attr_group);
>> +#endif
> 
> Isn't there a way to do this in drivers/pci/pcie/aspm.c somehow,
> without using sysfs_update_group()?  There are only three callers of
> it in the tree, and I'd be surprised if ASPM is unique enough to have
> to be the fourth.
> 
At least I didn't find any. The reason seems to be the following:
Static sysfs files are created in pci_scan_single_device ->
pci_device_add. And pci_scan_slot calls pci_scan_single_device
before calling pcie_aspm_init_link_state(bus->self).
This means the pcie_link_state doesn't exist yet and we have to update
visibility of the ASPM sysfs files later.
I'd be happy if I could avoid this visibility update exercise.

>>  	if (dev->reset_fn) {
>>  		retval = device_create_file(&dev->dev, &dev_attr_reset);
>> @@ -1571,6 +1575,9 @@ static const struct attribute_group *pci_dev_attr_groups[] = {
>>  	&pcie_dev_attr_group,
>>  #ifdef CONFIG_PCIEAER
>>  	&aer_stats_attr_group,
>> +#endif
>> +#ifdef CONFIG_PCIEASPM
>> +	&aspm_ctrl_attr_group,
>>  #endif
>>  	NULL,
>>  };
>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>> index 44b80186d..9dc3e3673 100644
>> --- a/drivers/pci/pci.h
>> +++ b/drivers/pci/pci.h
>> @@ -659,4 +659,8 @@ static inline int pci_acpi_program_hp_params(struct pci_dev *dev)
>>  }
>>  #endif
>>  
>> +#ifdef CONFIG_PCIEASPM
>> +extern const struct attribute_group aspm_ctrl_attr_group;
>> +#endif
>> +
>>  #endif /* DRIVERS_PCI_H */
>> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
>> index f044ae4d1..ce3425125 100644
>> --- a/drivers/pci/pcie/aspm.c
>> +++ b/drivers/pci/pcie/aspm.c
>> @@ -1287,6 +1287,190 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
>>  }
>>  #endif
>>  
>> +static struct pcie_link_state *aspm_get_parent_link(struct pci_dev *pdev)
> 
> I know the ASPM code is pretty confused, but I don't think "parent
> link" really makes sense.  "Parent" implies a parent/child
> relationship, but a link doesn't have a parent or a child; it only has
> an upstream end and a downstream end.
> 
I basically copied this "parent" stuff from __pci_disable_link_state.
Fine with me to change the naming.
What confuses me a little is that we have different versions of getting
the pcie_link_state for a pci_dev in:

- this new function of mine
- __pci_disable_link_state
- pcie_aspm_enabled

The latter uses pci_upstream_bridge instead of accessing pdev->bus->self
directly and doesn't include the call to pcie_downstream_port.
I wonder whether the functionality could be factored out to a generic
helper that works in all these places.

> Anyway, any given PCIe device has either zero or one link associated
> with it, so something like "aspm_get_link()" would be unambiguous all
> by itself.
> 
>> +{
>> +	struct pci_dev *parent = pdev->bus->self;
>> +
>> +	if (pcie_downstream_port(pdev))
>> +		parent = pdev;
>> +
>> +	return parent ? parent->link_state : NULL;
>> +}
>> +
>> +static bool pcie_check_valid_aspm_endpoint(struct pci_dev *pdev)
> 
> Maybe "pcie_is_aspm_dev()" or similar?  I think we may want to include
> more than just endpoints (see below).  "Check" in function names is a
> pet peeve of mine because it doesn't tell us whether it's a pure
> function (as this is) or it has side effects, and it doesn't give a
> hint about what the sense of the return value is.
> 
>> +{
>> +	struct pcie_link_state *link;
>> +
>> +	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
> 
> Do you intend to exclude other Upstream Ports like Legacy Endpoints,
> Upstream Switch Ports, and PCIe-to-PCI/PCI-X Bridges?  They also have
> a link leading to them, so we might want them to have knobs as well.
> Or if we don't want the knobs, a comment about why not would be
> useful.
> 
My use case is about endpoints only and I'm not really a PCI expert.
Based on your list in addition to PCI_EXP_TYPE_ENDPOINT we'd enable
the ASPM sysfs files for:
- PCI_EXP_TYPE_LEG_END
- PCI_EXP_TYPE_UPSTREAM
- PCI_EXP_TYPE_PCI_BRIDGE
- PCI_EXP_TYPE_PCIE_BRIDGE
If you can confirm the list I'd extend my patch accordingly.

[...]
Bjorn Helgaas Oct. 2, 2019, 7:55 p.m. UTC | #3
On Sun, Sep 29, 2019 at 07:15:05PM +0200, Heiner Kallweit wrote:
> On 07.09.2019 22:32, Bjorn Helgaas wrote:
> > On Sat, Aug 31, 2019 at 10:20:47PM +0200, Heiner Kallweit wrote:
> >> Background of this extension is a problem with the r8169 network driver.
> >> Several combinations of board chipsets and network chip versions have
> >> problems if ASPM is enabled, therefore we have to disable ASPM per default.
> >> However especially on notebooks ASPM can provide significant power-saving,
> >> therefore we want to give users the option to enable ASPM. With the new
> >> sysfs attributes users can control which ASPM link-states are
> >> enabled/disabled.
> >>
> >> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> >> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> >> ---
> >> v2:
> >> - use a dedicated sysfs attribute per link state
> >> - allow separate control of ASPM and PCI PM L1 sub-states
> >> v3:
> >> - statically allocate the attribute group
> >> - replace snprintf with printf
> >> - base on top of "PCI: Make pcie_downstream_port() available outside of access.c"
> >> v4:
> >> - add call to sysfs_update_group because is_visible callback returns false
> >>   always at file creation time
> >> - simplify code a little
> >> v5:
> >> - rebased to latest pci/next
> >> ---
> >>  Documentation/ABI/testing/sysfs-bus-pci |  13 ++
> >>  drivers/pci/pci-sysfs.c                 |   7 +
> >>  drivers/pci/pci.h                       |   4 +
> >>  drivers/pci/pcie/aspm.c                 | 184 ++++++++++++++++++++++++
> >>  4 files changed, 208 insertions(+)
> >>
> >> diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> >> index 8bfee557e..49249a165 100644
> >> --- a/Documentation/ABI/testing/sysfs-bus-pci
> >> +++ b/Documentation/ABI/testing/sysfs-bus-pci
> >> @@ -347,3 +347,16 @@ Description:
> >>  		If the device has any Peer-to-Peer memory registered, this
> >>  	        file contains a '1' if the memory has been published for
> >>  		use outside the driver that owns the device.
> >> +
> >> +What		/sys/bus/pci/devices/.../aspm/aspm_l0s
> >> +What		/sys/bus/pci/devices/.../aspm/aspm_l1
> >> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_1
> >> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_2
> >> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_1_pcipm
> >> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_2_pcipm
> >> +What		/sys/bus/pci/devices/.../aspm/aspm_clkpm
> >> +date:		August 2019

I didn't notice this before, but I wonder if one "aspm" in these paths
would be enough?  E.g., /sys/bus/pci/devices/.../aspm/l0s?

> >> @@ -1315,6 +1315,10 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev)
> >>  
> >>  	pcie_vpd_create_sysfs_dev_files(dev);
> >>  	pcie_aspm_create_sysfs_dev_files(dev);
> >> +#ifdef CONFIG_PCIEASPM
> >> +	/* update visibility of attributes in this group */
> >> +	sysfs_update_group(&dev->dev.kobj, &aspm_ctrl_attr_group);
> >> +#endif
> > 
> > Isn't there a way to do this in drivers/pci/pcie/aspm.c somehow,
> > without using sysfs_update_group()?  There are only three callers of
> > it in the tree, and I'd be surprised if ASPM is unique enough to have
> > to be the fourth.
> > 
> At least I didn't find any. Reason seems to be the following:
> Static sysfs files are created in pci_scan_single_device ->
> pci_device_add. And pci_scan_slot calls pci_scan_single_device
> before calling pcie_aspm_init_link_state(bus->self).
> Means the pcie_link_state doesn't exist yet and we have to update
> visibility of the ASPM sysfs files later.

Ah, I see.  I think it's this call graph:

  pci_scan_slot
    pci_scan_single_device
      pci_scan_device
      pci_device_add
	pci_init_capabilities
	device_add
	  device_add_attrs
	    device_add_groups(dev->type->groups)
	      sysfs_create_groups         # <-- sysfs files created
    pcie_aspm_init_link_state(bridge)     # <-- link_states allocated

I think this part of the ASPM code is a little bit broken -- we wait
to initialize ASPM until we've enumerated all the devices on the link.
I think it would be better to initialize it somewhere in
pci_device_add(), maybe pci_init_capabilities(), which would solve
this ordering problem.  That's a pretty big project that can be done
later.

But I *think* we should be able to at least move the
sysfs_update_group() to the end of pcie_aspm_init_link_state().  We'd
have to iterate over the subordinate->devices, but it would at least
be in the ASPM code where we'll see it if/when we rework the
initialization.

> >> +static struct pcie_link_state *aspm_get_parent_link(struct pci_dev *pdev)
> > 
> > I know the ASPM code is pretty confused, but I don't think "parent
> > link" really makes sense.  "Parent" implies a parent/child
> > relationship, but a link doesn't have a parent or a child; it only has
> > an upstream end and a downstream end.
> > 
> I basically copied this "parent" stuff from __pci_disable_link_state.
> Fine with me to change the naming.
> What confuses me a little is that we have different versions of getting
> the pcie_link_state for a pci_dev in:
> 
> - this new function of mine
> - __pci_disable_link_state
> - pcie_aspm_enabled
> 
> The latter uses pci_upstream_bridge instead of accessing pdev->bus->self
> directly and doesn't include the call to pcie_downstream_port.
> I wonder whether the functionality could be factored out to a generic
> helper that works in all these places.

Definitely.  I think your pcie_aspm_get_link() (from the v6 patch)
could be used directly in those places.  You could add a new patch
that just adds pcie_aspm_get_link() and uses it.

> >> +{
> >> +	struct pci_dev *parent = pdev->bus->self;
> >> +
> >> +	if (pcie_downstream_port(pdev))
> >> +		parent = pdev;
> >> +
> >> +	return parent ? parent->link_state : NULL;
> >> +}
> >> +
> >> +static bool pcie_check_valid_aspm_endpoint(struct pci_dev *pdev)
> >> +{
> >> +	struct pcie_link_state *link;
> >> +
> >> +	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
> > 
> > Do you intend to exclude other Upstream Ports like Legacy Endpoints,
> > Upstream Switch Ports, and PCIe-to-PCI/PCI-X Bridges?  They also have
> > a link leading to them, so we might want them to have knobs as well.
> > Or if we don't want the knobs, a comment about why not would be
> > useful.
> > 
> My use case is about endpoints only and I'm not really a PCI expert.
> Based on your list in addition to PCI_EXP_TYPE_ENDPOINT we'd enable
> the ASPM sysfs fils for:
> - PCI_EXP_TYPE_LEG_END
> - PCI_EXP_TYPE_UPSTREAM
> - PCI_EXP_TYPE_PCI_BRIDGE
> - PCI_EXP_TYPE_PCIE_BRIDGE
> If you can confirm the list I'd extend my patch accordingly.

Yes, I think the list would be right, but looking at this again, I
don't think you need this function at all -- you can just use
pcie_aspm_get_link().  Then aspm_ctrl_attrs_are_visible() uses exactly
the same test as the show/store functions.  Actually, I think then you
could omit the "if (!link)" tests from the show/store functions
because those functions can never be called unless
aspm_ctrl_attrs_are_visible() found a link.

Bjorn
Heiner Kallweit Oct. 2, 2019, 9:10 p.m. UTC | #4
On 02.10.2019 21:55, Bjorn Helgaas wrote:
> On Sun, Sep 29, 2019 at 07:15:05PM +0200, Heiner Kallweit wrote:
>> On 07.09.2019 22:32, Bjorn Helgaas wrote:
>>> On Sat, Aug 31, 2019 at 10:20:47PM +0200, Heiner Kallweit wrote:
>>>> Background of this extension is a problem with the r8169 network driver.
>>>> Several combinations of board chipsets and network chip versions have
>>>> problems if ASPM is enabled, therefore we have to disable ASPM per default.
>>>> However especially on notebooks ASPM can provide significant power-saving,
>>>> therefore we want to give users the option to enable ASPM. With the new
>>>> sysfs attributes users can control which ASPM link-states are
>>>> enabled/disabled.
>>>>
>>>> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
>>>> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>>>> ---
>>>> v2:
>>>> - use a dedicated sysfs attribute per link state
>>>> - allow separate control of ASPM and PCI PM L1 sub-states
>>>> v3:
>>>> - statically allocate the attribute group
>>>> - replace snprintf with printf
>>>> - base on top of "PCI: Make pcie_downstream_port() available outside of access.c"
>>>> v4:
>>>> - add call to sysfs_update_group because is_visible callback returns false
>>>>   always at file creation time
>>>> - simplify code a little
>>>> v5:
>>>> - rebased to latest pci/next
>>>> ---
>>>>  Documentation/ABI/testing/sysfs-bus-pci |  13 ++
>>>>  drivers/pci/pci-sysfs.c                 |   7 +
>>>>  drivers/pci/pci.h                       |   4 +
>>>>  drivers/pci/pcie/aspm.c                 | 184 ++++++++++++++++++++++++
>>>>  4 files changed, 208 insertions(+)
>>>>
>>>> diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
>>>> index 8bfee557e..49249a165 100644
>>>> --- a/Documentation/ABI/testing/sysfs-bus-pci
>>>> +++ b/Documentation/ABI/testing/sysfs-bus-pci
>>>> @@ -347,3 +347,16 @@ Description:
>>>>  		If the device has any Peer-to-Peer memory registered, this
>>>>  	        file contains a '1' if the memory has been published for
>>>>  		use outside the driver that owns the device.
>>>> +
>>>> +What		/sys/bus/pci/devices/.../aspm/aspm_l0s
>>>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1
>>>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_1
>>>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_2
>>>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_1_pcipm
>>>> +What		/sys/bus/pci/devices/.../aspm/aspm_l1_2_pcipm
>>>> +What		/sys/bus/pci/devices/.../aspm/aspm_clkpm
>>>> +date:		August 2019
> 
> I didn't notice this before, but I wonder if one "aspm" in these paths
> would be enough?  E.g., /sys/bus/pci/devices/.../aspm/l0s?
> 
Yes, that should be fine.

>>>> @@ -1315,6 +1315,10 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev)
>>>>  
>>>>  	pcie_vpd_create_sysfs_dev_files(dev);
>>>>  	pcie_aspm_create_sysfs_dev_files(dev);
>>>> +#ifdef CONFIG_PCIEASPM
>>>> +	/* update visibility of attributes in this group */
>>>> +	sysfs_update_group(&dev->dev.kobj, &aspm_ctrl_attr_group);
>>>> +#endif
>>>
>>> Isn't there a way to do this in drivers/pci/pcie/aspm.c somehow,
>>> without using sysfs_update_group()?  There are only three callers of
>>> it in the tree, and I'd be surprised if ASPM is unique enough to have
>>> to be the fourth.
>>>
>> At least I didn't find any. Reason seems to be the following:
>> Static sysfs files are created in pci_scan_single_device ->
>> pci_device_add. And pci_scan_slot calls pci_scan_single_device
>> before calling pcie_aspm_init_link_state(bus->self).
>> Means the pcie_link_state doesn't exist yet and we have to update
>> visibility of the ASPM sysfs files later.
> 
> Ah, I see.  I think it's this call graph:
> 
>   pci_scan_slot
>     pci_scan_single_device
>       pci_scan_device
>       pci_device_add
> 	pci_init_capabilities
> 	device_add
> 	  device_add_attrs
> 	    device_add_groups(dev->type->groups)
> 	      sysfs_create_groups         # <-- sysfs files created
>     pcie_aspm_init_link_state(bridge)     # <-- link_states allocated
> 
> I think this part of the ASPM code is a little bit broken -- we wait
> to initialize ASPM until we've enumerated all the devices on the link.
> I think it would be better to initialize it somewhere in
> pci_device_add(), maybe pci_init_capabilities(), which would solve
> this ordering problem.  That's a pretty big project that can be done
> later.
> 
> But I *think* we should be able to at least move the
> sysfs_update_group() to the end of pcie_aspm_init_link_state().  We'd
> have to iterate over the subordinate->devices, but it would at least
> be in the ASPM code where we'll see it if/when we rework the
> initialization.
> 

OK

>>>> +static struct pcie_link_state *aspm_get_parent_link(struct pci_dev *pdev)
>>>
>>> I know the ASPM code is pretty confused, but I don't think "parent
>>> link" really makes sense.  "Parent" implies a parent/child
>>> relationship, but a link doesn't have a parent or a child; it only has
>>> an upstream end and a downstream end.
>>>
>> I basically copied this "parent" stuff from __pci_disable_link_state.
>> Fine with me to change the naming.
>> What confuses me a little is that we have different versions of getting
>> the pcie_link_state for a pci_dev in:
>>
>> - this new function of mine
>> - __pci_disable_link_state
>> - pcie_aspm_enabled
>>
>> The latter uses pci_upstream_bridge instead of accessing pdev->bus->self
>> directly and doesn't include the call to pcie_downstream_port.
>> I wonder whether the functionality could be factored out to a generic
>> helper that works in all these places.
> 
> Definitely.  I think your pcie_aspm_get_link() (from the v6 patch)
> could be used directly in those places.  You could add a new patch
> that just adds pcie_aspm_get_link() and uses it.
> 

OK

>>>> +{
>>>> +	struct pci_dev *parent = pdev->bus->self;
>>>> +
>>>> +	if (pcie_downstream_port(pdev))
>>>> +		parent = pdev;
>>>> +
>>>> +	return parent ? parent->link_state : NULL;
>>>> +}
>>>> +
>>>> +static bool pcie_check_valid_aspm_endpoint(struct pci_dev *pdev)
>>>> +{
>>>> +	struct pcie_link_state *link;
>>>> +
>>>> +	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
>>>
>>> Do you intend to exclude other Upstream Ports like Legacy Endpoints,
>>> Upstream Switch Ports, and PCIe-to-PCI/PCI-X Bridges?  They also have
>>> a link leading to them, so we might want them to have knobs as well.
>>> Or if we don't want the knobs, a comment about why not would be
>>> useful.
>>>
>> My use case is about endpoints only and I'm not really a PCI expert.
>> Based on your list in addition to PCI_EXP_TYPE_ENDPOINT we'd enable
>> the ASPM sysfs files for:
>> - PCI_EXP_TYPE_LEG_END
>> - PCI_EXP_TYPE_UPSTREAM
>> - PCI_EXP_TYPE_PCI_BRIDGE
>> - PCI_EXP_TYPE_PCIE_BRIDGE
>> If you can confirm the list I'd extend my patch accordingly.
> 
> Yes, I think the list would be right, but looking at this again, I
> don't think you need this function at all -- you can just use
> pcie_aspm_get_link().  Then aspm_ctrl_attrs_are_visible() uses exactly
> the same test as the show/store functions.  Actually, I think then you
> could omit the "if (!link)" tests from the show/store functions
> because those functions can never be called unless
> aspm_ctrl_attrs_are_visible() found a link.
> 
Right, the !link checks can be removed from the show/store functions.
In pcie_is_aspm_dev() I think we need to check at least whether
device is PCIe and whether link is ASPM-capable. Making the sysfs
attributes visible for a non-PCIe device doesn't make sense,
the same applies to PCIe devices with a link that is not ASPM-capable.

> Bjorn
> 
Heiner
Bjorn Helgaas Oct. 2, 2019, 10:10 p.m. UTC | #5
On Wed, Oct 02, 2019 at 11:10:55PM +0200, Heiner Kallweit wrote:
> On 02.10.2019 21:55, Bjorn Helgaas wrote:
> > On Sun, Sep 29, 2019 at 07:15:05PM +0200, Heiner Kallweit wrote:
> >> On 07.09.2019 22:32, Bjorn Helgaas wrote:
> >>> On Sat, Aug 31, 2019 at 10:20:47PM +0200, Heiner Kallweit wrote:

> >>>> +static struct pcie_link_state *aspm_get_parent_link(struct pci_dev *pdev)
> >>>
> >>> I know the ASPM code is pretty confused, but I don't think "parent
> >>> link" really makes sense.  "Parent" implies a parent/child
> >>> relationship, but a link doesn't have a parent or a child; it only has
> >>> an upstream end and a downstream end.
> >>>
> >> I basically copied this "parent" stuff from __pci_disable_link_state.
> >> Fine with me to change the naming.
> >> What confuses me a little is that we have different versions of getting
> >> the pcie_link_state for a pci_dev in:
> >>
> >> - this new function of mine
> >> - __pci_disable_link_state
> >> - pcie_aspm_enabled
> >>
> >> The latter uses pci_upstream_bridge instead of accessing pdev->bus->self
> >> directly and doesn't include the call to pcie_downstream_port.
> >> I wonder whether the functionality could be factored out to a generic
> >> helper that works in all these places.
> > 
> > Definitely.  I think your pcie_aspm_get_link() (from the v6 patch)
> > could be used directly in those places.  You could add a new patch
> > that just adds pcie_aspm_get_link() and uses it.
> > 
> 
> OK
> 
> >>>> +{
> >>>> +	struct pci_dev *parent = pdev->bus->self;
> >>>> +
> >>>> +	if (pcie_downstream_port(pdev))
> >>>> +		parent = pdev;
> >>>> +
> >>>> +	return parent ? parent->link_state : NULL;
> >>>> +}
> >>>> +
> >>>> +static bool pcie_check_valid_aspm_endpoint(struct pci_dev *pdev)
> >>>> +{
> >>>> +	struct pcie_link_state *link;
> >>>> +
> >>>> +	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
> >>>
> >>> Do you intend to exclude other Upstream Ports like Legacy Endpoints,
> >>> Upstream Switch Ports, and PCIe-to-PCI/PCI-X Bridges?  They also have
> >>> a link leading to them, so we might want them to have knobs as well.
> >>> Or if we don't want the knobs, a comment about why not would be
> >>> useful.
> >>>
> >> My use case is about endpoints only and I'm not really a PCI expert.
> >> Based on your list in addition to PCI_EXP_TYPE_ENDPOINT we'd enable
> >> the ASPM sysfs files for:
> >> - PCI_EXP_TYPE_LEG_END
> >> - PCI_EXP_TYPE_UPSTREAM
> >> - PCI_EXP_TYPE_PCI_BRIDGE
> >> - PCI_EXP_TYPE_PCIE_BRIDGE
> >> If you can confirm the list I'd extend my patch accordingly.
> > 
> > Yes, I think the list would be right, but looking at this again, I
> > don't think you need this function at all -- you can just use
> > pcie_aspm_get_link().  Then aspm_ctrl_attrs_are_visible() uses exactly
> > the same test as the show/store functions.  Actually, I think then you
> > could omit the "if (!link)" tests from the show/store functions
> > because those functions can never be called unless
> > aspm_ctrl_attrs_are_visible() found a link.
> > 
> Right, the !link checks can be removed from the show/store functions.
> In pcie_is_aspm_dev() I think we need to check at least whether
> device is PCIe and whether link is ASPM-capable. Making the sysfs
> attributes visible for a non-PCIe device doesn't make sense,
> the same applies to PCIe devices with a link that is not ASPM-capable.

I agree we don't want these attributes visible for non-PCIe or
non-ASPM-capable situations, but I think you can do this:

  static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev)
  {
    struct pci_dev *bridge = pci_upstream_bridge(pdev);

    if (bridge)
      return bridge->link_state;

    return NULL;
  }

  static umode_t aspm_ctrl_attrs_are_visible(...)
  {
    ...
    if (pcie_aspm_get_link(pdev))
      return a->mode;

    return 0;
  }

We can rely on pcie_aspm_init_link_state() to only set
bridge->link_state if the devices on both ends of the link are PCIe
and support ASPM.
Heiner Kallweit Oct. 2, 2019, 10:23 p.m. UTC | #6
On 03.10.2019 00:10, Bjorn Helgaas wrote:
> On Wed, Oct 02, 2019 at 11:10:55PM +0200, Heiner Kallweit wrote:
>> On 02.10.2019 21:55, Bjorn Helgaas wrote:
>>> On Sun, Sep 29, 2019 at 07:15:05PM +0200, Heiner Kallweit wrote:
>>>> On 07.09.2019 22:32, Bjorn Helgaas wrote:
>>>>> On Sat, Aug 31, 2019 at 10:20:47PM +0200, Heiner Kallweit wrote:
> 
>>>>>> +static struct pcie_link_state *aspm_get_parent_link(struct pci_dev *pdev)
>>>>>
>>>>> I know the ASPM code is pretty confused, but I don't think "parent
>>>>> link" really makes sense.  "Parent" implies a parent/child
>>>>> relationship, but a link doesn't have a parent or a child; it only has
>>>>> an upstream end and a downstream end.
>>>>>
>>>> I basically copied this "parent" stuff from __pci_disable_link_state.
>>>> Fine with me to change the naming.
>>>> What confuses me a little is that we have different versions of getting
>>>> the pcie_link_state for a pci_dev in:
>>>>
>>>> - this new function of mine
>>>> - __pci_disable_link_state
>>>> - pcie_aspm_enabled
>>>>
>>>> The latter uses pci_upstream_bridge instead of accessing pdev->bus->self
>>>> directly and doesn't include the call to pcie_downstream_port.
>>>> I wonder whether the functionality could be factored out to a generic
>>>> helper that works in all these places.
>>>
>>> Definitely.  I think your pcie_aspm_get_link() (from the v6 patch)
>>> could be used directly in those places.  You could add a new patch
>>> that just adds pcie_aspm_get_link() and uses it.
>>>
>>
>> OK
>>
>>>>>> +{
>>>>>> +	struct pci_dev *parent = pdev->bus->self;
>>>>>> +
>>>>>> +	if (pcie_downstream_port(pdev))
>>>>>> +		parent = pdev;
>>>>>> +
>>>>>> +	return parent ? parent->link_state : NULL;
>>>>>> +}
>>>>>> +
>>>>>> +static bool pcie_check_valid_aspm_endpoint(struct pci_dev *pdev)
>>>>>> +{
>>>>>> +	struct pcie_link_state *link;
>>>>>> +
>>>>>> +	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
>>>>>
>>>>> Do you intend to exclude other Upstream Ports like Legacy Endpoints,
>>>>> Upstream Switch Ports, and PCIe-to-PCI/PCI-X Bridges?  They also have
>>>>> a link leading to them, so we might want them to have knobs as well.
>>>>> Or if we don't want the knobs, a comment about why not would be
>>>>> useful.
>>>>>
>>>> My use case is about endpoints only and I'm not really a PCI expert.
>>>> Based on your list in addition to PCI_EXP_TYPE_ENDPOINT we'd enable
>>>> the ASPM sysfs files for:
>>>> - PCI_EXP_TYPE_LEG_END
>>>> - PCI_EXP_TYPE_UPSTREAM
>>>> - PCI_EXP_TYPE_PCI_BRIDGE
>>>> - PCI_EXP_TYPE_PCIE_BRIDGE
>>>> If you can confirm the list I'd extend my patch accordingly.
>>>
>>> Yes, I think the list would be right, but looking at this again, I
>>> don't think you need this function at all -- you can just use
>>> pcie_aspm_get_link().  Then aspm_ctrl_attrs_are_visible() uses exactly
>>> the same test as the show/store functions.  Actually, I think then you
>>> could omit the "if (!link)" tests from the show/store functions
>>> because those functions can never be called unless
>>> aspm_ctrl_attrs_are_visible() found a link.
>>>
>> Right, the !link checks can be removed from the show/store functions.
>> In pcie_is_aspm_dev() I think we need to check at least whether
>> device is PCIe and whether link is ASPM-capable. Making the sysfs
>> attributes visible for a non-PCIe device doesn't make sense,
>> the same applies to PCIe devices with a link that is not ASPM-capable.
> 
> I agree we don't want these attributes visible for non-PCIe or
> non-ASPM-capable situations, but I think you can do this:
> 
>   static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev)
>   {
>     struct pci_dev *bridge = pci_upstream_bridge(pdev);
> 
>     if (bridge)
>       return bridge->link_state;
> 
>     return NULL;
>   }
> 
>   static umode_t aspm_ctrl_attrs_are_visible(...)
>   {
>     ...
>     if (pcie_aspm_get_link(pdev))
>       return a->mode;
> 
>     return 0;
>   }
> 
> We can rely on pcie_aspm_init_link_state() to only set
> bridge->link_state if the devices on both ends of the link are PCIe
> and support ASPM.
> 
With the first one I agree. However there may be links where e.g. the
bridge doesn't support ASPM. One example is my small Zotac test system:

Intel Corporation Celeron N3350/Pentium N4200/Atom E3900 Series PCI Express Port
 LnkCap: Port #3, Speed 5GT/s, Width x1, ASPM not supported
Heiner Kallweit Oct. 3, 2019, 2:15 p.m. UTC | #7
On 03.10.2019 00:23, Heiner Kallweit wrote:
> On 03.10.2019 00:10, Bjorn Helgaas wrote:
>> On Wed, Oct 02, 2019 at 11:10:55PM +0200, Heiner Kallweit wrote:
>>> On 02.10.2019 21:55, Bjorn Helgaas wrote:
>>>> On Sun, Sep 29, 2019 at 07:15:05PM +0200, Heiner Kallweit wrote:
>>>>> On 07.09.2019 22:32, Bjorn Helgaas wrote:
>>>>>> On Sat, Aug 31, 2019 at 10:20:47PM +0200, Heiner Kallweit wrote:
>>
>>>>>>> +static struct pcie_link_state *aspm_get_parent_link(struct pci_dev *pdev)
>>>>>>
>>>>>> I know the ASPM code is pretty confused, but I don't think "parent
>>>>>> link" really makes sense.  "Parent" implies a parent/child
>>>>>> relationship, but a link doesn't have a parent or a child; it only has
>>>>>> an upstream end and a downstream end.
>>>>>>
>>>>> I basically copied this "parent" stuff from __pci_disable_link_state.
>>>>> Fine with me to change the naming.
>>>>> What confuses me a little is that we have different versions of getting
>>>>> the pcie_link_state for a pci_dev in:
>>>>>
>>>>> - this new function of mine
>>>>> - __pci_disable_link_state
>>>>> - pcie_aspm_enabled
>>>>>
>>>>> The latter uses pci_upstream_bridge instead of accessing pdev->bus->self
>>>>> directly and doesn't include the call to pcie_downstream_port.
>>>>> I wonder whether the functionality could be factored out to a generic
>>>>> helper that works in all these places.
>>>>
>>>> Definitely.  I think your pcie_aspm_get_link() (from the v6 patch)
>>>> could be used directly in those places.  You could add a new patch
>>>> that just adds pcie_aspm_get_link() and uses it.
>>>>
>>>
>>> OK
>>>
>>>>>>> +{
>>>>>>> +	struct pci_dev *parent = pdev->bus->self;
>>>>>>> +
>>>>>>> +	if (pcie_downstream_port(pdev))
>>>>>>> +		parent = pdev;
>>>>>>> +
>>>>>>> +	return parent ? parent->link_state : NULL;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static bool pcie_check_valid_aspm_endpoint(struct pci_dev *pdev)
>>>>>>> +{
>>>>>>> +	struct pcie_link_state *link;
>>>>>>> +
>>>>>>> +	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
>>>>>>
>>>>>> Do you intend to exclude other Upstream Ports like Legacy Endpoints,
>>>>>> Upstream Switch Ports, and PCIe-to-PCI/PCI-X Bridges?  They also have
>>>>>> a link leading to them, so we might want them to have knobs as well.
>>>>>> Or if we don't want the knobs, a comment about why not would be
>>>>>> useful.
>>>>>>
>>>>> My use case is about endpoints only and I'm not really a PCI expert.
>>>>> Based on your list in addition to PCI_EXP_TYPE_ENDPOINT we'd enable
>>>>> the ASPM sysfs files for:
>>>>> - PCI_EXP_TYPE_LEG_END
>>>>> - PCI_EXP_TYPE_UPSTREAM
>>>>> - PCI_EXP_TYPE_PCI_BRIDGE
>>>>> - PCI_EXP_TYPE_PCIE_BRIDGE
>>>>> If you can confirm the list I'd extend my patch accordingly.
>>>>
>>>> Yes, I think the list would be right, but looking at this again, I
>>>> don't think you need this function at all -- you can just use
>>>> pcie_aspm_get_link().  Then aspm_ctrl_attrs_are_visible() uses exactly
>>>> the same test as the show/store functions.  Actually, I think then you
>>>> could omit the "if (!link)" tests from the show/store functions
>>>> because those functions can never be called unless
>>>> aspm_ctrl_attrs_are_visible() found a link.
>>>>
>>> Right, the !link checks can be removed from the show/store functions.
>>> In pcie_is_aspm_dev() I think we need to check at least whether
>>> device is PCIe and whether link is ASPM-capable. Making the sysfs
>>> attributes visible for a non-PCIe device doesn't make sense,
>>> the same applies to PCIe devices with a link that is not ASPM-capable.
>>
>> I agree we don't want these attributes visible for non-PCIe or
>> non-ASPM-capable situations, but I think you can do this:
>>
>>   static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev)
>>   {
>>     struct pci_dev *bridge = pci_upstream_bridge(pdev);
>>
>>     if (bridge)
>>       return bridge->link_state;
>>
>>     return NULL;
>>   }
>>
>>   static umode_t aspm_ctrl_attrs_are_visible(...)
>>   {
>>     ...
>>     if (pcie_aspm_get_link(pdev))
>>       return a->mode;
>>
>>     return 0;
>>   }
>>
>> We can rely on pcie_aspm_init_link_state() to only set
>> bridge->link_state if the devices on both ends of the link are PCIe
>> and support ASPM.
>>
> With the first one I agree. However there may be links where e.g. the
> bridge doesn't support ASPM. One example is my small Zotac test system:
> 
> Intel Corporation Celeron N3350/Pentium N4200/Atom E3900 Series PCI Express Port
>  LnkCap: Port #3, Speed 5GT/s, Width x1, ASPM not supported
> 

After thinking once more about it:
pcie_aspm_get_link() looks like this in my series and w/o a prior call
to pci_is_pcie() we may call pcie_downstream_port() for a non-PCIe
device, which results in a fake PCI_EXP_TYPE_ENDPOINT result.
I don't want to rely on side effects and therefore would like to
keep the call to pci_is_pcie(). I'll submit a v7 and we can continue
to discuss based on that.

static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev)
{
        struct pci_dev *upstream;

        if (pcie_downstream_port(pdev))
                upstream = pdev;
        else
                upstream = pci_upstream_bridge(pdev);

        return upstream ? upstream->link_state : NULL;
}
Bjorn Helgaas Oct. 3, 2019, 4:27 p.m. UTC | #8
On Thu, Oct 03, 2019 at 12:23:28AM +0200, Heiner Kallweit wrote:
> On 03.10.2019 00:10, Bjorn Helgaas wrote:
> > On Wed, Oct 02, 2019 at 11:10:55PM +0200, Heiner Kallweit wrote:
> >> On 02.10.2019 21:55, Bjorn Helgaas wrote:
> >>> On Sun, Sep 29, 2019 at 07:15:05PM +0200, Heiner Kallweit wrote:
> >>>> On 07.09.2019 22:32, Bjorn Helgaas wrote:
> >>>>> On Sat, Aug 31, 2019 at 10:20:47PM +0200, Heiner Kallweit wrote:
> > 
> >>>>>> +static struct pcie_link_state *aspm_get_parent_link(struct pci_dev *pdev)
> >>>>>
> >>>>> I know the ASPM code is pretty confused, but I don't think "parent
> >>>>> link" really makes sense.  "Parent" implies a parent/child
> >>>>> relationship, but a link doesn't have a parent or a child; it only has
> >>>>> an upstream end and a downstream end.
> >>>>>
> >>>> I basically copied this "parent" stuff from __pci_disable_link_state.
> >>>> Fine with me to change the naming.
> >>>> What confuses me a little is that we have different versions of getting
> >>>> the pcie_link_state for a pci_dev in:
> >>>>
> >>>> - this new function of mine
> >>>> - __pci_disable_link_state
> >>>> - pcie_aspm_enabled
> >>>>
> >>>> The latter uses pci_upstream_bridge instead of accessing pdev->bus->self
> >>>> directly and doesn't include the call to pcie_downstream_port.
> >>>> I wonder whether the functionality could be factored out to a generic
> >>>> helper that works in all these places.
> >>>
> >>> Definitely.  I think your pcie_aspm_get_link() (from the v6 patch)
> >>> could be used directly in those places.  You could add a new patch
> >>> that just adds pcie_aspm_get_link() and uses it.
> >>>
> >>
> >> OK
> >>
> >>>>>> +{
> >>>>>> +	struct pci_dev *parent = pdev->bus->self;
> >>>>>> +
> >>>>>> +	if (pcie_downstream_port(pdev))
> >>>>>> +		parent = pdev;
> >>>>>> +
> >>>>>> +	return parent ? parent->link_state : NULL;
> >>>>>> +}
> >>>>>> +
> >>>>>> +static bool pcie_check_valid_aspm_endpoint(struct pci_dev *pdev)
> >>>>>> +{
> >>>>>> +	struct pcie_link_state *link;
> >>>>>> +
> >>>>>> +	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
> >>>>>
> >>>>> Do you intend to exclude other Upstream Ports like Legacy Endpoints,
> >>>>> Upstream Switch Ports, and PCIe-to-PCI/PCI-X Bridges?  They also have
> >>>>> a link leading to them, so we might want them to have knobs as well.
> >>>>> Or if we don't want the knobs, a comment about why not would be
> >>>>> useful.
> >>>>>
> >>>> My use case is about endpoints only and I'm not really a PCI expert.
> >>>> Based on your list in addition to PCI_EXP_TYPE_ENDPOINT we'd enable
> >>>> the ASPM sysfs files for:
> >>>> - PCI_EXP_TYPE_LEG_END
> >>>> - PCI_EXP_TYPE_UPSTREAM
> >>>> - PCI_EXP_TYPE_PCI_BRIDGE
> >>>> - PCI_EXP_TYPE_PCIE_BRIDGE
> >>>> If you can confirm the list I'd extend my patch accordingly.
> >>>
> >>> Yes, I think the list would be right, but looking at this again, I
> >>> don't think you need this function at all -- you can just use
> >>> pcie_aspm_get_link().  Then aspm_ctrl_attrs_are_visible() uses exactly
> >>> the same test as the show/store functions.  Actually, I think then you
> >>> could omit the "if (!link)" tests from the show/store functions
> >>> because those functions can never be called unless
> >>> aspm_ctrl_attrs_are_visible() found a link.
> >>>
> >> Right, the !link checks can be removed from the show/store functions.
> >> In pcie_is_aspm_dev() I think we need to check at least whether
> >> device is PCIe and whether link is ASPM-capable. Making the sysfs
> >> attributes visible for a non-PCIe device doesn't make sense,
> >> the same applies to PCIe devices with a link that is not ASPM-capable.
> > 
> > I agree we don't want these attributes visible for non-PCIe or
> > non-ASPM-capable situations, but I think you can do this:
> > 
> >   static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev)
> >   {
> >     struct pci_dev *bridge = pci_upstream_bridge(pdev);
> > 
> >     if (bridge)
> >       return bridge->link_state;
> > 
> >     return NULL;
> >   }
> > 
> >   static umode_t aspm_ctrl_attrs_are_visible(...)
> >   {
> >     ...
> >     if (pcie_aspm_get_link(pdev))
> >       return a->mode;
> > 
> >     return 0;
> >   }
> > 
> > We can rely on pcie_aspm_init_link_state() to only set
> > bridge->link_state if the devices on both ends of the link are PCIe
> > and support ASPM.
> > 
> With the first one I agree. However there may be links where e.g. the
> bridge doesn't support ASPM. One example is my small Zotac test system:
> 
> Intel Corporation Celeron N3350/Pentium N4200/Atom E3900 Series PCI Express Port
>  LnkCap: Port #3, Speed 5GT/s, Width x1, ASPM not supported

Oh, I'm sorry, you're right!  I was thinking that "obviously,
pcie_aspm_init_link_state() wouldn't allocate link_state if the port
didn't support ASPM," but I hadn't bothered to actually verify that,
and my assumption was wrong.

I think we *do* currently allocate link_state even if the port doesn't
support ASPM.  That seems a little strange, but since we use
link_state to manage both ASPM and Clock Power Management, I guess we
probably need it if the port supports either one.

That raises the question of how we handle all these attributes.
If I'm reading this patch right, we currently add all these files
(l0s, l1, l1_1, l1_2, l1_1_pcipm, l1_2_pcipm, clkpm) as a group, even
though only a subset may be supported on a particular link.

It might be a little messy, but I think they will be more useful if we
only make the ones that are actually supported visible.  It looks like
x86_pmu_events_group does something along this line; maybe we could
leverage that strategy.

It's also not 100% right that we have filenames like:

  aspm/l1_1_pcipm
  aspm/clkpm

because those are not directly connected to ASPM.

/sys/devices/pci0000:00/0000:00:14.2/power/ doesn't seem like quite
the right place because it's full of generic Linux power management
stuff, not hardware-level things like ASPM and Clock Power management.

I wonder if we need a "pcie/" or "pcie_link/" or "link/" directory.
Or maybe "link_pm"?  I think Rajat's current proposal for AER stats is
"/sys/devices/pci0000:00/0000:00:1c.0/aer_stats/".  We should figure
out some way to harmonize these because I'm sure we'll have more in
the future.

I don't have a good suggestion.  One possibility:

  link_pm/l0s_aspm
  link_pm/l1_aspm
  link_pm/l1_1_aspm
  link_pm/l1_1_pcipm
  link_pm/clkpm

Bjorn
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 8bfee557e..49249a165 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -347,3 +347,16 @@  Description:
 		If the device has any Peer-to-Peer memory registered, this
 	        file contains a '1' if the memory has been published for
 		use outside the driver that owns the device.
+
+What:		/sys/bus/pci/devices/.../aspm/aspm_l0s
+What:		/sys/bus/pci/devices/.../aspm/aspm_l1
+What:		/sys/bus/pci/devices/.../aspm/aspm_l1_1
+What:		/sys/bus/pci/devices/.../aspm/aspm_l1_2
+What:		/sys/bus/pci/devices/.../aspm/aspm_l1_1_pcipm
+What:		/sys/bus/pci/devices/.../aspm/aspm_l1_2_pcipm
+What:		/sys/bus/pci/devices/.../aspm/aspm_clkpm
+Date:		August 2019
+Contact:	Heiner Kallweit <hkallweit1@gmail.com>
+Description:	If ASPM is supported for an endpoint, then these files
+		can be used to disable or enable the individual
+		power management states.
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 868e35109..687240f55 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1315,6 +1315,10 @@  static int pci_create_capabilities_sysfs(struct pci_dev *dev)
 
 	pcie_vpd_create_sysfs_dev_files(dev);
 	pcie_aspm_create_sysfs_dev_files(dev);
+#ifdef CONFIG_PCIEASPM
+	/* update visibility of attributes in this group */
+	sysfs_update_group(&dev->dev.kobj, &aspm_ctrl_attr_group);
+#endif
 
 	if (dev->reset_fn) {
 		retval = device_create_file(&dev->dev, &dev_attr_reset);
@@ -1571,6 +1575,9 @@  static const struct attribute_group *pci_dev_attr_groups[] = {
 	&pcie_dev_attr_group,
 #ifdef CONFIG_PCIEAER
 	&aer_stats_attr_group,
+#endif
+#ifdef CONFIG_PCIEASPM
+	&aspm_ctrl_attr_group,
 #endif
 	NULL,
 };
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 44b80186d..9dc3e3673 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -659,4 +659,8 @@  static inline int pci_acpi_program_hp_params(struct pci_dev *dev)
 }
 #endif
 
+#ifdef CONFIG_PCIEASPM
+extern const struct attribute_group aspm_ctrl_attr_group;
+#endif
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index f044ae4d1..ce3425125 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -1287,6 +1287,190 @@  void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
 }
 #endif
 
+static struct pcie_link_state *aspm_get_parent_link(struct pci_dev *pdev)
+{
+	struct pci_dev *parent = pdev->bus->self;
+
+	if (pcie_downstream_port(pdev))
+		parent = pdev;
+
+	return parent ? parent->link_state : NULL;
+}
+
+static bool pcie_check_valid_aspm_endpoint(struct pci_dev *pdev)
+{
+	struct pcie_link_state *link;
+
+	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
+		return false;
+
+	link = aspm_get_parent_link(pdev);
+
+	return link && link->aspm_capable;
+}
+
+static ssize_t aspm_attr_show_common(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf, int state)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcie_link_state *link;
+	int val;
+
+	link = aspm_get_parent_link(pdev);
+	if (!link)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&aspm_lock);
+	val = !!(link->aspm_enabled & state);
+	mutex_unlock(&aspm_lock);
+
+	return sprintf(buf, "%d\n", val);
+}
+
+static ssize_t aspm_attr_store_common(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t len, int state)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcie_link_state *link;
+	bool state_enable;
+
+	if (aspm_disabled)
+		return -EPERM;
+
+	link = aspm_get_parent_link(pdev);
+	if (!link)
+		return -EOPNOTSUPP;
+
+	if (!(link->aspm_capable & state))
+		return -EOPNOTSUPP;
+
+	if (strtobool(buf, &state_enable) < 0)
+		return -EINVAL;
+
+	down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+
+	if (state_enable) {
+		link->aspm_disable &= ~state;
+		/* need to enable L1 for sub-states */
+		if (state & ASPM_STATE_L1SS)
+			link->aspm_disable &= ~ASPM_STATE_L1;
+	} else {
+		link->aspm_disable |= state;
+	}
+
+	pcie_config_aspm_link(link, policy_to_aspm_state(link));
+
+	mutex_unlock(&aspm_lock);
+	up_read(&pci_bus_sem);
+
+	return len;
+}
+
+#define ASPM_ATTR(_f, _s)						\
+static ssize_t aspm_##_f##_show(struct device *dev,			\
+			struct device_attribute *attr, char *buf)	\
+{ return aspm_attr_show_common(dev, attr, buf, ASPM_STATE_##_s); }	\
+									\
+static ssize_t aspm_##_f##_store(struct device *dev,			\
+				 struct device_attribute *attr,		\
+				 const char *buf, size_t len)		\
+{ return aspm_attr_store_common(dev, attr, buf, len, ASPM_STATE_##_s); }
+
+ASPM_ATTR(l0s, L0S)
+ASPM_ATTR(l1, L1)
+ASPM_ATTR(l1_1, L1_1)
+ASPM_ATTR(l1_2, L1_2)
+ASPM_ATTR(l1_1_pcipm, L1_1_PCIPM)
+ASPM_ATTR(l1_2_pcipm, L1_2_PCIPM)
+
+static ssize_t aspm_clkpm_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcie_link_state *link;
+	int val;
+
+	link = aspm_get_parent_link(pdev);
+	if (!link)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&aspm_lock);
+	val = link->clkpm_enabled;
+	mutex_unlock(&aspm_lock);
+
+	return sprintf(buf, "%d\n", val);
+}
+
+static ssize_t aspm_clkpm_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t len)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcie_link_state *link;
+	bool state_enable;
+
+	if (aspm_disabled)
+		return -EPERM;
+
+	link = aspm_get_parent_link(pdev);
+	if (!link)
+		return -EOPNOTSUPP;
+
+	if (!link->clkpm_capable)
+		return -EOPNOTSUPP;
+
+	if (strtobool(buf, &state_enable) < 0)
+		return -EINVAL;
+
+	down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+
+	link->clkpm_disable = !state_enable;
+	pcie_set_clkpm(link, policy_to_clkpm_state(link));
+
+	mutex_unlock(&aspm_lock);
+	up_read(&pci_bus_sem);
+
+	return len;
+}
+
+static DEVICE_ATTR_RW(aspm_l0s);
+static DEVICE_ATTR_RW(aspm_l1);
+static DEVICE_ATTR_RW(aspm_l1_1);
+static DEVICE_ATTR_RW(aspm_l1_2);
+static DEVICE_ATTR_RW(aspm_l1_1_pcipm);
+static DEVICE_ATTR_RW(aspm_l1_2_pcipm);
+static DEVICE_ATTR_RW(aspm_clkpm);
+
+static struct attribute *aspm_ctrl_attrs[] = {
+	&dev_attr_aspm_l0s.attr,
+	&dev_attr_aspm_l1.attr,
+	&dev_attr_aspm_l1_1.attr,
+	&dev_attr_aspm_l1_2.attr,
+	&dev_attr_aspm_l1_1_pcipm.attr,
+	&dev_attr_aspm_l1_2_pcipm.attr,
+	&dev_attr_aspm_clkpm.attr,
+	NULL
+};
+
+static umode_t aspm_ctrl_attrs_are_visible(struct kobject *kobj,
+					   struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return pcie_check_valid_aspm_endpoint(pdev) ? a->mode : 0;
+}
+
+const struct attribute_group aspm_ctrl_attr_group = {
+	.name = "aspm",
+	.attrs = aspm_ctrl_attrs,
+	.is_visible = aspm_ctrl_attrs_are_visible,
+};
+
 static int __init pcie_aspm_disable(char *str)
 {
 	if (!strcmp(str, "off")) {