diff mbox series

[v4,3/3] pci: intel: Add sysfs attributes to configure pcie link

Message ID d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com
State Changes Requested
Headers show
Series PCI: Add Intel PCIe Driver and respective dt-binding yaml file | expand

Commit Message

Dilip Kota Oct. 21, 2019, 6:39 a.m. UTC
PCIe RC driver on Intel Gateway SoCs have a requirement
of changing link width and speed on the fly.
So add the sysfs attributes to show and store the link
properties.
Add the respective link resize function in pcie DesignWare
framework so that Intel PCIe driver can use during link
width configuration on the fly.

Signed-off-by: Dilip Kota <eswara.kota@linux.intel.com>
---
 drivers/pci/controller/dwc/pcie-designware.c |   9 +++
 drivers/pci/controller/dwc/pcie-designware.h |   3 +
 drivers/pci/controller/dwc/pcie-intel-gw.c   | 112 ++++++++++++++++++++++++++-
 3 files changed, 123 insertions(+), 1 deletion(-)

Comments

Gustavo Pimentel Oct. 21, 2019, 8:40 a.m. UTC | #1
On Mon, Oct 21, 2019 at 7:39:20, Dilip Kota <eswara.kota@linux.intel.com> 
wrote:

> PCIe RC driver on Intel Gateway SoCs have a requirement
> of changing link width and speed on the fly.
> So add the sysfs attributes to show and store the link
> properties.
> Add the respective link resize function in pcie DesignWare
> framework so that Intel PCIe driver can use during link
> width configuration on the fly.
> 
> Signed-off-by: Dilip Kota <eswara.kota@linux.intel.com>
> ---
>  drivers/pci/controller/dwc/pcie-designware.c |   9 +++
>  drivers/pci/controller/dwc/pcie-designware.h |   3 +
>  drivers/pci/controller/dwc/pcie-intel-gw.c   | 112 ++++++++++++++++++++++++++-
>  3 files changed, 123 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
> index 4c391bfd681a..662fdcb4f2d6 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.c
> +++ b/drivers/pci/controller/dwc/pcie-designware.c
> @@ -474,6 +474,15 @@ int dw_pcie_link_up(struct dw_pcie *pci)
>  		(!(val & PCIE_PORT_DEBUG1_LINK_IN_TRAINING)));
>  }
>  
> +void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width)
> +{
> +	u32 val;
> +
> +	val =  dw_pcie_readl_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL);
> +	val &= ~(PORT_MLTI_LNK_WDTH_CHNG | PORT_MLTI_LNK_WDTH);
> +	val |= PORT_MLTI_LNK_WDTH_CHNG | lane_width;
> +	dw_pcie_writel_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL, val);
> +}
>  
>  void dw_pcie_upconfig_setup(struct dw_pcie *pci)
>  {
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> index 3beac10e4a4c..fcf0442341fd 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -67,6 +67,8 @@
>  #define PCIE_MSI_INTR0_STATUS		0x830
>  
>  #define PCIE_PORT_MULTI_LANE_CTRL	0x8C0
> +#define PORT_MLTI_LNK_WDTH		GENMASK(5, 0)
> +#define PORT_MLTI_LNK_WDTH_CHNG		BIT(6)
>  #define PORT_MLTI_UPCFG_SUPPORT		BIT(7)
>  
>  #define PCIE_ATU_VIEWPORT		0x900
> @@ -282,6 +284,7 @@ void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
>  u32 dw_pcie_read_atu(struct dw_pcie *pci, u32 reg, size_t size);
>  void dw_pcie_write_atu(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
>  int dw_pcie_link_up(struct dw_pcie *pci);
> +void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width);
>  void dw_pcie_upconfig_setup(struct dw_pcie *pci);
>  void dw_pcie_link_speed_change(struct dw_pcie *pci, bool enable);
>  void dw_pcie_link_set_n_fts(struct dw_pcie *pci, u32 n_fts);
> diff --git a/drivers/pci/controller/dwc/pcie-intel-gw.c b/drivers/pci/controller/dwc/pcie-intel-gw.c
> index 9142c70db808..b9be0921671d 100644
> --- a/drivers/pci/controller/dwc/pcie-intel-gw.c
> +++ b/drivers/pci/controller/dwc/pcie-intel-gw.c
> @@ -146,6 +146,22 @@ static void intel_pcie_ltssm_disable(struct intel_pcie_port *lpp)
>  	pcie_app_wr_mask(lpp, PCIE_APP_CCR_LTSSM_ENABLE, 0, PCIE_APP_CCR);
>  }
>  
> +static const char *pcie_link_gen_to_str(int gen)
> +{
> +	switch (gen) {
> +	case PCIE_LINK_SPEED_GEN1:
> +		return "2.5";
> +	case PCIE_LINK_SPEED_GEN2:
> +		return "5.0";
> +	case PCIE_LINK_SPEED_GEN3:
> +		return "8.0";
> +	case PCIE_LINK_SPEED_GEN4:
> +		return "16.0";
> +	default:
> +		return "???";
> +	}
> +}
> +
>  static void intel_pcie_link_setup(struct intel_pcie_port *lpp)
>  {
>  	u32 val;
> @@ -444,6 +460,91 @@ static int intel_pcie_host_setup(struct intel_pcie_port *lpp)
>  	return ret;
>  }
>  
> +static ssize_t pcie_link_status_show(struct device *dev,
> +				     struct device_attribute *attr, char *buf)
> +{
> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> +	u32 reg, width, gen;
> +
> +	reg = pcie_rc_cfg_rd(lpp, PCIE_CAP_OFST + PCI_EXP_LNKCTL);
> +	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, reg >> 16);
> +	gen = FIELD_GET(PCI_EXP_LNKSTA_CLS, reg >> 16);
> +
> +	if (gen > lpp->max_speed)
> +		return -EINVAL;
> +
> +	return sprintf(buf, "Port %2u Width x%u Speed %s GT/s\n", lpp->id,
> +		       width, pcie_link_gen_to_str(gen));
> +}
> +static DEVICE_ATTR_RO(pcie_link_status);

Dilip please check pci.h there are there already enums and strings 
relatively to PCIe speed and width, that you can use.

> +
> +static ssize_t pcie_speed_store(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t len)
> +{
> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> +	unsigned long val;
> +	int ret;
> +
> +	ret = kstrtoul(buf, 10, &val);
> +	if (ret)
> +		return ret;
> +
> +	if (val > lpp->max_speed)
> +		return -EINVAL;
> +
> +	lpp->link_gen = val;
> +	intel_pcie_max_speed_setup(lpp);
> +	dw_pcie_link_speed_change(&lpp->pci, false);
> +	dw_pcie_link_speed_change(&lpp->pci, true);
> +
> +	return len;
> +}
> +static DEVICE_ATTR_WO(pcie_speed);
> +
> +/*
> + * Link width change on the fly is not always successful.
> + * It also depends on the partner.
> + */
> +static ssize_t pcie_width_store(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t len)
> +{
> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> +	unsigned long val;
> +	int ret;
> +
> +	lpp = dev_get_drvdata(dev);
> +
> +	ret = kstrtoul(buf, 10, &val);
> +	if (ret)
> +		return ret;
> +
> +	if (val > lpp->max_width)
> +		return -EINVAL;
> +
> +	/* HW auto bandwidth negotiation must be enabled */
> +	pcie_rc_cfg_wr_mask(lpp, PCI_EXP_LNKCTL_HAWD, 0,
> +			    PCIE_CAP_OFST + PCI_EXP_LNKCTL);
> +	dw_pcie_link_width_resize(&lpp->pci, val);
> +
> +	return len;
> +}
> +static DEVICE_ATTR_WO(pcie_width);
> +
> +static struct attribute *pcie_cfg_attrs[] = {
> +	&dev_attr_pcie_link_status.attr,
> +	&dev_attr_pcie_speed.attr,
> +	&dev_attr_pcie_width.attr,
> +	NULL,
> +};
> +ATTRIBUTE_GROUPS(pcie_cfg);
> +
> +static int intel_pcie_sysfs_init(struct intel_pcie_port *lpp)
> +{
> +	return devm_device_add_groups(lpp->pci.dev, pcie_cfg_groups);
> +}
> +
>  static void __intel_pcie_remove(struct intel_pcie_port *lpp)
>  {
>  	intel_pcie_core_irq_disable(lpp);
> @@ -490,8 +591,17 @@ static int intel_pcie_rc_init(struct pcie_port *pp)
>  {
>  	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>  	struct intel_pcie_port *lpp = dev_get_drvdata(pci->dev);
> +	int ret;
>  
> -	return intel_pcie_host_setup(lpp);
> +	ret = intel_pcie_host_setup(lpp);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_pcie_sysfs_init(lpp);
> +	if (ret)
> +		__intel_pcie_remove(lpp);
> +
> +	return ret;
>  }
>  
>  int intel_pcie_msi_init(struct pcie_port *pp)
> -- 
> 2.11.0
Dilip Kota Oct. 21, 2019, 10:34 a.m. UTC | #2
Hi Gustavo Pimentel,

On 10/21/2019 4:40 PM, Gustavo Pimentel wrote:
> On Mon, Oct 21, 2019 at 7:39:20, Dilip Kota <eswara.kota@linux.intel.com>
> wrote:
>
>> PCIe RC driver on Intel Gateway SoCs have a requirement
>> of changing link width and speed on the fly.
>> So add the sysfs attributes to show and store the link
>> properties.
>> Add the respective link resize function in pcie DesignWare
>> framework so that Intel PCIe driver can use during link
>> width configuration on the fly.
>>
>> Signed-off-by: Dilip Kota <eswara.kota@linux.intel.com>
>> ---
>>   drivers/pci/controller/dwc/pcie-designware.c |   9 +++
>>   drivers/pci/controller/dwc/pcie-designware.h |   3 +
>>   drivers/pci/controller/dwc/pcie-intel-gw.c   | 112 ++++++++++++++++++++++++++-
>>   3 files changed, 123 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
>> index 4c391bfd681a..662fdcb4f2d6 100644
>> --- a/drivers/pci/controller/dwc/pcie-designware.c
>> +++ b/drivers/pci/controller/dwc/pcie-designware.c
>> @@ -474,6 +474,15 @@ int dw_pcie_link_up(struct dw_pcie *pci)
>>   		(!(val & PCIE_PORT_DEBUG1_LINK_IN_TRAINING)));
>>   }
>>   
>> +void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width)
>> +{
>> +	u32 val;
>> +
>> +	val =  dw_pcie_readl_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL);
>> +	val &= ~(PORT_MLTI_LNK_WDTH_CHNG | PORT_MLTI_LNK_WDTH);
>> +	val |= PORT_MLTI_LNK_WDTH_CHNG | lane_width;
>> +	dw_pcie_writel_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL, val);
>> +}
>>   
>>   void dw_pcie_upconfig_setup(struct dw_pcie *pci)
>>   {
>> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
>> index 3beac10e4a4c..fcf0442341fd 100644
>> --- a/drivers/pci/controller/dwc/pcie-designware.h
>> +++ b/drivers/pci/controller/dwc/pcie-designware.h
>> @@ -67,6 +67,8 @@
>>   #define PCIE_MSI_INTR0_STATUS		0x830
>>   
>>   #define PCIE_PORT_MULTI_LANE_CTRL	0x8C0
>> +#define PORT_MLTI_LNK_WDTH		GENMASK(5, 0)
>> +#define PORT_MLTI_LNK_WDTH_CHNG		BIT(6)
>>   #define PORT_MLTI_UPCFG_SUPPORT		BIT(7)
>>   
>>   #define PCIE_ATU_VIEWPORT		0x900
>> @@ -282,6 +284,7 @@ void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
>>   u32 dw_pcie_read_atu(struct dw_pcie *pci, u32 reg, size_t size);
>>   void dw_pcie_write_atu(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
>>   int dw_pcie_link_up(struct dw_pcie *pci);
>> +void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width);
>>   void dw_pcie_upconfig_setup(struct dw_pcie *pci);
>>   void dw_pcie_link_speed_change(struct dw_pcie *pci, bool enable);
>>   void dw_pcie_link_set_n_fts(struct dw_pcie *pci, u32 n_fts);
>> diff --git a/drivers/pci/controller/dwc/pcie-intel-gw.c b/drivers/pci/controller/dwc/pcie-intel-gw.c
>> index 9142c70db808..b9be0921671d 100644
>> --- a/drivers/pci/controller/dwc/pcie-intel-gw.c
>> +++ b/drivers/pci/controller/dwc/pcie-intel-gw.c
>> @@ -146,6 +146,22 @@ static void intel_pcie_ltssm_disable(struct intel_pcie_port *lpp)
>>   	pcie_app_wr_mask(lpp, PCIE_APP_CCR_LTSSM_ENABLE, 0, PCIE_APP_CCR);
>>   }
>>   
>> +static const char *pcie_link_gen_to_str(int gen)
>> +{
>> +	switch (gen) {
>> +	case PCIE_LINK_SPEED_GEN1:
>> +		return "2.5";
>> +	case PCIE_LINK_SPEED_GEN2:
>> +		return "5.0";
>> +	case PCIE_LINK_SPEED_GEN3:
>> +		return "8.0";
>> +	case PCIE_LINK_SPEED_GEN4:
>> +		return "16.0";
>> +	default:
>> +		return "???";
>> +	}
>> +}
>> +
>>   static void intel_pcie_link_setup(struct intel_pcie_port *lpp)
>>   {
>>   	u32 val;
>> @@ -444,6 +460,91 @@ static int intel_pcie_host_setup(struct intel_pcie_port *lpp)
>>   	return ret;
>>   }
>>   
>> +static ssize_t pcie_link_status_show(struct device *dev,
>> +				     struct device_attribute *attr, char *buf)
>> +{
>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>> +	u32 reg, width, gen;
>> +
>> +	reg = pcie_rc_cfg_rd(lpp, PCIE_CAP_OFST + PCI_EXP_LNKCTL);
>> +	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, reg >> 16);
>> +	gen = FIELD_GET(PCI_EXP_LNKSTA_CLS, reg >> 16);
>> +
>> +	if (gen > lpp->max_speed)
>> +		return -EINVAL;
>> +
>> +	return sprintf(buf, "Port %2u Width x%u Speed %s GT/s\n", lpp->id,
>> +		       width, pcie_link_gen_to_str(gen));
>> +}
>> +static DEVICE_ATTR_RO(pcie_link_status);
> Dilip please check pci.h there are there already enums and strings
> relatively to PCIe speed and width, that you can use.

Yes i can see a global array "pcie_link_speed[]" and a macro 
PCIE_SPEED2STR[].
I will update the driver.
Whereas width enum, it is not required here as directly storing the 
register value.
Thanks for pointing it.

Regards,
Dilip
>
>> +
>> +static ssize_t pcie_speed_store(struct device *dev,
>> +				struct device_attribute *attr,
>> +				const char *buf, size_t len)
>> +{
>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>> +	unsigned long val;
>> +	int ret;
>> +
>> +	ret = kstrtoul(buf, 10, &val);
>> +	if (ret)
>> +		return ret;
>> +
>> +	if (val > lpp->max_speed)
>> +		return -EINVAL;
>> +
>> +	lpp->link_gen = val;
>> +	intel_pcie_max_speed_setup(lpp);
>> +	dw_pcie_link_speed_change(&lpp->pci, false);
>> +	dw_pcie_link_speed_change(&lpp->pci, true);
>> +
>> +	return len;
>> +}
>> +static DEVICE_ATTR_WO(pcie_speed);
>> +
>> +/*
>> + * Link width change on the fly is not always successful.
>> + * It also depends on the partner.
>> + */
>> +static ssize_t pcie_width_store(struct device *dev,
>> +				struct device_attribute *attr,
>> +				const char *buf, size_t len)
>> +{
>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>> +	unsigned long val;
>> +	int ret;
>> +
>> +	lpp = dev_get_drvdata(dev);
>> +
>> +	ret = kstrtoul(buf, 10, &val);
>> +	if (ret)
>> +		return ret;
>> +
>> +	if (val > lpp->max_width)
>> +		return -EINVAL;
>> +
>> +	/* HW auto bandwidth negotiation must be enabled */
>> +	pcie_rc_cfg_wr_mask(lpp, PCI_EXP_LNKCTL_HAWD, 0,
>> +			    PCIE_CAP_OFST + PCI_EXP_LNKCTL);
>> +	dw_pcie_link_width_resize(&lpp->pci, val);
>> +
>> +	return len;
>> +}
>> +static DEVICE_ATTR_WO(pcie_width);
>> +
>> +static struct attribute *pcie_cfg_attrs[] = {
>> +	&dev_attr_pcie_link_status.attr,
>> +	&dev_attr_pcie_speed.attr,
>> +	&dev_attr_pcie_width.attr,
>> +	NULL,
>> +};
>> +ATTRIBUTE_GROUPS(pcie_cfg);
>> +
>> +static int intel_pcie_sysfs_init(struct intel_pcie_port *lpp)
>> +{
>> +	return devm_device_add_groups(lpp->pci.dev, pcie_cfg_groups);
>> +}
>> +
>>   static void __intel_pcie_remove(struct intel_pcie_port *lpp)
>>   {
>>   	intel_pcie_core_irq_disable(lpp);
>> @@ -490,8 +591,17 @@ static int intel_pcie_rc_init(struct pcie_port *pp)
>>   {
>>   	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>>   	struct intel_pcie_port *lpp = dev_get_drvdata(pci->dev);
>> +	int ret;
>>   
>> -	return intel_pcie_host_setup(lpp);
>> +	ret = intel_pcie_host_setup(lpp);
>> +	if (ret)
>> +		return ret;
>> +
>> +	ret = intel_pcie_sysfs_init(lpp);
>> +	if (ret)
>> +		__intel_pcie_remove(lpp);
>> +
>> +	return ret;
>>   }
>>   
>>   int intel_pcie_msi_init(struct pcie_port *pp)
>> -- 
>> 2.11.0
>
Andrew Murray Oct. 21, 2019, 1:38 p.m. UTC | #3
On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> PCIe RC driver on Intel Gateway SoCs have a requirement
> of changing link width and speed on the fly.
> So add the sysfs attributes to show and store the link
> properties.
> Add the respective link resize function in pcie DesignWare
> framework so that Intel PCIe driver can use during link
> width configuration on the fly.
> 
> Signed-off-by: Dilip Kota <eswara.kota@linux.intel.com>
> ---
>  drivers/pci/controller/dwc/pcie-designware.c |   9 +++
>  drivers/pci/controller/dwc/pcie-designware.h |   3 +
>  drivers/pci/controller/dwc/pcie-intel-gw.c   | 112 ++++++++++++++++++++++++++-
>  3 files changed, 123 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
> index 4c391bfd681a..662fdcb4f2d6 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.c
> +++ b/drivers/pci/controller/dwc/pcie-designware.c
> @@ -474,6 +474,15 @@ int dw_pcie_link_up(struct dw_pcie *pci)
>  		(!(val & PCIE_PORT_DEBUG1_LINK_IN_TRAINING)));
>  }
>  
> +void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width)
> +{
> +	u32 val;
> +
> +	val =  dw_pcie_readl_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL);
> +	val &= ~(PORT_MLTI_LNK_WDTH_CHNG | PORT_MLTI_LNK_WDTH);
> +	val |= PORT_MLTI_LNK_WDTH_CHNG | lane_width;
> +	dw_pcie_writel_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL, val);
> +}
>  
>  void dw_pcie_upconfig_setup(struct dw_pcie *pci)
>  {
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> index 3beac10e4a4c..fcf0442341fd 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -67,6 +67,8 @@
>  #define PCIE_MSI_INTR0_STATUS		0x830
>  
>  #define PCIE_PORT_MULTI_LANE_CTRL	0x8C0
> +#define PORT_MLTI_LNK_WDTH		GENMASK(5, 0)
> +#define PORT_MLTI_LNK_WDTH_CHNG		BIT(6)
>  #define PORT_MLTI_UPCFG_SUPPORT		BIT(7)
>  
>  #define PCIE_ATU_VIEWPORT		0x900
> @@ -282,6 +284,7 @@ void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
>  u32 dw_pcie_read_atu(struct dw_pcie *pci, u32 reg, size_t size);
>  void dw_pcie_write_atu(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
>  int dw_pcie_link_up(struct dw_pcie *pci);
> +void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width);
>  void dw_pcie_upconfig_setup(struct dw_pcie *pci);
>  void dw_pcie_link_speed_change(struct dw_pcie *pci, bool enable);
>  void dw_pcie_link_set_n_fts(struct dw_pcie *pci, u32 n_fts);
> diff --git a/drivers/pci/controller/dwc/pcie-intel-gw.c b/drivers/pci/controller/dwc/pcie-intel-gw.c
> index 9142c70db808..b9be0921671d 100644
> --- a/drivers/pci/controller/dwc/pcie-intel-gw.c
> +++ b/drivers/pci/controller/dwc/pcie-intel-gw.c
> @@ -146,6 +146,22 @@ static void intel_pcie_ltssm_disable(struct intel_pcie_port *lpp)
>  	pcie_app_wr_mask(lpp, PCIE_APP_CCR_LTSSM_ENABLE, 0, PCIE_APP_CCR);
>  }
>  
> +static const char *pcie_link_gen_to_str(int gen)
> +{
> +	switch (gen) {
> +	case PCIE_LINK_SPEED_GEN1:
> +		return "2.5";
> +	case PCIE_LINK_SPEED_GEN2:
> +		return "5.0";
> +	case PCIE_LINK_SPEED_GEN3:
> +		return "8.0";
> +	case PCIE_LINK_SPEED_GEN4:
> +		return "16.0";
> +	default:
> +		return "???";
> +	}
> +}
> +
>  static void intel_pcie_link_setup(struct intel_pcie_port *lpp)
>  {
>  	u32 val;
> @@ -444,6 +460,91 @@ static int intel_pcie_host_setup(struct intel_pcie_port *lpp)
>  	return ret;
>  }
>  
> +static ssize_t pcie_link_status_show(struct device *dev,
> +				     struct device_attribute *attr, char *buf)
> +{
> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> +	u32 reg, width, gen;
> +
> +	reg = pcie_rc_cfg_rd(lpp, PCIE_CAP_OFST + PCI_EXP_LNKCTL);
> +	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, reg >> 16);
> +	gen = FIELD_GET(PCI_EXP_LNKSTA_CLS, reg >> 16);
> +
> +	if (gen > lpp->max_speed)
> +		return -EINVAL;
> +
> +	return sprintf(buf, "Port %2u Width x%u Speed %s GT/s\n", lpp->id,
> +		       width, pcie_link_gen_to_str(gen));
> +}
> +static DEVICE_ATTR_RO(pcie_link_status);
> +
> +static ssize_t pcie_speed_store(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t len)
> +{
> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> +	unsigned long val;
> +	int ret;
> +
> +	ret = kstrtoul(buf, 10, &val);
> +	if (ret)
> +		return ret;
> +
> +	if (val > lpp->max_speed)
> +		return -EINVAL;
> +
> +	lpp->link_gen = val;
> +	intel_pcie_max_speed_setup(lpp);
> +	dw_pcie_link_speed_change(&lpp->pci, false);
> +	dw_pcie_link_speed_change(&lpp->pci, true);
> +
> +	return len;
> +}
> +static DEVICE_ATTR_WO(pcie_speed);
> +
> +/*
> + * Link width change on the fly is not always successful.
> + * It also depends on the partner.
> + */
> +static ssize_t pcie_width_store(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t len)
> +{
> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> +	unsigned long val;
> +	int ret;
> +
> +	lpp = dev_get_drvdata(dev);
> +
> +	ret = kstrtoul(buf, 10, &val);
> +	if (ret)
> +		return ret;
> +
> +	if (val > lpp->max_width)
> +		return -EINVAL;
> +
> +	/* HW auto bandwidth negotiation must be enabled */
> +	pcie_rc_cfg_wr_mask(lpp, PCI_EXP_LNKCTL_HAWD, 0,
> +			    PCIE_CAP_OFST + PCI_EXP_LNKCTL);
> +	dw_pcie_link_width_resize(&lpp->pci, val);
> +
> +	return len;
> +}
> +static DEVICE_ATTR_WO(pcie_width);
> +
> +static struct attribute *pcie_cfg_attrs[] = {
> +	&dev_attr_pcie_link_status.attr,
> +	&dev_attr_pcie_speed.attr,
> +	&dev_attr_pcie_width.attr,
> +	NULL,
> +};

Is there a reason that these are limited only to the Intel driver and
not the wider set of DWC drivers?

Is there anything specific here about the Intel GW driver?

Thanks,

Andrew Murray

> +ATTRIBUTE_GROUPS(pcie_cfg);
> +
> +static int intel_pcie_sysfs_init(struct intel_pcie_port *lpp)
> +{
> +	return devm_device_add_groups(lpp->pci.dev, pcie_cfg_groups);
> +}
> +
>  static void __intel_pcie_remove(struct intel_pcie_port *lpp)
>  {
>  	intel_pcie_core_irq_disable(lpp);
> @@ -490,8 +591,17 @@ static int intel_pcie_rc_init(struct pcie_port *pp)
>  {
>  	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>  	struct intel_pcie_port *lpp = dev_get_drvdata(pci->dev);
> +	int ret;
>  
> -	return intel_pcie_host_setup(lpp);
> +	ret = intel_pcie_host_setup(lpp);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_pcie_sysfs_init(lpp);
> +	if (ret)
> +		__intel_pcie_remove(lpp);
> +
> +	return ret;
>  }
>  
>  int intel_pcie_msi_init(struct pcie_port *pp)
> -- 
> 2.11.0
>
Bjorn Helgaas Oct. 21, 2019, 5:18 p.m. UTC | #4
On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > PCIe RC driver on Intel Gateway SoCs have a requirement
> > of changing link width and speed on the fly.

Please add more details about why this is needed.  Since you're adding
sysfs files, it sounds like it's not actually the *driver* that needs
this; it's something in userspace?

The normal scenario is that the hardware negotiates link widths and
speeds without any software involvement (PCIe r5.0, sec 1.2).

If this is to work around hardware defects, we should try to do that
inside the kernel because we can't expect userspace to do it reliably.

As Andrew points out below, this all sounds like it should be generic
rather than Intel-specific.

> > So add the sysfs attributes to show and store the link
> > properties.
> > Add the respective link resize function in pcie DesignWare
> > framework so that Intel PCIe driver can use during link
> > width configuration on the fly.
> > ...

> > +static ssize_t pcie_link_status_show(struct device *dev,
> > +				     struct device_attribute *attr, char *buf)
> > +{
> > +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> > +	u32 reg, width, gen;
> > +
> > +	reg = pcie_rc_cfg_rd(lpp, PCIE_CAP_OFST + PCI_EXP_LNKCTL);
> > +	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, reg >> 16);
> > +	gen = FIELD_GET(PCI_EXP_LNKSTA_CLS, reg >> 16);
> > +
> > +	if (gen > lpp->max_speed)
> > +		return -EINVAL;
> > +
> > +	return sprintf(buf, "Port %2u Width x%u Speed %s GT/s\n", lpp->id,
> > +		       width, pcie_link_gen_to_str(gen));
> > +}
> > +static DEVICE_ATTR_RO(pcie_link_status);

We already have generic current_link_speed and current_link_width
files.

> > +static ssize_t pcie_speed_store(struct device *dev,
> > +				struct device_attribute *attr,
> > +				const char *buf, size_t len)
> > +{
> > +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> > +	unsigned long val;
> > +	int ret;
> > +
> > +	ret = kstrtoul(buf, 10, &val);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (val > lpp->max_speed)
> > +		return -EINVAL;
> > +
> > +	lpp->link_gen = val;
> > +	intel_pcie_max_speed_setup(lpp);
> > +	dw_pcie_link_speed_change(&lpp->pci, false);
> > +	dw_pcie_link_speed_change(&lpp->pci, true);
> > +
> > +	return len;
> > +}
> > +static DEVICE_ATTR_WO(pcie_speed);
> > +
> > +/*
> > + * Link width change on the fly is not always successful.
> > + * It also depends on the partner.
> > + */
> > +static ssize_t pcie_width_store(struct device *dev,
> > +				struct device_attribute *attr,
> > +				const char *buf, size_t len)
> > +{
> > +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> > +	unsigned long val;
> > +	int ret;
> > +
> > +	lpp = dev_get_drvdata(dev);
> > +
> > +	ret = kstrtoul(buf, 10, &val);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (val > lpp->max_width)
> > +		return -EINVAL;
> > +
> > +	/* HW auto bandwidth negotiation must be enabled */
> > +	pcie_rc_cfg_wr_mask(lpp, PCI_EXP_LNKCTL_HAWD, 0,
> > +			    PCIE_CAP_OFST + PCI_EXP_LNKCTL);
> > +	dw_pcie_link_width_resize(&lpp->pci, val);
> > +
> > +	return len;
> > +}
> > +static DEVICE_ATTR_WO(pcie_width);
> > +
> > +static struct attribute *pcie_cfg_attrs[] = {
> > +	&dev_attr_pcie_link_status.attr,
> > +	&dev_attr_pcie_speed.attr,
> > +	&dev_attr_pcie_width.attr,
> > +	NULL,
> > +};
> 
> Is there a reason that these are limited only to the Intel driver and
> not the wider set of DWC drivers?
> 
> Is there anything specific here about the Intel GW driver?
Dilip Kota Oct. 22, 2019, 9:20 a.m. UTC | #5
Hi Andrew Murray,

On 10/21/2019 9:38 PM, Andrew Murray wrote:
> On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
>> PCIe RC driver on Intel Gateway SoCs have a requirement
>> of changing link width and speed on the fly.
>> So add the sysfs attributes to show and store the link
>> properties.
>> Add the respective link resize function in pcie DesignWare
>> framework so that Intel PCIe driver can use during link
>> width configuration on the fly.
>>
>> Signed-off-by: Dilip Kota <eswara.kota@linux.intel.com>
>> ---
>>   drivers/pci/controller/dwc/pcie-designware.c |   9 +++
>>   drivers/pci/controller/dwc/pcie-designware.h |   3 +
>>   drivers/pci/controller/dwc/pcie-intel-gw.c   | 112 ++++++++++++++++++++++++++-
>>   3 files changed, 123 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
>> index 4c391bfd681a..662fdcb4f2d6 100644
>> --- a/drivers/pci/controller/dwc/pcie-designware.c
>> +++ b/drivers/pci/controller/dwc/pcie-designware.c
>> @@ -474,6 +474,15 @@ int dw_pcie_link_up(struct dw_pcie *pci)
>>   		(!(val & PCIE_PORT_DEBUG1_LINK_IN_TRAINING)));
>>   }
>>   
>> +void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width)
>> +{
>> +	u32 val;
>> +
>> +	val =  dw_pcie_readl_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL);
>> +	val &= ~(PORT_MLTI_LNK_WDTH_CHNG | PORT_MLTI_LNK_WDTH);
>> +	val |= PORT_MLTI_LNK_WDTH_CHNG | lane_width;
>> +	dw_pcie_writel_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL, val);
>> +}
>>   
>>   void dw_pcie_upconfig_setup(struct dw_pcie *pci)
>>   {
>> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
>> index 3beac10e4a4c..fcf0442341fd 100644
>> --- a/drivers/pci/controller/dwc/pcie-designware.h
>> +++ b/drivers/pci/controller/dwc/pcie-designware.h
>> @@ -67,6 +67,8 @@
>>   #define PCIE_MSI_INTR0_STATUS		0x830
>>   
>>   #define PCIE_PORT_MULTI_LANE_CTRL	0x8C0
>> +#define PORT_MLTI_LNK_WDTH		GENMASK(5, 0)
>> +#define PORT_MLTI_LNK_WDTH_CHNG		BIT(6)
>>   #define PORT_MLTI_UPCFG_SUPPORT		BIT(7)
>>   
>>   #define PCIE_ATU_VIEWPORT		0x900
>> @@ -282,6 +284,7 @@ void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
>>   u32 dw_pcie_read_atu(struct dw_pcie *pci, u32 reg, size_t size);
>>   void dw_pcie_write_atu(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
>>   int dw_pcie_link_up(struct dw_pcie *pci);
>> +void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width);
>>   void dw_pcie_upconfig_setup(struct dw_pcie *pci);
>>   void dw_pcie_link_speed_change(struct dw_pcie *pci, bool enable);
>>   void dw_pcie_link_set_n_fts(struct dw_pcie *pci, u32 n_fts);
>> diff --git a/drivers/pci/controller/dwc/pcie-intel-gw.c b/drivers/pci/controller/dwc/pcie-intel-gw.c
>> index 9142c70db808..b9be0921671d 100644
>> --- a/drivers/pci/controller/dwc/pcie-intel-gw.c
>> +++ b/drivers/pci/controller/dwc/pcie-intel-gw.c
>> @@ -146,6 +146,22 @@ static void intel_pcie_ltssm_disable(struct intel_pcie_port *lpp)
>>   	pcie_app_wr_mask(lpp, PCIE_APP_CCR_LTSSM_ENABLE, 0, PCIE_APP_CCR);
>>   }
>>   
>> +static const char *pcie_link_gen_to_str(int gen)
>> +{
>> +	switch (gen) {
>> +	case PCIE_LINK_SPEED_GEN1:
>> +		return "2.5";
>> +	case PCIE_LINK_SPEED_GEN2:
>> +		return "5.0";
>> +	case PCIE_LINK_SPEED_GEN3:
>> +		return "8.0";
>> +	case PCIE_LINK_SPEED_GEN4:
>> +		return "16.0";
>> +	default:
>> +		return "???";
>> +	}
>> +}
>> +
>>   static void intel_pcie_link_setup(struct intel_pcie_port *lpp)
>>   {
>>   	u32 val;
>> @@ -444,6 +460,91 @@ static int intel_pcie_host_setup(struct intel_pcie_port *lpp)
>>   	return ret;
>>   }
>>   
>> +static ssize_t pcie_link_status_show(struct device *dev,
>> +				     struct device_attribute *attr, char *buf)
>> +{
>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>> +	u32 reg, width, gen;
>> +
>> +	reg = pcie_rc_cfg_rd(lpp, PCIE_CAP_OFST + PCI_EXP_LNKCTL);
>> +	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, reg >> 16);
>> +	gen = FIELD_GET(PCI_EXP_LNKSTA_CLS, reg >> 16);
>> +
>> +	if (gen > lpp->max_speed)
>> +		return -EINVAL;
>> +
>> +	return sprintf(buf, "Port %2u Width x%u Speed %s GT/s\n", lpp->id,
>> +		       width, pcie_link_gen_to_str(gen));
>> +}
>> +static DEVICE_ATTR_RO(pcie_link_status);
>> +
>> +static ssize_t pcie_speed_store(struct device *dev,
>> +				struct device_attribute *attr,
>> +				const char *buf, size_t len)
>> +{
>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>> +	unsigned long val;
>> +	int ret;
>> +
>> +	ret = kstrtoul(buf, 10, &val);
>> +	if (ret)
>> +		return ret;
>> +
>> +	if (val > lpp->max_speed)
>> +		return -EINVAL;
>> +
>> +	lpp->link_gen = val;
>> +	intel_pcie_max_speed_setup(lpp);
>> +	dw_pcie_link_speed_change(&lpp->pci, false);
>> +	dw_pcie_link_speed_change(&lpp->pci, true);
>> +
>> +	return len;
>> +}
>> +static DEVICE_ATTR_WO(pcie_speed);
>> +
>> +/*
>> + * Link width change on the fly is not always successful.
>> + * It also depends on the partner.
>> + */
>> +static ssize_t pcie_width_store(struct device *dev,
>> +				struct device_attribute *attr,
>> +				const char *buf, size_t len)
>> +{
>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>> +	unsigned long val;
>> +	int ret;
>> +
>> +	lpp = dev_get_drvdata(dev);
>> +
>> +	ret = kstrtoul(buf, 10, &val);
>> +	if (ret)
>> +		return ret;
>> +
>> +	if (val > lpp->max_width)
>> +		return -EINVAL;
>> +
>> +	/* HW auto bandwidth negotiation must be enabled */
>> +	pcie_rc_cfg_wr_mask(lpp, PCI_EXP_LNKCTL_HAWD, 0,
>> +			    PCIE_CAP_OFST + PCI_EXP_LNKCTL);
>> +	dw_pcie_link_width_resize(&lpp->pci, val);
>> +
>> +	return len;
>> +}
>> +static DEVICE_ATTR_WO(pcie_width);
>> +
>> +static struct attribute *pcie_cfg_attrs[] = {
>> +	&dev_attr_pcie_link_status.attr,
>> +	&dev_attr_pcie_speed.attr,
>> +	&dev_attr_pcie_width.attr,
>> +	NULL,
>> +};
> Is there a reason that these are limited only to the Intel driver and
> not the wider set of DWC drivers?
>
> Is there anything specific here about the Intel GW driver?

Yes, they need intel_pcie_max_speed_setup() and pcie_link_gen_to_str().
Once intel_pcie_max_speed_setup() moved to DesignWare framework (as per 
Bjorn Helgaas inputs) and use pcie_link_speed[] array instead of 
pcie_link_gen_to_str() (as per gustavo pimentel inputs) we can move this 
to PCIe DesignWare framework or to pci sysfs file.

Regards,
Dilip

>
> Thanks,
>
> Andrew Murray
>
>> +ATTRIBUTE_GROUPS(pcie_cfg);
>> +
>> +static int intel_pcie_sysfs_init(struct intel_pcie_port *lpp)
>> +{
>> +	return devm_device_add_groups(lpp->pci.dev, pcie_cfg_groups);
>> +}
>> +
>>   static void __intel_pcie_remove(struct intel_pcie_port *lpp)
>>   {
>>   	intel_pcie_core_irq_disable(lpp);
>> @@ -490,8 +591,17 @@ static int intel_pcie_rc_init(struct pcie_port *pp)
>>   {
>>   	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
>>   	struct intel_pcie_port *lpp = dev_get_drvdata(pci->dev);
>> +	int ret;
>>   
>> -	return intel_pcie_host_setup(lpp);
>> +	ret = intel_pcie_host_setup(lpp);
>> +	if (ret)
>> +		return ret;
>> +
>> +	ret = intel_pcie_sysfs_init(lpp);
>> +	if (ret)
>> +		__intel_pcie_remove(lpp);
>> +
>> +	return ret;
>>   }
>>   
>>   int intel_pcie_msi_init(struct pcie_port *pp)
>> -- 
>> 2.11.0
>>
Dilip Kota Oct. 22, 2019, 9:27 a.m. UTC | #6
Hi Bjorn Helgaas,

On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
>> On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
>>> PCIe RC driver on Intel Gateway SoCs have a requirement
>>> of changing link width and speed on the fly.
> Please add more details about why this is needed.  Since you're adding
> sysfs files, it sounds like it's not actually the *driver* that needs
> this; it's something in userspace?
We have use cases to change the link speed and width on the fly.
One is EMI check and other is power saving.
Some battery backed applications have to switch PCIe link from higher 
GEN to GEN1 and width to x1. During the cases like
external power supply got disconnected or broken. Once external power 
supply is connected then switch PCIe link to higher GEN and width.
>
> The normal scenario is that the hardware negotiates link widths and
> speeds without any software involvement (PCIe r5.0, sec 1.2).
>
> If this is to work around hardware defects, we should try to do that
> inside the kernel because we can't expect userspace to do it reliably.
>
> As Andrew points out below, this all sounds like it should be generic
> rather than Intel-specific.
>
>>> So add the sysfs attributes to show and store the link
>>> properties.
>>> Add the respective link resize function in pcie DesignWare
>>> framework so that Intel PCIe driver can use during link
>>> width configuration on the fly.
>>> ...
>>> +static ssize_t pcie_link_status_show(struct device *dev,
>>> +				     struct device_attribute *attr, char *buf)
>>> +{
>>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>>> +	u32 reg, width, gen;
>>> +
>>> +	reg = pcie_rc_cfg_rd(lpp, PCIE_CAP_OFST + PCI_EXP_LNKCTL);
>>> +	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, reg >> 16);
>>> +	gen = FIELD_GET(PCI_EXP_LNKSTA_CLS, reg >> 16);
>>> +
>>> +	if (gen > lpp->max_speed)
>>> +		return -EINVAL;
>>> +
>>> +	return sprintf(buf, "Port %2u Width x%u Speed %s GT/s\n", lpp->id,
>>> +		       width, pcie_link_gen_to_str(gen));
>>> +}
>>> +static DEVICE_ATTR_RO(pcie_link_status);
> We already have generic current_link_speed and current_link_width
> files.

Thanks for pointing it. I will remove the pcie_link_status.

Regards,
Dilip

>
>>> +static ssize_t pcie_speed_store(struct device *dev,
>>> +				struct device_attribute *attr,
>>> +				const char *buf, size_t len)
>>> +{
>>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>>> +	unsigned long val;
>>> +	int ret;
>>> +
>>> +	ret = kstrtoul(buf, 10, &val);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	if (val > lpp->max_speed)
>>> +		return -EINVAL;
>>> +
>>> +	lpp->link_gen = val;
>>> +	intel_pcie_max_speed_setup(lpp);
>>> +	dw_pcie_link_speed_change(&lpp->pci, false);
>>> +	dw_pcie_link_speed_change(&lpp->pci, true);
>>> +
>>> +	return len;
>>> +}
>>> +static DEVICE_ATTR_WO(pcie_speed);
>>> +
>>> +/*
>>> + * Link width change on the fly is not always successful.
>>> + * It also depends on the partner.
>>> + */
>>> +static ssize_t pcie_width_store(struct device *dev,
>>> +				struct device_attribute *attr,
>>> +				const char *buf, size_t len)
>>> +{
>>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>>> +	unsigned long val;
>>> +	int ret;
>>> +
>>> +	lpp = dev_get_drvdata(dev);
>>> +
>>> +	ret = kstrtoul(buf, 10, &val);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	if (val > lpp->max_width)
>>> +		return -EINVAL;
>>> +
>>> +	/* HW auto bandwidth negotiation must be enabled */
>>> +	pcie_rc_cfg_wr_mask(lpp, PCI_EXP_LNKCTL_HAWD, 0,
>>> +			    PCIE_CAP_OFST + PCI_EXP_LNKCTL);
>>> +	dw_pcie_link_width_resize(&lpp->pci, val);
>>> +
>>> +	return len;
>>> +}
>>> +static DEVICE_ATTR_WO(pcie_width);
>>> +
>>> +static struct attribute *pcie_cfg_attrs[] = {
>>> +	&dev_attr_pcie_link_status.attr,
>>> +	&dev_attr_pcie_speed.attr,
>>> +	&dev_attr_pcie_width.attr,
>>> +	NULL,
>>> +};
>> Is there a reason that these are limited only to the Intel driver and
>> not the wider set of DWC drivers?
>>
>> Is there anything specific here about the Intel GW driver?
Bjorn Helgaas Oct. 22, 2019, 12:59 p.m. UTC | #7
[+cc Rafael, linux-pm, beginning of discussion at
https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]

On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
> On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> > On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> > > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > > of changing link width and speed on the fly.
> > Please add more details about why this is needed.  Since you're adding
> > sysfs files, it sounds like it's not actually the *driver* that needs
> > this; it's something in userspace?

> We have use cases to change the link speed and width on the fly.
> One is EMI check and other is power saving.  Some battery backed
> applications have to switch PCIe link from higher GEN to GEN1 and
> width to x1. During the cases like external power supply got
> disconnected or broken. Once external power supply is connected then
> switch PCIe link to higher GEN and width.

That sounds plausible, but of course nothing there is specific to the
Intel Gateway, so we should implement this generically so it would
work on all hardware.

I'm not sure what the interface should look like -- should it be a
low-level interface as you propose where userspace would have to
identify each link of interest, or is there some system-wide
power/performance knob that could tune all links?  Cc'd Rafael and
linux-pm in case they have ideas.

Bjorn
Andrew Murray Oct. 25, 2019, 9:34 a.m. UTC | #8
On Tue, Oct 22, 2019 at 05:20:00PM +0800, Dilip Kota wrote:
> Hi Andrew Murray,
> 
> On 10/21/2019 9:38 PM, Andrew Murray wrote:
> > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > of changing link width and speed on the fly.
> > > So add the sysfs attributes to show and store the link
> > > properties.
> > > Add the respective link resize function in pcie DesignWare
> > > framework so that Intel PCIe driver can use during link
> > > width configuration on the fly.
> > > 
> > > Signed-off-by: Dilip Kota <eswara.kota@linux.intel.com>

> > > +/*
> > > + * Link width change on the fly is not always successful.
> > > + * It also depends on the partner.
> > > + */
> > > +static ssize_t pcie_width_store(struct device *dev,
> > > +				struct device_attribute *attr,
> > > +				const char *buf, size_t len)
> > > +{
> > > +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
> > > +	unsigned long val;
> > > +	int ret;
> > > +
> > > +	lpp = dev_get_drvdata(dev);
> > > +
> > > +	ret = kstrtoul(buf, 10, &val);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	if (val > lpp->max_width)
> > > +		return -EINVAL;
> > > +
> > > +	/* HW auto bandwidth negotiation must be enabled */
> > > +	pcie_rc_cfg_wr_mask(lpp, PCI_EXP_LNKCTL_HAWD, 0,
> > > +			    PCIE_CAP_OFST + PCI_EXP_LNKCTL);
> > > +	dw_pcie_link_width_resize(&lpp->pci, val);
> > > +
> > > +	return len;
> > > +}
> > > +static DEVICE_ATTR_WO(pcie_width);
> > > +
> > > +static struct attribute *pcie_cfg_attrs[] = {
> > > +	&dev_attr_pcie_link_status.attr,
> > > +	&dev_attr_pcie_speed.attr,
> > > +	&dev_attr_pcie_width.attr,
> > > +	NULL,
> > > +};
> > Is there a reason that these are limited only to the Intel driver and
> > not the wider set of DWC drivers?
> > 
> > Is there anything specific here about the Intel GW driver?
> 
> Yes, they need intel_pcie_max_speed_setup() and pcie_link_gen_to_str().
> Once intel_pcie_max_speed_setup() moved to DesignWare framework (as per
> Bjorn Helgaas inputs) and use pcie_link_speed[] array instead of
> pcie_link_gen_to_str() (as per gustavo pimentel inputs) we can move this to
> PCIe DesignWare framework or to pci sysfs file.

I think the key concern here is this: If you introduce sysfs controls that
represent generic PCI concepts (such as changing the link speed) - the concept
isn't limited to a particular host controller, it's limited to PCI. Therefore
the sysfs control should also apply more widely to all PCI controllers. This
is important as otherwise you may end up getting a slightly different user
interface to achieve the same consequence depending on the host-controller in
use.

If each controller driver has a different way of doing things, then it lends
itself to having some set of ops that they can all implement. Or perhaps there
is a middle-ground solution where this applies just to DWC devices and not all
devices. 

Thanks,

Andrew Murray

> 
> Regards,
> Dilip
> 
> > 
> > Thanks,
> > 
> > Andrew Murray
> > 
> > > +ATTRIBUTE_GROUPS(pcie_cfg);
> > > +
> > > +static int intel_pcie_sysfs_init(struct intel_pcie_port *lpp)
> > > +{
> > > +	return devm_device_add_groups(lpp->pci.dev, pcie_cfg_groups);
> > > +}
> > > +
> > >   static void __intel_pcie_remove(struct intel_pcie_port *lpp)
> > >   {
> > >   	intel_pcie_core_irq_disable(lpp);
> > > @@ -490,8 +591,17 @@ static int intel_pcie_rc_init(struct pcie_port *pp)
> > >   {
> > >   	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > >   	struct intel_pcie_port *lpp = dev_get_drvdata(pci->dev);
> > > +	int ret;
> > > -	return intel_pcie_host_setup(lpp);
> > > +	ret = intel_pcie_host_setup(lpp);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	ret = intel_pcie_sysfs_init(lpp);
> > > +	if (ret)
> > > +		__intel_pcie_remove(lpp);
> > > +
> > > +	return ret;
> > >   }
> > >   int intel_pcie_msi_init(struct pcie_port *pp)
> > > -- 
> > > 2.11.0
> > >
Dilip Kota Oct. 29, 2019, 9:31 a.m. UTC | #9
On 10/22/2019 8:59 PM, Bjorn Helgaas wrote:
> [+cc Rafael, linux-pm, beginning of discussion at
> https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
>
> On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
>> On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
>>> On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
>>>> On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
>>>>> PCIe RC driver on Intel Gateway SoCs have a requirement
>>>>> of changing link width and speed on the fly.
>>> Please add more details about why this is needed.  Since you're adding
>>> sysfs files, it sounds like it's not actually the *driver* that needs
>>> this; it's something in userspace?
>> We have use cases to change the link speed and width on the fly.
>> One is EMI check and other is power saving.  Some battery backed
>> applications have to switch PCIe link from higher GEN to GEN1 and
>> width to x1. During the cases like external power supply got
>> disconnected or broken. Once external power supply is connected then
>> switch PCIe link to higher GEN and width.
> That sounds plausible, but of course nothing there is specific to the
> Intel Gateway, so we should implement this generically so it would
> work on all hardware.
Agree.
>
> I'm not sure what the interface should look like -- should it be a
> low-level interface as you propose where userspace would have to
> identify each link of interest, or is there some system-wide
> power/performance knob that could tune all links?  Cc'd Rafael and
> linux-pm in case they have ideas.

To my knowledge sysfs is the appropriate way to go.
If there are any other best possible knobs, will be helpful.

Regards,
Dilip

>
> Bjorn
Dilip Kota Oct. 29, 2019, 9:51 a.m. UTC | #10
On 10/25/2019 5:34 PM, Andrew Murray wrote:

>>>> +/*
>>>> + * Link width change on the fly is not always successful.
>>>> + * It also depends on the partner.
>>>> + */
>>>> +static ssize_t pcie_width_store(struct device *dev,
>>>> +				struct device_attribute *attr,
>>>> +				const char *buf, size_t len)
>>>> +{
>>>> +	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
>>>> +	unsigned long val;
>>>> +	int ret;
>>>> +
>>>> +	lpp = dev_get_drvdata(dev);
>>>> +
>>>> +	ret = kstrtoul(buf, 10, &val);
>>>> +	if (ret)
>>>> +		return ret;
>>>> +
>>>> +	if (val > lpp->max_width)
>>>> +		return -EINVAL;
>>>> +
>>>> +	/* HW auto bandwidth negotiation must be enabled */
>>>> +	pcie_rc_cfg_wr_mask(lpp, PCI_EXP_LNKCTL_HAWD, 0,
>>>> +			    PCIE_CAP_OFST + PCI_EXP_LNKCTL);
>>>> +	dw_pcie_link_width_resize(&lpp->pci, val);
>>>> +
>>>> +	return len;
>>>> +}
>>>> +static DEVICE_ATTR_WO(pcie_width);
>>>> +
>>>> +static struct attribute *pcie_cfg_attrs[] = {
>>>> +	&dev_attr_pcie_link_status.attr,
>>>> +	&dev_attr_pcie_speed.attr,
>>>> +	&dev_attr_pcie_width.attr,
>>>> +	NULL,
>>>> +};
>>> Is there a reason that these are limited only to the Intel driver and
>>> not the wider set of DWC drivers?
>>>
>>> Is there anything specific here about the Intel GW driver?
>> Yes, they need intel_pcie_max_speed_setup() and pcie_link_gen_to_str().
>> Once intel_pcie_max_speed_setup() moved to DesignWare framework (as per
>> Bjorn Helgaas inputs) and use pcie_link_speed[] array instead of
>> pcie_link_gen_to_str() (as per gustavo pimentel inputs) we can move this to
>> PCIe DesignWare framework or to pci sysfs file.
> I think the key concern here is this: If you introduce sysfs controls that
> represent generic PCI concepts (such as changing the link speed) - the concept
> isn't limited to a particular host controller, it's limited to PCI. Therefore
> the sysfs control should also apply more widely to all PCI controllers. This
> is important as otherwise you may end up getting a slightly different user
> interface to achieve the same consequence depending on the host-controller in
> use.
>
> If each controller driver has a different way of doing things, then it lends
> itself to having some set of ops that they can all implement. Or perhaps there
> is a middle-ground solution where this applies just to DWC devices and not all
> devices.
I see the better way is to move the implementation to PCIe DesignWare 
framework because of
the registers programming.

Regards,
Dilip
> Thanks,
>
> Andrew Murray
>
>>
Rafael J. Wysocki Oct. 29, 2019, 10:42 a.m. UTC | #11
On Tue, Oct 22, 2019 at 2:59 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
>
> [+cc Rafael, linux-pm, beginning of discussion at
> https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
>
> On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
> > On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> > > On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> > > > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > > > of changing link width and speed on the fly.
> > > Please add more details about why this is needed.  Since you're adding
> > > sysfs files, it sounds like it's not actually the *driver* that needs
> > > this; it's something in userspace?
>
> > We have use cases to change the link speed and width on the fly.
> > One is EMI check and other is power saving.  Some battery backed
> > applications have to switch PCIe link from higher GEN to GEN1 and
> > width to x1. During the cases like external power supply got
> > disconnected or broken. Once external power supply is connected then
> > switch PCIe link to higher GEN and width.
>
> That sounds plausible, but of course nothing there is specific to the
> Intel Gateway, so we should implement this generically so it would
> work on all hardware.
>
> I'm not sure what the interface should look like -- should it be a
> low-level interface as you propose where userspace would have to
> identify each link of interest, or is there some system-wide
> power/performance knob that could tune all links?  Cc'd Rafael and
> linux-pm in case they have ideas.

Frankly, I need some time to think about this and, in case you are
wondering about whether or not it has been discussed with me already,
it hasn't.

At this point I can only say that since we have an ASPM interface,
which IMO is not fantastic, it may be good to come up with a common
link management interface.

Cheers!
Bjorn Helgaas Oct. 29, 2019, 12:36 p.m. UTC | #12
[+cc Heiner for ASPM conversation]

On Tue, Oct 29, 2019 at 11:42:53AM +0100, Rafael J. Wysocki wrote:
> On Tue, Oct 22, 2019 at 2:59 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
> >
> > [+cc Rafael, linux-pm, beginning of discussion at
> > https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
> >
> > On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
> > > On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> > > > On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> > > > > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > > > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > > > > of changing link width and speed on the fly.
> > > > Please add more details about why this is needed.  Since you're adding
> > > > sysfs files, it sounds like it's not actually the *driver* that needs
> > > > this; it's something in userspace?
> >
> > > We have use cases to change the link speed and width on the fly.
> > > One is EMI check and other is power saving.  Some battery backed
> > > applications have to switch PCIe link from higher GEN to GEN1 and
> > > width to x1. During the cases like external power supply got
> > > disconnected or broken. Once external power supply is connected then
> > > switch PCIe link to higher GEN and width.
> >
> > That sounds plausible, but of course nothing there is specific to the
> > Intel Gateway, so we should implement this generically so it would
> > work on all hardware.
> >
> > I'm not sure what the interface should look like -- should it be a
> > low-level interface as you propose where userspace would have to
> > identify each link of interest, or is there some system-wide
> > power/performance knob that could tune all links?  Cc'd Rafael and
> > linux-pm in case they have ideas.
> 
> Frankly, I need some time to think about this and, in case you are
> wondering about whether or not it has been discussed with me already,
> it hasn't.
> 
> At this point I can only say that since we have an ASPM interface,
> which IMO is not fantastic, it may be good to come up with a common
> link management interface.

The ASPM interface hasn't been merged yet, so if you have better
ideas, now is the time.  That one is definitely very low-level, partly
because the first use case is working around defects in a specific
device.

Some sort of unification of link management does sound like a good
idea.

Bjorn
Bjorn Helgaas Oct. 30, 2019, 10:14 p.m. UTC | #13
[+cc Heiner, Rajat]

On Tue, Oct 29, 2019 at 05:31:18PM +0800, Dilip Kota wrote:
> On 10/22/2019 8:59 PM, Bjorn Helgaas wrote:
> > [+cc Rafael, linux-pm, beginning of discussion at
> > https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
> > 
> > On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
> > > On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> > > > On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> > > > > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > > > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > > > > of changing link width and speed on the fly.
> > > > Please add more details about why this is needed.  Since you're adding
> > > > sysfs files, it sounds like it's not actually the *driver* that needs
> > > > this; it's something in userspace?
> > > We have use cases to change the link speed and width on the fly.
> > > One is EMI check and other is power saving.  Some battery backed
> > > applications have to switch PCIe link from higher GEN to GEN1 and
> > > width to x1. During the cases like external power supply got
> > > disconnected or broken. Once external power supply is connected then
> > > switch PCIe link to higher GEN and width.
> > That sounds plausible, but of course nothing there is specific to the
> > Intel Gateway, so we should implement this generically so it would
> > work on all hardware.
> Agree.
> > 
> > I'm not sure what the interface should look like -- should it be a
> > low-level interface as you propose where userspace would have to
> > identify each link of interest, or is there some system-wide
> > power/performance knob that could tune all links?  Cc'd Rafael and
> > linux-pm in case they have ideas.
> 
> To my knowledge sysfs is the appropriate way to go.
> If there are any other best possible knobs, will be helpful.

I agree sysfs is the right place for it; my question was whether we
should have files like:

  /sys/.../0000:00:1f.3/pcie_speed
  /sys/.../0000:00:1f.3/pcie_width

as I think this patch would add (BTW, please include sample paths like
the above in the commit log), or whether there should be a more global
thing that would affect all the links in the system.

I think the low-level files like you propose would be better because
one might want to tune link performance differently for different
types of devices and workloads.

We also have to decide if these files should be associated with the
device at the upstream or downstream end of the link.  For ASPM, the
current proposal [1] has the files at the downstream end on the theory
that the GPU, NIC, NVMe device, etc is the user-recognizable one.
Also, neither ASPM nor link speed/width make any sense unless there
*is* a device at the downstream end, so putting them there
automatically makes them visible only when they're useful.

Rafael had some concerns about the proposed ASPM interface [2], but I
don't know what they are yet.

For ASPM we added a "link_pm" directory, and maybe that's too
specific.  Maybe it should be a generic "link_mgt" or even "pcie"
directory that could contain both the ASPM and width/speed files.

There's also a change coming to put AER stats in something like this:

  /sys/.../0000:00:1f.3/aer_stats/correctable_rx_err
  /sys/.../0000:00:1f.3/aer_stats/correctable_timeout
  /sys/.../0000:00:1f.3/aer_stats/fatal_TLP
  ...

It would certainly be good to have some organizational scheme or we'll
end up with a real hodge-podge.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/commit/?h=pci/aspm&id=ad46fe1c733656611788e2cd59793e891ed7ded7
[2] https://lore.kernel.org/r/CAJZ5v0jdxR4roEUC_Hs3puCzGY4ThdLsi_XcxfBUUxqruP4z7A@mail.gmail.com
Rafael J. Wysocki Oct. 30, 2019, 11:31 p.m. UTC | #14
On Wed, Oct 30, 2019 at 11:14 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
>
> [+cc Heiner, Rajat]
>
> On Tue, Oct 29, 2019 at 05:31:18PM +0800, Dilip Kota wrote:
> > On 10/22/2019 8:59 PM, Bjorn Helgaas wrote:
> > > [+cc Rafael, linux-pm, beginning of discussion at
> > > https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
> > >
> > > On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
> > > > On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> > > > > On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> > > > > > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > > > > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > > > > > of changing link width and speed on the fly.
> > > > > Please add more details about why this is needed.  Since you're adding
> > > > > sysfs files, it sounds like it's not actually the *driver* that needs
> > > > > this; it's something in userspace?
> > > > We have use cases to change the link speed and width on the fly.
> > > > One is EMI check and other is power saving.  Some battery backed
> > > > applications have to switch PCIe link from higher GEN to GEN1 and
> > > > width to x1. During the cases like external power supply got
> > > > disconnected or broken. Once external power supply is connected then
> > > > switch PCIe link to higher GEN and width.
> > > That sounds plausible, but of course nothing there is specific to the
> > > Intel Gateway, so we should implement this generically so it would
> > > work on all hardware.
> > Agree.
> > >
> > > I'm not sure what the interface should look like -- should it be a
> > > low-level interface as you propose where userspace would have to
> > > identify each link of interest, or is there some system-wide
> > > power/performance knob that could tune all links?  Cc'd Rafael and
> > > linux-pm in case they have ideas.
> >
> > To my knowledge sysfs is the appropriate way to go.
> > If there are any other best possible knobs, will be helpful.
>
> I agree sysfs is the right place for it; my question was whether we
> should have files like:
>
>   /sys/.../0000:00:1f.3/pcie_speed
>   /sys/.../0000:00:1f.3/pcie_width
>
> as I think this patch would add (BTW, please include sample paths like
> the above in the commit log), or whether there should be a more global
> thing that would affect all the links in the system.
>
> I think the low-level files like you propose would be better because
> one might want to tune link performance differently for different
> types of devices and workloads.
>
> We also have to decide if these files should be associated with the
> device at the upstream or downstream end of the link.  For ASPM, the
> current proposal [1] has the files at the downstream end on the theory
> that the GPU, NIC, NVMe device, etc is the user-recognizable one.
> Also, neither ASPM nor link speed/width make any sense unless there
> *is* a device at the downstream end, so putting them there
> automatically makes them visible only when they're useful.
>
> Rafael had some concerns about the proposed ASPM interface [2], but I
> don't know what they are yet.

I was talking about the existing ASPM interface in sysfs.  The new one
I still have to review, but I'm kind of wondering what about people
who used the old one?  Would it be supported going forward?

> For ASPM we added a "link_pm" directory, and maybe that's too
> specific.  Maybe it should be a generic "link_mgt" or even "pcie"
> directory that could contain both the ASPM and width/speed files.
>
> There's also a change coming to put AER stats in something like this:
>
>   /sys/.../0000:00:1f.3/aer_stats/correctable_rx_err
>   /sys/.../0000:00:1f.3/aer_stats/correctable_timeout
>   /sys/.../0000:00:1f.3/aer_stats/fatal_TLP
>   ...
>
> It would certainly be good to have some organizational scheme or we'll
> end up with a real hodge-podge.
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/commit/?h=pci/aspm&id=ad46fe1c733656611788e2cd59793e891ed7ded7
> [2] https://lore.kernel.org/r/CAJZ5v0jdxR4roEUC_Hs3puCzGY4ThdLsi_XcxfBUUxqruP4z7A@mail.gmail.com
Bjorn Helgaas Oct. 31, 2019, 2:56 a.m. UTC | #15
On Thu, Oct 31, 2019 at 12:31:44AM +0100, Rafael J. Wysocki wrote:
> On Wed, Oct 30, 2019 at 11:14 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
> >
> > [+cc Heiner, Rajat]
> >
> > On Tue, Oct 29, 2019 at 05:31:18PM +0800, Dilip Kota wrote:
> > > On 10/22/2019 8:59 PM, Bjorn Helgaas wrote:
> > > > [+cc Rafael, linux-pm, beginning of discussion at
> > > > https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
> > > >
> > > > On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
> > > > > On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> > > > > > On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> > > > > > > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > > > > > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > > > > > > of changing link width and speed on the fly.
> > > > > > Please add more details about why this is needed.  Since you're adding
> > > > > > sysfs files, it sounds like it's not actually the *driver* that needs
> > > > > > this; it's something in userspace?
> > > > > We have use cases to change the link speed and width on the fly.
> > > > > One is EMI check and other is power saving.  Some battery backed
> > > > > applications have to switch PCIe link from higher GEN to GEN1 and
> > > > > width to x1. During the cases like external power supply got
> > > > > disconnected or broken. Once external power supply is connected then
> > > > > switch PCIe link to higher GEN and width.
> > > > That sounds plausible, but of course nothing there is specific to the
> > > > Intel Gateway, so we should implement this generically so it would
> > > > work on all hardware.
> > > Agree.
> > > >
> > > > I'm not sure what the interface should look like -- should it be a
> > > > low-level interface as you propose where userspace would have to
> > > > identify each link of interest, or is there some system-wide
> > > > power/performance knob that could tune all links?  Cc'd Rafael and
> > > > linux-pm in case they have ideas.
> > >
> > > To my knowledge sysfs is the appropriate way to go.
> > > If there are any other best possible knobs, will be helpful.
> >
> > I agree sysfs is the right place for it; my question was whether we
> > should have files like:
> >
> >   /sys/.../0000:00:1f.3/pcie_speed
> >   /sys/.../0000:00:1f.3/pcie_width
> >
> > as I think this patch would add (BTW, please include sample paths like
> > the above in the commit log), or whether there should be a more global
> > thing that would affect all the links in the system.
> >
> > I think the low-level files like you propose would be better because
> > one might want to tune link performance differently for different
> > types of devices and workloads.
> >
> > We also have to decide if these files should be associated with the
> > device at the upstream or downstream end of the link.  For ASPM, the
> > current proposal [1] has the files at the downstream end on the theory
> > that the GPU, NIC, NVMe device, etc is the user-recognizable one.
> > Also, neither ASPM nor link speed/width make any sense unless there
> > *is* a device at the downstream end, so putting them there
> > automatically makes them visible only when they're useful.
> >
> > Rafael had some concerns about the proposed ASPM interface [2], but I
> > don't know what they are yet.
> 
> I was talking about the existing ASPM interface in sysfs.  The new one
> I still have to review, but I'm kind of wondering what about people
> who used the old one?  Would it be supported going forward?

The old one interface was enabled by CONFIG_PCIEASPM_DEBUG.  Red Hat
doesn't enable that.  Ubuntu does.  I *thought* we heard from a
Canonical person who said they didn't have any tools that used it, but
I can't find that now.  I don't know about SUSE.

So the idea was to drop it on the theory that nobody is using it.
Possibly that's too aggressive.

Bjorn
Rafael J. Wysocki Oct. 31, 2019, 9:13 a.m. UTC | #16
On Thursday, October 31, 2019 3:56:37 AM CET Bjorn Helgaas wrote:
> On Thu, Oct 31, 2019 at 12:31:44AM +0100, Rafael J. Wysocki wrote:
> > On Wed, Oct 30, 2019 at 11:14 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
> > >
> > > [+cc Heiner, Rajat]
> > >
> > > On Tue, Oct 29, 2019 at 05:31:18PM +0800, Dilip Kota wrote:
> > > > On 10/22/2019 8:59 PM, Bjorn Helgaas wrote:
> > > > > [+cc Rafael, linux-pm, beginning of discussion at
> > > > > https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
> > > > >
> > > > > On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
> > > > > > On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> > > > > > > On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> > > > > > > > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > > > > > > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > > > > > > > of changing link width and speed on the fly.
> > > > > > > Please add more details about why this is needed.  Since you're adding
> > > > > > > sysfs files, it sounds like it's not actually the *driver* that needs
> > > > > > > this; it's something in userspace?
> > > > > > We have use cases to change the link speed and width on the fly.
> > > > > > One is EMI check and other is power saving.  Some battery backed
> > > > > > applications have to switch PCIe link from higher GEN to GEN1 and
> > > > > > width to x1. During the cases like external power supply got
> > > > > > disconnected or broken. Once external power supply is connected then
> > > > > > switch PCIe link to higher GEN and width.
> > > > > That sounds plausible, but of course nothing there is specific to the
> > > > > Intel Gateway, so we should implement this generically so it would
> > > > > work on all hardware.
> > > > Agree.
> > > > >
> > > > > I'm not sure what the interface should look like -- should it be a
> > > > > low-level interface as you propose where userspace would have to
> > > > > identify each link of interest, or is there some system-wide
> > > > > power/performance knob that could tune all links?  Cc'd Rafael and
> > > > > linux-pm in case they have ideas.
> > > >
> > > > To my knowledge sysfs is the appropriate way to go.
> > > > If there are any other best possible knobs, will be helpful.
> > >
> > > I agree sysfs is the right place for it; my question was whether we
> > > should have files like:
> > >
> > >   /sys/.../0000:00:1f.3/pcie_speed
> > >   /sys/.../0000:00:1f.3/pcie_width
> > >
> > > as I think this patch would add (BTW, please include sample paths like
> > > the above in the commit log), or whether there should be a more global
> > > thing that would affect all the links in the system.
> > >
> > > I think the low-level files like you propose would be better because
> > > one might want to tune link performance differently for different
> > > types of devices and workloads.
> > >
> > > We also have to decide if these files should be associated with the
> > > device at the upstream or downstream end of the link.  For ASPM, the
> > > current proposal [1] has the files at the downstream end on the theory
> > > that the GPU, NIC, NVMe device, etc is the user-recognizable one.
> > > Also, neither ASPM nor link speed/width make any sense unless there
> > > *is* a device at the downstream end, so putting them there
> > > automatically makes them visible only when they're useful.
> > >
> > > Rafael had some concerns about the proposed ASPM interface [2], but I
> > > don't know what they are yet.
> > 
> > I was talking about the existing ASPM interface in sysfs.  The new one
> > I still have to review, but I'm kind of wondering what about people
> > who used the old one?  Would it be supported going forward?
> 
> The old one interface was enabled by CONFIG_PCIEASPM_DEBUG.  Red Hat
> doesn't enable that.  Ubuntu does.  I *thought* we heard from a
> Canonical person who said they didn't have any tools that used it, but
> I can't find that now.  I don't know about SUSE.
> 
> So the idea was to drop it on the theory that nobody is using it.
> Possibly that's too aggressive.

Well, one problem is that the "old" (actually existing) I/F has made it
to one of my OSS EU presentation slides (I did not talk to this particular
slide, but it is there in the deck that's available for downloading), so who
knows who is going to use it. :-)

So I guess that there's a risk that needs to be taken into consideration.

What could be done, in principle, would be to make the new I/F depend on
CONFIG_PCIEASPM_DEBUG being unset and provide the "old" one when it is set.

In any case, the pcie_aspm.policy module parameter cannot be dropped, because
AFAICS there is quite a bit of user space using it (e.g. TLP).

Cheers!
Dilip Kota Oct. 31, 2019, 10:47 a.m. UTC | #17
On 10/31/2019 6:14 AM, Bjorn Helgaas wrote:
> [+cc Heiner, Rajat]
>
> On Tue, Oct 29, 2019 at 05:31:18PM +0800, Dilip Kota wrote:
>> On 10/22/2019 8:59 PM, Bjorn Helgaas wrote:
>>> [+cc Rafael, linux-pm, beginning of discussion at
>>> https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
>>>
>>> On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
>>>> On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
>>>>> On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
>>>>>> On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
>>>>>>> PCIe RC driver on Intel Gateway SoCs have a requirement
>>>>>>> of changing link width and speed on the fly.
>>>>> Please add more details about why this is needed.  Since you're adding
>>>>> sysfs files, it sounds like it's not actually the *driver* that needs
>>>>> this; it's something in userspace?
>>>> We have use cases to change the link speed and width on the fly.
>>>> One is EMI check and other is power saving.  Some battery backed
>>>> applications have to switch PCIe link from higher GEN to GEN1 and
>>>> width to x1. During the cases like external power supply got
>>>> disconnected or broken. Once external power supply is connected then
>>>> switch PCIe link to higher GEN and width.
>>> That sounds plausible, but of course nothing there is specific to the
>>> Intel Gateway, so we should implement this generically so it would
>>> work on all hardware.
>> Agree.
>>> I'm not sure what the interface should look like -- should it be a
>>> low-level interface as you propose where userspace would have to
>>> identify each link of interest, or is there some system-wide
>>> power/performance knob that could tune all links?  Cc'd Rafael and
>>> linux-pm in case they have ideas.
>> To my knowledge sysfs is the appropriate way to go.
>> If there are any other best possible knobs, will be helpful.
> I agree sysfs is the right place for it; my question was whether we
> should have files like:
>
>    /sys/.../0000:00:1f.3/pcie_speed
>    /sys/.../0000:00:1f.3/pcie_width
>
> as I think this patch would add (BTW, please include sample paths like
> the above in the commit log), or whether there should be a more global
> thing that would affect all the links in the system.
Sure, i will add them.
>
> I think the low-level files like you propose would be better because
> one might want to tune link performance differently for different
> types of devices and workloads.
>
> We also have to decide if these files should be associated with the
> device at the upstream or downstream end of the link.  For ASPM, the
> current proposal [1] has the files at the downstream end on the theory
> that the GPU, NIC, NVMe device, etc is the user-recognizable one.
> Also, neither ASPM nor link speed/width make any sense unless there
> *is* a device at the downstream end, so putting them there
> automatically makes them visible only when they're useful.

This patch places the speed and width in the host controller directory.
/sys/.../xxx.pcie/pcie_speed
/sys/.../xxx.pcie/pcie_width

I agree with you partially,  because i am having couple of points making 
me to
keep speed and width change entries in controller directory:

-- For changing the speed/width with device node, software ends up 
traversing to the controller
   from the device and do the operations.
-- Change speed and width are performed at controller level,
-- Keeping speed and width in controller gives a perspective (to the 
user) of changing
them only once irrespective of no. of devices.
-- For speed and link change in Synopsys PCIe controller, specific 
registers need to be configured.
    This prevents or complicates adding the speed and width change 
functionality in pci-sysfs or pci framework.

Regards,
Dilip
Bjorn Helgaas Oct. 31, 2019, 1:01 p.m. UTC | #18
On Thu, Oct 31, 2019 at 10:13:11AM +0100, Rafael J. Wysocki wrote:
> On Thursday, October 31, 2019 3:56:37 AM CET Bjorn Helgaas wrote:
> > On Thu, Oct 31, 2019 at 12:31:44AM +0100, Rafael J. Wysocki wrote:
> > > On Wed, Oct 30, 2019 at 11:14 PM Bjorn Helgaas <helgaas@kernel.org> wrote:

> > > > Rafael had some concerns about the proposed ASPM interface [2], but I
> > > > don't know what they are yet.
> > > 
> > > I was talking about the existing ASPM interface in sysfs.  The new one
> > > I still have to review, but I'm kind of wondering what about people
> > > who used the old one?  Would it be supported going forward?
> > 
> > The old one interface was enabled by CONFIG_PCIEASPM_DEBUG.  Red Hat
> > doesn't enable that.  Ubuntu does.  I *thought* we heard from a
> > Canonical person who said they didn't have any tools that used it, but
> > I can't find that now.  I don't know about SUSE.
> > 
> > So the idea was to drop it on the theory that nobody is using it.
> > Possibly that's too aggressive.
> 
> Well, one problem is that the "old" (actually existing) I/F has made it
> to one of my OSS EU presentation slides (I did not talk to this particular
> slide, but it is there in the deck that's available for downloading), so who
> knows who is going to use it. :-)
> 
> So I guess that there's a risk that needs to be taken into consideration.
> 
> What could be done, in principle, would be to make the new I/F depend on
> CONFIG_PCIEASPM_DEBUG being unset and provide the "old" one when it is set.

I would prefer to enable the new interface unconditionally to make it
easier for userspace tools like powertop to use it.

I think the existing and new interfaces could coexist, with the
existing interface being enabled by CONFIG_PCIEASPM_DEBUG as it is
today.  The patch that removes the existing interface is the last in
the series and could easily be dropped.

> In any case, the pcie_aspm.policy module parameter cannot be dropped, because
> AFAICS there is quite a bit of user space using it (e.g. TLP).

What is TLP?  Since CONFIG_PCIEASPM is a bool, aspm.o is built in
statically if enabled, so pcie_aspm.policy is effectively a boot-time
kernel parameter, right?  We don't have a plan to remove it.

Bjorn
Bjorn Helgaas Oct. 31, 2019, 1:22 p.m. UTC | #19
On Thu, Oct 31, 2019 at 06:47:10PM +0800, Dilip Kota wrote:
> On 10/31/2019 6:14 AM, Bjorn Helgaas wrote:
> > On Tue, Oct 29, 2019 at 05:31:18PM +0800, Dilip Kota wrote:
> > > On 10/22/2019 8:59 PM, Bjorn Helgaas wrote:
> > > > [+cc Rafael, linux-pm, beginning of discussion at
> > > > https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
> > > > 
> > > > On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
> > > > > On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> > > > > > On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> > > > > > > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > > > > > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > > > > > > of changing link width and speed on the fly.
> > > > > > Please add more details about why this is needed.  Since
> > > > > > you're adding sysfs files, it sounds like it's not
> > > > > > actually the *driver* that needs this; it's something in
> > > > > > userspace?
> > > > > We have use cases to change the link speed and width on the fly.
> > > > > One is EMI check and other is power saving.  Some battery backed
> > > > > applications have to switch PCIe link from higher GEN to GEN1 and
> > > > > width to x1. During the cases like external power supply got
> > > > > disconnected or broken. Once external power supply is connected then
> > > > > switch PCIe link to higher GEN and width.
> > > > That sounds plausible, but of course nothing there is specific to the
> > > > Intel Gateway, so we should implement this generically so it would
> > > > work on all hardware.
> > > Agree.
> > > > I'm not sure what the interface should look like -- should it be a
> > > > low-level interface as you propose where userspace would have to
> > > > identify each link of interest, or is there some system-wide
> > > > power/performance knob that could tune all links?  Cc'd Rafael and
> > > > linux-pm in case they have ideas.
> > > To my knowledge sysfs is the appropriate way to go.
> > > If there are any other best possible knobs, will be helpful.
> > I agree sysfs is the right place for it; my question was whether we
> > should have files like:
> > 
> >    /sys/.../0000:00:1f.3/pcie_speed
> >    /sys/.../0000:00:1f.3/pcie_width
> > 
> > as I think this patch would add (BTW, please include sample paths like
> > the above in the commit log), or whether there should be a more global
> > thing that would affect all the links in the system.
> Sure, i will add them.
> > 
> > I think the low-level files like you propose would be better because
> > one might want to tune link performance differently for different
> > types of devices and workloads.
> > 
> > We also have to decide if these files should be associated with the
> > device at the upstream or downstream end of the link.  For ASPM, the
> > current proposal [1] has the files at the downstream end on the theory
> > that the GPU, NIC, NVMe device, etc is the user-recognizable one.
> > Also, neither ASPM nor link speed/width make any sense unless there
> > *is* a device at the downstream end, so putting them there
> > automatically makes them visible only when they're useful.
> 
> This patch places the speed and width in the host controller directory.
> /sys/.../xxx.pcie/pcie_speed
> /sys/.../xxx.pcie/pcie_width
> 
> I agree with you partially,  because i am having couple of points
> making me to keep speed and width change entries in controller
> directory:
> 
> -- For changing the speed/width with device node, software ends up
>    traversing to the controller from the device and do the
>    operations.
> -- Change speed and width are performed at controller level,

The controller is effectively a Root Complex, which may contain
several Root Ports.  I have the impression that the Synopsys
controller only supports a single Root Port, but that's just a detail
of the Synopsys implementation.  I think it should be possible to
configure the width/speed of each Root Port individually.

> -- Keeping speed and width in controller gives a perspective (to the
>    user) of changing them only once irrespective of no. of devices.

What if there's a switch?  If we change the width/speed of the link
between the Root Port and the Switch Upstream Port, that doesn't do
anything about the links from the Switch Downstream Ports.

> -- For speed and link change in Synopsys PCIe controller, specific
>    registers need to be configured.  This prevents or complicates
>    adding the speed and width change functionality in pci-sysfs or
>    pci framework.

Don't the Link Control and related registers in PCIe spec give us
enough control to manage the link width/speed of *all* links,
including those from Root Ports and Switch Downstream Ports?

If the Synopsys controller requires controller-specific registers,
that sounds to me like it doesn't quite conform to the spec.  Maybe
that means we would need some sort of quirk or controller callback?

Bjorn
Dilip Kota Nov. 1, 2019, 5:47 a.m. UTC | #20
On 10/31/2019 9:22 PM, Bjorn Helgaas wrote:
> On Thu, Oct 31, 2019 at 06:47:10PM +0800, Dilip Kota wrote:
>> On 10/31/2019 6:14 AM, Bjorn Helgaas wrote:
>>> On Tue, Oct 29, 2019 at 05:31:18PM +0800, Dilip Kota wrote:
>>>> On 10/22/2019 8:59 PM, Bjorn Helgaas wrote:
>>>>> [+cc Rafael, linux-pm, beginning of discussion at
>>>>> https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
>>>>>
>>>>> On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
>>>>>> On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
>>>>>>> On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
>>>>>>>> On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
>>>>>>>>> PCIe RC driver on Intel Gateway SoCs have a requirement
>>>>>>>>> of changing link width and speed on the fly.
>>>>>>> Please add more details about why this is needed.  Since
>>>>>>> you're adding sysfs files, it sounds like it's not
>>>>>>> actually the *driver* that needs this; it's something in
>>>>>>> userspace?
>>>>>> We have use cases to change the link speed and width on the fly.
>>>>>> One is EMI check and other is power saving.  Some battery backed
>>>>>> applications have to switch PCIe link from higher GEN to GEN1 and
>>>>>> width to x1. During the cases like external power supply got
>>>>>> disconnected or broken. Once external power supply is connected then
>>>>>> switch PCIe link to higher GEN and width.
>>>>> That sounds plausible, but of course nothing there is specific to the
>>>>> Intel Gateway, so we should implement this generically so it would
>>>>> work on all hardware.
>>>> Agree.
>>>>> I'm not sure what the interface should look like -- should it be a
>>>>> low-level interface as you propose where userspace would have to
>>>>> identify each link of interest, or is there some system-wide
>>>>> power/performance knob that could tune all links?  Cc'd Rafael and
>>>>> linux-pm in case they have ideas.
>>>> To my knowledge sysfs is the appropriate way to go.
>>>> If there are any other best possible knobs, will be helpful.
>>> I agree sysfs is the right place for it; my question was whether we
>>> should have files like:
>>>
>>>     /sys/.../0000:00:1f.3/pcie_speed
>>>     /sys/.../0000:00:1f.3/pcie_width
>>>
>>> as I think this patch would add (BTW, please include sample paths like
>>> the above in the commit log), or whether there should be a more global
>>> thing that would affect all the links in the system.
>> Sure, i will add them.
>>> I think the low-level files like you propose would be better because
>>> one might want to tune link performance differently for different
>>> types of devices and workloads.
>>>
>>> We also have to decide if these files should be associated with the
>>> device at the upstream or downstream end of the link.  For ASPM, the
>>> current proposal [1] has the files at the downstream end on the theory
>>> that the GPU, NIC, NVMe device, etc is the user-recognizable one.
>>> Also, neither ASPM nor link speed/width make any sense unless there
>>> *is* a device at the downstream end, so putting them there
>>> automatically makes them visible only when they're useful.
>> This patch places the speed and width in the host controller directory.
>> /sys/.../xxx.pcie/pcie_speed
>> /sys/.../xxx.pcie/pcie_width
>>
>> I agree with you partially,  because i am having couple of points
>> making me to keep speed and width change entries in controller
>> directory:
>>
>> -- For changing the speed/width with device node, software ends up
>>     traversing to the controller from the device and do the
>>     operations.
>> -- Change speed and width are performed at controller level,
> The controller is effectively a Root Complex, which may contain
> several Root Ports.  I have the impression that the Synopsys
> controller only supports a single Root Port, but that's just a detail
> of the Synopsys implementation.  I think it should be possible to
> configure the width/speed of each Root Port individually.
>
>> -- Keeping speed and width in controller gives a perspective (to the
>>     user) of changing them only once irrespective of no. of devices.
> What if there's a switch?  If we change the width/speed of the link
> between the Root Port and the Switch Upstream Port, that doesn't do
> anything about the links from the Switch Downstream Ports.
I missed to evaluate the multiple root port and switch scenarios, thanks 
for pointing it.
Then, placing the link speed and width change entries in the device node 
will be appropriate.
Software will traverse to the respective port or bus through the device 
node and does the changes.
>
>> -- For speed and link change in Synopsys PCIe controller, specific
>>     registers need to be configured.  This prevents or complicates
>>     adding the speed and width change functionality in pci-sysfs or
>>     pci framework.
> Don't the Link Control and related registers in PCIe spec give us
> enough control to manage the link width/speed of *all* links,
> including those from Root Ports and Switch Downstream Ports?
>
> If the Synopsys controller requires controller-specific registers,
> that sounds to me like it doesn't quite conform to the spec.  Maybe
> that means we would need some sort of quirk or controller callback?
Yes, Synopsys has specific registers configuration for link width 
resizing and speed change.
I will evaluate the possible mechanism for plugging in the controller 
specific changes to the framework.

Regards,
Dilip
>
> Bjorn
Andrew Murray Nov. 1, 2019, 11:30 a.m. UTC | #21
On Fri, Nov 01, 2019 at 01:47:39PM +0800, Dilip Kota wrote:
> 
> On 10/31/2019 9:22 PM, Bjorn Helgaas wrote:
> > On Thu, Oct 31, 2019 at 06:47:10PM +0800, Dilip Kota wrote:
> > > On 10/31/2019 6:14 AM, Bjorn Helgaas wrote:
> > > > On Tue, Oct 29, 2019 at 05:31:18PM +0800, Dilip Kota wrote:
> > > > > On 10/22/2019 8:59 PM, Bjorn Helgaas wrote:
> > > > > > [+cc Rafael, linux-pm, beginning of discussion at
> > > > > > https://lore.kernel.org/r/d8574605f8e70f41ce1e88ccfb56b63c8f85e4df.1571638827.git.eswara.kota@linux.intel.com]
> > > > > > 
> > > > > > On Tue, Oct 22, 2019 at 05:27:38PM +0800, Dilip Kota wrote:
> > > > > > > On 10/22/2019 1:18 AM, Bjorn Helgaas wrote:
> > > > > > > > On Mon, Oct 21, 2019 at 02:38:50PM +0100, Andrew Murray wrote:
> > > > > > > > > On Mon, Oct 21, 2019 at 02:39:20PM +0800, Dilip Kota wrote:
> > > > > > > > > > PCIe RC driver on Intel Gateway SoCs have a requirement
> > > > > > > > > > of changing link width and speed on the fly.
> > > > > > > > Please add more details about why this is needed.  Since
> > > > > > > > you're adding sysfs files, it sounds like it's not
> > > > > > > > actually the *driver* that needs this; it's something in
> > > > > > > > userspace?
> > > > > > > We have use cases to change the link speed and width on the fly.
> > > > > > > One is EMI check and other is power saving.  Some battery backed
> > > > > > > applications have to switch PCIe link from higher GEN to GEN1 and
> > > > > > > width to x1. During the cases like external power supply got
> > > > > > > disconnected or broken. Once external power supply is connected then
> > > > > > > switch PCIe link to higher GEN and width.
> > > > > > That sounds plausible, but of course nothing there is specific to the
> > > > > > Intel Gateway, so we should implement this generically so it would
> > > > > > work on all hardware.
> > > > > Agree.
> > > > > > I'm not sure what the interface should look like -- should it be a
> > > > > > low-level interface as you propose where userspace would have to
> > > > > > identify each link of interest, or is there some system-wide
> > > > > > power/performance knob that could tune all links?  Cc'd Rafael and
> > > > > > linux-pm in case they have ideas.
> > > > > To my knowledge sysfs is the appropriate way to go.
> > > > > If there are any other best possible knobs, will be helpful.
> > > > I agree sysfs is the right place for it; my question was whether we
> > > > should have files like:
> > > > 
> > > >     /sys/.../0000:00:1f.3/pcie_speed
> > > >     /sys/.../0000:00:1f.3/pcie_width
> > > > 
> > > > as I think this patch would add (BTW, please include sample paths like
> > > > the above in the commit log), or whether there should be a more global
> > > > thing that would affect all the links in the system.
> > > Sure, i will add them.
> > > > I think the low-level files like you propose would be better because
> > > > one might want to tune link performance differently for different
> > > > types of devices and workloads.
> > > > 
> > > > We also have to decide if these files should be associated with the
> > > > device at the upstream or downstream end of the link.  For ASPM, the
> > > > current proposal [1] has the files at the downstream end on the theory
> > > > that the GPU, NIC, NVMe device, etc is the user-recognizable one.
> > > > Also, neither ASPM nor link speed/width make any sense unless there
> > > > *is* a device at the downstream end, so putting them there
> > > > automatically makes them visible only when they're useful.
> > > This patch places the speed and width in the host controller directory.
> > > /sys/.../xxx.pcie/pcie_speed
> > > /sys/.../xxx.pcie/pcie_width
> > > 
> > > I agree with you partially,  because i am having couple of points
> > > making me to keep speed and width change entries in controller
> > > directory:
> > > 
> > > -- For changing the speed/width with device node, software ends up
> > >     traversing to the controller from the device and do the
> > >     operations.
> > > -- Change speed and width are performed at controller level,
> > The controller is effectively a Root Complex, which may contain
> > several Root Ports.  I have the impression that the Synopsys
> > controller only supports a single Root Port, but that's just a detail
> > of the Synopsys implementation.  I think it should be possible to
> > configure the width/speed of each Root Port individually.
> > 
> > > -- Keeping speed and width in controller gives a perspective (to the
> > >     user) of changing them only once irrespective of no. of devices.
> > What if there's a switch?  If we change the width/speed of the link
> > between the Root Port and the Switch Upstream Port, that doesn't do
> > anything about the links from the Switch Downstream Ports.
> I missed to evaluate the multiple root port and switch scenarios, thanks for
> pointing it.
> Then, placing the link speed and width change entries in the device node
> will be appropriate.
> Software will traverse to the respective port or bus through the device node
> and does the changes.
> > 
> > > -- For speed and link change in Synopsys PCIe controller, specific
> > >     registers need to be configured.  This prevents or complicates
> > >     adding the speed and width change functionality in pci-sysfs or
> > >     pci framework.
> > Don't the Link Control and related registers in PCIe spec give us
> > enough control to manage the link width/speed of *all* links,
> > including those from Root Ports and Switch Downstream Ports?
> > 
> > If the Synopsys controller requires controller-specific registers,
> > that sounds to me like it doesn't quite conform to the spec.  Maybe
> > that means we would need some sort of quirk or controller callback?
> Yes, Synopsys has specific registers configuration for link width resizing
> and speed change.
> I will evaluate the possible mechanism for plugging in the controller
> specific changes to the framework.

According to the spec, "Software is permitted to restrict the maximum speed
of Link operation and set the perferred Link speed by setting the value in the
Target Link Speed field in the Upstream component." - This is the Link Control
2 Register, and a link retrain should then be triggered.

With regards to this proposed sysfs API - I wonder if this implies we should
also disable 'Hardware Autonomous Speed Disable' to prevent a link speed
change for device specific reasons?

In my view, this means we *can* have a sysfs control for limiting the link
speed using standard PCI means - though callbacks and quirks may be needed
for host bridge controllers and similar.

With regards to link width, I can't see any obvious software initiated means
to change the link width (they are all RO) - though a device can change its
own link width so long as it's 'Hardware Autonomous Width Disable' bit is
clear. So whilst there may be some benefit for the initial links of a few
host bridge controllers that may opt-in to some framework for this - such an
API wouldn't benefit the majority of links in a PCI fabric. Perhaps this
(width) should be DWC specific.

Thanks,

Andrew Murray

> 
> Regards,
> Dilip
> > 
> > Bjorn
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index 4c391bfd681a..662fdcb4f2d6 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -474,6 +474,15 @@  int dw_pcie_link_up(struct dw_pcie *pci)
 		(!(val & PCIE_PORT_DEBUG1_LINK_IN_TRAINING)));
 }
 
+void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width)
+{
+	u32 val;
+
+	val =  dw_pcie_readl_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL);
+	val &= ~(PORT_MLTI_LNK_WDTH_CHNG | PORT_MLTI_LNK_WDTH);
+	val |= PORT_MLTI_LNK_WDTH_CHNG | lane_width;
+	dw_pcie_writel_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL, val);
+}
 
 void dw_pcie_upconfig_setup(struct dw_pcie *pci)
 {
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 3beac10e4a4c..fcf0442341fd 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -67,6 +67,8 @@ 
 #define PCIE_MSI_INTR0_STATUS		0x830
 
 #define PCIE_PORT_MULTI_LANE_CTRL	0x8C0
+#define PORT_MLTI_LNK_WDTH		GENMASK(5, 0)
+#define PORT_MLTI_LNK_WDTH_CHNG		BIT(6)
 #define PORT_MLTI_UPCFG_SUPPORT		BIT(7)
 
 #define PCIE_ATU_VIEWPORT		0x900
@@ -282,6 +284,7 @@  void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
 u32 dw_pcie_read_atu(struct dw_pcie *pci, u32 reg, size_t size);
 void dw_pcie_write_atu(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
 int dw_pcie_link_up(struct dw_pcie *pci);
+void dw_pcie_link_width_resize(struct dw_pcie *pci, u32 lane_width);
 void dw_pcie_upconfig_setup(struct dw_pcie *pci);
 void dw_pcie_link_speed_change(struct dw_pcie *pci, bool enable);
 void dw_pcie_link_set_n_fts(struct dw_pcie *pci, u32 n_fts);
diff --git a/drivers/pci/controller/dwc/pcie-intel-gw.c b/drivers/pci/controller/dwc/pcie-intel-gw.c
index 9142c70db808..b9be0921671d 100644
--- a/drivers/pci/controller/dwc/pcie-intel-gw.c
+++ b/drivers/pci/controller/dwc/pcie-intel-gw.c
@@ -146,6 +146,22 @@  static void intel_pcie_ltssm_disable(struct intel_pcie_port *lpp)
 	pcie_app_wr_mask(lpp, PCIE_APP_CCR_LTSSM_ENABLE, 0, PCIE_APP_CCR);
 }
 
+static const char *pcie_link_gen_to_str(int gen)
+{
+	switch (gen) {
+	case PCIE_LINK_SPEED_GEN1:
+		return "2.5";
+	case PCIE_LINK_SPEED_GEN2:
+		return "5.0";
+	case PCIE_LINK_SPEED_GEN3:
+		return "8.0";
+	case PCIE_LINK_SPEED_GEN4:
+		return "16.0";
+	default:
+		return "???";
+	}
+}
+
 static void intel_pcie_link_setup(struct intel_pcie_port *lpp)
 {
 	u32 val;
@@ -444,6 +460,91 @@  static int intel_pcie_host_setup(struct intel_pcie_port *lpp)
 	return ret;
 }
 
+static ssize_t pcie_link_status_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
+	u32 reg, width, gen;
+
+	reg = pcie_rc_cfg_rd(lpp, PCIE_CAP_OFST + PCI_EXP_LNKCTL);
+	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, reg >> 16);
+	gen = FIELD_GET(PCI_EXP_LNKSTA_CLS, reg >> 16);
+
+	if (gen > lpp->max_speed)
+		return -EINVAL;
+
+	return sprintf(buf, "Port %2u Width x%u Speed %s GT/s\n", lpp->id,
+		       width, pcie_link_gen_to_str(gen));
+}
+static DEVICE_ATTR_RO(pcie_link_status);
+
+static ssize_t pcie_speed_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t len)
+{
+	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > lpp->max_speed)
+		return -EINVAL;
+
+	lpp->link_gen = val;
+	intel_pcie_max_speed_setup(lpp);
+	dw_pcie_link_speed_change(&lpp->pci, false);
+	dw_pcie_link_speed_change(&lpp->pci, true);
+
+	return len;
+}
+static DEVICE_ATTR_WO(pcie_speed);
+
+/*
+ * Link width change on the fly is not always successful.
+ * It also depends on the partner.
+ */
+static ssize_t pcie_width_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t len)
+{
+	struct intel_pcie_port *lpp = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	lpp = dev_get_drvdata(dev);
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > lpp->max_width)
+		return -EINVAL;
+
+	/* HW auto bandwidth negotiation must be enabled */
+	pcie_rc_cfg_wr_mask(lpp, PCI_EXP_LNKCTL_HAWD, 0,
+			    PCIE_CAP_OFST + PCI_EXP_LNKCTL);
+	dw_pcie_link_width_resize(&lpp->pci, val);
+
+	return len;
+}
+static DEVICE_ATTR_WO(pcie_width);
+
+static struct attribute *pcie_cfg_attrs[] = {
+	&dev_attr_pcie_link_status.attr,
+	&dev_attr_pcie_speed.attr,
+	&dev_attr_pcie_width.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(pcie_cfg);
+
+static int intel_pcie_sysfs_init(struct intel_pcie_port *lpp)
+{
+	return devm_device_add_groups(lpp->pci.dev, pcie_cfg_groups);
+}
+
 static void __intel_pcie_remove(struct intel_pcie_port *lpp)
 {
 	intel_pcie_core_irq_disable(lpp);
@@ -490,8 +591,17 @@  static int intel_pcie_rc_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct intel_pcie_port *lpp = dev_get_drvdata(pci->dev);
+	int ret;
 
-	return intel_pcie_host_setup(lpp);
+	ret = intel_pcie_host_setup(lpp);
+	if (ret)
+		return ret;
+
+	ret = intel_pcie_sysfs_init(lpp);
+	if (ret)
+		__intel_pcie_remove(lpp);
+
+	return ret;
 }
 
 int intel_pcie_msi_init(struct pcie_port *pp)