diff mbox series

[v12,12/15] PCI/RCEC: Add RCiEP's linked RCEC to AER/ERR

Message ID 20201121001036.8560-13-sean.v.kelley@intel.com
State New
Headers show
Series Add RCEC handling to PCI/AER | expand

Commit Message

Kelley, Sean V Nov. 21, 2020, 12:10 a.m. UTC
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>

When attempting error recovery for an RCiEP associated with an RCEC device,
there needs to be a way to update the Root Error Status, the Uncorrectable
Error Status and the Uncorrectable Error Severity of the parent RCEC.  In
some non-native cases in which there is no OS-visible device associated
with the RCiEP, there is nothing to act upon as the firmware is acting
before the OS.

Add handling for the linked RCEC in AER/ERR while taking into account
non-native cases.

Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
Link: https://lore.kernel.org/r/20201002184735.1229220-12-seanvk.dev@oregontracks.org
Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/pci/pcie/aer.c | 46 +++++++++++++++++++++++++++++++-----------
 drivers/pci/pcie/err.c | 20 +++++++++---------
 2 files changed, 44 insertions(+), 22 deletions(-)

Comments

Bjorn Helgaas Dec. 2, 2020, 11:44 p.m. UTC | #1
On Fri, Nov 20, 2020 at 04:10:33PM -0800, Sean V Kelley wrote:
> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> 
> When attempting error recovery for an RCiEP associated with an RCEC device,
> there needs to be a way to update the Root Error Status, the Uncorrectable
> Error Status and the Uncorrectable Error Severity of the parent RCEC.  In
> some non-native cases in which there is no OS-visible device associated
> with the RCiEP, there is nothing to act upon as the firmware is acting
> before the OS.
> 
> Add handling for the linked RCEC in AER/ERR while taking into account
> non-native cases.
> 
> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
> Link: https://lore.kernel.org/r/20201002184735.1229220-12-seanvk.dev@oregontracks.org
> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  drivers/pci/pcie/aer.c | 46 +++++++++++++++++++++++++++++++-----------
>  drivers/pci/pcie/err.c | 20 +++++++++---------
>  2 files changed, 44 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 0ba0b47ae751..51389a6ee4ca 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -1358,29 +1358,51 @@ static int aer_probe(struct pcie_device *dev)
>   */
>  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>  {
> -	int aer = dev->aer_cap;
> +	int type = pci_pcie_type(dev);
> +	struct pci_dev *root;
> +	int aer = 0;
> +	int rc = 0;
>  	u32 reg32;
> -	int rc;
>  
> -	if (pcie_aer_is_native(dev)) {
> +	if (type == PCI_EXP_TYPE_RC_END)
> +		/*
> +		 * The reset should only clear the Root Error Status
> +		 * of the RCEC. Only perform this for the
> +		 * native case, i.e., an RCEC is present.
> +		 */
> +		root = dev->rcec;
> +	else
> +		root = dev;
> +
> +	if (root)
> +		aer = dev->aer_cap;
> +
> +	if ((aer) && pcie_aer_is_native(dev)) {
>  		/* Disable Root's interrupt in response to error messages */
> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>  		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
>  	}
>  
> -	rc = pci_bus_error_reset(dev);
> -	pci_info(dev, "Root Port link has been reset (%d)\n", rc);
> +	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
> +		if (pcie_has_flr(dev)) {
> +			rc = pcie_flr(dev);
> +			pci_info(dev, "has been reset (%d)\n", rc);

Maybe:

  +             } else {
  +                     rc = -ENOTTY;
  +                     pci_info(dev, "not reset (no FLR support)\n");

Or do we want to pretend the device was reset and return
PCI_ERS_RESULT_RECOVERED?

> +	} else {
> +		rc = pci_bus_error_reset(dev);
> +		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
> +	}
>  
> -	if (pcie_aer_is_native(dev)) {
> +	if ((aer) && pcie_aer_is_native(dev)) {
>  		/* Clear Root Error Status */
> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
>  
>  		/* Enable Root Port's interrupt in response to error messages */
> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>  		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
>  	}
>  
>  	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index 7883c9791562..cbc5abfe767b 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -148,10 +148,10 @@ static int report_resume(struct pci_dev *dev, void *data)
>  
>  /**
>   * pci_walk_bridge - walk bridges potentially AER affected
> - * @bridge:	bridge which may be a Port, an RCEC with associated RCiEPs,
> - *		or an RCiEP associated with an RCEC
> - * @cb:		callback to be called for each device found
> - * @userdata:	arbitrary pointer to be passed to callback
> + * @bridge   bridge which may be an RCEC with associated RCiEPs,
> + *           or a Port.
> + * @cb       callback to be called for each device found
> + * @userdata arbitrary pointer to be passed to callback.
>   *
>   * If the device provided is a bridge, walk the subordinate bus, including
>   * any bridged devices on buses under this bus.  Call the provided callback
> @@ -164,8 +164,14 @@ static void pci_walk_bridge(struct pci_dev *bridge,
>  			    int (*cb)(struct pci_dev *, void *),
>  			    void *userdata)
>  {
> +	/*
> +	 * In a non-native case where there is no OS-visible reporting
> +	 * device the bridge will be NULL, i.e., no RCEC, no Downstream Port.

I don't quite understand this comment.  I see that in the non-native
case, the reporting device may not be OS-visible.  But I don't
understand why the comment is *here*.

If "bridge" can be NULL here, we should test that before dereferencing
"bridge->subordinate".

>  	if (bridge->subordinate)
>  		pci_walk_bus(bridge->subordinate, cb, userdata);
> +	else if (bridge->rcec)
> +		cb(bridge->rcec, userdata);

And I don't understand what's going on here.  In this case, I *think*
"bridge" is an RCiEP and "bridge->rcec" is the related RCEC, so it
looks like we'll call report_frozen_detected(), report_mmio_enabled(),
etc for the RCEC driver.  I would think we'd want the RCiEP driver.

Sorry if I'm missing the obvious.

>  	else
>  		cb(bridge, userdata);
>  }
> @@ -194,12 +200,6 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  	pci_dbg(bridge, "broadcast error_detected message\n");
>  	if (state == pci_channel_io_frozen) {
>  		pci_walk_bridge(bridge, report_frozen_detected, &status);
> -		if (type == PCI_EXP_TYPE_RC_END) {
> -			pci_warn(dev, "subordinate device reset not possible for RCiEP\n");
> -			status = PCI_ERS_RESULT_NONE;
> -			goto failed;
> -		}
> -
>  		status = reset_subordinates(bridge);
>  		if (status != PCI_ERS_RESULT_RECOVERED) {
>  			pci_warn(bridge, "subordinate device reset failed\n");
> -- 
> 2.29.2
>
Kelley, Sean V Dec. 3, 2020, 12:51 a.m. UTC | #2
> On Dec 2, 2020, at 3:44 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
> On Fri, Nov 20, 2020 at 04:10:33PM -0800, Sean V Kelley wrote:
>> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>> 
>> When attempting error recovery for an RCiEP associated with an RCEC device,
>> there needs to be a way to update the Root Error Status, the Uncorrectable
>> Error Status and the Uncorrectable Error Severity of the parent RCEC.  In
>> some non-native cases in which there is no OS-visible device associated
>> with the RCiEP, there is nothing to act upon as the firmware is acting
>> before the OS.
>> 
>> Add handling for the linked RCEC in AER/ERR while taking into account
>> non-native cases.
>> 
>> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
>> Link: https://lore.kernel.org/r/20201002184735.1229220-12-seanvk.dev@oregontracks.org
>> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
>> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> ---
>> drivers/pci/pcie/aer.c | 46 +++++++++++++++++++++++++++++++-----------
>> drivers/pci/pcie/err.c | 20 +++++++++---------
>> 2 files changed, 44 insertions(+), 22 deletions(-)
>> 
>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>> index 0ba0b47ae751..51389a6ee4ca 100644
>> --- a/drivers/pci/pcie/aer.c
>> +++ b/drivers/pci/pcie/aer.c
>> @@ -1358,29 +1358,51 @@ static int aer_probe(struct pcie_device *dev)
>>  */
>> static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>> {
>> -	int aer = dev->aer_cap;
>> +	int type = pci_pcie_type(dev);
>> +	struct pci_dev *root;
>> +	int aer = 0;
>> +	int rc = 0;
>> 	u32 reg32;
>> -	int rc;
>> 
>> -	if (pcie_aer_is_native(dev)) {
>> +	if (type == PCI_EXP_TYPE_RC_END)
>> +		/*
>> +		 * The reset should only clear the Root Error Status
>> +		 * of the RCEC. Only perform this for the
>> +		 * native case, i.e., an RCEC is present.
>> +		 */
>> +		root = dev->rcec;
>> +	else
>> +		root = dev;
>> +
>> +	if (root)
>> +		aer = dev->aer_cap;
>> +
>> +	if ((aer) && pcie_aer_is_native(dev)) {
>> 		/* Disable Root's interrupt in response to error messages */
>> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>> 		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
>> 	}
>> 
>> -	rc = pci_bus_error_reset(dev);
>> -	pci_info(dev, "Root Port link has been reset (%d)\n", rc);
>> +	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
>> +		if (pcie_has_flr(dev)) {
>> +			rc = pcie_flr(dev);
>> +			pci_info(dev, "has been reset (%d)\n", rc);
> 
> Maybe:
> 
>  +             } else {
>  +                     rc = -ENOTTY;
>  +                     pci_info(dev, "not reset (no FLR support)\n");
> 
> Or do we want to pretend the device was reset and return
> PCI_ERS_RESULT_RECOVERED?

We are currently doing the latter now with the default of rc = 0 above and so  I’m not sure the extra detail here on the absence of FLR support is of value.


> 
>> +	} else {
>> +		rc = pci_bus_error_reset(dev);
>> +		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
>> +	}
>> 
>> -	if (pcie_aer_is_native(dev)) {
>> +	if ((aer) && pcie_aer_is_native(dev)) {
>> 		/* Clear Root Error Status */
>> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
>> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
>> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
>> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
>> 
>> 		/* Enable Root Port's interrupt in response to error messages */
>> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>> 		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
>> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
>> 	}
>> 
>> 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
>> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
>> index 7883c9791562..cbc5abfe767b 100644
>> --- a/drivers/pci/pcie/err.c
>> +++ b/drivers/pci/pcie/err.c
>> @@ -148,10 +148,10 @@ static int report_resume(struct pci_dev *dev, void *data)
>> 
>> /**
>>  * pci_walk_bridge - walk bridges potentially AER affected
>> - * @bridge:	bridge which may be a Port, an RCEC with associated RCiEPs,
>> - *		or an RCiEP associated with an RCEC
>> - * @cb:		callback to be called for each device found
>> - * @userdata:	arbitrary pointer to be passed to callback
>> + * @bridge   bridge which may be an RCEC with associated RCiEPs,
>> + *           or a Port.
>> + * @cb       callback to be called for each device found
>> + * @userdata arbitrary pointer to be passed to callback.
>>  *
>>  * If the device provided is a bridge, walk the subordinate bus, including
>>  * any bridged devices on buses under this bus.  Call the provided callback
>> @@ -164,8 +164,14 @@ static void pci_walk_bridge(struct pci_dev *bridge,
>> 			    int (*cb)(struct pci_dev *, void *),
>> 			    void *userdata)
>> {
>> +	/*
>> +	 * In a non-native case where there is no OS-visible reporting
>> +	 * device the bridge will be NULL, i.e., no RCEC, no Downstream Port.
> 
> I don't quite understand this comment.  I see that in the non-native
> case, the reporting device may not be OS-visible.  But I don't
> understand why the comment is *here*.
> 
> If "bridge" can be NULL here, we should test that before dereferencing
> "bridge->subordinate".

Wrongly worded.  The subordinate may be NULL or the associated RCEC may be NULL, not the “bridge”.
However, per below, we should not be trying to call report_frozen_detected(), report_mmio_enabled() via
the associated RCEC’s driver, but rather the CB for the RCiEP itself.

Going back to this conversation,

https://lore.kernel.org/linux-pci/20201016172210.GA86168@bjorn-Precision-5520/

"Looks like *this* is the patch where the "no subordinate bus" case
becomes possible?  If you agree, I can just move the test here, no
need to repost.”

It is actually the case we are only dealing with the absence of a subordinate bus.

> 
>> 	if (bridge->subordinate)
>> 		pci_walk_bus(bridge->subordinate, cb, userdata);
>> +	else if (bridge->rcec)
>> +		cb(bridge->rcec, userdata);
> 
> And I don't understand what's going on here.  In this case, I *think*
> "bridge" is an RCiEP and "bridge->rcec" is the related RCEC, so it
> looks like we'll call report_frozen_detected(), report_mmio_enabled(),
> etc for the RCEC driver.  I would think we'd want the RCiEP driver.

Indeed, the bridge->rcec here is the dev->rcec in which the dev is the RCiEP.

And we don’t need that conditional here, it should just hit the device driver’s routines.

This is an unfortunate side effect of the RCiEP being subordinate to the RCEC but for
 purposes of linking, it gives the impression of the other way around.


> 
> Sorry if I'm missing the obvious.

Actually your observations are on point.

Thanks,

Sean

> 
>> 	else
>> 		cb(bridge, userdata);
>> }
>> @@ -194,12 +200,6 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>> 	pci_dbg(bridge, "broadcast error_detected message\n");
>> 	if (state == pci_channel_io_frozen) {
>> 		pci_walk_bridge(bridge, report_frozen_detected, &status);
>> -		if (type == PCI_EXP_TYPE_RC_END) {
>> -			pci_warn(dev, "subordinate device reset not possible for RCiEP\n");
>> -			status = PCI_ERS_RESULT_NONE;
>> -			goto failed;
>> -		}
>> -
>> 		status = reset_subordinates(bridge);
>> 		if (status != PCI_ERS_RESULT_RECOVERED) {
>> 			pci_warn(bridge, "subordinate device reset failed\n");
>> -- 
>> 2.29.2
>>
Bjorn Helgaas Dec. 4, 2020, 12:01 a.m. UTC | #3
On Thu, Dec 03, 2020 at 12:51:40AM +0000, Kelley, Sean V wrote:
> > On Dec 2, 2020, at 3:44 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Fri, Nov 20, 2020 at 04:10:33PM -0800, Sean V Kelley wrote:
> >> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> >> 
> >> When attempting error recovery for an RCiEP associated with an RCEC device,
> >> there needs to be a way to update the Root Error Status, the Uncorrectable
> >> Error Status and the Uncorrectable Error Severity of the parent RCEC.  In
> >> some non-native cases in which there is no OS-visible device associated
> >> with the RCiEP, there is nothing to act upon as the firmware is acting
> >> before the OS.
> >> 
> >> Add handling for the linked RCEC in AER/ERR while taking into account
> >> non-native cases.
> >> 
> >> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
> >> Link: https://lore.kernel.org/r/20201002184735.1229220-12-seanvk.dev@oregontracks.org
> >> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
> >> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> >> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> >> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >> ---
> >> drivers/pci/pcie/aer.c | 46 +++++++++++++++++++++++++++++++-----------
> >> drivers/pci/pcie/err.c | 20 +++++++++---------
> >> 2 files changed, 44 insertions(+), 22 deletions(-)
> >> 
> >> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> >> index 0ba0b47ae751..51389a6ee4ca 100644
> >> --- a/drivers/pci/pcie/aer.c
> >> +++ b/drivers/pci/pcie/aer.c
> >> @@ -1358,29 +1358,51 @@ static int aer_probe(struct pcie_device *dev)
> >>  */
> >> static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> >> {
> >> -	int aer = dev->aer_cap;
> >> +	int type = pci_pcie_type(dev);
> >> +	struct pci_dev *root;
> >> +	int aer = 0;
> >> +	int rc = 0;
> >> 	u32 reg32;
> >> -	int rc;
> >> 
> >> -	if (pcie_aer_is_native(dev)) {
> >> +	if (type == PCI_EXP_TYPE_RC_END)
> >> +		/*
> >> +		 * The reset should only clear the Root Error Status
> >> +		 * of the RCEC. Only perform this for the
> >> +		 * native case, i.e., an RCEC is present.
> >> +		 */
> >> +		root = dev->rcec;
> >> +	else
> >> +		root = dev;
> >> +
> >> +	if (root)
> >> +		aer = dev->aer_cap;
> >> +
> >> +	if ((aer) && pcie_aer_is_native(dev)) {
> >> 		/* Disable Root's interrupt in response to error messages */
> >> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> >> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> >> 		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> >> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> >> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
> >> 	}
> >> 
> >> -	rc = pci_bus_error_reset(dev);
> >> -	pci_info(dev, "Root Port link has been reset (%d)\n", rc);
> >> +	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
> >> +		if (pcie_has_flr(dev)) {
> >> +			rc = pcie_flr(dev);
> >> +			pci_info(dev, "has been reset (%d)\n", rc);
> > 
> > Maybe:
> > 
> >  +             } else {
> >  +                     rc = -ENOTTY;
> >  +                     pci_info(dev, "not reset (no FLR support)\n");
> > 
> > Or do we want to pretend the device was reset and return
> > PCI_ERS_RESULT_RECOVERED?
> 
> We are currently doing the latter now with the default of rc = 0
> above and so  I’m not sure the extra detail here on the absence of
> FLR support is of value.

So to make sure I understand the proposal here, for RCECs and RCiEPs
that don't support FLR, you're saying you want to continue silently
and return PCI_ERS_RESULT_RECOVERED and let the drivers assume their
device was reset when it was not?

> >> +	} else {
> >> +		rc = pci_bus_error_reset(dev);
> >> +		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
> >> +	}
> >> 
> >> -	if (pcie_aer_is_native(dev)) {
> >> +	if ((aer) && pcie_aer_is_native(dev)) {
> >> 		/* Clear Root Error Status */
> >> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
> >> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
> >> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
> >> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
> >> 
> >> 		/* Enable Root Port's interrupt in response to error messages */
> >> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> >> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> >> 		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> >> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> >> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
> >> 	}
> >> 
> >> 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;

> >> @@ -164,8 +164,14 @@ static void pci_walk_bridge(struct pci_dev *bridge,
> >> 			    int (*cb)(struct pci_dev *, void *),
> >> 			    void *userdata)
> >> {
> >> +	/*
> >> +	 * In a non-native case where there is no OS-visible reporting
> >> +	 * device the bridge will be NULL, i.e., no RCEC, no Downstream Port.
> > 
> > I don't quite understand this comment.  I see that in the non-native
> > case, the reporting device may not be OS-visible.  But I don't
> > understand why the comment is *here*.
> > 
> > If "bridge" can be NULL here, we should test that before dereferencing
> > "bridge->subordinate".
> 
> Wrongly worded.  The subordinate may be NULL or the associated RCEC
> may be NULL, not the “bridge”.  However, per below, we should not be
> trying to call report_frozen_detected(), report_mmio_enabled() via
> the associated RCEC’s driver, but rather the CB for the RCiEP
> itself.

OK, so if we want a comment here, I assume it would be along the lines
of:

  If "bridge" has no subordinate bus, it's an RCEC or an RCiEP.  In
  either of those cases, we want to call the callback on "bridge"
  itself.

> Going back to this conversation,
> 
> https://lore.kernel.org/linux-pci/20201016172210.GA86168@bjorn-Precision-5520/
> 
> "Looks like *this* is the patch where the "no subordinate bus" case
> becomes possible?  If you agree, I can just move the test here, no
> need to repost.”
> 
> It is actually the case we are only dealing with the absence of a
> subordinate bus.
> 
> >> 	if (bridge->subordinate)
> >> 		pci_walk_bus(bridge->subordinate, cb, userdata);
> >> +	else if (bridge->rcec)
> >> +		cb(bridge->rcec, userdata);
> > 
> > And I don't understand what's going on here.  In this case, I *think*
> > "bridge" is an RCiEP and "bridge->rcec" is the related RCEC, so it
> > looks like we'll call report_frozen_detected(), report_mmio_enabled(),
> > etc for the RCEC driver.  I would think we'd want the RCiEP driver.
> 
> Indeed, the bridge->rcec here is the dev->rcec in which the dev is
> the RCiEP.
> 
> And we don’t need that conditional here, it should just hit the
> device driver’s routines.

So IIUC, the code would be:

  if (bridge->subordinate)
    pci_walk_bus(bridge->subordinate, cb, userdata);
  else
    cb(bridge, userdata);    /* RCEC or RCiEP */

Right?

I pushed a pci/err branch
(https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/log/?h=pci/err)
with some tweaks in these areas.  Diff from your v12 posting appended
below.  I split the RCEC/RCiEP error recovery pieces up a little bit
differently than in your posting.  Let me know if you see anything
that should be changed.  I dropped one of Jonathan's
reviewed/tested-by but probably should have dropped others to avoid
putting words in his mouth.

Not sure we're completely done, but we'll get there yet.  I definitely
want to make sure this happens this cycle.

Bjorn


diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index b86a92494345..4aa118edde35 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1366,33 +1366,38 @@ static int aer_probe(struct pcie_device *dev)
 }
 
 /**
- * aer_root_reset - reset link on Root Port
- * @dev: pointer to Root Port's pci_dev data structure
+ * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
+ * @dev: pointer to Root Port, RCEC, or RCiEP
  *
- * Invoked by Port Bus driver when performing link reset at Root Port.
+ * Invoked by Port Bus driver when performing reset.
  */
 static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 {
 	int type = pci_pcie_type(dev);
 	struct pci_dev *root;
-	int aer = 0;
-	int rc = 0;
+	int aer;
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
 	u32 reg32;
+	int rc;
 
+	/*
+	 * Only Root Ports and RCECs have AER Root Command and Root Status
+	 * registers.  If "dev" is an RCiEP, the relevant registers are in
+	 * the RCEC.
+	 */
 	if (type == PCI_EXP_TYPE_RC_END)
-		/*
-		 * The reset should only clear the Root Error Status
-		 * of the RCEC. Only perform this for the
-		 * native case, i.e., an RCEC is present.
-		 */
 		root = dev->rcec;
 	else
 		root = dev;
 
-	if (root)
-		aer = dev->aer_cap;
+	/*
+	 * If the platform retained control of AER, an RCiEP may not have
+	 * an RCEC visible to us, so dev->rcec ("root") may be NULL.  In
+	 * that case, firmware is responsible for these registers.
+	 */
+	aer = root ? root->aer_cap : 0;
 
-	if ((aer) && pcie_aer_is_native(dev)) {
+	if ((host->native_aer || pcie_ports_native) && aer) {
 		/* Disable Root's interrupt in response to error messages */
 		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
 		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
@@ -1403,13 +1408,15 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 		if (pcie_has_flr(dev)) {
 			rc = pcie_flr(dev);
 			pci_info(dev, "has been reset (%d)\n", rc);
+		} else {
+			pci_info(dev, "not reset (no FLR support)\n");
 		}
 	} else {
 		rc = pci_bus_error_reset(dev);
 		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
 	}
 
-	if ((aer) && pcie_aer_is_native(dev)) {
+	if ((host->native_aer || pcie_ports_native) && aer) {
 		/* Clear Root Error Status */
 		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
 		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index cbc5abfe767b..510f31f0ef6d 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -148,30 +148,23 @@ static int report_resume(struct pci_dev *dev, void *data)
 
 /**
  * pci_walk_bridge - walk bridges potentially AER affected
- * @bridge   bridge which may be an RCEC with associated RCiEPs,
- *           or a Port.
- * @cb       callback to be called for each device found
- * @userdata arbitrary pointer to be passed to callback.
+ * @bridge:	bridge which may be a Port, an RCEC, or an RCiEP
+ * @cb:		callback to be called for each device found
+ * @userdata:	arbitrary pointer to be passed to callback
  *
  * If the device provided is a bridge, walk the subordinate bus, including
  * any bridged devices on buses under this bus.  Call the provided callback
  * on each device found.
  *
- * If the device provided has no subordinate bus, call the callback on the
- * device itself.
+ * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP,
+ * call the callback on the device itself.
  */
 static void pci_walk_bridge(struct pci_dev *bridge,
 			    int (*cb)(struct pci_dev *, void *),
 			    void *userdata)
 {
-	/*
-	 * In a non-native case where there is no OS-visible reporting
-	 * device the bridge will be NULL, i.e., no RCEC, no Downstream Port.
-	 */
 	if (bridge->subordinate)
 		pci_walk_bus(bridge->subordinate, cb, userdata);
-	else if (bridge->rcec)
-		cb(bridge->rcec, userdata);
 	else
 		cb(bridge, userdata);
 }
@@ -183,11 +176,16 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 	int type = pci_pcie_type(dev);
 	struct pci_dev *bridge;
 	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
 
 	/*
-	 * Error recovery runs on all subordinates of the bridge.  If the
-	 * bridge detected the error, it is cleared at the end.  For RCiEPs
-	 * we should reset just the RCiEP itself.
+	 * If the error was detected by a Root Port, Downstream Port, RCEC,
+	 * or RCiEP, recovery runs on the device itself.  For Ports, that
+	 * also includes any subordinate devices.
+	 *
+	 * If it was detected by another device (Endpoint, etc), recovery
+	 * runs on the device and anything else under the same Port, i.e.,
+	 * everything under "bridge".
 	 */
 	if (type == PCI_EXP_TYPE_ROOT_PORT ||
 	    type == PCI_EXP_TYPE_DOWNSTREAM ||
@@ -232,11 +230,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 	pci_dbg(bridge, "broadcast resume message\n");
 	pci_walk_bridge(bridge, report_resume, &status);
 
-	if (type == PCI_EXP_TYPE_ROOT_PORT ||
-	    type == PCI_EXP_TYPE_DOWNSTREAM ||
-	    type == PCI_EXP_TYPE_RC_EC) {
-		if (pcie_aer_is_native(bridge))
-			pcie_clear_device_status(bridge);
+	/*
+	 * If we have native control of AER, clear error status in the Root
+	 * Port or Downstream Port that signaled the error.  If the
+	 * platform retained control of AER, it is responsible for clearing
+	 * this status.  In that case, the signaling device may not even be
+	 * visible to the OS.
+	 */
+	if (host->native_aer || pcie_ports_native) {
+		pcie_clear_device_status(bridge);
 		pci_aer_clear_nonfatal_status(bridge);
 	}
 	pci_info(bridge, "device recovery successful\n");
Kelley, Sean V Dec. 4, 2020, 5:17 p.m. UTC | #4
Hi Bjorn,

> On Dec 3, 2020, at 4:01 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
> On Thu, Dec 03, 2020 at 12:51:40AM +0000, Kelley, Sean V wrote:
>>> On Dec 2, 2020, at 3:44 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>> On Fri, Nov 20, 2020 at 04:10:33PM -0800, Sean V Kelley wrote:
>>>> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>> 
>>>> When attempting error recovery for an RCiEP associated with an RCEC device,
>>>> there needs to be a way to update the Root Error Status, the Uncorrectable
>>>> Error Status and the Uncorrectable Error Severity of the parent RCEC.  In
>>>> some non-native cases in which there is no OS-visible device associated
>>>> with the RCiEP, there is nothing to act upon as the firmware is acting
>>>> before the OS.
>>>> 
>>>> Add handling for the linked RCEC in AER/ERR while taking into account
>>>> non-native cases.
>>>> 
>>>> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
>>>> Link: https://lore.kernel.org/r/20201002184735.1229220-12-seanvk.dev@oregontracks.org
>>>> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
>>>> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
>>>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>>> ---
>>>> drivers/pci/pcie/aer.c | 46 +++++++++++++++++++++++++++++++-----------
>>>> drivers/pci/pcie/err.c | 20 +++++++++---------
>>>> 2 files changed, 44 insertions(+), 22 deletions(-)
>>>> 
>>>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>>>> index 0ba0b47ae751..51389a6ee4ca 100644
>>>> --- a/drivers/pci/pcie/aer.c
>>>> +++ b/drivers/pci/pcie/aer.c
>>>> @@ -1358,29 +1358,51 @@ static int aer_probe(struct pcie_device *dev)
>>>> */
>>>> static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>>>> {
>>>> -	int aer = dev->aer_cap;
>>>> +	int type = pci_pcie_type(dev);
>>>> +	struct pci_dev *root;
>>>> +	int aer = 0;
>>>> +	int rc = 0;
>>>> 	u32 reg32;
>>>> -	int rc;
>>>> 
>>>> -	if (pcie_aer_is_native(dev)) {
>>>> +	if (type == PCI_EXP_TYPE_RC_END)
>>>> +		/*
>>>> +		 * The reset should only clear the Root Error Status
>>>> +		 * of the RCEC. Only perform this for the
>>>> +		 * native case, i.e., an RCEC is present.
>>>> +		 */
>>>> +		root = dev->rcec;
>>>> +	else
>>>> +		root = dev;
>>>> +
>>>> +	if (root)
>>>> +		aer = dev->aer_cap;
>>>> +
>>>> +	if ((aer) && pcie_aer_is_native(dev)) {
>>>> 		/* Disable Root's interrupt in response to error messages */
>>>> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>> 		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>>>> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>> 	}
>>>> 
>>>> -	rc = pci_bus_error_reset(dev);
>>>> -	pci_info(dev, "Root Port link has been reset (%d)\n", rc);
>>>> +	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
>>>> +		if (pcie_has_flr(dev)) {
>>>> +			rc = pcie_flr(dev);
>>>> +			pci_info(dev, "has been reset (%d)\n", rc);
>>> 
>>> Maybe:
>>> 
>>> +             } else {
>>> +                     rc = -ENOTTY;
>>> +                     pci_info(dev, "not reset (no FLR support)\n");
>>> 
>>> Or do we want to pretend the device was reset and return
>>> PCI_ERS_RESULT_RECOVERED?
>> 
>> We are currently doing the latter now with the default of rc = 0
>> above and so  I’m not sure the extra detail here on the absence of
>> FLR support is of value.
> 
> So to make sure I understand the proposal here, for RCECs and RCiEPs
> that don't support FLR, you're saying you want to continue silently
> and return PCI_ERS_RESULT_RECOVERED and let the drivers assume their
> device was reset when it was not?

The setting of the  ‘rc’  on the FLR support is  fine to add to the else condition.
I had simply recalled in earlier discussion that pcie_has_flr() was needed due to quirky
behavior in some hardware and so was not sure that detail of having or not having flr was in fact
consitent/accurate.

> 
>>>> +	} else {
>>>> +		rc = pci_bus_error_reset(dev);
>>>> +		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
>>>> +	}
>>>> 
>>>> -	if (pcie_aer_is_native(dev)) {
>>>> +	if ((aer) && pcie_aer_is_native(dev)) {
>>>> 		/* Clear Root Error Status */
>>>> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
>>>> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
>>>> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
>>>> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
>>>> 
>>>> 		/* Enable Root Port's interrupt in response to error messages */
>>>> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>> 		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
>>>> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>> 	}
>>>> 
>>>> 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
> 
>>>> @@ -164,8 +164,14 @@ static void pci_walk_bridge(struct pci_dev *bridge,
>>>> 			    int (*cb)(struct pci_dev *, void *),
>>>> 			    void *userdata)
>>>> {
>>>> +	/*
>>>> +	 * In a non-native case where there is no OS-visible reporting
>>>> +	 * device the bridge will be NULL, i.e., no RCEC, no Downstream Port.
>>> 
>>> I don't quite understand this comment.  I see that in the non-native
>>> case, the reporting device may not be OS-visible.  But I don't
>>> understand why the comment is *here*.
>>> 
>>> If "bridge" can be NULL here, we should test that before dereferencing
>>> "bridge->subordinate".
>> 
>> Wrongly worded.  The subordinate may be NULL or the associated RCEC
>> may be NULL, not the “bridge”.  However, per below, we should not be
>> trying to call report_frozen_detected(), report_mmio_enabled() via
>> the associated RCEC’s driver, but rather the CB for the RCiEP
>> itself.
> 
> OK, so if we want a comment here, I assume it would be along the lines
> of:
> 
>  If "bridge" has no subordinate bus, it's an RCEC or an RCiEP.  In
>  either of those cases, we want to call the callback on "bridge"
>  itself.
> 

Correct.

>> Going back to this conversation,
>> 
>> https://lore.kernel.org/linux-pci/20201016172210.GA86168@bjorn-Precision-5520/
>> 
>> "Looks like *this* is the patch where the "no subordinate bus" case
>> becomes possible?  If you agree, I can just move the test here, no
>> need to repost.”
>> 
>> It is actually the case we are only dealing with the absence of a
>> subordinate bus.
>> 
>>>> 	if (bridge->subordinate)
>>>> 		pci_walk_bus(bridge->subordinate, cb, userdata);
>>>> +	else if (bridge->rcec)
>>>> +		cb(bridge->rcec, userdata);
>>> 
>>> And I don't understand what's going on here.  In this case, I *think*
>>> "bridge" is an RCiEP and "bridge->rcec" is the related RCEC, so it
>>> looks like we'll call report_frozen_detected(), report_mmio_enabled(),
>>> etc for the RCEC driver.  I would think we'd want the RCiEP driver.
>> 
>> Indeed, the bridge->rcec here is the dev->rcec in which the dev is
>> the RCiEP.
>> 
>> And we don’t need that conditional here, it should just hit the
>> device driver’s routines.
> 
> So IIUC, the code would be:
> 
>  if (bridge->subordinate)
>    pci_walk_bus(bridge->subordinate, cb, userdata);
>  else
>    cb(bridge, userdata);    /* RCEC or RCiEP */
> 
> Right?

Right, as before.

> 
> I pushed a pci/err branch
> (https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/log/?h=pci/err)
> with some tweaks in these areas.  Diff from your v12 posting appended
> below.  I split the RCEC/RCiEP error recovery pieces up a little bit
> differently than in your posting.  Let me know if you see anything
> that should be changed.  I dropped one of Jonathan's
> reviewed/tested-by but probably should have dropped others to avoid
> putting words in his mouth.

Thanks very much for doing this update.  It looks good to me.

> 
> Not sure we're completely done, but we'll get there yet.  I definitely
> want to make sure this happens this cycle.

I’ll retest today.

Thanks,

Sean


> 
> Bjorn
> 
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index b86a92494345..4aa118edde35 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -1366,33 +1366,38 @@ static int aer_probe(struct pcie_device *dev)
> }
> 
> /**
> - * aer_root_reset - reset link on Root Port
> - * @dev: pointer to Root Port's pci_dev data structure
> + * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
> + * @dev: pointer to Root Port, RCEC, or RCiEP
>  *
> - * Invoked by Port Bus driver when performing link reset at Root Port.
> + * Invoked by Port Bus driver when performing reset.
>  */
> static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> {
> 	int type = pci_pcie_type(dev);
> 	struct pci_dev *root;
> -	int aer = 0;
> -	int rc = 0;
> +	int aer;
> +	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
> 	u32 reg32;
> +	int rc;
> 
> +	/*
> +	 * Only Root Ports and RCECs have AER Root Command and Root Status
> +	 * registers.  If "dev" is an RCiEP, the relevant registers are in
> +	 * the RCEC.
> +	 */
> 	if (type == PCI_EXP_TYPE_RC_END)
> -		/*
> -		 * The reset should only clear the Root Error Status
> -		 * of the RCEC. Only perform this for the
> -		 * native case, i.e., an RCEC is present.
> -		 */
> 		root = dev->rcec;
> 	else
> 		root = dev;
> 
> -	if (root)
> -		aer = dev->aer_cap;
> +	/*
> +	 * If the platform retained control of AER, an RCiEP may not have
> +	 * an RCEC visible to us, so dev->rcec ("root") may be NULL.  In
> +	 * that case, firmware is responsible for these registers.
> +	 */
> +	aer = root ? root->aer_cap : 0;
> 
> -	if ((aer) && pcie_aer_is_native(dev)) {
> +	if ((host->native_aer || pcie_ports_native) && aer) {
> 		/* Disable Root's interrupt in response to error messages */
> 		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> 		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> @@ -1403,13 +1408,15 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> 		if (pcie_has_flr(dev)) {
> 			rc = pcie_flr(dev);
> 			pci_info(dev, "has been reset (%d)\n", rc);
> +		} else {
> +			pci_info(dev, "not reset (no FLR support)\n");
> 		}
> 	} else {
> 		rc = pci_bus_error_reset(dev);
> 		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
> 	}
> 
> -	if ((aer) && pcie_aer_is_native(dev)) {
> +	if ((host->native_aer || pcie_ports_native) && aer) {
> 		/* Clear Root Error Status */
> 		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
> 		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index cbc5abfe767b..510f31f0ef6d 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -148,30 +148,23 @@ static int report_resume(struct pci_dev *dev, void *data)
> 
> /**
>  * pci_walk_bridge - walk bridges potentially AER affected
> - * @bridge   bridge which may be an RCEC with associated RCiEPs,
> - *           or a Port.
> - * @cb       callback to be called for each device found
> - * @userdata arbitrary pointer to be passed to callback.
> + * @bridge:	bridge which may be a Port, an RCEC, or an RCiEP
> + * @cb:		callback to be called for each device found
> + * @userdata:	arbitrary pointer to be passed to callback
>  *
>  * If the device provided is a bridge, walk the subordinate bus, including
>  * any bridged devices on buses under this bus.  Call the provided callback
>  * on each device found.
>  *
> - * If the device provided has no subordinate bus, call the callback on the
> - * device itself.
> + * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP,
> + * call the callback on the device itself.
>  */
> static void pci_walk_bridge(struct pci_dev *bridge,
> 			    int (*cb)(struct pci_dev *, void *),
> 			    void *userdata)
> {
> -	/*
> -	 * In a non-native case where there is no OS-visible reporting
> -	 * device the bridge will be NULL, i.e., no RCEC, no Downstream Port.
> -	 */
> 	if (bridge->subordinate)
> 		pci_walk_bus(bridge->subordinate, cb, userdata);
> -	else if (bridge->rcec)
> -		cb(bridge->rcec, userdata);
> 	else
> 		cb(bridge, userdata);
> }
> @@ -183,11 +176,16 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> 	int type = pci_pcie_type(dev);
> 	struct pci_dev *bridge;
> 	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
> +	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
> 
> 	/*
> -	 * Error recovery runs on all subordinates of the bridge.  If the
> -	 * bridge detected the error, it is cleared at the end.  For RCiEPs
> -	 * we should reset just the RCiEP itself.
> +	 * If the error was detected by a Root Port, Downstream Port, RCEC,
> +	 * or RCiEP, recovery runs on the device itself.  For Ports, that
> +	 * also includes any subordinate devices.
> +	 *
> +	 * If it was detected by another device (Endpoint, etc), recovery
> +	 * runs on the device and anything else under the same Port, i.e.,
> +	 * everything under "bridge".
> 	 */
> 	if (type == PCI_EXP_TYPE_ROOT_PORT ||
> 	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> @@ -232,11 +230,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> 	pci_dbg(bridge, "broadcast resume message\n");
> 	pci_walk_bridge(bridge, report_resume, &status);
> 
> -	if (type == PCI_EXP_TYPE_ROOT_PORT ||
> -	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> -	    type == PCI_EXP_TYPE_RC_EC) {
> -		if (pcie_aer_is_native(bridge))
> -			pcie_clear_device_status(bridge);
> +	/*
> +	 * If we have native control of AER, clear error status in the Root
> +	 * Port or Downstream Port that signaled the error.  If the
> +	 * platform retained control of AER, it is responsible for clearing
> +	 * this status.  In that case, the signaling device may not even be
> +	 * visible to the OS.
> +	 */
> +	if (host->native_aer || pcie_ports_native) {
> +		pcie_clear_device_status(bridge);
> 		pci_aer_clear_nonfatal_status(bridge);
> 	}
> 	pci_info(bridge, "device recovery successful\n");
Bjorn Helgaas Dec. 4, 2020, 5:24 p.m. UTC | #5
On Fri, Dec 04, 2020 at 05:17:58PM +0000, Kelley, Sean V wrote:
> On Dec 3, 2020, at 4:01 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:

> > OK, so if we want a comment here, I assume it would be along the lines
> > of:
> > 
> >  If "bridge" has no subordinate bus, it's an RCEC or an RCiEP.  In
> >  either of those cases, we want to call the callback on "bridge"
> >  itself.
> 
> Correct.

OK, good.  I think the function comment now captures this.

> > So IIUC, the code would be:
> > 
> >  if (bridge->subordinate)
> >    pci_walk_bus(bridge->subordinate, cb, userdata);
> >  else
> >    cb(bridge, userdata);    /* RCEC or RCiEP */
> > 
> > Right?
> 
> Right, as before.

Updated to match this.

> > I pushed a pci/err branch
> > (https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/log/?h=pci/err)
> > with some tweaks in these areas.  Diff from your v12 posting appended
> > below.  I split the RCEC/RCiEP error recovery pieces up a little bit
> > differently than in your posting.  Let me know if you see anything
> > that should be changed.  I dropped one of Jonathan's
> > reviewed/tested-by but probably should have dropped others to avoid
> > putting words in his mouth.
> 
> Thanks very much for doing this update.  It looks good to me.

I just updated this for the "rc used before initialization" error.
Current head f74d7cf9f2bc ("PCI/AER: Add RCEC AER error injection
support").
Bjorn Helgaas Dec. 5, 2020, 9:30 p.m. UTC | #6
On Fri, Dec 04, 2020 at 05:17:58PM +0000, Kelley, Sean V wrote:
> > On Dec 3, 2020, at 4:01 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Thu, Dec 03, 2020 at 12:51:40AM +0000, Kelley, Sean V wrote:
> >>> On Dec 2, 2020, at 3:44 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> >>> On Fri, Nov 20, 2020 at 04:10:33PM -0800, Sean V Kelley wrote:
> >>>> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> >>>> 
> >>>> When attempting error recovery for an RCiEP associated with an RCEC device,
> >>>> there needs to be a way to update the Root Error Status, the Uncorrectable
> >>>> Error Status and the Uncorrectable Error Severity of the parent RCEC.  In
> >>>> some non-native cases in which there is no OS-visible device associated
> >>>> with the RCiEP, there is nothing to act upon as the firmware is acting
> >>>> before the OS.
> >>>> 
> >>>> Add handling for the linked RCEC in AER/ERR while taking into account
> >>>> non-native cases.
> >>>> 
> >>>> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
> >>>> Link: https://lore.kernel.org/r/20201002184735.1229220-12-seanvk.dev@oregontracks.org
> >>>> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
> >>>> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> >>>> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> >>>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >>>> ---
> >>>> drivers/pci/pcie/aer.c | 46 +++++++++++++++++++++++++++++++-----------
> >>>> drivers/pci/pcie/err.c | 20 +++++++++---------
> >>>> 2 files changed, 44 insertions(+), 22 deletions(-)
> >>>> 
> >>>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> >>>> index 0ba0b47ae751..51389a6ee4ca 100644
> >>>> --- a/drivers/pci/pcie/aer.c
> >>>> +++ b/drivers/pci/pcie/aer.c
> >>>> @@ -1358,29 +1358,51 @@ static int aer_probe(struct pcie_device *dev)
> >>>> */
> >>>> static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> >>>> {
> >>>> -	int aer = dev->aer_cap;
> >>>> +	int type = pci_pcie_type(dev);
> >>>> +	struct pci_dev *root;
> >>>> +	int aer = 0;
> >>>> +	int rc = 0;
> >>>> 	u32 reg32;
> >>>> -	int rc;
> >>>> 
> >>>> -	if (pcie_aer_is_native(dev)) {
> >>>> +	if (type == PCI_EXP_TYPE_RC_END)
> >>>> +		/*
> >>>> +		 * The reset should only clear the Root Error Status
> >>>> +		 * of the RCEC. Only perform this for the
> >>>> +		 * native case, i.e., an RCEC is present.
> >>>> +		 */
> >>>> +		root = dev->rcec;
> >>>> +	else
> >>>> +		root = dev;
> >>>> +
> >>>> +	if (root)
> >>>> +		aer = dev->aer_cap;
> >>>> +
> >>>> +	if ((aer) && pcie_aer_is_native(dev)) {
> >>>> 		/* Disable Root's interrupt in response to error messages */
> >>>> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> >>>> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> >>>> 		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> >>>> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> >>>> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
> >>>> 	}
> >>>> 
> >>>> -	rc = pci_bus_error_reset(dev);
> >>>> -	pci_info(dev, "Root Port link has been reset (%d)\n", rc);
> >>>> +	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
> >>>> +		if (pcie_has_flr(dev)) {
> >>>> +			rc = pcie_flr(dev);
> >>>> +			pci_info(dev, "has been reset (%d)\n", rc);
> >>> 
> >>> Maybe:
> >>> 
> >>> +             } else {
> >>> +                     rc = -ENOTTY;
> >>> +                     pci_info(dev, "not reset (no FLR support)\n");
> >>> 
> >>> Or do we want to pretend the device was reset and return
> >>> PCI_ERS_RESULT_RECOVERED?
> >> 
> >> We are currently doing the latter now with the default of rc = 0
> >> above and so  I’m not sure the extra detail here on the absence of
> >> FLR support is of value.
> > 
> > So to make sure I understand the proposal here, for RCECs and RCiEPs
> > that don't support FLR, you're saying you want to continue silently
> > and return PCI_ERS_RESULT_RECOVERED and let the drivers assume their
> > device was reset when it was not?
> 
> The setting of the ‘rc’ on the FLR support is fine to add to the
> else condition.  I had simply recalled in earlier discussion that
> pcie_has_flr() was needed due to quirky behavior in some hardware
> and so was not sure that detail of having or not having flr was in
> fact consitent/accurate.

I think we should do the following, unless you object:

    if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
	    if (pcie_has_flr(dev)) {
		    rc = pcie_flr(dev);
		    pci_info(dev, "has been reset (%d)\n", rc);
	    } else {
		    pci_info(dev, "not reset (no FLR support)\n");
		    rc = -ENOTTY;
	    }
    } else {
	    rc = pci_bus_error_reset(dev);
	    pci_info(dev, "Root Port link has been reset (%d)\n", rc);
    }
    ...
    return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;

Sorry, I should have done that in the proposed patch earlier; it's
what I was *thinking* but didn't get it transcribed into the code.

Bjorn
Kelley, Sean V Dec. 7, 2020, 5:23 p.m. UTC | #7
Hi Bjorn,

> On Dec 5, 2020, at 1:30 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
> On Fri, Dec 04, 2020 at 05:17:58PM +0000, Kelley, Sean V wrote:
>>> On Dec 3, 2020, at 4:01 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>> On Thu, Dec 03, 2020 at 12:51:40AM +0000, Kelley, Sean V wrote:
>>>>> On Dec 2, 2020, at 3:44 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>>>> On Fri, Nov 20, 2020 at 04:10:33PM -0800, Sean V Kelley wrote:
>>>>>> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>>>> 
>>>>>> When attempting error recovery for an RCiEP associated with an RCEC device,
>>>>>> there needs to be a way to update the Root Error Status, the Uncorrectable
>>>>>> Error Status and the Uncorrectable Error Severity of the parent RCEC.  In
>>>>>> some non-native cases in which there is no OS-visible device associated
>>>>>> with the RCiEP, there is nothing to act upon as the firmware is acting
>>>>>> before the OS.
>>>>>> 
>>>>>> Add handling for the linked RCEC in AER/ERR while taking into account
>>>>>> non-native cases.
>>>>>> 
>>>>>> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
>>>>>> Link: https://lore.kernel.org/r/20201002184735.1229220-12-seanvk.dev@oregontracks.org
>>>>>> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
>>>>>> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>>>> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
>>>>>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>>>>> ---
>>>>>> drivers/pci/pcie/aer.c | 46 +++++++++++++++++++++++++++++++-----------
>>>>>> drivers/pci/pcie/err.c | 20 +++++++++---------
>>>>>> 2 files changed, 44 insertions(+), 22 deletions(-)
>>>>>> 
>>>>>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>>>>>> index 0ba0b47ae751..51389a6ee4ca 100644
>>>>>> --- a/drivers/pci/pcie/aer.c
>>>>>> +++ b/drivers/pci/pcie/aer.c
>>>>>> @@ -1358,29 +1358,51 @@ static int aer_probe(struct pcie_device *dev)
>>>>>> */
>>>>>> static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>>>>>> {
>>>>>> -	int aer = dev->aer_cap;
>>>>>> +	int type = pci_pcie_type(dev);
>>>>>> +	struct pci_dev *root;
>>>>>> +	int aer = 0;
>>>>>> +	int rc = 0;
>>>>>> 	u32 reg32;
>>>>>> -	int rc;
>>>>>> 
>>>>>> -	if (pcie_aer_is_native(dev)) {
>>>>>> +	if (type == PCI_EXP_TYPE_RC_END)
>>>>>> +		/*
>>>>>> +		 * The reset should only clear the Root Error Status
>>>>>> +		 * of the RCEC. Only perform this for the
>>>>>> +		 * native case, i.e., an RCEC is present.
>>>>>> +		 */
>>>>>> +		root = dev->rcec;
>>>>>> +	else
>>>>>> +		root = dev;
>>>>>> +
>>>>>> +	if (root)
>>>>>> +		aer = dev->aer_cap;
>>>>>> +
>>>>>> +	if ((aer) && pcie_aer_is_native(dev)) {
>>>>>> 		/* Disable Root's interrupt in response to error messages */
>>>>>> -		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>>>> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>>>> 		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>>>>>> -		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>>>> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>>>> 	}
>>>>>> 
>>>>>> -	rc = pci_bus_error_reset(dev);
>>>>>> -	pci_info(dev, "Root Port link has been reset (%d)\n", rc);
>>>>>> +	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
>>>>>> +		if (pcie_has_flr(dev)) {
>>>>>> +			rc = pcie_flr(dev);
>>>>>> +			pci_info(dev, "has been reset (%d)\n", rc);
>>>>> 
>>>>> Maybe:
>>>>> 
>>>>> +             } else {
>>>>> +                     rc = -ENOTTY;
>>>>> +                     pci_info(dev, "not reset (no FLR support)\n");
>>>>> 
>>>>> Or do we want to pretend the device was reset and return
>>>>> PCI_ERS_RESULT_RECOVERED?
>>>> 
>>>> We are currently doing the latter now with the default of rc = 0
>>>> above and so  I’m not sure the extra detail here on the absence of
>>>> FLR support is of value.
>>> 
>>> So to make sure I understand the proposal here, for RCECs and RCiEPs
>>> that don't support FLR, you're saying you want to continue silently
>>> and return PCI_ERS_RESULT_RECOVERED and let the drivers assume their
>>> device was reset when it was not?
>> 
>> The setting of the ‘rc’ on the FLR support is fine to add to the
>> else condition.  I had simply recalled in earlier discussion that
>> pcie_has_flr() was needed due to quirky behavior in some hardware
>> and so was not sure that detail of having or not having flr was in
>> fact consitent/accurate.
> 
> I think we should do the following, unless you object:
> 
>    if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
> 	    if (pcie_has_flr(dev)) {
> 		    rc = pcie_flr(dev);
> 		    pci_info(dev, "has been reset (%d)\n", rc);
> 	    } else {
> 		    pci_info(dev, "not reset (no FLR support)\n");
> 		    rc = -ENOTTY;
> 	    }
>    } else {
> 	    rc = pci_bus_error_reset(dev);
> 	    pci_info(dev, "Root Port link has been reset (%d)\n", rc);
>    }
>    ...
>    return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
> 
> Sorry, I should have done that in the proposed patch earlier; it's
> what I was *thinking* but didn't get it transcribed into the code.


Looks good to me.

Thanks,

Sean


> 
> Bjorn
diff mbox series

Patch

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 0ba0b47ae751..51389a6ee4ca 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1358,29 +1358,51 @@  static int aer_probe(struct pcie_device *dev)
  */
 static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 {
-	int aer = dev->aer_cap;
+	int type = pci_pcie_type(dev);
+	struct pci_dev *root;
+	int aer = 0;
+	int rc = 0;
 	u32 reg32;
-	int rc;
 
-	if (pcie_aer_is_native(dev)) {
+	if (type == PCI_EXP_TYPE_RC_END)
+		/*
+		 * The reset should only clear the Root Error Status
+		 * of the RCEC. Only perform this for the
+		 * native case, i.e., an RCEC is present.
+		 */
+		root = dev->rcec;
+	else
+		root = dev;
+
+	if (root)
+		aer = dev->aer_cap;
+
+	if ((aer) && pcie_aer_is_native(dev)) {
 		/* Disable Root's interrupt in response to error messages */
-		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
 		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
-		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
 	}
 
-	rc = pci_bus_error_reset(dev);
-	pci_info(dev, "Root Port link has been reset (%d)\n", rc);
+	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
+		if (pcie_has_flr(dev)) {
+			rc = pcie_flr(dev);
+			pci_info(dev, "has been reset (%d)\n", rc);
+		}
+	} else {
+		rc = pci_bus_error_reset(dev);
+		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
+	}
 
-	if (pcie_aer_is_native(dev)) {
+	if ((aer) && pcie_aer_is_native(dev)) {
 		/* Clear Root Error Status */
-		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
-		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
+		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
+		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
 
 		/* Enable Root Port's interrupt in response to error messages */
-		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
 		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
-		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
 	}
 
 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 7883c9791562..cbc5abfe767b 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -148,10 +148,10 @@  static int report_resume(struct pci_dev *dev, void *data)
 
 /**
  * pci_walk_bridge - walk bridges potentially AER affected
- * @bridge:	bridge which may be a Port, an RCEC with associated RCiEPs,
- *		or an RCiEP associated with an RCEC
- * @cb:		callback to be called for each device found
- * @userdata:	arbitrary pointer to be passed to callback
+ * @bridge   bridge which may be an RCEC with associated RCiEPs,
+ *           or a Port.
+ * @cb       callback to be called for each device found
+ * @userdata arbitrary pointer to be passed to callback.
  *
  * If the device provided is a bridge, walk the subordinate bus, including
  * any bridged devices on buses under this bus.  Call the provided callback
@@ -164,8 +164,14 @@  static void pci_walk_bridge(struct pci_dev *bridge,
 			    int (*cb)(struct pci_dev *, void *),
 			    void *userdata)
 {
+	/*
+	 * In a non-native case where there is no OS-visible reporting
+	 * device the bridge will be NULL, i.e., no RCEC, no Downstream Port.
+	 */
 	if (bridge->subordinate)
 		pci_walk_bus(bridge->subordinate, cb, userdata);
+	else if (bridge->rcec)
+		cb(bridge->rcec, userdata);
 	else
 		cb(bridge, userdata);
 }
@@ -194,12 +200,6 @@  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 	pci_dbg(bridge, "broadcast error_detected message\n");
 	if (state == pci_channel_io_frozen) {
 		pci_walk_bridge(bridge, report_frozen_detected, &status);
-		if (type == PCI_EXP_TYPE_RC_END) {
-			pci_warn(dev, "subordinate device reset not possible for RCiEP\n");
-			status = PCI_ERS_RESULT_NONE;
-			goto failed;
-		}
-
 		status = reset_subordinates(bridge);
 		if (status != PCI_ERS_RESULT_RECOVERED) {
 			pci_warn(bridge, "subordinate device reset failed\n");