diff mbox series

[v8,11/14] PCI/RCEC: Add RCiEP's linked RCEC to AER/ERR

Message ID 20201002184735.1229220-12-seanvk.dev@oregontracks.org
State New
Headers show
Series Add RCEC handling to PCI/AER | expand

Commit Message

Sean V Kelley Oct. 2, 2020, 6:47 p.m. UTC
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>

When attempting error recovery for an RCiEP associated with an RCEC device,
there needs to be a way to update the Root Error Status, the Uncorrectable
Error Status and the Uncorrectable Error Severity of the parent RCEC.
In some non-native cases in which there is no OS visible device
associated with the RCiEP, there is nothing to act upon as the firmware
is acting before the OS. So add handling for the linked 'rcec' in AER/ERR
while taking into account non-native cases.

Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/pci/pcie/aer.c |  9 +++++----
 drivers/pci/pcie/err.c | 39 ++++++++++++++++++++++++++++-----------
 2 files changed, 33 insertions(+), 15 deletions(-)

Comments

Bjorn Helgaas Oct. 9, 2020, 5:57 p.m. UTC | #1
On Fri, Oct 02, 2020 at 11:47:32AM -0700, Sean V Kelley wrote:
> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> 
> When attempting error recovery for an RCiEP associated with an RCEC device,
> there needs to be a way to update the Root Error Status, the Uncorrectable
> Error Status and the Uncorrectable Error Severity of the parent RCEC.
> In some non-native cases in which there is no OS visible device
> associated with the RCiEP, there is nothing to act upon as the firmware
> is acting before the OS. So add handling for the linked 'rcec' in AER/ERR
> while taking into account non-native cases.
> 
> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  drivers/pci/pcie/aer.c |  9 +++++----
>  drivers/pci/pcie/err.c | 39 ++++++++++++++++++++++++++++-----------
>  2 files changed, 33 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 65dff5f3457a..dccdba60b5d9 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device *dev)
>  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>  {
>  	int aer = dev->aer_cap;
> +	int rc = 0;
>  	u32 reg32;
> -	int rc;
> -
>  
>  	/* Disable Root's interrupt in response to error messages */
>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>  	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>  	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>  
> -	rc = pci_bus_error_reset(dev);
> -	pci_info(dev, "Root Port link has been reset\n");
> +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
> +		rc = pci_bus_error_reset(dev);
> +		pci_info(dev, "Root Port link has been reset\n");
> +	}
>  
>  	/* Clear Root Error Status */
>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index 38abd7984996..956ad4c86d53 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, void *data)
>  /**
>   * pci_walk_bridge - walk bridges potentially AER affected
>   * @bridge   bridge which may be an RCEC with associated RCiEPs,
> - *           an RCiEP associated with an RCEC, or a Port.
> + *           or a Port.
> + * @dev      an RCiEP lacking an associated RCEC.
>   * @cb       callback to be called for each device found
>   * @userdata arbitrary pointer to be passed to callback.
>   *
> @@ -160,13 +161,20 @@ static int report_resume(struct pci_dev *dev, void *data)
>   * If the device provided has no subordinate bus, call the provided
>   * callback on the device itself.
>   */
> -static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev *, void *),
> +static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev *dev,
> +			    int (*cb)(struct pci_dev *, void *),
>  			    void *userdata)
>  {
> -	if (bridge->subordinate)
> +	/*
> +	 * In a non-native case where there is no OS-visible reporting
> +	 * device the bridge will be NULL, i.e., no RCEC, no PORT.
> +	 */
> +	if (bridge && bridge->subordinate)
>  		pci_walk_bus(bridge->subordinate, cb, userdata);
> -	else
> +	else if (bridge)
>  		cb(bridge, userdata);
> +	else
> +		cb(dev, userdata);
>  }
>  
>  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> @@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  	type = pci_pcie_type(dev);
>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> -	    type == PCI_EXP_TYPE_RC_EC ||
> -	    type == PCI_EXP_TYPE_RC_END)
> +	    type == PCI_EXP_TYPE_RC_EC)
>  		bridge = dev;
> +	else if (type == PCI_EXP_TYPE_RC_END)
> +		bridge = dev->rcec;
>  	else
>  		bridge = pci_upstream_bridge(dev);
>  
>  	pci_dbg(dev, "broadcast error_detected message\n");
>  	if (state == pci_channel_io_frozen) {
> -		pci_walk_bridge(bridge, report_frozen_detected, &status);
> +		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
>  		if (type == PCI_EXP_TYPE_RC_END) {
> +			/*
> +			 * The callback only clears the Root Error Status
> +			 * of the RCEC (see aer.c). Only perform this for the
> +			 * native case, i.e., an RCEC is present.
> +			 */
> +			if (bridge)
> +				reset_subordinate_devices(bridge);

Help me understand this.  There are lots of callbacks in this picture,
but I guess this "callback only clears Root Error Status" must refer
to aer_root_reset(), i.e., the reset_subordinate_devices pointer?

Of course, the caller of pcie_do_recovery() supplied that pointer.
And we can infer that it must be aer_root_reset(), not
dpc_reset_link(), because RCECs and RCiEPs are not allowed to
implement DPC.

I wish we didn't have either this assumption about what
reset_subordinate_devices points to, or the assumption about what
aer_root_reset() does.  They both seem a little bit tenuous.

We already made aer_root_reset() smart enough to check for RCECs.  Can
we put the FLR there, too?  Then we wouldn't have this weird situation
where reset_subordinate_devices() does a reset and clears error
status, EXCEPT for this case where it only clears error status and we
do the reset here?

>  			status = flr_on_rciep(dev);
>  			if (status != PCI_ERS_RESULT_RECOVERED) {
>  				pci_warn(dev, "function level reset failed\n");
> @@ -219,13 +236,13 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  			}
>  		}
>  	} else {
> -		pci_walk_bridge(bridge, report_normal_detected, &status);
> +		pci_walk_bridge(bridge, dev, report_normal_detected, &status);
>  	}
>  
>  	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
>  		status = PCI_ERS_RESULT_RECOVERED;
>  		pci_dbg(dev, "broadcast mmio_enabled message\n");
> -		pci_walk_bridge(bridge, report_mmio_enabled, &status);
> +		pci_walk_bridge(bridge, dev, report_mmio_enabled, &status);
>  	}
>  
>  	if (status == PCI_ERS_RESULT_NEED_RESET) {
> @@ -236,14 +253,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  		 */
>  		status = PCI_ERS_RESULT_RECOVERED;
>  		pci_dbg(dev, "broadcast slot_reset message\n");
> -		pci_walk_bridge(bridge, report_slot_reset, &status);
> +		pci_walk_bridge(bridge, dev, report_slot_reset, &status);
>  	}
>  
>  	if (status != PCI_ERS_RESULT_RECOVERED)
>  		goto failed;
>  
>  	pci_dbg(dev, "broadcast resume message\n");
> -	pci_walk_bridge(bridge, report_resume, &status);
> +	pci_walk_bridge(bridge, dev, report_resume, &status);
>  
>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> -- 
> 2.28.0
>
Kelley, Sean V Oct. 9, 2020, 6:26 p.m. UTC | #2
Hi Bjorn,

On 9 Oct 2020, at 10:57, Bjorn Helgaas wrote:

> On Fri, Oct 02, 2020 at 11:47:32AM -0700, Sean V Kelley wrote:
>> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>
>> When attempting error recovery for an RCiEP associated with an RCEC 
>> device,
>> there needs to be a way to update the Root Error Status, the 
>> Uncorrectable
>> Error Status and the Uncorrectable Error Severity of the parent RCEC.
>> In some non-native cases in which there is no OS visible device
>> associated with the RCiEP, there is nothing to act upon as the 
>> firmware
>> is acting before the OS. So add handling for the linked 'rcec' in 
>> AER/ERR
>> while taking into account non-native cases.
>>
>> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
>> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
>> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> ---
>>  drivers/pci/pcie/aer.c |  9 +++++----
>>  drivers/pci/pcie/err.c | 39 ++++++++++++++++++++++++++++-----------
>>  2 files changed, 33 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>> index 65dff5f3457a..dccdba60b5d9 100644
>> --- a/drivers/pci/pcie/aer.c
>> +++ b/drivers/pci/pcie/aer.c
>> @@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device *dev)
>>  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>>  {
>>  	int aer = dev->aer_cap;
>> +	int rc = 0;
>>  	u32 reg32;
>> -	int rc;
>> -
>>
>>  	/* Disable Root's interrupt in response to error messages */
>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>  	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>>  	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>
>> -	rc = pci_bus_error_reset(dev);
>> -	pci_info(dev, "Root Port link has been reset\n");
>> +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
>> +		rc = pci_bus_error_reset(dev);
>> +		pci_info(dev, "Root Port link has been reset\n");
>> +	}
>>
>>  	/* Clear Root Error Status */
>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
>> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
>> index 38abd7984996..956ad4c86d53 100644
>> --- a/drivers/pci/pcie/err.c
>> +++ b/drivers/pci/pcie/err.c
>> @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, 
>> void *data)
>>  /**
>>   * pci_walk_bridge - walk bridges potentially AER affected
>>   * @bridge   bridge which may be an RCEC with associated RCiEPs,
>> - *           an RCiEP associated with an RCEC, or a Port.
>> + *           or a Port.
>> + * @dev      an RCiEP lacking an associated RCEC.
>>   * @cb       callback to be called for each device found
>>   * @userdata arbitrary pointer to be passed to callback.
>>   *
>> @@ -160,13 +161,20 @@ static int report_resume(struct pci_dev *dev, 
>> void *data)
>>   * If the device provided has no subordinate bus, call the provided
>>   * callback on the device itself.
>>   */
>> -static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct 
>> pci_dev *, void *),
>> +static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev 
>> *dev,
>> +			    int (*cb)(struct pci_dev *, void *),
>>  			    void *userdata)
>>  {
>> -	if (bridge->subordinate)
>> +	/*
>> +	 * In a non-native case where there is no OS-visible reporting
>> +	 * device the bridge will be NULL, i.e., no RCEC, no PORT.
>> +	 */
>> +	if (bridge && bridge->subordinate)
>>  		pci_walk_bus(bridge->subordinate, cb, userdata);
>> -	else
>> +	else if (bridge)
>>  		cb(bridge, userdata);
>> +	else
>> +		cb(dev, userdata);
>>  }
>>
>>  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
>> @@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct 
>> pci_dev *dev,
>>  	type = pci_pcie_type(dev);
>>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>> -	    type == PCI_EXP_TYPE_RC_EC ||
>> -	    type == PCI_EXP_TYPE_RC_END)
>> +	    type == PCI_EXP_TYPE_RC_EC)
>>  		bridge = dev;
>> +	else if (type == PCI_EXP_TYPE_RC_END)
>> +		bridge = dev->rcec;
>>  	else
>>  		bridge = pci_upstream_bridge(dev);
>>
>>  	pci_dbg(dev, "broadcast error_detected message\n");
>>  	if (state == pci_channel_io_frozen) {
>> -		pci_walk_bridge(bridge, report_frozen_detected, &status);
>> +		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
>>  		if (type == PCI_EXP_TYPE_RC_END) {
>> +			/*
>> +			 * The callback only clears the Root Error Status
>> +			 * of the RCEC (see aer.c). Only perform this for the
>> +			 * native case, i.e., an RCEC is present.
>> +			 */
>> +			if (bridge)
>> +				reset_subordinate_devices(bridge);
>
> Help me understand this.  There are lots of callbacks in this picture,
> but I guess this "callback only clears Root Error Status" must refer
> to aer_root_reset(), i.e., the reset_subordinate_devices pointer?

The ‘bridge’ in this case will always be dev->rcec, the event 
collector for the associated RC_END. And that’s what’s being cleared 
in aer.c via aer_root_reset() as the callback. It’s also being checked 
for native/non-native here.

>
> Of course, the caller of pcie_do_recovery() supplied that pointer.
> And we can infer that it must be aer_root_reset(), not
> dpc_reset_link(), because RCECs and RCiEPs are not allowed to
> implement DPC.

Correct.

>
> I wish we didn't have either this assumption about what
> reset_subordinate_devices points to, or the assumption about what
> aer_root_reset() does.  They both seem a little bit tenuous.

Agree. It’s the relationship between the RC_END and the RC_EC.

>
> We already made aer_root_reset() smart enough to check for RCECs.  Can
> we put the FLR there, too?  Then we wouldn't have this weird situation
> where reset_subordinate_devices() does a reset and clears error
> status, EXCEPT for this case where it only clears error status and we
> do the reset here?

We could add the smarts to aer_root_reset() to check for an RC_END in 
that callback and perform the clear there on its RC_EC. We just 
wouldn’t map ‘bridge’ to dev->rcec for RC_END in 
pcie_do_recovery() which would simplify things.

Further, the FLR in the case of flr_on_rciep() below is specific to the 
RCiEP itself. So it could be performed either in aer_root_reset() or 
remain in the pcie_do_recovery().

That should work.

Sean


>
>>  			status = flr_on_rciep(dev);
>>  			if (status != PCI_ERS_RESULT_RECOVERED) {
>>  				pci_warn(dev, "function level reset failed\n");
>> @@ -219,13 +236,13 @@ pci_ers_result_t pcie_do_recovery(struct 
>> pci_dev *dev,
>>  			}
>>  		}
>>  	} else {
>> -		pci_walk_bridge(bridge, report_normal_detected, &status);
>> +		pci_walk_bridge(bridge, dev, report_normal_detected, &status);
>>  	}
>>
>>  	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
>>  		status = PCI_ERS_RESULT_RECOVERED;
>>  		pci_dbg(dev, "broadcast mmio_enabled message\n");
>> -		pci_walk_bridge(bridge, report_mmio_enabled, &status);
>> +		pci_walk_bridge(bridge, dev, report_mmio_enabled, &status);
>>  	}
>>
>>  	if (status == PCI_ERS_RESULT_NEED_RESET) {
>> @@ -236,14 +253,14 @@ pci_ers_result_t pcie_do_recovery(struct 
>> pci_dev *dev,
>>  		 */
>>  		status = PCI_ERS_RESULT_RECOVERED;
>>  		pci_dbg(dev, "broadcast slot_reset message\n");
>> -		pci_walk_bridge(bridge, report_slot_reset, &status);
>> +		pci_walk_bridge(bridge, dev, report_slot_reset, &status);
>>  	}
>>
>>  	if (status != PCI_ERS_RESULT_RECOVERED)
>>  		goto failed;
>>
>>  	pci_dbg(dev, "broadcast resume message\n");
>> -	pci_walk_bridge(bridge, report_resume, &status);
>> +	pci_walk_bridge(bridge, dev, report_resume, &status);
>>
>>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>> -- 
>> 2.28.0
>>
Kelley, Sean V Oct. 9, 2020, 6:34 p.m. UTC | #3
On 9 Oct 2020, at 11:26, Sean V Kelley wrote:

> Hi Bjorn,
>
> On 9 Oct 2020, at 10:57, Bjorn Helgaas wrote:
>
>> On Fri, Oct 02, 2020 at 11:47:32AM -0700, Sean V Kelley wrote:
>>> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>
>>> When attempting error recovery for an RCiEP associated with an RCEC 
>>> device,
>>> there needs to be a way to update the Root Error Status, the 
>>> Uncorrectable
>>> Error Status and the Uncorrectable Error Severity of the parent 
>>> RCEC.
>>> In some non-native cases in which there is no OS visible device
>>> associated with the RCiEP, there is nothing to act upon as the 
>>> firmware
>>> is acting before the OS. So add handling for the linked 'rcec' in 
>>> AER/ERR
>>> while taking into account non-native cases.
>>>
>>> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
>>> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
>>> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>> ---
>>>  drivers/pci/pcie/aer.c |  9 +++++----
>>>  drivers/pci/pcie/err.c | 39 ++++++++++++++++++++++++++++-----------
>>>  2 files changed, 33 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>>> index 65dff5f3457a..dccdba60b5d9 100644
>>> --- a/drivers/pci/pcie/aer.c
>>> +++ b/drivers/pci/pcie/aer.c
>>> @@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device 
>>> *dev)
>>>  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>>>  {
>>>  	int aer = dev->aer_cap;
>>> +	int rc = 0;
>>>  	u32 reg32;
>>> -	int rc;
>>> -
>>>
>>>  	/* Disable Root's interrupt in response to error messages */
>>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>  	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>>>  	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>
>>> -	rc = pci_bus_error_reset(dev);
>>> -	pci_info(dev, "Root Port link has been reset\n");
>>> +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
>>> +		rc = pci_bus_error_reset(dev);
>>> +		pci_info(dev, "Root Port link has been reset\n");
>>> +	}
>>>
>>>  	/* Clear Root Error Status */
>>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
>>> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
>>> index 38abd7984996..956ad4c86d53 100644
>>> --- a/drivers/pci/pcie/err.c
>>> +++ b/drivers/pci/pcie/err.c
>>> @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, 
>>> void *data)
>>>  /**
>>>   * pci_walk_bridge - walk bridges potentially AER affected
>>>   * @bridge   bridge which may be an RCEC with associated RCiEPs,
>>> - *           an RCiEP associated with an RCEC, or a Port.
>>> + *           or a Port.
>>> + * @dev      an RCiEP lacking an associated RCEC.
>>>   * @cb       callback to be called for each device found
>>>   * @userdata arbitrary pointer to be passed to callback.
>>>   *
>>> @@ -160,13 +161,20 @@ static int report_resume(struct pci_dev *dev, 
>>> void *data)
>>>   * If the device provided has no subordinate bus, call the provided
>>>   * callback on the device itself.
>>>   */
>>> -static void pci_walk_bridge(struct pci_dev *bridge, int 
>>> (*cb)(struct pci_dev *, void *),
>>> +static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev 
>>> *dev,
>>> +			    int (*cb)(struct pci_dev *, void *),
>>>  			    void *userdata)
>>>  {
>>> -	if (bridge->subordinate)
>>> +	/*
>>> +	 * In a non-native case where there is no OS-visible reporting
>>> +	 * device the bridge will be NULL, i.e., no RCEC, no PORT.
>>> +	 */
>>> +	if (bridge && bridge->subordinate)
>>>  		pci_walk_bus(bridge->subordinate, cb, userdata);
>>> -	else
>>> +	else if (bridge)
>>>  		cb(bridge, userdata);
>>> +	else
>>> +		cb(dev, userdata);
>>>  }
>>>
>>>  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
>>> @@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct 
>>> pci_dev *dev,
>>>  	type = pci_pcie_type(dev);
>>>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>>>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>>> -	    type == PCI_EXP_TYPE_RC_EC ||
>>> -	    type == PCI_EXP_TYPE_RC_END)
>>> +	    type == PCI_EXP_TYPE_RC_EC)
>>>  		bridge = dev;
>>> +	else if (type == PCI_EXP_TYPE_RC_END)
>>> +		bridge = dev->rcec;
>>>  	else
>>>  		bridge = pci_upstream_bridge(dev);
>>>
>>>  	pci_dbg(dev, "broadcast error_detected message\n");
>>>  	if (state == pci_channel_io_frozen) {
>>> -		pci_walk_bridge(bridge, report_frozen_detected, &status);
>>> +		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
>>>  		if (type == PCI_EXP_TYPE_RC_END) {
>>> +			/*
>>> +			 * The callback only clears the Root Error Status
>>> +			 * of the RCEC (see aer.c). Only perform this for the
>>> +			 * native case, i.e., an RCEC is present.
>>> +			 */
>>> +			if (bridge)
>>> +				reset_subordinate_devices(bridge);
>>
>> Help me understand this.  There are lots of callbacks in this 
>> picture,
>> but I guess this "callback only clears Root Error Status" must refer
>> to aer_root_reset(), i.e., the reset_subordinate_devices pointer?
>
> The ‘bridge’ in this case will always be dev->rcec, the event 
> collector for the associated RC_END. And that’s what’s being 
> cleared in aer.c via aer_root_reset() as the callback. It’s also 
> being checked for native/non-native here.
>
>>
>> Of course, the caller of pcie_do_recovery() supplied that pointer.
>> And we can infer that it must be aer_root_reset(), not
>> dpc_reset_link(), because RCECs and RCiEPs are not allowed to
>> implement DPC.
>
> Correct.
>
>>
>> I wish we didn't have either this assumption about what
>> reset_subordinate_devices points to, or the assumption about what
>> aer_root_reset() does.  They both seem a little bit tenuous.
>
> Agree. It’s the relationship between the RC_END and the RC_EC.
>
>>
>> We already made aer_root_reset() smart enough to check for RCECs.  
>> Can
>> we put the FLR there, too?  Then we wouldn't have this weird 
>> situation
>> where reset_subordinate_devices() does a reset and clears error
>> status, EXCEPT for this case where it only clears error status and we
>> do the reset here?
>
> We could add the smarts to aer_root_reset() to check for an RC_END in 
> that callback and perform the clear there on its RC_EC. We just 
> wouldn’t map ‘bridge’ to dev->rcec for RC_END in 
> pcie_do_recovery() which would simplify things.
>
> Further, the FLR in the case of flr_on_rciep() below is specific to 
> the RCiEP itself. So it could be performed either in aer_root_reset() 
> or remain in the pcie_do_recovery().
>
> That should work.
>
> Sean

Thinking more on this, you could still pass dev->rcec to the callback 
(eventually aer_root_reset()), but you won’t have the ability to 
handle the FLR there without the pointer to the RC_END. That’s why I 
suggested passing the RC_END and checking for its RC_EC in 
aer_root_reset() if you want to also handle FLR there too from below.

Sean

>
>
>>
>>>  			status = flr_on_rciep(dev);
>>>  			if (status != PCI_ERS_RESULT_RECOVERED) {
>>>  				pci_warn(dev, "function level reset failed\n");
>>> @@ -219,13 +236,13 @@ pci_ers_result_t pcie_do_recovery(struct 
>>> pci_dev *dev,
>>>  			}
>>>  		}
>>>  	} else {
>>> -		pci_walk_bridge(bridge, report_normal_detected, &status);
>>> +		pci_walk_bridge(bridge, dev, report_normal_detected, &status);
>>>  	}
>>>
>>>  	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
>>>  		status = PCI_ERS_RESULT_RECOVERED;
>>>  		pci_dbg(dev, "broadcast mmio_enabled message\n");
>>> -		pci_walk_bridge(bridge, report_mmio_enabled, &status);
>>> +		pci_walk_bridge(bridge, dev, report_mmio_enabled, &status);
>>>  	}
>>>
>>>  	if (status == PCI_ERS_RESULT_NEED_RESET) {
>>> @@ -236,14 +253,14 @@ pci_ers_result_t pcie_do_recovery(struct 
>>> pci_dev *dev,
>>>  		 */
>>>  		status = PCI_ERS_RESULT_RECOVERED;
>>>  		pci_dbg(dev, "broadcast slot_reset message\n");
>>> -		pci_walk_bridge(bridge, report_slot_reset, &status);
>>> +		pci_walk_bridge(bridge, dev, report_slot_reset, &status);
>>>  	}
>>>
>>>  	if (status != PCI_ERS_RESULT_RECOVERED)
>>>  		goto failed;
>>>
>>>  	pci_dbg(dev, "broadcast resume message\n");
>>> -	pci_walk_bridge(bridge, report_resume, &status);
>>> +	pci_walk_bridge(bridge, dev, report_resume, &status);
>>>
>>>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>>>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>>> -- 
>>> 2.28.0
>>>
Kelley, Sean V Oct. 9, 2020, 6:53 p.m. UTC | #4
On 9 Oct 2020, at 11:34, Sean V Kelley wrote:

> On 9 Oct 2020, at 11:26, Sean V Kelley wrote:
>
>> Hi Bjorn,
>>
>> On 9 Oct 2020, at 10:57, Bjorn Helgaas wrote:
>>
>>> On Fri, Oct 02, 2020 at 11:47:32AM -0700, Sean V Kelley wrote:
>>>> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>>
>>>> When attempting error recovery for an RCiEP associated with an RCEC 
>>>> device,
>>>> there needs to be a way to update the Root Error Status, the 
>>>> Uncorrectable
>>>> Error Status and the Uncorrectable Error Severity of the parent 
>>>> RCEC.
>>>> In some non-native cases in which there is no OS visible device
>>>> associated with the RCiEP, there is nothing to act upon as the 
>>>> firmware
>>>> is acting before the OS. So add handling for the linked 'rcec' in 
>>>> AER/ERR
>>>> while taking into account non-native cases.
>>>>
>>>> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
>>>> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
>>>> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>>> ---
>>>>  drivers/pci/pcie/aer.c |  9 +++++----
>>>>  drivers/pci/pcie/err.c | 39 
>>>> ++++++++++++++++++++++++++++-----------
>>>>  2 files changed, 33 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>>>> index 65dff5f3457a..dccdba60b5d9 100644
>>>> --- a/drivers/pci/pcie/aer.c
>>>> +++ b/drivers/pci/pcie/aer.c
>>>> @@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device 
>>>> *dev)
>>>>  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>>>>  {
>>>>  	int aer = dev->aer_cap;
>>>> +	int rc = 0;
>>>>  	u32 reg32;
>>>> -	int rc;
>>>> -
>>>>
>>>>  	/* Disable Root's interrupt in response to error messages */
>>>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>>  	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>>>>  	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>>
>>>> -	rc = pci_bus_error_reset(dev);
>>>> -	pci_info(dev, "Root Port link has been reset\n");
>>>> +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
>>>> +		rc = pci_bus_error_reset(dev);
>>>> +		pci_info(dev, "Root Port link has been reset\n");
>>>> +	}
>>>>
>>>>  	/* Clear Root Error Status */
>>>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
>>>> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
>>>> index 38abd7984996..956ad4c86d53 100644
>>>> --- a/drivers/pci/pcie/err.c
>>>> +++ b/drivers/pci/pcie/err.c
>>>> @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, 
>>>> void *data)
>>>>  /**
>>>>   * pci_walk_bridge - walk bridges potentially AER affected
>>>>   * @bridge   bridge which may be an RCEC with associated RCiEPs,
>>>> - *           an RCiEP associated with an RCEC, or a Port.
>>>> + *           or a Port.
>>>> + * @dev      an RCiEP lacking an associated RCEC.
>>>>   * @cb       callback to be called for each device found
>>>>   * @userdata arbitrary pointer to be passed to callback.
>>>>   *
>>>> @@ -160,13 +161,20 @@ static int report_resume(struct pci_dev *dev, 
>>>> void *data)
>>>>   * If the device provided has no subordinate bus, call the 
>>>> provided
>>>>   * callback on the device itself.
>>>>   */
>>>> -static void pci_walk_bridge(struct pci_dev *bridge, int 
>>>> (*cb)(struct pci_dev *, void *),
>>>> +static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev 
>>>> *dev,
>>>> +			    int (*cb)(struct pci_dev *, void *),
>>>>  			    void *userdata)
>>>>  {
>>>> -	if (bridge->subordinate)
>>>> +	/*
>>>> +	 * In a non-native case where there is no OS-visible reporting
>>>> +	 * device the bridge will be NULL, i.e., no RCEC, no PORT.
>>>> +	 */
>>>> +	if (bridge && bridge->subordinate)
>>>>  		pci_walk_bus(bridge->subordinate, cb, userdata);
>>>> -	else
>>>> +	else if (bridge)
>>>>  		cb(bridge, userdata);
>>>> +	else
>>>> +		cb(dev, userdata);
>>>>  }
>>>>
>>>>  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
>>>> @@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct 
>>>> pci_dev *dev,
>>>>  	type = pci_pcie_type(dev);
>>>>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>>>>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>>>> -	    type == PCI_EXP_TYPE_RC_EC ||
>>>> -	    type == PCI_EXP_TYPE_RC_END)
>>>> +	    type == PCI_EXP_TYPE_RC_EC)
>>>>  		bridge = dev;
>>>> +	else if (type == PCI_EXP_TYPE_RC_END)
>>>> +		bridge = dev->rcec;
>>>>  	else
>>>>  		bridge = pci_upstream_bridge(dev);
>>>>
>>>>  	pci_dbg(dev, "broadcast error_detected message\n");
>>>>  	if (state == pci_channel_io_frozen) {
>>>> -		pci_walk_bridge(bridge, report_frozen_detected, &status);
>>>> +		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
>>>>  		if (type == PCI_EXP_TYPE_RC_END) {
>>>> +			/*
>>>> +			 * The callback only clears the Root Error Status
>>>> +			 * of the RCEC (see aer.c). Only perform this for the
>>>> +			 * native case, i.e., an RCEC is present.
>>>> +			 */
>>>> +			if (bridge)
>>>> +				reset_subordinate_devices(bridge);
>>>
>>> Help me understand this.  There are lots of callbacks in this 
>>> picture,
>>> but I guess this "callback only clears Root Error Status" must refer
>>> to aer_root_reset(), i.e., the reset_subordinate_devices pointer?
>>
>> The ‘bridge’ in this case will always be dev->rcec, the event 
>> collector for the associated RC_END. And that’s what’s being 
>> cleared in aer.c via aer_root_reset() as the callback. It’s also 
>> being checked for native/non-native here.
>>
>>>
>>> Of course, the caller of pcie_do_recovery() supplied that pointer.
>>> And we can infer that it must be aer_root_reset(), not
>>> dpc_reset_link(), because RCECs and RCiEPs are not allowed to
>>> implement DPC.
>>
>> Correct.
>>
>>>
>>> I wish we didn't have either this assumption about what
>>> reset_subordinate_devices points to, or the assumption about what
>>> aer_root_reset() does.  They both seem a little bit tenuous.
>>
>> Agree. It’s the relationship between the RC_END and the RC_EC.
>>
>>>
>>> We already made aer_root_reset() smart enough to check for RCECs.  
>>> Can
>>> we put the FLR there, too?  Then we wouldn't have this weird 
>>> situation
>>> where reset_subordinate_devices() does a reset and clears error
>>> status, EXCEPT for this case where it only clears error status and 
>>> we
>>> do the reset here?
>>
>> We could add the smarts to aer_root_reset() to check for an RC_END in 
>> that callback and perform the clear there on its RC_EC. We just 
>> wouldn’t map ‘bridge’ to dev->rcec for RC_END in 
>> pcie_do_recovery() which would simplify things.
>>
>> Further, the FLR in the case of flr_on_rciep() below is specific to 
>> the RCiEP itself. So it could be performed either in aer_root_reset() 
>> or remain in the pcie_do_recovery().
>>
>> That should work.
>>
>> Sean
>
> Thinking more on this, you could still pass dev->rcec to the callback 
> (eventually aer_root_reset()), but you won’t have the ability to 
> handle the FLR there without the pointer to the RC_END. That’s why I 
> suggested passing the RC_END and checking for its RC_EC in 
> aer_root_reset() if you want to also handle FLR there too from below.

Where you get into trouble is that you don’t want to do anything in 
the non-native case and shouldn’t be going to the callback. So this 
would add complexity to aer_root_reset() for checking on dev->rcec == 
NULL…where before you never would have invoked 
reset_subordinate_devices() in the non-native case.


>
> Sean
>
>>
>>
>>>
>>>>  			status = flr_on_rciep(dev);
>>>>  			if (status != PCI_ERS_RESULT_RECOVERED) {
>>>>  				pci_warn(dev, "function level reset failed\n");
>>>> @@ -219,13 +236,13 @@ pci_ers_result_t pcie_do_recovery(struct 
>>>> pci_dev *dev,
>>>>  			}
>>>>  		}
>>>>  	} else {
>>>> -		pci_walk_bridge(bridge, report_normal_detected, &status);
>>>> +		pci_walk_bridge(bridge, dev, report_normal_detected, &status);
>>>>  	}
>>>>
>>>>  	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
>>>>  		status = PCI_ERS_RESULT_RECOVERED;
>>>>  		pci_dbg(dev, "broadcast mmio_enabled message\n");
>>>> -		pci_walk_bridge(bridge, report_mmio_enabled, &status);
>>>> +		pci_walk_bridge(bridge, dev, report_mmio_enabled, &status);
>>>>  	}
>>>>
>>>>  	if (status == PCI_ERS_RESULT_NEED_RESET) {
>>>> @@ -236,14 +253,14 @@ pci_ers_result_t pcie_do_recovery(struct 
>>>> pci_dev *dev,
>>>>  		 */
>>>>  		status = PCI_ERS_RESULT_RECOVERED;
>>>>  		pci_dbg(dev, "broadcast slot_reset message\n");
>>>> -		pci_walk_bridge(bridge, report_slot_reset, &status);
>>>> +		pci_walk_bridge(bridge, dev, report_slot_reset, &status);
>>>>  	}
>>>>
>>>>  	if (status != PCI_ERS_RESULT_RECOVERED)
>>>>  		goto failed;
>>>>
>>>>  	pci_dbg(dev, "broadcast resume message\n");
>>>> -	pci_walk_bridge(bridge, report_resume, &status);
>>>> +	pci_walk_bridge(bridge, dev, report_resume, &status);
>>>>
>>>>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>>>>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>>>> -- 
>>>> 2.28.0
>>>>
Kelley, Sean V Oct. 9, 2020, 7:48 p.m. UTC | #5
On 9 Oct 2020, at 11:53, Sean V Kelley wrote:

> On 9 Oct 2020, at 11:34, Sean V Kelley wrote:
>
>> On 9 Oct 2020, at 11:26, Sean V Kelley wrote:
>>
>>> Hi Bjorn,
>>>
>>> On 9 Oct 2020, at 10:57, Bjorn Helgaas wrote:
>>>
>>>> On Fri, Oct 02, 2020 at 11:47:32AM -0700, Sean V Kelley wrote:
>>>>> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>>>
>>>>> When attempting error recovery for an RCiEP associated with an 
>>>>> RCEC device,
>>>>> there needs to be a way to update the Root Error Status, the 
>>>>> Uncorrectable
>>>>> Error Status and the Uncorrectable Error Severity of the parent 
>>>>> RCEC.
>>>>> In some non-native cases in which there is no OS visible device
>>>>> associated with the RCiEP, there is nothing to act upon as the 
>>>>> firmware
>>>>> is acting before the OS. So add handling for the linked 'rcec' in 
>>>>> AER/ERR
>>>>> while taking into account non-native cases.
>>>>>
>>>>> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
>>>>> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
>>>>> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>>>> ---
>>>>>  drivers/pci/pcie/aer.c |  9 +++++----
>>>>>  drivers/pci/pcie/err.c | 39 
>>>>> ++++++++++++++++++++++++++++-----------
>>>>>  2 files changed, 33 insertions(+), 15 deletions(-)
>>>>>
>>>>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>>>>> index 65dff5f3457a..dccdba60b5d9 100644
>>>>> --- a/drivers/pci/pcie/aer.c
>>>>> +++ b/drivers/pci/pcie/aer.c
>>>>> @@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device 
>>>>> *dev)
>>>>>  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>>>>>  {
>>>>>  	int aer = dev->aer_cap;
>>>>> +	int rc = 0;
>>>>>  	u32 reg32;
>>>>> -	int rc;
>>>>> -
>>>>>
>>>>>  	/* Disable Root's interrupt in response to error messages */
>>>>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>>>  	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>>>>>  	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>>>
>>>>> -	rc = pci_bus_error_reset(dev);
>>>>> -	pci_info(dev, "Root Port link has been reset\n");
>>>>> +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
>>>>> +		rc = pci_bus_error_reset(dev);
>>>>> +		pci_info(dev, "Root Port link has been reset\n");
>>>>> +	}
>>>>>
>>>>>  	/* Clear Root Error Status */
>>>>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
>>>>> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
>>>>> index 38abd7984996..956ad4c86d53 100644
>>>>> --- a/drivers/pci/pcie/err.c
>>>>> +++ b/drivers/pci/pcie/err.c
>>>>> @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, 
>>>>> void *data)
>>>>>  /**
>>>>>   * pci_walk_bridge - walk bridges potentially AER affected
>>>>>   * @bridge   bridge which may be an RCEC with associated RCiEPs,
>>>>> - *           an RCiEP associated with an RCEC, or a Port.
>>>>> + *           or a Port.
>>>>> + * @dev      an RCiEP lacking an associated RCEC.
>>>>>   * @cb       callback to be called for each device found
>>>>>   * @userdata arbitrary pointer to be passed to callback.
>>>>>   *
>>>>> @@ -160,13 +161,20 @@ static int report_resume(struct pci_dev 
>>>>> *dev, void *data)
>>>>>   * If the device provided has no subordinate bus, call the 
>>>>> provided
>>>>>   * callback on the device itself.
>>>>>   */
>>>>> -static void pci_walk_bridge(struct pci_dev *bridge, int 
>>>>> (*cb)(struct pci_dev *, void *),
>>>>> +static void pci_walk_bridge(struct pci_dev *bridge, struct 
>>>>> pci_dev *dev,
>>>>> +			    int (*cb)(struct pci_dev *, void *),
>>>>>  			    void *userdata)
>>>>>  {
>>>>> -	if (bridge->subordinate)
>>>>> +	/*
>>>>> +	 * In a non-native case where there is no OS-visible reporting
>>>>> +	 * device the bridge will be NULL, i.e., no RCEC, no PORT.
>>>>> +	 */
>>>>> +	if (bridge && bridge->subordinate)
>>>>>  		pci_walk_bus(bridge->subordinate, cb, userdata);
>>>>> -	else
>>>>> +	else if (bridge)
>>>>>  		cb(bridge, userdata);
>>>>> +	else
>>>>> +		cb(dev, userdata);
>>>>>  }
>>>>>
>>>>>  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
>>>>> @@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct 
>>>>> pci_dev *dev,
>>>>>  	type = pci_pcie_type(dev);
>>>>>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>>>>>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>>>>> -	    type == PCI_EXP_TYPE_RC_EC ||
>>>>> -	    type == PCI_EXP_TYPE_RC_END)
>>>>> +	    type == PCI_EXP_TYPE_RC_EC)
>>>>>  		bridge = dev;
>>>>> +	else if (type == PCI_EXP_TYPE_RC_END)
>>>>> +		bridge = dev->rcec;
>>>>>  	else
>>>>>  		bridge = pci_upstream_bridge(dev);
>>>>>
>>>>>  	pci_dbg(dev, "broadcast error_detected message\n");
>>>>>  	if (state == pci_channel_io_frozen) {
>>>>> -		pci_walk_bridge(bridge, report_frozen_detected, &status);
>>>>> +		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
>>>>>  		if (type == PCI_EXP_TYPE_RC_END) {
>>>>> +			/*
>>>>> +			 * The callback only clears the Root Error Status
>>>>> +			 * of the RCEC (see aer.c). Only perform this for the
>>>>> +			 * native case, i.e., an RCEC is present.
>>>>> +			 */
>>>>> +			if (bridge)
>>>>> +				reset_subordinate_devices(bridge);
>>>>
>>>> Help me understand this.  There are lots of callbacks in this 
>>>> picture,
>>>> but I guess this "callback only clears Root Error Status" must 
>>>> refer
>>>> to aer_root_reset(), i.e., the reset_subordinate_devices pointer?
>>>
>>> The ‘bridge’ in this case will always be dev->rcec, the event 
>>> collector for the associated RC_END. And that’s what’s being 
>>> cleared in aer.c via aer_root_reset() as the callback. It’s also 
>>> being checked for native/non-native here.
>>>
>>>>
>>>> Of course, the caller of pcie_do_recovery() supplied that pointer.
>>>> And we can infer that it must be aer_root_reset(), not
>>>> dpc_reset_link(), because RCECs and RCiEPs are not allowed to
>>>> implement DPC.
>>>
>>> Correct.
>>>
>>>>
>>>> I wish we didn't have either this assumption about what
>>>> reset_subordinate_devices points to, or the assumption about what
>>>> aer_root_reset() does.  They both seem a little bit tenuous.
>>>
>>> Agree. It’s the relationship between the RC_END and the RC_EC.
>>>
>>>>
>>>> We already made aer_root_reset() smart enough to check for RCECs.  
>>>> Can
>>>> we put the FLR there, too?  Then we wouldn't have this weird 
>>>> situation
>>>> where reset_subordinate_devices() does a reset and clears error
>>>> status, EXCEPT for this case where it only clears error status and 
>>>> we
>>>> do the reset here?
>>>
>>> We could add the smarts to aer_root_reset() to check for an RC_END 
>>> in that callback and perform the clear there on its RC_EC. We just 
>>> wouldn’t map ‘bridge’ to dev->rcec for RC_END in 
>>> pcie_do_recovery() which would simplify things.
>>>
>>> Further, the FLR in the case of flr_on_rciep() below is specific to 
>>> the RCiEP itself. So it could be performed either in 
>>> aer_root_reset() or remain in the pcie_do_recovery().
>>>
>>> That should work.
>>>
>>> Sean
>>
>> Thinking more on this, you could still pass dev->rcec to the callback 
>> (eventually aer_root_reset()), but you won’t have the ability to 
>> handle the FLR there without the pointer to the RC_END. That’s why 
>> I suggested passing the RC_END and checking for its RC_EC in 
>> aer_root_reset() if you want to also handle FLR there too from below.
>
> Where you get into trouble is that you don’t want to do anything in 
> the non-native case and shouldn’t be going to the callback. So this 
> would add complexity to aer_root_reset() for checking on dev->rcec == 
> NULL…where before you never would have invoked 
> reset_subordinate_devices() in the non-native case.
>
>

Unmapping of bridge to dev->rcec in pcie_do_recovery() also causes 
problems for pci_walk_bridge() which pays attention to the non-native 
case, even making available the dev for when there is no ‘bridge’ 
like device.

I think we may be just shifting things elsewhere despite the constraints 
remaining the same. So while it should work, I believe that I prefer the 
current approach.

Sean



>>
>> Sean
>>
>>>
>>>
>>>>
>>>>>  			status = flr_on_rciep(dev);
>>>>>  			if (status != PCI_ERS_RESULT_RECOVERED) {
>>>>>  				pci_warn(dev, "function level reset failed\n");
>>>>> @@ -219,13 +236,13 @@ pci_ers_result_t pcie_do_recovery(struct 
>>>>> pci_dev *dev,
>>>>>  			}
>>>>>  		}
>>>>>  	} else {
>>>>> -		pci_walk_bridge(bridge, report_normal_detected, &status);
>>>>> +		pci_walk_bridge(bridge, dev, report_normal_detected, &status);
>>>>>  	}
>>>>>
>>>>>  	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
>>>>>  		status = PCI_ERS_RESULT_RECOVERED;
>>>>>  		pci_dbg(dev, "broadcast mmio_enabled message\n");
>>>>> -		pci_walk_bridge(bridge, report_mmio_enabled, &status);
>>>>> +		pci_walk_bridge(bridge, dev, report_mmio_enabled, &status);
>>>>>  	}
>>>>>
>>>>>  	if (status == PCI_ERS_RESULT_NEED_RESET) {
>>>>> @@ -236,14 +253,14 @@ pci_ers_result_t pcie_do_recovery(struct 
>>>>> pci_dev *dev,
>>>>>  		 */
>>>>>  		status = PCI_ERS_RESULT_RECOVERED;
>>>>>  		pci_dbg(dev, "broadcast slot_reset message\n");
>>>>> -		pci_walk_bridge(bridge, report_slot_reset, &status);
>>>>> +		pci_walk_bridge(bridge, dev, report_slot_reset, &status);
>>>>>  	}
>>>>>
>>>>>  	if (status != PCI_ERS_RESULT_RECOVERED)
>>>>>  		goto failed;
>>>>>
>>>>>  	pci_dbg(dev, "broadcast resume message\n");
>>>>> -	pci_walk_bridge(bridge, report_resume, &status);
>>>>> +	pci_walk_bridge(bridge, dev, report_resume, &status);
>>>>>
>>>>>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>>>>>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>>>>> -- 
>>>>> 2.28.0
>>>>>
Bjorn Helgaas Oct. 9, 2020, 9:30 p.m. UTC | #6
On Fri, Oct 09, 2020 at 12:57:45PM -0500, Bjorn Helgaas wrote:
> On Fri, Oct 02, 2020 at 11:47:32AM -0700, Sean V Kelley wrote:
> > From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> > 
> > When attempting error recovery for an RCiEP associated with an RCEC device,
> > there needs to be a way to update the Root Error Status, the Uncorrectable
> > Error Status and the Uncorrectable Error Severity of the parent RCEC.
> > In some non-native cases in which there is no OS visible device
> > associated with the RCiEP, there is nothing to act upon as the firmware
> > is acting before the OS. So add handling for the linked 'rcec' in AER/ERR
> > while taking into account non-native cases.
> > 
> > Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
> > Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
> > Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > ---
> >  drivers/pci/pcie/aer.c |  9 +++++----
> >  drivers/pci/pcie/err.c | 39 ++++++++++++++++++++++++++++-----------
> >  2 files changed, 33 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > index 65dff5f3457a..dccdba60b5d9 100644
> > --- a/drivers/pci/pcie/aer.c
> > +++ b/drivers/pci/pcie/aer.c
> > @@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device *dev)
> >  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> >  {
> >  	int aer = dev->aer_cap;
> > +	int rc = 0;
> >  	u32 reg32;
> > -	int rc;
> > -
> >  
> >  	/* Disable Root's interrupt in response to error messages */
> >  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> >  	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> >  	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> >  
> > -	rc = pci_bus_error_reset(dev);
> > -	pci_info(dev, "Root Port link has been reset\n");
> > +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
> > +		rc = pci_bus_error_reset(dev);
> > +		pci_info(dev, "Root Port link has been reset\n");
> > +	}
> >  
> >  	/* Clear Root Error Status */
> >  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
> > diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> > index 38abd7984996..956ad4c86d53 100644
> > --- a/drivers/pci/pcie/err.c
> > +++ b/drivers/pci/pcie/err.c
> > @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, void *data)
> >  /**
> >   * pci_walk_bridge - walk bridges potentially AER affected
> >   * @bridge   bridge which may be an RCEC with associated RCiEPs,
> > - *           an RCiEP associated with an RCEC, or a Port.
> > + *           or a Port.
> > + * @dev      an RCiEP lacking an associated RCEC.
> >   * @cb       callback to be called for each device found
> >   * @userdata arbitrary pointer to be passed to callback.
> >   *
> > @@ -160,13 +161,20 @@ static int report_resume(struct pci_dev *dev, void *data)
> >   * If the device provided has no subordinate bus, call the provided
> >   * callback on the device itself.
> >   */
> > -static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev *, void *),
> > +static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev *dev,
> > +			    int (*cb)(struct pci_dev *, void *),
> >  			    void *userdata)
> >  {
> > -	if (bridge->subordinate)
> > +	/*
> > +	 * In a non-native case where there is no OS-visible reporting
> > +	 * device the bridge will be NULL, i.e., no RCEC, no PORT.
> > +	 */
> > +	if (bridge && bridge->subordinate)
> >  		pci_walk_bus(bridge->subordinate, cb, userdata);
> > -	else
> > +	else if (bridge)
> >  		cb(bridge, userdata);
> > +	else
> > +		cb(dev, userdata);
> >  }
> >  
> >  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> > @@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >  	type = pci_pcie_type(dev);
> >  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
> >  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> > -	    type == PCI_EXP_TYPE_RC_EC ||
> > -	    type == PCI_EXP_TYPE_RC_END)
> > +	    type == PCI_EXP_TYPE_RC_EC)
> >  		bridge = dev;
> > +	else if (type == PCI_EXP_TYPE_RC_END)
> > +		bridge = dev->rcec;
> >  	else
> >  		bridge = pci_upstream_bridge(dev);
> >  
> >  	pci_dbg(dev, "broadcast error_detected message\n");
> >  	if (state == pci_channel_io_frozen) {
> > -		pci_walk_bridge(bridge, report_frozen_detected, &status);
> > +		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
> >  		if (type == PCI_EXP_TYPE_RC_END) {
> > +			/*
> > +			 * The callback only clears the Root Error Status
> > +			 * of the RCEC (see aer.c). Only perform this for the
> > +			 * native case, i.e., an RCEC is present.
> > +			 */
> > +			if (bridge)
> > +				reset_subordinate_devices(bridge);
> 
> Help me understand this.  There are lots of callbacks in this picture,
> but I guess this "callback only clears Root Error Status" must refer
> to aer_root_reset(), i.e., the reset_subordinate_devices pointer?
> 
> Of course, the caller of pcie_do_recovery() supplied that pointer.
> And we can infer that it must be aer_root_reset(), not
> dpc_reset_link(), because RCECs and RCiEPs are not allowed to
> implement DPC.
> 
> I wish we didn't have either this assumption about what
> reset_subordinate_devices points to, or the assumption about what
> aer_root_reset() does.  They both seem a little bit tenuous.
> 
> We already made aer_root_reset() smart enough to check for RCECs.  Can
> we put the FLR there, too?  Then we wouldn't have this weird situation
> where reset_subordinate_devices() does a reset and clears error
> status, EXCEPT for this case where it only clears error status and we
> do the reset here?

Just as an example to make this concrete.  Doesn't even compile.


diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index d6927e6535e5..e389db7cbba6 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1372,28 +1372,45 @@ static int aer_probe(struct pcie_device *dev)
  */
 static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 {
-	int aer = dev->aer_cap;
+	int type = pci_pcie_type(dev);
+	struct pci_dev *root;
+	int aer = 0;
 	int rc = 0;
 	u32 reg32;
 
-	/* Disable Root's interrupt in response to error messages */
-	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
-	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
-	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+	if (type == PCI_EXP_TYPE_RC_END)
+		root = dev->rcec;
+	else
+		root = dev;
+
+	if (root)
+		aer = root->aer_cap;
 
-	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
+	if (aer) {
+		/* Disable Root's interrupt in response to error messages */
+		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
+		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
+	}
+
+	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
+		rc = flr_on_rciep(dev);
+		pci_info(dev, "has been reset (%d)\n", rc);
+	} else {
 		rc = pci_bus_error_reset(dev);
-		pci_info(dev, "Root Port link has been reset\n");
+		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
 	}
 
-	/* Clear Root Error Status */
-	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
-	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
+	if (aer) {
+		/* Clear Root Error Status */
+		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
+		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
 
-	/* Enable Root Port's interrupt in response to error messages */
-	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
-	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
-	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+		/* Enable Root Port's interrupt in response to error messages */
+		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
+		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
+	}
 
 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
 }
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 79ae1356141d..08976034a89c 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -203,36 +203,19 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 	 */
 	if (type == PCI_EXP_TYPE_ROOT_PORT ||
 	    type == PCI_EXP_TYPE_DOWNSTREAM ||
-	    type == PCI_EXP_TYPE_RC_EC)
+	    type == PCI_EXP_TYPE_RC_EC ||
+	    type == PCI_EXP_TYPE_RC_END)
 		bridge = dev;
-	else if (type == PCI_EXP_TYPE_RC_END)
-		bridge = dev->rcec;
 	else
 		bridge = pci_upstream_bridge(dev);
 
 	pci_dbg(bridge, "broadcast error_detected message\n");
 	if (state == pci_channel_io_frozen) {
 		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
-		if (type == PCI_EXP_TYPE_RC_END) {
-			/*
-			 * The callback only clears the Root Error Status
-			 * of the RCEC (see aer.c). Only perform this for the
-			 * native case, i.e., an RCEC is present.
-			 */
-			if (bridge)
-				reset_subordinates(bridge);
-
-			status = flr_on_rciep(dev);
-			if (status != PCI_ERS_RESULT_RECOVERED) {
-				pci_warn(dev, "Function Level Reset failed\n");
-				goto failed;
-			}
-		} else {
-			status = reset_subordinates(bridge);
-			if (status != PCI_ERS_RESULT_RECOVERED) {
-				pci_warn(dev, "subordinate device reset failed\n");
-				goto failed;
-			}
+		status = reset_subordinates(bridge);
+		if (status != PCI_ERS_RESULT_RECOVERED) {
+			pci_warn(bridge, "subordinate device reset failed\n");
+			goto failed;
 		}
 	} else {
 		pci_walk_bridge(bridge, dev, report_normal_detected, &status);
Kelley, Sean V Oct. 9, 2020, 10:07 p.m. UTC | #7
On 9 Oct 2020, at 14:30, Bjorn Helgaas wrote:

> On Fri, Oct 09, 2020 at 12:57:45PM -0500, Bjorn Helgaas wrote:
>> On Fri, Oct 02, 2020 at 11:47:32AM -0700, Sean V Kelley wrote:
>>> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>>
>>> When attempting error recovery for an RCiEP associated with an RCEC 
>>> device,
>>> there needs to be a way to update the Root Error Status, the 
>>> Uncorrectable
>>> Error Status and the Uncorrectable Error Severity of the parent 
>>> RCEC.
>>> In some non-native cases in which there is no OS visible device
>>> associated with the RCiEP, there is nothing to act upon as the 
>>> firmware
>>> is acting before the OS. So add handling for the linked 'rcec' in 
>>> AER/ERR
>>> while taking into account non-native cases.
>>>
>>> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
>>> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
>>> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
>>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>> ---
>>>  drivers/pci/pcie/aer.c |  9 +++++----
>>>  drivers/pci/pcie/err.c | 39 ++++++++++++++++++++++++++++-----------
>>>  2 files changed, 33 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>>> index 65dff5f3457a..dccdba60b5d9 100644
>>> --- a/drivers/pci/pcie/aer.c
>>> +++ b/drivers/pci/pcie/aer.c
>>> @@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device 
>>> *dev)
>>>  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>>>  {
>>>  	int aer = dev->aer_cap;
>>> +	int rc = 0;
>>>  	u32 reg32;
>>> -	int rc;
>>> -
>>>
>>>  	/* Disable Root's interrupt in response to error messages */
>>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>>>  	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>>>  	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>>>
>>> -	rc = pci_bus_error_reset(dev);
>>> -	pci_info(dev, "Root Port link has been reset\n");
>>> +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
>>> +		rc = pci_bus_error_reset(dev);
>>> +		pci_info(dev, "Root Port link has been reset\n");
>>> +	}
>>>
>>>  	/* Clear Root Error Status */
>>>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
>>> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
>>> index 38abd7984996..956ad4c86d53 100644
>>> --- a/drivers/pci/pcie/err.c
>>> +++ b/drivers/pci/pcie/err.c
>>> @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, 
>>> void *data)
>>>  /**
>>>   * pci_walk_bridge - walk bridges potentially AER affected
>>>   * @bridge   bridge which may be an RCEC with associated RCiEPs,
>>> - *           an RCiEP associated with an RCEC, or a Port.
>>> + *           or a Port.
>>> + * @dev      an RCiEP lacking an associated RCEC.
>>>   * @cb       callback to be called for each device found
>>>   * @userdata arbitrary pointer to be passed to callback.
>>>   *
>>> @@ -160,13 +161,20 @@ static int report_resume(struct pci_dev *dev, 
>>> void *data)
>>>   * If the device provided has no subordinate bus, call the provided
>>>   * callback on the device itself.
>>>   */
>>> -static void pci_walk_bridge(struct pci_dev *bridge, int 
>>> (*cb)(struct pci_dev *, void *),
>>> +static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev 
>>> *dev,
>>> +			    int (*cb)(struct pci_dev *, void *),
>>>  			    void *userdata)
>>>  {
>>> -	if (bridge->subordinate)
>>> +	/*
>>> +	 * In a non-native case where there is no OS-visible reporting
>>> +	 * device the bridge will be NULL, i.e., no RCEC, no PORT.
>>> +	 */
>>> +	if (bridge && bridge->subordinate)
>>>  		pci_walk_bus(bridge->subordinate, cb, userdata);
>>> -	else
>>> +	else if (bridge)
>>>  		cb(bridge, userdata);
>>> +	else
>>> +		cb(dev, userdata);
>>>  }
>>>
>>>  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
>>> @@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct 
>>> pci_dev *dev,
>>>  	type = pci_pcie_type(dev);
>>>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>>>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>>> -	    type == PCI_EXP_TYPE_RC_EC ||
>>> -	    type == PCI_EXP_TYPE_RC_END)
>>> +	    type == PCI_EXP_TYPE_RC_EC)
>>>  		bridge = dev;
>>> +	else if (type == PCI_EXP_TYPE_RC_END)
>>> +		bridge = dev->rcec;
>>>  	else
>>>  		bridge = pci_upstream_bridge(dev);
>>>
>>>  	pci_dbg(dev, "broadcast error_detected message\n");
>>>  	if (state == pci_channel_io_frozen) {
>>> -		pci_walk_bridge(bridge, report_frozen_detected, &status);
>>> +		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
>>>  		if (type == PCI_EXP_TYPE_RC_END) {
>>> +			/*
>>> +			 * The callback only clears the Root Error Status
>>> +			 * of the RCEC (see aer.c). Only perform this for the
>>> +			 * native case, i.e., an RCEC is present.
>>> +			 */
>>> +			if (bridge)
>>> +				reset_subordinate_devices(bridge);
>>
>> Help me understand this.  There are lots of callbacks in this 
>> picture,
>> but I guess this "callback only clears Root Error Status" must refer
>> to aer_root_reset(), i.e., the reset_subordinate_devices pointer?
>>
>> Of course, the caller of pcie_do_recovery() supplied that pointer.
>> And we can infer that it must be aer_root_reset(), not
>> dpc_reset_link(), because RCECs and RCiEPs are not allowed to
>> implement DPC.
>>
>> I wish we didn't have either this assumption about what
>> reset_subordinate_devices points to, or the assumption about what
>> aer_root_reset() does.  They both seem a little bit tenuous.
>>
>> We already made aer_root_reset() smart enough to check for RCECs.  
>> Can
>> we put the FLR there, too?  Then we wouldn't have this weird 
>> situation
>> where reset_subordinate_devices() does a reset and clears error
>> status, EXCEPT for this case where it only clears error status and we
>> do the reset here?
>
> Just as an example to make this concrete.  Doesn't even compile.

Okay, I tried something similar.  Comments below:

>
>
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index d6927e6535e5..e389db7cbba6 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -1372,28 +1372,45 @@ static int aer_probe(struct pcie_device *dev)
>   */
>  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>  {
> -	int aer = dev->aer_cap;
> +	int type = pci_pcie_type(dev);
> +	struct pci_dev *root;
> +	int aer = 0;
>  	int rc = 0;
>  	u32 reg32;
>
> -	/* Disable Root's interrupt in response to error messages */
> -	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> -	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> -	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> +	if (type == PCI_EXP_TYPE_RC_END)
> +		root = dev->rcec;
> +	else
> +		root = dev;
> +
> +	if (root)
> +		aer = root->aer_cap;

Except here dev->rcec may be NULL for the non-native case. So I had a 
goto label to bypass what follows.  I left the FLR in err.c.


>
> -	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
> +	if (aer) {
> +		/* Disable Root's interrupt in response to error messages */
> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> +		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
> +	}
> +
> +	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
> +		rc = flr_on_rciep(dev);
> +		pci_info(dev, "has been reset (%d)\n", rc);
> +	} else {
>  		rc = pci_bus_error_reset(dev);
> -		pci_info(dev, "Root Port link has been reset\n");
> +		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
>  	}
>
> -	/* Clear Root Error Status */
> -	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
> -	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
> +	if (aer) {
> +		/* Clear Root Error Status */
> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
>
> -	/* Enable Root Port's interrupt in response to error messages */
> -	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> -	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> -	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> +		/* Enable Root Port's interrupt in response to error messages */
> +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> +		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
> +	}
>
>  	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
>  }
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index 79ae1356141d..08976034a89c 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -203,36 +203,19 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev 
> *dev,
>  	 */
>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> -	    type == PCI_EXP_TYPE_RC_EC)
> +	    type == PCI_EXP_TYPE_RC_EC ||
> +	    type == PCI_EXP_TYPE_RC_END)
>  		bridge = dev;
> -	else if (type == PCI_EXP_TYPE_RC_END)
> -		bridge = dev->rcec;
>  	else
>  		bridge = pci_upstream_bridge(dev);
>
>  	pci_dbg(bridge, "broadcast error_detected message\n");
>  	if (state == pci_channel_io_frozen) {
>  		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);

We’d need to walk back the dev addition to the pci_walk_bridge() and 
revert to prior to the checks on dev->rcec.

> -		if (type == PCI_EXP_TYPE_RC_END) {
> -			/*
> -			 * The callback only clears the Root Error Status
> -			 * of the RCEC (see aer.c). Only perform this for the
> -			 * native case, i.e., an RCEC is present.
> -			 */
> -			if (bridge)
> -				reset_subordinates(bridge);
> -
> -			status = flr_on_rciep(dev);
> -			if (status != PCI_ERS_RESULT_RECOVERED) {
> -				pci_warn(dev, "Function Level Reset failed\n");
> -				goto failed;
> -			}
> -		} else {
> -			status = reset_subordinates(bridge);
> -			if (status != PCI_ERS_RESULT_RECOVERED) {
> -				pci_warn(dev, "subordinate device reset failed\n");
> -				goto failed;
> -			}
> +		status = reset_subordinates(bridge);
> +		if (status != PCI_ERS_RESULT_RECOVERED) {
> +			pci_warn(bridge, "subordinate device reset failed\n");
> +			goto failed;
>  		}
>  	} else {
>  		pci_walk_bridge(bridge, dev, report_normal_detected, &status);

So this is what I experimented with for the most part but with the 
changes that I highlighted - addressing non-native case in the callback 
and also in the bridge walk.

I can test it out.

Thanks,

Sean
Kelley, Sean V Oct. 9, 2020, 11:51 p.m. UTC | #8
On Fri, 2020-10-09 at 15:07 -0700, Sean V Kelley wrote:
> On 9 Oct 2020, at 14:30, Bjorn Helgaas wrote:
> 
> > On Fri, Oct 09, 2020 at 12:57:45PM -0500, Bjorn Helgaas wrote:
> > > On Fri, Oct 02, 2020 at 11:47:32AM -0700, Sean V Kelley wrote:
> > > > From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> > > > 
> > > > When attempting error recovery for an RCiEP associated with an
> > > > RCEC 
> > > > device,
> > > > there needs to be a way to update the Root Error Status, the 
> > > > Uncorrectable
> > > > Error Status and the Uncorrectable Error Severity of the
> > > > parent 
> > > > RCEC.
> > > > In some non-native cases in which there is no OS visible device
> > > > associated with the RCiEP, there is nothing to act upon as the 
> > > > firmware
> > > > is acting before the OS. So add handling for the linked 'rcec'
> > > > in 
> > > > AER/ERR
> > > > while taking into account non-native cases.
> > > > 
> > > > Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
> > > > Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
> > > > Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> > > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > > ---
> > > >  drivers/pci/pcie/aer.c |  9 +++++----
> > > >  drivers/pci/pcie/err.c | 39 ++++++++++++++++++++++++++++----
> > > > -------
> > > >  2 files changed, 33 insertions(+), 15 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > > > index 65dff5f3457a..dccdba60b5d9 100644
> > > > --- a/drivers/pci/pcie/aer.c
> > > > +++ b/drivers/pci/pcie/aer.c
> > > > @@ -1358,17 +1358,18 @@ static int aer_probe(struct
> > > > pcie_device 
> > > > *dev)
> > > >  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> > > >  {
> > > >  	int aer = dev->aer_cap;
> > > > +	int rc = 0;
> > > >  	u32 reg32;
> > > > -	int rc;
> > > > -
> > > > 
> > > >  	/* Disable Root's interrupt in response to error
> > > > messages */
> > > >  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND,
> > > > &reg32);
> > > >  	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> > > >  	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND,
> > > > reg32);
> > > > 
> > > > -	rc = pci_bus_error_reset(dev);
> > > > -	pci_info(dev, "Root Port link has been reset\n");
> > > > +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
> > > > +		rc = pci_bus_error_reset(dev);
> > > > +		pci_info(dev, "Root Port link has been
> > > > reset\n");
> > > > +	}
> > > > 
> > > >  	/* Clear Root Error Status */
> > > >  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS,
> > > > &reg32);
> > > > diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> > > > index 38abd7984996..956ad4c86d53 100644
> > > > --- a/drivers/pci/pcie/err.c
> > > > +++ b/drivers/pci/pcie/err.c
> > > > @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev
> > > > *dev, 
> > > > void *data)
> > > >  /**
> > > >   * pci_walk_bridge - walk bridges potentially AER affected
> > > >   * @bridge   bridge which may be an RCEC with associated
> > > > RCiEPs,
> > > > - *           an RCiEP associated with an RCEC, or a Port.
> > > > + *           or a Port.
> > > > + * @dev      an RCiEP lacking an associated RCEC.
> > > >   * @cb       callback to be called for each device found
> > > >   * @userdata arbitrary pointer to be passed to callback.
> > > >   *
> > > > @@ -160,13 +161,20 @@ static int report_resume(struct pci_dev
> > > > *dev, 
> > > > void *data)
> > > >   * If the device provided has no subordinate bus, call the
> > > > provided
> > > >   * callback on the device itself.
> > > >   */
> > > > -static void pci_walk_bridge(struct pci_dev *bridge, int 
> > > > (*cb)(struct pci_dev *, void *),
> > > > +static void pci_walk_bridge(struct pci_dev *bridge, struct
> > > > pci_dev 
> > > > *dev,
> > > > +			    int (*cb)(struct pci_dev *, void
> > > > *),
> > > >  			    void *userdata)
> > > >  {
> > > > -	if (bridge->subordinate)
> > > > +	/*
> > > > +	 * In a non-native case where there is no OS-visible
> > > > reporting
> > > > +	 * device the bridge will be NULL, i.e., no RCEC, no
> > > > PORT.
> > > > +	 */
> > > > +	if (bridge && bridge->subordinate)
> > > >  		pci_walk_bus(bridge->subordinate, cb,
> > > > userdata);
> > > > -	else
> > > > +	else if (bridge)
> > > >  		cb(bridge, userdata);
> > > > +	else
> > > > +		cb(dev, userdata);
> > > >  }
> > > > 
> > > >  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> > > > @@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct 
> > > > pci_dev *dev,
> > > >  	type = pci_pcie_type(dev);
> > > >  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
> > > >  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> > > > -	    type == PCI_EXP_TYPE_RC_EC ||
> > > > -	    type == PCI_EXP_TYPE_RC_END)
> > > > +	    type == PCI_EXP_TYPE_RC_EC)
> > > >  		bridge = dev;
> > > > +	else if (type == PCI_EXP_TYPE_RC_END)
> > > > +		bridge = dev->rcec;
> > > >  	else
> > > >  		bridge = pci_upstream_bridge(dev);
> > > > 
> > > >  	pci_dbg(dev, "broadcast error_detected message\n");
> > > >  	if (state == pci_channel_io_frozen) {
> > > > -		pci_walk_bridge(bridge, report_frozen_detected,
> > > > &status);
> > > > +		pci_walk_bridge(bridge, dev,
> > > > report_frozen_detected, &status);
> > > >  		if (type == PCI_EXP_TYPE_RC_END) {
> > > > +			/*
> > > > +			 * The callback only clears the Root
> > > > Error Status
> > > > +			 * of the RCEC (see aer.c). Only
> > > > perform this for the
> > > > +			 * native case, i.e., an RCEC is
> > > > present.
> > > > +			 */
> > > > +			if (bridge)
> > > > +				reset_subordinate_devices(bridg
> > > > e);
> > > 
> > > Help me understand this.  There are lots of callbacks in this 
> > > picture,
> > > but I guess this "callback only clears Root Error Status" must
> > > refer
> > > to aer_root_reset(), i.e., the reset_subordinate_devices pointer?
> > > 
> > > Of course, the caller of pcie_do_recovery() supplied that
> > > pointer.
> > > And we can infer that it must be aer_root_reset(), not
> > > dpc_reset_link(), because RCECs and RCiEPs are not allowed to
> > > implement DPC.
> > > 
> > > I wish we didn't have either this assumption about what
> > > reset_subordinate_devices points to, or the assumption about what
> > > aer_root_reset() does.  They both seem a little bit tenuous.
> > > 
> > > We already made aer_root_reset() smart enough to check for
> > > RCECs.  
> > > Can
> > > we put the FLR there, too?  Then we wouldn't have this weird 
> > > situation
> > > where reset_subordinate_devices() does a reset and clears error
> > > status, EXCEPT for this case where it only clears error status
> > > and we
> > > do the reset here?
> > 
> > Just as an example to make this concrete.  Doesn't even compile.
> 
> Okay, I tried something similar.  Comments below:
> 
> > 
> > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > index d6927e6535e5..e389db7cbba6 100644
> > --- a/drivers/pci/pcie/aer.c
> > +++ b/drivers/pci/pcie/aer.c
> > @@ -1372,28 +1372,45 @@ static int aer_probe(struct pcie_device
> > *dev)
> >   */
> >  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> >  {
> > -	int aer = dev->aer_cap;
> > +	int type = pci_pcie_type(dev);
> > +	struct pci_dev *root;
> > +	int aer = 0;
> >  	int rc = 0;
> >  	u32 reg32;
> > 
> > -	/* Disable Root's interrupt in response to error messages */
> > -	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> > -	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> > -	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> > +	if (type == PCI_EXP_TYPE_RC_END)
> > +		root = dev->rcec;
> > +	else
> > +		root = dev;
> > +
> > +	if (root)
> > +		aer = root->aer_cap;
> 
> Except here dev->rcec may be NULL for the non-native case. So I had
> a 
> goto label to bypass what follows.  I left the FLR in err.c.
> 
> 
> > -	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
> > +	if (aer) {
> > +		/* Disable Root's interrupt in response to error
> > messages */
> > +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
> > &reg32);
> > +		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> > +		pci_write_config_dword(root, aer +
> > PCI_ERR_ROOT_COMMAND, reg32);
> > +	}
> > +
> > +	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END)
> > {
> > +		rc = flr_on_rciep(dev);
> > +		pci_info(dev, "has been reset (%d)\n", rc);
> > +	} else {
> >  		rc = pci_bus_error_reset(dev);
> > -		pci_info(dev, "Root Port link has been reset\n");
> > +		pci_info(dev, "Root Port link has been reset (%d)\n",
> > rc);
> >  	}
> > 
> > -	/* Clear Root Error Status */
> > -	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
> > -	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
> > +	if (aer) {
> > +		/* Clear Root Error Status */
> > +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS,
> > &reg32);
> > +		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS,
> > reg32);
> > 
> > -	/* Enable Root Port's interrupt in response to error messages
> > */
> > -	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> > -	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> > -	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> > +		/* Enable Root Port's interrupt in response to error
> > messages */
> > +		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
> > &reg32);
> > +		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> > +		pci_write_config_dword(root, aer +
> > PCI_ERR_ROOT_COMMAND, reg32);
> > +	}
> > 
> >  	return rc ? PCI_ERS_RESULT_DISCONNECT :
> > PCI_ERS_RESULT_RECOVERED;
> >  }
> > diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> > index 79ae1356141d..08976034a89c 100644
> > --- a/drivers/pci/pcie/err.c
> > +++ b/drivers/pci/pcie/err.c
> > @@ -203,36 +203,19 @@ pci_ers_result_t pcie_do_recovery(struct
> > pci_dev 
> > *dev,
> >  	 */
> >  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
> >  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> > -	    type == PCI_EXP_TYPE_RC_EC)
> > +	    type == PCI_EXP_TYPE_RC_EC ||
> > +	    type == PCI_EXP_TYPE_RC_END)
> >  		bridge = dev;
> > -	else if (type == PCI_EXP_TYPE_RC_END)
> > -		bridge = dev->rcec;
> >  	else
> >  		bridge = pci_upstream_bridge(dev);
> > 
> >  	pci_dbg(bridge, "broadcast error_detected message\n");
> >  	if (state == pci_channel_io_frozen) {
> >  		pci_walk_bridge(bridge, dev, report_frozen_detected,
> > &status);
> 
> We’d need to walk back the dev addition to the pci_walk_bridge() and 
> revert to prior to the checks on dev->rcec.
> 
> > -		if (type == PCI_EXP_TYPE_RC_END) {
> > -			/*
> > -			 * The callback only clears the Root Error
> > Status
> > -			 * of the RCEC (see aer.c). Only perform this
> > for the
> > -			 * native case, i.e., an RCEC is present.
> > -			 */
> > -			if (bridge)
> > -				reset_subordinates(bridge);
> > -
> > -			status = flr_on_rciep(dev);
> > -			if (status != PCI_ERS_RESULT_RECOVERED) {
> > -				pci_warn(dev, "Function Level Reset
> > failed\n");
> > -				goto failed;
> > -			}
> > -		} else {
> > -			status = reset_subordinates(bridge);
> > -			if (status != PCI_ERS_RESULT_RECOVERED) {
> > -				pci_warn(dev, "subordinate device reset
> > failed\n");
> > -				goto failed;
> > -			}
> > +		status = reset_subordinates(bridge);
> > +		if (status != PCI_ERS_RESULT_RECOVERED) {
> > +			pci_warn(bridge, "subordinate device reset
> > failed\n");
> > +			goto failed;
> >  		}
> >  	} else {
> >  		pci_walk_bridge(bridge, dev, report_normal_detected,
> > &status);
> 
> So this is what I experimented with for the most part but with the 
> changes that I highlighted - addressing non-native case in the
> callback 
> and also in the bridge walk.
> 
> I can test it out.


So I tested the following out, including your moving flr to aer.c:

- Renamed flr_on_rciep() to flr_on_rc() for RC devices (RC_END and
RC_EC)

- Moved check on dev->rcec into aer_root_reset() including the FLR.

- Reworked pci_walk_bridge() to drop extra dev argument and check
locally for the bridge->rcec. Maybe should also check on type when
checking on bridge->rcec.

Note I didn't use the check on aer_cap existence because I think you
had added that for simply being able to skip over for the non-native
case and I handle that with the single goto at the beginning which
takes you to the FLR.

So this is rough, compiled, tested with AER injections but that's it...

Please take a look.

Thanks,

Sean




 drivers/pci/pcie/aer.c | 57 ++++++++++++++++++++++++++++++--------
 drivers/pci/pcie/err.c | 62 ++++++++++++------------------------------
 2 files changed, 63 insertions(+), 56 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 3cde646f71c0..35b32309a97b 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1362,6 +1362,23 @@ static int aer_probe(struct pcie_device *dev)
        return 0;
 }
 
+/**
+ * flr_on_rc - function levl reset on root complex device
+ * @dev: pointer to root complex device's pci_dev data structure
+ *
+ * Invoked by aer_root_reset() when performing link reset.
+ */
+static pci_ers_result_t flr_on_rc(struct pci_dev *dev)
+{
+       if (!pcie_has_flr(dev))
+               return PCI_ERS_RESULT_DISCONNECT;
+
+       if (pcie_flr(dev))
+               return PCI_ERS_RESULT_DISCONNECT;
+
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
 /**
  * aer_root_reset - reset link on Root Port
  * @dev: pointer to Root Port's pci_dev data structure
@@ -1370,28 +1387,46 @@ static int aer_probe(struct pcie_device *dev)
  */
 static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 {
+       int type = pci_pcie_type(dev);
+       struct pci_dev *root = dev;
        int aer = dev->aer_cap;
        int rc = 0;
        u32 reg32;
 
+       if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
+               /*
+                * The reset should only clear the Root Error Status
+                * of the RCEC. Only perform this for the
+                * native case, i.e., an RCEC is present.
+                */
+               if (!(dev->rcec))
+                       goto non_native;
+               else
+                       root = dev->rcec;
+       }
+
        /* Disable Root's interrupt in response to error messages */
-       pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+       pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
&reg32);
        reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
-       pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
-
-       if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
-               rc = pci_bus_error_reset(dev);
-               pci_info(dev, "Root Port link has been reset\n");
-       }
+       pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
reg32);
 
        /* Clear Root Error Status */
-       pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
-       pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
+       pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
+       pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
 
        /* Enable Root Port's interrupt in response to error messages
*/
-       pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+       pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
&reg32);
        reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
-       pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+       pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
reg32);
+
+non_native:
+       if ((type == PCI_EXP_TYPE_RC_EC) || (type ==
PCI_EXP_TYPE_RC_END)) {
+               rc = flr_on_rc(root);
+               pci_info(dev, "has been reset (%d)\n", rc);
+       } else {
+               rc = pci_bus_error_reset(root);
+               pci_info(dev, "Root Port link has been reset (%d)\n",
rc);
+       }
 
        return rc ? PCI_ERS_RESULT_DISCONNECT :
PCI_ERS_RESULT_RECOVERED;
 }
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 956ad4c86d53..81bbf8a4006d 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -150,7 +150,6 @@ static int report_resume(struct pci_dev *dev, void
*data)
  * pci_walk_bridge - walk bridges potentially AER affected
  * @bridge   bridge which may be an RCEC with associated RCiEPs,
  *           or a Port.
- * @dev      an RCiEP lacking an associated RCEC.
  * @cb       callback to be called for each device found
  * @userdata arbitrary pointer to be passed to callback.
  *
@@ -161,7 +160,7 @@ static int report_resume(struct pci_dev *dev, void
*data)
  * If the device provided has no subordinate bus, call the provided
  * callback on the device itself.
  */
-static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev
*dev,
+static void pci_walk_bridge(struct pci_dev *bridge,
                            int (*cb)(struct pci_dev *, void *),
                            void *userdata)
 {
@@ -169,23 +168,12 @@ static void pci_walk_bridge(struct pci_dev
*bridge, struct pci_dev *dev,
         * In a non-native case where there is no OS-visible reporting
         * device the bridge will be NULL, i.e., no RCEC, no PORT.
         */
-       if (bridge && bridge->subordinate)
+       if (bridge->subordinate)
                pci_walk_bus(bridge->subordinate, cb, userdata);
-       else if (bridge)
-               cb(bridge, userdata);
+       else if (bridge->rcec)
+               cb(bridge->rcec, userdata);
        else
-               cb(dev, userdata);
-}
-
-static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
-{
-       if (!pcie_has_flr(dev))
-               return PCI_ERS_RESULT_DISCONNECT;
-
-       if (pcie_flr(dev))
-               return PCI_ERS_RESULT_DISCONNECT;
-
-       return PCI_ERS_RESULT_RECOVERED;
+               cb(bridge, userdata);
 }
 
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
@@ -204,45 +192,29 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev
*dev,
        type = pci_pcie_type(dev);
        if (type == PCI_EXP_TYPE_ROOT_PORT ||
            type == PCI_EXP_TYPE_DOWNSTREAM ||
-           type == PCI_EXP_TYPE_RC_EC)
+           type == PCI_EXP_TYPE_RC_EC ||
+           type == PCI_EXP_TYPE_RC_END)
                bridge = dev;
-       else if (type == PCI_EXP_TYPE_RC_END)
-               bridge = dev->rcec;
        else
                bridge = pci_upstream_bridge(dev);
 
        pci_dbg(dev, "broadcast error_detected message\n");
        if (state == pci_channel_io_frozen) {
-               pci_walk_bridge(bridge, dev, report_frozen_detected,
&status);
-               if (type == PCI_EXP_TYPE_RC_END) {
-                       /*
-                        * The callback only clears the Root Error
Status
-                        * of the RCEC (see aer.c). Only perform this
for the
-                        * native case, i.e., an RCEC is present.
-                        */
-                       if (bridge)
-                               reset_subordinate_devices(bridge);
-
-                       status = flr_on_rciep(dev);
-                       if (status != PCI_ERS_RESULT_RECOVERED) {
-                               pci_warn(dev, "function level reset
failed\n");
-                               goto failed;
-                       }
-               } else {
-                       status = reset_subordinate_devices(bridge);
-                       if (status != PCI_ERS_RESULT_RECOVERED) {
-                               pci_warn(dev, "subordinate device reset
failed\n");
-                               goto failed;
-                       }
+               pci_walk_bridge(bridge, report_frozen_detected,
&status);
+               status = reset_subordinate_devices(bridge);
+               if (status != PCI_ERS_RESULT_RECOVERED) {
+                       pci_warn(dev, "subordinate device reset
failed\n");
+                       goto failed;
                }
+
        } else {
-               pci_walk_bridge(bridge, dev, report_normal_detected,
&status);
+               pci_walk_bridge(bridge, report_normal_detected,
&status);
        }
 
        if (status == PCI_ERS_RESULT_CAN_RECOVER) {
                status = PCI_ERS_RESULT_RECOVERED;
                pci_dbg(dev, "broadcast mmio_enabled message\n");
-               pci_walk_bridge(bridge, dev, report_mmio_enabled,
&status);
+               pci_walk_bridge(bridge, report_mmio_enabled, &status);
        }
 
        if (status == PCI_ERS_RESULT_NEED_RESET) {
@@ -253,14 +225,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev
*dev,
                 */
                status = PCI_ERS_RESULT_RECOVERED;
                pci_dbg(dev, "broadcast slot_reset message\n");
-               pci_walk_bridge(bridge, dev, report_slot_reset,
&status);
+               pci_walk_bridge(bridge, report_slot_reset, &status);
        }
 
        if (status != PCI_ERS_RESULT_RECOVERED)
                goto failed;
 
        pci_dbg(dev, "broadcast resume message\n");
-       pci_walk_bridge(bridge, dev, report_resume, &status);
+       pci_walk_bridge(bridge, report_resume, &status);
 
        if (type == PCI_EXP_TYPE_ROOT_PORT ||
            type == PCI_EXP_TYPE_DOWNSTREAM ||
Bjorn Helgaas Oct. 12, 2020, 10:58 p.m. UTC | #9
On Fri, Oct 09, 2020 at 11:51:39PM +0000, Kelley, Sean V wrote:
> On Fri, 2020-10-09 at 15:07 -0700, Sean V Kelley wrote:

> So I tested the following out, including your moving flr to aer.c:
> 
> - Renamed flr_on_rciep() to flr_on_rc() for RC devices (RC_END and
> RC_EC)
> 
> - Moved check on dev->rcec into aer_root_reset() including the FLR.
> 
> - Reworked pci_walk_bridge() to drop extra dev argument and check
> locally for the bridge->rcec. Maybe should also check on type when
> checking on bridge->rcec.
> 
> Note I didn't use the check on aer_cap existence because I think you
> had added that for simply being able to skip over for the non-native
> case and I handle that with the single goto at the beginning which
> takes you to the FLR.

Right.  Well, my thinking was that "root" would be a device with the
AER Root Error Command and Root Error Status registers, i.e., a Root
Port or RCEC.  IIUC that basically means the APEI case where firmware
gives us an error record.

Isn't the existing v5.9 code buggy in that it unconditionally pokes
these registers?  I think the APEI path can end up here, and firmware
probably has not granted us control over AER.

Somewhat related question: I'm a little skeptical about the fact that
aer_root_reset() currently does:

  - clear ROOT_PORT_INTR_ON_MESG_MASK
  - do reset
  - clear PCI_ERR_ROOT_STATUS
  - enable ROOT_PORT_INTR_ON_MESG_MASK

In the APEI path all this AER register manipulation must be done by
firmware before passing the error record to the OS.  So in the native
case where the OS does own the AER registers, why can't the OS do that
manipulation in the same order, i.e., all before doing the reset?

> So this is rough, compiled, tested with AER injections but that's it...

I couldn't actually apply the patch below because it seems to be
whitespace-damaged, but I think I like it.

  - It would be nice to be able to just call pcie_flr() and not have
    to add flr_on_rc().  I can't remember why we need the
    pcie_has_flr() / pcie_flr() dance.  It seems racy and ugly, but I
    have a vague recollection that there actually is some reason for
    it.

  - I would *rather* consolidate the AER register updates and test for
    the non-native case once instead of treating it like a completely
    separate path with a "goto".  But maybe not possible.  Not a big
    deal either way.

  - Getting rid of the extra "dev" argument to pci_walk_bridge() is a
    great side-effect.  I didn't even notice that.

  - If we can simplify that "state == pci_channel_io_frozen" case as
    this does, that is a *big* deal because there are other patches
    just waiting to touch that reset and it will be much simpler if
    there's only one reset_subordinate_devices() call there.

If you do work this up, I'd really appreciate it if you can start with
my pci/err branch so I don't have to re-do all the tweaks I've already
done:

  https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/commit/?h=pci/err

Bjorn
Kelley, Sean V Oct. 13, 2020, 4:55 p.m. UTC | #10
On Mon, 2020-10-12 at 17:58 -0500, Bjorn Helgaas wrote:
> On Fri, Oct 09, 2020 at 11:51:39PM +0000, Kelley, Sean V wrote:
> > On Fri, 2020-10-09 at 15:07 -0700, Sean V Kelley wrote:
> > So I tested the following out, including your moving flr to aer.c:
> > 
> > - Renamed flr_on_rciep() to flr_on_rc() for RC devices (RC_END and
> > RC_EC)
> > 
> > - Moved check on dev->rcec into aer_root_reset() including the FLR.
> > 
> > - Reworked pci_walk_bridge() to drop extra dev argument and check
> > locally for the bridge->rcec. Maybe should also check on type when
> > checking on bridge->rcec.
> > 
> > Note I didn't use the check on aer_cap existence because I think
> > you
> > had added that for simply being able to skip over for the non-
> > native
> > case and I handle that with the single goto at the beginning which
> > takes you to the FLR.
> 
> Right.  Well, my thinking was that "root" would be a device with the
> AER Root Error Command and Root Error Status registers, i.e., a Root
> Port or RCEC.  IIUC that basically means the APEI case where firmware
> gives us an error record.

Got it.

> 
> Isn't the existing v5.9 code buggy in that it unconditionally pokes
> these registers?  I think the APEI path can end up here, and firmware
> probably has not granted us control over AER.

Yes, APEI path can end up here even in the absence of AER control.

> 
> Somewhat related question: I'm a little skeptical about the fact that
> aer_root_reset() currently does:
> 
>   - clear ROOT_PORT_INTR_ON_MESG_MASK
>   - do reset
>   - clear PCI_ERR_ROOT_STATUS
>   - enable ROOT_PORT_INTR_ON_MESG_MASK

It's a bit of a mix and growing with RC_END handling.

> 
> In the APEI path all this AER register manipulation must be done by
> firmware before passing the error record to the OS.  So in the native
> case where the OS does own the AER registers, why can't the OS do
> that
> manipulation in the same order, i.e., all before doing the reset?

And you're right, the mix here imposes additional complexity for native
versus non-native. If you're not actively engaged with the code, it's
not obvious. So, yes moving it out would make more sense.


> 
> > So this is rough, compiled, tested with AER injections but that's
> > it...
> 
> I couldn't actually apply the patch below because it seems to be
> whitespace-damaged, but I think I like it.

Yes, it was a quick copy-paste to an existing email. Will work with
your branch.

> 
>   - It would be nice to be able to just call pcie_flr() and not have
>     to add flr_on_rc().  I can't remember why we need the
>     pcie_has_flr() / pcie_flr() dance.  It seems racy and ugly, but I
>     have a vague recollection that there actually is some reason for
>     it.

I'll have a look.

> 
>   - I would *rather* consolidate the AER register updates and test
> for
>     the non-native case once instead of treating it like a completely
>     separate path with a "goto".  But maybe not possible.  Not a big
>     deal either way.

Following your line of reasoning above, I think we can better
consolidate the AER register updates.

> 
>   - Getting rid of the extra "dev" argument to pci_walk_bridge() is a
>     great side-effect.  I didn't even notice that.
> 
>   - If we can simplify that "state == pci_channel_io_frozen" case as
>     this does, that is a *big* deal because there are other patches
>     just waiting to touch that reset and it will be much simpler if
>     there's only one reset_subordinate_devices() call there.

Agreed.

> 
> If you do work this up, I'd really appreciate it if you can start
> with
> my pci/err branch so I don't have to re-do all the tweaks I've
> already
> done:
> 
>   
> https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/commit/?h=pci/err
> 

Will do.

Thanks,

Sean



> Bjorn
diff mbox series

Patch

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 65dff5f3457a..dccdba60b5d9 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1358,17 +1358,18 @@  static int aer_probe(struct pcie_device *dev)
 static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 {
 	int aer = dev->aer_cap;
+	int rc = 0;
 	u32 reg32;
-	int rc;
-
 
 	/* Disable Root's interrupt in response to error messages */
 	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
 	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
 	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
 
-	rc = pci_bus_error_reset(dev);
-	pci_info(dev, "Root Port link has been reset\n");
+	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
+		rc = pci_bus_error_reset(dev);
+		pci_info(dev, "Root Port link has been reset\n");
+	}
 
 	/* Clear Root Error Status */
 	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 38abd7984996..956ad4c86d53 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -149,7 +149,8 @@  static int report_resume(struct pci_dev *dev, void *data)
 /**
  * pci_walk_bridge - walk bridges potentially AER affected
  * @bridge   bridge which may be an RCEC with associated RCiEPs,
- *           an RCiEP associated with an RCEC, or a Port.
+ *           or a Port.
+ * @dev      an RCiEP lacking an associated RCEC.
  * @cb       callback to be called for each device found
  * @userdata arbitrary pointer to be passed to callback.
  *
@@ -160,13 +161,20 @@  static int report_resume(struct pci_dev *dev, void *data)
  * If the device provided has no subordinate bus, call the provided
  * callback on the device itself.
  */
-static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev *, void *),
+static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev *dev,
+			    int (*cb)(struct pci_dev *, void *),
 			    void *userdata)
 {
-	if (bridge->subordinate)
+	/*
+	 * In a non-native case where there is no OS-visible reporting
+	 * device the bridge will be NULL, i.e., no RCEC, no PORT.
+	 */
+	if (bridge && bridge->subordinate)
 		pci_walk_bus(bridge->subordinate, cb, userdata);
-	else
+	else if (bridge)
 		cb(bridge, userdata);
+	else
+		cb(dev, userdata);
 }
 
 static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
@@ -196,16 +204,25 @@  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 	type = pci_pcie_type(dev);
 	if (type == PCI_EXP_TYPE_ROOT_PORT ||
 	    type == PCI_EXP_TYPE_DOWNSTREAM ||
-	    type == PCI_EXP_TYPE_RC_EC ||
-	    type == PCI_EXP_TYPE_RC_END)
+	    type == PCI_EXP_TYPE_RC_EC)
 		bridge = dev;
+	else if (type == PCI_EXP_TYPE_RC_END)
+		bridge = dev->rcec;
 	else
 		bridge = pci_upstream_bridge(dev);
 
 	pci_dbg(dev, "broadcast error_detected message\n");
 	if (state == pci_channel_io_frozen) {
-		pci_walk_bridge(bridge, report_frozen_detected, &status);
+		pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
 		if (type == PCI_EXP_TYPE_RC_END) {
+			/*
+			 * The callback only clears the Root Error Status
+			 * of the RCEC (see aer.c). Only perform this for the
+			 * native case, i.e., an RCEC is present.
+			 */
+			if (bridge)
+				reset_subordinate_devices(bridge);
+
 			status = flr_on_rciep(dev);
 			if (status != PCI_ERS_RESULT_RECOVERED) {
 				pci_warn(dev, "function level reset failed\n");
@@ -219,13 +236,13 @@  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 			}
 		}
 	} else {
-		pci_walk_bridge(bridge, report_normal_detected, &status);
+		pci_walk_bridge(bridge, dev, report_normal_detected, &status);
 	}
 
 	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
 		status = PCI_ERS_RESULT_RECOVERED;
 		pci_dbg(dev, "broadcast mmio_enabled message\n");
-		pci_walk_bridge(bridge, report_mmio_enabled, &status);
+		pci_walk_bridge(bridge, dev, report_mmio_enabled, &status);
 	}
 
 	if (status == PCI_ERS_RESULT_NEED_RESET) {
@@ -236,14 +253,14 @@  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 		 */
 		status = PCI_ERS_RESULT_RECOVERED;
 		pci_dbg(dev, "broadcast slot_reset message\n");
-		pci_walk_bridge(bridge, report_slot_reset, &status);
+		pci_walk_bridge(bridge, dev, report_slot_reset, &status);
 	}
 
 	if (status != PCI_ERS_RESULT_RECOVERED)
 		goto failed;
 
 	pci_dbg(dev, "broadcast resume message\n");
-	pci_walk_bridge(bridge, report_resume, &status);
+	pci_walk_bridge(bridge, dev, report_resume, &status);
 
 	if (type == PCI_EXP_TYPE_ROOT_PORT ||
 	    type == PCI_EXP_TYPE_DOWNSTREAM ||