PCIe AER: report non fatal errors only to the functions of the same device

Message ID 1501813106-89672-1-git-send-email-liudongdong3@huawei.com
State Superseded
Headers show

Commit Message

Dongdong Liu Aug. 4, 2017, 2:18 a.m.
From: Gabriele Paoloni <gabriele.paoloni@huawei.com>

Currently if an uncorrectable error is reported by an EP the AER
driver walks over all the devices connected to the upstream port
bus and in turns call the report_error_detected() callback.
If any of the devices connected to the bus does not implement
dev->driver->err_handler->error_detected() do_recovery() will fail.

However for non fatal errors the PCIe link should not be considered
compromised, therefore it makes sense to report the error only to
all the functions of a multifunction device.
This patch implements this new behaviour for non fatal errors.

Signed-off-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
---
 drivers/pci/bus.c                  | 38 ++++++++++++++++++++++++++++++++++++++
 drivers/pci/pcie/aer/aerdrv_core.c | 13 ++++++++++++-
 include/linux/pci.h                |  3 ++-
 3 files changed, 52 insertions(+), 2 deletions(-)

Comments

Bjorn Helgaas Aug. 15, 2017, 10:50 p.m. | #1
On Fri, Aug 04, 2017 at 10:18:26AM +0800, Dongdong Liu wrote:
> From: Gabriele Paoloni <gabriele.paoloni@huawei.com>
> 
> Currently if an uncorrectable error is reported by an EP the AER
> driver walks over all the devices connected to the upstream port
> bus and in turns call the report_error_detected() callback.
> If any of the devices connected to the bus does not implement
> dev->driver->err_handler->error_detected() do_recovery() will fail.
> 
> However for non fatal errors the PCIe link should not be considered
> compromised, therefore it makes sense to report the error only to
> all the functions of a multifunction device.
> This patch implements this new behaviour for non fatal errors.

Why do we bother even with other functions in the same multifunction
device?  PCIe r3.1, sec 6.2.2.2.2, says non-fatal errors only affect a
particular transaction, and "devices not associated with the
transaction in error are not impacted."

A transaction is only associated with one function, so other functions
in the same device shouldn't be affected, should they?

> Signed-off-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>
> Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
> ---
>  drivers/pci/bus.c                  | 38 ++++++++++++++++++++++++++++++++++++++
>  drivers/pci/pcie/aer/aerdrv_core.c | 13 ++++++++++++-
>  include/linux/pci.h                |  3 ++-
>  3 files changed, 52 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
> index bc56cf1..bc8f8b2 100644
> --- a/drivers/pci/bus.c
> +++ b/drivers/pci/bus.c
> @@ -364,6 +364,44 @@ void pci_bus_add_devices(const struct pci_bus *bus)
>  }
>  EXPORT_SYMBOL(pci_bus_add_devices);
>  
> +/** pci_walk_mf_dev - walk all functions of a multi-function
> + *  device calling callback.
> + *  @dev      a function in a multi-function device
> + *  @cb       callback to be called for each device found
> + *  @userdata arbitrary pointer to be passed to callback.
> + *
> + *  Walk, on a given bus, only the adjacent functions of a
> + *  multi-function device. Call the provided callback on each
> + *  device found.
> + *
> + *  We check the return of @cb each time. If it returns anything
> + *  other than 0, we break out.
> + *
> + */
> +void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev *, void *),
> +		  void *userdata)
> +{
> +	int retval;
> +	struct pci_bus *bus;
> +	struct pci_dev *pdev;
> +	int ndev;
> +
> +	bus = dev->bus;
> +	ndev = PCI_SLOT(dev->devfn);
> +
> +	down_read(&pci_bus_sem);
> +	/* call cb for all the functions of the mf device */
> +	list_for_each_entry(pdev, &bus->devices, bus_list) {
> +		if (PCI_SLOT(pdev->devfn) == ndev) {
> +			retval = cb(pdev, userdata);
> +			if (retval)
> +				break;
> +		}
> +	}
> +	up_read(&pci_bus_sem);
> +}
> +EXPORT_SYMBOL_GPL(pci_walk_mf_dev);
> +
>  /** pci_walk_bus - walk devices on/under bus, calling callback.
>   *  @top      bus whose devices should be walked
>   *  @cb       callback to be called for each device found
> diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
> index b1303b3..67c3dc0 100644
> --- a/drivers/pci/pcie/aer/aerdrv_core.c
> +++ b/drivers/pci/pcie/aer/aerdrv_core.c
> @@ -390,7 +390,18 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
>  		 * If the error is reported by an end point, we think this
>  		 * error is related to the upstream link of the end point.
>  		 */
> -		pci_walk_bus(dev->bus, cb, &result_data);
> +		if ((state == pci_channel_io_normal) &&
> +				(!pci_ari_enabled(dev->bus)))
> +			/*
> +			 * the error is non fatal so the bus is ok, just walk
> +			 * through all the functions in a multifunction device.
> +			 * if ARI is enabled on the bus then there can be only
> +			 * one device under that bus (so walk all the functions
> +			 * under the bus).
> +			 */
> +			pci_walk_mf_dev(dev, cb, &result_data);
> +		else
> +			pci_walk_bus(dev->bus, cb, &result_data);
>  	}
>  
>  	return result_data.result;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 4869e66..69e77bb 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1269,7 +1269,8 @@ const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
>  					 struct pci_dev *dev);
>  int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
>  		    int pass);
> -
> +void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev *, void *),
> +		  void *userdata);
>  void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
>  		  void *userdata);
>  int pci_cfg_space_size(struct pci_dev *dev);
> -- 
> 1.9.1
>
Dongdong Liu Aug. 16, 2017, 8:50 a.m. | #2
Hi Bjorn

在 2017/8/16 6:50, Bjorn Helgaas 写道:
> On Fri, Aug 04, 2017 at 10:18:26AM +0800, Dongdong Liu wrote:
>> From: Gabriele Paoloni <gabriele.paoloni@huawei.com>
>>
>> Currently if an uncorrectable error is reported by an EP the AER
>> driver walks over all the devices connected to the upstream port
>> bus and in turns call the report_error_detected() callback.
>> If any of the devices connected to the bus does not implement
>> dev->driver->err_handler->error_detected() do_recovery() will fail.
>>
>> However for non fatal errors the PCIe link should not be considered
>> compromised, therefore it makes sense to report the error only to
>> all the functions of a multifunction device.
>> This patch implements this new behaviour for non fatal errors.
>
> Why do we bother even with other functions in the same multifunction
> device?  PCIe r3.1, sec 6.2.2.2.2, says non-fatal errors only affect a
> particular transaction, and "devices not associated with the
> transaction in error are not impacted."
>
> A transaction is only associated with one function, so other functions
> in the same device shouldn't be affected, should they?

PCIe r3.1, sec 6.2.4 Error Logging
PCI Express errors are not Function-specific.
"Software is responsible for scanning all Functions in a multi-Function device when it detects one of those errors"

Thanks,
Dongdong
>
>> Signed-off-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>
>> Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
>> ---
>>  drivers/pci/bus.c                  | 38 ++++++++++++++++++++++++++++++++++++++
>>  drivers/pci/pcie/aer/aerdrv_core.c | 13 ++++++++++++-
>>  include/linux/pci.h                |  3 ++-
>>  3 files changed, 52 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
>> index bc56cf1..bc8f8b2 100644
>> --- a/drivers/pci/bus.c
>> +++ b/drivers/pci/bus.c
>> @@ -364,6 +364,44 @@ void pci_bus_add_devices(const struct pci_bus *bus)
>>  }
>>  EXPORT_SYMBOL(pci_bus_add_devices);
>>
>> +/** pci_walk_mf_dev - walk all functions of a multi-function
>> + *  device calling callback.
>> + *  @dev      a function in a multi-function device
>> + *  @cb       callback to be called for each device found
>> + *  @userdata arbitrary pointer to be passed to callback.
>> + *
>> + *  Walk, on a given bus, only the adjacent functions of a
>> + *  multi-function device. Call the provided callback on each
>> + *  device found.
>> + *
>> + *  We check the return of @cb each time. If it returns anything
>> + *  other than 0, we break out.
>> + *
>> + */
>> +void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev *, void *),
>> +		  void *userdata)
>> +{
>> +	int retval;
>> +	struct pci_bus *bus;
>> +	struct pci_dev *pdev;
>> +	int ndev;
>> +
>> +	bus = dev->bus;
>> +	ndev = PCI_SLOT(dev->devfn);
>> +
>> +	down_read(&pci_bus_sem);
>> +	/* call cb for all the functions of the mf device */
>> +	list_for_each_entry(pdev, &bus->devices, bus_list) {
>> +		if (PCI_SLOT(pdev->devfn) == ndev) {
>> +			retval = cb(pdev, userdata);
>> +			if (retval)
>> +				break;
>> +		}
>> +	}
>> +	up_read(&pci_bus_sem);
>> +}
>> +EXPORT_SYMBOL_GPL(pci_walk_mf_dev);
>> +
>>  /** pci_walk_bus - walk devices on/under bus, calling callback.
>>   *  @top      bus whose devices should be walked
>>   *  @cb       callback to be called for each device found
>> diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
>> index b1303b3..67c3dc0 100644
>> --- a/drivers/pci/pcie/aer/aerdrv_core.c
>> +++ b/drivers/pci/pcie/aer/aerdrv_core.c
>> @@ -390,7 +390,18 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
>>  		 * If the error is reported by an end point, we think this
>>  		 * error is related to the upstream link of the end point.
>>  		 */
>> -		pci_walk_bus(dev->bus, cb, &result_data);
>> +		if ((state == pci_channel_io_normal) &&
>> +				(!pci_ari_enabled(dev->bus)))
>> +			/*
>> +			 * the error is non fatal so the bus is ok, just walk
>> +			 * through all the functions in a multifunction device.
>> +			 * if ARI is enabled on the bus then there can be only
>> +			 * one device under that bus (so walk all the functions
>> +			 * under the bus).
>> +			 */
>> +			pci_walk_mf_dev(dev, cb, &result_data);
>> +		else
>> +			pci_walk_bus(dev->bus, cb, &result_data);
>>  	}
>>
>>  	return result_data.result;
>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>> index 4869e66..69e77bb 100644
>> --- a/include/linux/pci.h
>> +++ b/include/linux/pci.h
>> @@ -1269,7 +1269,8 @@ const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
>>  					 struct pci_dev *dev);
>>  int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
>>  		    int pass);
>> -
>> +void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev *, void *),
>> +		  void *userdata);
>>  void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
>>  		  void *userdata);
>>  int pci_cfg_space_size(struct pci_dev *dev);
>> --
>> 1.9.1
>>
>
> .
>
Bjorn Helgaas Aug. 16, 2017, 2:07 p.m. | #3
On Wed, Aug 16, 2017 at 04:50:16PM +0800, Dongdong Liu wrote:
> Hi Bjorn
> 
> 在 2017/8/16 6:50, Bjorn Helgaas 写道:
> >On Fri, Aug 04, 2017 at 10:18:26AM +0800, Dongdong Liu wrote:
> >>From: Gabriele Paoloni <gabriele.paoloni@huawei.com>
> >>
> >>Currently if an uncorrectable error is reported by an EP the AER
> >>driver walks over all the devices connected to the upstream port
> >>bus and in turns call the report_error_detected() callback.
> >>If any of the devices connected to the bus does not implement
> >>dev->driver->err_handler->error_detected() do_recovery() will fail.
> >>
> >>However for non fatal errors the PCIe link should not be considered
> >>compromised, therefore it makes sense to report the error only to
> >>all the functions of a multifunction device.
> >>This patch implements this new behaviour for non fatal errors.
> >
> >Why do we bother even with other functions in the same multifunction
> >device?  PCIe r3.1, sec 6.2.2.2.2, says non-fatal errors only affect a
> >particular transaction, and "devices not associated with the
> >transaction in error are not impacted."
> >
> >A transaction is only associated with one function, so other functions
> >in the same device shouldn't be affected, should they?
> 
> PCIe r3.1, sec 6.2.4 Error Logging
> PCI Express errors are not Function-specific.  "Software is
> responsible for scanning all Functions in a multi-Function device
> when it detects one of those errors"

The previous text basically says that if a multi-function device
should generate at most one error reporting message, even if several
functions have logged an error of the same severity.  I think that
single message corresponds to a single interrupt.

So when it says "software is responsible for scanning all Functions in
a multi-Function device," I think the point is that software should
read the error reporting registers of all functions in case several of
them have logged errors.

But IIUC, this patch has nothing to do with reading the error CSRs
(which should be done in the PCI/AER core).  This patch merely changes
the set of devices for which we call the driver's error reporting
interfaces.

If this is fixing a problem, maybe it would help clarify things if you
could include the concrete details of what's going wrong.

Bjorn

> >>Signed-off-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>
> >>Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
> >>---
> >> drivers/pci/bus.c                  | 38 ++++++++++++++++++++++++++++++++++++++
> >> drivers/pci/pcie/aer/aerdrv_core.c | 13 ++++++++++++-
> >> include/linux/pci.h                |  3 ++-
> >> 3 files changed, 52 insertions(+), 2 deletions(-)
> >>
> >>diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
> >>index bc56cf1..bc8f8b2 100644
> >>--- a/drivers/pci/bus.c
> >>+++ b/drivers/pci/bus.c
> >>@@ -364,6 +364,44 @@ void pci_bus_add_devices(const struct pci_bus *bus)
> >> }
> >> EXPORT_SYMBOL(pci_bus_add_devices);
> >>
> >>+/** pci_walk_mf_dev - walk all functions of a multi-function
> >>+ *  device calling callback.
> >>+ *  @dev      a function in a multi-function device
> >>+ *  @cb       callback to be called for each device found
> >>+ *  @userdata arbitrary pointer to be passed to callback.
> >>+ *
> >>+ *  Walk, on a given bus, only the adjacent functions of a
> >>+ *  multi-function device. Call the provided callback on each
> >>+ *  device found.
> >>+ *
> >>+ *  We check the return of @cb each time. If it returns anything
> >>+ *  other than 0, we break out.
> >>+ *
> >>+ */
> >>+void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev *, void *),
> >>+		  void *userdata)
> >>+{
> >>+	int retval;
> >>+	struct pci_bus *bus;
> >>+	struct pci_dev *pdev;
> >>+	int ndev;
> >>+
> >>+	bus = dev->bus;
> >>+	ndev = PCI_SLOT(dev->devfn);
> >>+
> >>+	down_read(&pci_bus_sem);
> >>+	/* call cb for all the functions of the mf device */
> >>+	list_for_each_entry(pdev, &bus->devices, bus_list) {
> >>+		if (PCI_SLOT(pdev->devfn) == ndev) {
> >>+			retval = cb(pdev, userdata);
> >>+			if (retval)
> >>+				break;
> >>+		}
> >>+	}
> >>+	up_read(&pci_bus_sem);
> >>+}
> >>+EXPORT_SYMBOL_GPL(pci_walk_mf_dev);
> >>+
> >> /** pci_walk_bus - walk devices on/under bus, calling callback.
> >>  *  @top      bus whose devices should be walked
> >>  *  @cb       callback to be called for each device found
> >>diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
> >>index b1303b3..67c3dc0 100644
> >>--- a/drivers/pci/pcie/aer/aerdrv_core.c
> >>+++ b/drivers/pci/pcie/aer/aerdrv_core.c
> >>@@ -390,7 +390,18 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
> >> 		 * If the error is reported by an end point, we think this
> >> 		 * error is related to the upstream link of the end point.
> >> 		 */
> >>-		pci_walk_bus(dev->bus, cb, &result_data);
> >>+		if ((state == pci_channel_io_normal) &&
> >>+				(!pci_ari_enabled(dev->bus)))
> >>+			/*
> >>+			 * the error is non fatal so the bus is ok, just walk
> >>+			 * through all the functions in a multifunction device.
> >>+			 * if ARI is enabled on the bus then there can be only
> >>+			 * one device under that bus (so walk all the functions
> >>+			 * under the bus).
> >>+			 */
> >>+			pci_walk_mf_dev(dev, cb, &result_data);
> >>+		else
> >>+			pci_walk_bus(dev->bus, cb, &result_data);
> >> 	}
> >>
> >> 	return result_data.result;
> >>diff --git a/include/linux/pci.h b/include/linux/pci.h
> >>index 4869e66..69e77bb 100644
> >>--- a/include/linux/pci.h
> >>+++ b/include/linux/pci.h
> >>@@ -1269,7 +1269,8 @@ const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
> >> 					 struct pci_dev *dev);
> >> int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
> >> 		    int pass);
> >>-
> >>+void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev *, void *),
> >>+		  void *userdata);
> >> void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
> >> 		  void *userdata);
> >> int pci_cfg_space_size(struct pci_dev *dev);
> >>--
> >>1.9.1
> >>
> >
> >.
> >
>
Gabriele Paoloni Aug. 17, 2017, 1:06 p.m. | #4
Hi Bjorn

> -----Original Message-----

> From: Bjorn Helgaas [mailto:helgaas@kernel.org]

> Sent: 16 August 2017 15:08

> To: liudongdong (C)

> Cc: linux-pci@vger.kernel.org; Gabriele Paoloni; Chenxin (Charles);

> Linuxarm

> Subject: Re: [PATCH] PCIe AER: report non fatal errors only to the

> functions of the same device

> 

> On Wed, Aug 16, 2017 at 04:50:16PM +0800, Dongdong Liu wrote:

> > Hi Bjorn

> >

> > 在 2017/8/16 6:50, Bjorn Helgaas 写道:

> > >On Fri, Aug 04, 2017 at 10:18:26AM +0800, Dongdong Liu wrote:

> > >>From: Gabriele Paoloni <gabriele.paoloni@huawei.com>

> > >>

> > >>Currently if an uncorrectable error is reported by an EP the AER

> > >>driver walks over all the devices connected to the upstream port

> > >>bus and in turns call the report_error_detected() callback.

> > >>If any of the devices connected to the bus does not implement

> > >>dev->driver->err_handler->error_detected() do_recovery() will fail.

> > >>

> > >>However for non fatal errors the PCIe link should not be considered

> > >>compromised, therefore it makes sense to report the error only to

> > >>all the functions of a multifunction device.

> > >>This patch implements this new behaviour for non fatal errors.

> > >

> > >Why do we bother even with other functions in the same multifunction

> > >device?  PCIe r3.1, sec 6.2.2.2.2, says non-fatal errors only affect

> a

> > >particular transaction, and "devices not associated with the

> > >transaction in error are not impacted."

> > >

> > >A transaction is only associated with one function, so other

> functions

> > >in the same device shouldn't be affected, should they?

> >

> > PCIe r3.1, sec 6.2.4 Error Logging

> > PCI Express errors are not Function-specific.  "Software is

> > responsible for scanning all Functions in a multi-Function device

> > when it detects one of those errors"

> 

> The previous text basically says that if a multi-function device

> should generate at most one error reporting message, even if several

> functions have logged an error of the same severity.  I think that

> single message corresponds to a single interrupt.

> 

> So when it says "software is responsible for scanning all Functions in

> a multi-Function device," I think the point is that software should

> read the error reporting registers of all functions in case several of

> them have logged errors.


Right, looking again at the AER core it seems that find_source_device()
would look for the error sources by walking the PCIe hierarchy starting
from the RP that reported the error (however from AER_MAX_MULTI_ERR_DEVICES
max 5 devices can log an error on a single AER interrupt...).

Anyway as it is now, and assuming that we have no more than 5 functions
in a multi-function device, AER core should call handle_error_source()
for each function that logged an error, right?

> 

> But IIUC, this patch has nothing to do with reading the error CSRs

> (which should be done in the PCI/AER core).  This patch merely changes

> the set of devices for which we call the driver's error reporting

> interfaces.


Correct. From our point of view if a fatal AER is reported by a function
then we need to call the driver callbacks also on all the function under
the same bus as the reporting device (as the link is compromised).

We thought that for non-fatal errors this is not necessary as the bus 
link should not be considered compromised, but we thought that for MF
devices maybe it would have been appropriate to call the driver callbacks
on the other functions of the same device. However after your consideration
above and after double checking the AER core it seems that also this is
not necessary (in fact for a MF device handle_error_source() will be called
for each function that logged the error)

> 

> If this is fixing a problem, maybe it would help clarify things if you

> could include the concrete details of what's going wrong.


In Hi1620 we have some integrated controllers that appear as PCIe EPs
under the same bus. Some of these controllers (e.g. the SATA 
controller) are missing the err_handler callbacks.

If one device reports a non-fatal uncorrectable error with the current
AER core code the callbacks for all the devices under the same bus will
be called and, if any of the devices is missing the callback all the
devices in the subtree are left in error state without recovery... 
This patch is needed to sort out a situation like this one.

Anyway I think that after the considerations above on the MF device
I can modify this patch to just call the callback for the reporting
function...?

Thanks
Gab

> 

> Bjorn

> 

> > >>Signed-off-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>

> > >>Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>

> > >>---

> > >> drivers/pci/bus.c                  | 38

> ++++++++++++++++++++++++++++++++++++++

> > >> drivers/pci/pcie/aer/aerdrv_core.c | 13 ++++++++++++-

> > >> include/linux/pci.h                |  3 ++-

> > >> 3 files changed, 52 insertions(+), 2 deletions(-)

> > >>

> > >>diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c

> > >>index bc56cf1..bc8f8b2 100644

> > >>--- a/drivers/pci/bus.c

> > >>+++ b/drivers/pci/bus.c

> > >>@@ -364,6 +364,44 @@ void pci_bus_add_devices(const struct pci_bus

> *bus)

> > >> }

> > >> EXPORT_SYMBOL(pci_bus_add_devices);

> > >>

> > >>+/** pci_walk_mf_dev - walk all functions of a multi-function

> > >>+ *  device calling callback.

> > >>+ *  @dev      a function in a multi-function device

> > >>+ *  @cb       callback to be called for each device found

> > >>+ *  @userdata arbitrary pointer to be passed to callback.

> > >>+ *

> > >>+ *  Walk, on a given bus, only the adjacent functions of a

> > >>+ *  multi-function device. Call the provided callback on each

> > >>+ *  device found.

> > >>+ *

> > >>+ *  We check the return of @cb each time. If it returns anything

> > >>+ *  other than 0, we break out.

> > >>+ *

> > >>+ */

> > >>+void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev

> *, void *),

> > >>+		  void *userdata)

> > >>+{

> > >>+	int retval;

> > >>+	struct pci_bus *bus;

> > >>+	struct pci_dev *pdev;

> > >>+	int ndev;

> > >>+

> > >>+	bus = dev->bus;

> > >>+	ndev = PCI_SLOT(dev->devfn);

> > >>+

> > >>+	down_read(&pci_bus_sem);

> > >>+	/* call cb for all the functions of the mf device */

> > >>+	list_for_each_entry(pdev, &bus->devices, bus_list) {

> > >>+		if (PCI_SLOT(pdev->devfn) == ndev) {

> > >>+			retval = cb(pdev, userdata);

> > >>+			if (retval)

> > >>+				break;

> > >>+		}

> > >>+	}

> > >>+	up_read(&pci_bus_sem);

> > >>+}

> > >>+EXPORT_SYMBOL_GPL(pci_walk_mf_dev);

> > >>+

> > >> /** pci_walk_bus - walk devices on/under bus, calling callback.

> > >>  *  @top      bus whose devices should be walked

> > >>  *  @cb       callback to be called for each device found

> > >>diff --git a/drivers/pci/pcie/aer/aerdrv_core.c

> b/drivers/pci/pcie/aer/aerdrv_core.c

> > >>index b1303b3..67c3dc0 100644

> > >>--- a/drivers/pci/pcie/aer/aerdrv_core.c

> > >>+++ b/drivers/pci/pcie/aer/aerdrv_core.c

> > >>@@ -390,7 +390,18 @@ static pci_ers_result_t

> broadcast_error_message(struct pci_dev *dev,

> > >> 		 * If the error is reported by an end point, we think this

> > >> 		 * error is related to the upstream link of the end point.

> > >> 		 */

> > >>-		pci_walk_bus(dev->bus, cb, &result_data);

> > >>+		if ((state == pci_channel_io_normal) &&

> > >>+				(!pci_ari_enabled(dev->bus)))

> > >>+			/*

> > >>+			 * the error is non fatal so the bus is ok, just walk

> > >>+			 * through all the functions in a multifunction

> device.

> > >>+			 * if ARI is enabled on the bus then there can be

> only

> > >>+			 * one device under that bus (so walk all the

> functions

> > >>+			 * under the bus).

> > >>+			 */

> > >>+			pci_walk_mf_dev(dev, cb, &result_data);

> > >>+		else

> > >>+			pci_walk_bus(dev->bus, cb, &result_data);

> > >> 	}

> > >>

> > >> 	return result_data.result;

> > >>diff --git a/include/linux/pci.h b/include/linux/pci.h

> > >>index 4869e66..69e77bb 100644

> > >>--- a/include/linux/pci.h

> > >>+++ b/include/linux/pci.h

> > >>@@ -1269,7 +1269,8 @@ const struct pci_device_id

> *pci_match_id(const struct pci_device_id *ids,

> > >> 					 struct pci_dev *dev);

> > >> int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int

> max,

> > >> 		    int pass);

> > >>-

> > >>+void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev

> *, void *),

> > >>+		  void *userdata);

> > >> void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *,

> void *),

> > >> 		  void *userdata);

> > >> int pci_cfg_space_size(struct pci_dev *dev);

> > >>--

> > >>1.9.1

> > >>

> > >

> > >.

> > >

> >
Bjorn Helgaas Aug. 17, 2017, 4:48 p.m. | #5
On Thu, Aug 17, 2017 at 01:06:32PM +0000, Gabriele Paoloni wrote:
> Hi Bjorn
> 
> > -----Original Message-----
> > From: Bjorn Helgaas [mailto:helgaas@kernel.org]
> > Sent: 16 August 2017 15:08
> > To: liudongdong (C)
> > Cc: linux-pci@vger.kernel.org; Gabriele Paoloni; Chenxin (Charles);
> > Linuxarm
> > Subject: Re: [PATCH] PCIe AER: report non fatal errors only to the
> > functions of the same device
> > 
> > On Wed, Aug 16, 2017 at 04:50:16PM +0800, Dongdong Liu wrote:
> > > Hi Bjorn
> > >
> > > 在 2017/8/16 6:50, Bjorn Helgaas 写道:
> > > >On Fri, Aug 04, 2017 at 10:18:26AM +0800, Dongdong Liu wrote:
> > > >>From: Gabriele Paoloni <gabriele.paoloni@huawei.com>
> > > >>
> > > >>Currently if an uncorrectable error is reported by an EP the AER
> > > >>driver walks over all the devices connected to the upstream port
> > > >>bus and in turns call the report_error_detected() callback.
> > > >>If any of the devices connected to the bus does not implement
> > > >>dev->driver->err_handler->error_detected() do_recovery() will fail.
> > > >>
> > > >>However for non fatal errors the PCIe link should not be considered
> > > >>compromised, therefore it makes sense to report the error only to
> > > >>all the functions of a multifunction device.
> > > >>This patch implements this new behaviour for non fatal errors.
> > > >
> > > >Why do we bother even with other functions in the same multifunction
> > > >device?  PCIe r3.1, sec 6.2.2.2.2, says non-fatal errors only affect
> > a
> > > >particular transaction, and "devices not associated with the
> > > >transaction in error are not impacted."
> > > >
> > > >A transaction is only associated with one function, so other
> > functions
> > > >in the same device shouldn't be affected, should they?
> > >
> > > PCIe r3.1, sec 6.2.4 Error Logging
> > > PCI Express errors are not Function-specific.  "Software is
> > > responsible for scanning all Functions in a multi-Function device
> > > when it detects one of those errors"
> > 
> > The previous text basically says that if a multi-function device
> > should generate at most one error reporting message, even if several
> > functions have logged an error of the same severity.  I think that
> > single message corresponds to a single interrupt.
> > 
> > So when it says "software is responsible for scanning all Functions in
> > a multi-Function device," I think the point is that software should
> > read the error reporting registers of all functions in case several of
> > them have logged errors.
> 
> Right, looking again at the AER core it seems that find_source_device()
> would look for the error sources by walking the PCIe hierarchy starting
> from the RP that reported the error (however from AER_MAX_MULTI_ERR_DEVICES
> max 5 devices can log an error on a single AER interrupt...).
> 
> Anyway as it is now, and assuming that we have no more than 5 functions
> in a multi-function device, AER core should call handle_error_source()
> for each function that logged an error, right?
> 
> > 
> > But IIUC, this patch has nothing to do with reading the error CSRs
> > (which should be done in the PCI/AER core).  This patch merely changes
> > the set of devices for which we call the driver's error reporting
> > interfaces.
> 
> Correct. From our point of view if a fatal AER is reported by a function
> then we need to call the driver callbacks also on all the function under
> the same bus as the reporting device (as the link is compromised).
> 
> We thought that for non-fatal errors this is not necessary as the bus 
> link should not be considered compromised, but we thought that for MF
> devices maybe it would have been appropriate to call the driver callbacks
> on the other functions of the same device. However after your consideration
> above and after double checking the AER core it seems that also this is
> not necessary (in fact for a MF device handle_error_source() will be called
> for each function that logged the error)
> 
> > 
> > If this is fixing a problem, maybe it would help clarify things if you
> > could include the concrete details of what's going wrong.
> 
> In Hi1620 we have some integrated controllers that appear as PCIe EPs
> under the same bus. Some of these controllers (e.g. the SATA 
> controller) are missing the err_handler callbacks.
> 
> If one device reports a non-fatal uncorrectable error with the current
> AER core code the callbacks for all the devices under the same bus will
> be called and, if any of the devices is missing the callback all the
> devices in the subtree are left in error state without recovery... 
> This patch is needed to sort out a situation like this one.
> 
> Anyway I think that after the considerations above on the MF device
> I can modify this patch to just call the callback for the reporting
> function...?

Sounds like a reasonable approach.  I'll compare the patch with the
spec in more detail when you post it.

Patch

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index bc56cf1..bc8f8b2 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -364,6 +364,44 @@  void pci_bus_add_devices(const struct pci_bus *bus)
 }
 EXPORT_SYMBOL(pci_bus_add_devices);
 
+/** pci_walk_mf_dev - walk all functions of a multi-function
+ *  device calling callback.
+ *  @dev      a function in a multi-function device
+ *  @cb       callback to be called for each device found
+ *  @userdata arbitrary pointer to be passed to callback.
+ *
+ *  Walk, on a given bus, only the adjacent functions of a
+ *  multi-function device. Call the provided callback on each
+ *  device found.
+ *
+ *  We check the return of @cb each time. If it returns anything
+ *  other than 0, we break out.
+ *
+ */
+void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev *, void *),
+		  void *userdata)
+{
+	int retval;
+	struct pci_bus *bus;
+	struct pci_dev *pdev;
+	int ndev;
+
+	bus = dev->bus;
+	ndev = PCI_SLOT(dev->devfn);
+
+	down_read(&pci_bus_sem);
+	/* call cb for all the functions of the mf device */
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		if (PCI_SLOT(pdev->devfn) == ndev) {
+			retval = cb(pdev, userdata);
+			if (retval)
+				break;
+		}
+	}
+	up_read(&pci_bus_sem);
+}
+EXPORT_SYMBOL_GPL(pci_walk_mf_dev);
+
 /** pci_walk_bus - walk devices on/under bus, calling callback.
  *  @top      bus whose devices should be walked
  *  @cb       callback to be called for each device found
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index b1303b3..67c3dc0 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -390,7 +390,18 @@  static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
 		 * If the error is reported by an end point, we think this
 		 * error is related to the upstream link of the end point.
 		 */
-		pci_walk_bus(dev->bus, cb, &result_data);
+		if ((state == pci_channel_io_normal) &&
+				(!pci_ari_enabled(dev->bus)))
+			/*
+			 * the error is non fatal so the bus is ok, just walk
+			 * through all the functions in a multifunction device.
+			 * if ARI is enabled on the bus then there can be only
+			 * one device under that bus (so walk all the functions
+			 * under the bus).
+			 */
+			pci_walk_mf_dev(dev, cb, &result_data);
+		else
+			pci_walk_bus(dev->bus, cb, &result_data);
 	}
 
 	return result_data.result;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4869e66..69e77bb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1269,7 +1269,8 @@  const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
 					 struct pci_dev *dev);
 int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
 		    int pass);
-
+void pci_walk_mf_dev(struct pci_dev *dev, int (*cb)(struct pci_dev *, void *),
+		  void *userdata);
 void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
 		  void *userdata);
 int pci_cfg_space_size(struct pci_dev *dev);