diff mbox series

[v2] PCI/AER: Do not clear AER bits if we don't own AER

Message ID 20180723165251.11424-1-mr.nuke.me@gmail.com
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series [v2] PCI/AER: Do not clear AER bits if we don't own AER | expand

Commit Message

Alex G. July 23, 2018, 4:52 p.m. UTC
When we don't own AER, we shouldn't touch the AER error bits. Clearing
error bits willy-nilly might cause firmware to miss some errors. In
theory, these bits get cleared by FFS, or via ACPI _HPX method. These
mechanisms are not subject to the problem.

This race is mostly of theoretical significance, since I can't
reasonably demonstrate this race in the lab.

On a side-note, pcie_aer_is_kernel_first() is created to alleviate the
need for two checks: aer_cap and get_firmware_first().

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/pci/pcie/aer.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

Comments

Alex G. July 24, 2018, 3:59 p.m. UTC | #1
On 07/23/2018 11:52 AM, Alexandru Gagniuc wrote:
> When we don't own AER, we shouldn't touch the AER error bits. Clearing
> error bits willy-nilly might cause firmware to miss some errors. In
> theory, these bits get cleared by FFS, or via ACPI _HPX method. These
> mechanisms are not subject to the problem.
> 
> This race is mostly of theoretical significance, since I can't
> reasonably demonstrate this race in the lab.
> 
> On a side-note, pcie_aer_is_kernel_first() is created to alleviate the
> need for two checks: aer_cap and get_firmware_first().
> 
> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> ---
>   drivers/pci/pcie/aer.c | 17 ++++++++++-------
>   1 file changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index a2e88386af28..85c3e173c025 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -307,6 +307,12 @@ int pcie_aer_get_firmware_first(struct pci_dev *dev)
>   		aer_set_firmware_first(dev);
>   	return dev->__aer_firmware_first;
>   }
> +
> +static bool pcie_aer_is_kernel_first(struct pci_dev *dev)
> +{
> +	return !!dev->aer_cap && !pcie_aer_get_firmware_first(dev);
> +}
> +
>   #define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
>   				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
>   
> @@ -337,10 +343,7 @@ bool aer_acpi_firmware_first(void)
>   
>   int pci_enable_pcie_error_reporting(struct pci_dev *dev)
>   {
> -	if (pcie_aer_get_firmware_first(dev))
> -		return -EIO;
> -
> -	if (!dev->aer_cap)
> +	if (!pcie_aer_is_kernel_first(dev))
>   		return -EIO;
>   
>   	return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
> @@ -349,7 +352,7 @@ EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
>   
>   int pci_disable_pcie_error_reporting(struct pci_dev *dev)
>   {
> -	if (pcie_aer_get_firmware_first(dev))
> +	if (!pcie_aer_is_kernel_first(dev))
>   		return -EIO;
>   
>   	return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
> @@ -383,10 +386,10 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
>   	if (!pci_is_pcie(dev))
>   		return -ENODEV;
>   
> -	pos = dev->aer_cap;
> -	if (!pos)
> +	if (pcie_aer_is_kernel_first(dev))

This here is missing a '!'. It's in my local branch, so I must have 
exported the patch before I fixed that. I'll get that fixed next rev.

>   		return -EIO;
>   
> +	pos = dev->aer_cap;
>   	port_type = pci_pcie_type(dev);
>   	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
>   		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
>
kernel test robot July 24, 2018, 5:08 p.m. UTC | #2
Hi Alexandru,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on pci/next]
[also build test ERROR on v4.18-rc6 next-20180724]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Alexandru-Gagniuc/PCI-AER-Do-not-clear-AER-bits-if-we-don-t-own-AER/20180724-235320
base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git next
config: x86_64-randconfig-x008-201829 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers/pci/pcie/aer.c: In function 'pci_enable_pcie_error_reporting':
>> drivers/pci/pcie/aer.c:371:7: error: implicit declaration of function 'pcie_aer_is_kernel_first'; did you mean 'pcie_aer_get_firmware_first'? [-Werror=implicit-function-declaration]
     if (!pcie_aer_is_kernel_first(dev))
          ^~~~~~~~~~~~~~~~~~~~~~~~
          pcie_aer_get_firmware_first
   cc1: some warnings being treated as errors

vim +371 drivers/pci/pcie/aer.c

   365	
   366	#define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
   367					 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
   368	
   369	int pci_enable_pcie_error_reporting(struct pci_dev *dev)
   370	{
 > 371		if (!pcie_aer_is_kernel_first(dev))
   372			return -EIO;
   373	
   374		return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
   375	}
   376	EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
   377	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot July 25, 2018, 1:03 a.m. UTC | #3
Hi Alexandru,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on pci/next]
[also build test ERROR on v4.18-rc6 next-20180724]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Alexandru-Gagniuc/PCI-AER-Do-not-clear-AER-bits-if-we-don-t-own-AER/20180724-235320
base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git next
config: x86_64-randconfig-s1-07250001 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers/pci/pcie/aer.c: In function 'pci_enable_pcie_error_reporting':
>> drivers/pci/pcie/aer.c:371:7: error: implicit declaration of function 'pcie_aer_is_kernel_first' [-Werror=implicit-function-declaration]
     if (!pcie_aer_is_kernel_first(dev))
          ^~~~~~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/pcie_aer_is_kernel_first +371 drivers/pci/pcie/aer.c

   365	
   366	#define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
   367					 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
   368	
   369	int pci_enable_pcie_error_reporting(struct pci_dev *dev)
   370	{
 > 371		if (!pcie_aer_is_kernel_first(dev))
   372			return -EIO;
   373	
   374		return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
   375	}
   376	EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
   377	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox series

Patch

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index a2e88386af28..85c3e173c025 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -307,6 +307,12 @@  int pcie_aer_get_firmware_first(struct pci_dev *dev)
 		aer_set_firmware_first(dev);
 	return dev->__aer_firmware_first;
 }
+
+static bool pcie_aer_is_kernel_first(struct pci_dev *dev)
+{
+	return !!dev->aer_cap && !pcie_aer_get_firmware_first(dev);
+}
+
 #define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
 				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
 
@@ -337,10 +343,7 @@  bool aer_acpi_firmware_first(void)
 
 int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
-	if (pcie_aer_get_firmware_first(dev))
-		return -EIO;
-
-	if (!dev->aer_cap)
+	if (!pcie_aer_is_kernel_first(dev))
 		return -EIO;
 
 	return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
@@ -349,7 +352,7 @@  EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
 
 int pci_disable_pcie_error_reporting(struct pci_dev *dev)
 {
-	if (pcie_aer_get_firmware_first(dev))
+	if (!pcie_aer_is_kernel_first(dev))
 		return -EIO;
 
 	return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
@@ -383,10 +386,10 @@  int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 	if (!pci_is_pcie(dev))
 		return -ENODEV;
 
-	pos = dev->aer_cap;
-	if (!pos)
+	if (pcie_aer_is_kernel_first(dev))
 		return -EIO;
 
+	pos = dev->aer_cap;
 	port_type = pci_pcie_type(dev);
 	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
 		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);