diff mbox

pciaer: report config read/write errors

Message ID 20081202092316.7d6b6291@extreme
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

stephen hemminger Dec. 2, 2008, 5:23 p.m. UTC
This patch does more error checking in the Advanced Error Reporting code.
Since AER needs to access PCI registers > 255, it won't work without MMCONFIG
and other quirks may stop it as well. The code must check this by looking
at return values from pci_read/write_config_XXX calls.  

I don't have any hardware that uses the AER routines, but I discovered this
in earlier versions of the sky2 driver that tried to use the
PCI AER routines. I ended up giving up and using other ways to access PCI
config space on sky2, since there were too many platform glitches.


Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Waskiewicz Jr, Peter P Dec. 2, 2008, 6:41 p.m. UTC | #1
On Tue, 2 Dec 2008, Stephen Hemminger wrote:

> This patch does more error checking in the Advanced Error Reporting code.
> Since AER needs to access PCI registers > 255, it won't work without MMCONFIG
> and other quirks may stop it as well. The code must check this by looking
> at return values from pci_read/write_config_XXX calls.  
> 
> I don't have any hardware that uses AER routines but discovered this 
> in earlier versions of the sky2 driver that tried to use
> pci AER routines. Ended up just giving up and using other ways to access PCI
> config space on sky2 since there were too many platform glitches.
> 
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> 
> --- a/drivers/pci/pcie/aer/aerdrv_core.c	2008-12-02 07:56:08.000000000 -0800
> +++ b/drivers/pci/pcie/aer/aerdrv_core.c	2008-12-02 09:07:32.000000000 -0800
> @@ -31,80 +31,92 @@ module_param(forceload, bool, 0);
>  int pci_enable_pcie_error_reporting(struct pci_dev *dev)
>  {
>  	u16 reg16 = 0;
> -	int pos;
> +	int pos, err;
> +	u32 status;
>  
>  	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
>  	if (!pos)
>  		return -EIO;
>  
> +	err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
> +	if (err)
> +		return err;
> +
>  	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
>  	if (!pos)
>  		return -EIO;
>  
> -	pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
> +	err = pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
> +	if (err)
> +		return err;
> +
>  	reg16 = reg16 |
>  		PCI_EXP_DEVCTL_CERE |
>  		PCI_EXP_DEVCTL_NFERE |
>  		PCI_EXP_DEVCTL_FERE |
>  		PCI_EXP_DEVCTL_URRE;
> -	pci_write_config_word(dev, pos+PCI_EXP_DEVCTL,
> -			reg16);
> -	return 0;
> +	return pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);
>  }
>  
>  int pci_disable_pcie_error_reporting(struct pci_dev *dev)
>  {
>  	u16 reg16 = 0;
> -	int pos;
> +	int pos, err;
>  
>  	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
>  	if (!pos)
>  		return -EIO;
>  
> -	pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
> +	err = pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
> +	if (err)
> +		return err;
> +
>  	reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE |
>  			PCI_EXP_DEVCTL_NFERE |
>  			PCI_EXP_DEVCTL_FERE |
>  			PCI_EXP_DEVCTL_URRE);
> -	pci_write_config_word(dev, pos+PCI_EXP_DEVCTL,
> -			reg16);
> -	return 0;
> +	return pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);
>  }
>  
>  int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
>  {
> -	int pos;
> +	int pos, err;
>  	u32 status, mask;
>  
>  	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
>  	if (!pos)
>  		return -EIO;
>  
> -	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
> -	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
> +	err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
> +	if (err)
> +		return err;
> +
> +	err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
> +	if (err)
> +		return err;
> +
>  	if (dev->error_state == pci_channel_io_normal)
>  		status &= ~mask; /* Clear corresponding nonfatal bits */
>  	else
>  		status &= mask; /* Clear corresponding fatal bits */
> -	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
> -
> -	return 0;
> +	return pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
>  }
>  
>  #if 0
>  int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
>  {
> -	int pos;
> +	int pos, err;
>  	u32 status;
>  
>  	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
>  	if (!pos)
>  		return -EIO;
>  
> -	pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
> -	pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);
> +	err = pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
> +	if (err)
> +		return err;
>  
> -	return 0;
> +	return pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);
>  }
>  #endif  /*  0  */

This looks fine to me.  Thanks very much Stephen.

-PJ Waskiewicz
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Loic Prylli Dec. 2, 2008, 7:04 p.m. UTC | #2
On 12/02/2008 12:23 PM, Stephen Hemminger wrote:
> This patch does more error checking in the Advanced Error Reporting code.
> Since AER needs to access PCI registers > 255, it won't work without MMCONFIG
> and other quirks may stop it as well. The code must check this by looking
> at return values from pci_read/write_config_XXX calls.  
>
> I don't have any hardware that uses AER routines but discovered this 
> in earlier versions of the sky2 driver that tried to use
> pci AER routines. Ended up just giving up and using other ways to access PCI
> config space on sky2 since there were too many platform glitches.
>   




When experimenting with sky2 driver, was pci_find_ext_capability() 
returning non-zero although further ext-space accesses were failing?



>
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>
>
> --- a/drivers/pci/pcie/aer/aerdrv_core.c	2008-12-02 07:56:08.000000000 -0800
> +++ b/drivers/pci/pcie/aer/aerdrv_core.c	2008-12-02 09:07:32.000000000 -0800
> @@ -31,80 +31,92 @@ module_param(forceload, bool, 0);
>  int pci_enable_pcie_error_reporting(struct pci_dev *dev)
>  {
>  	u16 reg16 = 0;
> -	int pos;
> +	int pos, err;
> +	u32 status;
>  
>  	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
>  	if (!pos)
>  		return -EIO;
>  
> +	err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
> +	if (err)
> +		return err;
> +
>
>   



For legacy-conf-space, most kernel code assumes success without 
checking. For ext-conf-space, wouldn't it be convenient to be able to 
make the same assumption when pci_find_ext_capability() returns a valid 
offset?

The patch looks good to me, but I am just asking whether there is a 
known case where pci_find_ext_capability() returns a valid offset, 
although that offset might turn out unusable (it might be worth 
investigating pci_find_ext_capability() then).


Loic


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger Dec. 2, 2008, 7:44 p.m. UTC | #3
On Tue, 02 Dec 2008 14:04:15 -0500
Loic Prylli <loic@myri.com> wrote:

> On 12/02/2008 12:23 PM, Stephen Hemminger wrote:
> > This patch does more error checking in the Advanced Error Reporting code.
> > Since AER needs to access PCI registers > 255, it won't work without MMCONFIG
> > and other quirks may stop it as well. The code must check this by looking
> > at return values from pci_read/write_config_XXX calls.  
> >
> > I don't have any hardware that uses AER routines but discovered this 
> > in earlier versions of the sky2 driver that tried to use
> > pci AER routines. Ended up just giving up and using other ways to access PCI
> > config space on sky2 since there were too many platform glitches.
> >   
> 
> 
> 
> 
> When experimenting with sky2 driver, was pci_find_ext_capability() 
> returning non-zero although further ext-space accesses were failing?
> 

No pci_find_ext_capability would succeed but all access to registers >= 256
would fail if MMCONFIG failed. Since the device often asserts stray errors on boot, if the
error could not be cleared, the driver would get stuck when IRQ was enabled.

P.S.: you can look back through LKML for Linus's discussion about why MMCONFIG
is broken anyway.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Loic Prylli Dec. 2, 2008, 8:14 p.m. UTC | #4
On 12/02/2008 02:44 PM, Stephen Hemminger wrote:
> pci_find_ext_capability would succeed but all access to registers >= 256
> would fail if MMCONFIG failed. 



Your sentence seems self-contradictory to me. pci_find_ext_capability() 
(!=  pci_find_capability()) only tries to access registers >= 256, so I 
don't see how it would succeed if *all* those accesses are failing.


> P.s: you can look back into LKML for Linus discussion about why MMCONFIG
> is broken anyway.
>   



I was an active participant in one of those discussions (although it is 
possible I missed a later one), so I am quite aware of the 
limitations of MMCONFIG. But pci_find_ext_capability() looked to me like a 
good filter to check for those limitations.

This is not just about pciaer, for instance, it might be that a lot of 
the SR-IOV code submitted recently assumes that if the corresponding 
extended-capability can be detected, little checking needs to be done 
afterwards.


Loic


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger Dec. 2, 2008, 9:41 p.m. UTC | #5
On Tue, 02 Dec 2008 15:14:35 -0500
Loic Prylli <loic@myri.com> wrote:

> On 12/02/2008 02:44 PM, Stephen Hemminger wrote:
> > pci_find_ext_capability would succeed but all access to registers >= 256
> > would fail if MMCONFIG failed. 
> 
> 
> 
> Your sentence seems self-contradicting to me. pci_find_ext_capability() 
> (!=  pci_find_capability()) only tries to access registers >= 256, so I 
> don't see how it would succeed if *all* those accesses are failing.
> 
> 
> > P.s: you can look back into LKML for Linus discussion about why MMCONFIG
> > is broken anyway.
> >   
> 
> 
> 
> I was an active participant in one of those discussions (although it is 
> possible I missed a later one), so I am quite aware about the 
> limitations of MMCONFIG. But pci_find_ext_capability() looked to me a 
> good filter to check about those limitations().
> 
> This is not just about pciaer, for instance, it might be that a lot of 
> the SR-IOV code submitted recently assumes that if the corresponding 
> extended-capability can be detected, little checking needs to be done 
> afterwards.

I misremembered the issue.
The problem in sky2 was related but not the same. pci_find_ext_capability
would return 0, but the device driver needed to be able to clear errors
(or IRQ would scream). So since the existing extended capability code
wouldn't work on all platforms, another alternative had to be found.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

--- a/drivers/pci/pcie/aer/aerdrv_core.c	2008-12-02 07:56:08.000000000 -0800
+++ b/drivers/pci/pcie/aer/aerdrv_core.c	2008-12-02 09:07:32.000000000 -0800
@@ -31,80 +31,92 @@  module_param(forceload, bool, 0);
 int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
 	u16 reg16 = 0;
-	int pos;
+	int pos, err;
+	u32 status;
 
 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
+	err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	if (err)
+		return err;
+
 	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!pos)
 		return -EIO;
 
-	pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
+	err = pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
+	if (err)
+		return err;
+
 	reg16 = reg16 |
 		PCI_EXP_DEVCTL_CERE |
 		PCI_EXP_DEVCTL_NFERE |
 		PCI_EXP_DEVCTL_FERE |
 		PCI_EXP_DEVCTL_URRE;
-	pci_write_config_word(dev, pos+PCI_EXP_DEVCTL,
-			reg16);
-	return 0;
+	return pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);
 }
 
 int pci_disable_pcie_error_reporting(struct pci_dev *dev)
 {
 	u16 reg16 = 0;
-	int pos;
+	int pos, err;
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!pos)
 		return -EIO;
 
-	pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
+	err = pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
+	if (err)
+		return err;
+
 	reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE |
 			PCI_EXP_DEVCTL_NFERE |
 			PCI_EXP_DEVCTL_FERE |
 			PCI_EXP_DEVCTL_URRE);
-	pci_write_config_word(dev, pos+PCI_EXP_DEVCTL,
-			reg16);
-	return 0;
+	return pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);
 }
 
 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 {
-	int pos;
+	int pos, err;
 	u32 status, mask;
 
 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
-	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
-	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
+	err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	if (err)
+		return err;
+
+	err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
+	if (err)
+		return err;
+
 	if (dev->error_state == pci_channel_io_normal)
 		status &= ~mask; /* Clear corresponding nonfatal bits */
 	else
 		status &= mask; /* Clear corresponding fatal bits */
-	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
-
-	return 0;
+	return pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
 }
 
 #if 0
 int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
 {
-	int pos;
+	int pos, err;
 	u32 status;
 
 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
-	pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
-	pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);
+	err = pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
+	if (err)
+		return err;
 
-	return 0;
+	return pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);
 }
 #endif  /*  0  */