Message ID | 20081202092316.7d6b6291@extreme |
---|---|
State | Not Applicable, archived |
Delegated to: | David Miller |
Headers | show |
On Tue, 2 Dec 2008, Stephen Hemminger wrote: > This patch does more error checking in the Advanced Error Reporting code. > Since AER needs to access PCI registers > 255, it won't work without MMCONFIG > and other quirks may stop it as well. The code must check this by looking > at return values from pci_read/write_config_XXX calls. > > I don't have any hardware that uses AER routines but discovered this > in earlier versions of the sky2 driver that tried to use > pci AER routines. Ended up just giving up and using other ways to access PCI > config space on sky2 since there were too many platform glitches. > > > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com> > > > --- a/drivers/pci/pcie/aer/aerdrv_core.c 2008-12-02 07:56:08.000000000 -0800 > +++ b/drivers/pci/pcie/aer/aerdrv_core.c 2008-12-02 09:07:32.000000000 -0800 > @@ -31,80 +31,92 @@ module_param(forceload, bool, 0); > int pci_enable_pcie_error_reporting(struct pci_dev *dev) > { > u16 reg16 = 0; > - int pos; > + int pos, err; > + u32 status; > > pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); > if (!pos) > return -EIO; > > + err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); > + if (err) > + return err; > + > pos = pci_find_capability(dev, PCI_CAP_ID_EXP); > if (!pos) > return -EIO; > > - pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16); > + err = pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16); > + if (err) > + return err; > + > reg16 = reg16 | > PCI_EXP_DEVCTL_CERE | > PCI_EXP_DEVCTL_NFERE | > PCI_EXP_DEVCTL_FERE | > PCI_EXP_DEVCTL_URRE; > - pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, > - reg16); > - return 0; > + return pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16); > } > > int pci_disable_pcie_error_reporting(struct pci_dev *dev) > { > u16 reg16 = 0; > - int pos; > + int pos, err; > > pos = pci_find_capability(dev, PCI_CAP_ID_EXP); > if (!pos) > return -EIO; > > - pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16); > + err = pci_read_config_word(dev, 
pos+PCI_EXP_DEVCTL, &reg16); > + if (err) > + return err; > + > reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE | > PCI_EXP_DEVCTL_NFERE | > PCI_EXP_DEVCTL_FERE | > PCI_EXP_DEVCTL_URRE); > - pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, > - reg16); > - return 0; > + return pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16); > } > > int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) > { > - int pos; > + int pos, err; > u32 status, mask; > > pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); > if (!pos) > return -EIO; > > - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); > - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); > + err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); > + if (err) > + return err; > + > + err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); > + if (err) > + return err; > + > if (dev->error_state == pci_channel_io_normal) > status &= ~mask; /* Clear corresponding nonfatal bits */ > else > status &= mask; /* Clear corresponding fatal bits */ > - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); > - > - return 0; > + return pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); > } > > #if 0 > int pci_cleanup_aer_correct_error_status(struct pci_dev *dev) > { > - int pos; > + int pos, err; > u32 status; > > pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); > if (!pos) > return -EIO; > > - pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); > - pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); > + err = pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); > + if (err) > + return err; > > - return 0; > + return pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); > } > #endif /* 0 */ This looks fine to me. Thanks very much Stephen. 
-PJ Waskiewicz -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/02/2008 12:23 PM, Stephen Hemminger wrote: > This patch does more error checking in the Advanced Error Reporting code. > Since AER needs to access PCI registers > 255, it won't work without MMCONFIG > and other quirks may stop it as well. The code must check this by looking > at return values from pci_read/write_config_XXX calls. > > I don't have any hardware that uses AER routines but discovered this > in earlier versions of the sky2 driver that tried to use > pci AER routines. Ended up just giving up and using other ways to access PCI > config space on sky2 since there were too many platform glitches. > When experimenting with sky2 driver, was pci_find_ext_capability() returning non-zero although further ext-space accesses were failing? > > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com> > > > --- a/drivers/pci/pcie/aer/aerdrv_core.c 2008-12-02 07:56:08.000000000 -0800 > +++ b/drivers/pci/pcie/aer/aerdrv_core.c 2008-12-02 09:07:32.000000000 -0800 > @@ -31,80 +31,92 @@ module_param(forceload, bool, 0); > int pci_enable_pcie_error_reporting(struct pci_dev *dev) > { > u16 reg16 = 0; > - int pos; > + int pos, err; > + u32 status; > > pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); > if (!pos) > return -EIO; > > + err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); > + if (err) > + return err; > + > > For legacy-conf-space, most kernel code assumes success without checking. For ext-conf-space, wouldn't it be convenient to be able to make the same assumption when pci_find_ext_capability() returns a valid offset? The patch looks good to me, but I am just asking whether there is a known case where pcie_find_ext_capability() returns a valid offset, although that offset might turn out unusable (it might be worth investigating pci_find_ext_capability() then). 
Loic -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 02 Dec 2008 14:04:15 -0500 Loic Prylli <loic@myri.com> wrote: > On 12/02/2008 12:23 PM, Stephen Hemminger wrote: > > This patch does more error checking in the Advanced Error Reporting code. > > Since AER needs to access PCI registers > 255, it won't work without MMCONFIG > > and other quirks may stop it as well. The code must check this by looking > > at return values from pci_read/write_config_XXX calls. > > > > I don't have any hardware that uses AER routines but discovered this > > in earlier versions of the sky2 driver that tried to use > > pci AER routines. Ended up just giving up and using other ways to access PCI > > config space on sky2 since there were too many platform glitches. > > > > > > > When experimenting with sky2 driver, was pci_find_ext_capability() > returning non-zero although further ext-space accesses were failing? > No pci_find_ext_capability would succeed but all access to registers >= 256 would fail if MMCONFIG failed. Since the device often asserts stray errors on boot, if the error could not be cleared, the driver would get stuck when IRQ was enabled. P.s: you can look back into LKML for Linus discussion about why MMCONFIG is broken anyway. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/02/2008 02:44 PM, Stephen Hemminger wrote: > pci_find_ext_capability would succeed but all access to registers >= 256 > would fail if MMCONFIG failed. Your sentence seems self-contradicting to me. pci_find_ext_capability() (!= pci_find_capability()) only tries to access registers >= 256, so I don't see how it would succeed if *all* those accesses are failing. > P.s: you can look back into LKML for Linus discussion about why MMCONFIG > is broken anyway. > I was an active participant in one of those discussions (although it is possible I missed a later one), so I am quite aware about the limitations of MMCONFIG. But pci_find_ext_capability() looked to me a good filter to check about those limitations(). This is not just about pciaer, for instance, it might be that a lot of the SR-IOV code submitted recently assumes that if the corresponding extended-capability can be detected, little checking needs to be done afterwards. Loic -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 02 Dec 2008 15:14:35 -0500 Loic Prylli <loic@myri.com> wrote: > On 12/02/2008 02:44 PM, Stephen Hemminger wrote: > > pci_find_ext_capability would succeed but all access to registers >= 256 > > would fail if MMCONFIG failed. > > > > Your sentence seems self-contradicting to me. pci_find_ext_capability() > (!= pci_find_capability()) only tries to access registers >= 256, so I > don't see how it would succeed if *all* those accesses are failing. > > > > P.s: you can look back into LKML for Linus discussion about why MMCONFIG > > is broken anyway. > > > > > > I was an active participant in one of those discussions (although it is > possible I missed a later one), so I am quite aware about the > limitations of MMCONFIG. But pci_find_ext_capability() looked to me a > good filter to check about those limitations(). > > This is not just about pciaer, for instance, it might be that a lot of > the SR-IOV code submitted recently assumes that if the corresponding > extended-capability can be detected, little checking needs to be done > afterwards. I misremembered the issue. The problem in sky2 was related but not the same. pci_find_ext_capability would return 0, but the device driver needed to be able to clear errors (or IRQ would scream). So since the existing extended capability code wouldn't work on all platforms, another alternative had to be found. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
--- a/drivers/pci/pcie/aer/aerdrv_core.c 2008-12-02 07:56:08.000000000 -0800 +++ b/drivers/pci/pcie/aer/aerdrv_core.c 2008-12-02 09:07:32.000000000 -0800 @@ -31,80 +31,92 @@ module_param(forceload, bool, 0); int pci_enable_pcie_error_reporting(struct pci_dev *dev) { u16 reg16 = 0; - int pos; + int pos, err; + u32 status; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); if (!pos) return -EIO; + err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + if (err) + return err; + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) return -EIO; - pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16); + err = pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16); + if (err) + return err; + reg16 = reg16 | PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE; - pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, - reg16); - return 0; + return pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16); } int pci_disable_pcie_error_reporting(struct pci_dev *dev) { u16 reg16 = 0; - int pos; + int pos, err; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) return -EIO; - pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16); + err = pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16); + if (err) + return err; + reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE); - pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, - reg16); - return 0; + return pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16); } int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) { - int pos; + int pos, err; u32 status, mask; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); if (!pos) return -EIO; - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); + err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + if (err) + return err; + + err = pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, 
&mask); + if (err) + return err; + if (dev->error_state == pci_channel_io_normal) status &= ~mask; /* Clear corresponding nonfatal bits */ else status &= mask; /* Clear corresponding fatal bits */ - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); - - return 0; + return pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); } #if 0 int pci_cleanup_aer_correct_error_status(struct pci_dev *dev) { - int pos; + int pos, err; u32 status; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); if (!pos) return -EIO; - pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); - pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); + err = pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); + if (err) + return err; - return 0; + return pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); } #endif /* 0 */
This patch does more error checking in the Advanced Error Reporting code. Since AER needs to access PCI registers > 255, it won't work without MMCONFIG and other quirks may stop it as well. The code must check this by looking at return values from pci_read/write_config_XXX calls. I don't have any hardware that uses AER routines but discovered this in earlier versions of the sky2 driver that tried to use pci AER routines. Ended up just giving up and using other ways to access PCI config space on sky2 since there were too many platform glitches. Signed-off-by: Stephen Hemminger <shemminger@vyatta.com> -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html