Message ID | e4fa9c6253fd2bfc49746f98e4024fb4980c8936.1578682741.git.sathyanarayanan.kuppuswamy@linux.intel.com |
---|---|
State | New |
Headers | show |
Series | Add Error Disconnect Recover (EDR) support | expand |
On Sun, Jan 12, 2020 at 02:43:59PM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > As per PCI firmware specification r3.2 System Firmware Intermediary > (SFI) _OSC and DPC Updates ECR > (https://members.pcisig.com/wg/PCI-SIG/document/13563), sec titled > "DPC Event Handling Implementation Note", page 10, Error Disconnect > Recover (EDR) support allows OS to handle error recovery and clearing > Error Registers even in FF mode. So create exception for FF mode checks > in pci_cleanup_aer_uncorrect_error_status(), pci_aer_clear_fatal_status() > and pci_cleanup_aer_error_status_regs() functions when its being called > from DPC code path. > > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > Acked-by: Keith Busch <keith.busch@intel.com> > --- > Documentation/PCI/pcieaer-howto.rst | 2 +- > drivers/net/ethernet/intel/ice/ice_main.c | 2 +- > drivers/ntb/hw/idt/ntb_hw_idt.c | 2 +- > drivers/pci/pci.c | 2 +- > drivers/pci/pci.h | 2 +- > drivers/pci/pcie/aer.c | 16 ++++++++-------- > drivers/pci/pcie/dpc.c | 4 ++-- > drivers/pci/pcie/err.c | 2 +- > drivers/scsi/lpfc/lpfc_attr.c | 2 +- > include/linux/aer.h | 12 ++++++++---- > 10 files changed, 25 insertions(+), 21 deletions(-) > > diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst > index 18bdefaafd1a..184c966b61cb 100644 > --- a/Documentation/PCI/pcieaer-howto.rst > +++ b/Documentation/PCI/pcieaer-howto.rst > @@ -243,7 +243,7 @@ messages to root port when an error is detected. > > :: > > - int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);` > + int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev, bool ff_check); > > pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable > error status register. > diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c > index 69bff085acf7..8378dbf3500b 100644 > --- a/drivers/net/ethernet/intel/ice/ice_main.c > +++ b/drivers/net/ethernet/intel/ice/ice_main.c > @@ -3491,7 +3491,7 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev) > result = PCI_ERS_RESULT_DISCONNECT; > } > > - err = pci_cleanup_aer_uncorrect_error_status(pdev); > + err = pci_cleanup_aer_uncorrect_error_status(pdev, 1); > if (err) > dev_dbg(&pdev->dev, > "pci_cleanup_aer_uncorrect_error_status failed, error %d\n", > diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c > index dcf234680535..9023308be2de 100644 > --- a/drivers/ntb/hw/idt/ntb_hw_idt.c > +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c > @@ -2675,7 +2675,7 @@ static int idt_init_pci(struct idt_ntb_dev *ndev) > if (ret != 0) > dev_warn(&pdev->dev, "PCIe AER capability disabled\n"); > else /* Cleanup uncorrectable error status before getting to init */ > - pci_cleanup_aer_uncorrect_error_status(pdev); > + pci_cleanup_aer_uncorrect_error_status(pdev, 1); > > /* First enable the PCI device */ > ret = pcim_enable_device(pdev); > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c > index 63366344acff..5bd72d9343f6 100644 > --- a/drivers/pci/pcie/dpc.c > +++ b/drivers/pci/pcie/dpc.c > @@ -280,8 +280,8 @@ static void dpc_process_error(struct dpc_dev *dpc) > dpc_get_aer_uncorrect_severity(pdev, &info) && > aer_get_device_error_info(pdev, &info)) { > aer_print_error(pdev, &info); > - pci_cleanup_aer_uncorrect_error_status(pdev); > - pci_aer_clear_fatal_status(pdev); > + pci_cleanup_aer_uncorrect_error_status(pdev, 0); > + pci_aer_clear_fatal_status(pdev, 0); > } > > /* We configure DPC so it only triggers on ERR_FATAL */ > diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c > index 4ff82b36a37a..5b37efd29e20 100644 > --- a/drivers/scsi/lpfc/lpfc_attr.c > +++ b/drivers/scsi/lpfc/lpfc_attr.c > @@ -4810,7 +4810,7 @@ lpfc_aer_cleanup_state(struct device *dev, struct device_attribute *attr, > return -EINVAL; > > if (phba->hba_flag & HBA_AER_ENABLED) > - rc = pci_cleanup_aer_uncorrect_error_status(phba->pcidev); > + rc = pci_cleanup_aer_uncorrect_error_status(phba->pcidev, 1); > > if (rc == 0) > return strlen(buf); > diff --git a/include/linux/aer.h b/include/linux/aer.h > index fa19e01f418a..f4f49df500ad 100644 > --- a/include/linux/aer.h > +++ b/include/linux/aer.h > @@ -44,8 +44,10 @@ struct aer_capability_regs { > /* PCIe port driver needs this function to enable AER */ > int pci_enable_pcie_error_reporting(struct pci_dev *dev); > int pci_disable_pcie_error_reporting(struct pci_dev *dev); > -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); > -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev); > +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev, > + bool ff_check); IIUC your intent is that drivers should only call this with ff_check=1, and only the single case in dpc.c should call it with ff_check=0. But the existence of that parameter creates an opportunity for drivers to do it wrong. The "ff_check==0" case needs to be internal to drivers/pci and not even visible to drivers. E.g., you can leave the interface of pci_cleanup_aer_uncorrect_error_status() alone and make it call internal-to-PCI function. There should be no changes to drivers or the documentation. > +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev, > + bool ff_check); Same for this. pci_cleanup_aer_error_status_regs() is only used inside drivers/pci anyway, so should probably be moved out of linux/aer.h. > void pci_save_aer_state(struct pci_dev *dev); > void pci_restore_aer_state(struct pci_dev *dev); > #else > @@ -57,11 +59,13 @@ static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev) > { > return -EINVAL; > } > -static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) > +static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev, > + bool ff_check) > { > return -EINVAL; > } > -static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) > +static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev, > + bool ff_check) > { > return -EINVAL; > } > -- > 2.21.0 >
diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst index 18bdefaafd1a..184c966b61cb 100644 --- a/Documentation/PCI/pcieaer-howto.rst +++ b/Documentation/PCI/pcieaer-howto.rst @@ -243,7 +243,7 @@ messages to root port when an error is detected. :: - int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);` + int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev, bool ff_check); pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable error status register. diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 69bff085acf7..8378dbf3500b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3491,7 +3491,7 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev) result = PCI_ERS_RESULT_DISCONNECT; } - err = pci_cleanup_aer_uncorrect_error_status(pdev); + err = pci_cleanup_aer_uncorrect_error_status(pdev, 1); if (err) dev_dbg(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status failed, error %d\n", diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index dcf234680535..9023308be2de 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -2675,7 +2675,7 @@ static int idt_init_pci(struct idt_ntb_dev *ndev) if (ret != 0) dev_warn(&pdev->dev, "PCIe AER capability disabled\n"); else /* Cleanup uncorrectable error status before getting to init */ - pci_cleanup_aer_uncorrect_error_status(pdev); + pci_cleanup_aer_uncorrect_error_status(pdev, 1); /* First enable the PCI device */ ret = pcim_enable_device(pdev); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e87196cc1a7f..6264244d92bf 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1499,7 +1499,7 @@ void pci_restore_state(struct pci_dev *dev) pci_restore_rebar_state(dev); pci_restore_dpc_state(dev); - pci_cleanup_aer_error_status_regs(dev); + pci_cleanup_aer_error_status_regs(dev, 1); pci_restore_aer_state(dev); pci_restore_config_space(dev); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index a0a53bd05a0b..0b4452f72a9a 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -646,7 +646,7 @@ void pci_no_aer(void); void pci_aer_init(struct pci_dev *dev); void pci_aer_exit(struct pci_dev *dev); extern const struct attribute_group aer_stats_attr_group; -void pci_aer_clear_fatal_status(struct pci_dev *dev); +void pci_aer_clear_fatal_status(struct pci_dev *dev, bool ff_check); void pci_aer_clear_device_status(struct pci_dev *dev); #else static inline void pci_no_aer(void) { } diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 1ca86f2e0166..c64a91b347d2 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -376,7 +376,7 @@ void pci_aer_clear_device_status(struct pci_dev *dev) pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); } -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev, bool ff_check) { int pos; u32 status, sev; @@ -385,7 +385,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) if (!pos) return -EIO; - if (pcie_aer_get_firmware_first(dev)) + if (ff_check && pcie_aer_get_firmware_first(dev)) return -EIO; /* Clear status bits for ERR_NONFATAL errors only */ @@ -399,7 +399,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); -void pci_aer_clear_fatal_status(struct pci_dev *dev) +void pci_aer_clear_fatal_status(struct pci_dev *dev, bool ff_check) { int pos; u32 status, sev; @@ -408,8 +408,8 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) if (!pos) return; - if (pcie_aer_get_firmware_first(dev)) - return; + if (ff_check && pcie_aer_get_firmware_first(dev)) + return -EIO; /* Clear status bits for ERR_FATAL errors only */ pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); @@ -419,7 +419,7 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); } -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev, bool ff_check) { int pos; u32 status; @@ -432,7 +432,7 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) if (!pos) return -EIO; - if (pcie_aer_get_firmware_first(dev)) + if (ff_check && pcie_aer_get_firmware_first(dev)) return -EIO; port_type = pci_pcie_type(dev); @@ -515,7 +515,7 @@ void pci_aer_init(struct pci_dev *dev) n = pcie_cap_has_rtctl(dev) ? 5 : 4; pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n); - pci_cleanup_aer_error_status_regs(dev); + pci_cleanup_aer_error_status_regs(dev, 1); } void pci_aer_exit(struct pci_dev *dev) diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 63366344acff..5bd72d9343f6 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -280,8 +280,8 @@ static void dpc_process_error(struct dpc_dev *dpc) dpc_get_aer_uncorrect_severity(pdev, &info) && aer_get_device_error_info(pdev, &info)) { aer_print_error(pdev, &info); - pci_cleanup_aer_uncorrect_error_status(pdev); - pci_aer_clear_fatal_status(pdev); + pci_cleanup_aer_uncorrect_error_status(pdev, 0); + pci_aer_clear_fatal_status(pdev, 0); } /* We configure DPC so it only triggers on ERR_FATAL */ diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 71639055511e..366be938bd21 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -234,7 +234,7 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, pci_walk_bus(bus, report_resume, &status); pci_aer_clear_device_status(dev); - pci_cleanup_aer_uncorrect_error_status(dev); + pci_cleanup_aer_uncorrect_error_status(dev, 1); pci_info(dev, "AER: Device recovery successful\n"); return; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4ff82b36a37a..5b37efd29e20 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4810,7 +4810,7 @@ lpfc_aer_cleanup_state(struct device *dev, struct device_attribute *attr, return -EINVAL; if (phba->hba_flag & HBA_AER_ENABLED) - rc = pci_cleanup_aer_uncorrect_error_status(phba->pcidev); + rc = pci_cleanup_aer_uncorrect_error_status(phba->pcidev, 1); if (rc == 0) return strlen(buf); diff --git a/include/linux/aer.h b/include/linux/aer.h index fa19e01f418a..f4f49df500ad 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -44,8 +44,10 @@ struct aer_capability_regs { /* PCIe port driver needs this function to enable AER */ int pci_enable_pcie_error_reporting(struct pci_dev *dev); int pci_disable_pcie_error_reporting(struct pci_dev *dev); -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev); +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev, + bool ff_check); +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev, + bool ff_check); void pci_save_aer_state(struct pci_dev *dev); void pci_restore_aer_state(struct pci_dev *dev); #else @@ -57,11 +59,13 @@ static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev) { return -EINVAL; } -static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) +static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev, + bool ff_check) { return -EINVAL; } -static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev, + bool ff_check) { return -EINVAL; }