Message ID | 1590499319-6472-1-git-send-email-wenxiong@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | powerpc/pci: [PATCH 1/1 V3] PCIE PHB reset | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (fff0b48b2415d08f9bc09e9e596b916b3817862b) |
snowpatch_ozlabs/build-ppc64le | success | Build succeeded |
snowpatch_ozlabs/build-ppc64be | success | Build succeeded |
snowpatch_ozlabs/build-ppc64e | success | Build succeeded |
snowpatch_ozlabs/build-pmac32 | success | Build succeeded |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 0 warnings, 12 checks, 161 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
On Wed, May 27, 2020 at 12:48 AM <wenxiong@linux.vnet.ibm.com> wrote: > > From: Wen Xiong <wenxiong@linux.vnet.ibm.com> > > Several device drivers hit EEH(Extended Error handling) when triggering > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs > in pci general code when triggering kdump. PHB reset stop all PCI > transactions from normal kernel. We have tested the patch in several > enviroments: > - direct slot adapters > - adapters under the switch > - a VF adapter in PowerVM > - a VF adapter/adapter in KVM guest. > > Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com> Can you include a patch changelog in future. It's helpful to see what we need to look at specificly when you post a new revision of a patch. Looks good to me otherwise. Reviewed-by: Oliver O'Halloran <oohall@gmail.com>
On Tue, May 26, 2020 at 08:21:59AM -0500, wenxiong@linux.vnet.ibm.com wrote: > From: Wen Xiong <wenxiong@linux.vnet.ibm.com> > > Several device drivers hit EEH(Extended Error handling) when triggering > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs > in pci general code when triggering kdump. PHB reset stop all PCI > transactions from normal kernel. We have tested the patch in several > enviroments: > - direct slot adapters > - adapters under the switch > - a VF adapter in PowerVM > - a VF adapter/adapter in KVM guest. > > Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com> Looks good to me. Reviewed-by: Sam Bobroff <sbobroff@linux.ibm.com> (It would be easier to review if you included a patchset change log and CC'd people who reviewed earlier versions.) Cheers, Sam. > > --- > arch/powerpc/platforms/pseries/pci.c | 152 +++++++++++++++++++++++++++ > 1 file changed, 152 insertions(+) > > diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c > index 911534b89c85..cb7e4276cf04 100644 > --- a/arch/powerpc/platforms/pseries/pci.c > +++ b/arch/powerpc/platforms/pseries/pci.c > @@ -11,6 +11,8 @@ > #include <linux/kernel.h> > #include <linux/pci.h> > #include <linux/string.h> > +#include <linux/crash_dump.h> > +#include <linux/delay.h> > > #include <asm/eeh.h> > #include <asm/pci-bridge.h> > @@ -354,3 +356,153 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) > > return 0; > } > + > +/** > + * pseries_get_pdn_addr - Retrieve PHB address > + * @pe: EEH PE > + * > + * Retrieve the assocated PHB address. Actually, there're 2 RTAS > + * function calls dedicated for the purpose. We need implement > + * it through the new function and then the old one. Besides, > + * you should make sure the config address is figured out from > + * FDT node before calling the function. > + * > + */ > +static int pseries_get_pdn_addr(struct pci_controller *phb) > +{ > + int ret = -1; > + int rets[3]; > + int ibm_get_config_addr_info; > + int ibm_get_config_addr_info2; > + int config_addr = 0; > + struct pci_dn *root_pdn, *pdn; > + > + ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); > + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); > + > + root_pdn = PCI_DN(phb->dn); > + pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list); > + config_addr = (pdn->busno << 16) | (pdn->devfn << 8); > + > + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { > + /* > + * First of all, we need to make sure there has one PE > + * associated with the device. If option is 1, it > + * queries if config address is supported in a PE or not. > + * If option is 0, it returns PE config address or config > + * address for the PE primary bus. > + */ > + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, > + config_addr, BUID_HI(pdn->phb->buid), > + BUID_LO(pdn->phb->buid), 1); > + if (ret || (rets[0] == 0)) { > + pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n", > + __func__, pdn->phb->global_number, 1, rets[0]); > + return -1; > + } > + > + /* Retrieve the associated PE config address */ > + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, > + config_addr, BUID_HI(pdn->phb->buid), > + BUID_LO(pdn->phb->buid), 0); > + if (ret) { > + pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n", > + __func__, pdn->phb->global_number, 0, rets[0]); > + return -1; > + } > + return rets[0]; > + } > + > + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { > + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, > + config_addr, BUID_HI(pdn->phb->buid), > + BUID_LO(pdn->phb->buid), 0); > + if (ret || rets[0]) { > + pr_warn("%s: Failed to get address for PHB#%x-PE# config_addr=%x\n", > + __func__, pdn->phb->global_number, rets[0]); > + return -1; > + } > + return rets[0]; > + } > + > + return ret; > +} > + > +static int __init pseries_phb_reset(void) > +{ > + struct pci_controller *phb; > + int config_addr; > + int ibm_set_slot_reset; > + int ibm_configure_pe; > + int ret; > + > + if (is_kdump_kernel() || reset_devices) { > + pr_info("Issue PHB reset ...\n"); > + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); > + ibm_configure_pe = rtas_token("ibm,configure-pe"); > + > + if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || > + ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { > + pr_info("%s: EEH functionality not supported\n", > + __func__); > + } > + > + list_for_each_entry(phb, &hose_list, list_node) { > + config_addr = pseries_get_pdn_addr(phb); > + if (config_addr == -1) > + continue; > + > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL); > + > + /* If fundamental-reset not supported, try hot-reset */ > + if (ret == -8) > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid), EEH_RESET_HOT); > + > + if (ret) { > + pr_err("%s: PHB#%x-PE# failed with rtas_call activate reset=%d\n", > + __func__, phb->global_number, ret); > + continue; > + } > + } > + msleep(EEH_PE_RST_SETTLE_TIME); > + > + list_for_each_entry(phb, &hose_list, list_node) { > + config_addr = pseries_get_pdn_addr(phb); > + if (config_addr == -1) > + continue; > + > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid), EEH_RESET_DEACTIVATE); > + if (ret) { > + pr_err("%s: PHB#%x-PE# failed with rtas_call deactive reset=%d\n", > + __func__, phb->global_number, ret); > + continue; > + } > + } > + msleep(EEH_PE_RST_SETTLE_TIME); > + > + list_for_each_entry(phb, &hose_list, list_node) { > + config_addr = pseries_get_pdn_addr(phb); > + if (config_addr == -1) > + continue; > + > + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid)); > + if (ret) { > + pr_err("%s: PHB#%x-PE# failed with rtas_call configure_pe =%d\n", > + __func__, phb->global_number, ret); > + continue; > + } > + } > + } > + > + return 0; > +} > +machine_postcore_initcall(pseries, pseries_phb_reset); > + > -- > 2.18.1 >
wenxiong@linux.vnet.ibm.com writes: > From: Wen Xiong <wenxiong@linux.vnet.ibm.com> > > Several device drivers hit EEH(Extended Error handling) when triggering > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs > in pci general code when triggering kdump. Actually it's in pseries specific PCI code, and the reset is done in the 2nd kernel as it boots, not when triggering the kdump. You're doing it as a: machine_postcore_initcall(pseries, pseries_phb_reset); But we do the EEH initialisation in: core_initcall_sync(eeh_init); Which happens first. So it seems to me that this should be called from pseries_eeh_init(). That would isolate the code in the right place, and allow you to use the existing ibm_get_config_addr_info. You probably can't use pseries_eeh_get_pe_addr(), because you won't have a "pe" structure yet. Instead you should add a helper that does the core of that logic but accepts config_addr/buid as parameters, and then have your code and pseries_eeh_get_pe_addr() call that. > PHB reset stop all PCI > transactions from normal kernel. We have tested the patch in several > enviroments: > - direct slot adapters > - adapters under the switch > - a VF adapter in PowerVM > - a VF adapter/adapter in KVM guest. > > Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com> > > --- > arch/powerpc/platforms/pseries/pci.c | 152 +++++++++++++++++++++++++++ > 1 file changed, 152 insertions(+) > > diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c > index 911534b89c85..cb7e4276cf04 100644 > --- a/arch/powerpc/platforms/pseries/pci.c > +++ b/arch/powerpc/platforms/pseries/pci.c > @@ -11,6 +11,8 @@ > #include <linux/kernel.h> > #include <linux/pci.h> > #include <linux/string.h> > +#include <linux/crash_dump.h> > +#include <linux/delay.h> > > #include <asm/eeh.h> > #include <asm/pci-bridge.h> > @@ -354,3 +356,153 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) > > return 0; > } > + > +/** > + * pseries_get_pdn_addr - Retrieve PHB address > + * @pe: EEH PE There is no "pe" parameter. Oh but there is in pseries_eeh_get_pe_addr() which this is an almost verbatim copy of. > + * > + * Retrieve the assocated PHB address. Actually, there're 2 RTAS > + * function calls dedicated for the purpose. We need implement > + * it through the new function and then the old one. Besides, > + * you should make sure the config address is figured out from > + * FDT node before calling the function. > + * > + */ > +static int pseries_get_pdn_addr(struct pci_controller *phb) > +{ > + int ret = -1; > + int rets[3]; > + int ibm_get_config_addr_info; > + int ibm_get_config_addr_info2; > + int config_addr = 0; > + struct pci_dn *root_pdn, *pdn; > + > + ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); > + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); > + > + root_pdn = PCI_DN(phb->dn); > + pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list); > + config_addr = (pdn->busno << 16) | (pdn->devfn << 8); > + > + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { > + /* > + * First of all, we need to make sure there has one PE > + * associated with the device. If option is 1, it > + * queries if config address is supported in a PE or not. > + * If option is 0, it returns PE config address or config > + * address for the PE primary bus. > + */ > + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, > + config_addr, BUID_HI(pdn->phb->buid), > + BUID_LO(pdn->phb->buid), 1); > + if (ret || (rets[0] == 0)) { > + pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n", > + __func__, pdn->phb->global_number, 1, rets[0]); Here you've hacked the existing pr_warn() to drop the PE number but left "-PE#" in the format string. See the original: pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); > + return -1; > + } > + > + /* Retrieve the associated PE config address */ > + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, > + config_addr, BUID_HI(pdn->phb->buid), > + BUID_LO(pdn->phb->buid), 0); > + if (ret) { > + pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n", ^ and here > + __func__, pdn->phb->global_number, 0, rets[0]); > + return -1; > + } > + return rets[0]; > + } > + > + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { > + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, > + config_addr, BUID_HI(pdn->phb->buid), > + BUID_LO(pdn->phb->buid), 0); > + if (ret || rets[0]) { > + pr_warn("%s: Failed to get address for PHB#%x-PE# config_addr=%x\n", ^ and here > + __func__, pdn->phb->global_number, rets[0]); > + return -1; > + } > + return rets[0]; > + } > + > + return ret; > +} > + > +static int __init pseries_phb_reset(void) > +{ > + struct pci_controller *phb; > + int config_addr; > + int ibm_set_slot_reset; > + int ibm_configure_pe; > + int ret; > + > + if (is_kdump_kernel() || reset_devices) { This should be inverted and turned into an early return, allowing the entire rest of the function to be de-indented. > + pr_info("Issue PHB reset ...\n"); > + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); > + ibm_configure_pe = rtas_token("ibm,configure-pe"); > + > + if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || > + ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { > + pr_info("%s: EEH functionality not supported\n", > + __func__); But then you just continue? > + } > + > + list_for_each_entry(phb, &hose_list, list_node) { > + config_addr = pseries_get_pdn_addr(phb); > + if (config_addr == -1) > + continue; > + > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL); > + > + /* If fundamental-reset not supported, try hot-reset */ > + if (ret == -8) Where does -8 come from? Oh I see, it's copied from pseries_eeh_reset(). > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid), EEH_RESET_HOT); > + > + if (ret) { > + pr_err("%s: PHB#%x-PE# failed with rtas_call activate reset=%d\n", ^ again missing PE number. > + __func__, phb->global_number, ret); > + continue; > + } > + } > + msleep(EEH_PE_RST_SETTLE_TIME); So that loop is basically a copy of pseries_eeh_reset() but with the sleep hoisted out of the loop. I'd really prefer to see that refactored into a helper that takes the config_addr and buid and doesn't do the sleep. Then this loop could call that helper, and so could pseries_eeh_reset(). > + > + list_for_each_entry(phb, &hose_list, list_node) { > + config_addr = pseries_get_pdn_addr(phb); > + if (config_addr == -1) > + continue; > + > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid), EEH_RESET_DEACTIVATE); > + if (ret) { > + pr_err("%s: PHB#%x-PE# failed with rtas_call deactive reset=%d\n", > + __func__, phb->global_number, ret); > + continue; > + } > + } > + msleep(EEH_PE_RST_SETTLE_TIME); > + > + list_for_each_entry(phb, &hose_list, list_node) { > + config_addr = pseries_get_pdn_addr(phb); > + if (config_addr == -1) > + continue; > + > + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, > + config_addr, BUID_HI(phb->buid), > + BUID_LO(phb->buid)); > + if (ret) { > + pr_err("%s: PHB#%x-PE# failed with rtas_call configure_pe =%d\n", > + __func__, phb->global_number, ret); > + continue; > + } > + } > + } > + > + return 0; > +} > +machine_postcore_initcall(pseries, pseries_phb_reset); cheers
On Tue, Jun 16, 2020 at 9:55 PM Michael Ellerman <mpe@ellerman.id.au> wrote: > > wenxiong@linux.vnet.ibm.com writes: > > From: Wen Xiong <wenxiong@linux.vnet.ibm.com> > > > > Several device drivers hit EEH(Extended Error handling) when triggering > > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs > > in pci general code when triggering kdump. > > Actually it's in pseries specific PCI code, and the reset is done in the > 2nd kernel as it boots, not when triggering the kdump. > > You're doing it as a: > > machine_postcore_initcall(pseries, pseries_phb_reset); > > But we do the EEH initialisation in: > > core_initcall_sync(eeh_init); > > Which happens first. > > So it seems to me that this should be called from pseries_eeh_init(). This happens to use some of the same RTAS calls as EEH, but it's entirely orthogonal to it. Wedging the two together doesn't make any real sense IMO since this should be usable even with !CONFIG_EEH. > That would isolate the code in the right place, and allow you to use the > existing ibm_get_config_addr_info. > > You probably can't use pseries_eeh_get_pe_addr(), because you won't have > a "pe" structure yet. > > Instead you should add a helper that does the core of that logic but > accepts config_addr/buid as parameters, and then have your code and > pseries_eeh_get_pe_addr() call that. I have a patch in my next pile of eeh reworks which kills off pseries_eeh_get_pe_addr() entirely. It's used to implement eeh_ops->get_pe_addr for pseries, but the only caller of ->get_pe_addr() is in pseries EEH code and the powernv version is a stub so I was going to drop it from EEH ops and fold it into the caller. We could do that in this patch, but it's just going to create a merge conflict for you to deal with. Up to you. > *snip* > > + list_for_each_entry(phb, &hose_list, list_node) { > > + config_addr = pseries_get_pdn_addr(phb); > > + if (config_addr == -1) > > + continue; > > + > > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > > + config_addr, BUID_HI(phb->buid), > > + BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL); > > + > > + /* If fundamental-reset not supported, try hot-reset */ > > + if (ret == -8) > > Where does -8 come from? There's a comment right there. It could be better explained I suppose, but you need to read PAPR/LoPAPR to make sense of anything RTAS so what's it really buying you? > Oh I see, it's copied from pseries_eeh_reset(). > > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > > + config_addr, BUID_HI(phb->buid), > > + BUID_LO(phb->buid), EEH_RESET_HOT); > > + > > + if (ret) { > > + pr_err("%s: PHB#%x-PE# failed with rtas_call activate reset=%d\n", > ^ > again missing PE number. > > > + __func__, phb->global_number, ret); > > + continue; > > + } > > + } > > + msleep(EEH_PE_RST_SETTLE_TIME); > > So that loop is basically a copy of pseries_eeh_reset() but with the > sleep hoisted out of the loop. > > I'd really prefer to see that refactored into a helper that takes the > config_addr and buid and doesn't do the sleep. > > Then this loop could call that helper, and so could pseries_eeh_reset(). That's better so long as we're not requiring CONFIG_EEH being selected. pseries_eeh_reset() uses the rtas token variables initialised in pseries_eeh_init() which are static to the file so they'd need to be initialised somewhere common.
"Oliver O'Halloran" <oohall@gmail.com> writes: > On Tue, Jun 16, 2020 at 9:55 PM Michael Ellerman <mpe@ellerman.id.au> wrote: >> wenxiong@linux.vnet.ibm.com writes: >> > From: Wen Xiong <wenxiong@linux.vnet.ibm.com> >> > >> > Several device drivers hit EEH(Extended Error handling) when triggering >> > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs >> > in pci general code when triggering kdump. >> >> Actually it's in pseries specific PCI code, and the reset is done in the >> 2nd kernel as it boots, not when triggering the kdump. >> >> You're doing it as a: >> >> machine_postcore_initcall(pseries, pseries_phb_reset); >> >> But we do the EEH initialisation in: >> >> core_initcall_sync(eeh_init); >> >> Which happens first. >> >> So it seems to me that this should be called from pseries_eeh_init(). > > This happens to use some of the same RTAS calls as EEH, but it's > entirely orthogonal to it. I don't agree. I mean it's literally calling EEH_RESET_FUNDAMENTAL etc. Those RTAS calls are all documented in the EEH section of PAPR. I guess you're saying it's orthogonal to the kernel handling an EEH and doing the recovery process etc, which I can kind of see. > Wedging the two together doesn't make any real sense IMO since this > should be usable even with !CONFIG_EEH. You can't turn CONFIG_EEH off for pseries or powernv. And if you could this patch wouldn't compile because it uses EEH constants that are behind #ifdef CONFIG_EEH. If you could turn CONFIG_EEH off it would presumably be because you were on a platform that didn't support EEH, in which case you wouldn't need this code. So IMO this is EEH code, and should be with the other EEH code and should be behind CONFIG_EEH. >> That would isolate the code in the right place, and allow you to use the >> existing ibm_get_config_addr_info. >> >> You probably can't use pseries_eeh_get_pe_addr(), because you won't have >> a "pe" structure yet. >> >> Instead you should add a helper that does the core of that logic but >> accepts config_addr/buid as parameters, and then have your code and >> pseries_eeh_get_pe_addr() call that. > > I have a patch in my next pile of eeh reworks which kills off > pseries_eeh_get_pe_addr() entirely. It's used to implement > eeh_ops->get_pe_addr for pseries, but the only caller of > ->get_pe_addr() is in pseries EEH code and the powernv version is a > stub so I was going to drop it from EEH ops and fold it into the > caller. We could do that in this patch, but it's just going to create > a merge conflict for you to deal with. Up to you. That sounds like a good cleanup. I'm not concerned about conflicts within arch/powerpc, I can fix them up. >> > + list_for_each_entry(phb, &hose_list, list_node) { >> > + config_addr = pseries_get_pdn_addr(phb); >> > + if (config_addr == -1) >> > + continue; >> > + >> > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, >> > + config_addr, BUID_HI(phb->buid), >> > + BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL); >> > + >> > + /* If fundamental-reset not supported, try hot-reset */ >> > + if (ret == -8) >> >> Where does -8 come from? > > There's a comment right there. Yeah I guess. I was expecting it would map to some RTAS_ERROR_FOO value, but it's just literally -8 in PAPR. As long as there's only a single usage then I don't mind it. > It could be better explained I suppose, but you need to read > PAPR/LoPAPR to make sense of anything RTAS so what's it really buying > you? Making the code easier for new folks to understand? cheers
On Wed, Jun 17, 2020 at 4:29 PM Michael Ellerman <mpe@ellerman.id.au> wrote: > > "Oliver O'Halloran" <oohall@gmail.com> writes: > > On Tue, Jun 16, 2020 at 9:55 PM Michael Ellerman <mpe@ellerman.id.au> wrote: > >> wenxiong@linux.vnet.ibm.com writes: > >> > From: Wen Xiong <wenxiong@linux.vnet.ibm.com> > >> > > >> > Several device drivers hit EEH(Extended Error handling) when triggering > >> > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs > >> > in pci general code when triggering kdump. > >> > >> Actually it's in pseries specific PCI code, and the reset is done in the > >> 2nd kernel as it boots, not when triggering the kdump. > >> > >> You're doing it as a: > >> > >> machine_postcore_initcall(pseries, pseries_phb_reset); > >> > >> But we do the EEH initialisation in: > >> > >> core_initcall_sync(eeh_init); > >> > >> Which happens first. > >> > >> So it seems to me that this should be called from pseries_eeh_init(). > > > > This happens to use some of the same RTAS calls as EEH, but it's > > entirely orthogonal to it. > > I don't agree. I mean it's literally calling EEH_RESET_FUNDAMENTAL etc. > Those RTAS calls are all documented in the EEH section of PAPR. > > I guess you're saying it's orthogonal to the kernel handling an EEH and > doing the recovery process etc, which I can kind of see. > > > Wedging the two together doesn't make any real sense IMO since this > > should be usable even with !CONFIG_EEH. > > You can't turn CONFIG_EEH off for pseries or powernv. Not yet :) > And if you could this patch wouldn't compile because it uses EEH > constants that are behind #ifdef CONFIG_EEH. That's fixable. > If you could turn CONFIG_EEH off it would presumably be because you were > on a platform that didn't support EEH, in which case you wouldn't need > this code. I think there's an argument to be made for disabling EEH in some situations. A lot of drivers do a pretty poor job of recovering in the first place so it's conceivable that someone might want to disable it in say, a kdump kernel. That said, the real reason is mostly for the sake of code organisation. EEH is an optional platform feature but you wouldn't know it looking at the implementation and I'd like to stop it bleeding into odd places. Making it buildable without !CONFIG_EEH would probably help. > So IMO this is EEH code, and should be with the other EEH code and > should be behind CONFIG_EEH. *shrug* I wanted it to follow the model of the powernv implementation of the same feature which is done immediately after initialising the pci_controller and independent of all of the EEH setup. Although, looking at it again I see it calls pnv_eeh_phb_reset() which is in eeh_powernv.c so I guess that's pretty similar to what you're suggesting. > That sounds like a good cleanup. I'm not concerned about conflicts > within arch/powerpc, I can fix them up. > > >> > + list_for_each_entry(phb, &hose_list, list_node) { > >> > + config_addr = pseries_get_pdn_addr(phb); > >> > + if (config_addr == -1) > >> > + continue; > >> > + > >> > + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, > >> > + config_addr, BUID_HI(phb->buid), > >> > + BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL); > >> > + > >> > + /* If fundamental-reset not supported, try hot-reset */ > >> > + if (ret == -8) > >> > >> Where does -8 come from? > > > > There's a comment right there. > > Yeah I guess. I was expecting it would map to some RTAS_ERROR_FOO value, > but it's just literally -8 in PAPR. Yeah, as far as I can tell the meaning of the return codes are specific to each RTAS call, it's a bit bad.
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 911534b89c85..cb7e4276cf04 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -11,6 +11,8 @@ #include <linux/kernel.h> #include <linux/pci.h> #include <linux/string.h> +#include <linux/crash_dump.h> +#include <linux/delay.h> #include <asm/eeh.h> #include <asm/pci-bridge.h> @@ -354,3 +356,153 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) return 0; } + +/** + * pseries_get_pdn_addr - Retrieve PHB address + * @pe: EEH PE + * + * Retrieve the assocated PHB address. Actually, there're 2 RTAS + * function calls dedicated for the purpose. We need implement + * it through the new function and then the old one. Besides, + * you should make sure the config address is figured out from + * FDT node before calling the function. + * + */ +static int pseries_get_pdn_addr(struct pci_controller *phb) +{ + int ret = -1; + int rets[3]; + int ibm_get_config_addr_info; + int ibm_get_config_addr_info2; + int config_addr = 0; + struct pci_dn *root_pdn, *pdn; + + ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); + + root_pdn = PCI_DN(phb->dn); + pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list); + config_addr = (pdn->busno << 16) | (pdn->devfn << 8); + + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { + /* + * First of all, we need to make sure there has one PE + * associated with the device. If option is 1, it + * queries if config address is supported in a PE or not. + * If option is 0, it returns PE config address or config + * address for the PE primary bus. + */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), 1); + if (ret || (rets[0] == 0)) { + pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n", + __func__, pdn->phb->global_number, 1, rets[0]); + return -1; + } + + /* Retrieve the associated PE config address */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), 0); + if (ret) { + pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n", + __func__, pdn->phb->global_number, 0, rets[0]); + return -1; + } + return rets[0]; + } + + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), 0); + if (ret || rets[0]) { + pr_warn("%s: Failed to get address for PHB#%x-PE# config_addr=%x\n", + __func__, pdn->phb->global_number, rets[0]); + return -1; + } + return rets[0]; + } + + return ret; +} + +static int __init pseries_phb_reset(void) +{ + struct pci_controller *phb; + int config_addr; + int ibm_set_slot_reset; + int ibm_configure_pe; + int ret; + + if (is_kdump_kernel() || reset_devices) { + pr_info("Issue PHB reset ...\n"); + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); + ibm_configure_pe = rtas_token("ibm,configure-pe"); + + if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || + ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { + pr_info("%s: EEH functionality not supported\n", + __func__); + } + + list_for_each_entry(phb, &hose_list, list_node) { + config_addr = pseries_get_pdn_addr(phb); + if (config_addr == -1) + continue; + + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL); + + /* If fundamental-reset not supported, try hot-reset */ + if (ret == -8) + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), EEH_RESET_HOT); + + if (ret) { + pr_err("%s: PHB#%x-PE# failed with rtas_call activate reset=%d\n", + __func__, phb->global_number, ret); + continue; + } + } + msleep(EEH_PE_RST_SETTLE_TIME); + + list_for_each_entry(phb, &hose_list, list_node) { + config_addr = pseries_get_pdn_addr(phb); + if (config_addr == -1) + continue; + + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), EEH_RESET_DEACTIVATE); + if (ret) { + pr_err("%s: PHB#%x-PE# failed with rtas_call deactive reset=%d\n", + __func__, phb->global_number, ret); + continue; + } + } + msleep(EEH_PE_RST_SETTLE_TIME); + + list_for_each_entry(phb, &hose_list, list_node) { + config_addr = pseries_get_pdn_addr(phb); + if (config_addr == -1) + continue; + + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid)); + if (ret) { + pr_err("%s: PHB#%x-PE# failed with rtas_call configure_pe =%d\n", + __func__, phb->global_number, ret); + continue; + } + } + } + + return 0; +} +machine_postcore_initcall(pseries, pseries_phb_reset); +