diff mbox series

powerpc/pci: [PATCH 1/1 V3] PCIE PHB reset

Message ID 1590499319-6472-1-git-send-email-wenxiong@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show
Series powerpc/pci: [PATCH 1/1 V3] PCIE PHB reset | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (fff0b48b2415d08f9bc09e9e596b916b3817862b)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 12 checks, 161 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

wenxiong May 26, 2020, 1:21 p.m. UTC
From: Wen Xiong <wenxiong@linux.vnet.ibm.com>

Several device drivers hit EEH(Extended Error handling) when triggering
kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
in pci general code when triggering kdump. PHB reset stop all PCI
transactions from normal kernel. We have tested the patch in several
enviroments:
- direct slot adapters
- adapters under the switch
- a VF adapter in PowerVM
- a VF adapter/adapter in KVM guest.

Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>

---
 arch/powerpc/platforms/pseries/pci.c | 152 +++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

Comments

Oliver O'Halloran May 29, 2020, 2:56 a.m. UTC | #1
On Wed, May 27, 2020 at 12:48 AM <wenxiong@linux.vnet.ibm.com> wrote:
>
> From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
>
> Several device drivers hit EEH(Extended Error handling) when triggering
> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> in pci general code when triggering kdump. PHB reset stop all PCI
> transactions from normal kernel. We have tested the patch in several
> enviroments:
> - direct slot adapters
> - adapters under the switch
> - a VF adapter in PowerVM
> - a VF adapter/adapter in KVM guest.
>
> Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>

Can you include a patch changelog in future. It's helpful to see what
we need to look at specificly when you post a new revision of a patch.
Looks good to me otherwise.

Reviewed-by: Oliver O'Halloran <oohall@gmail.com>
Sam Bobroff May 29, 2020, 3:17 a.m. UTC | #2
On Tue, May 26, 2020 at 08:21:59AM -0500, wenxiong@linux.vnet.ibm.com wrote:
> From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
> 
> Several device drivers hit EEH(Extended Error handling) when triggering
> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> in pci general code when triggering kdump. PHB reset stop all PCI
> transactions from normal kernel. We have tested the patch in several
> enviroments:
> - direct slot adapters
> - adapters under the switch
> - a VF adapter in PowerVM
> - a VF adapter/adapter in KVM guest.
> 
> Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>

Looks good to me.
Reviewed-by: Sam Bobroff <sbobroff@linux.ibm.com>

(It would be easier to review if you included a patchset change log and
CC'd people who reviewed earlier versions.)

Cheers,
Sam.

> 
> ---
>  arch/powerpc/platforms/pseries/pci.c | 152 +++++++++++++++++++++++++++
>  1 file changed, 152 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
> index 911534b89c85..cb7e4276cf04 100644
> --- a/arch/powerpc/platforms/pseries/pci.c
> +++ b/arch/powerpc/platforms/pseries/pci.c
> @@ -11,6 +11,8 @@
>  #include <linux/kernel.h>
>  #include <linux/pci.h>
>  #include <linux/string.h>
> +#include <linux/crash_dump.h>
> +#include <linux/delay.h>
>  
>  #include <asm/eeh.h>
>  #include <asm/pci-bridge.h>
> @@ -354,3 +356,153 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
>  
>  	return 0;
>  }
> +
> +/**
> + * pseries_get_pdn_addr - Retrieve PHB address
> + * @pe: EEH PE
> + *
> + * Retrieve the assocated PHB address. Actually, there're 2 RTAS
> + * function calls dedicated for the purpose. We need implement
> + * it through the new function and then the old one. Besides,
> + * you should make sure the config address is figured out from
> + * FDT node before calling the function.
> + *
> + */
> +static int pseries_get_pdn_addr(struct pci_controller *phb)
> +{
> +	int ret = -1;
> +	int rets[3];
> +	int ibm_get_config_addr_info;
> +	int ibm_get_config_addr_info2;
> +	int config_addr = 0;
> +	struct pci_dn *root_pdn, *pdn;
> +
> +	ibm_get_config_addr_info2   = rtas_token("ibm,get-config-addr-info2");
> +	ibm_get_config_addr_info    = rtas_token("ibm,get-config-addr-info");
> +
> +	root_pdn = PCI_DN(phb->dn);
> +	pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
> +	config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
> +
> +	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
> +		/*
> +		 * First of all, we need to make sure there has one PE
> +		 * associated with the device. If option is 1, it
> +		 * queries if config address is supported in a PE or not.
> +		 * If option is 0, it returns PE config address or config
> +		 * address for the PE primary bus.
> +		 */
> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 1);
> +		if (ret || (rets[0] == 0)) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n",
> +				__func__, pdn->phb->global_number, 1, rets[0]);
> +			return -1;
> +		}
> +
> +		/* Retrieve the associated PE config address */
> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 0);
> +		if (ret) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n",
> +				__func__, pdn->phb->global_number, 0, rets[0]);
> +			return -1;
> +		}
> +		return rets[0];
> +	}
> +
> +	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
> +		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 0);
> +		if (ret || rets[0]) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# config_addr=%x\n",
> +				__func__, pdn->phb->global_number, rets[0]);
> +			return -1;
> +		}
> +		return rets[0];
> +	}
> +
> +	return ret;
> +}
> +
> +static int __init pseries_phb_reset(void)
> +{
> +	struct pci_controller *phb;
> +	int config_addr;
> +	int ibm_set_slot_reset;
> +	int ibm_configure_pe;
> +	int ret;
> +
> +	if (is_kdump_kernel() || reset_devices) {
> +		pr_info("Issue PHB reset ...\n");
> +		ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
> +		ibm_configure_pe = rtas_token("ibm,configure-pe");
> +
> +		if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
> +				ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
> +			pr_info("%s: EEH functionality not supported\n",
> +				__func__);
> +		}
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
> +
> +			/* If fundamental-reset not supported, try hot-reset */
> +			if (ret == -8)
> +				ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +					config_addr, BUID_HI(phb->buid),
> +					BUID_LO(phb->buid), EEH_RESET_HOT);
> +
> +			if (ret) {
> +				pr_err("%s: PHB#%x-PE# failed with rtas_call activate reset=%d\n",
> +					__func__, phb->global_number, ret);
> +				continue;
> +			}
> +		}
> +		msleep(EEH_PE_RST_SETTLE_TIME);
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
> +			if (ret) {
> +				pr_err("%s: PHB#%x-PE# failed with rtas_call deactive reset=%d\n",
> +					__func__, phb->global_number, ret);
> +				continue;
> +			}
> +		}
> +		msleep(EEH_PE_RST_SETTLE_TIME);
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid));
> +			if (ret) {
> +				pr_err("%s: PHB#%x-PE# failed with rtas_call configure_pe =%d\n",
> +					__func__, phb->global_number, ret);
> +				continue;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +machine_postcore_initcall(pseries, pseries_phb_reset);
> +
> -- 
> 2.18.1
>
Michael Ellerman June 16, 2020, 11:56 a.m. UTC | #3
wenxiong@linux.vnet.ibm.com writes:
> From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
>
> Several device drivers hit EEH(Extended Error handling) when triggering
> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> in pci general code when triggering kdump.

Actually it's in pseries specific PCI code, and the reset is done in the
2nd kernel as it boots, not when triggering the kdump.

You're doing it as a:

  machine_postcore_initcall(pseries, pseries_phb_reset);

But we do the EEH initialisation in:

  core_initcall_sync(eeh_init);

Which happens first.

So it seems to me that this should be called from pseries_eeh_init().

That would isolate the code in the right place, and allow you to use the
existing ibm_get_config_addr_info.

You probably can't use pseries_eeh_get_pe_addr(), because you won't have
a "pe" structure yet.

Instead you should add a helper that does the core of that logic but
accepts config_addr/buid as parameters, and then have your code and
pseries_eeh_get_pe_addr() call that.

> PHB reset stop all PCI
> transactions from normal kernel. We have tested the patch in several
> enviroments:
> - direct slot adapters
> - adapters under the switch
> - a VF adapter in PowerVM
> - a VF adapter/adapter in KVM guest.
>
> Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>
>
> ---
>  arch/powerpc/platforms/pseries/pci.c | 152 +++++++++++++++++++++++++++
>  1 file changed, 152 insertions(+)
>
> diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
> index 911534b89c85..cb7e4276cf04 100644
> --- a/arch/powerpc/platforms/pseries/pci.c
> +++ b/arch/powerpc/platforms/pseries/pci.c
> @@ -11,6 +11,8 @@
>  #include <linux/kernel.h>
>  #include <linux/pci.h>
>  #include <linux/string.h>
> +#include <linux/crash_dump.h>
> +#include <linux/delay.h>
>  
>  #include <asm/eeh.h>
>  #include <asm/pci-bridge.h>
> @@ -354,3 +356,153 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
>  
>  	return 0;
>  }
> +
> +/**
> + * pseries_get_pdn_addr - Retrieve PHB address
> + * @pe: EEH PE

There is no "pe" parameter.

Oh but there is in pseries_eeh_get_pe_addr() which this is an almost
verbatim copy of.

> + *
> + * Retrieve the assocated PHB address. Actually, there're 2 RTAS
> + * function calls dedicated for the purpose. We need implement
> + * it through the new function and then the old one. Besides,
> + * you should make sure the config address is figured out from
> + * FDT node before calling the function.
> + *
> + */
> +static int pseries_get_pdn_addr(struct pci_controller *phb)
> +{
> +	int ret = -1;
> +	int rets[3];
> +	int ibm_get_config_addr_info;
> +	int ibm_get_config_addr_info2;
> +	int config_addr = 0;
> +	struct pci_dn *root_pdn, *pdn;
> +
> +	ibm_get_config_addr_info2   = rtas_token("ibm,get-config-addr-info2");
> +	ibm_get_config_addr_info    = rtas_token("ibm,get-config-addr-info");
> +
> +	root_pdn = PCI_DN(phb->dn);
> +	pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
> +	config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
> +
> +	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
> +		/*
> +		 * First of all, we need to make sure there has one PE
> +		 * associated with the device. If option is 1, it
> +		 * queries if config address is supported in a PE or not.
> +		 * If option is 0, it returns PE config address or config
> +		 * address for the PE primary bus.
> +		 */
> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 1);
> +		if (ret || (rets[0] == 0)) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n",
> +				__func__, pdn->phb->global_number, 1, rets[0]);

Here you've hacked the existing pr_warn() to drop the PE number but left
"-PE#" in the format string.

See the original:
			pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
				__func__, pe->phb->global_number, pe->config_addr);


> +			return -1;
> +		}
> +
> +		/* Retrieve the associated PE config address */
> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 0);
> +		if (ret) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n",
                                                                         ^
                                                                         and here

> +				__func__, pdn->phb->global_number, 0, rets[0]);
> +			return -1;
> +		}
> +		return rets[0];
> +	}
> +
> +	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
> +		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 0);
> +		if (ret || rets[0]) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# config_addr=%x\n",
                                                                         ^
                                                                         and here

> +				__func__, pdn->phb->global_number, rets[0]);
> +			return -1;
> +		}
> +		return rets[0];
> +	}
> +
> +	return ret;
> +}
> +
> +static int __init pseries_phb_reset(void)
> +{
> +	struct pci_controller *phb;
> +	int config_addr;
> +	int ibm_set_slot_reset;
> +	int ibm_configure_pe;
> +	int ret;
> +
> +	if (is_kdump_kernel() || reset_devices) {

This should be inverted and turned into an early return, allowing the
entire rest of the function to be de-indented.

> +		pr_info("Issue PHB reset ...\n");
> +		ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
> +		ibm_configure_pe = rtas_token("ibm,configure-pe");
> +
> +		if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
> +				ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
> +			pr_info("%s: EEH functionality not supported\n",
> +				__func__);

But then you just continue?

> +		}
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
> +
> +			/* If fundamental-reset not supported, try hot-reset */
> +			if (ret == -8)

Where does -8 come from?

Oh I see, it's copied from pseries_eeh_reset().

> +				ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +					config_addr, BUID_HI(phb->buid),
> +					BUID_LO(phb->buid), EEH_RESET_HOT);
> +
> +			if (ret) {
> +				pr_err("%s: PHB#%x-PE# failed with rtas_call activate reset=%d\n",
                                                      ^
                                                      again missing PE number.

> +					__func__, phb->global_number, ret);
> +				continue;
> +			}
> +		}
> +		msleep(EEH_PE_RST_SETTLE_TIME);

So that loop is basically a copy of pseries_eeh_reset() but with the
sleep hoisted out of the loop.

I'd really prefer to see that refactored into a helper that takes the
config_addr and buid and doesn't do the sleep.

Then this loop could call that helper, and so could pseries_eeh_reset().

> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
> +			if (ret) {
> +				pr_err("%s: PHB#%x-PE# failed with rtas_call deactive reset=%d\n",
> +					__func__, phb->global_number, ret);
> +				continue;
> +			}
> +		}
> +		msleep(EEH_PE_RST_SETTLE_TIME);
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid));
> +			if (ret) {
> +				pr_err("%s: PHB#%x-PE# failed with rtas_call configure_pe =%d\n",
> +					__func__, phb->global_number, ret);
> +				continue;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +machine_postcore_initcall(pseries, pseries_phb_reset);

cheers
Oliver O'Halloran June 17, 2020, 12:45 a.m. UTC | #4
On Tue, Jun 16, 2020 at 9:55 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> wenxiong@linux.vnet.ibm.com writes:
> > From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
> >
> > Several device drivers hit EEH(Extended Error handling) when triggering
> > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> > in pci general code when triggering kdump.
>
> Actually it's in pseries specific PCI code, and the reset is done in the
> 2nd kernel as it boots, not when triggering the kdump.
>
> You're doing it as a:
>
>   machine_postcore_initcall(pseries, pseries_phb_reset);
>
> But we do the EEH initialisation in:
>
>   core_initcall_sync(eeh_init);
>
> Which happens first.
>
> So it seems to me that this should be called from pseries_eeh_init().

This happens to use some of the same RTAS calls as EEH, but it's
entirely orthogonal to it. Wedging the two together doesn't make any
real sense IMO since this should be usable even with !CONFIG_EEH.

> That would isolate the code in the right place, and allow you to use the
> existing ibm_get_config_addr_info.
>
> You probably can't use pseries_eeh_get_pe_addr(), because you won't have
> a "pe" structure yet.
>
> Instead you should add a helper that does the core of that logic but
> accepts config_addr/buid as parameters, and then have your code and
> pseries_eeh_get_pe_addr() call that.

I have a patch in my next pile of eeh reworks which kills off
pseries_eeh_get_pe_addr() entirely. It's used to implement
eeh_ops->get_pe_addr for pseries, but the only caller of
->get_pe_addr() is in pseries EEH code and the powernv version is a
stub so I was going to drop it from EEH ops and fold it into the
caller. We could do that in this patch, but it's just going to create
a merge conflict for you to deal with. Up to you.

> *snip*
> > +             list_for_each_entry(phb, &hose_list, list_node) {
> > +                     config_addr = pseries_get_pdn_addr(phb);
> > +                     if (config_addr == -1)
> > +                             continue;
> > +
> > +                     ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> > +                             config_addr, BUID_HI(phb->buid),
> > +                             BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
> > +
> > +                     /* If fundamental-reset not supported, try hot-reset */
> > +                     if (ret == -8)
>
> Where does -8 come from?

There's a comment right there.

It could be better explained I suppose, but you need to read
PAPR/LoPAPR to make sense of anything RTAS so what's it really buying
you?

> Oh I see, it's copied from pseries_eeh_reset().
> > +                             ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> > +                                     config_addr, BUID_HI(phb->buid),
> > +                                     BUID_LO(phb->buid), EEH_RESET_HOT);
> > +
> > +                     if (ret) {
> > +                             pr_err("%s: PHB#%x-PE# failed with rtas_call activate reset=%d\n",
>                                                       ^
>                                                       again missing PE number.
>
> > +                                     __func__, phb->global_number, ret);
> > +                             continue;
> > +                     }
> > +             }
> > +             msleep(EEH_PE_RST_SETTLE_TIME);
>
> So that loop is basically a copy of pseries_eeh_reset() but with the
> sleep hoisted out of the loop.
>
> I'd really prefer to see that refactored into a helper that takes the
> config_addr and buid and doesn't do the sleep.
>
> Then this loop could call that helper, and so could pseries_eeh_reset().

That's better so long as we're not requiring CONFIG_EEH being
selected. pseries_eeh_reset() uses the rtas token variables
initialised in pseries_eeh_init() which are static to the file so
they'd need to be initialised somewhere common.
Michael Ellerman June 17, 2020, 6:30 a.m. UTC | #5
"Oliver O'Halloran" <oohall@gmail.com> writes:
> On Tue, Jun 16, 2020 at 9:55 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>> wenxiong@linux.vnet.ibm.com writes:
>> > From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
>> >
>> > Several device drivers hit EEH(Extended Error handling) when triggering
>> > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
>> > in pci general code when triggering kdump.
>>
>> Actually it's in pseries specific PCI code, and the reset is done in the
>> 2nd kernel as it boots, not when triggering the kdump.
>>
>> You're doing it as a:
>>
>>   machine_postcore_initcall(pseries, pseries_phb_reset);
>>
>> But we do the EEH initialisation in:
>>
>>   core_initcall_sync(eeh_init);
>>
>> Which happens first.
>>
>> So it seems to me that this should be called from pseries_eeh_init().
>
> This happens to use some of the same RTAS calls as EEH, but it's
> entirely orthogonal to it.

I don't agree. I mean it's literally calling EEH_RESET_FUNDAMENTAL etc.
Those RTAS calls are all documented in the EEH section of PAPR.

I guess you're saying it's orthogonal to the kernel handling an EEH and
doing the recovery process etc, which I can kind of see.

> Wedging the two together doesn't make any real sense IMO since this
> should be usable even with !CONFIG_EEH.

You can't turn CONFIG_EEH off for pseries or powernv.

And if you could this patch wouldn't compile because it uses EEH
constants that are behind #ifdef CONFIG_EEH.

If you could turn CONFIG_EEH off it would presumably be because you were
on a platform that didn't support EEH, in which case you wouldn't need
this code.

So IMO this is EEH code, and should be with the other EEH code and
should be behind CONFIG_EEH.

>> That would isolate the code in the right place, and allow you to use the
>> existing ibm_get_config_addr_info.
>>
>> You probably can't use pseries_eeh_get_pe_addr(), because you won't have
>> a "pe" structure yet.
>>
>> Instead you should add a helper that does the core of that logic but
>> accepts config_addr/buid as parameters, and then have your code and
>> pseries_eeh_get_pe_addr() call that.
>
> I have a patch in my next pile of eeh reworks which kills off
> pseries_eeh_get_pe_addr() entirely. It's used to implement
> eeh_ops->get_pe_addr for pseries, but the only caller of
> ->get_pe_addr() is in pseries EEH code and the powernv version is a
> stub so I was going to drop it from EEH ops and fold it into the
> caller. We could do that in this patch, but it's just going to create
> a merge conflict for you to deal with. Up to you.

That sounds like a good cleanup. I'm not concerned about conflicts
within arch/powerpc, I can fix them up.

>> > +             list_for_each_entry(phb, &hose_list, list_node) {
>> > +                     config_addr = pseries_get_pdn_addr(phb);
>> > +                     if (config_addr == -1)
>> > +                             continue;
>> > +
>> > +                     ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
>> > +                             config_addr, BUID_HI(phb->buid),
>> > +                             BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
>> > +
>> > +                     /* If fundamental-reset not supported, try hot-reset */
>> > +                     if (ret == -8)
>>
>> Where does -8 come from?
>
> There's a comment right there.

Yeah I guess. I was expecting it would map to some RTAS_ERROR_FOO value,
but it's just literally -8 in PAPR.

As long as there's only a single usage then I don't mind it.

> It could be better explained I suppose, but you need to read
> PAPR/LoPAPR to make sense of anything RTAS so what's it really buying
> you?

Making the code easier for new folks to understand?

cheers
Oliver O'Halloran June 19, 2020, 6:09 a.m. UTC | #6
On Wed, Jun 17, 2020 at 4:29 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> "Oliver O'Halloran" <oohall@gmail.com> writes:
> > On Tue, Jun 16, 2020 at 9:55 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
> >> wenxiong@linux.vnet.ibm.com writes:
> >> > From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
> >> >
> >> > Several device drivers hit EEH(Extended Error handling) when triggering
> >> > kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> >> > in pci general code when triggering kdump.
> >>
> >> Actually it's in pseries specific PCI code, and the reset is done in the
> >> 2nd kernel as it boots, not when triggering the kdump.
> >>
> >> You're doing it as a:
> >>
> >>   machine_postcore_initcall(pseries, pseries_phb_reset);
> >>
> >> But we do the EEH initialisation in:
> >>
> >>   core_initcall_sync(eeh_init);
> >>
> >> Which happens first.
> >>
> >> So it seems to me that this should be called from pseries_eeh_init().
> >
> > This happens to use some of the same RTAS calls as EEH, but it's
> > entirely orthogonal to it.
>
> I don't agree. I mean it's literally calling EEH_RESET_FUNDAMENTAL etc.
> Those RTAS calls are all documented in the EEH section of PAPR.
>
> I guess you're saying it's orthogonal to the kernel handling an EEH and
> doing the recovery process etc, which I can kind of see.
>
> > Wedging the two together doesn't make any real sense IMO since this
> > should be usable even with !CONFIG_EEH.
>
> You can't turn CONFIG_EEH off for pseries or powernv.

Not yet :)

> And if you could this patch wouldn't compile because it uses EEH
> constants that are behind #ifdef CONFIG_EEH.

That's fixable.

> If you could turn CONFIG_EEH off it would presumably be because you were
> on a platform that didn't support EEH, in which case you wouldn't need
> this code.

I think there's an argument to be made for disabling EEH in some
situations. A lot of drivers do a pretty poor job of recovering in the
first place so it's conceivable that someone might want to disable it
in say, a kdump kernel. That said, the real reason is mostly for the
sake of code organisation. EEH is an optional platform feature but you
wouldn't know it looking at the implementation and I'd like to stop it
bleeding into odd places. Making it buildable without !CONFIG_EEH
would probably help.

> So IMO this is EEH code, and should be with the other EEH code and
> should be behind CONFIG_EEH.

*shrug*

I wanted it to follow the model of the powernv implementation of the
same feature which is done immediately after initialising the
pci_controller and independent of all of the EEH setup. Although,
looking at it again I see it calls pnv_eeh_phb_reset() which is in
eeh_powernv.c so I guess that's pretty similar to what you're
suggesting.

> That sounds like a good cleanup. I'm not concerned about conflicts
> within arch/powerpc, I can fix them up.
>
> >> > +             list_for_each_entry(phb, &hose_list, list_node) {
> >> > +                     config_addr = pseries_get_pdn_addr(phb);
> >> > +                     if (config_addr == -1)
> >> > +                             continue;
> >> > +
> >> > +                     ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> >> > +                             config_addr, BUID_HI(phb->buid),
> >> > +                             BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
> >> > +
> >> > +                     /* If fundamental-reset not supported, try hot-reset */
> >> > +                     if (ret == -8)
> >>
> >> Where does -8 come from?
> >
> > There's a comment right there.
>
> Yeah I guess. I was expecting it would map to some RTAS_ERROR_FOO value,
> but it's just literally -8 in PAPR.

Yeah, as far as I can tell the meaning of the return codes are
specific to each RTAS call, it's a bit bad.
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 911534b89c85..cb7e4276cf04 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -11,6 +11,8 @@ 
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/string.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
 
 #include <asm/eeh.h>
 #include <asm/pci-bridge.h>
@@ -354,3 +356,153 @@  int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 
 	return 0;
 }
+
+/**
+ * pseries_get_pdn_addr - Retrieve PHB address
+ * @pe: EEH PE
+ *
+ * Retrieve the assocated PHB address. Actually, there're 2 RTAS
+ * function calls dedicated for the purpose. We need implement
+ * it through the new function and then the old one. Besides,
+ * you should make sure the config address is figured out from
+ * FDT node before calling the function.
+ *
+ */
+static int pseries_get_pdn_addr(struct pci_controller *phb)
+{
+	int ret = -1;
+	int rets[3];
+	int ibm_get_config_addr_info;
+	int ibm_get_config_addr_info2;
+	int config_addr = 0;
+	struct pci_dn *root_pdn, *pdn;
+
+	ibm_get_config_addr_info2   = rtas_token("ibm,get-config-addr-info2");
+	ibm_get_config_addr_info    = rtas_token("ibm,get-config-addr-info");
+
+	root_pdn = PCI_DN(phb->dn);
+	pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
+	config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+		/*
+		 * First of all, we need to make sure there has one PE
+		 * associated with the device. If option is 1, it
+		 * queries if config address is supported in a PE or not.
+		 * If option is 0, it returns PE config address or config
+		 * address for the PE primary bus.
+		 */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+			config_addr, BUID_HI(pdn->phb->buid),
+			BUID_LO(pdn->phb->buid), 1);
+		if (ret || (rets[0] == 0)) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n",
+				__func__, pdn->phb->global_number, 1, rets[0]);
+			return -1;
+		}
+
+		/* Retrieve the associated PE config address */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+			config_addr, BUID_HI(pdn->phb->buid),
+			BUID_LO(pdn->phb->buid), 0);
+		if (ret) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n",
+				__func__, pdn->phb->global_number, 0, rets[0]);
+			return -1;
+		}
+		return rets[0];
+	}
+
+	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+			config_addr, BUID_HI(pdn->phb->buid),
+			BUID_LO(pdn->phb->buid), 0);
+		if (ret || rets[0]) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE# config_addr=%x\n",
+				__func__, pdn->phb->global_number, rets[0]);
+			return -1;
+		}
+		return rets[0];
+	}
+
+	return ret;
+}
+
+static int __init pseries_phb_reset(void)
+{
+	struct pci_controller *phb;
+	int config_addr;
+	int ibm_set_slot_reset;
+	int ibm_configure_pe;
+	int ret;
+
+	if (is_kdump_kernel() || reset_devices) {
+		pr_info("Issue PHB reset ...\n");
+		ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+		ibm_configure_pe = rtas_token("ibm,configure-pe");
+
+		if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
+				ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
+			pr_info("%s: EEH functionality not supported\n",
+				__func__);
+		}
+
+		list_for_each_entry(phb, &hose_list, list_node) {
+			config_addr = pseries_get_pdn_addr(phb);
+			if (config_addr == -1)
+				continue;
+
+			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
+
+			/* If fundamental-reset not supported, try hot-reset */
+			if (ret == -8)
+				ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+					config_addr, BUID_HI(phb->buid),
+					BUID_LO(phb->buid), EEH_RESET_HOT);
+
+			if (ret) {
+				pr_err("%s: PHB#%x-PE# failed with rtas_call activate reset=%d\n",
+					__func__, phb->global_number, ret);
+				continue;
+			}
+		}
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		list_for_each_entry(phb, &hose_list, list_node) {
+			config_addr = pseries_get_pdn_addr(phb);
+			if (config_addr == -1)
+				continue;
+
+			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
+			if (ret) {
+				pr_err("%s: PHB#%x-PE# failed with rtas_call deactive reset=%d\n",
+					__func__, phb->global_number, ret);
+				continue;
+			}
+		}
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		list_for_each_entry(phb, &hose_list, list_node) {
+			config_addr = pseries_get_pdn_addr(phb);
+			if (config_addr == -1)
+				continue;
+
+			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid));
+			if (ret) {
+				pr_err("%s: PHB#%x-PE# failed with rtas_call configure_pe =%d\n",
+					__func__, phb->global_number, ret);
+				continue;
+			}
+		}
+	}
+
+	return 0;
+}
+machine_postcore_initcall(pseries, pseries_phb_reset);
+