diff mbox series

[14/14] powerpc/eeh: Move PE tree setup into the platform

Message ID 20200706013619.459420-15-oohall@gmail.com (mailing list archive)
State Superseded
Headers show
Series [01/14] powerpc/eeh: Remove eeh_dev_phb_init_dynamic() | expand

Commit Message

Oliver O'Halloran July 6, 2020, 1:36 a.m. UTC
The EEH core has a concept of a "PE tree" to support PowerNV. The PE tree
follows the PCI bus structures because a reset asserted on an upstream
bridge will be propagated to the downstream bridges. On pseries there's a
1-1 correspondence between what the guest sees are a PHB and a PE so the
"tree" is really just a single node.

Current the EEH core is reponsible for setting up this PE tree which it
does by traversing the pci_dn tree. The structure of the pci_dn tree
matches the bus tree on PowerNV which leads to the PE tree being "correct"
this setup method doesn't make a whole lot of sense and it's actively
confusing for the pseries case where it doesn't really do anything.

We want to remove the dependence on pci_dn anyway so this patch move
choosing where to insert a new PE into the platform code rather than
being part of the generic EEH code. For PowerNV this simplifies the
tree building logic and removes the use of pci_dn. For pseries we
keep the existing logic. I'm not really convinced it does anything
due to the 1-1 PE-to-PHB correspondence so every device under that
PHB should be in the same PE, but I'd rather not remove it entirely
until we've had a chance to look at it more deeply.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/include/asm/eeh.h               |  2 +-
 arch/powerpc/kernel/eeh_pe.c                 | 70 ++++++--------------
 arch/powerpc/platforms/powernv/eeh-powernv.c | 27 +++++++-
 arch/powerpc/platforms/pseries/eeh_pseries.c | 59 +++++++++++++++--
 4 files changed, 101 insertions(+), 57 deletions(-)

Comments

Alexey Kardashevskiy July 14, 2020, 1:50 a.m. UTC | #1
On 06/07/2020 11:36, Oliver O'Halloran wrote:
> The EEH core has a concept of a "PE tree" to support PowerNV. The PE tree
> follows the PCI bus structures because a reset asserted on an upstream
> bridge will be propagated to the downstream bridges. On pseries there's a
> 1-1 correspondence between what the guest sees are a PHB and a PE so the
> "tree" is really just a single node.
> 
> Current the EEH core is reponsible for setting up this PE tree which it
> does by traversing the pci_dn tree. The structure of the pci_dn tree
> matches the bus tree on PowerNV which leads to the PE tree being "correct"
> this setup method doesn't make a whole lot of sense and it's actively
> confusing for the pseries case where it doesn't really do anything.
> 
> We want to remove the dependence on pci_dn anyway so this patch move
> choosing where to insert a new PE into the platform code rather than
> being part of the generic EEH code. For PowerNV this simplifies the
> tree building logic and removes the use of pci_dn. For pseries we
> keep the existing logic. I'm not really convinced it does anything
> due to the 1-1 PE-to-PHB correspondence so every device under that
> PHB should be in the same PE, but I'd rather not remove it entirely
> until we've had a chance to look at it more deeply.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/include/asm/eeh.h               |  2 +-
>  arch/powerpc/kernel/eeh_pe.c                 | 70 ++++++--------------
>  arch/powerpc/platforms/powernv/eeh-powernv.c | 27 +++++++-
>  arch/powerpc/platforms/pseries/eeh_pseries.c | 59 +++++++++++++++--
>  4 files changed, 101 insertions(+), 57 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 8d34e5b790c2..1cab629dbc74 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -283,7 +283,7 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
>  struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
>  struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
>  			  int pe_no, int config_addr);
> -int eeh_pe_tree_insert(struct eeh_dev *edev);
> +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent);
>  int eeh_pe_tree_remove(struct eeh_dev *edev);
>  void eeh_pe_update_time_stamp(struct eeh_pe *pe);
>  void *eeh_pe_traverse(struct eeh_pe *root,
> diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
> index 898205829a8f..ea2f8b362d18 100644
> --- a/arch/powerpc/kernel/eeh_pe.c
> +++ b/arch/powerpc/kernel/eeh_pe.c
> @@ -318,53 +318,20 @@ struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
>  	return pe;
>  }
>  
> -/**
> - * eeh_pe_get_parent - Retrieve the parent PE
> - * @edev: EEH device
> - *
> - * The whole PEs existing in the system are organized as hierarchy
> - * tree. The function is used to retrieve the parent PE according
> - * to the parent EEH device.
> - */
> -static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
> -{
> -	struct eeh_dev *parent;
> -	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
> -
> -	/*
> -	 * It might have the case for the indirect parent
> -	 * EEH device already having associated PE, but
> -	 * the direct parent EEH device doesn't have yet.
> -	 */
> -	if (edev->physfn)
> -		pdn = pci_get_pdn(edev->physfn);
> -	else
> -		pdn = pdn ? pdn->parent : NULL;
> -	while (pdn) {
> -		/* We're poking out of PCI territory */
> -		parent = pdn_to_eeh_dev(pdn);
> -		if (!parent)
> -			return NULL;
> -
> -		if (parent->pe)
> -			return parent->pe;
> -
> -		pdn = pdn->parent;
> -	}
> -
> -	return NULL;
> -}
> -
>  /**
>   * eeh_pe_tree_insert - Add EEH device to parent PE
>   * @edev: EEH device
> + * @new_pe_parent: PE to create additional PEs under
>   *
> - * Add EEH device to the parent PE. If the parent PE already
> - * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
> - * we have to create new PE to hold the EEH device and the new
> - * PE will be linked to its parent PE as well.
> + * Add EEH device to the PE in edev->pe_config_addr. If a PE already
> + * exists with that address then @edev is added to that PE. Otherwise
> + * a new PE is created and inserted into the PE tree as a child of
> + * @new_pe_parent.
> + *
> + * If @new_pe_parent is NULL then the new PE will be inserted under
> + * directly under the the PHB.
>   */
> -int eeh_pe_tree_insert(struct eeh_dev *edev)
> +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
>  {
>  	struct pci_controller *hose = edev->controller;
>  	struct eeh_pe *pe, *parent;


We can ditch this "parent" in that single place now and use "pe"
instead. And name the new parameter simply "parent". Dunno if it
improves things though.



> @@ -399,7 +366,7 @@ int eeh_pe_tree_insert(struct eeh_dev *edev)
>  			}
>  
>  			eeh_edev_dbg(edev,
> -				     "Added to device PE (parent: PE#%x)\n",
> +				     "Added to existing PE (parent: PE#%x)\n",
>  				     pe->parent->addr);
>  		} else {
>  			/* Mark the PE as type of PCI bus */
> @@ -431,10 +398,9 @@ int eeh_pe_tree_insert(struct eeh_dev *edev)
>  	 * to PHB directly. Otherwise, we have to associate the
>  	 * PE with its parent.
>  	 */
> -	parent = eeh_pe_get_parent(edev);
> -	if (!parent) {
> -		parent = eeh_phb_pe_get(hose);
> -		if (!parent) {
> +	if (!new_pe_parent) {
> +		new_pe_parent = eeh_phb_pe_get(hose);
> +		if (!new_pe_parent) {



afaict only pseries can realisticly pass new_pe_parent==NULL so this
chunk could go to pseries_eeh_pe_get_parent.


>  			pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
>  				__func__, hose->global_number);
>  			edev->pe = NULL;
> @@ -442,17 +408,19 @@ int eeh_pe_tree_insert(struct eeh_dev *edev)
>  			return -EEXIST;
>  		}
>  	}
> -	pe->parent = parent;
> +
> +	/* link new PE into the tree */
> +	pe->parent = new_pe_parent;
> +	list_add_tail(&pe->child, &new_pe_parent->child_list);
>  
>  	/*
>  	 * Put the newly created PE into the child list and
>  	 * link the EEH device accordingly.
>  	 */
> -	list_add_tail(&pe->child, &parent->child_list);
>  	list_add_tail(&edev->entry, &pe->edevs);
>  	edev->pe = pe;
> -	eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
> -		     pe->parent->addr);
> +	eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n",
> +		     new_pe_parent->addr);
>  
>  	return 0;
>  }
> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
> index 8c9fca773692..9af8c3b98853 100644
> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> @@ -338,6 +338,28 @@ static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
>  	return 0;
>  }
>  
> +static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
> +{
> +	struct pci_controller *hose = pdev->bus->sysdata;
> +	struct pnv_phb *phb = hose->private_data;
> +	struct pci_dev *parent = pdev->bus->self;
> +
> +#ifdef CONFIG_PCI_IOV
> +	/* for VFs we use the PF's PE as the upstream PE */
> +	if (pdev->is_virtfn)
> +		parent = pdev->physfn;
> +#endif
> +
> +	/* otherwise use the PE of our parent bridge */
> +	if (parent) {
> +		struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);
> +
> +		return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0);
> +	}
> +
> +	return NULL;
> +}
> +
>  /**
>   * pnv_eeh_probe - Do probe on PCI device
>   * @pdev: pci_dev to probe
> @@ -350,6 +372,7 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)
>  	struct pci_controller *hose = pdn->phb;
>  	struct pnv_phb *phb = hose->private_data;
>  	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
> +	struct eeh_pe *upstream_pe;
>  	uint32_t pcie_flags;
>  	int ret;
>  	int config_addr = (pdn->busno << 8) | (pdn->devfn);
> @@ -398,8 +421,10 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)
>  
>  	edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
>  
> +	upstream_pe = pnv_eeh_get_upstream_pe(pdev);
> +
>  	/* Create PE */
> -	ret = eeh_pe_tree_insert(edev);
> +	ret = eeh_pe_tree_insert(edev, upstream_pe);
>  	if (ret) {
>  		eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);
>  		return NULL;
> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
> index 72556f4436a8..98f9a1b269be 100644
> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> @@ -68,11 +68,16 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
>  	pseries_eeh_init_edev(pdn);
>  #ifdef CONFIG_PCI_IOV
>  	if (pdev->is_virtfn) {
> +		/*
> +		 * FIXME: This really should be handled by choosing the right
> +		 *        parent PE in in pseries_eeh_init_edev().
> +		 */
> +		struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe;
>  		struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
>  
>  		edev->pe_config_addr =  (pdn->busno << 16) | (pdn->devfn << 8);
>  		eeh_pe_tree_remove(edev); /* Remove as it is adding to bus pe */
> -		eeh_pe_tree_insert(edev);   /* Add as VF PE type */
> +		eeh_pe_tree_insert(edev, physfn_pe);   /* Add as VF PE type */
>  	}
>  #endif
>  	eeh_probe_device(pdev);
> @@ -218,6 +223,43 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
>  	return 0;
>  }
>  
> +/**
> + * pseries_eeh_pe_get_parent - Retrieve the parent PE
> + * @edev: EEH device
> + *
> + * The whole PEs existing in the system are organized as hierarchy
> + * tree. The function is used to retrieve the parent PE according
> + * to the parent EEH device.
> + */
> +static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
> +{
> +	struct eeh_dev *parent;
> +	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
> +
> +	/*
> +	 * It might have the case for the indirect parent
> +	 * EEH device already having associated PE, but
> +	 * the direct parent EEH device doesn't have yet.
> +	 */
> +	if (edev->physfn)
> +		pdn = pci_get_pdn(edev->physfn);
> +	else
> +		pdn = pdn ? pdn->parent : NULL;
> +	while (pdn) {
> +		/* We're poking out of PCI territory */


We are traversing up PCI hierarchy here - pci_dn->parent, how is this
out of PCI territory? Or I understand "out of" incorrectly?


> +		parent = pdn_to_eeh_dev(pdn);
> +		if (!parent)
> +			return NULL;
> +
> +		if (parent->pe)
> +			return parent->pe;
> +
> +		pdn = pdn->parent;
> +	}
> +
> +	return NULL;
> +}
> +
>  /**
>   * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn
>   *
> @@ -301,6 +343,8 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
>  	if (ret) {
>  		eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
>  	} else {
> +		struct eeh_pe *parent;
> +
>  		/* Retrieve PE address */
>  		edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn);
>  		pe.addr = edev->pe_config_addr;
> @@ -313,16 +357,23 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
>  		if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
>  			enable = 1;
>  
> -		if (enable) {
> +		/* This device doesn't support EEH, but it may have an
> +		 * EEH parent, in which case we mark it as supported.
> +		 */
> +		parent = pseries_eeh_pe_get_parent(edev);
> +		if (parent && !enable)
> +			edev->pe_config_addr = parent->addr;


What if pseries_eeh_pe_get_parent() returned NULL - we won't write
edev->pe_config_addr so it remains 0 which is fine just by accident? :)

I'd make pseries_eeh_pe_get_parent() always return not NULL (a parent or
a hose) and simplify the block below.


I mean the change is alright but part of the excersise is making the
code more readable but there also can go forever :) so

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>



> +
> +		if (enable || parent) {
>  			eeh_add_flag(EEH_ENABLED);
> -			eeh_pe_tree_insert(edev);
> +			eeh_pe_tree_insert(edev, parent);
>  		} else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
>  			   (pdn_to_eeh_dev(pdn->parent))->pe) {
>  			/* This device doesn't support EEH, but it may have an
>  			 * EEH parent, in which case we mark it as supported.
>  			 */
>  			edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
> -			eeh_pe_tree_insert(edev);
> +			eeh_pe_tree_insert(edev, parent);
>  		}
>  		eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
>  			     (enable ? "enabled" : "unsupported"), ret);
>
Oliver O'Halloran July 14, 2020, 3:08 a.m. UTC | #2
On Tue, Jul 14, 2020 at 11:50 AM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> On 06/07/2020 11:36, Oliver O'Halloran wrote:
> >  /**
> >   * eeh_pe_tree_insert - Add EEH device to parent PE
> >   * @edev: EEH device
> > + * @new_pe_parent: PE to create additional PEs under
> >   *
> > - * Add EEH device to the parent PE. If the parent PE already
> > - * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
> > - * we have to create new PE to hold the EEH device and the new
> > - * PE will be linked to its parent PE as well.
> > + * Add EEH device to the PE in edev->pe_config_addr. If a PE already
> > + * exists with that address then @edev is added to that PE. Otherwise
> > + * a new PE is created and inserted into the PE tree as a child of
> > + * @new_pe_parent.
> > + *
> > + * If @new_pe_parent is NULL then the new PE will be inserted under
> > + * directly under the the PHB.
> >   */
> > -int eeh_pe_tree_insert(struct eeh_dev *edev)
> > +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
> >  {
> >       struct pci_controller *hose = edev->controller;
> >       struct eeh_pe *pe, *parent;
>
>
> We can ditch this "parent" in that single place now and use "pe"
> instead. And name the new parameter simply "parent". Dunno if it
> improves things though.

I did this at some point and then decided not to. It added a bunch of
noise to the diff and calling the parameter "parent" ended up being
pretty unreadable. The parameter is "the parent of the PE that will be
created to contain edev", or "parent of the parent PE". It's pretty
unwieldy.

> > @@ -399,7 +366,7 @@ int eeh_pe_tree_insert(struct eeh_dev *edev)
> >                       }
> >
> >                       eeh_edev_dbg(edev,
> > -                                  "Added to device PE (parent: PE#%x)\n",
> > +                                  "Added to existing PE (parent: PE#%x)\n",
> >                                    pe->parent->addr);
> >               } else {
> >                       /* Mark the PE as type of PCI bus */
> > @@ -431,10 +398,9 @@ int eeh_pe_tree_insert(struct eeh_dev *edev)
> >        * to PHB directly. Otherwise, we have to associate the
> >        * PE with its parent.
> >        */
> > -     parent = eeh_pe_get_parent(edev);
> > -     if (!parent) {
> > -             parent = eeh_phb_pe_get(hose);
> > -             if (!parent) {
> > +     if (!new_pe_parent) {
> > +             new_pe_parent = eeh_phb_pe_get(hose);
> > +             if (!new_pe_parent) {
>
>
>
> afaict only pseries can realisticly pass new_pe_parent==NULL so this
> chunk could go to pseries_eeh_pe_get_parent.

pnv_eeh_get_upstream_pe() will never return the PHB PE so
new_pe_parent will be NULL for the first PE created under a PowerNV
PHB. I guess we could move the PHB PE handling into the platform too,
but I think that just results in having to special case PHB PEs in two
places rather than one.

> > +static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
> > +{
> > +     struct eeh_dev *parent;
> > +     struct pci_dn *pdn = eeh_dev_to_pdn(edev);
> > +
> > +     /*
> > +      * It might have the case for the indirect parent
> > +      * EEH device already having associated PE, but
> > +      * the direct parent EEH device doesn't have yet.
> > +      */
> > +     if (edev->physfn)
> > +             pdn = pci_get_pdn(edev->physfn);
> > +     else
> > +             pdn = pdn ? pdn->parent : NULL;
> > +     while (pdn) {
> > +             /* We're poking out of PCI territory */
>
>
> We are traversing up PCI hierarchy here - pci_dn->parent, how is this
> out of PCI territory? Or I understand "out of" incorrectly?
>
>
> > +             parent = pdn_to_eeh_dev(pdn);
> > +             if (!parent)
> > +                     return NULL;

If there's no eeh dev then the node we're looking at is a PHB rather
than an actual PCI device so it stops looking. I think. The comment
was copied over from the existing code and I haven't spent a whole lot
of time parsing it's meaning.



> > @@ -301,6 +343,8 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
> >       if (ret) {
> >               eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
> >       } else {
> > +             struct eeh_pe *parent;
> > +
> >               /* Retrieve PE address */
> >               edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn);
> >               pe.addr = edev->pe_config_addr;
> > @@ -313,16 +357,23 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
> >               if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
> >                       enable = 1;
> >
> > -             if (enable) {
> > +             /* This device doesn't support EEH, but it may have an
> > +              * EEH parent, in which case we mark it as supported.
> > +              */
> > +             parent = pseries_eeh_pe_get_parent(edev);
> > +             if (parent && !enable)
> > +                     edev->pe_config_addr = parent->addr;
>
>
> What if pseries_eeh_pe_get_parent() returned NULL - we won't write
> edev->pe_config_addr so it remains 0 which is fine just by accident? :)

edev->pe_config_addr is set above when we call
pseries_eeh_get_pe_addr(). The check there is mainly to cover the case
where pseries_eeh_get_pe_addr() fails because the device is on a
subordinate bus rather than the root bus of the PE. PAPR says the
get-pe-addr-info RTAS call can fail in that situation and that you're
supposed to traverse up the DT to find the pe_config_addr, which is
what pe_get_parent() does. Yeah it's confusing, but that's what it
does today too.

> Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>
> > +
> > +             if (enable || parent) {
> >                       eeh_add_flag(EEH_ENABLED);
> > -                     eeh_pe_tree_insert(edev);
> > +                     eeh_pe_tree_insert(edev, parent);

> >               } else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
> >                          (pdn_to_eeh_dev(pdn->parent))->pe) {
> >                       /* This device doesn't support EEH, but it may have an
> >                        * EEH parent, in which case we mark it as supported.
> >                        */
> >                       edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
> > -                     eeh_pe_tree_insert(edev);
> > +                     eeh_pe_tree_insert(edev, parent);

I think I was supposed to delete this hunk and then forgot to since it
handles the same case mentioned above.

> >               }
> >               eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
> >                            (enable ? "enabled" : "unsupported"), ret);
> >
>
> --
> Alexey
Alexey Kardashevskiy July 14, 2020, 9:10 a.m. UTC | #3
On 14/07/2020 13:08, Oliver O'Halloran wrote:
> On Tue, Jul 14, 2020 at 11:50 AM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>> On 06/07/2020 11:36, Oliver O'Halloran wrote:
>>>  /**
>>>   * eeh_pe_tree_insert - Add EEH device to parent PE
>>>   * @edev: EEH device
>>> + * @new_pe_parent: PE to create additional PEs under
>>>   *
>>> - * Add EEH device to the parent PE. If the parent PE already
>>> - * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
>>> - * we have to create new PE to hold the EEH device and the new
>>> - * PE will be linked to its parent PE as well.
>>> + * Add EEH device to the PE in edev->pe_config_addr. If a PE already
>>> + * exists with that address then @edev is added to that PE. Otherwise
>>> + * a new PE is created and inserted into the PE tree as a child of
>>> + * @new_pe_parent.
>>> + *
>>> + * If @new_pe_parent is NULL then the new PE will be inserted under
>>> + * directly under the the PHB.
>>>   */
>>> -int eeh_pe_tree_insert(struct eeh_dev *edev)
>>> +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
>>>  {
>>>       struct pci_controller *hose = edev->controller;
>>>       struct eeh_pe *pe, *parent;
>>
>>
>> We can ditch this "parent" in that single place now and use "pe"
>> instead. And name the new parameter simply "parent". Dunno if it
>> improves things though.
> 
> I did this at some point and then decided not to. It added a bunch of
> noise to the diff and calling the parameter "parent" ended up being
> pretty unreadable. The parameter is "the parent of the PE that will be
> created to contain edev", or "parent of the parent PE". It's pretty
> unwieldy.

Ok fine but we still do not need both pe and parent in that function
(may be one day...).


> 
>>> @@ -399,7 +366,7 @@ int eeh_pe_tree_insert(struct eeh_dev *edev)
>>>                       }
>>>
>>>                       eeh_edev_dbg(edev,
>>> -                                  "Added to device PE (parent: PE#%x)\n",
>>> +                                  "Added to existing PE (parent: PE#%x)\n",
>>>                                    pe->parent->addr);
>>>               } else {
>>>                       /* Mark the PE as type of PCI bus */
>>> @@ -431,10 +398,9 @@ int eeh_pe_tree_insert(struct eeh_dev *edev)
>>>        * to PHB directly. Otherwise, we have to associate the
>>>        * PE with its parent.
>>>        */
>>> -     parent = eeh_pe_get_parent(edev);
>>> -     if (!parent) {
>>> -             parent = eeh_phb_pe_get(hose);
>>> -             if (!parent) {
>>> +     if (!new_pe_parent) {
>>> +             new_pe_parent = eeh_phb_pe_get(hose);
>>> +             if (!new_pe_parent) {
>>
>>
>>
>> afaict only pseries can realisticly pass new_pe_parent==NULL so this
>> chunk could go to pseries_eeh_pe_get_parent.
> 
> pnv_eeh_get_upstream_pe() will never return the PHB PE so
> new_pe_parent will be NULL for the first PE created under a PowerNV
> PHB. I guess we could move the PHB PE handling into the platform too,
> but I think that just results in having to special case PHB PEs in two
> places rather than one.
> 
>>> +static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
>>> +{
>>> +     struct eeh_dev *parent;
>>> +     struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>> +
>>> +     /*
>>> +      * It might have the case for the indirect parent
>>> +      * EEH device already having associated PE, but
>>> +      * the direct parent EEH device doesn't have yet.
>>> +      */
>>> +     if (edev->physfn)
>>> +             pdn = pci_get_pdn(edev->physfn);
>>> +     else
>>> +             pdn = pdn ? pdn->parent : NULL;
>>> +     while (pdn) {
>>> +             /* We're poking out of PCI territory */
>>
>>
>> We are traversing up PCI hierarchy here - pci_dn->parent, how is this
>> out of PCI territory? Or I understand "out of" incorrectly?
>>
>>
>>> +             parent = pdn_to_eeh_dev(pdn);
>>> +             if (!parent)
>>> +                     return NULL;
> 
> If there's no eeh dev then the node we're looking at is a PHB rather
> than an actual PCI device so it stops looking. I think. The comment
> was copied over from the existing code and I haven't spent a whole lot
> of time parsing it's meaning.


I noticed cut-n-paste. May be just ditch it if nobody can parse it?

> 
> 
> 
>>> @@ -301,6 +343,8 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
>>>       if (ret) {
>>>               eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
>>>       } else {
>>> +             struct eeh_pe *parent;
>>> +
>>>               /* Retrieve PE address */
>>>               edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn);
>>>               pe.addr = edev->pe_config_addr;
>>> @@ -313,16 +357,23 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
>>>               if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
>>>                       enable = 1;
>>>
>>> -             if (enable) {
>>> +             /* This device doesn't support EEH, but it may have an
>>> +              * EEH parent, in which case we mark it as supported.
>>> +              */
>>> +             parent = pseries_eeh_pe_get_parent(edev);
>>> +             if (parent && !enable)
>>> +                     edev->pe_config_addr = parent->addr;
>>
>>
>> What if pseries_eeh_pe_get_parent() returned NULL - we won't write
>> edev->pe_config_addr so it remains 0 which is fine just by accident? :)
> 
> edev->pe_config_addr is set above when we call
> pseries_eeh_get_pe_addr(). The check there is mainly to cover the case
> where pseries_eeh_get_pe_addr() fails because the device is on a
> subordinate bus rather than the root bus of the PE. PAPR says the
> get-pe-addr-info RTAS call can fail in that situation and that you're
> supposed to traverse up the DT to find the pe_config_addr, which is
> what pe_get_parent() does. Yeah it's confusing, but that's what it
> does today too.
> 
>> Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>
>>> +
>>> +             if (enable || parent) {
>>>                       eeh_add_flag(EEH_ENABLED);
>>> -                     eeh_pe_tree_insert(edev);
>>> +                     eeh_pe_tree_insert(edev, parent);
> 
>>>               } else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
>>>                          (pdn_to_eeh_dev(pdn->parent))->pe) {
>>>                       /* This device doesn't support EEH, but it may have an
>>>                        * EEH parent, in which case we mark it as supported.
>>>                        */
>>>                       edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
>>> -                     eeh_pe_tree_insert(edev);
>>> +                     eeh_pe_tree_insert(edev, parent);
> 
> I think I was supposed to delete this hunk and then forgot to since it
> handles the same case mentioned above.

A-ha!


> 
>>>               }
>>>               eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
>>>                            (enable ? "enabled" : "unsupported"), ret);
>>>
>>
>> --
>> Alexey
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 8d34e5b790c2..1cab629dbc74 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -283,7 +283,7 @@  struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
 struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
 struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
 			  int pe_no, int config_addr);
-int eeh_pe_tree_insert(struct eeh_dev *edev);
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent);
 int eeh_pe_tree_remove(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
 void *eeh_pe_traverse(struct eeh_pe *root,
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 898205829a8f..ea2f8b362d18 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -318,53 +318,20 @@  struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
 	return pe;
 }
 
-/**
- * eeh_pe_get_parent - Retrieve the parent PE
- * @edev: EEH device
- *
- * The whole PEs existing in the system are organized as hierarchy
- * tree. The function is used to retrieve the parent PE according
- * to the parent EEH device.
- */
-static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
-{
-	struct eeh_dev *parent;
-	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
-	/*
-	 * It might have the case for the indirect parent
-	 * EEH device already having associated PE, but
-	 * the direct parent EEH device doesn't have yet.
-	 */
-	if (edev->physfn)
-		pdn = pci_get_pdn(edev->physfn);
-	else
-		pdn = pdn ? pdn->parent : NULL;
-	while (pdn) {
-		/* We're poking out of PCI territory */
-		parent = pdn_to_eeh_dev(pdn);
-		if (!parent)
-			return NULL;
-
-		if (parent->pe)
-			return parent->pe;
-
-		pdn = pdn->parent;
-	}
-
-	return NULL;
-}
-
 /**
  * eeh_pe_tree_insert - Add EEH device to parent PE
  * @edev: EEH device
+ * @new_pe_parent: PE to create additional PEs under
  *
- * Add EEH device to the parent PE. If the parent PE already
- * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
- * we have to create new PE to hold the EEH device and the new
- * PE will be linked to its parent PE as well.
+ * Add EEH device to the PE in edev->pe_config_addr. If a PE already
+ * exists with that address then @edev is added to that PE. Otherwise
+ * a new PE is created and inserted into the PE tree as a child of
+ * @new_pe_parent.
+ *
+ * If @new_pe_parent is NULL then the new PE will be inserted under
+ * directly under the the PHB.
  */
-int eeh_pe_tree_insert(struct eeh_dev *edev)
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
 {
 	struct pci_controller *hose = edev->controller;
 	struct eeh_pe *pe, *parent;
@@ -399,7 +366,7 @@  int eeh_pe_tree_insert(struct eeh_dev *edev)
 			}
 
 			eeh_edev_dbg(edev,
-				     "Added to device PE (parent: PE#%x)\n",
+				     "Added to existing PE (parent: PE#%x)\n",
 				     pe->parent->addr);
 		} else {
 			/* Mark the PE as type of PCI bus */
@@ -431,10 +398,9 @@  int eeh_pe_tree_insert(struct eeh_dev *edev)
 	 * to PHB directly. Otherwise, we have to associate the
 	 * PE with its parent.
 	 */
-	parent = eeh_pe_get_parent(edev);
-	if (!parent) {
-		parent = eeh_phb_pe_get(hose);
-		if (!parent) {
+	if (!new_pe_parent) {
+		new_pe_parent = eeh_phb_pe_get(hose);
+		if (!new_pe_parent) {
 			pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
 				__func__, hose->global_number);
 			edev->pe = NULL;
@@ -442,17 +408,19 @@  int eeh_pe_tree_insert(struct eeh_dev *edev)
 			return -EEXIST;
 		}
 	}
-	pe->parent = parent;
+
+	/* link new PE into the tree */
+	pe->parent = new_pe_parent;
+	list_add_tail(&pe->child, &new_pe_parent->child_list);
 
 	/*
 	 * Put the newly created PE into the child list and
 	 * link the EEH device accordingly.
 	 */
-	list_add_tail(&pe->child, &parent->child_list);
 	list_add_tail(&edev->entry, &pe->edevs);
 	edev->pe = pe;
-	eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
-		     pe->parent->addr);
+	eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n",
+		     new_pe_parent->addr);
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 8c9fca773692..9af8c3b98853 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -338,6 +338,28 @@  static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
 	return 0;
 }
 
+static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
+{
+	struct pci_controller *hose = pdev->bus->sysdata;
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dev *parent = pdev->bus->self;
+
+#ifdef CONFIG_PCI_IOV
+	/* for VFs we use the PF's PE as the upstream PE */
+	if (pdev->is_virtfn)
+		parent = pdev->physfn;
+#endif
+
+	/* otherwise use the PE of our parent bridge */
+	if (parent) {
+		struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);
+
+		return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0);
+	}
+
+	return NULL;
+}
+
 /**
  * pnv_eeh_probe - Do probe on PCI device
  * @pdev: pci_dev to probe
@@ -350,6 +372,7 @@  static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)
 	struct pci_controller *hose = pdn->phb;
 	struct pnv_phb *phb = hose->private_data;
 	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	struct eeh_pe *upstream_pe;
 	uint32_t pcie_flags;
 	int ret;
 	int config_addr = (pdn->busno << 8) | (pdn->devfn);
@@ -398,8 +421,10 @@  static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)
 
 	edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
 
+	upstream_pe = pnv_eeh_get_upstream_pe(pdev);
+
 	/* Create PE */
-	ret = eeh_pe_tree_insert(edev);
+	ret = eeh_pe_tree_insert(edev, upstream_pe);
 	if (ret) {
 		eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);
 		return NULL;
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 72556f4436a8..98f9a1b269be 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -68,11 +68,16 @@  void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
 	pseries_eeh_init_edev(pdn);
 #ifdef CONFIG_PCI_IOV
 	if (pdev->is_virtfn) {
+		/*
+		 * FIXME: This really should be handled by choosing the right
+		 *        parent PE in in pseries_eeh_init_edev().
+		 */
+		struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe;
 		struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 
 		edev->pe_config_addr =  (pdn->busno << 16) | (pdn->devfn << 8);
 		eeh_pe_tree_remove(edev); /* Remove as it is adding to bus pe */
-		eeh_pe_tree_insert(edev);   /* Add as VF PE type */
+		eeh_pe_tree_insert(edev, physfn_pe);   /* Add as VF PE type */
 	}
 #endif
 	eeh_probe_device(pdev);
@@ -218,6 +223,43 @@  static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
 	return 0;
 }
 
+/**
+ * pseries_eeh_pe_get_parent - Retrieve the parent PE
+ * @edev: EEH device
+ *
+ * The whole PEs existing in the system are organized as hierarchy
+ * tree. The function is used to retrieve the parent PE according
+ * to the parent EEH device.
+ */
+static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
+{
+	struct eeh_dev *parent;
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	/*
+	 * It might have the case for the indirect parent
+	 * EEH device already having associated PE, but
+	 * the direct parent EEH device doesn't have yet.
+	 */
+	if (edev->physfn)
+		pdn = pci_get_pdn(edev->physfn);
+	else
+		pdn = pdn ? pdn->parent : NULL;
+	while (pdn) {
+		/* We're poking out of PCI territory */
+		parent = pdn_to_eeh_dev(pdn);
+		if (!parent)
+			return NULL;
+
+		if (parent->pe)
+			return parent->pe;
+
+		pdn = pdn->parent;
+	}
+
+	return NULL;
+}
+
 /**
  * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn
  *
@@ -301,6 +343,8 @@  void pseries_eeh_init_edev(struct pci_dn *pdn)
 	if (ret) {
 		eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
 	} else {
+		struct eeh_pe *parent;
+
 		/* Retrieve PE address */
 		edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn);
 		pe.addr = edev->pe_config_addr;
@@ -313,16 +357,23 @@  void pseries_eeh_init_edev(struct pci_dn *pdn)
 		if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
 			enable = 1;
 
-		if (enable) {
+		/* This device doesn't support EEH, but it may have an
+		 * EEH parent, in which case we mark it as supported.
+		 */
+		parent = pseries_eeh_pe_get_parent(edev);
+		if (parent && !enable)
+			edev->pe_config_addr = parent->addr;
+
+		if (enable || parent) {
 			eeh_add_flag(EEH_ENABLED);
-			eeh_pe_tree_insert(edev);
+			eeh_pe_tree_insert(edev, parent);
 		} else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
 			   (pdn_to_eeh_dev(pdn->parent))->pe) {
 			/* This device doesn't support EEH, but it may have an
 			 * EEH parent, in which case we mark it as supported.
 			 */
 			edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
-			eeh_pe_tree_insert(edev);
+			eeh_pe_tree_insert(edev, parent);
 		}
 		eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
 			     (enable ? "enabled" : "unsupported"), ret);