diff mbox series

[RFC,v4,2/9] powerpc/powernv/pci: Suppress an EEH error when reading an empty slot

Message ID 20190301160440.26262-3-s.miroshnichenko@yadro.com (mailing list archive)
State Superseded
Headers show
Series powerpc/powernv/pci: Make hotplug self-sufficient, independent of FW and DT | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success next/apply_patch Successfully applied
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 63 lines checked

Commit Message

Sergei Miroshnichenko March 1, 2019, 4:04 p.m. UTC
Reading an empty slot returns all ones, which triggers a false
EEH error event on PowerNV. This patch unfreezes the bus where
it has happened.

Signed-off-by: Sergey Miroshnichenko <s.miroshnichenko@yadro.com>
---
 arch/powerpc/include/asm/ppc-pci.h   |  1 +
 arch/powerpc/kernel/pci_dn.c         |  2 +-
 arch/powerpc/platforms/powernv/pci.c | 34 ++++++++++++++++++++++++----
 3 files changed, 32 insertions(+), 5 deletions(-)

Comments

Oliver O'Halloran March 5, 2019, 6:14 a.m. UTC | #1
On Sat, Mar 2, 2019 at 3:04 AM Sergey Miroshnichenko
<s.miroshnichenko@yadro.com> wrote:
>
> Reading an empty slot returns all ones, which triggers a false
> EEH error event on PowerNV. This patch unfreezes the bus where
> it has happened.
>
> Signed-off-by: Sergey Miroshnichenko <s.miroshnichenko@yadro.com>
> ---
>  arch/powerpc/include/asm/ppc-pci.h   |  1 +
>  arch/powerpc/kernel/pci_dn.c         |  2 +-
>  arch/powerpc/platforms/powernv/pci.c | 34 ++++++++++++++++++++++++----
>  3 files changed, 32 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
> index f67da277d652..737393c54f58 100644
> --- a/arch/powerpc/include/asm/ppc-pci.h
> +++ b/arch/powerpc/include/asm/ppc-pci.h
> @@ -40,6 +40,7 @@ void *traverse_pci_dn(struct pci_dn *root,
>                       void *(*fn)(struct pci_dn *, void *),
>                       void *data);
>  extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus);
>
>  /* From rtas_pci.h */
>  extern void init_pci_config_tokens (void);
> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
> index ab147a1909c8..341ed71250f1 100644
> --- a/arch/powerpc/kernel/pci_dn.c
> +++ b/arch/powerpc/kernel/pci_dn.c
> @@ -40,7 +40,7 @@
>   * one of PF's bridge. For other devices, their firmware
>   * data is linked to that of their bridge.
>   */
> -static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
>  {
>         struct pci_bus *pbus;
>         struct device_node *dn;
> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> index 3260250d2029..73c2d0aed996 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -761,6 +761,21 @@ static inline pnv_pci_cfg_check(struct pci_dn *pdn)
>  }
>  #endif /* CONFIG_EEH */
>
> +static int get_bus_pe_number(struct pci_bus *bus)
> +{
> +       struct pci_dn *pdn = pci_bus_to_pdn(bus);
> +       struct pci_dn *child;
> +
> +       if (!pdn)
> +               return IODA_INVALID_PE;
> +
> +       list_for_each_entry(child, &pdn->child_list, list)
> +               if (child->pe_number != IODA_INVALID_PE)
> +                       return child->pe_number;
> +
> +       return IODA_INVALID_PE;
> +}
> +
>  static int pnv_pci_read_config(struct pci_bus *bus,
>                                unsigned int devfn,
>                                int where, int size, u32 *val)
> @@ -769,12 +784,23 @@ static int pnv_pci_read_config(struct pci_bus *bus,
>         struct pci_controller *hose = pci_bus_to_host(bus);
>         struct pnv_phb *phb = hose->private_data;
>         int ret;
> +       u32 empty_val = 0xFFFFFFFF;
>
> -       *val = 0xFFFFFFFF;
> +       *val = empty_val;
>         pdn = pci_get_pdn_by_devfn(bus, devfn);
> -       if (!pdn)
> -               return pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
> -                                           where, size, val);
> +       if (!pdn) {
> +               int pe_number = get_bus_pe_number(bus);
> +
> +               ret = pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
> +                                          where, size, val);
> +
> +               if (!ret && (*val == empty_val) && phb->unfreeze_pe)

Do this empty val check work when using 1 or 2 byte cfg accesses?

> +                       phb->unfreeze_pe(phb, (pe_number == IODA_INVALID_PE) ?
> +                                        0xff : pe_number,

Use phb->ioda.reserved_pe_idx rather than guessing that 0xff is safe
to use. On P9 we have PHBs with 512 PEs and some older P8 firmware
releases used 0 as the reserved PE rather than 0xff.

> +                                        OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
> +
> +               return ret;
> +       }
>
>         if (!pnv_pci_cfg_check(pdn))
>                 return PCIBIOS_DEVICE_NOT_FOUND;
> --
> 2.20.1
>
Sergei Miroshnichenko March 5, 2019, 3:13 p.m. UTC | #2
On 3/5/19 9:14 AM, Oliver wrote:
> On Sat, Mar 2, 2019 at 3:04 AM Sergey Miroshnichenko
> <s.miroshnichenko@yadro.com> wrote:
>>
>> Reading an empty slot returns all ones, which triggers a false
>> EEH error event on PowerNV. This patch unfreezes the bus where
>> it has happened.
>>
>> Signed-off-by: Sergey Miroshnichenko <s.miroshnichenko@yadro.com>
>> ---
>>  arch/powerpc/include/asm/ppc-pci.h   |  1 +
>>  arch/powerpc/kernel/pci_dn.c         |  2 +-
>>  arch/powerpc/platforms/powernv/pci.c | 34 ++++++++++++++++++++++++----
>>  3 files changed, 32 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
>> index f67da277d652..737393c54f58 100644
>> --- a/arch/powerpc/include/asm/ppc-pci.h
>> +++ b/arch/powerpc/include/asm/ppc-pci.h
>> @@ -40,6 +40,7 @@ void *traverse_pci_dn(struct pci_dn *root,
>>                       void *(*fn)(struct pci_dn *, void *),
>>                       void *data);
>>  extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
>> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus);
>>
>>  /* From rtas_pci.h */
>>  extern void init_pci_config_tokens (void);
>> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
>> index ab147a1909c8..341ed71250f1 100644
>> --- a/arch/powerpc/kernel/pci_dn.c
>> +++ b/arch/powerpc/kernel/pci_dn.c
>> @@ -40,7 +40,7 @@
>>   * one of PF's bridge. For other devices, their firmware
>>   * data is linked to that of their bridge.
>>   */
>> -static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
>> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
>>  {
>>         struct pci_bus *pbus;
>>         struct device_node *dn;
>> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>> index 3260250d2029..73c2d0aed996 100644
>> --- a/arch/powerpc/platforms/powernv/pci.c
>> +++ b/arch/powerpc/platforms/powernv/pci.c
>> @@ -761,6 +761,21 @@ static inline pnv_pci_cfg_check(struct pci_dn *pdn)
>>  }
>>  #endif /* CONFIG_EEH */
>>
>> +static int get_bus_pe_number(struct pci_bus *bus)
>> +{
>> +       struct pci_dn *pdn = pci_bus_to_pdn(bus);
>> +       struct pci_dn *child;
>> +
>> +       if (!pdn)
>> +               return IODA_INVALID_PE;
>> +
>> +       list_for_each_entry(child, &pdn->child_list, list)
>> +               if (child->pe_number != IODA_INVALID_PE)
>> +                       return child->pe_number;
>> +
>> +       return IODA_INVALID_PE;
>> +}
>> +
>>  static int pnv_pci_read_config(struct pci_bus *bus,
>>                                unsigned int devfn,
>>                                int where, int size, u32 *val)
>> @@ -769,12 +784,23 @@ static int pnv_pci_read_config(struct pci_bus *bus,
>>         struct pci_controller *hose = pci_bus_to_host(bus);
>>         struct pnv_phb *phb = hose->private_data;
>>         int ret;
>> +       u32 empty_val = 0xFFFFFFFF;
>>
>> -       *val = 0xFFFFFFFF;
>> +       *val = empty_val;
>>         pdn = pci_get_pdn_by_devfn(bus, devfn);
>> -       if (!pdn)
>> -               return pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
>> -                                           where, size, val);
>> +       if (!pdn) {
>> +               int pe_number = get_bus_pe_number(bus);
>> +
>> +               ret = pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
>> +                                          where, size, val);
>> +
>> +               if (!ret && (*val == empty_val) && phb->unfreeze_pe)
> 
> Do this empty val check work when using 1 or 2 byte cfg accesses?
> 

That was intentional because 0xff and 0xffff are valid values, but the
0xffffffff is the only reliable sign of an empty slot. And the kernel
pokes a slot by the pci_bus_generic_read_dev_vendor_id() function, which
in turn tries to pci_bus_read_config_dword(PCI_VENDOR_ID).

But I haven't tried actually to read 1-2 bytes from an empty slot to
test if that triggers an EEH. If it does, I'll change that to
EEH_IO_ERROR_VALUE(size).

>> +                       phb->unfreeze_pe(phb, (pe_number == IODA_INVALID_PE) ?
>> +                                        0xff : pe_number,
> 
> Use phb->ioda.reserved_pe_idx rather than guessing that 0xff is safe
> to use. On P9 we have PHBs with 512 PEs and some older P8 firmware
> releases used 0 as the reserved PE rather than 0xff.
> 

Thanks for the catch! I'll fix that in v5.

Best regards,
Serge

>> +                                        OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
>> +
>> +               return ret;
>> +       }
>>
>>         if (!pnv_pci_cfg_check(pdn))
>>                 return PCIBIOS_DEVICE_NOT_FOUND;
>> --
>> 2.20.1
>>
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index f67da277d652..737393c54f58 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -40,6 +40,7 @@  void *traverse_pci_dn(struct pci_dn *root,
 		      void *(*fn)(struct pci_dn *, void *),
 		      void *data);
 extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
+struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus);
 
 /* From rtas_pci.h */
 extern void init_pci_config_tokens (void);
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index ab147a1909c8..341ed71250f1 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -40,7 +40,7 @@ 
  * one of PF's bridge. For other devices, their firmware
  * data is linked to that of their bridge.
  */
-static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
+struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
 {
 	struct pci_bus *pbus;
 	struct device_node *dn;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 3260250d2029..73c2d0aed996 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -761,6 +761,21 @@  static inline pnv_pci_cfg_check(struct pci_dn *pdn)
 }
 #endif /* CONFIG_EEH */
 
+static int get_bus_pe_number(struct pci_bus *bus)
+{
+	struct pci_dn *pdn = pci_bus_to_pdn(bus);
+	struct pci_dn *child;
+
+	if (!pdn)
+		return IODA_INVALID_PE;
+
+	list_for_each_entry(child, &pdn->child_list, list)
+		if (child->pe_number != IODA_INVALID_PE)
+			return child->pe_number;
+
+	return IODA_INVALID_PE;
+}
+
 static int pnv_pci_read_config(struct pci_bus *bus,
 			       unsigned int devfn,
 			       int where, int size, u32 *val)
@@ -769,12 +784,23 @@  static int pnv_pci_read_config(struct pci_bus *bus,
 	struct pci_controller *hose = pci_bus_to_host(bus);
 	struct pnv_phb *phb = hose->private_data;
 	int ret;
+	u32 empty_val = 0xFFFFFFFF;
 
-	*val = 0xFFFFFFFF;
+	*val = empty_val;
 	pdn = pci_get_pdn_by_devfn(bus, devfn);
-	if (!pdn)
-		return pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
-					    where, size, val);
+	if (!pdn) {
+		int pe_number = get_bus_pe_number(bus);
+
+		ret = pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
+					   where, size, val);
+
+		if (!ret && (*val == empty_val) && phb->unfreeze_pe)
+			phb->unfreeze_pe(phb, (pe_number == IODA_INVALID_PE) ?
+					 0xff : pe_number,
+					 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+		return ret;
+	}
 
 	if (!pnv_pci_cfg_check(pdn))
 		return PCIBIOS_DEVICE_NOT_FOUND;