diff mbox

[Vivid,SRU] powerpc/eeh: Fix missed PE#0 on P7IOC

Message ID 1444073657-26131-1-git-send-email-tim.gardner@canonical.com
State New
Headers show

Commit Message

Tim Gardner Oct. 5, 2015, 7:34 p.m. UTC
From: Gavin Shan <gwshan@linux.vnet.ibm.com>

BugLink: http://bugs.launchpad.net/bugs/1502982

PE#0 should be regarded as valid for P7IOC, while it's invalid for
PHB3. The patch adds the flag EEH_VALID_PE_ZERO to differentiate those
two cases. Without the patch, the frozen state of PE#0 may be cleared
on P7IOC without any EEH recovery being taken, as the following kernel
logs indicate:

[root@ltcfbl8eb ~]# dmesg
       :
pci 0000:00     : [PE# 000] Secondary bus 0 associated with PE#0
pci 0000:01     : [PE# 001] Secondary bus 1 associated with PE#1
pci 0001:00     : [PE# 000] Secondary bus 0 associated with PE#0
pci 0001:01     : [PE# 001] Secondary bus 1 associated with PE#1
pci 0002:00     : [PE# 000] Secondary bus 0 associated with PE#0
pci 0002:01     : [PE# 001] Secondary bus 1 associated with PE#1
pci 0003:00     : [PE# 000] Secondary bus 0 associated with PE#0
pci 0003:01     : [PE# 001] Secondary bus 1 associated with PE#1
pci 0003:20     : [PE# 002] Secondary bus 32..63 associated with PE#2
       :
EEH: Clear non-existing PHB#3-PE#0
EEH: PHB location: U78AE.001.WZS00M9-P1-002

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit 2aa5cf9e48f2f39cc255f8e29964df3ff9ca017b)
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
---
 arch/powerpc/include/asm/eeh.h               |  5 +++--
 arch/powerpc/kernel/eeh_pe.c                 | 14 +++++++++++---
 arch/powerpc/platforms/powernv/eeh-powernv.c | 11 +++++++++++
 3 files changed, 25 insertions(+), 5 deletions(-)

Comments

Chris J Arges Oct. 6, 2015, 1:25 p.m. UTC | #1
On 10/05/2015 02:34 PM, tim.gardner@canonical.com wrote:
> From: Gavin Shan <gwshan@linux.vnet.ibm.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1502982
> 
> PE#0 should be regarded as valid for P7IOC, while it's invalid for
> PHB3. The patch adds flag EEH_VALID_PE_ZERO to differentiate those
> two cases. Without the patch, we possibly see frozen PE#0 state is
> cleared without EEH recovery taken on P7IOC as following kernel logs
> indicate:
> 
> [root@ltcfbl8eb ~]# dmesg
>        :
> pci 0000:00     : [PE# 000] Secondary bus 0 associated with PE#0
> pci 0000:01     : [PE# 001] Secondary bus 1 associated with PE#1
> pci 0001:00     : [PE# 000] Secondary bus 0 associated with PE#0
> pci 0001:01     : [PE# 001] Secondary bus 1 associated with PE#1
> pci 0002:00     : [PE# 000] Secondary bus 0 associated with PE#0
> pci 0002:01     : [PE# 001] Secondary bus 1 associated with PE#1
> pci 0003:00     : [PE# 000] Secondary bus 0 associated with PE#0
> pci 0003:01     : [PE# 001] Secondary bus 1 associated with PE#1
> pci 0003:20     : [PE# 002] Secondary bus 32..63 associated with PE#2
>        :
> EEH: Clear non-existing PHB#3-PE#0
> EEH: PHB location: U78AE.001.WZS00M9-P1-002
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> (cherry picked from commit 2aa5cf9e48f2f39cc255f8e29964df3ff9ca017b)
> Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
> ---
>  arch/powerpc/include/asm/eeh.h               |  5 +++--
>  arch/powerpc/kernel/eeh_pe.c                 | 14 +++++++++++---
>  arch/powerpc/platforms/powernv/eeh-powernv.c | 11 +++++++++++
>  3 files changed, 25 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 40bde0a..55abfd0 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -38,8 +38,9 @@ struct device_node;
>  #define EEH_FORCE_DISABLED	0x02	/* EEH disabled		*/
>  #define EEH_PROBE_MODE_DEV	0x04	/* From PCI device	*/
>  #define EEH_PROBE_MODE_DEVTREE	0x08	/* From device tree	*/
> -#define EEH_ENABLE_IO_FOR_LOG	0x10	/* Enable IO for log	*/
> -#define EEH_EARLY_DUMP_LOG	0x20	/* Dump log immediately	*/
> +#define EEH_VALID_PE_ZERO	0x10	/* PE#0 is valid	*/
> +#define EEH_ENABLE_IO_FOR_LOG	0x20	/* Enable IO for log	*/
> +#define EEH_EARLY_DUMP_LOG	0x40	/* Dump log immediately	*/
>  
>  /*
>   * Delay for PE reset, all in ms
> diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
> index ea83ad9..1e4946c 100644
> --- a/arch/powerpc/kernel/eeh_pe.c
> +++ b/arch/powerpc/kernel/eeh_pe.c
> @@ -239,10 +239,18 @@ static void *__eeh_pe_get(void *data, void *flag)
>  	if (pe->type & EEH_PE_PHB)
>  		return NULL;
>  
> -	/* We prefer PE address */
> -	if (edev->pe_config_addr &&
> -	   (edev->pe_config_addr == pe->addr))
> +	/*
> +	 * We prefer PE address. For most cases, we should
> +	 * have non-zero PE address
> +	 */
> +	if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
> +		if (edev->pe_config_addr == pe->addr)
> +			return pe;
> +	} else {
> +		if (edev->pe_config_addr &&
> +		    (edev->pe_config_addr == pe->addr))
>  		return pe;
> +	}
>  
>  	/* Try BDF address */
>  	if (edev->config_addr &&
> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
> index 1d19e79..e261869 100644
> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> @@ -68,6 +68,17 @@ static int powernv_eeh_init(void)
>  
>  		if (phb->model == PNV_PHB_MODEL_P7IOC)
>  			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
> +
> +		/*
> +		 * PE#0 should be regarded as valid by EEH core
> +		 * if it's not the reserved one. Currently, we
> +		 * have the reserved PE#0 and PE#127 for PHB3
> +		 * and P7IOC separately. So we should regard
> +		 * PE#0 as valid for P7IOC.
> +		 */
> +		if (phb->ioda.reserved_pe != 0)
> +			eeh_add_flag(EEH_VALID_PE_ZERO);
> +
>  		break;
>  	}
>  
>
Seth Forshee Oct. 6, 2015, 6:30 p.m. UTC | #2

Brad Figg Oct. 7, 2015, 5:42 p.m. UTC | #3
Applied to the master-next branch of Vivid.
Brad Figg Oct. 7, 2015, 5:57 p.m. UTC | #4
Applied to the master-next branch of Vivid.
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 40bde0a..55abfd0 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -38,8 +38,9 @@  struct device_node;
 #define EEH_FORCE_DISABLED	0x02	/* EEH disabled		*/
 #define EEH_PROBE_MODE_DEV	0x04	/* From PCI device	*/
 #define EEH_PROBE_MODE_DEVTREE	0x08	/* From device tree	*/
-#define EEH_ENABLE_IO_FOR_LOG	0x10	/* Enable IO for log	*/
-#define EEH_EARLY_DUMP_LOG	0x20	/* Dump log immediately	*/
+#define EEH_VALID_PE_ZERO	0x10	/* PE#0 is valid	*/
+#define EEH_ENABLE_IO_FOR_LOG	0x20	/* Enable IO for log	*/
+#define EEH_EARLY_DUMP_LOG	0x40	/* Dump log immediately	*/
 
 /*
  * Delay for PE reset, all in ms
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index ea83ad9..1e4946c 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -239,10 +239,18 @@  static void *__eeh_pe_get(void *data, void *flag)
 	if (pe->type & EEH_PE_PHB)
 		return NULL;
 
-	/* We prefer PE address */
-	if (edev->pe_config_addr &&
-	   (edev->pe_config_addr == pe->addr))
+	/*
+	 * We prefer PE address. For most cases, we should
+	 * have non-zero PE address
+	 */
+	if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
+		if (edev->pe_config_addr == pe->addr)
+			return pe;
+	} else {
+		if (edev->pe_config_addr &&
+		    (edev->pe_config_addr == pe->addr))
 		return pe;
+	}
 
 	/* Try BDF address */
 	if (edev->config_addr &&
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 1d19e79..e261869 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -68,6 +68,17 @@  static int powernv_eeh_init(void)
 
 		if (phb->model == PNV_PHB_MODEL_P7IOC)
 			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+
+		/*
+		 * PE#0 should be regarded as valid by the EEH core
+		 * if it's not the reserved one. Currently, the
+		 * reserved PEs are PE#0 for PHB3 and PE#127 for
+		 * P7IOC, respectively. So we should regard
+		 * PE#0 as valid for P7IOC.
+		 */
+		if (phb->ioda.reserved_pe != 0)
+			eeh_add_flag(EEH_VALID_PE_ZERO);
+
 		break;
 	}