diff mbox series

[6/6] powerpc/eeh: Correct retries in eeh_pe_reset_full()

Message ID 157c0f87f60432aea1b79a020d8b384a5e0e7857.1543460917.git.sbobroff@linux.ibm.com (mailing list archive)
State Accepted
Commit 195482c3633c5ce03c099c3e6b3f283b0ae116d6
Headers show
Series powerpc/eeh: Improve recovery of passed-through devices | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success next/apply_patch Successfully applied
snowpatch_ozlabs/build-ppc64le success build succeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-ppc64be success build succeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-ppc64e success build succeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-pmac32 success build succeded & removed 0 sparse warning(s)
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 53 lines checked

Commit Message

Sam Bobroff Nov. 29, 2018, 3:16 a.m. UTC
Currently, eeh_pe_reset_full() will only attempt to reset a PE more
than once if activating the reset state and deactivating it both
succeed, but later polling shows that it hasn't become active.

Change this so that it will try up to three times for any reason other
than an unrecoverable slot error and adjust the message generation so
that it's clear weather the reset has ultimately succeeded or failed.
This allows the reset to succeed in some situations where it would
currently fail.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index df02f55fdfa1..528caf857428 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -912,7 +912,7 @@  int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
 	int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
 	int type = EEH_RESET_HOT;
 	unsigned int freset = 0;
-	int i, state, ret;
+	int i, state = 0, ret;
 
 	/*
 	 * Determine the type of reset to perform - hot or fundamental.
@@ -930,28 +930,32 @@  int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
 	/* Make three attempts at resetting the bus */
 	for (i = 0; i < 3; i++) {
 		ret = eeh_pe_reset(pe, type, include_passed);
-		if (ret)
-			break;
-
-		ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, include_passed);
-		if (ret)
-			break;
+		if (!ret)
+			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE,
+					   include_passed);
+		if (ret) {
+			ret = -EIO;
+			pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n",
+				state, pe->phb->global_number, pe->addr, i + 1);
+			continue;
+		}
+		if (i)
+			pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n",
+				pe->phb->global_number, pe->addr, i + 1);
 
 		/* Wait until the PE is in a functioning state */
 		state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
 		if (state < 0) {
-			pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
-				__func__, pe->phb->global_number, pe->addr);
+			pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x",
+				pe->phb->global_number, pe->addr);
 			ret = -ENOTRECOVERABLE;
 			break;
 		}
 		if (eeh_state_active(state))
 			break;
-
-		/* Set error in case this is our last attempt */
-		ret = -EIO;
-		pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n",
-			__func__, state, pe->phb->global_number, pe->addr, (i + 1));
+		else
+			pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n",
+				pe->phb->global_number, pe->addr, state, i + 1);
 	}
 
 	/* Resetting the PE may have unfrozen child PEs. If those PEs have been