diff mbox series

[RFC,11/15] powerpc/eeh: Sync eeh_dev_check_failure()

Message ID 3120a1979b699600fce3c105b66c64559feb0ee9.1569996166.git.sbobroff@linux.ibm.com (mailing list archive)
State RFC
Headers show
Series powerpc/eeh: Synchronize access to struct eeh_pe | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning Failed to apply on branch next (6edfc6487b474fe01857dc3f1a9cd701bb9b21c8)
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Sam Bobroff Oct. 2, 2019, 6:02 a.m. UTC
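Synchronize access to struct eeh_pe in eeh_dev_check_failure() by taking a
reference on the PE while it is in use and by reading pe->parent under the
PE lock.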

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index eb37cb384ff4..171be70b34d8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -447,7 +447,7 @@  static int eeh_phb_check_failure(struct eeh_pe *pe)
 int eeh_dev_check_failure(struct eeh_dev *edev)
 {
 	int ret;
-	unsigned long flags;
+	unsigned long flags, pe_flags;
 	struct device_node *dn;
 	struct pci_dev *dev;
 	struct eeh_pe *pe, *parent_pe;
@@ -464,7 +464,9 @@  int eeh_dev_check_failure(struct eeh_dev *edev)
 		return 0;
 	}
 	dev = eeh_dev_to_pci_dev(edev);
+	/* TODO: Unsafe until eeh_dev can be synchronized with eeh_pe. */
 	pe = eeh_dev_to_pe(edev);
+	eeh_get_pe(pe);
 
 	/* Access to IO BARs might get this far and still not want checking. */
 	if (!pe) {
@@ -475,6 +477,7 @@  int eeh_dev_check_failure(struct eeh_dev *edev)
 
 	if (!pe->addr && !pe->config_addr) {
 		eeh_stats.no_cfg_addr++;
+		eeh_put_pe(pe); /* Release ref */
 		return 0;
 	}
 
@@ -482,17 +485,21 @@  int eeh_dev_check_failure(struct eeh_dev *edev)
 	 * On PowerNV platform, we might already have fenced PHB
 	 * there and we need take care of that firstly.
 	 */
-	ret = eeh_phb_check_failure(pe);
-	if (ret > 0)
+	ret = eeh_phb_check_failure(pe); /* Acquire ref */
+	if (ret > 0) {
+		eeh_put_pe(pe); /* Release ref */
 		return ret;
+	}
 
 	/*
 	 * If the PE isn't owned by us, we shouldn't check the
 	 * state. Instead, let the owner handle it if the PE has
 	 * been frozen.
 	 */
-	if (eeh_pe_passed(pe))
+	if (eeh_pe_passed(pe)) {
+		eeh_put_pe(pe); /* Release ref */
 		return 0;
+	}
 
 	/* If we already have a pending isolation event for this
 	 * slot, we know it's bad already, we don't need to check.
@@ -548,7 +555,10 @@  int eeh_dev_check_failure(struct eeh_dev *edev)
 	 * put into frozen state as well. We should take care
 	 * that at first.
 	 */
+	eeh_lock_pes(&pe_flags);
 	parent_pe = pe->parent;
+	eeh_get_pe(parent_pe); /* Acquire ref */
+	eeh_unlock_pes(pe_flags);
 	while (parent_pe) {
 		/* Hit the ceiling ? */
 		if (parent_pe->type & EEH_PE_PHB)
@@ -557,15 +567,18 @@  int eeh_dev_check_failure(struct eeh_dev *edev)
 		/* Frozen parent PE ? */
 		ret = eeh_ops->get_state(parent_pe, NULL);
 		if (ret > 0 && !eeh_state_active(ret)) {
+			eeh_put_pe(pe); /* Release ref */
 			pe = parent_pe;
+			eeh_get_pe(pe); /* Acquire ref */
 			pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
 			       pe->phb->global_number, pe->addr,
 			       pe->phb->global_number, parent_pe->addr);
 		}
 
 		/* Next parent level */
-		parent_pe = parent_pe->parent;
+		eeh_pe_move_to_parent(&parent_pe);
 	}
+	eeh_put_pe(parent_pe); /* Release ref */
 
 	eeh_stats.slot_resets++;
 
@@ -582,11 +595,12 @@  int eeh_dev_check_failure(struct eeh_dev *edev)
 	 */
 	pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
 		__func__, pe->phb->global_number, pe->addr);
-	eeh_send_failure_event(pe);
+	eeh_send_failure_event(pe); /* Give ref */
 
 	return 1;
 
 dn_unlock:
+	eeh_put_pe(pe); /* Release ref */
 	eeh_serialize_unlock(flags);
 	return rc;
 }