diff mbox series

[05/14] powerpc/eeh: Defer printing stack trace

Message ID 20190903101605.2890-6-oohall@gmail.com (mailing list archive)
State Accepted
Commit 25baf3d81614b0b8ca8958f4d6f111ccaaaad578
Headers show
Series [01/14] powerpc/eeh: Clean up EEH PEs after recovery finishes | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch next (c317052c95bef1f977b023158e5aa929215f443d)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 6 checks, 143 lines checked

Commit Message

Oliver O'Halloran Sept. 3, 2019, 10:15 a.m. UTC
Currently we print a stack trace in the event handler to help with
debugging EEH issues. In the case of suprise hot-unplug this is unneeded,
so we want to prevent printing the stack trace unless we know it's due to
an actual device error. To accomplish this, we can save a stack trace at
the point of detection and only print it once the EEH recovery handler has
determined the freeze was due to an actual error.

Since the whole point of this is to prevent spurious EEH output we also
move a few prints out of the detection thread, or mark them as pr_debug
so anyone interested can get output from the eeh_check_dev_failure()
if they want.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/include/asm/eeh.h   | 11 +++++++++
 arch/powerpc/kernel/eeh.c        | 15 ++++---------
 arch/powerpc/kernel/eeh_driver.c | 38 +++++++++++++++++++++++++++++++-
 arch/powerpc/kernel/eeh_event.c  | 26 ++++++++++------------
 4 files changed, 64 insertions(+), 26 deletions(-)

Comments

Sam Bobroff Sept. 17, 2019, 1:04 a.m. UTC | #1
On Tue, Sep 03, 2019 at 08:15:56PM +1000, Oliver O'Halloran wrote:
> Currently we print a stack trace in the event handler to help with
> debugging EEH issues. In the case of suprise hot-unplug this is unneeded,
> so we want to prevent printing the stack trace unless we know it's due to
> an actual device error. To accomplish this, we can save a stack trace at
> the point of detection and only print it once the EEH recovery handler has
> determined the freeze was due to an actual error.
> 
> Since the whole point of this is to prevent spurious EEH output we also
> move a few prints out of the detection thread, or mark them as pr_debug
> so anyone interested can get output from the eeh_check_dev_failure()
> if they want.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>

I think this is a good change, and even in the normal case it will place
the stacktrace closer to the rest of the recovery information.

But, I think it would make more sense to put the stacktrace into the
struct eeh_event, rather than the struct eeh_pe. Is there some reason
we can't do that? (It would save a fair bit of memory!)

Cheers,
Sam.

> ---
>  arch/powerpc/include/asm/eeh.h   | 11 +++++++++
>  arch/powerpc/kernel/eeh.c        | 15 ++++---------
>  arch/powerpc/kernel/eeh_driver.c | 38 +++++++++++++++++++++++++++++++-
>  arch/powerpc/kernel/eeh_event.c  | 26 ++++++++++------------
>  4 files changed, 64 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index c13119a5e69b..9d0e1694a94d 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -88,6 +88,17 @@ struct eeh_pe {
>  	struct list_head child_list;	/* List of PEs below this PE	*/
>  	struct list_head child;		/* Memb. child_list/eeh_phb_pe	*/
>  	struct list_head edevs;		/* List of eeh_dev in this PE	*/
> +
> +	/*
> +	 * Saved stack trace. When we find a PE freeze in eeh_dev_check_failure
> +	 * the stack trace is saved here so we can print it in the recovery
> +	 * thread if it turns out to due to a real problem rather than
> +	 * a hot-remove.
> +	 *
> +	 * A max of 64 entries might be overkill, but it also might not be.
> +	 */
> +	unsigned long stack_trace[64];
> +	int trace_entries;
>  };
>  
>  #define eeh_pe_for_each_dev(pe, edev, tmp) \
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 9c468e79d13c..46d17817b438 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -420,11 +420,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
>  	eeh_pe_mark_isolated(phb_pe);
>  	eeh_serialize_unlock(flags);
>  
> -	pr_err("EEH: PHB#%x failure detected, location: %s\n",
> +	pr_debug("EEH: PHB#%x failure detected, location: %s\n",
>  		phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
> -	dump_stack();
>  	eeh_send_failure_event(phb_pe);
> -
>  	return 1;
>  out:
>  	eeh_serialize_unlock(flags);
> @@ -451,7 +449,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
>  	unsigned long flags;
>  	struct device_node *dn;
>  	struct pci_dev *dev;
> -	struct eeh_pe *pe, *parent_pe, *phb_pe;
> +	struct eeh_pe *pe, *parent_pe;
>  	int rc = 0;
>  	const char *location = NULL;
>  
> @@ -581,13 +579,8 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
>  	 * a stack trace will help the device-driver authors figure
>  	 * out what happened.  So print that out.
>  	 */
> -	phb_pe = eeh_phb_pe_get(pe->phb);
> -	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
> -	       pe->phb->global_number, pe->addr);
> -	pr_err("EEH: PE location: %s, PHB location: %s\n",
> -	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
> -	dump_stack();
> -
> +	pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
> +		__func__, pe->phb->global_number, pe->addr);
>  	eeh_send_failure_event(pe);
>  
>  	return 1;
> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
> index 52ce7584af43..0d34cc12c529 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -863,8 +863,44 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
>  			if (eeh_slot_presence_check(edev->pdev))
>  				devices++;
>  
> -	if (!devices)
> +	if (!devices) {
> +		pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
> +			pe->phb->global_number, pe->addr);
>  		goto out; /* nothing to recover */
> +	}
> +
> +	/* Log the event */
> +	if (pe->type & EEH_PE_PHB) {
> +		pr_err("EEH: PHB#%x failure detected, location: %s\n",
> +			pe->phb->global_number, eeh_pe_loc_get(pe));
> +	} else {
> +		struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
> +
> +		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
> +		       pe->phb->global_number, pe->addr);
> +		pr_err("EEH: PE location: %s, PHB location: %s\n",
> +		       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
> +	}
> +
> +	/*
> +	 * Print the saved stack trace now that we've verified there's
> +	 * something to recover.
> +	 */
> +	if (pe->trace_entries) {
> +		void **ptrs = (void **) pe->stack_trace;
> +		int i;
> +
> +		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
> +		       pe->phb->global_number, pe->addr);
> +
> +		/* FIXME: Use the same format as dump_stack() */
> +		pr_err("EEH: Call Trace:\n");
> +		for (i = 0; i < pe->trace_entries; i++)
> +			pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
> +
> +		pe->trace_entries = 0;
> +	}
> +
>  
>  	eeh_pe_update_time_stamp(pe);
>  	pe->freeze_count++;
> diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
> index e36653e5f76b..1d55486adb0f 100644
> --- a/arch/powerpc/kernel/eeh_event.c
> +++ b/arch/powerpc/kernel/eeh_event.c
> @@ -40,7 +40,6 @@ static int eeh_event_handler(void * dummy)
>  {
>  	unsigned long flags;
>  	struct eeh_event *event;
> -	struct eeh_pe *pe;
>  
>  	while (!kthread_should_stop()) {
>  		if (wait_for_completion_interruptible(&eeh_eventlist_event))
> @@ -59,19 +58,10 @@ static int eeh_event_handler(void * dummy)
>  			continue;
>  
>  		/* We might have event without binding PE */
> -		pe = event->pe;
> -		if (pe) {
> -			if (pe->type & EEH_PE_PHB)
> -				pr_info("EEH: Detected error on PHB#%x\n",
> -					 pe->phb->global_number);
> -			else
> -				pr_info("EEH: Detected PCI bus error on "
> -					"PHB#%x-PE#%x\n",
> -					pe->phb->global_number, pe->addr);
> -			eeh_handle_normal_event(pe);
> -		} else {
> +		if (event->pe)
> +			eeh_handle_normal_event(event->pe);
> +		else
>  			eeh_handle_special_event();
> -		}
>  
>  		kfree(event);
>  	}
> @@ -126,8 +116,16 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
>  	 * This prevents the PE from being free()ed by a hotplug driver
>  	 * while the PE is sitting in the event queue.
>  	 */
> -	if (pe)
> +	if (pe) {
> +		/*
> +		 * Save the current stack trace so we can dump it from the
> +		 * event handler thread.
> +		 */
> +		pe->trace_entries = stack_trace_save(pe->stack_trace,
> +					 ARRAY_SIZE(pe->stack_trace), 0);
> +
>  		eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
> +	}
>  
>  	/* We may or may not be called in an interrupt context */
>  	spin_lock_irqsave(&eeh_eventlist_lock, flags);
> -- 
> 2.21.0
>
Oliver O'Halloran Sept. 17, 2019, 1:45 a.m. UTC | #2
On Tue, Sep 17, 2019 at 11:04 AM Sam Bobroff <sbobroff@linux.ibm.com> wrote:
>
> On Tue, Sep 03, 2019 at 08:15:56PM +1000, Oliver O'Halloran wrote:
> > Currently we print a stack trace in the event handler to help with
> > debugging EEH issues. In the case of suprise hot-unplug this is unneeded,
> > so we want to prevent printing the stack trace unless we know it's due to
> > an actual device error. To accomplish this, we can save a stack trace at
> > the point of detection and only print it once the EEH recovery handler has
> > determined the freeze was due to an actual error.
> >
> > Since the whole point of this is to prevent spurious EEH output we also
> > move a few prints out of the detection thread, or mark them as pr_debug
> > so anyone interested can get output from the eeh_check_dev_failure()
> > if they want.
> >
> > Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
>
> I think this is a good change, and even in the normal case it will place
> the stacktrace closer to the rest of the recovery information.
>
> But, I think it would make more sense to put the stacktrace into the
> struct eeh_event, rather than the struct eeh_pe. Is there some reason
> we can't do that? (It would save a fair bit of memory!)

Two reasons:

1) the eeh_event structures are allocated with GFP_ATOMIC since
eeh_dev_check_failure() can be called from any context. Minimising the
number of atomic allocations we do is a good idea as a matter of
course.
2) We don't pass the eeh_event structure to the event handler
function. I guess we could, but... eh

I don't see the memory saving as hugely significant either. There's
always fewer eeh_pe structures than there are PCI devices since some
will share PEs (e.g. switches, multifunction cards) so you'd be saving
a dozen KB at most.

root@zaius1:~# lspci | wc -l
59
root@zaius1:~# echo $(( $(lspci | wc -l) * 64 * 8))
30208

I think we'll live...

>
> Cheers,
> Sam.
Sam Bobroff Sept. 17, 2019, 3:35 a.m. UTC | #3
On Tue, Sep 17, 2019 at 11:45:14AM +1000, Oliver O'Halloran wrote:
> On Tue, Sep 17, 2019 at 11:04 AM Sam Bobroff <sbobroff@linux.ibm.com> wrote:
> >
> > On Tue, Sep 03, 2019 at 08:15:56PM +1000, Oliver O'Halloran wrote:
> > > Currently we print a stack trace in the event handler to help with
> > > debugging EEH issues. In the case of suprise hot-unplug this is unneeded,
> > > so we want to prevent printing the stack trace unless we know it's due to
> > > an actual device error. To accomplish this, we can save a stack trace at
> > > the point of detection and only print it once the EEH recovery handler has
> > > determined the freeze was due to an actual error.
> > >
> > > Since the whole point of this is to prevent spurious EEH output we also
> > > move a few prints out of the detection thread, or mark them as pr_debug
> > > so anyone interested can get output from the eeh_check_dev_failure()
> > > if they want.
> > >
> > > Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> >
> > I think this is a good change, and even in the normal case it will place
> > the stacktrace closer to the rest of the recovery information.
> >
> > But, I think it would make more sense to put the stacktrace into the
> > struct eeh_event, rather than the struct eeh_pe. Is there some reason
> > we can't do that? (It would save a fair bit of memory!)
> 
> Two reasons:
> 
> 1) the eeh_event structures are allocated with GFP_ATOMIC since
> eeh_dev_check_failure() can be called from any context. Minimising the
> number of atomic allocations we do is a good idea as a matter of
> course.

Yes, but I meant directly inside eeh_event so there wouldn't be a second
allocation. It would just be a bit bigger.

> 2) We don't pass the eeh_event structure to the event handler
> function. I guess we could, but... eh
> 
> I don't see the memory saving as hugely significant either. There's
> always fewer eeh_pe structures than there are PCI devices since some
> will share PEs (e.g. switches, multifunction cards) so you'd be saving
> a dozen KB at most.
> 
> root@zaius1:~# lspci | wc -l
> 59
> root@zaius1:~# echo $(( $(lspci | wc -l) * 64 * 8))
> 30208
> 
> I think we'll live...

Sure, I don't have very strong feelings about it either way.

> >
> > Cheers,
> > Sam.
Oliver O'Halloran Sept. 17, 2019, 3:38 a.m. UTC | #4
On Tue, Sep 17, 2019 at 1:35 PM Sam Bobroff <sbobroff@linux.ibm.com> wrote:
>
> On Tue, Sep 17, 2019 at 11:45:14AM +1000, Oliver O'Halloran wrote:
> >
> > Two reasons:
> >
> > 1) the eeh_event structures are allocated with GFP_ATOMIC since
> > eeh_dev_check_failure() can be called from any context. Minimising the
> > number of atomic allocations we do is a good idea as a matter of
> > course.
>
> Yes, but I meant directly inside eeh_event so there wouldn't be a second
> allocation. It would just be a bit bigger.

Right, my point was that increasing the allocation size also increases
the chance of an allocation failure.
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index c13119a5e69b..9d0e1694a94d 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -88,6 +88,17 @@  struct eeh_pe {
 	struct list_head child_list;	/* List of PEs below this PE	*/
 	struct list_head child;		/* Memb. child_list/eeh_phb_pe	*/
 	struct list_head edevs;		/* List of eeh_dev in this PE	*/
+
+	/*
+	 * Saved stack trace. When we find a PE freeze in eeh_dev_check_failure
+	 * the stack trace is saved here so we can print it in the recovery
+	 * thread if it turns out to due to a real problem rather than
+	 * a hot-remove.
+	 *
+	 * A max of 64 entries might be overkill, but it also might not be.
+	 */
+	unsigned long stack_trace[64];
+	int trace_entries;
 };
 
 #define eeh_pe_for_each_dev(pe, edev, tmp) \
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9c468e79d13c..46d17817b438 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -420,11 +420,9 @@  static int eeh_phb_check_failure(struct eeh_pe *pe)
 	eeh_pe_mark_isolated(phb_pe);
 	eeh_serialize_unlock(flags);
 
-	pr_err("EEH: PHB#%x failure detected, location: %s\n",
+	pr_debug("EEH: PHB#%x failure detected, location: %s\n",
 		phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
-	dump_stack();
 	eeh_send_failure_event(phb_pe);
-
 	return 1;
 out:
 	eeh_serialize_unlock(flags);
@@ -451,7 +449,7 @@  int eeh_dev_check_failure(struct eeh_dev *edev)
 	unsigned long flags;
 	struct device_node *dn;
 	struct pci_dev *dev;
-	struct eeh_pe *pe, *parent_pe, *phb_pe;
+	struct eeh_pe *pe, *parent_pe;
 	int rc = 0;
 	const char *location = NULL;
 
@@ -581,13 +579,8 @@  int eeh_dev_check_failure(struct eeh_dev *edev)
 	 * a stack trace will help the device-driver authors figure
 	 * out what happened.  So print that out.
 	 */
-	phb_pe = eeh_phb_pe_get(pe->phb);
-	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
-	       pe->phb->global_number, pe->addr);
-	pr_err("EEH: PE location: %s, PHB location: %s\n",
-	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
-	dump_stack();
-
+	pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
+		__func__, pe->phb->global_number, pe->addr);
 	eeh_send_failure_event(pe);
 
 	return 1;
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 52ce7584af43..0d34cc12c529 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -863,8 +863,44 @@  void eeh_handle_normal_event(struct eeh_pe *pe)
 			if (eeh_slot_presence_check(edev->pdev))
 				devices++;
 
-	if (!devices)
+	if (!devices) {
+		pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+			pe->phb->global_number, pe->addr);
 		goto out; /* nothing to recover */
+	}
+
+	/* Log the event */
+	if (pe->type & EEH_PE_PHB) {
+		pr_err("EEH: PHB#%x failure detected, location: %s\n",
+			pe->phb->global_number, eeh_pe_loc_get(pe));
+	} else {
+		struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
+
+		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+		       pe->phb->global_number, pe->addr);
+		pr_err("EEH: PE location: %s, PHB location: %s\n",
+		       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+	}
+
+	/*
+	 * Print the saved stack trace now that we've verified there's
+	 * something to recover.
+	 */
+	if (pe->trace_entries) {
+		void **ptrs = (void **) pe->stack_trace;
+		int i;
+
+		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+		       pe->phb->global_number, pe->addr);
+
+		/* FIXME: Use the same format as dump_stack() */
+		pr_err("EEH: Call Trace:\n");
+		for (i = 0; i < pe->trace_entries; i++)
+			pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
+
+		pe->trace_entries = 0;
+	}
+
 
 	eeh_pe_update_time_stamp(pe);
 	pe->freeze_count++;
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index e36653e5f76b..1d55486adb0f 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -40,7 +40,6 @@  static int eeh_event_handler(void * dummy)
 {
 	unsigned long flags;
 	struct eeh_event *event;
-	struct eeh_pe *pe;
 
 	while (!kthread_should_stop()) {
 		if (wait_for_completion_interruptible(&eeh_eventlist_event))
@@ -59,19 +58,10 @@  static int eeh_event_handler(void * dummy)
 			continue;
 
 		/* We might have event without binding PE */
-		pe = event->pe;
-		if (pe) {
-			if (pe->type & EEH_PE_PHB)
-				pr_info("EEH: Detected error on PHB#%x\n",
-					 pe->phb->global_number);
-			else
-				pr_info("EEH: Detected PCI bus error on "
-					"PHB#%x-PE#%x\n",
-					pe->phb->global_number, pe->addr);
-			eeh_handle_normal_event(pe);
-		} else {
+		if (event->pe)
+			eeh_handle_normal_event(event->pe);
+		else
 			eeh_handle_special_event();
-		}
 
 		kfree(event);
 	}
@@ -126,8 +116,16 @@  int __eeh_send_failure_event(struct eeh_pe *pe)
 	 * This prevents the PE from being free()ed by a hotplug driver
 	 * while the PE is sitting in the event queue.
 	 */
-	if (pe)
+	if (pe) {
+		/*
+		 * Save the current stack trace so we can dump it from the
+		 * event handler thread.
+		 */
+		pe->trace_entries = stack_trace_save(pe->stack_trace,
+					 ARRAY_SIZE(pe->stack_trace), 0);
+
 		eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+	}
 
 	/* We may or may not be called in an interrupt context */
 	spin_lock_irqsave(&eeh_eventlist_lock, flags);