Patchwork hang on panic, can not return to OBP

login
register
mail settings
Submitter Mark Fortescue
Date Feb. 5, 2009, 10:08 p.m.
Message ID <Pine.LNX.4.61.0902052140110.23072@mtfhpc.demon.co.uk>
Download mbox | patch
Permalink /patch/22217/
State RFC
Delegated to: David Miller
Headers show

Comments

Mark Fortescue - Feb. 5, 2009, 10:08 p.m.
Hi Meelis,

On sun4c, the only solution I ever managed to get close to working was a 
prom-based polling loop of the keyboard at the end of panic(). This only 
works when the processor is still running.

'soft lockups' (normally caused by recursion issues when I have 
encountered them) always ended in power-up reset because they stopped the 
processor. I ended up changing the soft lockup code to do a panic before 
things got too out of hand (ie always panic). This reduced the number of 
times I had to power cycle my aging hardware.

It was still an issue for me last time I did any kernel testing (2.6.23,
over a year ago) and if you are having the same issues I had (sparc
processor hardware lockup) a power-up reset each time, until you can find
and prevent the recursion issue, is the only option.

Please find attached my sun4c Panic handling patch. A bit old but may
still be useful (not in the message text, as my email client makes a mess
of in-text patches).

Regards
 	Mark

On Thu, 5 Feb 2009, Meelis Roos wrote:

> Hello,
>
> I started to debug my latest problem with Quad HME. First there is an
> OF mapping problem, then on quad also an IRQ problem, these I did not
> touch. Then came a printk recursion problem that caused a panic, this I did
> not touch yet. The panic told me I could return to PROM with Stop-A but I
> couldn't, and this was the problem I tried to solve.
>
> First, why tell the user to press Stop-A or break at all? Is it because
> we can extract information from Break-T, Break-P etc? Otherwise the
> panic code could drop to prom itself?
>
> I tried the following patch and that did not work - probably because of
> the same reason that Break did not work (interrupts off?). Seems the
> local_irq_enable() is not enough - but what could help here? Is
> prom_halt() the right thing? ... Had a look at what Stop-A does, tried
> also the second patch... but if it would have worked, Stop-A would
> probably also have.
>
> This patch is of course not even close to merging, it's just a hack to
> try it out.
>
> diff --git a/kernel/panic.c b/kernel/panic.c
> index 2a2ff36..ffe6f2a 100644
> --- a/kernel/panic.c
> +++ b/kernel/panic.c
> @@ -22,6 +22,9 @@
> #include <linux/random.h>
> #include <linux/kallsyms.h>
> #include <linux/dmi.h>
> +#ifdef __sparc__
> +#include <asm/oplib.h>
> +#endif
>
> int panic_on_oops;
> static unsigned long tainted_mask;
> @@ -120,13 +123,16 @@ NORET_TYPE void panic(const char * fmt, ...)
> 		extern int stop_a_enabled;
> 		/* Make sure the user can actually press Stop-A (L1-A) */
> 		stop_a_enabled = 1;
> -		printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
> +		printk(KERN_EMERG "Returning to the boot prom\n");
> 	}
> #endif
> #if defined(CONFIG_S390)
> 	disabled_wait(caller);
> #endif
> 	local_irq_enable();
> +#ifdef __sparc__
> +	prom_halt();
> +#endif
> 	for (i = 0;;) {
> 		touch_softlockup_watchdog();
> 		i += panic_blink(i);
>
>
> And the other one:
>
> diff --git a/kernel/panic.c b/kernel/panic.c
> index 2a2ff36..df46a5c 100644
> --- a/kernel/panic.c
> +++ b/kernel/panic.c
> @@ -22,6 +22,9 @@
> #include <linux/random.h>
> #include <linux/kallsyms.h>
> #include <linux/dmi.h>
> +#ifdef __sparc__
> +#include <asm/oplib.h>
> +#endif
>
> int panic_on_oops;
> static unsigned long tainted_mask;
> @@ -120,13 +123,19 @@ NORET_TYPE void panic(const char * fmt, ...)
> 		extern int stop_a_enabled;
> 		/* Make sure the user can actually press Stop-A (L1-A) */
> 		stop_a_enabled = 1;
> -		printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
> +		printk(KERN_EMERG "Returning to the boot prom\n");
> 	}
> #endif
> #if defined(CONFIG_S390)
> 	disabled_wait(caller);
> #endif
> 	local_irq_enable();
> +#ifdef __sparc__
> +	prom_printf("\n");
> +	flush_user_windows();
> +
> +	prom_cmdline();
> +#endif
> 	for (i = 0;;) {
> 		touch_softlockup_watchdog();
> 		i += panic_blink(i);
>
> -- 
> Meelis Roos (mroos@linux.ee)
> --
> To unsubscribe from this list: send the line "unsubscribe sparclinux" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

Patch

diff -ruNpd linux-2.6.20.9/kernel/panic.c linux-test/kernel/panic.c

--- linux-2.6.20.9/kernel/panic.c	2007-04-28 15:02:21.000000000 +0100

+++ linux-test/kernel/panic.c	2007-04-28 04:09:28.000000000 +0100

@@ -106,6 +106,7 @@  NORET_TYPE void panic(const char * fmt, 

 	 	 * Delay timeout seconds before rebooting the machine. 
 		 * We can't use the "normal" timers since we just panicked..
 	 	 */
+	        printk("\n");

 		printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
 		for (i = 0; i < panic_timeout*1000; ) {
 			touch_nmi_watchdog();
@@ -124,6 +125,7 @@  NORET_TYPE void panic(const char * fmt, 

 		extern int stop_a_enabled;
 		/* Make sure the user can actually press Stop-A (L1-A) */
 		stop_a_enabled = 1;
+		printk ("\n");

 		printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
 	}
 #endif
@@ -136,6 +138,33 @@  NORET_TYPE void panic(const char * fmt, 

 		i += panic_blink(i);
 		mdelay(1);
 		i++;
+#ifdef __sparc__

+		{

+			/*

+			 * L1-A processing only works if KBD stuff OK.

+			 * So to debug before KBD up you require this.

+			 * Maybe make this part of the early console stuf.

+			 */

+			static int stop_l1;

+			int        ch;

+			extern int prom_nbgetchar(void);

+

+			if ((ch = prom_nbgetchar()) != -1)

+			{

+				if ((ch == 0x7F) ||

+				    (ch == 0xFE) || (ch == 0xFF))

+					stop_l1 = 0;

+				if ((ch & 0x7F) == 0x01)

+					stop_l1 = 1;

+				if (stop_l1 && ((ch & 0x7F) == 77))

+				{

+					extern void sun_do_break(void);

+

+					sun_do_break();

+				}

+			}

+		}

+#endif /* __sparc__ */

 	}
 }
 
diff -ru -x '.git*' -x .mailmap linux-2.6/drivers/serial/sunzilog.c linux-test/drivers/serial/sunzilog.c

--- linux-2.6/drivers/serial/sunzilog.c	2007-06-15 22:08:45.000000000 +0100

+++ linux-test/drivers/serial/sunzilog.c	2007-06-03 00:35:03.000000000 +0100

@@ -429,6 +429,7 @@ 

 			/* Wait for BREAK to deassert to avoid potentially
 			 * confusing the PROM.
 			 */
+			sun_do_break(); /* On Sun4c, the while loop hangs */

 			while (1) {
 				status = readb(&channel->control);
 				ZSDELAY();
diff -ru -x '.git*' -x .mailmap linux-2.6/kernel/softlockup.c linux-test/kernel/softlockup.c

--- linux-2.6/kernel/softlockup.c	2007-06-15 22:08:46.000000000 +0100

+++ linux-test/kernel/softlockup.c	2007-06-03 00:31:00.000000000 +0100

@@ -105,6 +105,7 @@ 

 			this_cpu);
 		dump_stack();
 		spin_unlock(&print_lock);
+		panic ("Sun4c can't cope with soft lockups\n");

 	}
 }