diff mbox

[GIT,PULL] powerpc64/tracing: Add frame buffer to calls of trace_hardirqs_on/off

Message ID 1293169566.22802.420.camel@gandalf.stny.rr.com (mailing list archive)
State Accepted, archived
Commit 3cb5f1a3e58c0bd70d47d9907cc5c65192281dee
Delegated to: Benjamin Herrenschmidt
Headers show

Commit Message

Steven Rostedt Dec. 24, 2010, 5:46 a.m. UTC
Ben,

Joerg reported a crash with the irqsoff tracer with PPC32.
Unfortunately, I don't have a ppc32 that I can work with (my kids took
it from me). I posted a test patch on the BZ that was opened for it:

https://bugzilla.kernel.org/show_bug.cgi?id=16573

Anyway, I was also able to reproduce the crash on PPC64. This patch
handles that case.

The following patch is in:

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git

    branch: ppc/ftrace


Steven Rostedt (1):
      powerpc64/tracing: Add frame buffer to calls of trace_hardirqs_on/off

----
 arch/powerpc/include/asm/irqflags.h |   40 ++++++++++++++++++++++++++--------
 1 files changed, 30 insertions(+), 10 deletions(-)
---------------------------
commit 5025019505da6731f8be13940bb978617599c935
Author: Steven Rostedt <srostedt@redhat.com>
Date:   Thu Dec 23 21:07:39 2010 -0800

    powerpc64/tracing: Add frame buffer to calls of trace_hardirqs_on/off
    
    When an interrupt occurs in userspace, we can call trace_hardirqs_on/off()
    With one level stack. But if we have irqsoff tracing enabled,
    it checks both CALLER_ADDR0 and CALLER_ADDR1. The second call
    goes two stack frames up. If this is from user space, then there may
    not exist a second stack.
    
    Add a second stack when calling trace_hardirqs_on/off() otherwise
    the following oops might occur:
    
    Oops: Kernel access of bad area, sig: 11 [#1]
    PREEMPT SMP NR_CPUS=2 PA Semi PWRficient
    last sysfs file: /sys/block/sda/size
    Modules linked in: ohci_hcd ehci_hcd usbcore
    NIP: c0000000000e1c00 LR: c0000000000034d4 CTR: 000000011012c440
    REGS: c00000003e2f3af0 TRAP: 0300   Not tainted  (2.6.37-rc6+)
    MSR: 9000000000001032 <ME,IR,DR>  CR: 48044444  XER: 20000000
    DAR: 00000001ffb9db50, DSISR: 0000000040000000
    TASK = c00000003e1a00a0[2088] 'emacs' THREAD: c00000003e2f0000 CPU: 1
    GPR00: 0000000000000001 c00000003e2f3d70 c00000000084e0d0 c0000000008816e8
    GPR04: 000000001034c678 000000001032e8f9 0000000010336540 0000000040020000
    GPR08: 0000000040020000 00000001ffb9db40 c00000003e2f3e30 0000000060000000
    GPR12: 100000000000f032 c00000000fff0280 000000001032e8c9 0000000000000008
    GPR16: 00000000105be9c0 00000000105be950 00000000105be9b0 00000000105be950
    GPR20: 00000000ffb9dc50 00000000ffb9dbf0 00000000102f0000 00000000102f0000
    GPR24: 00000000102e0000 00000000102f0000 0000000010336540 c0000000009ded38
    GPR28: 00000000102e0000 c0000000000034d4 c0000000007ccb10 c00000003e2f3d70
    NIP [c0000000000e1c00] .trace_hardirqs_off+0xb0/0x1d0
    LR [c0000000000034d4] decrementer_common+0xd4/0x100
    Call Trace:
    [c00000003e2f3d70] [c00000003e2f3e30] 0xc00000003e2f3e30 (unreliable)
    [c00000003e2f3e30] [c0000000000034d4] decrementer_common+0xd4/0x100
    Instruction dump:
    81690000 7f8b0000 419e0018 f84a0028 60000000 60000000 60000000 e95f0000
    80030000 e92a0000 eb6301f8 2f800000 <eb890010> 41fe00dc a06d000a eb1e8050
    ---[ end trace 4ec7fd2be9240928 ]---
    
    Reported-by: Joerg Sommer <joerg@alea.gnuu.de>
    Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

Comments

Benjamin Herrenschmidt Dec. 24, 2010, 9:11 p.m. UTC | #1
On Fri, 2010-12-24 at 00:46 -0500, Steven Rostedt wrote:

>  arch/powerpc/include/asm/irqflags.h |   40 ++++++++++++++++++++++++++--------
>  1 files changed, 30 insertions(+), 10 deletions(-)
> ---------------------------
> commit 5025019505da6731f8be13940bb978617599c935
> Author: Steven Rostedt <srostedt@redhat.com>
> Date:   Thu Dec 23 21:07:39 2010 -0800
> 
>     powerpc64/tracing: Add frame buffer to calls of trace_hardirqs_on/off
>     
>     When an interrupt occurs in userspace, we can call trace_hardirqs_on/off()
>     With one level stack. But if we have irqsoff tracing enabled,
>     it checks both CALLER_ADDR0 and CALLER_ADDR1. The second call
>     goes two stack frames up. If this is from user space, then there may
>     not exist a second stack.
>     
>     Add a second stack when calling trace_hardirqs_on/off() otherwise
>     the following oops might occur:

Hrm... this is really gross :-) So we add gratuituous overhead because
the code below is dumb :-) What about making the code less stupid
instead when poking at the stack and detect it's coming from userspace
instead ?

Cheers,
Ben.

>     Oops: Kernel access of bad area, sig: 11 [#1]
>     PREEMPT SMP NR_CPUS=2 PA Semi PWRficient
>     last sysfs file: /sys/block/sda/size
>     Modules linked in: ohci_hcd ehci_hcd usbcore
>     NIP: c0000000000e1c00 LR: c0000000000034d4 CTR: 000000011012c440
>     REGS: c00000003e2f3af0 TRAP: 0300   Not tainted  (2.6.37-rc6+)
>     MSR: 9000000000001032 <ME,IR,DR>  CR: 48044444  XER: 20000000
>     DAR: 00000001ffb9db50, DSISR: 0000000040000000
>     TASK = c00000003e1a00a0[2088] 'emacs' THREAD: c00000003e2f0000 CPU: 1
>     GPR00: 0000000000000001 c00000003e2f3d70 c00000000084e0d0 c0000000008816e8
>     GPR04: 000000001034c678 000000001032e8f9 0000000010336540 0000000040020000
>     GPR08: 0000000040020000 00000001ffb9db40 c00000003e2f3e30 0000000060000000
>     GPR12: 100000000000f032 c00000000fff0280 000000001032e8c9 0000000000000008
>     GPR16: 00000000105be9c0 00000000105be950 00000000105be9b0 00000000105be950
>     GPR20: 00000000ffb9dc50 00000000ffb9dbf0 00000000102f0000 00000000102f0000
>     GPR24: 00000000102e0000 00000000102f0000 0000000010336540 c0000000009ded38
>     GPR28: 00000000102e0000 c0000000000034d4 c0000000007ccb10 c00000003e2f3d70
>     NIP [c0000000000e1c00] .trace_hardirqs_off+0xb0/0x1d0
>     LR [c0000000000034d4] decrementer_common+0xd4/0x100
>     Call Trace:
>     [c00000003e2f3d70] [c00000003e2f3e30] 0xc00000003e2f3e30 (unreliable)
>     [c00000003e2f3e30] [c0000000000034d4] decrementer_common+0xd4/0x100
>     Instruction dump:
>     81690000 7f8b0000 419e0018 f84a0028 60000000 60000000 60000000 e95f0000
>     80030000 e92a0000 eb6301f8 2f800000 <eb890010> 41fe00dc a06d000a eb1e8050
>     ---[ end trace 4ec7fd2be9240928 ]---
>     
>     Reported-by: Joerg Sommer <joerg@alea.gnuu.de>
>     Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
> 
> diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
> index b85d8dd..b0b06d8 100644
> --- a/arch/powerpc/include/asm/irqflags.h
> +++ b/arch/powerpc/include/asm/irqflags.h
> @@ -12,24 +12,44 @@
>  
>  #else
>  #ifdef CONFIG_TRACE_IRQFLAGS
> +#ifdef CONFIG_IRQSOFF_TRACER
> +/*
> + * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
> + * which is the stack frame here, we need to force a stack frame
> + * in case we came from user space.
> + */
> +#define TRACE_WITH_FRAME_BUFFER(func)		\
> +	mflr	r0;				\
> +	stdu	r1, -32(r1);			\
> +	std	r0, 16(r1);			\
> +	stdu	r1, -32(r1);			\
> +	bl func;				\
> +	ld	r1, 0(r1);			\
> +	ld	r1, 0(r1);
> +#else
> +#define TRACE_WITH_FRAME_BUFFER(func)		\
> +	bl func;
> +#endif
> +
>  /*
>   * Most of the CPU's IRQ-state tracing is done from assembly code; we
>   * have to call a C function so call a wrapper that saves all the
>   * C-clobbered registers.
>   */
> -#define TRACE_ENABLE_INTS	bl .trace_hardirqs_on
> -#define TRACE_DISABLE_INTS	bl .trace_hardirqs_off
> -#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)	\
> -	cmpdi	en,0;				\
> -	bne	95f;				\
> -	stb	en,PACASOFTIRQEN(r13);		\
> -	bl	.trace_hardirqs_off;		\
> -	b	skip;				\
> -95:	bl	.trace_hardirqs_on;		\
> +#define TRACE_ENABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on)
> +#define TRACE_DISABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off)
> +
> +#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)		\
> +	cmpdi	en,0;					\
> +	bne	95f;					\
> +	stb	en,PACASOFTIRQEN(r13);			\
> +	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off)	\
> +	b	skip;					\
> +95:	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on)	\
>  	li	en,1;
>  #define TRACE_AND_RESTORE_IRQ(en)		\
>  	TRACE_AND_RESTORE_IRQ_PARTIAL(en,96f);	\
> -	stb	en,PACASOFTIRQEN(r13);	        \
> +	stb	en,PACASOFTIRQEN(r13);		\
>  96:
>  #else
>  #define TRACE_ENABLE_INTS
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Steven Rostedt Dec. 25, 2010, 9:04 p.m. UTC | #2
On Sat, 2010-12-25 at 08:11 +1100, Benjamin Herrenschmidt wrote:
> On Fri, 2010-12-24 at 00:46 -0500, Steven Rostedt wrote:
> 
> >  arch/powerpc/include/asm/irqflags.h |   40 ++++++++++++++++++++++++++--------
> >  1 files changed, 30 insertions(+), 10 deletions(-)
> > ---------------------------
> > commit 5025019505da6731f8be13940bb978617599c935
> > Author: Steven Rostedt <srostedt@redhat.com>
> > Date:   Thu Dec 23 21:07:39 2010 -0800
> > 
> >     powerpc64/tracing: Add frame buffer to calls of trace_hardirqs_on/off
> >     
> >     When an interrupt occurs in userspace, we can call trace_hardirqs_on/off()
> >     With one level stack. But if we have irqsoff tracing enabled,
> >     it checks both CALLER_ADDR0 and CALLER_ADDR1. The second call
> >     goes two stack frames up. If this is from user space, then there may
> >     not exist a second stack.
> >     
> >     Add a second stack when calling trace_hardirqs_on/off() otherwise
> >     the following oops might occur:
> 
> Hrm... this is really gross :-) So we add gratuituous overhead because
> the code below is dumb :-) What about making the code less stupid
> instead when poking at the stack and detect it's coming from userspace
> instead ?

Note, when CONFIG_IRQSOFF_TRACE is set, there's already a bit of
overhead :-)

Anyway, I'll have to take a look at how the frame pointer is set up. Or
we could also set up all stacks coming into the kernel to have a "dummy"
frame pointer that wont hurt anything if we index into it.

Anyway, I'm off till the new year, so I'll worry about it then ;-)

-- Steve
Benjamin Herrenschmidt Dec. 27, 2010, 9:38 p.m. UTC | #3
On Sat, 2010-12-25 at 16:04 -0500, Steven Rostedt wrote:
> 
> Note, when CONFIG_IRQSOFF_TRACE is set, there's already a bit of
> overhead :-)
> 
> Anyway, I'll have to take a look at how the frame pointer is set up.
> Or
> we could also set up all stacks coming into the kernel to have a
> "dummy"
> frame pointer that wont hurt anything if we index into it.
> 
> Anyway, I'm off till the new year, so I'll worry about it then ;-)

Right, might be simpler to just make them loop back onto themselves or
something like that. I can have a look too if I get a chance.

Cheers,
Ben.
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index b85d8dd..b0b06d8 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -12,24 +12,44 @@ 
 
 #else
 #ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_IRQSOFF_TRACER
+/*
+ * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
+ * which is the stack frame here, we need to force a stack frame
+ * in case we came from user space.
+ */
+#define TRACE_WITH_FRAME_BUFFER(func)		\
+	mflr	r0;				\
+	stdu	r1, -32(r1);			\
+	std	r0, 16(r1);			\
+	stdu	r1, -32(r1);			\
+	bl func;				\
+	ld	r1, 0(r1);			\
+	ld	r1, 0(r1);
+#else
+#define TRACE_WITH_FRAME_BUFFER(func)		\
+	bl func;
+#endif
+
 /*
  * Most of the CPU's IRQ-state tracing is done from assembly code; we
  * have to call a C function so call a wrapper that saves all the
  * C-clobbered registers.
  */
-#define TRACE_ENABLE_INTS	bl .trace_hardirqs_on
-#define TRACE_DISABLE_INTS	bl .trace_hardirqs_off
-#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)	\
-	cmpdi	en,0;				\
-	bne	95f;				\
-	stb	en,PACASOFTIRQEN(r13);		\
-	bl	.trace_hardirqs_off;		\
-	b	skip;				\
-95:	bl	.trace_hardirqs_on;		\
+#define TRACE_ENABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on)
+#define TRACE_DISABLE_INTS	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off)
+
+#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip)		\
+	cmpdi	en,0;					\
+	bne	95f;					\
+	stb	en,PACASOFTIRQEN(r13);			\
+	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off)	\
+	b	skip;					\
+95:	TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on)	\
 	li	en,1;
 #define TRACE_AND_RESTORE_IRQ(en)		\
 	TRACE_AND_RESTORE_IRQ_PARTIAL(en,96f);	\
-	stb	en,PACASOFTIRQEN(r13);	        \
+	stb	en,PACASOFTIRQEN(r13);		\
 96:
 #else
 #define TRACE_ENABLE_INTS