Message ID | 1310383915-30543-1-git-send-email-tiejun.chen@windriver.com (mailing list archive) |
---|---|
State | Rejected |
Headers | show |
Tiejun Chen wrote: > When kprobe these operations such as store-and-update-word for SP(r1), > > stwu r1, -A(r1) > > The program exception is triggered, and PPC always allocate an exception frame > as shown as the follows: > > old r1 ---------- > ... > nip > gpr[2] ~ gpr[31] > gpr[1] <--------- old r1 is stored. > gpr[0] > -------- <--------- pr_regs @offset 16 bytes > padding > STACK_FRAME_REGS_MARKER > LR > back chain > new r1 ---------- > Then emulate_step() will emulate this instruction, 'stwu'. Actually its > equivalent to: > 1> Update pr_regs->gpr[1] = mem[old r1 + (-A)] > 2> stw [old r1], mem[old r1 + (-A)] > > Please notice the stack based on new r1 may be covered with mem[old r1 > +(-A)] when addr[old r1 + (-A)] < addr[old r1 + sizeof(an exception frame0]. > So the above 2# operation will overwirte something to break this exception > frame then unexpected kernel problem will be issued. > > So looks we have to implement independed interrupt stack for PPC program > exception when CONFIG_BOOKE is enabled. Here we can use > EXC_LEVEL_EXCEPTION_PROLOG to replace original NORMAL_EXCEPTION_PROLOG > for program exception if CONFIG_BOOKE. Then its always safe for kprobe > with independed exc stack from one pre-allocated and dedicated thread_info. > Actually this is just waht we did for critical/machine check exceptions > on PPC. > > Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com> > --- > arch/powerpc/include/asm/irq.h | 3 +++ > arch/powerpc/include/asm/reg.h | 4 ++++ > arch/powerpc/kernel/entry_32.S | 15 +++++++++++++++ > arch/powerpc/kernel/head_booke.h | 15 +++++++++++++-- > arch/powerpc/kernel/irq.c | 11 +++++++++++ > arch/powerpc/kernel/setup_32.c | 4 ++++ > 6 files changed, 50 insertions(+), 2 deletions(-) > > diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h > index 1bff591..6d12169 100644 > --- a/arch/powerpc/include/asm/irq.h > +++ b/arch/powerpc/include/asm/irq.h > @@ -313,6 +313,9 @@ struct pt_regs; > extern struct thread_info *critirq_ctx[NR_CPUS]; > extern struct thread_info *dbgirq_ctx[NR_CPUS]; > extern struct thread_info *mcheckirq_ctx[NR_CPUS]; > +#if defined(CONFIG_KPROBES) && defined(CONFIG_BOOKE) > +extern struct thread_info *pgirq_ctx[NR_CPUS]; > +#endif > extern void exc_lvl_ctx_init(void); > #else > #define exc_lvl_ctx_init() > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h > index c5cae0d..34d6178 100644 > --- a/arch/powerpc/include/asm/reg.h > +++ b/arch/powerpc/include/asm/reg.h > @@ -885,6 +885,10 @@ > #endif > #define SPRN_SPRG_RVCPU SPRN_SPRG1 > #define SPRN_SPRG_WVCPU SPRN_SPRG1 > +#ifdef CONFIG_KPROBES > +#define SPRN_SPRG_RSCRATCH_PG SPRN_SPRG0 > +#define SPRN_SPRG_WSCRATCH_PG SPRN_SPRG0 > +#endif > #endif > > #ifdef CONFIG_8xx > diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S > index 56212bc..a99e209 100644 > --- a/arch/powerpc/kernel/entry_32.S > +++ b/arch/powerpc/kernel/entry_32.S > @@ -1122,6 +1122,21 @@ ret_from_mcheck_exc: > RESTORE_xSRR(DSRR0,DSRR1); > RESTORE_MMU_REGS; > RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) > + > + .globl ret_from_prog_exc > +ret_from_prog_exc: > + mfspr r9,SPRN_SPRG_THREAD > + lwz r10,SAVED_KSP_LIMIT(r1) > + stw r10,KSP_LIMIT(r9) > + lwz r9,THREAD_INFO-THREAD(r9) > + rlwinm r10,r1,0,0,(31-THREAD_SHIFT) > + lwz r10,TI_PREEMPT(r10) > + stw r10,TI_PREEMPT(r9) > + RESTORE_xSRR(SRR0,SRR1); > + RESTORE_xSRR(CSRR0,CSRR1); > + RESTORE_xSRR(DSRR0,DSRR1); > + RESTORE_MMU_REGS; > + RET_FROM_EXC_LEVEL(SPRN_SRR0, SPRN_SRR1, rfi) > #endif /* CONFIG_BOOKE */ After a further consideration, to improve the above code fragment with the following ------ + + .globl ret_from_prog_exc +ret_from_prog_exc: +#ifdef CONFIG_KPROBES + mfspr r9,SPRN_SPRG_THREAD + lwz r9,THREAD_INFO-THREAD(r9) + rlwinm r10,r1,0,0,(31-THREAD_SHIFT) + lwz r10,TI_PREEMPT(r10) + stw r10,TI_PREEMPT(r9) + RET_FROM_EXC_LEVEL(SPRN_SRR0, SPRN_SRR1, rfi) +#else + b ret_from_except_full +#endif Here remove unnecessary restore, and also make sure its still same as normal program exception when !CONFIG_KPROBES. Tiejun > > /* > diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h > index a0bf158..941be40 100644 > --- a/arch/powerpc/kernel/head_booke.h > +++ b/arch/powerpc/kernel/head_booke.h > @@ -79,6 +79,10 @@ > /* only on e500mc/e200 */ > #define DBG_STACK_BASE dbgirq_ctx > > +#if defined(CONFIG_KPROBES) > +#define PG_STACK_BASE pgirq_ctx > +#endif > + > #define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) > > #ifdef CONFIG_SMP > @@ -158,6 +162,12 @@ > EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1) > #define MCHECK_EXCEPTION_PROLOG \ > EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1) > +#if defined(CONFIG_KPROBES) > +#define PROGRAM_EXCEPTION_PROLOG \ > + EXC_LEVEL_EXCEPTION_PROLOG(PG, SPRN_SRR0, SPRN_SRR1) > +#else > +#define PROGRAM_EXCEPTION_PROLOG NORMAL_EXCEPTION_PROLOG > +#endif > > /* > * Exception vectors. > @@ -370,11 +380,12 @@ label: > > #define PROGRAM_EXCEPTION \ > START_EXCEPTION(Program) \ > - NORMAL_EXCEPTION_PROLOG; \ > + PROGRAM_EXCEPTION_PROLOG; \ > mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ > stw r4,_ESR(r11); \ > addi r3,r1,STACK_FRAME_OVERHEAD; \ > - EXC_XFER_STD(0x0700, program_check_exception) > + EXC_XFER_TEMPLATE(program_check_exception, 0x0700, MSR_KERNEL, NOCOPY,\ > + transfer_to_handler_full, ret_from_prog_exc) > > #define DECREMENTER_EXCEPTION \ > START_EXCEPTION(Decrementer) \ > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c > index 5b428e3..ff5b8dd 100644 > --- a/arch/powerpc/kernel/irq.c > +++ b/arch/powerpc/kernel/irq.c > @@ -397,6 +397,10 @@ struct thread_info *critirq_ctx[NR_CPUS] __read_mostly; > struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly; > struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; > > +#if defined(CONFIG_KPROBES) && defined(CONFIG_BOOKE) > +struct thread_info *pgirq_ctx[NR_CPUS] __read_mostly; > +#endif > + > void exc_lvl_ctx_init(void) > { > struct thread_info *tp; > @@ -423,6 +427,13 @@ void exc_lvl_ctx_init(void) > tp = mcheckirq_ctx[cpu_nr]; > tp->cpu = cpu_nr; > tp->preempt_count = HARDIRQ_OFFSET; > + > +#if defined(CONFIG_KPROBES) > + memset((void *)pgirq_ctx[i], 0, THREAD_SIZE); > + tp = pgirq_ctx[i]; > + tp->cpu = i; > + tp->preempt_count = 0; > +#endif > #endif > } > } > diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c > index 620d792..b872564 100644 > --- a/arch/powerpc/kernel/setup_32.c > +++ b/arch/powerpc/kernel/setup_32.c > @@ -272,6 +272,10 @@ static void __init exc_lvl_early_init(void) > __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); > mcheckirq_ctx[hw_cpu] = (struct thread_info *) > __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); > +#ifdef CONFIG_KPROBES > + pgirq_ctx[hw_cpu] = (struct thread_info *) > + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); > +#endif > #endif > } > }
Tiejun Chen wrote: > v1 -> v2: when allocate pgirq_ctx, use 'hw_cpu' to identify cpu ID in exc_lvl_early_init(). > v2 -> v3: add that specific return-from-program-exc to restore necessary thread info then > we can withdraw the original patch #2. Any feedback? Thanks Tiejun > > Tiejun > _______________________________________________ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev >
On Jul 11, 2011, at 6:31 AM, Tiejun Chen wrote: > When kprobe these operations such as store-and-update-word for SP(r1), > > stwu r1, -A(r1) > > The program exception is triggered, and PPC always allocate an exception frame > as shown as the follows: > > old r1 ---------- > ... > nip > gpr[2] ~ gpr[31] > gpr[1] <--------- old r1 is stored. > gpr[0] > -------- <--------- pr_regs @offset 16 bytes > padding > STACK_FRAME_REGS_MARKER > LR > back chain > new r1 ---------- > Then emulate_step() will emulate this instruction, 'stwu'. Actually its > equivalent to: > 1> Update pr_regs->gpr[1] = mem[old r1 + (-A)] > 2> stw [old r1], mem[old r1 + (-A)] > > Please notice the stack based on new r1 may be covered with mem[old r1 > +(-A)] when addr[old r1 + (-A)] < addr[old r1 + sizeof(an exception frame0]. > So the above 2# operation will overwirte something to break this exception > frame then unexpected kernel problem will be issued. > > So looks we have to implement independed interrupt stack for PPC program > exception when CONFIG_BOOKE is enabled. Here we can use > EXC_LEVEL_EXCEPTION_PROLOG to replace original NORMAL_EXCEPTION_PROLOG > for program exception if CONFIG_BOOKE. Then its always safe for kprobe > with independed exc stack from one pre-allocated and dedicated thread_info. > Actually this is just waht we did for critical/machine check exceptions > on PPC. > > Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com> > --- I'm still very confused why we need a unique stack frame for kprobe/program exceptions on book-e devices. Can you explain this further. - k
On Thu, 14 Jul 2011 08:27:44 -0500 Kumar Gala <galak@kernel.crashing.org> wrote: > > On Jul 11, 2011, at 6:31 AM, Tiejun Chen wrote: > > > When kprobe these operations such as store-and-update-word for SP(r1), > > > > stwu r1, -A(r1) > > > > The program exception is triggered, and PPC always allocate an exception frame > > as shown as the follows: > > > > old r1 ---------- > > ... > > nip > > gpr[2] ~ gpr[31] > > gpr[1] <--------- old r1 is stored. > > gpr[0] > > -------- <--------- pr_regs @offset 16 bytes > > padding > > STACK_FRAME_REGS_MARKER > > LR > > back chain > > new r1 ---------- > > Then emulate_step() will emulate this instruction, 'stwu'. Actually its > > equivalent to: > > 1> Update pr_regs->gpr[1] = mem[old r1 + (-A)] > > 2> stw [old r1], mem[old r1 + (-A)] > > > > Please notice the stack based on new r1 may be covered with mem[old r1 > > +(-A)] when addr[old r1 + (-A)] < addr[old r1 + sizeof(an exception frame0]. > > So the above 2# operation will overwirte something to break this exception > > frame then unexpected kernel problem will be issued. > > > > So looks we have to implement independed interrupt stack for PPC program > > exception when CONFIG_BOOKE is enabled. Here we can use > > EXC_LEVEL_EXCEPTION_PROLOG to replace original NORMAL_EXCEPTION_PROLOG > > for program exception if CONFIG_BOOKE. Then its always safe for kprobe > > with independed exc stack from one pre-allocated and dedicated thread_info. > > Actually this is just waht we did for critical/machine check exceptions > > on PPC. > > > > Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com> > > --- > > I'm still very confused why we need a unique stack frame for kprobe/program exceptions on book-e devices. I don't know why it's booke-specific (or why they're emulating rather than using single-step), but the problem is trying to emulate an instruction that's expanding the non-exception stack into the area that the exception handler is sitting on, and writing to that new stack area. -Scott
Kumar Gala wrote: > On Jul 11, 2011, at 6:31 AM, Tiejun Chen wrote: > >> When kprobe these operations such as store-and-update-word for SP(r1), >> >> stwu r1, -A(r1) >> >> The program exception is triggered, and PPC always allocate an exception frame >> as shown as the follows: >> >> old r1 ---------- >> ... >> nip >> gpr[2] ~ gpr[31] >> gpr[1] <--------- old r1 is stored. >> gpr[0] >> -------- <--------- pr_regs @offset 16 bytes >> padding >> STACK_FRAME_REGS_MARKER >> LR >> back chain >> new r1 ---------- >> Then emulate_step() will emulate this instruction, 'stwu'. Actually its >> equivalent to: >> 1> Update pr_regs->gpr[1] = mem[old r1 + (-A)] >> 2> stw [old r1], mem[old r1 + (-A)] >> >> Please notice the stack based on new r1 may be covered with mem[old r1 >> +(-A)] when addr[old r1 + (-A)] < addr[old r1 + sizeof(an exception frame0]. >> So the above 2# operation will overwirte something to break this exception >> frame then unexpected kernel problem will be issued. >> >> So looks we have to implement independed interrupt stack for PPC program >> exception when CONFIG_BOOKE is enabled. Here we can use >> EXC_LEVEL_EXCEPTION_PROLOG to replace original NORMAL_EXCEPTION_PROLOG >> for program exception if CONFIG_BOOKE. Then its always safe for kprobe >> with independed exc stack from one pre-allocated and dedicated thread_info. >> Actually this is just waht we did for critical/machine check exceptions >> on PPC. >> >> Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com> >> --- > > I'm still very confused why we need a unique stack frame for kprobe/program exceptions on book-e devices. Its a bug at least for Book-E. And if you'd like to check another topic thread, "[BUG?]3.0-rc4+ftrace+kprobe: set kprobe at instruction 'stwu' lead to system crash/freeze", you can know this story completely :) This bug should not be reproduced on PPC64 with the exception prolog/endlog dedicated to PPC64. But I have no enough time to check other PPC32 & !BOOKE so I'm not sure if we should extend this modification. > > Can you explain this further. I can show one of those issued examples. Here we kprobe the entry point of show_interrupts(). (gdb) disassemble show_interrupts Dump of assembler code for function show_interrupts: 0xc0004ff4 <+0>: stwu r1,-48(r1) 0xc0004ff8 <+4>: mflr r0 I add some printk() inside pre_handler() to show pt_regs->gpr[1] and pt_regs->nip. ------ ...... Planted kprobe at c0004ff4 pre_handler: p->addr = 0xc0004ff4, nip = 0xc0004ff4, msr = 0x29000 gpr[1] = de767e50. nip = c0004ff4. When hit this instruction, emulate_step() would emulate this instruction as follows: ------ #1> current pr_regs->gpr[1] = 0xde767e50 - 48 = 0xde767e20; #2> stw (previous pr_regs->gpr[1]), @(current pr_regs->gpr[1]) ==> stw (0xde767e50), 0xde767e20 But after this kprobe process something would be rewrite incorrectly: ------ ...... post_handler: p->addr = 0xc0004ff4, msr = 0x29000 gpr[1] = de767e20. nip = de767e54. ^ If everything is good nip should equal to (0xc0004ff4 + 0x4). But looks its reset with (0xde767e50 + 0x4) via the above #2 operation. So why? As I understand kprobe use 'trap' to enter the program exception. At now PR = 0 so the kernel allocate an exception frame as normal. ---------------- old r1[0xde767e50] 1 pt_regs->result 2 pt_regs->dsisr 3 pt_regs->dar 4 pt_regs->trap 5 pt_regs->mq 6 pt_regs->ccr 7 pt_regs->xer 8 pt_regs->link 9 pt_regs->ctr 10 pt_regs->orig_gpr3 11 pt_regs->msr 12 pt_regs->nip <-- @ 0xde767e50 - 12 x 4 = 0xde767e20 ...... ----------------- new r1[0xde767e50 - INT_FRAME_SIZE] I think you can understand why pt_regs->nip is broken :) So the root cause is an exception frame and the kprobed function stack frame are always overlap. And then someone member inside an exception frame may be corrupted by that emulated stw operation. So I think we have to use one unique stack frame to avoid this when enable CONFIG_KPROBES. Especially for Book-E we can refer easily machine check/critical/debug exception implementation to do this like my patch. Tiejun > > - k > >
On Fri, 15 Jul 2011 13:28:15 +0800 tiejun.chen <tiejun.chen@windriver.com> wrote: > Kumar Gala wrote: > > I'm still very confused why we need a unique stack frame for kprobe/program exceptions on book-e devices. > > Its a bug at least for Book-E. But why only booke? There's nothing booke-specific about the stwu instruction. > And if you'd like to check another topic thread, > "[BUG?]3.0-rc4+ftrace+kprobe: set kprobe at instruction 'stwu' lead to system > crash/freeze", you can know this story completely :) > > This bug should not be reproduced on PPC64 with the exception prolog/endlog > dedicated to PPC64. What if the function you're stepping through is creating a stack frame that is larger than the 288 bytes that get skipped? > But I have no enough time to check other PPC32 & !BOOKE so > I'm not sure if we should extend this modification. Someone should. -Scott
> -----Original Message----- > From: Scott Wood [mailto:scottwood@freescale.com] > Sent: Saturday, July 16, 2011 2:43 AM > To: Chen, Tiejun > Cc: Kumar Gala; linuxppc-dev@ozlabs.org > Subject: Re: [v3 PATCH 1/1] booke/kprobe: make program > exception to use one dedicated exception stack > > On Fri, 15 Jul 2011 13:28:15 +0800 > tiejun.chen <tiejun.chen@windriver.com> wrote: > > > Kumar Gala wrote: > > > I'm still very confused why we need a unique stack frame > for kprobe/program exceptions on book-e devices. > > > > Its a bug at least for Book-E. > > But why only booke? There's nothing booke-specific about the I don't mean this is reproduced only on booke, so I use 'at least' carefully to notice we really see this problem on booke. > stwu instruction. Please note this root cause to this bug is not related to how to emulate stwu instruction. That should be issued from the overlap between an exception frame and the kprobed function stack frame on booke. Would you like to see that example I showed? > > > And if you'd like to check another topic thread, > > "[BUG?]3.0-rc4+ftrace+kprobe: set kprobe at instruction > 'stwu' lead to > > system crash/freeze", you can know this story completely :) > > > > This bug should not be reproduced on PPC64 with the exception > > prolog/endlog dedicated to PPC64. > > What if the function you're stepping through is creating a > stack frame that is larger than the 288 bytes that get skipped? From that bug email looks PPC64 should be good from some feedbacks. So here I think guys who are very familiar with PPC64 can reply this rather than me :) > > > But I have no enough time to check other PPC32 & !BOOKE so I'm not > > sure if we should extend this modification. > > Someone should. This is fine to me. But currently we have to fix this based on booke firstly. Tiejun > > -Scott > >
On Sat, 16 Jul 2011 03:25:47 +0000 "Chen, Tiejun" <Tiejun.Chen@windriver.com> wrote: > > -----Original Message----- > > From: Scott Wood [mailto:scottwood@freescale.com] > > Sent: Saturday, July 16, 2011 2:43 AM > > To: Chen, Tiejun > > Cc: Kumar Gala; linuxppc-dev@ozlabs.org > > Subject: Re: [v3 PATCH 1/1] booke/kprobe: make program > > exception to use one dedicated exception stack > > > > On Fri, 15 Jul 2011 13:28:15 +0800 > > tiejun.chen <tiejun.chen@windriver.com> wrote: > > > > > Kumar Gala wrote: > > > > I'm still very confused why we need a unique stack frame > > for kprobe/program exceptions on book-e devices. > > > > > > Its a bug at least for Book-E. > > > > But why only booke? There's nothing booke-specific about the > > I don't mean this is reproduced only on booke, so I use 'at least' carefully to notice we really see this problem on booke. > > > stwu instruction. > > Please note this root cause to this bug is not related to how to emulate stwu instruction. That should be issued from the overlap between an exception frame and the kprobed function stack frame on booke. Would you like to see that example I showed? As I understand it, the problem comes from the fact that stwu combines the creation of a stack frame with storing into that stack frame. If they were separate instructions you'd have a new exception frame at a lower address by the time you actually store to the non-exception frame. -Scott
Scott Wood wrote: > On Sat, 16 Jul 2011 03:25:47 +0000 > "Chen, Tiejun" <Tiejun.Chen@windriver.com> wrote: > >>> -----Original Message----- >>> From: Scott Wood [mailto:scottwood@freescale.com] >>> Sent: Saturday, July 16, 2011 2:43 AM >>> To: Chen, Tiejun >>> Cc: Kumar Gala; linuxppc-dev@ozlabs.org >>> Subject: Re: [v3 PATCH 1/1] booke/kprobe: make program >>> exception to use one dedicated exception stack >>> >>> On Fri, 15 Jul 2011 13:28:15 +0800 >>> tiejun.chen <tiejun.chen@windriver.com> wrote: >>> >>>> Kumar Gala wrote: >>>>> I'm still very confused why we need a unique stack frame >>> for kprobe/program exceptions on book-e devices. >>>> Its a bug at least for Book-E. >>> But why only booke? There's nothing booke-specific about the >> I don't mean this is reproduced only on booke, so I use 'at least' carefully to notice we really see this problem on booke. >> >>> stwu instruction. >> Please note this root cause to this bug is not related to how to emulate stwu instruction. That should be issued from the overlap between an exception frame and the kprobed function stack frame on booke. Would you like to see that example I showed? > > As I understand it, the problem comes from the fact that stwu combines the > creation of a stack frame with storing into that stack frame. If they were Yes. > separate instructions you'd have a new exception frame at a lower address > by the time you actually store to the non-exception frame. So when kprobe we should use a unique stack frame to skip that stack frame the kprobed stwu want to create. Tiejun > > -Scott > >
tiejun.chen wrote: > Kumar Gala wrote: >> On Jul 11, 2011, at 6:31 AM, Tiejun Chen wrote: >> >>> When kprobe these operations such as store-and-update-word for SP(r1), >>> >>> stwu r1, -A(r1) >>> >>> The program exception is triggered, and PPC always allocate an exception frame >>> as shown as the follows: >>> >>> old r1 ---------- >>> ... >>> nip >>> gpr[2] ~ gpr[31] >>> gpr[1] <--------- old r1 is stored. >>> gpr[0] >>> -------- <--------- pr_regs @offset 16 bytes >>> padding >>> STACK_FRAME_REGS_MARKER >>> LR >>> back chain >>> new r1 ---------- >>> Then emulate_step() will emulate this instruction, 'stwu'. Actually its >>> equivalent to: >>> 1> Update pr_regs->gpr[1] = mem[old r1 + (-A)] >>> 2> stw [old r1], mem[old r1 + (-A)] >>> >>> Please notice the stack based on new r1 may be covered with mem[old r1 >>> +(-A)] when addr[old r1 + (-A)] < addr[old r1 + sizeof(an exception frame0]. >>> So the above 2# operation will overwirte something to break this exception >>> frame then unexpected kernel problem will be issued. >>> >>> So looks we have to implement independed interrupt stack for PPC program >>> exception when CONFIG_BOOKE is enabled. Here we can use >>> EXC_LEVEL_EXCEPTION_PROLOG to replace original NORMAL_EXCEPTION_PROLOG >>> for program exception if CONFIG_BOOKE. Then its always safe for kprobe >>> with independed exc stack from one pre-allocated and dedicated thread_info. >>> Actually this is just waht we did for critical/machine check exceptions >>> on PPC. >>> >>> Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com> >>> --- >> I'm still very confused why we need a unique stack frame for kprobe/program exceptions on book-e devices. > > Its a bug at least for Book-E. And if you'd like to check another topic thread, > "[BUG?]3.0-rc4+ftrace+kprobe: set kprobe at instruction 'stwu' lead to system > crash/freeze", you can know this story completely :) > > This bug should not be reproduced on PPC64 with the exception prolog/endlog > dedicated to PPC64. But I have no enough time to check other PPC32 & !BOOKE so > I'm not sure if we should extend this modification. > >> Can you explain this further. > > I can show one of those issued examples. > > Here we kprobe the entry point of show_interrupts(). > > (gdb) disassemble show_interrupts > Dump of assembler code for function show_interrupts: > 0xc0004ff4 <+0>: stwu r1,-48(r1) > 0xc0004ff8 <+4>: mflr r0 > > I add some printk() inside pre_handler() to show pt_regs->gpr[1] and pt_regs->nip. > ------ > ...... > Planted kprobe at c0004ff4 > pre_handler: p->addr = 0xc0004ff4, nip = 0xc0004ff4, msr = 0x29000 > gpr[1] = de767e50. > nip = c0004ff4. > > When hit this instruction, emulate_step() would emulate this instruction as follows: > ------ > #1> current pr_regs->gpr[1] = 0xde767e50 - 48 = 0xde767e20; > #2> stw (previous pr_regs->gpr[1]), @(current pr_regs->gpr[1]) > ==> stw (0xde767e50), 0xde767e20 > > But after this kprobe process something would be rewrite incorrectly: > ------ > ...... > post_handler: p->addr = 0xc0004ff4, msr = 0x29000 > gpr[1] = de767e20. > nip = de767e54. > ^ > If everything is good nip should equal to (0xc0004ff4 + 0x4). But looks its > reset with (0xde767e50 + 0x4) via the above #2 operation. So why? > > As I understand kprobe use 'trap' to enter the program exception. At now PR = 0 > so the kernel allocate an exception frame as normal. > > ---------------- old r1[0xde767e50] > 1 pt_regs->result > 2 pt_regs->dsisr > 3 pt_regs->dar > 4 pt_regs->trap > 5 pt_regs->mq > 6 pt_regs->ccr > 7 pt_regs->xer > 8 pt_regs->link > 9 pt_regs->ctr > 10 pt_regs->orig_gpr3 > 11 pt_regs->msr > 12 pt_regs->nip <-- @ 0xde767e50 - 12 x 4 = 0xde767e20 > ...... > ----------------- new r1[0xde767e50 - INT_FRAME_SIZE] > > I think you can understand why pt_regs->nip is broken :) So the root cause is an > exception frame and the kprobed function stack frame are always overlap. And > then someone member inside an exception frame may be corrupted by that emulated > stw operation. > > So I think we have to use one unique stack frame to avoid this when enable > CONFIG_KPROBES. Especially for Book-E we can refer easily machine > check/critical/debug exception implementation to do this like my patch. > More questions or suggestions? Tiejun > Tiejun > >> - k
> > As I understand it, the problem comes from the fact that stwu combines the > > creation of a stack frame with storing into that stack frame. If they were > > Yes. > > > separate instructions you'd have a new exception frame at a lower address > > by the time you actually store to the non-exception frame. > > So when kprobe we should use a unique stack frame to skip that stack frame the > kprobed stwu want to create. I still don't like that patch. Potentially the problem exist for all variants of powerpc, not just booke, and I'm not sure I like adding yet another exception stack. Another (non-great) approach would be to special case stwu to the stack, and instead of doing the store while emulating the instruction, keep the store address around and do it later, after the stack has been unwound, in the exit path (a TIF flag to hit the slow path and then do it in the slow path). It sounds hackish but it makes it easier to fix everybody at once, there are "issues" with changing stacks especially on ppc64 and it would definitely be affected as well if the stack frame created is larger than our gap. Cheers, Ben.
Benjamin Herrenschmidt wrote: >>> As I understand it, the problem comes from the fact that stwu combines the >>> creation of a stack frame with storing into that stack frame. If they were >> Yes. >> >>> separate instructions you'd have a new exception frame at a lower address >>> by the time you actually store to the non-exception frame. >> So when kprobe we should use a unique stack frame to skip that stack frame the >> kprobed stwu want to create. > > I still don't like that patch. Potentially the problem exist for all > variants of powerpc, not just booke, and I'm not sure I like adding yet Yes. > another exception stack. But I think we should extend easily this for other powerpc variants. And only when enable CONFIG_KPROBES that dedicated exception stack is valid, so its not such a big risk :) > > Another (non-great) approach would be to special case stwu to the stack, > and instead of doing the store while emulating the instruction, keep the > store address around and do it later, after the stack has been unwound, > in the exit path (a TIF flag to hit the slow path and then do it in the > slow path). Actually I also considered one idea that we do stw-update in the exit path like your proposal. But I'm not sure if its worth intruding a new TIF flag only for 'stwu'. And if I understand what your exit path means properly, we should do this on ret_from_except_full, ... exc_exit_restart: lwz r11,_NIP(r1) lwz r12,_MSR(r1) Looks we have to add something to update as 'stwu' since _NIP/_MSR are also corrupted potentially. So I feel we'll make this complicated if we really do here. exc_exit_start: mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 REST_2GPRS(11, r1) lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: PPC405_ERR77_SYNC rfi b . /* prevent prefetch past rfi */ If I'm wrong please correct me. > > It sounds hackish but it makes it easier to fix everybody at once, there > are "issues" with changing stacks especially on ppc64 and it would > definitely be affected as well if the stack frame created is larger than > our gap. If we provide another exception stack like we did debug exception on ppc64, are there those "issues" you said? Thanks Tiejun > > Cheers, > Ben. > >
On Wed, 2011-08-31 at 17:17 +0800, tiejun.chen wrote: > > It sounds hackish but it makes it easier to fix everybody at once, there > > are "issues" with changing stacks especially on ppc64 and it would > > definitely be affected as well if the stack frame created is larger than > > our gap. > > If we provide another exception stack like we did debug exception on ppc64, are > there those "issues" you said? Actually if we allocate it like the irq stacks, we should be ok, but that's yet another 16K per CPU that needs to be covered by the first segment, nasty.... In any case, please try to come up with a patch that covers all variants. Cheers, Ben.
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 1bff591..6d12169 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -313,6 +313,9 @@ struct pt_regs; extern struct thread_info *critirq_ctx[NR_CPUS]; extern struct thread_info *dbgirq_ctx[NR_CPUS]; extern struct thread_info *mcheckirq_ctx[NR_CPUS]; +#if defined(CONFIG_KPROBES) && defined(CONFIG_BOOKE) +extern struct thread_info *pgirq_ctx[NR_CPUS]; +#endif extern void exc_lvl_ctx_init(void); #else #define exc_lvl_ctx_init() diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c5cae0d..34d6178 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -885,6 +885,10 @@ #endif #define SPRN_SPRG_RVCPU SPRN_SPRG1 #define SPRN_SPRG_WVCPU SPRN_SPRG1 +#ifdef CONFIG_KPROBES +#define SPRN_SPRG_RSCRATCH_PG SPRN_SPRG0 +#define SPRN_SPRG_WSCRATCH_PG SPRN_SPRG0 +#endif #endif #ifdef CONFIG_8xx diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 56212bc..a99e209 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1122,6 +1122,21 @@ ret_from_mcheck_exc: RESTORE_xSRR(DSRR0,DSRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) + + .globl ret_from_prog_exc +ret_from_prog_exc: + mfspr r9,SPRN_SPRG_THREAD + lwz r10,SAVED_KSP_LIMIT(r1) + stw r10,KSP_LIMIT(r9) + lwz r9,THREAD_INFO-THREAD(r9) + rlwinm r10,r1,0,0,(31-THREAD_SHIFT) + lwz r10,TI_PREEMPT(r10) + stw r10,TI_PREEMPT(r9) + RESTORE_xSRR(SRR0,SRR1); + RESTORE_xSRR(CSRR0,CSRR1); + RESTORE_xSRR(DSRR0,DSRR1); + RESTORE_MMU_REGS; + RET_FROM_EXC_LEVEL(SPRN_SRR0, SPRN_SRR1, rfi) #endif /* CONFIG_BOOKE */ /* diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index a0bf158..941be40 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -79,6 +79,10 @@ /* only on e500mc/e200 */ #define DBG_STACK_BASE dbgirq_ctx +#if defined(CONFIG_KPROBES) +#define PG_STACK_BASE pgirq_ctx +#endif + #define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) #ifdef CONFIG_SMP @@ -158,6 +162,12 @@ EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1) #define MCHECK_EXCEPTION_PROLOG \ EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1) +#if defined(CONFIG_KPROBES) +#define PROGRAM_EXCEPTION_PROLOG \ + EXC_LEVEL_EXCEPTION_PROLOG(PG, SPRN_SRR0, SPRN_SRR1) +#else +#define PROGRAM_EXCEPTION_PROLOG NORMAL_EXCEPTION_PROLOG +#endif /* * Exception vectors. @@ -370,11 +380,12 @@ label: #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ - NORMAL_EXCEPTION_PROLOG; \ + PROGRAM_EXCEPTION_PROLOG; \ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x0700, program_check_exception) + EXC_XFER_TEMPLATE(program_check_exception, 0x0700, MSR_KERNEL, NOCOPY,\ + transfer_to_handler_full, ret_from_prog_exc) #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5b428e3..ff5b8dd 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -397,6 +397,10 @@ struct thread_info *critirq_ctx[NR_CPUS] __read_mostly; struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly; struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; +#if defined(CONFIG_KPROBES) && defined(CONFIG_BOOKE) +struct thread_info *pgirq_ctx[NR_CPUS] __read_mostly; +#endif + void exc_lvl_ctx_init(void) { struct thread_info *tp; @@ -423,6 +427,13 @@ void exc_lvl_ctx_init(void) tp = mcheckirq_ctx[cpu_nr]; tp->cpu = cpu_nr; tp->preempt_count = HARDIRQ_OFFSET; + +#if defined(CONFIG_KPROBES) + memset((void *)pgirq_ctx[i], 0, THREAD_SIZE); + tp = pgirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = 0; +#endif #endif } } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 620d792..b872564 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -272,6 +272,10 @@ static void __init exc_lvl_early_init(void) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); mcheckirq_ctx[hw_cpu] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); +#ifdef CONFIG_KPROBES + pgirq_ctx[hw_cpu] = (struct thread_info *) + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); +#endif #endif } }
When kprobe these operations such as store-and-update-word for SP(r1), stwu r1, -A(r1) The program exception is triggered, and PPC always allocate an exception frame as shown as the follows: old r1 ---------- ... nip gpr[2] ~ gpr[31] gpr[1] <--------- old r1 is stored. gpr[0] -------- <--------- pr_regs @offset 16 bytes padding STACK_FRAME_REGS_MARKER LR back chain new r1 ---------- Then emulate_step() will emulate this instruction, 'stwu'. Actually its equivalent to: 1> Update pr_regs->gpr[1] = mem[old r1 + (-A)] 2> stw [old r1], mem[old r1 + (-A)] Please notice the stack based on new r1 may be covered with mem[old r1 +(-A)] when addr[old r1 + (-A)] < addr[old r1 + sizeof(an exception frame0]. So the above 2# operation will overwirte something to break this exception frame then unexpected kernel problem will be issued. So looks we have to implement independed interrupt stack for PPC program exception when CONFIG_BOOKE is enabled. Here we can use EXC_LEVEL_EXCEPTION_PROLOG to replace original NORMAL_EXCEPTION_PROLOG for program exception if CONFIG_BOOKE. Then its always safe for kprobe with independed exc stack from one pre-allocated and dedicated thread_info. Actually this is just waht we did for critical/machine check exceptions on PPC. Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com> --- arch/powerpc/include/asm/irq.h | 3 +++ arch/powerpc/include/asm/reg.h | 4 ++++ arch/powerpc/kernel/entry_32.S | 15 +++++++++++++++ arch/powerpc/kernel/head_booke.h | 15 +++++++++++++-- arch/powerpc/kernel/irq.c | 11 +++++++++++ arch/powerpc/kernel/setup_32.c | 4 ++++ 6 files changed, 50 insertions(+), 2 deletions(-)