Message ID | 20210320122227.345427-1-mpe@ellerman.id.au (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | [v7] powerpc/irq: Inline call_do_irq() and call_do_softirq() | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | warning | Failed to apply on branch powerpc/merge (87d76f542a24ecfa797e9bd3bb56c0f19aabff57) |
snowpatch_ozlabs/apply_patch | warning | Failed to apply on branch powerpc/next (fbda7904302499dd7ffc073a3c84eb7c9275db0a) |
snowpatch_ozlabs/apply_patch | warning | Failed to apply on branch linus/master (1c273e10bc0cc7efb933e0ca10e260cdfc9f0b8c) |
snowpatch_ozlabs/apply_patch | warning | Failed to apply on branch powerpc/fixes (cc7a0bb058b85ea03db87169c60c7cfdd5d34678) |
snowpatch_ozlabs/apply_patch | warning | Failed to apply on branch linux-next (1df27313f50a57497c1faeb6a6ae4ca939c85a7d) |
snowpatch_ozlabs/apply_patch | fail | Failed to apply to any branch |
Le 20/03/2021 à 13:22, Michael Ellerman a écrit : > From: Christophe Leroy <christophe.leroy@csgroup.eu> > > call_do_irq() and call_do_softirq() are simple enough to be > worth inlining. > > Inlining them avoids an mflr/mtlr pair plus a save/reload on stack. It > also allows GCC to keep the saved ksp_limit in an nonvolatile reg. We don't have the ksp_limit anymore, I forgot to remove the above text. > > This is inspired from S390 arch. Several other arches do more or > less the same. The way sparc arch does seems odd thought. > > Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> > Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> > --- > > v2: no change. > v3: no change. > v4: > - comment reminding the purpose of the inline asm block. > - added r2 as clobbered reg > v5: > - Limiting the change to PPC32 for now. > - removed r2 from the clobbered regs list (on PPC32 r2 points to current all the time) > - Removed patch 1 and merged ksp_limit handling in here. > v6: > - Rebase on top of merge-test (ca6e327fefb2). > - Remove the ksp_limit stuff as it's doesn't exist anymore. > > v7: > mpe: > - Enable for 64-bit too. This all in-kernel code calling in-kernel > code, and must use the kernel TOC. Great. > - Use named parameters for the inline asm. Hmm. It is the first time we use named parameters in powerpc assembly, isn't it ? I saw when investigating userspace access that x86 is using named parameters widely. Wondering, how would the below look like with named parameters (from __put_user_asm2_goto) ? stw%X1 %L0, %L1 > - Reformat inline asm. > - Mark as always_inline. > - Drop unused ret from call_do_softirq(), add r3 as clobbered. 
> --- > arch/powerpc/include/asm/irq.h | 2 -- > arch/powerpc/kernel/irq.c | 41 ++++++++++++++++++++++++++++++++++ > arch/powerpc/kernel/misc_32.S | 25 --------------------- > arch/powerpc/kernel/misc_64.S | 22 ------------------ > 4 files changed, 41 insertions(+), 49 deletions(-) > > diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h > index f3f264e441a7..b2bd58830430 100644 > --- a/arch/powerpc/include/asm/irq.h > +++ b/arch/powerpc/include/asm/irq.h > @@ -53,8 +53,6 @@ extern void *mcheckirq_ctx[NR_CPUS]; > extern void *hardirq_ctx[NR_CPUS]; > extern void *softirq_ctx[NR_CPUS]; > > -void call_do_softirq(void *sp); > -void call_do_irq(struct pt_regs *regs, void *sp); > extern void do_IRQ(struct pt_regs *regs); > extern void __init init_IRQ(void); > extern void __do_irq(struct pt_regs *regs); > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c > index 5b72abbff96c..260effc0a435 100644 > --- a/arch/powerpc/kernel/irq.c > +++ b/arch/powerpc/kernel/irq.c > @@ -667,6 +667,47 @@ static inline void check_stack_overflow(void) > } > } > > +static __always_inline void call_do_softirq(const void *sp) > +{ > + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ > + asm volatile ( > + PPC_STLU " %%r1, %[offset](%[sp]) ;" > + "mr %%r1, %[sp] ;" > + "bl %[callee] ;" > + PPC_LL " %%r1, 0(%%r1) ;" > + : // Outputs > + : // Inputs > + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), > + [callee] "i" (__do_softirq) > + : // Clobbers > + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", > + "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", > + "r11", "r12" > + ); > +} > + > +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) > +{ > + register unsigned long r3 asm("r3") = (unsigned long)regs; > + > + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. 
*/ > + asm volatile ( > + PPC_STLU " %%r1, %[offset](%[sp]) ;" > + "mr %%r1, %[sp] ;" > + "bl %[callee] ;" > + PPC_LL " %%r1, 0(%%r1) ;" > + : // Outputs > + "+r" (r3) > + : // Inputs > + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), > + [callee] "i" (__do_irq) > + : // Clobbers > + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", > + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", > + "r11", "r12" > + ); > +} > + > void __do_irq(struct pt_regs *regs) > { > unsigned int irq; > diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S > index acc410043b96..6a076bef2932 100644 > --- a/arch/powerpc/kernel/misc_32.S > +++ b/arch/powerpc/kernel/misc_32.S > @@ -27,31 +27,6 @@ > > .text > > -_GLOBAL(call_do_softirq) > - mflr r0 > - stw r0,4(r1) > - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) > - mr r1,r3 > - bl __do_softirq > - lwz r1,0(r1) > - lwz r0,4(r1) > - mtlr r0 > - blr > - > -/* > - * void call_do_irq(struct pt_regs *regs, void *sp); > - */ > -_GLOBAL(call_do_irq) > - mflr r0 > - stw r0,4(r1) > - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) > - mr r1,r4 > - bl __do_irq > - lwz r1,0(r1) > - lwz r0,4(r1) > - mtlr r0 > - blr > - > /* > * This returns the high 64 bits of the product of two 64-bit numbers. > */ > diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S > index 070465825c21..4b761a18a74d 100644 > --- a/arch/powerpc/kernel/misc_64.S > +++ b/arch/powerpc/kernel/misc_64.S > @@ -27,28 +27,6 @@ > > .text > > -_GLOBAL(call_do_softirq) > - mflr r0 > - std r0,16(r1) > - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) > - mr r1,r3 > - bl __do_softirq > - ld r1,0(r1) > - ld r0,16(r1) > - mtlr r0 > - blr > - > -_GLOBAL(call_do_irq) > - mflr r0 > - std r0,16(r1) > - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) > - mr r1,r4 > - bl __do_irq > - ld r1,0(r1) > - ld r0,16(r1) > - mtlr r0 > - blr > - > _GLOBAL(__bswapdi2) > EXPORT_SYMBOL(__bswapdi2) > srdi r8,r3,32 >
Christophe Leroy <christophe.leroy@csgroup.eu> writes: > Le 20/03/2021 à 13:22, Michael Ellerman a écrit : >> From: Christophe Leroy <christophe.leroy@csgroup.eu> >> >> call_do_irq() and call_do_softirq() are simple enough to be >> worth inlining. >> >> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack. It >> also allows GCC to keep the saved ksp_limit in an nonvolatile reg. > > We don't have the ksp_limit anymore, I forgot to remove the above text. No worries, I'll edit it when I apply it. >> This is inspired from S390 arch. Several other arches do more or >> less the same. The way sparc arch does seems odd thought. >> >> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> >> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> >> --- >> >> v2: no change. >> v3: no change. >> v4: >> - comment reminding the purpose of the inline asm block. >> - added r2 as clobbered reg >> v5: >> - Limiting the change to PPC32 for now. >> - removed r2 from the clobbered regs list (on PPC32 r2 points to current all the time) >> - Removed patch 1 and merged ksp_limit handling in here. >> v6: >> - Rebase on top of merge-test (ca6e327fefb2). >> - Remove the ksp_limit stuff as it's doesn't exist anymore. >> >> v7: >> mpe: >> - Enable for 64-bit too. This all in-kernel code calling in-kernel >> code, and must use the kernel TOC. > > Great. > >> - Use named parameters for the inline asm. > > Hmm. It is the first time we use named parameters in powerpc assembly, isn't it ? Not quite the first. See load_unaligned_zeropad(). And also my soon to be merged code in: http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20210211135130.3474832-5-mpe@ellerman.id.au/ > I saw when investigating userspace access that x86 is using named parameters widely. Yeah I'd like us to use it more, I think it helps readability a lot. > Wondering, how would the below look like with named parameters (from __put_user_asm2_goto) ? 
> > stw%X1 %L0, %L1 Not sure, possibly that's too complicated for it :) cheers
On Wed, Mar 24, 2021 at 11:26:01PM +1100, Michael Ellerman wrote: > Christophe Leroy <christophe.leroy@csgroup.eu> writes: > > Hmm. It is the first time we use named parameters in powerpc assembly, isn't it ? > Yeah I'd like us to use it more, I think it helps readability a lot. ..in some cases. Not in most cases :-( > > Wondering, how would the below look like with named parameters (from __put_user_asm2_goto) ? > > > > stw%X1 %L0, %L1 > > Not sure, possibly that's too complicated for it :) asm("stw%X[name1] %L[name0],%L[name1]" :: [name0]"r"(x), [name1]"m"(p)); Yes, it is not more readable *at all*. Segher
On Sat, 20 Mar 2021 23:22:27 +1100, Michael Ellerman wrote: > call_do_irq() and call_do_softirq() are simple enough to be > worth inlining. > > Inlining them avoids an mflr/mtlr pair plus a save/reload on stack. It > also allows GCC to keep the saved ksp_limit in a nonvolatile reg. > > This is inspired by the S390 arch. Several other arches do more or > less the same. The way the sparc arch does it seems odd, though. Applied to powerpc/next. [1/1] powerpc/irq: Inline call_do_irq() and call_do_softirq() https://git.kernel.org/powerpc/c/48cf12d88969bd4238b8769767eb476970319d93 cheers
On Sat, Mar 20, 2021 at 11:22:27PM +1100, Michael Ellerman wrote: > From: Christophe Leroy <christophe.leroy@csgroup.eu> > > call_do_irq() and call_do_softirq() are simple enough to be > worth inlining. > > Inlining them avoids an mflr/mtlr pair plus a save/reload on stack. It > also allows GCC to keep the saved ksp_limit in an nonvolatile reg. > > This is inspired from S390 arch. Several other arches do more or > less the same. The way sparc arch does seems odd thought. > > Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> > Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> > --- > > v2: no change. > v3: no change. > v4: > - comment reminding the purpose of the inline asm block. > - added r2 as clobbered reg > v5: > - Limiting the change to PPC32 for now. > - removed r2 from the clobbered regs list (on PPC32 r2 points to current all the time) > - Removed patch 1 and merged ksp_limit handling in here. > v6: > - Rebase on top of merge-test (ca6e327fefb2). > - Remove the ksp_limit stuff as it's doesn't exist anymore. > > v7: > mpe: > - Enable for 64-bit too. This all in-kernel code calling in-kernel > code, and must use the kernel TOC. > - Use named parameters for the inline asm. > - Reformat inline asm. > - Mark as always_inline. > - Drop unused ret from call_do_softirq(), add r3 as clobbered. 
> --- > arch/powerpc/include/asm/irq.h | 2 -- > arch/powerpc/kernel/irq.c | 41 ++++++++++++++++++++++++++++++++++ > arch/powerpc/kernel/misc_32.S | 25 --------------------- > arch/powerpc/kernel/misc_64.S | 22 ------------------ > 4 files changed, 41 insertions(+), 49 deletions(-) > > diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h > index f3f264e441a7..b2bd58830430 100644 > --- a/arch/powerpc/include/asm/irq.h > +++ b/arch/powerpc/include/asm/irq.h > @@ -53,8 +53,6 @@ extern void *mcheckirq_ctx[NR_CPUS]; > extern void *hardirq_ctx[NR_CPUS]; > extern void *softirq_ctx[NR_CPUS]; > > -void call_do_softirq(void *sp); > -void call_do_irq(struct pt_regs *regs, void *sp); > extern void do_IRQ(struct pt_regs *regs); > extern void __init init_IRQ(void); > extern void __do_irq(struct pt_regs *regs); > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c > index 5b72abbff96c..260effc0a435 100644 > --- a/arch/powerpc/kernel/irq.c > +++ b/arch/powerpc/kernel/irq.c > @@ -667,6 +667,47 @@ static inline void check_stack_overflow(void) > } > } > > +static __always_inline void call_do_softirq(const void *sp) > +{ > + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ > + asm volatile ( > + PPC_STLU " %%r1, %[offset](%[sp]) ;" > + "mr %%r1, %[sp] ;" > + "bl %[callee] ;" > + PPC_LL " %%r1, 0(%%r1) ;" > + : // Outputs > + : // Inputs > + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), > + [callee] "i" (__do_softirq) > + : // Clobbers > + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", > + "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", > + "r11", "r12" > + ); > +} > + > +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) > +{ > + register unsigned long r3 asm("r3") = (unsigned long)regs; > + > + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. 
*/ > + asm volatile ( > + PPC_STLU " %%r1, %[offset](%[sp]) ;" > + "mr %%r1, %[sp] ;" > + "bl %[callee] ;" > + PPC_LL " %%r1, 0(%%r1) ;" > + : // Outputs > + "+r" (r3) > + : // Inputs > + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), > + [callee] "i" (__do_irq) > + : // Clobbers > + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", > + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", > + "r11", "r12" > + ); > +} > + > void __do_irq(struct pt_regs *regs) > { > unsigned int irq; > diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S > index acc410043b96..6a076bef2932 100644 > --- a/arch/powerpc/kernel/misc_32.S > +++ b/arch/powerpc/kernel/misc_32.S > @@ -27,31 +27,6 @@ > > .text > > -_GLOBAL(call_do_softirq) > - mflr r0 > - stw r0,4(r1) > - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) > - mr r1,r3 > - bl __do_softirq > - lwz r1,0(r1) > - lwz r0,4(r1) > - mtlr r0 > - blr > - > -/* > - * void call_do_irq(struct pt_regs *regs, void *sp); > - */ > -_GLOBAL(call_do_irq) > - mflr r0 > - stw r0,4(r1) > - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) > - mr r1,r4 > - bl __do_irq > - lwz r1,0(r1) > - lwz r0,4(r1) > - mtlr r0 > - blr > - > /* > * This returns the high 64 bits of the product of two 64-bit numbers. 
> */ > diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S > index 070465825c21..4b761a18a74d 100644 > --- a/arch/powerpc/kernel/misc_64.S > +++ b/arch/powerpc/kernel/misc_64.S > @@ -27,28 +27,6 @@ > > .text > > -_GLOBAL(call_do_softirq) > - mflr r0 > - std r0,16(r1) > - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) > - mr r1,r3 > - bl __do_softirq > - ld r1,0(r1) > - ld r0,16(r1) > - mtlr r0 > - blr > - > -_GLOBAL(call_do_irq) > - mflr r0 > - std r0,16(r1) > - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) > - mr r1,r4 > - bl __do_irq > - ld r1,0(r1) > - ld r0,16(r1) > - mtlr r0 > - blr > - > _GLOBAL(__bswapdi2) > EXPORT_SYMBOL(__bswapdi2) > srdi r8,r3,32 > -- > 2.25.1 > This change caused our ppc44x_defconfig builds to hang when powering down in QEMU: https://github.com/ClangBuiltLinux/continuous-integration2/runs/2304364629?check_suite_focus=true#logs This is probably something with clang given that GCC 10.3.0 works fine but due to the nature of the change, I have no idea how to tell what is going wrong. I tried to do some rudimentary debugging with gdb but that did not really get me anywhere. The kernel was built with just 'CC=clang' and it is reproducible with all versions of clang that the kernel supports. The QEMU invocation is visible at the link above, it is done with our boot-qemu.sh in this repo, which also houses the rootfs: https://github.com/ClangBuiltLinux/boot-utils Happy to provide any other information or debug/test as directed! Cheers, Nathan
Le 26/04/2021 à 20:50, Nathan Chancellor a écrit : > On Sat, Mar 20, 2021 at 11:22:27PM +1100, Michael Ellerman wrote: >> From: Christophe Leroy <christophe.leroy@csgroup.eu> >> >> call_do_irq() and call_do_softirq() are simple enough to be >> worth inlining. >> >> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack. It >> also allows GCC to keep the saved ksp_limit in an nonvolatile reg. >> >> This is inspired from S390 arch. Several other arches do more or >> less the same. The way sparc arch does seems odd thought. >> >> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> >> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> >> > > This change caused our ppc44x_defconfig builds to hang when powering > down in QEMU: > > https://github.com/ClangBuiltLinux/continuous-integration2/runs/2304364629?check_suite_focus=true#logs > > This is probably something with clang given that GCC 10.3.0 works fine > but due to the nature of the change, I have no idea how to tell what is > going wrong. I tried to do some rudimentary debugging with gdb but that > did not really get me anywhere. > > The kernel was built with just 'CC=clang' and it is reproducible with > all versions of clang that the kernel supports. > > The QEMU invocation is visible at the link above, it is done with our > boot-qemu.sh in this repo, which also houses the rootfs: > > https://github.com/ClangBuiltLinux/boot-utils > > Happy to provide any other information or debug/test as directed! 
> With GCC: 000003f0 <do_softirq_own_stack>: 3f0: 94 21 ff f0 stwu r1,-16(r1) 3f4: 7c 08 02 a6 mflr r0 3f8: 3d 20 00 00 lis r9,0 3fa: R_PPC_ADDR16_HA .data..read_mostly+0x4 3fc: 93 e1 00 0c stw r31,12(r1) 400: 90 01 00 14 stw r0,20(r1) 404: 83 e9 00 00 lwz r31,0(r9) 406: R_PPC_ADDR16_LO .data..read_mostly+0x4 408: 94 3f 1f f0 stwu r1,8176(r31) 40c: 7f e1 fb 78 mr r1,r31 410: 48 00 00 01 bl 410 <do_softirq_own_stack+0x20> 410: R_PPC_REL24 __do_softirq 414: 80 21 00 00 lwz r1,0(r1) 418: 80 01 00 14 lwz r0,20(r1) 41c: 83 e1 00 0c lwz r31,12(r1) 420: 38 21 00 10 addi r1,r1,16 424: 7c 08 03 a6 mtlr r0 428: 4e 80 00 20 blr With CLANG: 000003e8 <do_softirq_own_stack>: 3e8: 94 21 ff f0 stwu r1,-16(r1) 3ec: 93 c1 00 08 stw r30,8(r1) 3f0: 3c 60 00 00 lis r3,0 3f2: R_PPC_ADDR16_HA softirq_ctx 3f4: 83 c3 00 00 lwz r30,0(r3) 3f6: R_PPC_ADDR16_LO softirq_ctx 3f8: 94 3e 1f f0 stwu r1,8176(r30) 3fc: 7f c1 f3 78 mr r1,r30 400: 48 00 00 01 bl 400 <do_softirq_own_stack+0x18> 400: R_PPC_REL24 __do_softirq 404: 80 21 00 00 lwz r1,0(r1) 408: 83 c1 00 08 lwz r30,8(r1) 40c: 38 21 00 10 addi r1,r1,16 410: 4e 80 00 20 blr As you can see, CLANG doesn't save/restore 'lr' although 'lr' is explicitly listed in the registers clobbered by the inline assembly: >> +static __always_inline void call_do_softirq(const void *sp) >> +{ >> + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ >> + asm volatile ( >> + PPC_STLU " %%r1, %[offset](%[sp]) ;" >> + "mr %%r1, %[sp] ;" >> + "bl %[callee] ;" >> + PPC_LL " %%r1, 0(%%r1) ;" >> + : // Outputs >> + : // Inputs >> + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), >> + [callee] "i" (__do_softirq) >> + : // Clobbers >> + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", >> + "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", >> + "r11", "r12" >> + );
On Mon, Apr 26, 2021 at 11:39 PM Christophe Leroy <christophe.leroy@csgroup.eu> wrote: > > > > Le 26/04/2021 à 20:50, Nathan Chancellor a écrit : > > On Sat, Mar 20, 2021 at 11:22:27PM +1100, Michael Ellerman wrote: > >> From: Christophe Leroy <christophe.leroy@csgroup.eu> > >> > >> call_do_irq() and call_do_softirq() are simple enough to be > >> worth inlining. > >> > >> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack. It > >> also allows GCC to keep the saved ksp_limit in an nonvolatile reg. > >> > >> This is inspired from S390 arch. Several other arches do more or > >> less the same. The way sparc arch does seems odd thought. > >> > >> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> > >> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> > >> > > > > This change caused our ppc44x_defconfig builds to hang when powering > > down in QEMU: > > > > https://github.com/ClangBuiltLinux/continuous-integration2/runs/2304364629?check_suite_focus=true#logs > > > > This is probably something with clang given that GCC 10.3.0 works fine > > but due to the nature of the change, I have no idea how to tell what is > > going wrong. I tried to do some rudimentary debugging with gdb but that > > did not really get me anywhere. > > > > The kernel was built with just 'CC=clang' and it is reproducible with > > all versions of clang that the kernel supports. > > > > The QEMU invocation is visible at the link above, it is done with our > > boot-qemu.sh in this repo, which also houses the rootfs: > > > > https://github.com/ClangBuiltLinux/boot-utils > > > > Happy to provide any other information or debug/test as directed! 
> > > > With GCC: > > 000003f0 <do_softirq_own_stack>: > 3f0: 94 21 ff f0 stwu r1,-16(r1) > 3f4: 7c 08 02 a6 mflr r0 > 3f8: 3d 20 00 00 lis r9,0 > 3fa: R_PPC_ADDR16_HA .data..read_mostly+0x4 > 3fc: 93 e1 00 0c stw r31,12(r1) > 400: 90 01 00 14 stw r0,20(r1) > 404: 83 e9 00 00 lwz r31,0(r9) > 406: R_PPC_ADDR16_LO .data..read_mostly+0x4 > 408: 94 3f 1f f0 stwu r1,8176(r31) > 40c: 7f e1 fb 78 mr r1,r31 > 410: 48 00 00 01 bl 410 <do_softirq_own_stack+0x20> > 410: R_PPC_REL24 __do_softirq > 414: 80 21 00 00 lwz r1,0(r1) > 418: 80 01 00 14 lwz r0,20(r1) > 41c: 83 e1 00 0c lwz r31,12(r1) > 420: 38 21 00 10 addi r1,r1,16 > 424: 7c 08 03 a6 mtlr r0 > 428: 4e 80 00 20 blr > > > With CLANG: > > 000003e8 <do_softirq_own_stack>: > 3e8: 94 21 ff f0 stwu r1,-16(r1) > 3ec: 93 c1 00 08 stw r30,8(r1) > 3f0: 3c 60 00 00 lis r3,0 > 3f2: R_PPC_ADDR16_HA softirq_ctx > 3f4: 83 c3 00 00 lwz r30,0(r3) > 3f6: R_PPC_ADDR16_LO softirq_ctx > 3f8: 94 3e 1f f0 stwu r1,8176(r30) > 3fc: 7f c1 f3 78 mr r1,r30 > 400: 48 00 00 01 bl 400 <do_softirq_own_stack+0x18> > 400: R_PPC_REL24 __do_softirq > 404: 80 21 00 00 lwz r1,0(r1) > 408: 83 c1 00 08 lwz r30,8(r1) > 40c: 38 21 00 10 addi r1,r1,16 > 410: 4e 80 00 20 blr > > > As you can see, CLANG doesn't save/restore 'lr' allthought 'lr' is explicitely listed in the > registers clobbered by the inline assembly: Ah, thanks for debugging this. Will follow up in https://bugs.llvm.org/show_bug.cgi?id=50147. > > >> +static __always_inline void call_do_softirq(const void *sp) > >> +{ > >> + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. 
*/ > >> + asm volatile ( > >> + PPC_STLU " %%r1, %[offset](%[sp]) ;" > >> + "mr %%r1, %[sp] ;" > >> + "bl %[callee] ;" > >> + PPC_LL " %%r1, 0(%%r1) ;" > >> + : // Outputs > >> + : // Inputs > >> + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), > >> + [callee] "i" (__do_softirq) > >> + : // Clobbers > >> + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", > >> + "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", > >> + "r11", "r12" > >> + ); > > -- > You received this message because you are subscribed to the Google Groups "Clang Built Linux" group. > To unsubscribe from this group and stop receiving emails from it, send an email to clang-built-linux+unsubscribe@googlegroups.com. > To view this discussion on the web visit https://groups.google.com/d/msgid/clang-built-linux/de6fc09f-97f5-c934-6393-998ec766b48a%40csgroup.eu.
On Tue, Apr 27, 2021 at 1:42 PM Nick Desaulniers <ndesaulniers@google.com> wrote: > > On Mon, Apr 26, 2021 at 11:39 PM Christophe Leroy > <christophe.leroy@csgroup.eu> wrote: > > > > As you can see, CLANG doesn't save/restore 'lr' although 'lr' is explicitly listed in the > > registers clobbered by the inline assembly: > > Ah, thanks for debugging this. Will follow up in > https://bugs.llvm.org/show_bug.cgi?id=50147. Looks like there's a fix posted for LLVM in: https://reviews.llvm.org/D101657 Though trying to test it in QEMU, I'm hitting an assertion failure booting a kernel (even without that patch to LLVM): qemu-system-ppc: ../../hw/pci/pci.c:253: pci_bus_change_irq_level: Assertion `irq_num >= 0' failed. That's with QEMU emulator version 5.2.0 (Debian 1:5.2+dfsg-9) I didn't see anything in https://bugs.launchpad.net/qemu/ about it, but figured I'd share in case that assertion failure looked familiar to anyone.
On Fri, Apr 30, 2021 at 2:33 PM Nick Desaulniers <ndesaulniers@google.com> wrote: > > On Tue, Apr 27, 2021 at 1:42 PM Nick Desaulniers > <ndesaulniers@google.com> wrote: > > > > On Mon, Apr 26, 2021 at 11:39 PM Christophe Leroy > > <christophe.leroy@csgroup.eu> wrote: > > > > > > As you can see, CLANG doesn't save/restore 'lr' although 'lr' is explicitly listed in the > > > registers clobbered by the inline assembly: > > > > Ah, thanks for debugging this. Will follow up in > > https://bugs.llvm.org/show_bug.cgi?id=50147. > > Looks like there's a fix posted for LLVM in: https://reviews.llvm.org/D101657 > > Though trying to test it in QEMU, I'm hitting an assertion failure > booting a kernel (even without that patch to LLVM): > qemu-system-ppc: ../../hw/pci/pci.c:253: pci_bus_change_irq_level: > Assertion `irq_num >= 0' failed. > That's with > QEMU emulator version 5.2.0 (Debian 1:5.2+dfsg-9) > > I didn't see anything in https://bugs.launchpad.net/qemu/ about it, > but figured I'd share in case that assertion failure looked familiar > to anyone. Nathan pointed out some previous reports; looks like others are hitting this, too: https://github.com/ClangBuiltLinux/linux/issues/1345#issuecomment-830451276
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index f3f264e441a7..b2bd58830430 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -53,8 +53,6 @@ extern void *mcheckirq_ctx[NR_CPUS]; extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; -void call_do_softirq(void *sp); -void call_do_irq(struct pt_regs *regs, void *sp); extern void do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5b72abbff96c..260effc0a435 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -667,6 +667,47 @@ static inline void check_stack_overflow(void) } } +static __always_inline void call_do_softirq(const void *sp) +{ + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_softirq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) +{ + register unsigned long r3 asm("r3") = (unsigned long)regs; + + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. 
*/ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + "+r" (r3) + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_irq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + void __do_irq(struct pt_regs *regs) { unsigned int irq; diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index acc410043b96..6a076bef2932 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -27,31 +27,6 @@ .text -_GLOBAL(call_do_softirq) - mflr r0 - stw r0,4(r1) - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) - mr r1,r3 - bl __do_softirq - lwz r1,0(r1) - lwz r0,4(r1) - mtlr r0 - blr - -/* - * void call_do_irq(struct pt_regs *regs, void *sp); - */ -_GLOBAL(call_do_irq) - mflr r0 - stw r0,4(r1) - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) - mr r1,r4 - bl __do_irq - lwz r1,0(r1) - lwz r0,4(r1) - mtlr r0 - blr - /* * This returns the high 64 bits of the product of two 64-bit numbers. */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 070465825c21..4b761a18a74d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -27,28 +27,6 @@ .text -_GLOBAL(call_do_softirq) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) - mr r1,r3 - bl __do_softirq - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr - -_GLOBAL(call_do_irq) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) - mr r1,r4 - bl __do_irq - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr - _GLOBAL(__bswapdi2) EXPORT_SYMBOL(__bswapdi2) srdi r8,r3,32