Message ID | a9b51088209febfc8bb481884100aa1bb6b5245e.1443679075.git.segher@kernel.crashing.org |
---|---|
State | New |
Headers | show |
On Thu, Oct 1, 2015 at 8:08 AM, Segher Boessenkool <segher@kernel.crashing.org> wrote: > After the shrink-wrapping patches the prologue will often be pushed > "deeper" into the function, which in turn means the software trace cache > pass will more often want to duplicate the basic block containing the > prologue. This caused failures for 32-bit SVR4 with -msecure-plt PIC. > > This configuration uses the load_toc_v4_PIC_1 instruction, which creates > assembler labels without using the normal machinery for that. If now > the compiler decides to duplicate the insn, it will emit the same label > twice. Boom. > > It isn't so easy to fix this to use labels the compiler knows about (let > alone test that properly). Instead, this patch wires up a "cannot_copy" > attribute to be used by TARGET_CANNOT_COPY_P, and sets that attribute on > these insns we do not want copied. > > Bootstrapped and tested on powerpc64-linux, with the usual configurations > (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); new testcase fails before, works > after (on 32-bit). > > Is this okay for mainline? Isn't that quite expensive? So even if not "easy", can you try? Do we have other ports with local labels in define_insns? I see some in darwin.md as well which your patch doesn't handle btw., otherwise suspicious %0: also appears (only) in h8300.md. arc.md also has a suspicious case in its doloop_end_i pattern. Richard. > > Segher > > > 2015-09-30 Segher Boessenkool <segher@kernel.crashing.org> > > PR target/67788 > PR target/67789 > * config/rs6000/rs6000.c (TARGET_CANNOT_COPY_INSN_P): New. > (rs6000_cannot_copy_insn_p): New function. > * config/rs6000/rs6000.md (cannot_copy): New attribute. > (load_toc_v4_PIC_1_normal): Set cannot_copy. > (load_toc_v4_PIC_1_476): Ditto. > > gcc/testsuite/ > PR target/67788 > PR target/67789 > * gcc.target/powerpc/pr67789.c: New testcase. > > --- > gcc/config/rs6000/rs6000.c | 11 +++++++++ > gcc/config/rs6000/rs6000.md | 9 +++++-- > gcc/testsuite/gcc.target/powerpc/pr67789.c | 39 ++++++++++++++++++++++++++++++ > 3 files changed, 57 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr67789.c > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 93bb725..29fd198 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -1513,6 +1513,8 @@ static const struct attribute_spec rs6000_attribute_table[] = > #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost > #undef TARGET_MEMORY_MOVE_COST > #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost > +#undef TARGET_CANNOT_COPY_INSN_P > +#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p > #undef TARGET_RTX_COSTS > #define TARGET_RTX_COSTS rs6000_rtx_costs > #undef TARGET_ADDRESS_COST > @@ -31226,6 +31228,15 @@ rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first) > #endif /* HAVE_AS_TLS */ > #endif /* TARGET_XCOFF */ > > +/* Return true if INSN should not be copied. */ > + > +static bool > +rs6000_cannot_copy_insn_p (rtx_insn *insn) > +{ > + return recog_memoized (insn) >= 0 > + && get_attr_cannot_copy (insn); > +} > + > /* Compute a (partial) cost for rtx X. Return true if the complete > cost has been computed, and false if subexpressions should be > scanned. In either case, *TOTAL contains the cost result. */ > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index cfdb286..8c53c40 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -226,6 +226,9 @@ (define_attr "var_shift" "no,yes" > (const_string "no")) > (const_string "no"))) > > +;; Is copying of this instruction disallowed? > +(define_attr "cannot_copy" "no,yes" (const_string "no")) > + > ;; Define floating point instruction sub-types for use with Xfpu.md > (define_attr "fp_type" "fp_default,fp_addsub_s,fp_addsub_d,fp_mul_s,fp_mul_d,fp_div_s,fp_div_d,fp_maddsub_s,fp_maddsub_d,fp_sqrt_s,fp_sqrt_d" (const_string "fp_default")) > > @@ -9130,7 +9133,8 @@ (define_insn "load_toc_v4_PIC_1_normal" > && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))" > "bcl 20,31,%0\\n%0:" > [(set_attr "type" "branch") > - (set_attr "length" "4")]) > + (set_attr "length" "4") > + (set_attr "cannot_copy" "yes")]) > > (define_insn "load_toc_v4_PIC_1_476" > [(set (reg:SI LR_REGNO) > @@ -9148,7 +9152,8 @@ (define_insn "load_toc_v4_PIC_1_476" > return templ; > }" > [(set_attr "type" "branch") > - (set_attr "length" "4")]) > + (set_attr "length" "4") > + (set_attr "cannot_copy" "yes")]) > > (define_expand "load_toc_v4_PIC_1b" > [(parallel [(set (reg:SI LR_REGNO) > diff --git a/gcc/testsuite/gcc.target/powerpc/pr67789.c b/gcc/testsuite/gcc.target/powerpc/pr67789.c > new file mode 100644 > index 0000000..d1bd047 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr67789.c > @@ -0,0 +1,39 @@ > +/* { dg-do assemble } */ > +/* { dg-options "-O2 -msecure-plt -fPIC" } */ > + > +#define FE_TONEAREST 0 > +#define FE_UPWARD 1 > +#define FE_DOWNWARD 2 > +#define FE_TOWARDZERO 3 > + > +extern int fesetround(int); > + > +void > +set_fpu_rounding_mode (int mode) > +{ > + int rnd_mode; > + > + switch (mode) > + { > + case 2: > + rnd_mode = FE_TONEAREST; > + break; > + > + case 4: > + rnd_mode = FE_UPWARD; > + break; > + > + case 1: > + rnd_mode = FE_DOWNWARD; > + break; > + > + case 3: > + rnd_mode = FE_TOWARDZERO; > + break; > + > + default: > + return; > + } > + > + fesetround (rnd_mode); > +} > -- > 1.8.1.4 >
> > Do we have other ports with local labels in define_insns? I see some in > darwin.md as well which your patch doesn't handle btw., otherwise > suspicious %0: also appears (only) in h8300.md. arc.md also has > a suspicious case in its doloop_end_i pattern. It is reasonably common, and defining cannot_copy_insn_p is the normal way of dealing with it. sh for example does this. Bernd
On Thu, Oct 1, 2015 at 2:08 AM, Segher Boessenkool <segher@kernel.crashing.org> wrote: > After the shrink-wrapping patches the prologue will often be pushed > "deeper" into the function, which in turn means the software trace cache > pass will more often want to duplicate the basic block containing the > prologue. This caused failures for 32-bit SVR4 with -msecure-plt PIC. > > This configuration uses the load_toc_v4_PIC_1 instruction, which creates > assembler labels without using the normal machinery for that. If now > the compiler decides to duplicate the insn, it will emit the same label > twice. Boom. > > It isn't so easy to fix this to use labels the compiler knows about (let > alone test that properly). Instead, this patch wires up a "cannot_copy" > attribute to be used by TARGET_CANNOT_COPY_P, and sets that attribute on > these insns we do not want copied. > > Bootstrapped and tested on powerpc64-linux, with the usual configurations > (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); new testcase fails before, works > after (on 32-bit). > > Is this okay for mainline? > > > Segher > > > 2015-09-30 Segher Boessenkool <segher@kernel.crashing.org> > > PR target/67788 > PR target/67789 > * config/rs6000/rs6000.c (TARGET_CANNOT_COPY_INSN_P): New. > (rs6000_cannot_copy_insn_p): New function. > * config/rs6000/rs6000.md (cannot_copy): New attribute. > (load_toc_v4_PIC_1_normal): Set cannot_copy. > (load_toc_v4_PIC_1_476): Ditto. > > gcc/testsuite/ > PR target/67788 > PR target/67789 > * gcc.target/powerpc/pr67789.c: New testcase. Bernd mentions that this is the normal way of handling this problem, so okay. Thanks, David
On Thu, Oct 1, 2015 at 2:08 AM, Segher Boessenkool <segher@kernel.crashing.org> wrote: > After the shrink-wrapping patches the prologue will often be pushed > "deeper" into the function, which in turn means the software trace cache > pass will more often want to duplicate the basic block containing the > prologue. This caused failures for 32-bit SVR4 with -msecure-plt PIC. > > This configuration uses the load_toc_v4_PIC_1 instruction, which creates > assembler labels without using the normal machinery for that. If now > the compiler decides to duplicate the insn, it will emit the same label > twice. Boom. > > It isn't so easy to fix this to use labels the compiler knows about (let > alone test that properly). Instead, this patch wires up a "cannot_copy" > attribute to be used by TARGET_CANNOT_COPY_P, and sets that attribute on > these insns we do not want copied. > > Bootstrapped and tested on powerpc64-linux, with the usual configurations > (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); new testcase fails before, works > after (on 32-bit). > > Is this okay for mainline? Is this expensive enough that it is worth limiting the definition of the hook to configurations that include 32-bit SVR4 support so that not every configuration incurs the overhead? Thanks, David
On Thu, Oct 01, 2015 at 12:14:44PM +0200, Richard Biener wrote: > On Thu, Oct 1, 2015 at 8:08 AM, Segher Boessenkool > <segher@kernel.crashing.org> wrote: > > After the shrink-wrapping patches the prologue will often be pushed > > "deeper" into the function, which in turn means the software trace cache > > pass will more often want to duplicate the basic block containing the > > prologue. This caused failures for 32-bit SVR4 with -msecure-plt PIC. > > > > This configuration uses the load_toc_v4_PIC_1 instruction, which creates > > assembler labels without using the normal machinery for that. If now > > the compiler decides to duplicate the insn, it will emit the same label > > twice. Boom. > > > > It isn't so easy to fix this to use labels the compiler knows about (let > > alone test that properly). Instead, this patch wires up a "cannot_copy" > > attribute to be used by TARGET_CANNOT_COPY_P, and sets that attribute on > > these insns we do not want copied. > > > > Bootstrapped and tested on powerpc64-linux, with the usual configurations > > (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); new testcase fails before, works > > after (on 32-bit). > > > > Is this okay for mainline? > > Isn't that quite expensive? Not really? recog_memoized isn't so bad. I cannot measure a difference. > So even if not "easy", can you try? I did, and after half a day had a big mess and lots of things failing, no idea where this was headed, and in the meantime bootstrap still fails (on affected targets). Other targets use cannot_copy_p in similar situations. > Do we have other ports with local labels in define_insns? I of course tried to find such, but didn't. Oh, you're asking for any insn that does anything whatsoever, but with a label. That's the "half a day" above. It might matter that these insns are created after reload. Or I somehow need to force the BB to be split, that seems to have been the problem; and splitting the prologue will be FUN. > I see some in darwin.md as well which your patch doesn't handle btw., Oh argh forgot to grep outside of rs6000.md. Will fix. > otherwise suspicious %0: also appears (only) in h8300.md. I think it is part of the syntax for that insn? jsr ...:8 > arc.md also has > a suspicious case in its doloop_end_i pattern. That one is in an assembler comment :-) Segher
On Thu, Oct 01, 2015 at 10:08:50AM -0400, David Edelsohn wrote: > Is this expensive enough that it is worth limiting the definition of > the hook to configurations that include 32-bit SVR4 support so that > not every configuration incurs the overhead? I don't think so. That won't save the call to the target hook, and that is a big part of the overhead already. Segher
On Thu, Oct 01, 2015 at 12:18:08PM -0500, Segher Boessenkool wrote: > On Thu, Oct 01, 2015 at 12:14:44PM +0200, Richard Biener wrote: > > So even if not "easy", can you try? > > I did, and after half a day had a big mess and lots of things failing, > no idea where this was headed, and in the meantime bootstrap still fails > (on affected targets). I had a look too, and while you can revise the load_toc_v4_PIC patterns to use labels emitted the usual way (eg. as in i386.c:ix86_init_large_pic_reg) they tend to wander away from the insn. I think that could be solved, but these labels which aren't referred to by jump insns get converted to NOTE_INSN_DELETED_LABEL somewhere, and that leads to further pain.
On Fri, Oct 02, 2015 at 10:24:07AM +0930, Alan Modra wrote: > On Thu, Oct 01, 2015 at 12:18:08PM -0500, Segher Boessenkool wrote: > > On Thu, Oct 01, 2015 at 12:14:44PM +0200, Richard Biener wrote: > > > So even if not "easy", can you try? > > > > I did, and after half a day had a big mess and lots of things failing, > > no idea where this was headed, and in the meantime bootstrap still fails > > (on affected targets). > > I had a look too, and while you can revise the load_toc_v4_PIC > patterns to use labels emitted the usual way (eg. as in > i386.c:ix86_init_large_pic_reg) they tend to wander away from the > insn. Yes, and only "bcl 20,31,$+4" avoids the link stack on recent CPUs (bcl 20,31,$+8, which we also use, doesn't). > I think that could be solved, but these labels which aren't > referred to by jump insns get converted to NOTE_INSN_DELETED_LABEL > somewhere, and that leads to further pain. Yes. You need to make the bcl a jump_insn to the label. And then there is yet more pain. Segher
On Fri, Oct 2, 2015 at 3:14 AM, Segher Boessenkool <segher@kernel.crashing.org> wrote: > On Fri, Oct 02, 2015 at 10:24:07AM +0930, Alan Modra wrote: >> On Thu, Oct 01, 2015 at 12:18:08PM -0500, Segher Boessenkool wrote: >> > On Thu, Oct 01, 2015 at 12:14:44PM +0200, Richard Biener wrote: >> > > So even if not "easy", can you try? >> > >> > I did, and after half a day had a big mess and lots of things failing, >> > no idea where this was headed, and in the meantime bootstrap still fails >> > (on affected targets). >> >> I had a look too, and while you can revise the load_toc_v4_PIC >> patterns to use labels emitted the usual way (eg. as in >> i386.c:ix86_init_large_pic_reg) they tend to wander away from the >> insn. > > Yes, and only "bcl 20,31,$+4" avoids the link stack on recent CPUs > (bcl 20,31,$+8, which we also use, doesn't). > >> I think that could be solved, but these labels which aren't >> referred to by jump insns get converted to NOTE_INSN_DELETED_LABEL >> somewhere, and that leads to further pain. > > Yes. You need to make the bcl a jump_insn to the label. And then > there is yet more pain. Sounds like supporting this with a special instruction in the assembler would be easier then? ... As for the compile-time hit, yes, calling the hook at all and having an extra loop over all stmts in cfg_layout_can_duplicate_bb_p. There are not many other uses of the hook, so I wonder if it is called everywhere it has to be called. Richard. > > Segher
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 93bb725..29fd198 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1513,6 +1513,8 @@ static const struct attribute_spec rs6000_attribute_table[] = #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost #undef TARGET_MEMORY_MOVE_COST #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS rs6000_rtx_costs #undef TARGET_ADDRESS_COST @@ -31226,6 +31228,15 @@ rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first) #endif /* HAVE_AS_TLS */ #endif /* TARGET_XCOFF */ +/* Return true if INSN should not be copied. */ + +static bool +rs6000_cannot_copy_insn_p (rtx_insn *insn) +{ + return recog_memoized (insn) >= 0 + && get_attr_cannot_copy (insn); +} + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index cfdb286..8c53c40 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -226,6 +226,9 @@ (define_attr "var_shift" "no,yes" (const_string "no")) (const_string "no"))) +;; Is copying of this instruction disallowed? +(define_attr "cannot_copy" "no,yes" (const_string "no")) + ;; Define floating point instruction sub-types for use with Xfpu.md (define_attr "fp_type" "fp_default,fp_addsub_s,fp_addsub_d,fp_mul_s,fp_mul_d,fp_div_s,fp_div_d,fp_maddsub_s,fp_maddsub_d,fp_sqrt_s,fp_sqrt_d" (const_string "fp_default")) @@ -9130,7 +9133,8 @@ (define_insn "load_toc_v4_PIC_1_normal" && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))" "bcl 20,31,%0\\n%0:" [(set_attr "type" "branch") - (set_attr "length" "4")]) + (set_attr "length" "4") + (set_attr "cannot_copy" "yes")]) (define_insn "load_toc_v4_PIC_1_476" [(set (reg:SI LR_REGNO) @@ -9148,7 +9152,8 @@ (define_insn "load_toc_v4_PIC_1_476" return templ; }" [(set_attr "type" "branch") - (set_attr "length" "4")]) + (set_attr "length" "4") + (set_attr "cannot_copy" "yes")]) (define_expand "load_toc_v4_PIC_1b" [(parallel [(set (reg:SI LR_REGNO) diff --git a/gcc/testsuite/gcc.target/powerpc/pr67789.c b/gcc/testsuite/gcc.target/powerpc/pr67789.c new file mode 100644 index 0000000..d1bd047 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr67789.c @@ -0,0 +1,39 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -msecure-plt -fPIC" } */ + +#define FE_TONEAREST 0 +#define FE_UPWARD 1 +#define FE_DOWNWARD 2 +#define FE_TOWARDZERO 3 + +extern int fesetround(int); + +void +set_fpu_rounding_mode (int mode) +{ + int rnd_mode; + + switch (mode) + { + case 2: + rnd_mode = FE_TONEAREST; + break; + + case 4: + rnd_mode = FE_UPWARD; + break; + + case 1: + rnd_mode = FE_DOWNWARD; + break; + + case 3: + rnd_mode = FE_TOWARDZERO; + break; + + default: + return; + } + + fesetround (rnd_mode); +}