diff mbox

rs6000: Add "cannot_copy" attribute, use it (PR67788, PR67789)

Message ID a9b51088209febfc8bb481884100aa1bb6b5245e.1443679075.git.segher@kernel.crashing.org
State New
Headers show

Commit Message

Segher Boessenkool Oct. 1, 2015, 6:08 a.m. UTC
After the shrink-wrapping patches the prologue will often be pushed
"deeper" into the function, which in turn means the software trace cache
pass will more often want to duplicate the basic block containing the
prologue.  This caused failures for 32-bit SVR4 with -msecure-plt PIC.

This configuration uses the load_toc_v4_PIC_1 instruction, which creates
assembler labels without using the normal machinery for that.  If now
the compiler decides to duplicate the insn, it will emit the same label
twice.  Boom.

It isn't so easy to fix this to use labels the compiler knows about (let
alone test that properly).  Instead, this patch wires up a "cannot_copy"
attribute to be used by TARGET_CANNOT_COPY_P, and sets that attribute on
these insns we do not want copied.

Bootstrapped and tested on powerpc64-linux, with the usual configurations
(-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); new testcase fails before, works
after (on 32-bit).

Is this okay for mainline?


Segher


2015-09-30  Segher Boessenkool  <segher@kernel.crashing.org>

	PR target/67788
	PR target/67789
	* config/rs6000/rs6000.c (TARGET_CANNOT_COPY_INSN_P): New.
	(rs6000_cannot_copy_insn_p): New function.
	* config/rs6000/rs6000.md (cannot_copy): New attribute.
	(load_toc_v4_PIC_1_normal): Set cannot_copy.
	(load_toc_v4_PIC_1_476): Ditto.

gcc/testsuite/
	PR target/67788
	PR target/67789
	* gcc.target/powerpc/pr67789.c: New testcase.

---
 gcc/config/rs6000/rs6000.c                 | 11 +++++++++
 gcc/config/rs6000/rs6000.md                |  9 +++++--
 gcc/testsuite/gcc.target/powerpc/pr67789.c | 39 ++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr67789.c

Comments

Richard Biener Oct. 1, 2015, 10:14 a.m. UTC | #1
On Thu, Oct 1, 2015 at 8:08 AM, Segher Boessenkool
<segher@kernel.crashing.org> wrote:
> After the shrink-wrapping patches the prologue will often be pushed
> "deeper" into the function, which in turn means the software trace cache
> pass will more often want to duplicate the basic block containing the
> prologue.  This caused failures for 32-bit SVR4 with -msecure-plt PIC.
>
> This configuration uses the load_toc_v4_PIC_1 instruction, which creates
> assembler labels without using the normal machinery for that.  If now
> the compiler decides to duplicate the insn, it will emit the same label
> twice.  Boom.
>
> It isn't so easy to fix this to use labels the compiler knows about (let
> alone test that properly).  Instead, this patch wires up a "cannot_copy"
> attribute to be used by TARGET_CANNOT_COPY_P, and sets that attribute on
> these insns we do not want copied.
>
> Bootstrapped and tested on powerpc64-linux, with the usual configurations
> (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); new testcase fails before, works
> after (on 32-bit).
>
> Is this okay for mainline?

Isn't that quite expensive?  So even if not "easy", can you try?

Do we have other ports with local labels in define_insns?  I see some in
darwin.md as well which your patch doesn't handle btw., otherwise
suspicious %0: also appears (only) in h8300.md.  arc.md also has
a suspicious case in its doloop_end_i pattern.

Richard.

>
> Segher
>
>
> 2015-09-30  Segher Boessenkool  <segher@kernel.crashing.org>
>
>         PR target/67788
>         PR target/67789
>         * config/rs6000/rs6000.c (TARGET_CANNOT_COPY_INSN_P): New.
>         (rs6000_cannot_copy_insn_p): New function.
>         * config/rs6000/rs6000.md (cannot_copy): New attribute.
>         (load_toc_v4_PIC_1_normal): Set cannot_copy.
>         (load_toc_v4_PIC_1_476): Ditto.
>
> gcc/testsuite/
>         PR target/67788
>         PR target/67789
>         * gcc.target/powerpc/pr67789.c: New testcase.
>
> ---
>  gcc/config/rs6000/rs6000.c                 | 11 +++++++++
>  gcc/config/rs6000/rs6000.md                |  9 +++++--
>  gcc/testsuite/gcc.target/powerpc/pr67789.c | 39 ++++++++++++++++++++++++++++++
>  3 files changed, 57 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr67789.c
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 93bb725..29fd198 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1513,6 +1513,8 @@ static const struct attribute_spec rs6000_attribute_table[] =
>  #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
>  #undef TARGET_MEMORY_MOVE_COST
>  #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
> +#undef TARGET_CANNOT_COPY_INSN_P
> +#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
>  #undef TARGET_RTX_COSTS
>  #define TARGET_RTX_COSTS rs6000_rtx_costs
>  #undef TARGET_ADDRESS_COST
> @@ -31226,6 +31228,15 @@ rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
>  #endif /* HAVE_AS_TLS */
>  #endif /* TARGET_XCOFF */
>
> +/* Return true if INSN should not be copied.  */
> +
> +static bool
> +rs6000_cannot_copy_insn_p (rtx_insn *insn)
> +{
> +  return recog_memoized (insn) >= 0
> +        && get_attr_cannot_copy (insn);
> +}
> +
>  /* Compute a (partial) cost for rtx X.  Return true if the complete
>     cost has been computed, and false if subexpressions should be
>     scanned.  In either case, *TOTAL contains the cost result.  */
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index cfdb286..8c53c40 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -226,6 +226,9 @@ (define_attr "var_shift" "no,yes"
>                               (const_string "no"))
>                 (const_string "no")))
>
> +;; Is copying of this instruction disallowed?
> +(define_attr "cannot_copy" "no,yes" (const_string "no"))
> +
>  ;; Define floating point instruction sub-types for use with Xfpu.md
>  (define_attr "fp_type" "fp_default,fp_addsub_s,fp_addsub_d,fp_mul_s,fp_mul_d,fp_div_s,fp_div_d,fp_maddsub_s,fp_maddsub_d,fp_sqrt_s,fp_sqrt_d" (const_string "fp_default"))
>
> @@ -9130,7 +9133,8 @@ (define_insn "load_toc_v4_PIC_1_normal"
>     && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
>    "bcl 20,31,%0\\n%0:"
>    [(set_attr "type" "branch")
> -   (set_attr "length" "4")])
> +   (set_attr "length" "4")
> +   (set_attr "cannot_copy" "yes")])
>
>  (define_insn "load_toc_v4_PIC_1_476"
>    [(set (reg:SI LR_REGNO)
> @@ -9148,7 +9152,8 @@ (define_insn "load_toc_v4_PIC_1_476"
>    return templ;
>  }"
>    [(set_attr "type" "branch")
> -   (set_attr "length" "4")])
> +   (set_attr "length" "4")
> +   (set_attr "cannot_copy" "yes")])
>
>  (define_expand "load_toc_v4_PIC_1b"
>    [(parallel [(set (reg:SI LR_REGNO)
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr67789.c b/gcc/testsuite/gcc.target/powerpc/pr67789.c
> new file mode 100644
> index 0000000..d1bd047
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr67789.c
> @@ -0,0 +1,39 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O2 -msecure-plt -fPIC" } */
> +
> +#define FE_TONEAREST 0
> +#define FE_UPWARD 1
> +#define FE_DOWNWARD 2
> +#define FE_TOWARDZERO 3
> +
> +extern int fesetround(int);
> +
> +void
> +set_fpu_rounding_mode (int mode)
> +{
> +  int rnd_mode;
> +
> +  switch (mode)
> +    {
> +      case 2:
> +       rnd_mode = FE_TONEAREST;
> +       break;
> +
> +      case 4:
> +        rnd_mode = FE_UPWARD;
> +        break;
> +
> +      case 1:
> +        rnd_mode = FE_DOWNWARD;
> +        break;
> +
> +      case 3:
> +        rnd_mode = FE_TOWARDZERO;
> +        break;
> +
> +      default:
> +        return;
> +    }
> +
> +  fesetround (rnd_mode);
> +}
> --
> 1.8.1.4
>
Bernd Schmidt Oct. 1, 2015, 11:12 a.m. UTC | #2
>
> Do we have other ports with local labels in define_insns?  I see some in
> darwin.md as well which your patch doesn't handle btw., otherwise
> suspicious %0: also appears (only) in h8300.md.  arc.md also has
> a suspicious case in its doloop_end_i pattern.

It is reasonably common, and defining cannot_copy_insn_p is the normal 
way of dealing with it. sh for example does this.


Bernd
David Edelsohn Oct. 1, 2015, 2:05 p.m. UTC | #3
On Thu, Oct 1, 2015 at 2:08 AM, Segher Boessenkool
<segher@kernel.crashing.org> wrote:
> After the shrink-wrapping patches the prologue will often be pushed
> "deeper" into the function, which in turn means the software trace cache
> pass will more often want to duplicate the basic block containing the
> prologue.  This caused failures for 32-bit SVR4 with -msecure-plt PIC.
>
> This configuration uses the load_toc_v4_PIC_1 instruction, which creates
> assembler labels without using the normal machinery for that.  If now
> the compiler decides to duplicate the insn, it will emit the same label
> twice.  Boom.
>
> It isn't so easy to fix this to use labels the compiler knows about (let
> alone test that properly).  Instead, this patch wires up a "cannot_copy"
> attribute to be used by TARGET_CANNOT_COPY_P, and sets that attribute on
> these insns we do not want copied.
>
> Bootstrapped and tested on powerpc64-linux, with the usual configurations
> (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); new testcase fails before, works
> after (on 32-bit).
>
> Is this okay for mainline?
>
>
> Segher
>
>
> 2015-09-30  Segher Boessenkool  <segher@kernel.crashing.org>
>
>         PR target/67788
>         PR target/67789
>         * config/rs6000/rs6000.c (TARGET_CANNOT_COPY_INSN_P): New.
>         (rs6000_cannot_copy_insn_p): New function.
>         * config/rs6000/rs6000.md (cannot_copy): New attribute.
>         (load_toc_v4_PIC_1_normal): Set cannot_copy.
>         (load_toc_v4_PIC_1_476): Ditto.
>
> gcc/testsuite/
>         PR target/67788
>         PR target/67789
>         * gcc.target/powerpc/pr67789.c: New testcase.

Bernd mentions that this is the normal way of handling this problem, so okay.

Thanks, David
David Edelsohn Oct. 1, 2015, 2:08 p.m. UTC | #4
On Thu, Oct 1, 2015 at 2:08 AM, Segher Boessenkool
<segher@kernel.crashing.org> wrote:
> After the shrink-wrapping patches the prologue will often be pushed
> "deeper" into the function, which in turn means the software trace cache
> pass will more often want to duplicate the basic block containing the
> prologue.  This caused failures for 32-bit SVR4 with -msecure-plt PIC.
>
> This configuration uses the load_toc_v4_PIC_1 instruction, which creates
> assembler labels without using the normal machinery for that.  If now
> the compiler decides to duplicate the insn, it will emit the same label
> twice.  Boom.
>
> It isn't so easy to fix this to use labels the compiler knows about (let
> alone test that properly).  Instead, this patch wires up a "cannot_copy"
> attribute to be used by TARGET_CANNOT_COPY_P, and sets that attribute on
> these insns we do not want copied.
>
> Bootstrapped and tested on powerpc64-linux, with the usual configurations
> (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); new testcase fails before, works
> after (on 32-bit).
>
> Is this okay for mainline?

Is this expensive enough that it is worth limiting the definition of
the hook to configurations that include 32-bit SVR4 support so that
not every configuration incurs the overhead?

Thanks, David
Segher Boessenkool Oct. 1, 2015, 5:18 p.m. UTC | #5
On Thu, Oct 01, 2015 at 12:14:44PM +0200, Richard Biener wrote:
> On Thu, Oct 1, 2015 at 8:08 AM, Segher Boessenkool
> <segher@kernel.crashing.org> wrote:
> > After the shrink-wrapping patches the prologue will often be pushed
> > "deeper" into the function, which in turn means the software trace cache
> > pass will more often want to duplicate the basic block containing the
> > prologue.  This caused failures for 32-bit SVR4 with -msecure-plt PIC.
> >
> > This configuration uses the load_toc_v4_PIC_1 instruction, which creates
> > assembler labels without using the normal machinery for that.  If now
> > the compiler decides to duplicate the insn, it will emit the same label
> > twice.  Boom.
> >
> > It isn't so easy to fix this to use labels the compiler knows about (let
> > alone test that properly).  Instead, this patch wires up a "cannot_copy"
> > attribute to be used by TARGET_CANNOT_COPY_P, and sets that attribute on
> > these insns we do not want copied.
> >
> > Bootstrapped and tested on powerpc64-linux, with the usual configurations
> > (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); new testcase fails before, works
> > after (on 32-bit).
> >
> > Is this okay for mainline?
> 
> Isn't that quite expensive?

Not really?  recog_memoized isn't so bad.  I cannot measure a difference.

> So even if not "easy", can you try?

I did, and after half a day had a big mess and lots of things failing,
no idea where this was headed, and in the meantime bootstrap still fails
(on affected targets).

Other targets use cannot_copy_p in similar situations.

> Do we have other ports with local labels in define_insns?

I of course tried to find such, but didn't.  Oh, you're asking for any
insn that does anything whatsoever, but with a label.  That's the
"half a day" above.

It might matter that these insns are created after reload.  Or I somehow
need to force the BB to be split, that seems to have been the problem;
and splitting the prologue will be FUN.

> I see some in darwin.md as well which your patch doesn't handle btw.,

Oh argh forgot to grep outside of rs6000.md.  Will fix.

> otherwise suspicious %0: also appears (only) in h8300.md.

I think it is part of the syntax for that insn?   jsr ...:8

> arc.md also has
> a suspicious case in its doloop_end_i pattern.

That one is in an assembler comment  :-)


Segher
Segher Boessenkool Oct. 1, 2015, 5:39 p.m. UTC | #6
On Thu, Oct 01, 2015 at 10:08:50AM -0400, David Edelsohn wrote:
> Is this expensive enough that it is worth limiting the definition of
> the hook to configurations that include 32-bit SVR4 support so that
> not every configuration incurs the overhead?

I don't think so.  That won't save the call to the target hook, and
that is a big part of the overhead already.


Segher
Alan Modra Oct. 2, 2015, 12:54 a.m. UTC | #7
On Thu, Oct 01, 2015 at 12:18:08PM -0500, Segher Boessenkool wrote:
> On Thu, Oct 01, 2015 at 12:14:44PM +0200, Richard Biener wrote:
> > So even if not "easy", can you try?
> 
> I did, and after half a day had a big mess and lots of things failing,
> no idea where this was headed, and in the meantime bootstrap still fails
> (on affected targets).

I had a look too, and while you can revise the load_toc_v4_PIC
patterns to use labels emitted the usual way (eg. as in
i386.c:ix86_init_large_pic_reg) they tend to wander away from the
insn.  I think that could be solved, but these labels which aren't
referred to by jump insns get converted to NOTE_INSN_DELETED_LABEL
somewhere, and that leads to further pain.
Segher Boessenkool Oct. 2, 2015, 1:14 a.m. UTC | #8
On Fri, Oct 02, 2015 at 10:24:07AM +0930, Alan Modra wrote:
> On Thu, Oct 01, 2015 at 12:18:08PM -0500, Segher Boessenkool wrote:
> > On Thu, Oct 01, 2015 at 12:14:44PM +0200, Richard Biener wrote:
> > > So even if not "easy", can you try?
> > 
> > I did, and after half a day had a big mess and lots of things failing,
> > no idea where this was headed, and in the meantime bootstrap still fails
> > (on affected targets).
> 
> I had a look too, and while you can revise the load_toc_v4_PIC
> patterns to use labels emitted the usual way (eg. as in
> i386.c:ix86_init_large_pic_reg) they tend to wander away from the
> insn.

Yes, and only "bcl 20,31,$+4" avoids the link stack on recent CPUs
(bcl 20,31,$+8, which we also use, doesn't).

> I think that could be solved, but these labels which aren't
> referred to by jump insns get converted to NOTE_INSN_DELETED_LABEL
> somewhere, and that leads to further pain.

Yes.  You need to make the bcl a jump_insn to the label.  And then
there is yet more pain.


Segher
Richard Biener Oct. 2, 2015, 8:31 a.m. UTC | #9
On Fri, Oct 2, 2015 at 3:14 AM, Segher Boessenkool
<segher@kernel.crashing.org> wrote:
> On Fri, Oct 02, 2015 at 10:24:07AM +0930, Alan Modra wrote:
>> On Thu, Oct 01, 2015 at 12:18:08PM -0500, Segher Boessenkool wrote:
>> > On Thu, Oct 01, 2015 at 12:14:44PM +0200, Richard Biener wrote:
>> > > So even if not "easy", can you try?
>> >
>> > I did, and after half a day had a big mess and lots of things failing,
>> > no idea where this was headed, and in the meantime bootstrap still fails
>> > (on affected targets).
>>
>> I had a look too, and while you can revise the load_toc_v4_PIC
>> patterns to use labels emitted the usual way (eg. as in
>> i386.c:ix86_init_large_pic_reg) they tend to wander away from the
>> insn.
>
> Yes, and only "bcl 20,31,$+4" avoids the link stack on recent CPUs
> (bcl 20,31,$+8, which we also use, doesn't).
>
>> I think that could be solved, but these labels which aren't
>> referred to by jump insns get converted to NOTE_INSN_DELETED_LABEL
>> somewhere, and that leads to further pain.
>
> Yes.  You need to make the bcl a jump_insn to the label.  And then
> there is yet more pain.

Sounds like supporting this with a special instruction in the assembler
would be easier then? ...

As for the compile-time hit, yes, calling the hook at all and having an
extra loop over all stmts in cfg_layout_can_duplicate_bb_p.  There are
not many other uses of the hook, so I wonder if it is called everywhere
it has to be called.

Richard.

>
> Segher
diff mbox

Patch

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 93bb725..29fd198 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1513,6 +1513,8 @@  static const struct attribute_spec rs6000_attribute_table[] =
 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
 #undef TARGET_MEMORY_MOVE_COST
 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
+#undef TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
 #undef TARGET_RTX_COSTS
 #define TARGET_RTX_COSTS rs6000_rtx_costs
 #undef TARGET_ADDRESS_COST
@@ -31226,6 +31228,15 @@  rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
 #endif /* HAVE_AS_TLS */
 #endif /* TARGET_XCOFF */
 
+/* Return true if INSN should not be copied.  */
+
+static bool
+rs6000_cannot_copy_insn_p (rtx_insn *insn)
+{
+  return recog_memoized (insn) >= 0
+	 && get_attr_cannot_copy (insn);
+}
+
 /* Compute a (partial) cost for rtx X.  Return true if the complete
    cost has been computed, and false if subexpressions should be
    scanned.  In either case, *TOTAL contains the cost result.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index cfdb286..8c53c40 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -226,6 +226,9 @@  (define_attr "var_shift" "no,yes"
 			      (const_string "no"))
 		(const_string "no")))
 
+;; Is copying of this instruction disallowed?
+(define_attr "cannot_copy" "no,yes" (const_string "no"))
+
 ;; Define floating point instruction sub-types for use with Xfpu.md
 (define_attr "fp_type" "fp_default,fp_addsub_s,fp_addsub_d,fp_mul_s,fp_mul_d,fp_div_s,fp_div_d,fp_maddsub_s,fp_maddsub_d,fp_sqrt_s,fp_sqrt_d" (const_string "fp_default"))
 
@@ -9130,7 +9133,8 @@  (define_insn "load_toc_v4_PIC_1_normal"
    && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
   "bcl 20,31,%0\\n%0:"
   [(set_attr "type" "branch")
-   (set_attr "length" "4")])
+   (set_attr "length" "4")
+   (set_attr "cannot_copy" "yes")])
 
 (define_insn "load_toc_v4_PIC_1_476"
   [(set (reg:SI LR_REGNO)
@@ -9148,7 +9152,8 @@  (define_insn "load_toc_v4_PIC_1_476"
   return templ;
 }"
   [(set_attr "type" "branch")
-   (set_attr "length" "4")])
+   (set_attr "length" "4")
+   (set_attr "cannot_copy" "yes")])
 
 (define_expand "load_toc_v4_PIC_1b"
   [(parallel [(set (reg:SI LR_REGNO)
diff --git a/gcc/testsuite/gcc.target/powerpc/pr67789.c b/gcc/testsuite/gcc.target/powerpc/pr67789.c
new file mode 100644
index 0000000..d1bd047
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr67789.c
@@ -0,0 +1,39 @@ 
+/* { dg-do assemble } */
+/* { dg-options "-O2 -msecure-plt -fPIC" } */
+
+#define FE_TONEAREST 0
+#define FE_UPWARD 1
+#define FE_DOWNWARD 2
+#define FE_TOWARDZERO 3
+
+extern int fesetround(int);
+
+void
+set_fpu_rounding_mode (int mode)
+{
+  int rnd_mode;
+
+  switch (mode)
+    {
+      case 2:
+       rnd_mode = FE_TONEAREST;
+       break;
+
+      case 4:
+        rnd_mode = FE_UPWARD;
+        break;
+
+      case 1:
+        rnd_mode = FE_DOWNWARD;
+        break;
+
+      case 3:
+        rnd_mode = FE_TOWARDZERO; 
+        break;
+
+      default:
+        return;
+    }
+
+  fesetround (rnd_mode);
+}