diff mbox

[RFA] Restore combine.c split point for multiply-accumulate instructions

Message ID 555D6F64.3040300@redhat.com
State New
Headers show

Commit Message

Jeff Law May 21, 2015, 5:38 a.m. UTC
find_split_point will tend to favor splitting complex insns in such a 
way as to encourage multiply-add insns.  It does this by splitting an 
unrecognizable insn at the (plus (mult)).

Now that many MULTs are canonicalized as ASHIFT, that code to prefer the 
multiply-add is no longer triggering when it could/should.  This 
ultimately results in splitting at the ASHIFT rather than the containing 
PLUS and thus we generate distinct shift and add insns rather than a 
single shadd insn on the PA (and probably other architectures).

This patch will treat (plus (ashift)) just like (plus (mult)) which 
encourages creation of shift-add insns.

This has been bootstrapped and regression tested on 
x86-unknown-linux-gnu and with an hppa2.0w-hp-hpux11.00 cross compiler 
on the hppa.exp testsuite (full disclosure -- hppa.exp only has two 
tests, so it's far from extensive).

I've also verified this is one of the changes ultimately necessary to 
resolve the code generation regressions caused by Venkat's combine.c 
change on the PA across my 300+ testfiles for a PA cross compiler.

OK for the trunk?



Jeff

Comments

Richard Biener May 21, 2015, 9:09 a.m. UTC | #1
On Thu, May 21, 2015 at 7:38 AM, Jeff Law <law@redhat.com> wrote:
>
> find_split_point will tend to favor splitting complex insns in such a way as
> to encourage multiply-add insns.  It does this by splitting an
> unrecognizable insn at the (plus (mult)).
>
> Now that many MULTs are canonicalized as ASHIFT, that code to prefer the
> multiply-add is no longer triggering when it could/should.  This ultimately
> results in splitting at the ASHIFT rather than the containing PLUS and thus
> we generate distinct shift and add insns rather than a single shadd insn on
> the PA (and probably other architectures).
>
> This patch will treat (plus (ashift)) just like (plus (mult)) which
> encourages creation of shift-add insns.
>
> This has been bootstrapped and regression tested on x86-unknown-linux-gnu
> and with an hppa2.0w-hp-hpux11.00 cross compiler on the hppa.exp testsuite
> (full disclosure -- hppa.exp only has two tests, so it's far from
> extensive).
>
> I've also verified this is one of the changes ultimately necessary to
> resolve the code generation regressions caused by Venkat's combine.c change
> on the PA across my 300+ testfiles for a PA cross compiler.
>
> OK for the trunk?

Sounds reasonable.

Thanks,
Richard.

>
>
> Jeff
>
>
>
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 490386e..250fa0a 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,5 +1,8 @@
>  2015-05-20  Jeff Law  <law@redhat.com>
>
> +       * combine.c (find_split_point): Handle ASHIFT like MULT to encourage
> +       multiply-accumulate/shift-add insn generation.
> +
>         * config/pa/pa.c (pa_print_operand): New 'o' output modifier.
>         (pa_mem_shadd_constant_p): Renamed from pa_shadd_constant_p.
>         (pa_shadd_constant_p): Allow constants for shadd insns rather
> diff --git a/gcc/combine.c b/gcc/combine.c
> index a90849e..ab6de3a 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -5145,7 +5163,9 @@ find_split_point (rtx *loc, rtx_insn *insn, bool
> set_src)
>        /* Split at a multiply-accumulate instruction.  However if this is
>           the SET_SRC, we likely do not have such an instruction and it's
>           worthless to try this split.  */
> -      if (!set_src && GET_CODE (XEXP (x, 0)) == MULT)
> +      if (!set_src
> +         && (GET_CODE (XEXP (x, 0)) == MULT
> +             || GET_CODE (XEXP (x, 0)) == ASHIFT))
>          return loc;
>
>      default:
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index f20a131..bac0973 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,5 +1,7 @@
>  2015-05-20  Jeff Law  <law@redhat.com>
>
> +       * gcc.target/hppa/shadd-2.c: New test.
> +
>         * gcc.target/hppa/hppa.exp: New target test driver.
>         * gcc.target/hppa/shadd-1.c: New test.
>
> diff --git a/gcc/testsuite/gcc.target/hppa/shadd-2.c
> b/gcc/testsuite/gcc.target/hppa/shadd-2.c
> new file mode 100644
> index 0000000..34708e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/hppa/shadd-2.c
> @@ -0,0 +1,49 @@
> +/* { dg-do compile }  */
> +/* { dg-options "-O2" }  */
> +/* { dg-final { scan-assembler-times "sh.add" 2 } }  */
> +
> +typedef struct rtx_def *rtx;
> +typedef const struct rtx_def *const_rtx;
> +enum machine_mode
> +{
> +  VOIDmode, BLKmode, CCmode, CCGCmode, CCGOCmode, CCNOmode, CCAmode,
> CCCmode,
> +    CCOmode, CCSmode, CCZmode, CCFPmode, CCFPUmode, BImode, QImode, HImode,
> +    SImode, DImode, TImode, OImode, QQmode, HQmode, SQmode, DQmode, TQmode,
> +    UQQmode, UHQmode, USQmode, UDQmode, UTQmode, HAmode, SAmode, DAmode,
> +    TAmode, UHAmode, USAmode, UDAmode, UTAmode, SFmode, DFmode, XFmode,
> +    TFmode, SDmode, DDmode, TDmode, CQImode, CHImode, CSImode, CDImode,
> +    CTImode, COImode, SCmode, DCmode, XCmode, TCmode, V2QImode, V4QImode,
> +    V2HImode, V1SImode, V8QImode, V4HImode, V2SImode, V1DImode, V16QImode,
> +    V8HImode, V4SImode, V2DImode, V1TImode, V32QImode, V16HImode, V8SImode,
> +    V4DImode, V2TImode, V64QImode, V32HImode, V16SImode, V8DImode,
> V4TImode,
> +    V2SFmode, V4SFmode, V2DFmode, V8SFmode, V4DFmode, V2TFmode, V16SFmode,
> +    V8DFmode, V4TFmode, MAX_MACHINE_MODE, NUM_MACHINE_MODES =
> MAX_MACHINE_MODE
> +};
> +struct rtx_def
> +{
> +  __extension__ enum machine_mode mode:8;
> +};
> +struct target_regs
> +{
> +  unsigned char x_hard_regno_nregs[53][MAX_MACHINE_MODE];
> +};
> +extern void oof (void);
> +extern int rhs_regno (rtx);
> +
> +extern struct target_regs default_target_regs;
> +__inline__ unsigned int
> +end_hard_regno (enum machine_mode mode, unsigned int regno)
> +{
> +  return regno +
> +    ((&default_target_regs)->x_hard_regno_nregs)[regno][(int) mode];
> +}
> +
> +void
> +note_btr_set (rtx dest, const_rtx set
> +             __attribute__ ((__unused__)), void *data)
> +{
> +  int regno, end_regno;
> +  end_regno = end_hard_regno (((dest)->mode), (rhs_regno (dest)));
> +  for (; regno < end_regno; regno++)
> +    oof ();
> +}
>
Segher Boessenkool May 21, 2015, 1:40 p.m. UTC | #2
On Wed, May 20, 2015 at 11:38:44PM -0600, Jeff Law wrote:
> I've also verified this is one of the changes ultimately necessary to 
> resolve the code generation regressions caused by Venkat's combine.c 
> change on the PA across my 300+ testfiles for a PA cross compiler.

How much does it help, do you know?

> OK for the trunk?

Yes, please commit.  Thanks.  (One tiny comment below).


Segher


> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 490386e..250fa0a 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,5 +1,8 @@
>  2015-05-20  Jeff Law  <law@redhat.com>
>  
> +	* combine.c (find_split_point): Handle ASHIFT like MULT to encourage
> +	multiply-accumulate/shift-add insn generation.
> +
>  	* config/pa/pa.c (pa_print_operand): New 'o' output modifier.
>  	(pa_mem_shadd_constant_p): Renamed from pa_shadd_constant_p.
>  	(pa_shadd_constant_p): Allow constants for shadd insns rather
> diff --git a/gcc/combine.c b/gcc/combine.c
> index a90849e..ab6de3a 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -5145,7 +5163,9 @@ find_split_point (rtx *loc, rtx_insn *insn, bool set_src)
>        /* Split at a multiply-accumulate instruction.  However if this is
>           the SET_SRC, we likely do not have such an instruction and it's
>           worthless to try this split.  */
> -      if (!set_src && GET_CODE (XEXP (x, 0)) == MULT)
> +      if (!set_src
> +	  && (GET_CODE (XEXP (x, 0)) == MULT
> +	      || GET_CODE (XEXP (x, 0)) == ASHIFT))
>          return loc;

It might be better to also check if it is shifting by a CONST_INT.
I doubt it matters much, but it is closer to the original.


Segher
Jeff Law May 21, 2015, 1:51 p.m. UTC | #3
On 05/21/2015 07:40 AM, Segher Boessenkool wrote:
> On Wed, May 20, 2015 at 11:38:44PM -0600, Jeff Law wrote:
>> I've also verified this is one of the changes ultimately necessary to
>> resolve the code generation regressions caused by Venkat's combine.c
>> change on the PA across my 300+ testfiles for a PA cross compiler.
>
> How much does it help, do you know?
It resolves the remaining missed opportunities to create shadd insns 
across those 300+ files.

There's one more combine.c patch on the way to canonicalize in one more 
place -- which fixes a missed CSE due to a MULT in one context and 
ASHIFT in another.

Then it's strictly cleanup on the PA port to kill the old MULT patterns.

>
> It might be better to also check if it is shifting by a CONST_INT.
> I doubt it matters much, but it is closer to the original.
Sure, that's not a problem at all.  Will do after the usual testing.

jeff
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 490386e..250fa0a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,8 @@ 
 2015-05-20  Jeff Law  <law@redhat.com>
 
+	* combine.c (find_split_point): Handle ASHIFT like MULT to encourage
+	multiply-accumulate/shift-add insn generation.
+
 	* config/pa/pa.c (pa_print_operand): New 'o' output modifier.
 	(pa_mem_shadd_constant_p): Renamed from pa_shadd_constant_p.
 	(pa_shadd_constant_p): Allow constants for shadd insns rather
diff --git a/gcc/combine.c b/gcc/combine.c
index a90849e..ab6de3a 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -5145,7 +5163,9 @@  find_split_point (rtx *loc, rtx_insn *insn, bool set_src)
       /* Split at a multiply-accumulate instruction.  However if this is
          the SET_SRC, we likely do not have such an instruction and it's
          worthless to try this split.  */
-      if (!set_src && GET_CODE (XEXP (x, 0)) == MULT)
+      if (!set_src
+	  && (GET_CODE (XEXP (x, 0)) == MULT
+	      || GET_CODE (XEXP (x, 0)) == ASHIFT))
         return loc;
 
     default:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f20a131..bac0973 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,7 @@ 
 2015-05-20  Jeff Law  <law@redhat.com>
 
+	* gcc.target/hppa/shadd-2.c: New test.
+
 	* gcc.target/hppa/hppa.exp: New target test driver.
 	* gcc.target/hppa/shadd-1.c: New test.
 
diff --git a/gcc/testsuite/gcc.target/hppa/shadd-2.c b/gcc/testsuite/gcc.target/hppa/shadd-2.c
new file mode 100644
index 0000000..34708e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/hppa/shadd-2.c
@@ -0,0 +1,49 @@ 
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "sh.add" 2 } }  */
+
+typedef struct rtx_def *rtx;
+typedef const struct rtx_def *const_rtx;
+enum machine_mode
+{
+  VOIDmode, BLKmode, CCmode, CCGCmode, CCGOCmode, CCNOmode, CCAmode, CCCmode,
+    CCOmode, CCSmode, CCZmode, CCFPmode, CCFPUmode, BImode, QImode, HImode,
+    SImode, DImode, TImode, OImode, QQmode, HQmode, SQmode, DQmode, TQmode,
+    UQQmode, UHQmode, USQmode, UDQmode, UTQmode, HAmode, SAmode, DAmode,
+    TAmode, UHAmode, USAmode, UDAmode, UTAmode, SFmode, DFmode, XFmode,
+    TFmode, SDmode, DDmode, TDmode, CQImode, CHImode, CSImode, CDImode,
+    CTImode, COImode, SCmode, DCmode, XCmode, TCmode, V2QImode, V4QImode,
+    V2HImode, V1SImode, V8QImode, V4HImode, V2SImode, V1DImode, V16QImode,
+    V8HImode, V4SImode, V2DImode, V1TImode, V32QImode, V16HImode, V8SImode,
+    V4DImode, V2TImode, V64QImode, V32HImode, V16SImode, V8DImode, V4TImode,
+    V2SFmode, V4SFmode, V2DFmode, V8SFmode, V4DFmode, V2TFmode, V16SFmode,
+    V8DFmode, V4TFmode, MAX_MACHINE_MODE, NUM_MACHINE_MODES = MAX_MACHINE_MODE
+};
+struct rtx_def
+{
+  __extension__ enum machine_mode mode:8;
+};
+struct target_regs
+{
+  unsigned char x_hard_regno_nregs[53][MAX_MACHINE_MODE];
+};
+extern void oof (void);
+extern int rhs_regno (rtx);
+
+extern struct target_regs default_target_regs;
+__inline__ unsigned int
+end_hard_regno (enum machine_mode mode, unsigned int regno)
+{
+  return regno +
+    ((&default_target_regs)->x_hard_regno_nregs)[regno][(int) mode];
+}
+
+void
+note_btr_set (rtx dest, const_rtx set
+	      __attribute__ ((__unused__)), void *data)
+{
+  int regno, end_regno;
+  end_regno = end_hard_regno (((dest)->mode), (rhs_regno (dest)));
+  for (; regno < end_regno; regno++)
+    oof ();
+}