Patchwork [SH] PR 54236 - Improve addc/subc utilization

login
register
mail settings
Submitter Oleg Endo
Date Aug. 16, 2012, 7:11 p.m.
Message ID <1345144304.3584.35.camel@yam-132-YW-E178-FTW>
Download mbox | patch
Permalink /patch/178077/
State New
Headers show

Comments

Oleg Endo - Aug. 16, 2012, 7:11 p.m.
Hello,

The attached patch improves the utilization of the addc and subc
instructions as mentioned in the PR.
Tested on rev 190396 with
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

and no new failures.
I've also briefly checked the CSiBE result-size for 'm4-single -ml -O2
-mpretend-cmove'.  There is a little bit of noise here and there due to
differences in insn scheduling/ordering.  Looking at some parts of
mpeg2dec, the new insn sequences seem to be beneficial for a couple of
functions.
OK?

Cheers,
Oleg

ChangeLog:

	PR target/54236
	* config/sh/sh.md (addc): Add commutative modifier.
	(*addc, *minus_plus_one, *subc, *negc): New insns and splits.

testsuite/ChangeLog:

	PR target/54236
	* gcc.target/sh/pr54236-1.c: New.
Kaz Kojima - Aug. 16, 2012, 10:12 p.m.
Oleg Endo <oleg.endo@t-online.de> wrote:
> The attached patch improves the utilization of the addc and subc
> instructions as mentioned in the PR.
> Tested on rev 190396 with
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> 
> and no new failures.
> I've also briefly checked the CSiBE result-size for 'm4-single -ml -O2
> -mpretend-cmove'.  There is a little bit of noise here and there due to
> differences in insn scheduling/ordering.  Looking at some parts of
> mpeg2dec, the new insn sequences seem to be beneficial for a couple of
> functions.
> OK?

OK.

Regards,
	kaz

Patch

Index: gcc/testsuite/gcc.target/sh/pr54236-1.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr54236-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr54236-1.c	(revision 0)
@@ -0,0 +1,76 @@ 
+/* Tests to check the utilization of addc, subc and negc instructions in
+   special cases.  If everything works as expected we won't see any
+   movt instructions in these cases.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O1" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */
+/* { dg-final { scan-assembler-times "addc" 3 } } */
+/* { dg-final { scan-assembler-times "subc" 3 } } */
+/* { dg-final { scan-assembler-times "sett" 4 } } */
+/* { dg-final { scan-assembler-times "negc" 1 } } */
+/* { dg-final { scan-assembler-not "movt" } } */
+
+int
+test_00 (int a, int b, int c, int d)
+{
+  /* 1x addc, 1x sett  */
+  return a + b + 1;
+}
+
+int
+test_01 (int a, int b, int c, int d)
+{
+  /* 1x addc  */
+  return a + (c == d);
+}
+
+int
+test_02 (int a, int b, int c, int d)
+{
+  /* 1x subc, 1x sett  */
+  return a - b - 1;
+}
+
+int
+test_03 (int a, int b, int c, int d)
+{
+  /* 1x subc  */
+  return a - (c == d);
+}
+
+int
+test_04 (int a, int b, int c, int d)
+{
+  /* 1x addc, 1x sett  */
+  return a + b + c + 1;
+}
+
+int
+test_05 (int a, int b, int c, int d)
+{
+  /* 1x subc, 1x sett  */
+  return a - b - c - 1;
+}
+
+int
+test_06 (int a, int b, int c, int d)
+{
+  /* 1x negc  */
+  return 0 - a - (b == c);
+}
+
+int
+test_07 (int *vec)
+{
+  /* Must not see a 'sett' or 'addc' here.
+     This is a case where combine tries to produce
+     'a + (0 - b) + 1' out of 'a - b + 1'.  */
+  int z = vec[0];
+  int vi = vec[1];
+  int zi = vec[2];
+
+  if (zi != 0 && z < -1)
+    vi -= (((vi >> 7) & 0x01) << 1) - 1;
+
+  return vi;
+}
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 190396)
+++ gcc/config/sh/sh.md	(working copy)
@@ -1686,7 +1686,7 @@ 
 
 (define_insn "addc"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
-	(plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+	(plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0")
 			  (match_operand:SI 2 "arith_reg_operand" "r"))
 		 (reg:SI T_REG)))
    (set (reg:SI T_REG)
@@ -1695,6 +1695,76 @@ 
   "addc	%2,%0"
   [(set_attr "type" "arith")])
 
+;; A simplified version of the addc insn, where the exact value of the
+;; T bit doesn't matter.  This is easier for combine to pick up.
+;; We allow a reg or 0 for one of the operands in order to be able to
+;; do 'reg + T' sequences.  Reload will load the constant 0 into the reg
+;; as needed.
+(define_insn "*addc"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0")
+			  (match_operand:SI 2 "arith_reg_or_0_operand" "r"))
+		 (match_operand:SI 3 "t_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "addc	%2,%0"
+  [(set_attr "type" "arith")])
+
+;; Split 'reg + reg + 1' into a sett addc sequence, as it can be scheduled
+;; better, if the sett insn can be done early.
+(define_insn_and_split "*addc"
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "")
+			  (match_operand:SI 2 "arith_reg_operand" ""))
+		 (const_int 1)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& 1"
+  [(set (reg:SI T_REG) (const_int 1))
+   (parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (match_dup 2))
+				          (reg:SI T_REG)))
+	      (clobber (reg:SI T_REG))])])
+
+;; Sometimes combine will try to do 'reg + (0-reg) + 1' if the *addc pattern
+;; matched.  Split this up into a simple sub add sequence, as this will save
+;; us one sett insn.
+(define_insn_and_split "*minus_plus_one"
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(plus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "")
+			   (match_operand:SI 2 "arith_reg_operand" ""))
+		 (const_int 1)))]
+  "TARGET_SH1"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))])
+
+;; Split 'reg + T' into 'reg + 0 + T' to utilize the addc insn.
+;; If the 0 constant can be CSE-ed, this becomes a one instruction
+;; operation, as opposed to sequences such as
+;;	movt	r2
+;;	add	r2,r3
+;;
+;; Even if the constant is not CSE-ed, a sequence such as
+;;	mov	#0,r2
+;;	addc	r2,r3
+;; can be scheduled much better since the load of the constant can be
+;; done earlier, before any comparison insns that store the result in
+;; the T bit.
+(define_insn_and_split "*addc"
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(plus:SI (match_operand:SI 1 "t_reg_operand" "")
+		 (match_operand:SI 2 "arith_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+		   (plus:SI (plus:SI (match_dup 2) (const_int 0))
+			    (match_dup 1)))
+	      (clobber (reg:SI T_REG))])])
+
 (define_expand "addsi3"
   [(set (match_operand:SI 0 "arith_reg_operand" "")
 	(plus:SI (match_operand:SI 1 "arith_operand" "")
@@ -1805,6 +1875,63 @@ 
   "subc	%2,%0"
   [(set_attr "type" "arith")])
 
+;; A simplified version of the subc insn, where the exact value of the
+;; T bit doesn't matter.  This is easier for combine to pick up.
+;; We allow a reg or 0 for one of the operands in order to be able to
+;; do 'reg - T' sequences.  Reload will load the constant 0 into the reg
+;; as needed.
+(define_insn "*subc"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+			    (match_operand:SI 2 "arith_reg_or_0_operand" "r"))
+		  (match_operand:SI 3 "t_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "subc	%2,%0"
+  [(set_attr "type" "arith")])
+
+;; Split reg - reg - 1 into a sett subc sequence, as it can be scheduled
+;; better, if the sett insn can be done early.
+;; Notice that combine turns 'a - b - 1' into 'a + (~b)'.
+(define_insn_and_split "*subc"
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(plus:SI (not:SI (match_operand:SI 1 "arith_reg_operand" ""))
+		 (match_operand:SI 2 "arith_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& 1"
+  [(set (reg:SI T_REG) (const_int 1))
+   (parallel [(set (match_dup 0)
+		   (minus:SI (minus:SI (match_dup 2) (match_dup 1))
+			     (reg:SI T_REG)))
+	      (clobber (reg:SI T_REG))])])
+
+;; Split 'reg - T' into 'reg - 0 - T' to utilize the subc insn.
+;; If the 0 constant can be CSE-ed, this becomes a one instruction
+;; operation, as opposed to sequences such as
+;;	movt	r2
+;;	sub	r2,r3
+;;
+;; Even if the constant is not CSE-ed, a sequence such as
+;;	mov	#0,r2
+;;	subc	r2,r3
+;; can be scheduled much better since the load of the constant can be
+;; done earlier, before any comparison insns that store the result in
+;; the T bit.
+(define_insn_and_split "*subc"
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(minus:SI (match_operand:SI 1 "arith_reg_operand" "")
+		  (match_operand:SI 2 "t_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+		   (minus:SI (minus:SI (match_dup 1) (const_int 0))
+			     (match_dup 2)))
+	      (clobber (reg:SI T_REG))])])
+
 (define_insn "*subsi3_internal"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
 	(minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
@@ -4528,6 +4655,19 @@ 
   "negc	%1,%0"
   [(set_attr "type" "arith")])
 
+;; A simplified version of the negc insn, where the exact value of the
+;; T bit doesn't matter.  This is easier for combine to pick up.
+;; Notice that '0 - x - 1' is the same as '~x', thus we don't specify
+;; extra patterns for this case.
+(define_insn "*negc"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(minus:SI (neg:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+		  (match_operand:SI 2 "t_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "negc	%1,%0"
+  [(set_attr "type" "arith")])
+
 (define_insn "*negdi_media"
   [(set (match_operand:DI 0 "arith_reg_dest" "=r")
 	(neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))]