diff mbox

[SH] PR 51244 - Catch more unnecessary sign/zero extensions

Message ID 1350290118.2348.58.camel@yam-132-YW-E178-FTW
State New
Headers show

Commit Message

Oleg Endo Oct. 15, 2012, 8:35 a.m. UTC
Hello,

This one refactors some copy pasta that my previous patch regarding this
matter introduced and catches more unnecessary sign/zero extensions of T
bit stores.  It also fixes the bug reported in PR 54925 which popped up
after the last patch for PR 51244.
Tested on rev 192417 with
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

and no new failures.
OK?

Cheers,
Oleg

gcc/ChangeLog:

	PR target/51244
	* config/sh/sh-protos.h (set_of_reg): New struct.
	(sh_find_set_of_reg, sh_is_logical_t_store_expr, 
	sh_try_omit_signzero_extend):  Declare...
	* config/sh/sh.c (sh_find_set_of_reg, 
	sh_is_logical_t_store_expr, 
	sh_try_omit_signzero_extend): ...these new functions.
	* config/sh/sh.md (*logical_op_t): New insn_and_split.
	(*zero_extend<mode>si2_compact): Use sh_try_omit_signzero_extend
	in splitter.
	(*extend<mode>si2_compact_reg): Convert to insn_and_split.  Use 
	sh_try_omit_signzero_extend in splitter.
	(*mov<mode>_reg_reg): Disallow t_reg_operand as operand 1.
	(*cbranch_t): Rewrite combine part in splitter using new 
	sh_find_set_of_reg function.

testsuite/ChangeLog:

	PR target/51244
	* gcc.target/sh/pr51244-17.c: New.

Comments

Kaz Kojima Oct. 15, 2012, 11:35 a.m. UTC | #1
Oleg Endo <oleg.endo@t-online.de> wrote:
> This one refactors some copy pasta that my previous patch regarding this
> matter introduced and catches more unnecessary sign/zero extensions of T
> bit stores.  It also fixes the bug reported in PR 54925 which popped up
> after the last patch for PR 51244.
> Tested on rev 192417 with
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> 
> and no new failures.
> OK?

OK.

Regards,
	kaz
diff mbox

Patch

Index: gcc/config/sh/sh-protos.h
===================================================================
--- gcc/config/sh/sh-protos.h	(revision 192417)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -163,6 +163,25 @@ 
 					enum machine_mode mode = VOIDmode);
 extern rtx sh_find_equiv_gbr_addr (rtx cur_insn, rtx mem);
 extern int sh_eval_treg_value (rtx op);
+
+/* Result value of sh_find_set_of_reg.  */
+struct set_of_reg
+{
+  /* The insn where sh_find_set_of_reg stopped looking.
+     Can be NULL_RTX if the end of the insn list was reached.  */
+  rtx insn;
+
+  /* The set rtx of the specified reg if found, NULL_RTX otherwise.  */
+  const_rtx set_rtx;
+
+  /* The set source rtx of the specified reg if found, NULL_RTX otherwise.
+     Usually, this is the most interesting return value.  */
+  rtx set_src;
+};
+
+extern set_of_reg sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx));
+extern bool sh_is_logical_t_store_expr (rtx op, rtx insn);
+extern rtx sh_try_omit_signzero_extend (rtx extended_op, rtx insn);
 #endif /* RTX_CODE */
 
 extern void sh_cpu_cpp_builtins (cpp_reader* pfile);
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 192417)
+++ gcc/config/sh/sh.c	(working copy)
@@ -13450,4 +13450,114 @@ 
   return NULL_RTX;
 }
 
+/*------------------------------------------------------------------------------
+  Manual insn combine support code.
+*/
+
+/* Given a reg rtx and a start insn, try to find the insn that sets the
+   specified reg by using the specified insn stepping function, such as 
+   'prev_nonnote_insn_bb'.  When the insn is found, try to extract the rtx
+   of the reg set.  */
+set_of_reg
+sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx))
+{
+  set_of_reg result;
+  result.insn = insn;
+  result.set_rtx = NULL_RTX;
+  result.set_src = NULL_RTX;
+
+  if (!REG_P (reg) || insn == NULL_RTX)
+    return result;
+
+  for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
+       result.insn = stepfunc (result.insn))
+    {
+      if (LABEL_P (result.insn) || BARRIER_P (result.insn))
+	return result;
+      if (!NONJUMP_INSN_P (result.insn))
+	continue;
+      if (reg_set_p (reg, result.insn))
+	{
+	  result.set_rtx = set_of (reg, result.insn);
+
+	  if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
+	    return result;
+
+	  result.set_src = XEXP (result.set_rtx, 1);
+	  return result;
+	}
+    }
+
+  return result;
+}
+
+/* Given an op rtx and an insn, try to find out whether the result of the
+   specified op consists only of logical operations on T bit stores.  */
+bool
+sh_is_logical_t_store_expr (rtx op, rtx insn)
+{
+  if (!logical_operator (op, SImode))
+    return false;
+
+  rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
+  int op_is_t_count = 0;
+
+  for (int i = 0; i < 2; ++i)
+    {
+      if (t_reg_operand (ops[i], VOIDmode)
+	  || negt_reg_operand (ops[i], VOIDmode))
+	op_is_t_count++;
+
+      else
+	{
+	  set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
+						  prev_nonnote_insn_bb);
+	  if (op_set.set_src == NULL_RTX)
+	    continue;
+
+	  if (t_reg_operand (op_set.set_src, VOIDmode)
+	      || negt_reg_operand (op_set.set_src, VOIDmode)
+	      || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
+	      op_is_t_count++;
+	}
+    }
+  
+  return op_is_t_count == 2;
+}
+
+/* Given the operand that is extended in a sign/zero extend insn, and the
+   insn, try to figure out whether the sign/zero extension can be replaced
+   by a simple reg-reg copy.  If so, the replacement reg rtx is returned,
+   NULL_RTX otherwise.  */
+rtx
+sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
+{
+  if (REG_P (extended_op))
+    extended_op = extended_op;
+  else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
+    extended_op = SUBREG_REG (extended_op);
+  else
+    return NULL_RTX;
+
+  /* Reg moves must be of the same mode.  */
+  if (GET_MODE (extended_op) != SImode)
+    return NULL_RTX;
+
+  set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
+  if (s.set_src == NULL_RTX)
+    return NULL_RTX;
+
+  if (t_reg_operand (s.set_src, VOIDmode)
+      || negt_reg_operand (s.set_src, VOIDmode))
+    return extended_op;
+
+  /* If the zero extended reg was formed by a logical operation, check the
+     operands of the logical operation.  If both originated from T bit
+     stores the zero extension can be eliminated.  */
+  else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
+    return extended_op;
+
+  return NULL_RTX;
+}
+
 #include "gt-sh.h"
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 192417)
+++ gcc/config/sh/sh.md	(working copy)
@@ -3708,6 +3708,26 @@ 
   "xor	%2,%0"
   [(set_attr "type" "arith")])
 
+;; The *logical_op_t pattern helps combine eliminating sign/zero extensions
+;; of results where one of the inputs is a T bit store.  Notice that this
+;; pattern must not match during reload.  If reload picks this pattern it
+;; will be impossible to split it afterwards.
+(define_insn_and_split "*logical_op_t"
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(match_operator:SI 3 "logical_operator"
+	  [(match_operand:SI 1 "arith_reg_operand")
+	   (match_operand:SI 2 "t_reg_operand")]))]
+  "TARGET_SH1 && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 4) (reg:SI T_REG))
+   (set (match_dup 0) (match_dup 3))]
+{
+  operands[4] = gen_reg_rtx (SImode);
+  operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode,
+				operands[1], operands[4]);
+})
+
 (define_insn "*xorsi3_media"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
 	(xor:SI (match_operand:SI 1 "logical_reg_operand" "%r,r")
@@ -5590,39 +5610,7 @@ 
      eliminated.  Notice that this also helps the *cbranch_t splitter when
      it tries to post-combine tests and conditional branches, as it does not
      check for zero extensions.  */
-  rtx ext_reg;
-  if (REG_P (operands[1]))
-    ext_reg = operands[1];
-  else if (GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1])))
-    ext_reg = SUBREG_REG (operands[1]);
-  else
-    FAIL;
-
-  /* Reg moves must be of the same mode.  */
-  if (GET_MODE (ext_reg) != SImode)
-    FAIL;
-
-  operands[2] = NULL_RTX;
-  for (rtx i = prev_nonnote_insn_bb (curr_insn); i != NULL_RTX;
-       i = prev_nonnote_insn_bb (i))
-    {
-      if (LABEL_P (i) || BARRIER_P (i))
-	break;
-      if (!NONJUMP_INSN_P (i))
-	continue;
-
-      if (reg_set_p (ext_reg, i))
-	{
-	  rtx set_op = XEXP (set_of (ext_reg, i), 1);
-	  if (set_op == NULL_RTX)
-	    break;
-	  if (t_reg_operand (set_op, VOIDmode)
-	      || negt_reg_operand (set_op, VOIDmode))
-	    operands[2] = ext_reg;
-	  break;
-	}
-    }
-
+  operands[2] = sh_try_omit_signzero_extend (operands[1], curr_insn);
   if (operands[2] == NULL_RTX)
     FAIL;
 }
@@ -5850,11 +5838,23 @@ 
 			   subreg_lowpart_offset (SImode, GET_MODE (op1)));
 })
 
-(define_insn "*extend<mode>si2_compact_reg"
+(define_insn_and_split "*extend<mode>si2_compact_reg"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
 	(sign_extend:SI (match_operand:QIHI 1 "arith_reg_operand" "r")))]
   "TARGET_SH1"
   "exts.<bw>	%1,%0"
+  "&& can_create_pseudo_p ()"
+  [(set (match_dup 0) (match_dup 2))]
+{
+  /* Sometimes combine fails to combine a T bit or negated T bit store to a
+     reg with a following sign extension.  In the split pass after combine,
+     try to figure the extended reg was set.  If it originated from the T
+     bit we can replace the sign extension with a reg move, which will be
+     eliminated.  */
+  operands[2] = sh_try_omit_signzero_extend (operands[1], curr_insn);
+  if (operands[2] == NULL_RTX)
+    FAIL;
+}
   [(set_attr "type" "arith")])
 
 ;; FIXME: Fold non-SH2A and SH2A alternatives with "enabled" attribute.
@@ -6629,10 +6629,19 @@ 
 ;; picked to load/store regs.  If the regs regs are on the stack reload will
 ;; try other insns and not stick to movqi_reg_reg.
 ;; The same applies to the movhi variants.
+;;
+;; Notice, that T bit is not allowed as a mov src operand here.  This is to
+;; avoid things like (set (reg:QI) (subreg:QI (reg:SI T_REG) 0)), which
+;; introduces zero extensions after T bit stores and redundant reg copies.
+;;
+;; FIXME: We can't use 'arith_reg_operand' (which disallows T_REG) as a
+;; predicate for the mov src operand because reload will have trouble
+;; reloading MAC subregs otherwise.  For that probably special patterns
+;; would be required.
 (define_insn "*mov<mode>_reg_reg"
   [(set (match_operand:QIHI 0 "arith_reg_dest" "=r")
 	(match_operand:QIHI 1 "register_operand" "r"))]
-  "TARGET_SH1"
+  "TARGET_SH1 && !t_reg_operand (operands[1], VOIDmode)"
   "mov	%1,%0"
   [(set_attr "type" "move")])
 
@@ -8178,28 +8187,17 @@ 
   rtx testing_insn = NULL_RTX;
   rtx tested_reg = NULL_RTX;
 
-  for (rtx i = prev_nonnote_insn_bb (curr_insn); i != NULL_RTX;
-       i = prev_nonnote_insn_bb (i))
+  set_of_reg s0 = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
+				      prev_nonnote_insn_bb);
+  if (s0.set_src != NULL_RTX
+      && GET_CODE (s0.set_src) == EQ
+      && REG_P (XEXP (s0.set_src, 0))
+      && satisfies_constraint_Z (XEXP (s0.set_src, 1)))
     {
-      if (LABEL_P (i) || BARRIER_P (i))
-	break;
-      if (!NONJUMP_INSN_P (i))
-	continue;
-
-      rtx p = PATTERN (i);
-      if (p != NULL_RTX
-	  && GET_CODE (p) == SET && t_reg_operand (XEXP (p, 0), VOIDmode)
-	  && GET_CODE (XEXP (p, 1)) == EQ
-	  && REG_P (XEXP (XEXP (p, 1), 0))
-	  && satisfies_constraint_Z (XEXP (XEXP (p, 1), 1)))
-	{
-	  testing_insn = i;
-	  tested_reg = XEXP (XEXP (p, 1), 0);
-	  break;
-	}
+      testing_insn = s0.insn;
+      tested_reg = XEXP (s0.set_src, 0);
     }
-
-  if (testing_insn == NULL_RTX)
+  else
     FAIL;
 
   /* Continue scanning the insns backwards and try to find the insn that
@@ -8213,48 +8211,38 @@ 
   if (reg_used_between_p (get_t_reg_rtx (), testing_insn, curr_insn))
     FAIL;
 
-  for (rtx i = prev_nonnote_insn_bb (testing_insn); i != NULL_RTX;
-       i = prev_nonnote_insn_bb (i))
+  while (true)
     {
-      if (LABEL_P (i) || BARRIER_P (i))
+      set_of_reg s1 = sh_find_set_of_reg (tested_reg, s0.insn,
+					  prev_nonnote_insn_bb);
+      if (s1.set_src == NULL_RTX)
 	break;
-      if (!NONJUMP_INSN_P (i))
-	continue;
 
-      if (reg_set_p (tested_reg, i))
+      if (t_reg_operand (s1.set_src, VOIDmode))
+	operands[2] = GEN_INT (treg_value ^ 1);
+      else if (negt_reg_operand (s1.set_src, VOIDmode))
+	operands[2] = GEN_INT (treg_value);
+      else if (REG_P (s1.set_src))
 	{
-	  const_rtx tested_reg_set = set_of (tested_reg, i);
+	   /* If it's a reg-reg copy follow the copied reg.  This can
+	      happen e.g. when T bit store zero-extensions are
+	      eliminated.  */
+	  tested_reg = s1.set_src;
+	  s0.insn = s1.insn;
+	  continue;
+	}
 
-	  /* It could also be a clobber...  */
-	  if (tested_reg_set == NULL_RTX || GET_CODE (tested_reg_set) != SET)
-	    break;
+	/* It's only safe to remove the testing insn if the T bit is not
+	   modified between the testing insn and the insn that stores the
+	   T bit.  Notice that some T bit stores such as negc also modify
+	   the T bit.  */
+	if (modified_between_p (get_t_reg_rtx (), s1.insn, testing_insn)
+	    || modified_in_p (get_t_reg_rtx (), s1.insn))
+	  operands[2] = NULL_RTX;
 
-	  rtx set_op1 = XEXP (tested_reg_set, 1);
-	  if (t_reg_operand (set_op1, VOIDmode))
-	    operands[2] = GEN_INT (treg_value ^ 1);
-	  else if (negt_reg_operand (set_op1, VOIDmode))
-	    operands[2] = GEN_INT (treg_value);
-	  else if (REG_P (set_op1))
-	    {
-	      /* If it's a reg-reg copy follow the copied reg.  This can
-		 happen e.g. when T bit store zero-extensions are
-		 eliminated.  */
-	      tested_reg = set_op1;
-	      continue;
-	    }
+	break;
+      }
 
-	  /* It's only safe to remove the testing insn if the T bit is not
-	     modified between the testing insn and the insn that stores the
-	     T bit.  Notice that some T bit stores such as negc also modify
-	     the T bit.  */
-	  if (modified_between_p (get_t_reg_rtx (), i, testing_insn)
-	      || modified_in_p (get_t_reg_rtx (), i))
-	    operands[2] = NULL_RTX;
-
-	  break;
-	}
-    }
-
   if (operands[2] == NULL_RTX)
     FAIL;
 
Index: gcc/testsuite/gcc.target/sh/pr51244-17.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr51244-17.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr51244-17.c	(revision 0)
@@ -0,0 +1,297 @@ 
+/* Check that no unnecessary zero extensions are done on values that are
+   results of arithmetic with T bit inputs.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O1" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
+/* { dg-final { scan-assembler-not "extu|exts" } } */
+
+int
+test00 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == 0;
+  return x == y;
+}
+
+int
+test01 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == d;
+  return x == y;
+}
+
+int
+test02 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c == d;
+  return x == y;
+}
+
+int
+test03 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c != d;
+  return x == y;
+}
+
+int
+test04 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c != d;
+  return x == y;
+}
+
+int
+test05 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == 0;
+  return x != y;
+}
+
+int
+test06 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == 0;
+  return x ^ y;
+}
+
+int
+test07 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == 0;
+  return x | y;
+}
+
+int
+test08 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == 0;
+  return x & y;
+}
+
+int
+test09 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == d;
+  return x != y;
+}
+
+int
+test10 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c == d;
+  return x != y;
+}
+
+int
+test11 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c != d;
+  return x != y;
+}
+
+int
+test12 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c != d;
+  return x != y;
+}
+
+int
+test13 (int a, int b, int c, int d, int e, int f)
+{
+  int x = a == b;
+  int y = c == 0;
+  int z = d == e;
+  return x == y || x == z;
+}
+
+int
+test14 (int a, int b, int c, int d, int e, int f)
+{
+  int x = a == b;
+  int y = c == 0;
+  int z = d == e;
+  return x == y && x == z;
+}
+
+int
+test15 (int a, int b, int c, int d, int e, int f)
+{
+  int x = a != b;
+  int y = c == 0;
+  int z = d == e;
+  return x == y || x == z;
+}
+
+int
+test16 (int a, int b, int c, int d, int e, int f)
+{
+  int x = a != b;
+  int y = c == 0;
+  int z = d == e;
+  return x == y && x == z;
+}
+
+int
+test17 (int a, int b, int c, int d, int e, int f)
+{
+  int x = a != b;
+  int y = c != 0;
+  int z = d == e;
+  return x == y || x == z;
+}
+
+int
+test18 (int a, int b, int c, int d, int e, int f)
+{
+  int x = a != b;
+  int y = c != 0;
+  int z = d == e;
+  return x == y && x == z;
+}
+
+int
+test19 (int a, int b, int c, int d, int e, int f)
+{
+  int x = a != b;
+  int y = c != 0;
+  int z = d == e;
+  return x == y || x == z;
+}
+
+int
+test20 (int a, int b, int c, int d, int e, int f)
+{
+  int x = a != b;
+  int y = c != 0;
+  int z = d != e;
+  return x == y && x == z;
+}
+
+int
+test21 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == 0;
+  return x + y;
+}
+
+int
+test22 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c == 0;
+  return x + y;
+}
+
+int
+test23 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c != 0;
+  return x + y;
+}
+
+int
+test24 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == 0;
+  return x - y;
+}
+
+int
+test25 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c == 0;
+  return x - y;
+}
+
+int
+test26 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c != 0;
+  return x - y;
+}
+
+int
+test27 (int a, int b, int c, int d)
+{
+  int x = a == b;
+  int y = c == 0;
+  return x * y;
+}
+
+int
+test28 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c == 0;
+  return x * y;
+}
+
+int
+test29 (int a, int b, int c, int d)
+{
+  int x = a != b;
+  int y = c != 0;
+  return x * y;
+}
+
+int
+test30 (int a, int b)
+{
+  return ((a & 0x7F) == 1)
+	  | ((a & 0xFF00) == 0x0200)
+	  | ((a & 0xFF0000) == 0x030000);
+}
+
+int
+test31 (int a, int b)
+{
+  return ((a & 0x7F) == 1)
+	  | ((a & 0xFF00) == 0x0200)
+	  | ((a & 0xFF0000) == 0x030000)
+	  | ((a & 0xFF000000) == 0x04000000);
+}
+
+int
+test32 (int* a, int b, int c, volatile char* d)
+{
+  d[1] = a[0] != 0;
+  return b;
+}
+
+int
+test33 (int* a, int b, int c, volatile char* d)
+{
+  d[1] = a[0] == 0;
+  return b;
+}
+
+char
+test34 (int a, int* b)
+{
+  return (b[4] & b[0] & a) == a;
+}
+
+unsigned char
+test35 (int a, int* b)
+{
+  return (b[4] & b[0] & a) == a;
+}