Patchwork [SH] PR 54760 - Add DImode GBR loads/stores, fix optimization

login
register
mail settings
Submitter Oleg Endo
Date Oct. 15, 2012, 8:21 a.m.
Message ID <1350289276.2348.46.camel@yam-132-YW-E178-FTW>
Download mbox | patch
Permalink /patch/191491/
State New
Headers show

Comments

Oleg Endo - Oct. 15, 2012, 8:21 a.m.
Hello,

I somehow initially forgot to implement DImode GBR-based loads/stores.
The attached patch does that and also fixes a problem with the GBR address
mode optimization.
Tested on rev 192417 with
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

and no new failures.
OK?

Cheers,
Oleg

gcc/ChangeLog:

	PR target/54760
	* config/sh/sh.c (sh_find_base_reg_disp): Stop searching insns 
	when hitting a call insn if GBR is marked as call used.
	* config/sh/iterators.md (QIHISIDI): New mode iterator.
	* config/sh/predicates.md (gbr_address_mem): New predicate.
	* config/sh/sh.md (*movdi_gbr_load, *movdi_gbr_store): New 
	insn_and_split.
	Use QIHISIDI instead of QIHISI in unnamed GBR addressing splits.


testsuite/ChangeLog:

	PR target/54760
	* gcc.target/sh/pr54760-2.c: Add long long and unsigned long 
	long test functions.
	* gcc.target/sh/pr54760-4.c: New.
Kaz Kojima - Oct. 15, 2012, 11:34 a.m.
Oleg Endo <oleg.endo@t-online.de> wrote:
> I somehow initially forgot to implement DImode GBR based loads/stores.
> Attached patch does that and also fixes a problem with the GBR address
> mode optimization.
> Tested on rev 192417 with
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> 
> and no new failures.
> OK?

OK.

Regards,
	kaz

Patch

Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 192417)
+++ gcc/config/sh/sh.c	(working copy)
@@ -13383,6 +13383,10 @@ 
       for (rtx i = prev_nonnote_insn (insn); i != NULL;
 	   i = prev_nonnote_insn (i))
 	{
+	  if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
+	      && CALL_P (i))
+	    break;
+
 	  if (!NONJUMP_INSN_P (i))
 	    continue;
 
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 192417)
+++ gcc/config/sh/sh.md	(working copy)
@@ -10277,6 +10277,47 @@ 
   "mov.<bwl>	%0,@(0,gbr)"
   [(set_attr "type" "store")])
 
+;; DImode memory accesses have to be split in two SImode accesses.
+;; Split them before reload, so that reload gets a better chance to figure out
+;; how to deal with the R0 restriction for the individual SImode accesses.
+;; Do not match this insn during or after reload because it can't be split
+;; afterwards.
+(define_insn_and_split "*movdi_gbr_load"
+  [(set (match_operand:DI 0 "register_operand")
+	(match_operand:DI 1 "gbr_address_mem"))]
+  "TARGET_SH1 && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 4) (match_dup 6))]
+{
+  /* Swap low/high part load order on little endian, so that the result reg
+     of the second load can be used better.  */
+  int off = TARGET_LITTLE_ENDIAN ? 1 : 0;
+  operands[3 + off] = gen_lowpart (SImode, operands[0]);
+  operands[5 + off] = gen_lowpart (SImode, operands[1]);
+  operands[4 - off] = gen_highpart (SImode, operands[0]);
+  operands[6 - off] = gen_highpart (SImode, operands[1]);
+})
+
+(define_insn_and_split "*movdi_gbr_store"
+  [(set (match_operand:DI 0 "gbr_address_mem")
+	(match_operand:DI 1 "register_operand"))]
+  "TARGET_SH1 && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 4) (match_dup 6))]
+{
+  /* Swap low/high part store order on big endian, so that stores of function
+     call results can save a reg copy.  */
+  int off = TARGET_LITTLE_ENDIAN ? 0 : 1;
+  operands[3 + off] = gen_lowpart (SImode, operands[0]);
+  operands[5 + off] = gen_lowpart (SImode, operands[1]);
+  operands[4 - off] = gen_highpart (SImode, operands[0]);
+  operands[6 - off] = gen_highpart (SImode, operands[1]);
+})
+
 ;; Sometimes memory accesses do not get combined with the store_gbr insn,
 ;; in particular when the displacements are in the range of the regular move
 ;; insns.  Thus, in the first split pass after the combine pass we search
@@ -10287,15 +10328,15 @@ 
 ;; other operand) and there's no point of doing it if the GBR is not
 ;; referenced in a function at all.
 (define_split
-  [(set (match_operand:QIHISI 0 "register_operand")
-	(match_operand:QIHISI 1 "memory_operand"))]
+  [(set (match_operand:QIHISIDI 0 "register_operand")
+	(match_operand:QIHISIDI 1 "memory_operand"))]
   "TARGET_SH1 && !reload_in_progress && !reload_completed
    && df_regs_ever_live_p (GBR_REG)"
   [(set (match_dup 0) (match_dup 1))]
 {
   rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]);
   if (gbr_mem != NULL_RTX)
-    operands[1] = change_address (operands[1], GET_MODE (operands[1]), gbr_mem);
+    operands[1] = replace_equiv_address (operands[1], gbr_mem);
   else
     FAIL;
 })
@@ -10309,7 +10350,7 @@ 
 {
   rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]);
   if (gbr_mem != NULL_RTX)
-    operands[1] = change_address (operands[1], GET_MODE (operands[1]), gbr_mem);
+    operands[1] = replace_equiv_address (operands[1], gbr_mem);
   else
     FAIL;
 })
@@ -10328,23 +10369,22 @@ 
   if (gbr_mem != NULL_RTX)
     {
       operands[2] = gen_reg_rtx (GET_MODE (operands[1]));
-      operands[1] = change_address (operands[1], GET_MODE (operands[1]),
-				    gbr_mem);
+      operands[1] = replace_equiv_address (operands[1], gbr_mem);
     }
   else
     FAIL;
 })
 
 (define_split
-  [(set (match_operand:QIHISI 0 "memory_operand")
-	(match_operand:QIHISI 1 "register_operand"))]
+  [(set (match_operand:QIHISIDI 0 "memory_operand")
+	(match_operand:QIHISIDI 1 "register_operand"))]
   "TARGET_SH1 && !reload_in_progress && !reload_completed
    && df_regs_ever_live_p (GBR_REG)"
   [(set (match_dup 0) (match_dup 1))]
 {
   rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[0]);
   if (gbr_mem != NULL_RTX)
-    operands[0] = change_address (operands[0], GET_MODE (operands[0]), gbr_mem);
+    operands[0] = replace_equiv_address (operands[0], gbr_mem);
   else
     FAIL;
 })
Index: gcc/config/sh/iterators.md
===================================================================
--- gcc/config/sh/iterators.md	(revision 192417)
+++ gcc/config/sh/iterators.md	(working copy)
@@ -18,6 +18,7 @@ 
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
+(define_mode_iterator QIHISIDI [QI HI SI DI])
 (define_mode_iterator QIHISI [QI HI SI])
 (define_mode_iterator QIHI [QI HI])
 (define_mode_iterator HISI [HI SI])
Index: gcc/config/sh/predicates.md
===================================================================
--- gcc/config/sh/predicates.md	(revision 192417)
+++ gcc/config/sh/predicates.md	(working copy)
@@ -1139,3 +1139,20 @@ 
 
   return INTVAL (op) >= 0 && INTVAL (op) <= max_disp;
 })
+
+;; A predicate that determines whether OP is a valid GBR addressing mode
+;; memory reference.
+(define_predicate "gbr_address_mem"
+  (match_code "mem")
+{
+  rtx addr = XEXP (op, 0);
+
+  if (REG_P (addr) && REGNO (addr) == GBR_REG)
+    return true;
+  if (GET_CODE (addr) == PLUS
+      && REG_P (XEXP (addr, 0)) && REGNO (XEXP (addr, 0)) == GBR_REG
+      && gbr_displacement (XEXP (addr, 1), mode))
+    return true;
+
+  return false;
+})
Index: gcc/testsuite/gcc.target/sh/pr54760-2.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr54760-2.c	(revision 192417)
+++ gcc/testsuite/gcc.target/sh/pr54760-2.c	(working copy)
@@ -9,107 +9,129 @@ 
 /* ---------------------------------------------------------------------------
   Simple GBR load.
 */
-#define func(name, type, disp)\
-  int \
+#define func(name, rettype, type, disp)\
+  rettype \
   name ## _tp_load (void) \
   { \
     type* tp = (type*)__builtin_thread_pointer (); \
     return tp[disp]; \
   }
 
-func (test00, int, 0)
-func (test01, int, 5)
-func (test02, int, 255)
+func (test00, int, int, 0)
+func (test01, int, int, 5)
+func (test02, int, int, 255)
 
-func (test03, short, 0)
-func (test04, short, 5)
-func (test05, short, 255)
+func (test03, int, short, 0)
+func (test04, int, short, 5)
+func (test05, int, short, 255)
 
-func (test06, char, 0)
-func (test07, char, 5)
-func (test08, char, 255)
+func (test06, int, char, 0)
+func (test07, int, char, 5)
+func (test08, int, char, 255)
 
-func (test09, unsigned int, 0)
-func (test10, unsigned int, 5)
-func (test11, unsigned int, 255)
+func (test09, int, unsigned int, 0)
+func (test10, int, unsigned int, 5)
+func (test11, int, unsigned int, 255)
 
-func (test12, unsigned short, 0)
-func (test13, unsigned short, 5)
-func (test14, unsigned short, 255)
+func (test12, int, unsigned short, 0)
+func (test13, int, unsigned short, 5)
+func (test14, int, unsigned short, 255)
 
-func (test15, unsigned char, 0)
-func (test16, unsigned char, 5)
-func (test17, unsigned char, 255)
+func (test15, int, unsigned char, 0)
+func (test16, int, unsigned char, 5)
+func (test17, int, unsigned char, 255)
 
+func (test18, long long, long long, 0)
+func (test19, long long, long long, 5)
+func (test20, long long, long long, 127)
+
+func (test21, long long, unsigned long long, 0)
+func (test22, long long, unsigned long long, 5)
+func (test23, long long, unsigned long long, 127)
+
 #undef func
 
 /* ---------------------------------------------------------------------------
   Simple GBR store.
 */
-#define func(name, type, disp)\
+#define func(name, argtype, type, disp)\
   void \
-  name ## _tp_store (int a) \
+  name ## _tp_store (argtype a) \
   { \
     type* tp = (type*)__builtin_thread_pointer (); \
     tp[disp] = (type)a; \
   }
 
-func (test00, int, 0)
-func (test01, int, 5)
-func (test02, int, 255)
+func (test00, int, int, 0)
+func (test01, int, int, 5)
+func (test02, int, int, 255)
 
-func (test03, short, 0)
-func (test04, short, 5)
-func (test05, short, 255)
+func (test03, int, short, 0)
+func (test04, int, short, 5)
+func (test05, int, short, 255)
 
-func (test06, char, 0)
-func (test07, char, 5)
-func (test08, char, 255)
+func (test06, int, char, 0)
+func (test07, int, char, 5)
+func (test08, int, char, 255)
 
-func (test09, unsigned int, 0)
-func (test10, unsigned int, 5)
-func (test11, unsigned int, 255)
+func (test09, int, unsigned int, 0)
+func (test10, int, unsigned int, 5)
+func (test11, int, unsigned int, 255)
 
-func (test12, unsigned short, 0)
-func (test13, unsigned short, 5)
-func (test14, unsigned short, 255)
+func (test12, int, unsigned short, 0)
+func (test13, int, unsigned short, 5)
+func (test14, int, unsigned short, 255)
 
-func (test15, unsigned char, 0)
-func (test16, unsigned char, 5)
-func (test17, unsigned char, 255)
+func (test15, int, unsigned char, 0)
+func (test16, int, unsigned char, 5)
+func (test17, int, unsigned char, 255)
 
+func (test18, long long, long long, 0)
+func (test19, long long, long long, 5)
+func (test20, long long, long long, 127)
+
+func (test21, long long, unsigned long long, 0)
+func (test22, long long, unsigned long long, 5)
+func (test23, long long, unsigned long long, 127)
+
 #undef func
 
 /* ---------------------------------------------------------------------------
   Arithmetic on the result of a GBR load.
 */
-#define func(name, type, disp, op, opname)\
-  int \
-  name ## _tp_load_arith_ ##opname (int a) \
+#define func(name, retargtype, type, disp, op, opname)\
+  retargtype \
+  name ## _tp_load_arith_ ##opname (retargtype a) \
   { \
     type* tp = (type*)__builtin_thread_pointer (); \
     return tp[disp] op a; \
   }
 
 #define funcs(op, opname) \
-  func (test00, int, 0, op, opname) \
-  func (test01, int, 5, op, opname) \
-  func (test02, int, 255, op, opname) \
-  func (test03, short, 0, op, opname) \
-  func (test04, short, 5, op, opname) \
-  func (test05, short, 255, op, opname) \
-  func (test06, char, 0, op, opname) \
-  func (test07, char, 5, op, opname) \
-  func (test08, char, 255, op, opname) \
-  func (test09, unsigned int, 0, op, opname) \
-  func (test10, unsigned int, 5, op, opname) \
-  func (test11, unsigned int, 255, op, opname) \
-  func (test12, unsigned short, 0, op, opname) \
-  func (test13, unsigned short, 5, op, opname) \
-  func (test14, unsigned short, 255, op, opname) \
-  func (test15, unsigned char, 0, op, opname) \
-  func (test16, unsigned char, 5, op, opname) \
-  func (test17, unsigned char, 255, op, opname) \
+  func (test00, int, int, 0, op, opname) \
+  func (test01, int, int, 5, op, opname) \
+  func (test02, int, int, 255, op, opname) \
+  func (test03, int, short, 0, op, opname) \
+  func (test04, int, short, 5, op, opname) \
+  func (test05, int, short, 255, op, opname) \
+  func (test06, int, char, 0, op, opname) \
+  func (test07, int, char, 5, op, opname) \
+  func (test08, int, char, 255, op, opname) \
+  func (test09, int, unsigned int, 0, op, opname) \
+  func (test10, int, unsigned int, 5, op, opname) \
+  func (test11, int, unsigned int, 255, op, opname) \
+  func (test12, int, unsigned short, 0, op, opname) \
+  func (test13, int, unsigned short, 5, op, opname) \
+  func (test14, int, unsigned short, 255, op, opname) \
+  func (test15, int, unsigned char, 0, op, opname) \
+  func (test16, int, unsigned char, 5, op, opname) \
+  func (test17, int, unsigned char, 255, op, opname) \
+  func (test18, long long, long long, 0, op, opname) \
+  func (test19, long long, long long, 5, op, opname) \
+  func (test20, long long, long long, 127, op, opname) \
+  func (test21, long long, unsigned long long, 0, op, opname) \
+  func (test22, long long, unsigned long long, 5, op, opname) \
+  func (test23, long long, unsigned long long, 127, op, opname) \
 
 funcs (+, plus)
 funcs (-, minus)
@@ -124,8 +146,8 @@ 
 /* ---------------------------------------------------------------------------
   Arithmetic of the result of two GBR loads.
 */
-#define func(name, type, disp0, disp1, op, opname)\
-  int \
+#define func(name, rettype, type, disp0, disp1, op, opname)\
+  rettype \
   name ## _tp_load_load_arith_ ##opname (void) \
   { \
     type* tp = (type*)__builtin_thread_pointer (); \
@@ -133,18 +155,22 @@ 
   }
 
 #define funcs(op, opname) \
-  func (test00, int, 0, 5, op, opname) \
-  func (test02, int, 1, 255, op, opname) \
-  func (test03, short, 0, 5, op, opname) \
-  func (test05, short, 1, 255, op, opname) \
-  func (test06, char, 0, 5, op, opname) \
-  func (test08, char, 1, 255, op, opname) \
-  func (test09, unsigned int, 0, 5, op, opname) \
-  func (test11, unsigned int, 1, 255, op, opname) \
-  func (test12, unsigned short, 0, 5, op, opname) \
-  func (test14, unsigned short, 1, 255, op, opname) \
-  func (test15, unsigned char, 0, 5, op, opname) \
-  func (test17, unsigned char, 1, 255, op, opname) \
+  func (test00, int, int, 0, 5, op, opname) \
+  func (test02, int, int, 1, 255, op, opname) \
+  func (test03, int, short, 0, 5, op, opname) \
+  func (test05, int, short, 1, 255, op, opname) \
+  func (test06, int, char, 0, 5, op, opname) \
+  func (test08, int, char, 1, 255, op, opname) \
+  func (test09, int, unsigned int, 0, 5, op, opname) \
+  func (test11, int, unsigned int, 1, 255, op, opname) \
+  func (test12, int, unsigned short, 0, 5, op, opname) \
+  func (test14, int, unsigned short, 1, 255, op, opname) \
+  func (test15, int, unsigned char, 0, 5, op, opname) \
+  func (test17, int, unsigned char, 1, 255, op, opname) \
+  func (test18, long long, long long, 0, 5, op, opname) \
+  func (test19, long long, long long, 1, 127, op, opname) \
+  func (test20, long long, unsigned long long, 0, 5, op, opname) \
+  func (test21, long long, unsigned long long, 1, 127, op, opname) \
 
 funcs (+, plus)
 funcs (-, minus)
@@ -180,6 +206,10 @@ 
 func (test14, unsigned short, 1, 255)
 func (test15, unsigned char, 0, 5)
 func (test17, unsigned char, 1, 255)
+func (test18, long long, 0, 5)
+func (test19, long long, 1, 127)
+func (test20, unsigned long long, 0, 5)
+func (test21, unsigned long long, 1, 127)
 
 #undef func
 
@@ -187,33 +217,39 @@ 
   GBR load, arithmetic, GBR store
 */
 
-#define func(name, type, disp, op, opname)\
+#define func(name, argtype, type, disp, op, opname)\
   void \
-  name ## _tp_load_arith_store_ ##opname (int a) \
+  name ## _tp_load_arith_store_ ##opname (argtype a) \
   { \
     type* tp = (type*)__builtin_thread_pointer (); \
     tp[disp] op a; \
   }
 
 #define funcs(op, opname) \
-  func (test00, int, 0, op, opname) \
-  func (test01, int, 5, op, opname) \
-  func (test02, int, 255, op, opname) \
-  func (test03, short, 0, op, opname) \
-  func (test04, short, 5, op, opname) \
-  func (test05, short, 255, op, opname) \
-  func (test06, char, 0, op, opname) \
-  func (test07, char, 5, op, opname) \
-  func (test08, char, 255, op, opname) \
-  func (test09, unsigned int, 0, op, opname) \
-  func (test10, unsigned int, 5, op, opname) \
-  func (test11, unsigned int, 255, op, opname) \
-  func (test12, unsigned short, 0, op, opname) \
-  func (test13, unsigned short, 5, op, opname) \
-  func (test14, unsigned short, 255, op, opname) \
-  func (test15, unsigned char, 0, op, opname) \
-  func (test16, unsigned char, 5, op, opname) \
-  func (test17, unsigned char, 255, op, opname) \
+  func (test00, int, int, 0, op, opname) \
+  func (test01, int, int, 5, op, opname) \
+  func (test02, int, int, 255, op, opname) \
+  func (test03, int, short, 0, op, opname) \
+  func (test04, int, short, 5, op, opname) \
+  func (test05, int, short, 255, op, opname) \
+  func (test06, int, char, 0, op, opname) \
+  func (test07, int, char, 5, op, opname) \
+  func (test08, int, char, 255, op, opname) \
+  func (test09, int, unsigned int, 0, op, opname) \
+  func (test10, int, unsigned int, 5, op, opname) \
+  func (test11, int, unsigned int, 255, op, opname) \
+  func (test12, int, unsigned short, 0, op, opname) \
+  func (test13, int, unsigned short, 5, op, opname) \
+  func (test14, int, unsigned short, 255, op, opname) \
+  func (test15, int, unsigned char, 0, op, opname) \
+  func (test16, int, unsigned char, 5, op, opname) \
+  func (test17, int, unsigned char, 255, op, opname) \
+  func (test18, long long, long long, 0, op, opname) \
+  func (test19, long long, long long, 5, op, opname) \
+  func (test20, long long, long long, 127, op, opname) \
+  func (test21, long long, unsigned long long, 0, op, opname) \
+  func (test22, long long, unsigned long long, 5, op, opname) \
+  func (test23, long long, unsigned long long, 127, op, opname) \
 
 funcs (+=, plus)
 funcs (-=, minus)
Index: gcc/testsuite/gcc.target/sh/pr54760-4.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr54760-4.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr54760-4.c	(revision 0)
@@ -0,0 +1,19 @@ 
+/* Check that the GBR address optimization does not combine a gbr store
+   and its use when a function call is in between, when GBR is a call used
+   register, i.e. it is invalidated by function calls.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O1 -fcall-used-gbr" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } }  */
+/* { dg-final { scan-assembler "stc\tgbr" } } */
+
+extern int test00 (void);
+int
+test01 (int x)
+{
+  /* We must see a stc gbr,rn before the function call, because
+     a function call could modify the gbr.  In this case the user requests
+     the old gbr value, before the function call.  */
+  int* p = (int*)__builtin_thread_pointer ();
+  p[5] = test00 ();
+  return 0;
+}