Patchwork [SH] PR 54089 - Reorg left shifts

login
register
mail settings
Submitter Oleg Endo
Date July 25, 2012, 8:31 a.m.
Message ID <1343205100.2149.36.camel@yam-132-YW-E178-FTW>
Download mbox | patch
Permalink /patch/173123/
State New
Headers show

Comments

Oleg Endo - July 25, 2012, 8:31 a.m.
Hello,

The attached patch reorganizes the SH left shift patterns.  The final
generated shift insns don't differ much, but it seems to have some
positive side effects on register allocation in some cases. 

Tested with
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,-m4-single/-ml,
-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}"

and no new failures.
CSiBE result-size for '-O2 -m4-single -ml -mpretend-cmove' shows a
little bit of noise here and there (total ~ -200 bytes)
Same for '-O2 -m2 -ml' (no dynamic shifts, total ~ -400 bytes) but with
a few improvements (bzip2/compress.c -180 bytes).

OK?

Cheers,
Oleg


ChangeLog:

	PR target/54089
	* config/sh/predicates.md (shift_count_operand): Handle
	not-SHMEDIA case.
	(p27_shift_count_operand, not_p27_shift_count_operand): New 
	predicates.
	config/sh/sh.md (ashlsi3): Remove parallel and T_REG clobber 
	from expander.  Do not emit shift insn for not-SHMEDIA case.
	(ashlsi3_std): Replace with ...
	(ashlsi3_k, ashlsi3_d): ... these new insns.
	config/sh/sh.c (gen_ashift): Make static.  Add sanity checks.  
	Emit ashlsi3_k insn instead of ashlsi3_std in ASHIFT case.
	(gen_ashift_hi): Make static.
	* config/sh/sh-protos.h (gen_ashift, gen_ashift_hi): Remove 
	forward declaration.
Kaz Kojima - July 25, 2012, 10:55 p.m.
Oleg Endo <oleg.endo@t-online.de> wrote:
> The attached patch reorganizes the SH left shift patterns.  The final
> generated shift insns don't differ much, but it seems to have some
> positive side effects on register allocation in some cases. 
> 
> Tested with
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,-m4-single/-ml,
> -m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}"
> 
> and no new failures.
> CSiBE result-size for '-O2 -m4-single -ml -mpretend-cmove' shows a
> little bit of noise here and there (total ~ -200 bytes)
> Same for '-O2 -m2 -ml' (no dynamic shifts, total ~ -400 bytes) but with
> a few improvements (bzip2/compress.c -180 bytes).
> 
> OK?

OK.

Regards,
	kaz

Patch

Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 189797)
+++ gcc/config/sh/sh.c	(working copy)
@@ -3277,9 +3277,11 @@ 
 
 /* Code to expand a shift.  */
 
-void
+static void
 gen_ashift (int type, int n, rtx reg)
 {
+  rtx n_rtx;
+
   /* Negative values here come from the shift_amounts array.  */
   if (n < 0)
     {
@@ -3290,26 +3292,30 @@ 
       n = -n;
     }
 
+  n_rtx = GEN_INT (n);
+  gcc_assert (satisfies_constraint_P27 (n_rtx));
+
   switch (type)
     {
     case ASHIFTRT:
-      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
+      emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
       break;
     case LSHIFTRT:
       if (n == 1)
-	emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
+	emit_insn (gen_lshrsi3_m (reg, reg, n_rtx));
       else
-	emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
+	emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
       break;
     case ASHIFT:
-      emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
+      emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
       break;
+    default:
+      gcc_unreachable ();
     }
 }
 
 /* Same for HImode */
-
-void
+static void
 gen_ashift_hi (int type, int n, rtx reg)
 {
   /* Negative values here come from the shift_amounts array.  */
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 189797)
+++ gcc/config/sh/sh.md	(working copy)
@@ -3492,10 +3492,9 @@ 
 ;; SImode shift left
 
 (define_expand "ashlsi3"
-  [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "")
-		   (ashift:SI (match_operand:SI 1 "arith_reg_operand" "")
-			      (match_operand:SI 2 "nonmemory_operand" "")))
-	      (clobber (reg:SI T_REG))])]
+  [(set (match_operand:SI 0 "arith_reg_operand" "")
+	(ashift:SI (match_operand:SI 1 "arith_reg_operand" "")
+		   (match_operand:SI 2 "shift_count_operand" "")))]
   ""
 {
   if (TARGET_SHMEDIA)
@@ -3503,56 +3502,74 @@ 
       emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2]));
       DONE;
     }
-  if (CONST_INT_P (operands[2])
-      && sh_dynamicalize_shift_p (operands[2]))
-    operands[2] = force_reg (SImode, operands[2]);
-  if (TARGET_DYNSHIFT)
+  if (TARGET_DYNSHIFT
+      && CONST_INT_P (operands[2]) && sh_dynamicalize_shift_p (operands[2]))
+      operands[2] = force_reg (SImode, operands[2]);
+})
+
+(define_insn "ashlsi3_k"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+	(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0,0")
+		   (match_operand:SI 2 "p27_shift_count_operand" "M,P27")))]
+  "TARGET_SH1"
+  "@
+	add	%0,%0
+	shll%O2	%0"
+  [(set_attr "type" "arith")])
+
+(define_insn_and_split "ashlsi3_d"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+		   (match_operand:SI 2 "shift_count_operand" "r")))]
+  "TARGET_DYNSHIFT"
+  "shld	%2,%0"
+  "&& (CONST_INT_P (operands[2]) && ! sh_dynamicalize_shift_p (operands[2]))"
+  [(const_int 0)]
+{
+  if (satisfies_constraint_P27 (operands[2]))
     {
-      emit_insn (gen_ashlsi3_std (operands[0], operands[1], operands[2]));
+      emit_insn (gen_ashlsi3_k (operands[0], operands[1], operands[2]));
       DONE;
     }
-  if (! immediate_operand (operands[2], GET_MODE (operands[2])))
-    FAIL;
-})
+  else if (!satisfies_constraint_P27 (operands[2]))
+    {
+      emit_insn (gen_ashlsi3_n (operands[0], operands[1], operands[2]));
+      DONE;
+    }
 
-;; This pattern is used by init_expmed for computing the costs of shift
-;; insns.
-(define_insn_and_split "ashlsi3_std"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r,r,r")
-	(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0,0,0,0")
-		   (match_operand:SI 2 "nonmemory_operand" "r,M,P27,?ri")))
-   (clobber (match_scratch:SI 3 "=X,X,X,&r"))]
-  "TARGET_DYNSHIFT || (TARGET_SH1 && satisfies_constraint_P27 (operands[2]))"
-  "@
-   shld	%2,%0
-   add	%0,%0
-   shll%O2	%0
-   #"
-  "TARGET_DYNSHIFT
-   && reload_completed
-   && CONST_INT_P (operands[2])
-   && ! satisfies_constraint_P27 (operands[2])"
-  [(set (match_dup 3) (match_dup 2))
-   (parallel
-    [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 3)))
-     (clobber (match_dup 4))])]
-{
-  operands[4] = gen_rtx_SCRATCH (SImode);
+  FAIL;
 }
-  [(set_attr "length" "*,*,*,4")
-   (set_attr "type" "dyn_shift,arith,arith,arith")])
+  [(set_attr "type" "arith")])
 
 (define_insn_and_split "ashlsi3_n"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
 	(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
-		   (match_operand:SI 2 "const_int_operand" "n")))
-   (clobber (reg:SI T_REG))]
-  "TARGET_SH1 && ! sh_dynamicalize_shift_p (operands[2])"
+		   (match_operand:SI 2 "not_p27_shift_count_operand" "")))]
+  "TARGET_SH1"
   "#"
-  "TARGET_SH1 && reload_completed"
-  [(use (reg:SI R0_REG))]
+  "&& (reload_completed || INTVAL (operands[2]) == 31
+       || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))"
+  [(const_int 0)]
 {
-  gen_shifty_op (ASHIFT, operands);
+  if (INTVAL (operands[2]) == 31)
+    {
+      /* If the shift amount is 31 we split into a different sequence before
+	 reload so that it gets a chance to allocate R0 for the sequence.
+	 If it fails to do so (due to pressure on R0), it will take one insn
+	 more for the and.  */
+      emit_insn (gen_andsi3 (operands[0], operands[1], const1_rtx));
+      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
+    }
+  else if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ())
+    {
+      /* If this pattern was picked and dynamic shifts are supported, switch
+	 to dynamic shift pattern before reload.  */
+      operands[2] = force_reg (SImode, operands[2]);
+      emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2]));
+    }
+  else
+    gen_shifty_op (ASHIFT, operands);
+
   DONE;
 })
 
Index: gcc/config/sh/predicates.md
===================================================================
--- gcc/config/sh/predicates.md	(revision 189797)
+++ gcc/config/sh/predicates.md	(working copy)
@@ -755,6 +755,13 @@ 
 (define_predicate "shift_count_operand"
   (match_code "const_int,const_double,const,symbol_ref,label_ref,subreg,reg,zero_extend,sign_extend")
 {
+  /* Allow T_REG as shift count for dynamic shifts, although it is not
+     really possible.  It will then be copied to a general purpose reg.  */
+  if (! TARGET_SHMEDIA)
+    return const_int_operand (op, mode)
+	   || (TARGET_DYNSHIFT && (arith_reg_operand (op, mode)
+				   || t_reg_operand (op, mode)));
+
   return (CONSTANT_P (op)
 	  ? (CONST_INT_P (op)
 	     ? (unsigned) INTVAL (op) < GET_MODE_BITSIZE (mode)
@@ -785,6 +792,14 @@ 
   return arith_reg_operand (op, mode);
 })
 
+(define_predicate "p27_shift_count_operand"
+  (and (match_code "const_int")
+       (match_test "satisfies_constraint_P27 (op)")))
+
+(define_predicate "not_p27_shift_count_operand"
+  (and (match_code "const_int")
+       (match_test "! satisfies_constraint_P27 (op)")))
+
 ;; TODO: Add a comment here.
 
 (define_predicate "shift_operator"
Index: gcc/config/sh/sh-protos.h
===================================================================
--- gcc/config/sh/sh-protos.h	(revision 189797)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -74,8 +74,6 @@ 
 extern void sh_emit_compare_and_branch (rtx *, enum machine_mode);
 extern void sh_emit_compare_and_set (rtx *, enum machine_mode);
 extern int shift_insns_rtx (rtx);
-extern void gen_ashift (int, int, rtx);
-extern void gen_ashift_hi (int, int, rtx);
 extern void gen_shifty_op (int, rtx *);
 extern void gen_shifty_hi_op (int, rtx *);
 extern bool expand_ashiftrt (rtx *);