diff mbox

[SH] Adding some peepholes (PR 61142)

Message ID 1424900120.14981.407.camel@yam-132-YW-E178-FTW
State New
Headers show

Commit Message

Oleg Endo Feb. 25, 2015, 9:35 p.m. UTC
Hi,

These are the peepholes as mentioned in PR 65153 and in PR 61142.  They
try to wallpaper some bad RA choices and reduce the CSiBE code size by
approx. 3.9K bytes.

A problem I ran into with this one is that the peephole2 pass drops
REG_INC notes, which makes the following passes produce garbage
sometimes.  Instead of rejecting automodify mems in the peephole2
patterns, for now I'm manually adding the REG_INC notes after emitting
move insns.  Maybe peephole2 could do that automatically in the future.

Tested with 
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}".

Kaz, could you also please pre-test this on sh4-linux?

Cheers,
Oleg

gcc/ChangeLog
	PR target/61142
	* config/sh/sh.c (sh_check_add_incdec_notes): New function.
	* config/sh/sh-protos.h (sh_check_add_incdec_notes): Declare it.
	* config/sh/predicates.md (const_logical_operand): New 
	predicate.
	* config/sh/sh.md: Add new peephole2 patterns.

Comments

Kaz Kojima Feb. 26, 2015, 10:21 a.m. UTC | #1
Oleg Endo <oleg.endo@t-online.de> wrote:
> These are the peepholes as mentioned in PR 65153 and in PR 61142.  They
> try to wallpaper some bad RA choices and reduce the CSiBE code size by
> approx. 3.9K bytes.
> 
> A problem I ran into with this one is that the peephole2 pass drops
> REG_INC notes, which makes the following passes produce garbage
> sometimes.  Instead of rejecting automodify mems in the peephole2
> patterns, for now I'm manually adding the REG_INC notes after emitting
> move insns.  Maybe peephole2 could do that automatically in the future.
> 
> Tested with 
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}".
> 
> Kaz, could you also please pre-test this on sh4-linux?

No new failures on trunk revision 220928.

Regards,
	kaz
Oleg Endo Feb. 26, 2015, 7:16 p.m. UTC | #2
On Thu, 2015-02-26 at 19:21 +0900, Kaz Kojima wrote:
> Oleg Endo <oleg.endo@t-online.de> wrote:
> > These are the peepholes as mentioned in PR 65153 and in PR 61142.  They
> > try to wallpaper some bad RA choices and reduce the CSiBE code size by
> > approx. 3.9K bytes.
> > 
> > A problem I ran into with this one is that the peephole2 pass drops
> > REG_INC notes, which makes the following passes produce garbage
> > sometimes.  Instead of rejecting automodify mems in the peephole2
> > patterns, for now I'm manually adding the REG_INC notes after emitting
> > move insns.  Maybe peephole2 could do that automatically in the future.
> > 
> > Tested with 
> > make -k check RUNTESTFLAGS="--target_board=sh-sim
> > \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}".
> > 
> > Kaz, could you also please pre-test this on sh4-linux?
> 
> No new failures on trunk revision 220928.

Thanks.  Committed as r221026.

Cheers,
Oleg
diff mbox

Patch

Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 220947)
+++ gcc/config/sh/sh.md	(working copy)
@@ -14532,6 +14532,179 @@ 
 	(mem:HI (plus:SI (match_dup 1) (match_dup 2))))]
   "")
 
+;;	extu.bw	a,b
+;;	mov	b,c	->	extu.bw	a,c	(b dead)
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(zero_extend:SI (match_operand:QIHI 1 "arith_reg_operand")))
+   (set (match_operand:SI 2 "arith_reg_dest")
+	(match_dup 0))]
+  "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])"
+  [(set (match_dup 2) (zero_extend:SI (match_dup 1)))])
+
+;;	mov	r0,r1
+;;	extu.bw	r1,r1   ->	extu.bw	r0,r1	(dest may differ if r1 dead)
+(define_peephole2
+  [(set (match_operand 0 "arith_reg_dest")
+	(match_operand 1 "arith_reg_operand"))
+   (set (match_operand:SI 2 "arith_reg_dest")
+	(zero_extend:SI (match_operand:QIHI 3 "arith_reg_operand")))]
+  "TARGET_SH1
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && (REGNO (operands[0]) == REGNO (operands[2])
+       || peep2_reg_dead_p (2, operands[0]))"
+  [(set (match_dup 2) (zero_extend:SI (match_dup 1)))]
+{ /* Operand 1 was matched without a mode; re-create it in <MODE>mode.  */
+  operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
+})
+
+;;	mov	a,b
+;;	mov	b,a	->	< nop >		(b dead)
+(define_peephole2
+  [(set (match_operand 0 "register_operand")
+	(match_operand 1 "register_operand"))
+   (set (match_operand 2 "register_operand")
+	(match_operand 3 "register_operand"))]
+  "TARGET_SH1
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && REGNO (operands[1]) == REGNO (operands[2])
+   && peep2_reg_dead_p (2, operands[3])"
+  [(const_int 0)])
+
+;;	mov	#3,r4
+;;	and	r4,r1	->	mov	r1,r0
+;;	mov	r1,r0		and	#3,r0	(r4, r1 dead; also or, xor)
+(define_code_iterator ANDIORXOR [and ior xor])
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand")
+	(match_operand:SI 1 "const_logical_operand"))
+   (set (match_operand:SI 2) (ANDIORXOR:SI (match_dup 2) (match_dup 0)))
+   (set (reg:SI R0_REG) (match_dup 2))]
+  "TARGET_SH1
+   && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[2])"
+  [(set (reg:SI R0_REG) (match_dup 2))
+   (set (reg:SI R0_REG) (ANDIORXOR:SI (reg:SI R0_REG) (match_dup 1)))])
+
+;;	...	r2,r0		...	r2,r0
+;;	or	r1,r0	->	or	r0,r1
+;;	mov	r0,r1
+;;	(r0 dead; same for and, xor, add)
+(define_code_iterator ANDIORXORPLUS [and ior xor plus])
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(ANDIORXORPLUS:SI (match_dup 0) (match_operand:SI 1 "arith_reg_dest")))
+   (set (match_dup 1) (match_dup 0))]
+  "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])"
+  [(set (match_dup 1) (ANDIORXORPLUS:SI (match_dup 1) (match_dup 0)))])
+
+;;	mov	r12,r0
+;;	add	#-48,r0     ->	add	#-48,r12
+;;	mov.l	r0,@(4,r10)	mov.l	r12,@(4,r10)
+;;	(r12 and r0 dead)
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(match_operand:SI 1 "arith_reg_dest"))
+   (set (match_dup 0) (plus:SI (match_dup 0)
+			       (match_operand:SI 2 "const_int_operand")))
+   (set (match_operand:SI 3 "general_movdst_operand") (match_dup 0))]
+  "TARGET_SH1
+   && peep2_reg_dead_p (2, operands[1]) && peep2_reg_dead_p (3, operands[0])"
+  [(const_int 0)]
+{ /* REG_INC notes are re-added by hand; peephole2 does not keep them.  */
+  emit_insn (gen_addsi3 (operands[1], operands[1], operands[2]));
+  sh_check_add_incdec_notes (emit_move_insn (operands[3], operands[1]));
+})
+
+;;	mov.l	@(r0,r9),r1
+;;	mov	r1,r0	    ->	mov	@(r0,r9),r0	(r1 dead)
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(match_operand:SI 1 "general_movsrc_operand"))
+   (set (match_operand:SI 2 "arith_reg_dest")
+	(match_dup 0))]
+  "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])"
+  [(const_int 0)]
+{ /* REG_INC notes are re-added by hand; peephole2 does not keep them.  */
+  sh_check_add_incdec_notes (emit_move_insn (operands[2], operands[1]));
+})
+
+(define_peephole2	;; QIHI variant: move + reg copy -> one move (op 0 dead).
+  [(set (match_operand:QIHI 0 "register_operand")
+	(match_operand:QIHI 1 "movsrc_no_disp_mem_operand"))
+   (set (match_operand:QIHI 2 "register_operand")
+	(match_dup 0))]
+  "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])"
+  [(const_int 0)]
+{ /* REG_INC notes are re-added by hand; peephole2 does not keep them.  */
+  sh_check_add_incdec_notes (emit_move_insn (operands[2], operands[1]));
+})
+
+(define_peephole2	;; Sign extending QIHI -> SI move + reg copy (op 0 dead).
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(sign_extend:SI (match_operand:QIHI 1 "movsrc_no_disp_mem_operand")))
+   (set (match_operand:SI 2 "arith_reg_dest")
+	(match_dup 0))]
+  "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])"
+  [(const_int 0)]
+{ /* REG_INC notes are re-added by hand; peephole2 does not keep them.  */
+  sh_check_add_incdec_notes (emit_insn (gen_extend<mode>si2 (operands[2],
+							     operands[1])));
+})
+
+;;	mov.w	@(18,r1),r0 (r0 = HImode)
+;;	mov	r0,r1       (r0 = r1 = HImode)		mov.w	@(18,r1),r0
+;;	...	..,r13      (r13 = SImode)	-> 	...	..,r13
+;;	tst	r1,r13					tst	r0,r13	(r1 dead)
+(define_peephole2
+  [(set (match_operand 0 "arith_reg_dest")
+	(match_operand 1 "arith_reg_dest"))
+   (set (match_operand:SI 2 "arith_reg_dest")
+	(match_operand:SI 3))
+   (set (reg:SI T_REG)
+	(eq:SI (and:SI (match_operand:SI 4 "arith_reg_operand")
+		       (match_operand:SI 5 "arith_reg_operand"))
+	       (const_int 0)))]
+  "TARGET_SH1
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[3])
+   && (REGNO (operands[0]) == REGNO (operands[4])
+       || REGNO (operands[0]) == REGNO (operands[5]))
+   && (REGNO (operands[2]) == REGNO (operands[4])
+       || REGNO (operands[2]) == REGNO (operands[5]))"
+  [(const_int 0)]
+{ /* Operand 1 was matched without a mode; the tst uses it in SImode.  */
+  sh_check_add_incdec_notes (emit_move_insn (operands[2], operands[3]));
+  emit_insn (gen_tstsi_t (operands[2],
+			  gen_rtx_REG (SImode, (REGNO (operands[1])))));
+})
+
+;;	mov.w	@(18,r1),r0 (r0 = HImode)
+;;	...	..,r13	    (r13 = SImode)		mov.w	@(18,r1),r0
+;;	mov	r0,r1       (r0 = r1 = HImode)	->	...	..,r13
+;;	tst	r1,r13					tst	r0,r13	(r1 dead)
+(define_peephole2
+  [(set (match_operand:SI 2 "arith_reg_dest")
+	(match_operand:SI 3))
+   (set (match_operand 0 "arith_reg_dest")
+	(match_operand 1 "arith_reg_operand"))
+   (set (reg:SI T_REG)
+	(eq:SI (and:SI (match_operand:SI 4 "arith_reg_operand")
+		       (match_operand:SI 5 "arith_reg_operand"))
+	       (const_int 0)))]
+  "TARGET_SH1
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[3])
+   && (REGNO (operands[0]) == REGNO (operands[4])
+       || REGNO (operands[0]) == REGNO (operands[5]))
+   && (REGNO (operands[2]) == REGNO (operands[4])
+       || REGNO (operands[2]) == REGNO (operands[5]))"
+  [(const_int 0)]
+{ /* Operand 1 was matched without a mode; the tst uses it in SImode.  */
+  sh_check_add_incdec_notes (emit_move_insn (operands[2], operands[3]));
+  emit_insn (gen_tstsi_t (operands[2],
+			  gen_rtx_REG (SImode, (REGNO (operands[1])))));
+})
+
 (define_peephole
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
Index: gcc/config/sh/predicates.md
===================================================================
--- gcc/config/sh/predicates.md	(revision 220947)
+++ gcc/config/sh/predicates.md	(working copy)
@@ -798,6 +798,12 @@ 
   return 0;
 })
 
+;; Returns true if OP is a valid constant source operand for the logical
+;; operations tst/and/or/xor #imm,r0, i.e. a const_int satisfying K08.
+(define_predicate "const_logical_operand"
+  (and (match_code "const_int")
+       (match_test "satisfies_constraint_K08 (op)")))
+
 ;; Like logical_operand but allows additional constant values which can be
 ;; done with zero extensions.  Used for the second operand of and insns.
 (define_predicate "logical_and_operand"
Index: gcc/config/sh/sh-protos.h
===================================================================
--- gcc/config/sh/sh-protos.h	(revision 220947)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -309,6 +309,7 @@ 
 
 extern bool sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno);
 extern void sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno);
+extern rtx_insn* sh_check_add_incdec_notes (rtx_insn* i);
 
 extern bool sh_in_recog_treg_set_expr (void);
 extern bool sh_recog_treg_set_expr (rtx op, machine_mode mode);
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 220947)
+++ gcc/config/sh/sh.c	(working copy)
@@ -13757,6 +13757,35 @@ 
     remove_note (i, n);
 }
 
+/* Walk the pattern of insn I with for_each_inc_dec and attach a REG_INC
+   note for every register it reports that has no such note on I yet.
+   Returns I, so that a call can directly wrap the emit_* call.
+   FIXME: lra.c (add_auto_inc_notes) does almost the same thing.
+   FIXME: The peephole2 patterns currently need this because the peephole2
+	  pass drops REG_INC notes, which makes later passes go wrong.  */
+rtx_insn*
+sh_check_add_incdec_notes (rtx_insn* i)
+{
+  struct reg_inc_note_adder
+  {
+    /* for_each_inc_dec callback.  DATA is the insn that gets the note.  */
+    static int
+    add_note (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED, rtx dest,
+	      rtx src ATTRIBUTE_UNUSED, rtx srcoff ATTRIBUTE_UNUSED,
+	      void* data)
+    {
+      rtx_insn* const insn = static_cast<rtx_insn*> (data);
+
+      gcc_assert (REG_P (dest));
+      if (!find_regno_note (insn, REG_INC, REGNO (dest)))
+	add_reg_note (insn, REG_INC, dest);
+      return 0;
+    }
+  };
+  for_each_inc_dec (PATTERN (i), reg_inc_note_adder::add_note, i);
+  return i;
+}
+
 /* Given an op rtx and an insn, try to find out whether the result of the
    specified op consists only of logical operations on T bit stores.  */
 bool