diff mbox

[SH] PR 50751 - Add support for SH2A movu.b and movu.w insns

Message ID 1344895432.2279.93.camel@yam-132-YW-E178-FTW
State New
Headers show

Commit Message

Oleg Endo Aug. 13, 2012, 10:03 p.m. UTC
Hello,

This adds support for the SH2A instructions movu.b and movu.w for
zero-extending mem loads with displacement addressing.
Tested on rev 190332 with
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

and no new failures.
OK?

Cheers,
Oleg

ChangeLog:

	PR target/50751
	* config/sh/constraints.md (Sra): New constraint.
	* config/sh/predicates.md (simple_mem_operand, 
	displacement_mem_operand, zero_extend_movu_operand): New 
	predicates.
	(zero_extend_operand): Check zero_extend_movu_operand for SH2A.
	* config/sh/sh.md (*zero_extendqisi2_disp_mem, 
	*zero_extendhisi2_disp_mem): Add new insns and two new related 
	peephole2 patterns.

testsuite/ChangeLog:

	PR target/50751
	* gcc.target/sh/pr50751-8.c: New.

Comments

Kaz Kojima Aug. 14, 2012, 11:31 a.m. UTC | #1
Oleg Endo <oleg.endo@t-online.de> wrote:
> This adds support for the SH2A instructions movu.b and movu.w for
> zero-extending mem loads with displacement addressing.
> Tested on rev 190332 with
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> 
> and no new failures.
> OK?

OK.

Regards,
	kaz
diff mbox

Patch

Index: gcc/config/sh/constraints.md
===================================================================
--- gcc/config/sh/constraints.md	(revision 190332)
+++ gcc/config/sh/constraints.md	(working copy)
@@ -49,6 +49,7 @@ 
 ;;  Sbw: QImode address with 12 bit displacement
 ;;  Snd: address without displacement
 ;;  Sdd: address with displacement
+;;  Sra: simple register address
 ;; W: vector
 ;; Z: zero in any mode
 ;;
@@ -307,3 +308,8 @@ 
        (match_test "GET_MODE (op) == QImode")
        (match_test "satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1))")))
 
+(define_memory_constraint "Sra"
+  "A memory reference that uses a simple register addressing."
+  (and (match_test "MEM_P (op)")
+       (match_test "REG_P (XEXP (op, 0))")))
+
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 190332)
+++ gcc/config/sh/sh.md	(working copy)
@@ -4842,6 +4842,88 @@ 
   "extu.b	%1,%0"
   [(set_attr "type" "arith")])
 
+;; SH2A supports two zero extending load instructions: movu.b and movu.w.
+;; They could also be used for simple memory addresses like @Rn by setting
+;; the displacement value to zero.  However, doing so too early results in
+;; missed opportunities for other optimizations such as post-inc or index
+;; addressing loads.
+;; Although the 'zero_extend_movu_operand' predicate does not allow simple
+;; register addresses (an address without a displacement, index, post-inc),
+;; zero-displacement addresses might be generated during reload, which are
+;; simplified to simple register addresses in turn.  Thus, we have to
+;; provide the Sdd and Sra alternatives in the patterns.
+(define_insn "*zero_extendqisi2_disp_mem"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+	(zero_extend:SI
+	  (match_operand:QI 1 "zero_extend_movu_operand" "Sdd,Sra")))]
+  "TARGET_SH2A"
+  "@
+	movu.b	%1,%0
+	movu.b	@(0,%t1),%0"
+  [(set_attr "type" "load")
+   (set_attr "length" "4")])
+
+(define_insn "*zero_extendhisi2_disp_mem"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+	(zero_extend:SI
+	  (match_operand:HI 1 "zero_extend_movu_operand" "Sdd,Sra")))]
+  "TARGET_SH2A"
+  "@
+	movu.w	%1,%0
+	movu.w	@(0,%t1),%0"
+  [(set_attr "type" "load")
+   (set_attr "length" "4")])
+
+;; Convert the zero extending loads in sequences such as:
+;;	movu.b	@(1,r5),r0	movu.w	@(2,r5),r0
+;;	mov.b	r0,@(1,r4)	mov.b	r0,@(1,r4)
+;;
+;; back to sign extending loads like:
+;;	mov.b	@(1,r5),r0	mov.w	@(2,r5),r0
+;;	mov.b	r0,@(1,r4)	mov.b	r0,@(1,r4)
+;;
+;; if the extension type is irrelevant.  The sign extending mov.{b|w} insn
+;; is only 2 bytes in size if the displacement is {K04|K05}.
+;; If the displacement is greater it doesn't matter, so we convert anyway.
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(zero_extend:SI (match_operand 1 "displacement_mem_operand" "")))
+   (set (match_operand 2 "general_operand" "")
+	(match_operand 3 "arith_reg_operand" ""))]
+  "TARGET_SH2A
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && peep2_reg_dead_p (2, operands[0])
+   && GET_MODE_SIZE (GET_MODE (operands[2]))
+      <= GET_MODE_SIZE (GET_MODE (operands[1]))"
+  [(set (match_dup 0) (sign_extend:SI (match_dup 1)))
+   (set (match_dup 2) (match_dup 3))])
+
+;; Fold sequences such as
+;;	mov.b	@r3,r7
+;;	extu.b	r7,r7
+;; into
+;;	movu.b	@(0,r3),r7
+;; This does not reduce the code size but the number of instructions is
+;; halved, which results in faster code.
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(sign_extend:SI (match_operand 1 "simple_mem_operand" "")))
+   (set (match_operand:SI 2 "arith_reg_dest" "")
+	(zero_extend:SI (match_operand 3 "arith_reg_operand" "")))]
+  "TARGET_SH2A
+   && GET_MODE (operands[1]) == GET_MODE (operands[3])
+   && (GET_MODE (operands[1]) == QImode || GET_MODE (operands[1]) == HImode)
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && (REGNO (operands[2]) == REGNO (operands[0])
+       || peep2_reg_dead_p (2, operands[0]))"
+  [(set (match_dup 2) (zero_extend:SI (match_dup 4)))]
+{
+  operands[4]
+    = replace_equiv_address (operands[1],
+			     gen_rtx_PLUS (SImode, XEXP (operands[1], 0),
+					   const0_rtx));
+})
+
 ;; -------------------------------------------------------------------------
 ;; Sign extension instructions
 ;; -------------------------------------------------------------------------
Index: gcc/config/sh/predicates.md
===================================================================
--- gcc/config/sh/predicates.md	(revision 190332)
+++ gcc/config/sh/predicates.md	(working copy)
@@ -368,12 +368,33 @@ 
 	  : nonimmediate_operand) (op, mode);
 })
 
+;; Returns 1 if OP is a mem with a simple register address.
+(define_predicate "simple_mem_operand"
+  (and (match_code "mem")
+       (match_test "arith_reg_operand (XEXP (op, 0), SImode)")))
+
+;; Returns 1 if OP is a mem with a valid displacement address.
+(define_predicate "displacement_mem_operand"
+  (and (match_code "mem")
+       (match_test "GET_CODE (XEXP (op, 0)) == PLUS")
+       (match_test "arith_reg_operand (XEXP (XEXP (op, 0), 0), SImode)")
+       (match_test "sh_legitimate_index_p (GET_MODE (op),
+					   XEXP (XEXP (op, 0), 1),
+					   TARGET_SH2A, true)")))
+
+;; Returns 1 if the operand can be used in an SH2A movu.{b|w} insn.
+(define_predicate "zero_extend_movu_operand"
+  (and (match_operand 0 "displacement_mem_operand")
+       (match_test "GET_MODE (op) == QImode || GET_MODE (op) == HImode")))
+
 ;; Returns 1 if the operand can be used in a zero_extend.
 (define_predicate "zero_extend_operand"
   (ior (and (match_test "TARGET_SHMEDIA")
 	    (match_operand 0 "general_extend_operand"))
        (and (match_test "! TARGET_SHMEDIA")
-	    (match_operand 0 "arith_reg_operand"))))
+	    (match_operand 0 "arith_reg_operand"))
+       (and (match_test "TARGET_SH2A")
+	    (match_operand 0 "zero_extend_movu_operand"))))
 
 ;; Returns 1 if OP can be source of a simple move operation. Same as
 ;; general_operand, but a LABEL_REF is valid, PRE_DEC is invalid as
Index: gcc/testsuite/gcc.target/sh/pr50751-8.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr50751-8.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-8.c	(revision 0)
@@ -0,0 +1,100 @@ 
+/* Check that on SH2A the 4 byte movu.b and movu.w displacement insns are
+   generated.  This has to be checked with -O2 because some of the patterns
+   rely on peepholes.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m2a*" } } */
+/* { dg-final { scan-assembler-times "movu.b" 4 } } */
+/* { dg-final { scan-assembler-times "movu.w" 3 } } */
+
+int
+test_00 (unsigned char* x)
+{
+  /* 1x movu.b  */
+  return x[0];
+}
+
+int
+test_01 (unsigned short* x)
+{
+  /* 1x movu.w  */
+  return x[0];
+}
+
+int
+test_02 (unsigned char* x)
+{
+  /* 1x movu.b  */
+  return x[1];
+}
+
+int
+test_03 (unsigned char* x)
+{
+  /* 1x movu.b  */
+  return x[32];
+}
+
+int
+test_04 (unsigned char* x)
+{
+  /* 1x movu.b  */
+  return x[9000];
+}
+
+int
+test_05 (unsigned short* x)
+{
+  /* 1x movu.w  */
+  return x[9000];
+}
+
+int
+test_06 (unsigned char* x, int i)
+{
+  /* No movu.b expected here.  Should use mov.b @(r0,r4) + extu.b instead.  */
+  return x[i];
+}
+
+int
+test_07 (unsigned short* x, int i)
+{
+  /* No movu.w expected here.  Should use mov.w @(r0,r4) + extu.w instead.  */
+  return x[i];
+}
+
+int
+test_08 (unsigned char* x, int c)
+{
+  /* No movu.b expected here.  Should use post-inc addressing instead.  */
+  int s = 0;
+  int i;
+  for (i = 0; i < c; ++i)
+    s += x[i];
+  return s;
+}
+
+void
+test_09 (unsigned char* x, unsigned char* y)
+{
+  /* No movu.b expected here, since the zero-extension is irrelevant.  */
+  x[1] = y[1];
+  x[2] = y[2];
+}
+
+void
+test_10 (unsigned char* x, unsigned short* y)
+{
+  /* No movu.w expected here, since the zero-extension is irrelevant.  */
+  x[1] = y[1];
+  x[2] = y[2];
+}
+
+int
+test_11 (unsigned char* x, unsigned short* y)
+{
+  /* 1x movu.w  */
+  int yy = y[1];
+  x[1] = yy;
+  return yy;
+}