diff mbox

S/390: Improve risbg usage

Message ID 20150722135016.GA23197@maggie
State New
Headers show

Commit Message

Andreas Krebbel July 22, 2015, 1:50 p.m. UTC
Hi,

with the attached patch we use risbg in more situations.

This especially helps the SpecCPU 400.perlbench testcase.

Bootstrapped on s390 and s390x. No regressions.

I'll commit the patch after waiting a few days for review comments.

Bye,

-Andreas-


gcc/ChangeLog:

2015-07-22  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/s390.c (s390_rtx_costs): Make risbg patterns
	cheaper.
	(s390_expand_insv): Don't generate risbg pattern for constant zero
	sources.
	* config/s390/s390.md ("*insv<mode>_zEC12_appendbitsleft")
	("*insv<mode>_z10_appendbitsleft"): New pattern definitions.  New
	splitters.

gcc/testsuite/ChangeLog:

2015-07-22  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/insv-1.c: New test.
	* gcc.target/s390/insv-2.c: New test.
	* gcc.target/s390/insv-3.c: New test.
diff mbox

Patch

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 861dfb2..a8712b9 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -3321,13 +3321,26 @@  s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
       *total = 0;
       return true;
 
+    case IOR:
+      /* risbg: (x & ((1 << s) - 1)) | (y << s).  Otherwise fall through.  */
+      if (GET_CODE (XEXP (x, 0)) == AND
+	  && GET_CODE (XEXP (x, 1)) == ASHIFT
+	  && REG_P (XEXP (XEXP (x, 0), 0))
+	  && REG_P (XEXP (XEXP (x, 1), 0))
+	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
+	  && (UINTVAL (XEXP (XEXP (x, 0), 1))
+	      == (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
+	{
+	  *total = COSTS_N_INSNS (2);
+	  return true;
+	}
     case ASHIFT:
     case ASHIFTRT:
     case LSHIFTRT:
     case ROTATE:
     case ROTATERT:
     case AND:
-    case IOR:
     case XOR:
     case NEG:
     case NOT:
@@ -5839,8 +5852,17 @@  s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
 
       if (mode_s == VOIDmode)
 	{
-	  /* Assume const_int etc already in the proper mode.  */
-	  src = force_reg (mode, src);
+	  /* For constant zero values the representation with AND
+	     appears to be folded in more situations than the (set
+	     (zero_extract) ...).
+	     We only do this when the start and end of the bitfield
+	     remain in the same SImode chunk.  That way nihf or nilf
+	     can be used.
+	     The AND patterns might still generate a risbg for this.  */
+	  if (src == const0_rtx && bitpos / 32  == (bitpos + bitsize - 1) / 32)
+	    return false;
+	  else
+	    src = force_reg (mode, src);
 	}
       else if (mode_s != mode)
 	{
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 8c07d1b..2961f61 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -3776,6 +3776,71 @@ 
   [(set_attr "op_type" "RIE")
    (set_attr "z10prop" "z10_super_E1")])
 
+; Append Y to the left of the low S bits of X (variant without CC clobber):
+; x = (y << s) | (x & ((1 << s) - 1)); operand 2 must equal (1 << s) - 1.
+(define_insn "*insv<mode>_zEC12_appendbitsleft"
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+	(ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
+			  (match_operand:GPR 2 "immediate_operand" ""))
+		 (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
+			     (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
+  "TARGET_ZEC12 && UINTVAL (operands[2]) == (HOST_WIDE_INT_1U << UINTVAL (operands[4])) - 1"
+  "risbgn\t%0,%3,64-<bitsize>,64-%4-1,%4"
+  [(set_attr "op_type" "RIE")
+   (set_attr "z10prop" "z10_super_E1")])
+
+(define_insn "*insv<mode>_z10_appendbitsleft"
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+	(ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
+			  (match_operand:GPR 2 "immediate_operand" ""))
+		 (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
+			     (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))
+   (clobber (reg:CC CC_REGNUM))]  ; the risbg form is modelled as clobbering CC
+  "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) == (HOST_WIDE_INT_1U << UINTVAL (operands[4])) - 1"
+  "risbg\t%0,%3,64-<bitsize>,64-%4-1,%4"
+  [(set_attr "op_type" "RIE")
+   (set_attr "z10prop" "z10_super_E1")])
+
+; z = (x << c) | (y >> d) with (x << c) and (y >> d) not overlapping after shifting
+;  -> z = y >> d; z = (x << c) | (z & ((1 << c) - 1))
+;  -> z = y >> d; z = risbg;   (z is written first, so it must not overlap x)
+
+(define_split
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+	(ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
+			       (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+		 (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
+			     (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
+  "TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize> && !reg_overlap_mentioned_p (operands[0], operands[3])"
+  [(set (match_dup 0)
+	(lshiftrt:GPR (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(ior:GPR (and:GPR (match_dup 0) (match_dup 5))
+		 (ashift:GPR (match_dup 3) (match_dup 4))))]
+{
+  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << UINTVAL (operands[4])) - 1);  /* Mask of the low operands[4] bits.  */
+})
+
+(define_split
+  [(parallel
+    [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+	  (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
+				 (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+		   (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
+			       (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))
+     (clobber (reg:CC CC_REGNUM))])]
+  "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize> && !reg_overlap_mentioned_p (operands[0], operands[3])"
+  [(set (match_dup 0)  ; operand 0 is written before operand 3 is read, hence the overlap check
+	(lshiftrt:GPR (match_dup 1) (match_dup 2)))
+   (parallel
+    [(set (match_dup 0)
+	  (ior:GPR (and:GPR (match_dup 0) (match_dup 5))
+		   (ashift:GPR (match_dup 3) (match_dup 4))))
+     (clobber (reg:CC CC_REGNUM))])]
+{
+  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << UINTVAL (operands[4])) - 1);  /* Mask of the low operands[4] bits.  */
+})
+
 (define_insn "*r<noxa>sbg_<mode>_noshift"
   [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
 	(IXOR:GPR
diff --git a/gcc/testsuite/gcc.target/s390/insv-1.c b/gcc/testsuite/gcc.target/s390/insv-1.c
new file mode 100644
index 0000000..e6c1b8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-1.c
@@ -0,0 +1,111 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z10 -mzarch" } */
+
+unsigned long
+foo1 (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & (((1UL << 5) - 1)));
+}
+
+/* This generates very different RTX than foo1.  The output reg (r2)
+   matches the unshifted argument.  So it actually is a
+   (set (zero_extract a 59 0) b) */
+unsigned long
+foo2 (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & (((1UL << 5) - 1)));
+}
+
+/* risbg cannot be used when fewer bits are removed with the mask.  */
+
+unsigned long
+foo1b (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 1);
+}
+
+unsigned long
+foo2b (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 1);
+}
+
+/* risbg cannot be used when the masked bits would end up in the
+   result since a real OR is required then.  */
+unsigned long
+foo1c (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 127);
+}
+
+unsigned long
+foo2c (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 127);
+}
+
+unsigned long
+foo3 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 5) | (b >> 59);
+#else
+  return (a << 5) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 5) | (a >> 59);
+#else
+  return (b << 5) | (a >> 27);
+#endif
+}
+
+/* risbg can also be used if some bits are left unused between the
+   two chunks.  */
+unsigned long
+foo3b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 6) | (b >> 59);
+#else
+  return (a << 6) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 6) | (a >> 59);
+#else
+  return (b << 6) | (a >> 27);
+#endif
+}
+
+/* One bit of overlap, so it is better not to use risbg.  */
+
+unsigned long
+foo3c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 4) | (b >> 59);
+#else
+  return (a << 4) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 4) | (a >> 59);
+#else
+  return (b << 4) | (a >> 27);
+#endif
+}
+
+/* { dg-final { scan-assembler-times "risbg" 6 } } */
diff --git a/gcc/testsuite/gcc.target/s390/insv-2.c b/gcc/testsuite/gcc.target/s390/insv-2.c
new file mode 100644
index 0000000..2ba6d6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-2.c
@@ -0,0 +1,111 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=zEC12 -mzarch" } */
+
+unsigned long
+foo1 (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & (((1UL << 5) - 1)));
+}
+
+/* This generates very different RTX than foo1.  The output reg (r2)
+   matches the unshifted argument.  So it actually is a
+   (set (zero_extract a 59 0) b) */
+unsigned long
+foo2 (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & (((1UL << 5) - 1)));
+}
+
+/* risbgn cannot be used when fewer bits are removed with the mask.  */
+
+unsigned long
+foo1b (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 1);
+}
+
+unsigned long
+foo2b (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 1);
+}
+
+/* risbgn cannot be used when the masked bits would end up in the
+   result since a real OR is required then.  */
+unsigned long
+foo1c (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 127);
+}
+
+unsigned long
+foo2c (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 127);
+}
+
+unsigned long
+foo3 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 5) | (b >> 59);
+#else
+  return (a << 5) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 5) | (a >> 59);
+#else
+  return (b << 5) | (a >> 27);
+#endif
+}
+
+/* risbgn can also be used if some bits are left unused between the
+   two chunks.  */
+unsigned long
+foo3b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 6) | (b >> 59);
+#else
+  return (a << 6) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 6) | (a >> 59);
+#else
+  return (b << 6) | (a >> 27);
+#endif
+}
+
+/* One bit of overlap, so it is better not to use risbgn.  */
+
+unsigned long
+foo3c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 4) | (b >> 59);
+#else
+  return (a << 4) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 4) | (a >> 59);
+#else
+  return (b << 4) | (a >> 27);
+#endif
+}
+
+/* { dg-final { scan-assembler-times "risbgn" 6 } } */
diff --git a/gcc/testsuite/gcc.target/s390/insv-3.c b/gcc/testsuite/gcc.target/s390/insv-3.c
new file mode 100644
index 0000000..0719750
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-3.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z10 -mzarch" } */
+
+/* risbg with the z bit would work here, but we would rather have this be a shift.  */
+struct
+{
+  int a:31;
+  int b:1;
+} s;
+
+void
+foo (int in)
+{
+  s.a = in;
+  s.b = 0;
+}
+
+/* { dg-final { scan-assembler-not "risbg" } } */