Patchwork [rtl,i386] vec_merge simplification

login
register
mail settings
Submitter Marc Glisse
Date March 30, 2013, 2:47 p.m.
Message ID <alpine.DEB.2.02.1303301513400.10575@stedding.saclay.inria.fr>
Download mbox | patch
Permalink /patch/232523/
State New
Headers show

Comments

Marc Glisse - March 30, 2013, 2:47 p.m.
On Wed, 27 Mar 2013, Eric Botcazou wrote:

> OK, modulo a few nits:

Thanks, here is a version taking into account all your comments, and which 
still passes bootstrap+testsuite on x86_64-linux-gnu. I am not completely 
sure whether there is any point in checking !side_effects_p (op1) after 
rtx_equal_p (op0, op1), but I am still doing it as it seems safe.

Uros, are you ok with the testsuite part?


2013-03-30  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* simplify-rtx.c (simplify_binary_operation_1) <VEC_SELECT>:
 	Handle VEC_MERGE.
 	(simplify_ternary_operation) <VEC_MERGE>: Use unsigned HOST_WIDE_INT
 	for masks. Test for side effects. Handle nested VEC_MERGE. Handle
 	equal arguments.

gcc/testsuite/
 	* gcc.target/i386/merge-1.c: New testcase.
 	* gcc.target/i386/avx2-vpblendd128-1.c: Make it non-trivial.
Uros Bizjak - March 31, 2013, 8:32 a.m.
On Sat, Mar 30, 2013 at 3:47 PM, Marc Glisse <marc.glisse@inria.fr> wrote:

>> OK, modulo a few nits:
>
>
> Thanks, here is a version taking into account all your comments, and which still passes bootstrap+testsuite on x86_64-linux-gnu. I am not completely sure if there is a point checking !side_effects_p (op1) after rtx_equal_p (op0, op1), but I am still doing it as it seems safe.
>
> Uros, are you ok with the testsuite part?
>
>
> 2013-03-30  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         * simplify-rtx.c (simplify_binary_operation_1) <VEC_SELECT>:
>         Handle VEC_MERGE.
>         (simplify_ternary_operation) <VEC_MERGE>: Use unsigned HOST_WIDE_INT
>         for masks. Test for side effects. Handle nested VEC_MERGE. Handle
>         equal arguments.
>
> gcc/testsuite/
>         * gcc.target/i386/merge-1.c: New testcase.
>         * gcc.target/i386/avx2-vpblendd128-1.c: Make it non-trivial.

Yes, testsuite changes are also OK.

Thanks,
Uros.
Eric Botcazou - April 2, 2013, 7:34 a.m.
> Thanks, here is a version taking into account all your comments, and which
> still passes bootstrap+testsuite on x86_64-linux-gnu. I am not completely
> sure if there is a point checking !side_effects_p (op1) after rtx_equal_p
> (op0, op1), but I am still doing it as it seems safe.

It's also done in other cases (IOR, XOR, AND, etc.) where we return the first 
operand after having found out that the second operand is equal to it.  If the 
semantics of VEC_MERGE are to evaluate its operands, then they always need to 
be evaluated, whatever the result.

> gcc/
>  	* simplify-rtx.c (simplify_binary_operation_1) <VEC_SELECT>:
>  	Handle VEC_MERGE.
>  	(simplify_ternary_operation) <VEC_MERGE>: Use unsigned HOST_WIDE_INT
>  	for masks. Test for side effects. Handle nested VEC_MERGE. Handle
>  	equal arguments.

OK for mainline, thanks.
Marc Glisse - April 2, 2013, 7:53 a.m.
On Tue, 2 Apr 2013, Eric Botcazou wrote:

>> Thanks, here is a version taking into account all your comments, and which
>> still passes bootstrap+testsuite on x86_64-linux-gnu. I am not completely
>> sure if there is a point checking !side_effects_p (op1) after rtx_equal_p
>> (op0, op1), but I am still doing it as it seems safe.
>
> It's also done in other cases (IOR, XOR, AND, etc) where we return the first
> operand after having found out that the second operand is equal to it.  If the
> semantics of VEC_MERGE is to evaluate its operands, then they always need to
> be evaluated, whatever the result.

Thanks, I should indeed have checked the other uses... I got confused 
because at tree level, operand_equal_p implies that there are no side 
effects.

Patch

Index: simplify-rtx.c

===================================================================
--- simplify-rtx.c	(revision 197265)

+++ simplify-rtx.c	(working copy)

@@ -3553,20 +3553,45 @@  simplify_binary_operation_1 (enum rtx_co

 		  offset -= vec_size;
 		  vec = XEXP (vec, 1);
 		}
 	      vec = avoid_constant_pool_reference (vec);
 	    }
 
 	  if (GET_MODE (vec) == mode)
 	    return vec;
 	}
 
+      /* If we select elements in a vec_merge that all come from the same

+	 operand, select from that operand directly.  */

+      if (GET_CODE (op0) == VEC_MERGE)

+	{

+	  rtx trueop02 = avoid_constant_pool_reference (XEXP (op0, 2));

+	  if (CONST_INT_P (trueop02))

+	    {

+	      unsigned HOST_WIDE_INT sel = UINTVAL (trueop02);

+	      bool all_operand0 = true;

+	      bool all_operand1 = true;

+	      for (int i = 0; i < XVECLEN (trueop1, 0); i++)

+		{

+		  rtx j = XVECEXP (trueop1, 0, i);

+		  if (sel & (1 << UINTVAL (j)))

+		    all_operand1 = false;

+		  else

+		    all_operand0 = false;

+		}

+	      if (all_operand0 && !side_effects_p (XEXP (op0, 1)))

+		return simplify_gen_binary (VEC_SELECT, mode, XEXP (op0, 0), op1);

+	      if (all_operand1 && !side_effects_p (XEXP (op0, 0)))

+		return simplify_gen_binary (VEC_SELECT, mode, XEXP (op0, 1), op1);

+	    }

+	}

+

       return 0;
     case VEC_CONCAT:
       {
 	enum machine_mode op0_mode = (GET_MODE (trueop0) != VOIDmode
 				      ? GET_MODE (trueop0)
 				      : GET_MODE_INNER (mode));
 	enum machine_mode op1_mode = (GET_MODE (trueop1) != VOIDmode
 				      ? GET_MODE (trueop1)
 				      : GET_MODE_INNER (mode));
 
@@ -5217,21 +5242,21 @@  simplify_const_relational_operation (enu

    OP0, OP1, and OP2.  OP0_MODE was the mode of OP0 before it became
    a constant.  Return 0 if no simplifications is possible.  */
 
 rtx
 simplify_ternary_operation (enum rtx_code code, enum machine_mode mode,
 			    enum machine_mode op0_mode, rtx op0, rtx op1,
 			    rtx op2)
 {
   unsigned int width = GET_MODE_PRECISION (mode);
   bool any_change = false;
-  rtx tem;

+  rtx tem, trueop2;

 
   /* VOIDmode means "infinite" precision.  */
   if (width == 0)
     width = HOST_BITS_PER_WIDE_INT;
 
   switch (code)
     {
     case FMA:
       /* Simplify negations around the multiplication.  */
       /* -a * -b + c  =>  a * b + c.  */
@@ -5363,47 +5388,85 @@  simplify_ternary_operation (enum rtx_cod

 	      else if (temp)
 	        return gen_rtx_IF_THEN_ELSE (mode, temp, op1, op2);
 	    }
 	}
       break;
 
     case VEC_MERGE:
       gcc_assert (GET_MODE (op0) == mode);
       gcc_assert (GET_MODE (op1) == mode);
       gcc_assert (VECTOR_MODE_P (mode));
-      op2 = avoid_constant_pool_reference (op2);

-      if (CONST_INT_P (op2))

+      trueop2 = avoid_constant_pool_reference (op2);

+      if (CONST_INT_P (trueop2))

 	{
-          int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));

+	  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));

 	  unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
-	  int mask = (1 << n_elts) - 1;

+	  unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n_elts)

+					- 1;

+	  unsigned HOST_WIDE_INT sel = UINTVAL (trueop2);

 
-	  if (!(INTVAL (op2) & mask))

+	  if (!(sel & mask) && !side_effects_p (op0))

 	    return op1;
-	  if ((INTVAL (op2) & mask) == mask)

+	  if ((sel & mask) == mask && !side_effects_p (op1))

 	    return op0;
 
-	  op0 = avoid_constant_pool_reference (op0);

-	  op1 = avoid_constant_pool_reference (op1);

-	  if (GET_CODE (op0) == CONST_VECTOR

-	      && GET_CODE (op1) == CONST_VECTOR)

+	  rtx trueop0 = avoid_constant_pool_reference (op0);

+	  rtx trueop1 = avoid_constant_pool_reference (op1);

+	  if (GET_CODE (trueop0) == CONST_VECTOR

+	      && GET_CODE (trueop1) == CONST_VECTOR)

 	    {
 	      rtvec v = rtvec_alloc (n_elts);
 	      unsigned int i;
 
 	      for (i = 0; i < n_elts; i++)
-		RTVEC_ELT (v, i) = (INTVAL (op2) & (1 << i)

-				    ? CONST_VECTOR_ELT (op0, i)

-				    : CONST_VECTOR_ELT (op1, i));

+		RTVEC_ELT (v, i) = ((sel & ((unsigned HOST_WIDE_INT) 1 << i))

+				    ? CONST_VECTOR_ELT (trueop0, i)

+				    : CONST_VECTOR_ELT (trueop1, i));

 	      return gen_rtx_CONST_VECTOR (mode, v);
 	    }
+

+	  /* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n)

+	     if no element from a appears in the result.  */

+	  if (GET_CODE (op0) == VEC_MERGE)

+	    {

+	      tem = avoid_constant_pool_reference (XEXP (op0, 2));

+	      if (CONST_INT_P (tem))

+		{

+		  unsigned HOST_WIDE_INT sel0 = UINTVAL (tem);

+		  if (!(sel & sel0 & mask) && !side_effects_p (XEXP (op0, 0)))

+		    return simplify_gen_ternary (code, mode, mode,

+						 XEXP (op0, 1), op1, op2);

+		  if (!(sel & ~sel0 & mask) && !side_effects_p (XEXP (op0, 1)))

+		    return simplify_gen_ternary (code, mode, mode,

+						 XEXP (op0, 0), op1, op2);

+		}

+	    }

+	  if (GET_CODE (op1) == VEC_MERGE)

+	    {

+	      tem = avoid_constant_pool_reference (XEXP (op1, 2));

+	      if (CONST_INT_P (tem))

+		{

+		  unsigned HOST_WIDE_INT sel1 = UINTVAL (tem);

+		  if (!(~sel & sel1 & mask) && !side_effects_p (XEXP (op1, 0)))

+		    return simplify_gen_ternary (code, mode, mode,

+						 op0, XEXP (op1, 1), op2);

+		  if (!(~sel & ~sel1 & mask) && !side_effects_p (XEXP (op1, 1)))

+		    return simplify_gen_ternary (code, mode, mode,

+						 op0, XEXP (op1, 0), op2);

+		}

+	    }

 	}
+

+      if (rtx_equal_p (op0, op1)

+	  && !side_effects_p (op2) && !side_effects_p (op1))

+	return op0;

+

       break;
 
     default:
       gcc_unreachable ();
     }
 
   return 0;
 }
 
 /* Evaluate a SUBREG of a CONST_INT or CONST_DOUBLE or CONST_FIXED
Index: testsuite/gcc.target/i386/merge-1.c

===================================================================
--- testsuite/gcc.target/i386/merge-1.c	(revision 0)

+++ testsuite/gcc.target/i386/merge-1.c	(revision 0)

@@ -0,0 +1,22 @@ 

+/* { dg-do compile } */

+/* { dg-options "-O1 -msse2" } */

+

+#include <x86intrin.h>

+

+void

+f (double *r, __m128d x, __m128d y, __m128d z)

+{

+  __m128d t=_mm_move_sd(x,y);

+  __m128d u=_mm_move_sd(t,z);

+  *r = u[0];

+}

+

+__m128d

+g(__m128d x, __m128d y, __m128d z)

+{

+  __m128d t=_mm_move_sd(x,y);

+  __m128d u=_mm_move_sd(t,z);

+  return u;

+}

+

+/* { dg-final { scan-assembler-times "movsd" 1 } } */


Property changes on: testsuite/gcc.target/i386/merge-1.c
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Author Date Id Revision URL

Index: testsuite/gcc.target/i386/avx2-vpblendd128-1.c

===================================================================
--- testsuite/gcc.target/i386/avx2-vpblendd128-1.c	(revision 197265)

+++ testsuite/gcc.target/i386/avx2-vpblendd128-1.c	(working copy)

@@ -1,13 +1,14 @@ 

 /* { dg-do compile } */
 /* { dg-options "-mavx2 -O2" } */
 /* { dg-final { scan-assembler "vpblendd\[ \\t\]+\[^\n\]*" } } */
 
 #include <immintrin.h>
 
 __m128i x;
+__m128i y;

 
 void extern
 avx2_test (void)
 {
-  x = _mm_blend_epi32 (x, x, 13);

+  x = _mm_blend_epi32 (x, y, 13);

 }