Simplify a VEC_SELECT from one half of a VEC_CONCAT

Submitted by Marc Glisse on Dec. 2, 2012, 1:56 a.m.

Details

Message ID alpine.DEB.2.02.1212020241040.30726@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse Dec. 2, 2012, 1:56 a.m.
Hello,

In PR50829, HJ Lu pointed me to PR44551, for which I already had a patch
(I hadn't submitted it because I didn't have a good use case for it).

Bootstrapped and ran the testsuite on x86_64-linux.

2012-12-02  Marc Glisse  <marc.glisse@inria.fr>

 	PR target/44551
gcc/
 	* simplify-rtx.c (simplify_binary_operation_1) <VEC_SELECT>: Detect
 	when all elements come from one half of a VEC_CONCAT.

gcc/testsuite/
 	* gcc.target/i386/pr44551.c: New testcase.

Patch hide | download patch | download mbox

Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c	(revision 194037)
+++ gcc/simplify-rtx.c	(working copy)
@@ -3482,44 +3482,77 @@  simplify_binary_operation_1 (enum rtx_co
 	      rtx subop0, subop1;
 
 	      gcc_assert (i0 < 2 && i1 < 2);
 	      subop0 = XEXP (trueop0, i0);
 	      subop1 = XEXP (trueop0, i1);
 
 	      return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1);
 	    }
 	}
 
-      if (XVECLEN (trueop1, 0) == 1
-	  && CONST_INT_P (XVECEXP (trueop1, 0, 0))
-	  && GET_CODE (trueop0) == VEC_CONCAT)
+      /* Detect if all the elements come from the same subpart of a concat.  */
+      if (GET_CODE (trueop0) == VEC_CONCAT)
 	{
-	  rtx vec = trueop0;
-	  int offset = INTVAL (XVECEXP (trueop1, 0, 0)) * GET_MODE_SIZE (mode);
+	  rtx new_op0 = NULL_RTX;
+	  rtx new_op1 = NULL_RTX;
+	  int first = 0;
+	  int second = 0;
+	  unsigned nelts_first_half = 1;
+	  enum machine_mode mode_first_half = GET_MODE (XEXP (trueop0, 0));
+	  if (VECTOR_MODE_P (mode_first_half))
+	    {
+	      int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode_first_half));
+	      nelts_first_half = (GET_MODE_SIZE (mode_first_half) / elt_size);
+	    }
 
-	  /* Try to find the element in the VEC_CONCAT.  */
-	  while (GET_MODE (vec) != mode
-		 && GET_CODE (vec) == VEC_CONCAT)
+	  for (int i = 0; i < XVECLEN (trueop1, 0); i++)
 	    {
-	      HOST_WIDE_INT vec_size = GET_MODE_SIZE (GET_MODE (XEXP (vec, 0)));
-	      if (offset < vec_size)
-		vec = XEXP (vec, 0);
+	      rtx j = XVECEXP (trueop1, 0, i);
+	      if (!CONST_INT_P (j))
+		{
+		  first++;
+		  second++;
+		  break;
+		}
+	      if (INTVAL (j) < nelts_first_half)
+		first++;
 	      else
+		second++;
+	    }
+
+	  if (second == 0)
+	    {
+	      new_op0 = XEXP (trueop0, 0);
+	      new_op1 = trueop1;
+	    }
+	  else if (first == 0)
+	    {
+	      int len = XVECLEN (trueop1, 0);
+	      rtvec vec = rtvec_alloc (len);
+	      for (int i = 0; i < len; i++)
 		{
-		  offset -= vec_size;
-		  vec = XEXP (vec, 1);
+		  int j = INTVAL (XVECEXP (trueop1, 0, i)) - nelts_first_half;
+		  RTVEC_ELT (vec, i) = GEN_INT (j);
 		}
-	      vec = avoid_constant_pool_reference (vec);
+	      new_op0 = XEXP (trueop0, 1);
+	      new_op1 = gen_rtx_PARALLEL (VOIDmode, vec);
 	    }
 
-	  if (GET_MODE (vec) == mode)
-	    return vec;
+	  if (new_op0)
+	    {
+	      if (VECTOR_MODE_P (GET_MODE (new_op0)))
+		return simplify_gen_binary (VEC_SELECT, mode, new_op0, new_op1);
+	      if (VECTOR_MODE_P (mode))
+		return simplify_gen_unary (VEC_DUPLICATE, mode, new_op0,
+					   GET_MODE (new_op0));
+	      return new_op0;
+	    }
 	}
 
       return 0;
     case VEC_CONCAT:
       {
 	enum machine_mode op0_mode = (GET_MODE (trueop0) != VOIDmode
 				      ? GET_MODE (trueop0)
 				      : GET_MODE_INNER (mode));
 	enum machine_mode op1_mode = (GET_MODE (trueop1) != VOIDmode
 				      ? GET_MODE (trueop1)
Index: gcc/testsuite/gcc.target/i386/pr44551.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr44551.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr44551.c	(revision 0)
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -mavx" } */
+
+#include <immintrin.h>
+
+__m128i
+foo (__m256i x, __m128i y)
+{
+  __m256i r = _mm256_insertf128_si256(x, y, 1);
+  __m128i a = _mm256_extractf128_si256(r, 1);
+  return a;
+}
+
+/* { dg-final { scan-assembler-not "insert" } } */
+/* { dg-final { scan-assembler-not "extract" } } */