diff mbox

Add MULT_HIGHPART_EXPR

Message ID 20120628071755.GP20264@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek June 28, 2012, 7:17 a.m. UTC
On Wed, Jun 27, 2012 at 02:37:08PM -0700, Richard Henderson wrote:
> 
> I was sitting on this patch until I got around to fixing up Jakub's
> existing vector divmod code to use it.  But seeing as how he's adding
> more uses, I think it's better to get it in earlier.
> 
> Tested via a patch sent under separate cover that changes
> __builtin_alpha_umulh to immediately fold to MULT_HIGHPART_EXPR.

Thanks.  Here is an incremental patch on top of my patch from yesterday
which expands some of the vector divisions/modulos using MULT_HIGHPART_EXPR
instead of VEC_WIDEN_MULT_*_EXPR + VEC_PERM_EXPR if the backend supports that.
It improves the code generated for ushort or short / or % on i?86 (slightly
complicated by the fact that unfortunately even -mavx2 doesn't support
vector by vector shifts for V{8,16}HImode (nor V{16,32}QImode), though XOP
does).

Ok for trunk?

Next, I'll look at using MULT_HIGHPART_EXPR in the pattern recognizer and
vectorizing it as either of the two sequences.

2012-06-28  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/53645
	* tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR
	instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR
	if possible.

	* gcc.c-torture/execute/pr53645-2.c: New test.


	Jakub

Comments

Richard Henderson June 28, 2012, 5:42 p.m. UTC | #1
On 2012-06-28 00:17, Jakub Jelinek wrote:
> 2012-06-28  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/53645
> 	* tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR
> 	instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR
> 	if possible.
> 
> 	* gcc.c-torture/execute/pr53645-2.c: New test.

Ok.


r~
diff mbox

Patch

--- gcc/tree-vect-generic.c.jj	2012-06-28 08:32:50.000000000 +0200
+++ gcc/tree-vect-generic.c	2012-06-28 09:10:51.436748834 +0200
@@ -455,7 +455,7 @@  expand_vector_divmod (gimple_stmt_iterat
   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
   optab op;
   tree *vec;
-  unsigned char *sel;
+  unsigned char *sel = NULL;
   tree cur_op, mhi, mlo, mulcst, perm_mask, wider_type, tem;
 
   if (prec > HOST_BITS_PER_WIDE_INT)
@@ -744,26 +744,34 @@  expand_vector_divmod (gimple_stmt_iterat
   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
     return NULL_TREE;
 
-  op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, type, optab_default);
-  if (op == NULL
-      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
-    return NULL_TREE;
-  op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, type, optab_default);
-  if (op == NULL
-      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
-    return NULL_TREE;
-  sel = XALLOCAVEC (unsigned char, nunits);
-  for (i = 0; i < nunits; i++)
-    sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
-  if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
-    return NULL_TREE;
-  wider_type
-    = build_vector_type (build_nonstandard_integer_type (prec * 2, unsignedp),
-			 nunits / 2);
-  if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
-      || GET_MODE_BITSIZE (TYPE_MODE (wider_type))
-	 != GET_MODE_BITSIZE (TYPE_MODE (type)))
-    return NULL_TREE;
+  op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default);
+  if (op != NULL
+      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
+    wider_type = NULL_TREE;
+  else
+    {
+      op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, type, optab_default);
+      if (op == NULL
+	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+	return NULL_TREE;
+      op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, type, optab_default);
+      if (op == NULL
+	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+	return NULL_TREE;
+      sel = XALLOCAVEC (unsigned char, nunits);
+      for (i = 0; i < nunits; i++)
+	sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
+      if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
+	return NULL_TREE;
+      wider_type
+	= build_vector_type (build_nonstandard_integer_type (prec * 2,
+							     unsignedp),
+			     nunits / 2);
+      if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
+	  || GET_MODE_BITSIZE (TYPE_MODE (wider_type))
+	     != GET_MODE_BITSIZE (TYPE_MODE (type)))
+	return NULL_TREE;
+    }
 
   cur_op = op0;
 
@@ -772,7 +780,7 @@  expand_vector_divmod (gimple_stmt_iterat
     case 0:
       gcc_assert (unsignedp);
       /* t1 = oprnd0 >> pre_shift;
-	 t2 = (type) (t1 w* ml >> prec);
+	 t2 = t1 h* ml;
 	 q = t2 >> post_shift;  */
       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
       if (cur_op == NULL_TREE)
@@ -801,30 +809,37 @@  expand_vector_divmod (gimple_stmt_iterat
   for (i = 0; i < nunits; i++)
     vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
   mulcst = build_vector (type, vec);
-  for (i = 0; i < nunits; i++)
-    vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
-  perm_mask = build_vector (type, vec);
-  mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type,
-			 cur_op, mulcst);
-  mhi = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mhi);
-  mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type,
-			 cur_op, mulcst);
-  mlo = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mlo);
-  if (BYTES_BIG_ENDIAN)
-    cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mhi, mlo, perm_mask);
+  if (wider_type == NULL_TREE)
+    cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
   else
-    cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mlo, mhi, perm_mask);
+    {
+      for (i = 0; i < nunits; i++)
+	vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
+      perm_mask = build_vector (type, vec);
+      mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type,
+			     cur_op, mulcst);
+      mhi = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mhi);
+      mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type,
+			     cur_op, mulcst);
+      mlo = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mlo);
+      if (BYTES_BIG_ENDIAN)
+	cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mhi, mlo,
+				  perm_mask);
+      else
+	cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mlo, mhi,
+				  perm_mask);
+    }
 
   switch (mode)
     {
     case 0:
       /* t1 = oprnd0 >> pre_shift;
-	 t2 = (type) (t1 w* ml >> prec);
+	 t2 = t1 h* ml;
 	 q = t2 >> post_shift;  */
       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
       break;
     case 1:
-      /* t1 = (type) (oprnd0 w* ml >> prec);
+      /* t1 = oprnd0 h* ml;
 	 t2 = oprnd0 - t1;
 	 t3 = t2 >> 1;
 	 t4 = t1 + t3;
@@ -848,7 +863,7 @@  expand_vector_divmod (gimple_stmt_iterat
     case 3:
     case 4:
     case 5:
-      /* t1 = (type) (oprnd0 w* ml >> prec);
+      /* t1 = oprnd0 h* ml;
 	 t2 = t1; [ iff (mode & 2) != 0 ]
 	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
 	 t3 = t2 >> post_shift;
--- gcc/testsuite/gcc.c-torture/execute/pr53645-2.c.jj	2012-06-28 09:08:53.022405925 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr53645-2.c	2012-06-28 09:07:57.000000000 +0200
@@ -0,0 +1,120 @@ 
+/* PR tree-optimization/53645 */
+
+typedef unsigned short int UV __attribute__((vector_size (16)));
+typedef short int SV __attribute__((vector_size (16)));
+extern void abort (void);
+
+#define TEST(a, b, c, d, e, f, g, h) \
+__attribute__((noinline)) void			\
+uq##a##b##c##d##e##f##g##h (UV *x, UV *y) 	\
+{						\
+  *x = *y / ((UV) { a, b, c, d, e, f, g, h });	\
+}						\
+						\
+__attribute__((noinline)) void			\
+ur##a##b##c##d##e##f##g##h (UV *x, UV *y) 	\
+{						\
+  *x = *y % ((UV) { a, b, c, d, e, f, g, h });	\
+}						\
+						\
+__attribute__((noinline)) void			\
+sq##a##b##c##d##e##f##g##h (SV *x, SV *y) 	\
+{						\
+  *x = *y / ((SV) { a, b, c, d, e, f, g, h });	\
+}						\
+						\
+__attribute__((noinline)) void			\
+sr##a##b##c##d##e##f##g##h (SV *x, SV *y) 	\
+{						\
+  *x = *y % ((SV) { a, b, c, d, e, f, g, h });	\
+}
+
+#define TESTS \
+TEST (4, 4, 4, 4, 4, 4, 4, 4)		\
+TEST (1, 4, 2, 8, 16, 64, 32, 128)	\
+TEST (3, 3, 3, 3, 3, 3, 3, 3)		\
+TEST (6, 5, 6, 5, 6, 5, 6, 5)		\
+TEST (14, 14, 14, 6, 14, 6, 14, 14)	\
+TEST (7, 7, 7, 7, 7, 7, 7, 7)		\
+
+TESTS
+
+UV u[] =
+  { ((UV) { 73U, 65531U, 0U, 174U, 921U, 65535U, 17U, 178U }),
+    ((UV) { 1U, 8173U, 65535U, 65472U, 12U, 29612U, 128U, 8912U }) };
+SV s[] =
+  { ((SV) { 73, -9123, 32761, 8191, 16371, 1201, 12701, 9999 }),
+    ((SV) { 9903, -1, -7323, 0, -7, -323, 9124, -9199 }) };
+
+int
+main ()
+{
+  UV ur, ur2;
+  SV sr, sr2;
+  int i;
+#undef TEST
+#define TEST(a, b, c, d, e, f, g, h)			\
+    uq##a##b##c##d##e##f##g##h (&ur, u + i);		\
+    if (ur[0] != u[i][0] / a || ur[3] != u[i][3] / d)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&ur) : "memory");		\
+    if (ur[2] != u[i][2] / c || ur[1] != u[i][1] / b)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&ur) : "memory");		\
+    if (ur[4] != u[i][4] / e || ur[7] != u[i][7] / h)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&ur) : "memory");		\
+    if (ur[6] != u[i][6] / g || ur[5] != u[i][5] / f)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&ur) : "memory");		\
+    ur##a##b##c##d##e##f##g##h (&ur, u + i);		\
+    if (ur[0] != u[i][0] % a || ur[3] != u[i][3] % d)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&ur) : "memory");		\
+    if (ur[2] != u[i][2] % c || ur[1] != u[i][1] % b)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&ur) : "memory");		\
+    if (ur[4] != u[i][4] % e || ur[7] != u[i][7] % h)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&ur) : "memory");		\
+    if (ur[6] != u[i][6] % g || ur[5] != u[i][5] % f)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&ur) : "memory");
+  for (i = 0; i < sizeof (u) / sizeof (u[0]); i++)
+    {
+      TESTS
+    }
+#undef TEST
+#define TEST(a, b, c, d, e, f, g, h)			\
+    sq##a##b##c##d##e##f##g##h (&sr, s + i);		\
+    if (sr[0] != s[i][0] / a || sr[3] != s[i][3] / d)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&sr) : "memory");		\
+    if (sr[2] != s[i][2] / c || sr[1] != s[i][1] / b)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&sr) : "memory");		\
+    if (sr[4] != s[i][4] / e || sr[7] != s[i][7] / h)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&sr) : "memory");		\
+    if (sr[6] != s[i][6] / g || sr[5] != s[i][5] / f)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&sr) : "memory");		\
+    sr##a##b##c##d##e##f##g##h (&sr, s + i);		\
+    if (sr[0] != s[i][0] % a || sr[3] != s[i][3] % d)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&sr) : "memory");		\
+    if (sr[2] != s[i][2] % c || sr[1] != s[i][1] % b)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&sr) : "memory");		\
+    if (sr[4] != s[i][4] % e || sr[7] != s[i][7] % h)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&sr) : "memory");		\
+    if (sr[6] != s[i][6] % g || sr[5] != s[i][5] % f)	\
+     abort ();						\
+    asm volatile ("" : : "r" (&sr) : "memory");
+  for (i = 0; i < sizeof (s) / sizeof (s[0]); i++)
+    {
+      TESTS
+    }
+  return 0;
+}