Patchwork: Pattern recognizer for rotates

login
register
mail settings
Submitter Jakub Jelinek
Date May 16, 2013, 4:25 p.m.
Message ID <20130516162544.GJ1377@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/244394/
State New
Headers show

Comments

Jakub Jelinek - May 16, 2013, 4:25 p.m.
On Wed, May 15, 2013 at 03:24:37PM +0200, Richard Biener wrote:
> Ok with ...

> > +  /* Pattern detected.  */
> > +  if (dump_enabled_p ())
> > +    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
> > +		     "vect_recog_rotate_pattern: detected: ");
> 
> Please use MSG_NOTE here.

Ok, here is what I've committed (the above change plus 6 added i386.exp
testcases).

2013-05-16  Jakub Jelinek  <jakub@redhat.com>

	* tree-vectorizer.h (NUM_PATTERNS): Increment.
	* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add
	vect_recog_rotate_pattern.
	(vect_recog_rotate_pattern): New function.

	* gcc.target/i386/rotate-3.c: New test.
	* gcc.target/i386/rotate-3a.c: New test.
	* gcc.target/i386/rotate-4.c: New test.
	* gcc.target/i386/rotate-4a.c: New test.
	* gcc.target/i386/rotate-5.c: New test.
	* gcc.target/i386/rotate-5a.c: New test.



	Jakub

Patch

--- gcc/tree-vectorizer.h.jj	2013-05-13 09:44:45.822529016 +0200
+++ gcc/tree-vectorizer.h	2013-05-16 13:56:08.093576302 +0200
@@ -1005,7 +1005,7 @@  extern void vect_slp_transform_bb (basic
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 10
+#define NUM_PATTERNS 11
 void vect_pattern_recog (loop_vec_info, bb_vec_info);
 
 /* In tree-vectorizer.c.  */
--- gcc/tree-vect-patterns.c.jj	2013-05-16 12:36:29.496419141 +0200
+++ gcc/tree-vect-patterns.c	2013-05-16 13:56:08.095576326 +0200
@@ -50,6 +50,7 @@  static gimple vect_recog_over_widening_p
                                                  tree *);
 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
 	                                tree *, tree *);
+static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
 						      tree *, tree *);
 static gimple vect_recog_divmod_pattern (vec<gimple> *,
@@ -64,6 +65,7 @@  static vect_recog_func_ptr vect_vect_rec
 	vect_recog_pow_pattern,
 	vect_recog_widen_shift_pattern,
 	vect_recog_over_widening_pattern,
+	vect_recog_rotate_pattern,
 	vect_recog_vector_vector_shift_pattern,
 	vect_recog_divmod_pattern,
 	vect_recog_mixed_size_cond_pattern,
@@ -1446,6 +1448,218 @@  vect_recog_widen_shift_pattern (vec<gimp
 
   if (dump_enabled_p ())
     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
+
+  stmts->safe_push (last_stmt);
+  return pattern_stmt;
+}
+
+/* Detect a rotate pattern wouldn't be otherwise vectorized:
+
+   type a_t, b_t, c_t;
+
+   S0 a_t = b_t r<< c_t;
+
+  Input/Output:
+
+  * STMTS: Contains a stmt from which the pattern search begins,
+    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
+    with a sequence:
+
+   S1 d_t = -c_t;
+   S2 e_t = d_t & (B - 1);
+   S3 f_t = b_t << c_t;
+   S4 g_t = b_t >> e_t;
+   S0 a_t = f_t | g_t;
+
+    where B is element bitsize of type.
+
+  Output:
+
+  * TYPE_IN: The type of the input arguments to the pattern.
+
+  * TYPE_OUT: The type of the output of this pattern.
+
+  * Return value: A new stmt that will be used to replace the rotate
+    S0 stmt.  */
+
+static gimple
+vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
+{
+  gimple last_stmt = stmts->pop ();
+  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
+  gimple pattern_stmt, def_stmt;
+  enum tree_code rhs_code;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
+  enum vect_def_type dt;
+  optab optab1, optab2;
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  rhs_code = gimple_assign_rhs_code (last_stmt);
+  switch (rhs_code)
+    {
+    case LROTATE_EXPR:
+    case RROTATE_EXPR:
+      break;
+    default:
+      return NULL;
+    }
+
+  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+    return NULL;
+
+  lhs = gimple_assign_lhs (last_stmt);
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  type = TREE_TYPE (oprnd0);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
+      || !INTEGRAL_TYPE_P (type)
+      || !TYPE_UNSIGNED (type))
+    return NULL;
+
+  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
+			   &def, &dt))
+    return NULL;
+
+  if (dt != vect_internal_def
+      && dt != vect_constant_def
+      && dt != vect_external_def)
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (type);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  /* If vector/vector or vector/scalar rotate is supported by the target,
+     don't do anything here.  */
+  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
+  if (optab1
+      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
+    return NULL;
+
+  if (bb_vinfo != NULL || dt != vect_internal_def)
+    {
+      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
+      if (optab2
+	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
+	return NULL;
+    }
+
+  /* If vector/vector or vector/scalar shifts aren't supported by the target,
+     don't do anything here either.  */
+  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
+  if (!optab1
+      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
+      || !optab2
+      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+    {
+      if (bb_vinfo == NULL && dt == vect_internal_def)
+	return NULL;
+      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
+      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
+      if (!optab1
+	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
+	  || !optab2
+	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+	return NULL;
+    }
+
+  *type_in = vectype;
+  *type_out = vectype;
+  if (*type_in == NULL_TREE)
+    return NULL;
+
+  def = NULL_TREE;
+  if (TREE_CODE (oprnd1) == INTEGER_CST
+      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
+    def = oprnd1;
+  else if (def_stmt && gimple_assign_cast_p (def_stmt))
+    {
+      tree rhs1 = gimple_assign_rhs1 (def_stmt);
+      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
+	  && TYPE_PRECISION (TREE_TYPE (rhs1))
+	     == TYPE_PRECISION (type))
+	def = rhs1;
+    }
+
+  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
+  if (def == NULL_TREE)
+    {
+      def = vect_recog_temp_ssa_var (type, NULL);
+      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
+					       NULL_TREE);
+      append_pattern_def_seq (stmt_vinfo, def_stmt);
+    }
+  stype = TREE_TYPE (def);
+
+  if (TREE_CODE (def) == INTEGER_CST)
+    {
+      if (!host_integerp (def, 1)
+	  || (unsigned HOST_WIDE_INT) tree_low_cst (def, 1)
+	     >= GET_MODE_PRECISION (TYPE_MODE (type))
+	  || integer_zerop (def))
+	return NULL;
+      def2 = build_int_cst (stype,
+			    GET_MODE_PRECISION (TYPE_MODE (type))
+			    - tree_low_cst (def, 1));
+    }
+  else
+    {
+      tree vecstype = get_vectype_for_scalar_type (stype);
+      stmt_vec_info def_stmt_vinfo;
+
+      if (vecstype == NULL_TREE)
+	return NULL;
+      def2 = vect_recog_temp_ssa_var (stype, NULL);
+      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
+					       NULL_TREE);
+      def_stmt_vinfo
+	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
+      append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+      def2 = vect_recog_temp_ssa_var (stype, NULL);
+      tree mask
+	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
+      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
+					       gimple_assign_lhs (def_stmt),
+					       mask);
+      def_stmt_vinfo
+	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
+      append_pattern_def_seq (stmt_vinfo, def_stmt);
+    }
+
+  var1 = vect_recog_temp_ssa_var (type, NULL);
+  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
+					   ? LSHIFT_EXPR : RSHIFT_EXPR,
+					   var1, oprnd0, def);
+  append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+  var2 = vect_recog_temp_ssa_var (type, NULL);
+  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
+					   ? RSHIFT_EXPR : LSHIFT_EXPR,
+					   var2, oprnd0, def2);
+  append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+  /* Pattern detected.  */
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "vect_recog_rotate_pattern: detected: ");
+
+  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
+  var = vect_recog_temp_ssa_var (type, NULL);
+  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
+
+  if (dump_enabled_p ())
+    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
 
   stmts->safe_push (last_stmt);
   return pattern_stmt;
--- gcc/testsuite/gcc.target/i386/rotate-3.c.jj	2013-05-16 13:08:00.117087861 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-3.c	2013-05-16 13:59:41.332373698 +0200
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+unsigned int a[1024] __attribute__((aligned (32)));
+
+__attribute__((noinline, noclone)) void
+foo (void)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    {
+      int j = i & 31;
+      a[i] = (a[i] << j) | (a[i] >> ((-j) & 31));
+    }
+}
--- gcc/testsuite/gcc.target/i386/rotate-3a.c.jj	2013-05-16 13:51:04.584307601 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-3a.c	2013-05-16 14:00:19.158164463 +0200
@@ -0,0 +1,24 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-options "-O3 -mavx2" } */
+
+#include "avx2-check.h"
+
+#include "rotate-3.c"
+
+static void
+__attribute__((noinline))
+avx2_test (void)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = i * 1073741789U;
+  foo ();
+  for (i = 0; i < 1024; i++)
+    {
+      int j = i & 31;
+      unsigned int x = i * 1073741789U;
+      if (a[i] != ((x << j) | (x >> ((-j) & 31))))
+	abort ();
+    }
+}
--- gcc/testsuite/gcc.target/i386/rotate-4.c.jj	2013-05-16 13:08:11.044032865 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-4.c	2013-05-16 13:50:14.386590821 +0200
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+unsigned int a[1024] __attribute__((aligned (32)));
+
+__attribute__((noinline, noclone)) void
+foo (int j)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (a[i] << j) | (a[i] >> ((-j) & 31));
+}
--- gcc/testsuite/gcc.target/i386/rotate-4a.c.jj	2013-05-16 13:52:11.437923226 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-4a.c	2013-05-16 14:00:33.907079446 +0200
@@ -0,0 +1,34 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-options "-O3 -mavx2" } */
+
+#include "avx2-check.h"
+
+#include "rotate-4.c"
+
+static void
+__attribute__((noinline))
+avx2_test (void)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = i * 1073741789U;
+  foo (3);
+  for (i = 0; i < 1024; i++)
+    {
+      unsigned int x = i * 1073741789U;
+      if (a[i] != ((x << 3) | (x >> ((-3) & 31))))
+	abort ();
+    }
+  foo (0);
+  for (i = 0; i < 1024; i++)
+    {
+      unsigned int x = i * 1073741789U;
+      if (a[i] != ((x << 3) | (x >> ((-3) & 31))))
+	abort ();
+    }
+  foo (29);
+  for (i = 0; i < 1024; i++)
+    if (a[i] != i * 1073741789U)
+      abort ();
+}
--- gcc/testsuite/gcc.target/i386/rotate-5.c.jj	2013-05-16 13:08:13.849019300 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-5.c	2013-05-16 13:50:22.331547612 +0200
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+unsigned int a[1024] __attribute__((aligned (32)));
+
+__attribute__((noinline, noclone)) void
+foo (void)
+{
+  int i, j = 3;
+  for (i = 0; i < 1024; i++)
+    a[i] = (a[i] << j) | (a[i] >> ((-j) & 31));
+}
--- gcc/testsuite/gcc.target/i386/rotate-5a.c.jj	2013-05-16 13:51:14.104252744 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-5a.c	2013-05-16 13:58:15.402858405 +0200
@@ -0,0 +1,23 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx" } */
+
+#include "avx-check.h"
+
+#include "rotate-5.c"
+
+static void
+__attribute__((noinline))
+avx_test (void)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = i * 1073741789U;
+  foo ();
+  for (i = 0; i < 1024; i++)
+    {
+      unsigned int x = i * 1073741789U;
+      if (a[i] != ((x << 3) | (x >> ((-3) & 31))))
+	abort ();
+    }
+}