Patchwork Pattern recognizer rotate improvement

login
register
mail settings
Submitter Jakub Jelinek
Date May 17, 2013, 6:20 a.m.
Message ID <20130517062055.GN1377@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/244504/
State New
Headers show

Comments

Jakub Jelinek - May 17, 2013, 6:20 a.m.
On Wed, May 15, 2013 at 03:24:37PM +0200, Richard Biener wrote:
> We have the same issue in some other places where we insert invariant
> code into the loop body - one reason there is another LIM pass
> after vectorization.

Well, in this case it causes the shift amount to be loaded into a vector
instead of a scalar, so even when LIM moves it before the loop, it
will only work with vector/vector shifts and be more expensive that way
(the value needs to be broadcast into a vector).  The following patch
improves it slightly, at least for loops, by emitting the shift amount
stmts on the loop preheader edge.  rotate-4.c used to be vectorizable only
with -mavx2 (which has vector/vector shifts); now it is also vectorized
with -mavx (which doesn't have them).  Unfortunately, this trick doesn't
work for SLP vectorization: emitting the stmts at the start of the current
bb doesn't help, because every stmt emits its own copy of them and thus it
is vectorized with vector/vector shifts only anyway.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2013-05-17  Jakub Jelinek  <jakub@redhat.com>

	* tree-vect-patterns.c (vect_recog_rotate_pattern): For
	vect_external_def oprnd1 with loop_vinfo, try to emit
	optional cast, negation and BIT_AND_EXPR stmts on the loop preheader
	edge instead of into the pattern def seq.

	* gcc.target/i386/rotate-4.c: Compile only with -mavx
	instead of -mavx2, require only avx instead of avx2.
	* gcc.target/i386/rotate-4a.c: Include avx-check.h instead
	of avx2-check.h and turn into an avx runtime test instead of
	avx2 runtime test.



	Jakub
Richard Guenther - May 17, 2013, 8:01 a.m.
On Fri, 17 May 2013, Jakub Jelinek wrote:

> On Wed, May 15, 2013 at 03:24:37PM +0200, Richard Biener wrote:
> > We have the same issue in some other places where we insert invariant
> > code into the loop body - one reason there is another LIM pass
> > after vectorization.
> 
> Well, in this case it causes the shift amount to be loaded into a vector
> instead of scalar, therefore even when LIM moves it before the loop, it
> will only work with vector/vector shifts and be more expensive that way
> (need to broadcast the value in a vector).  The following patch
> improves it slightly at least for loops, by just emitting the shift amount
> stmts to loop preheader, rotate-4.c used to be only vectorizable with
> -mavx2 (which has vector/vector shifts), now also -mavx (which doesn't)
> vectorizes it.  Unfortunately this trick doesn't work for SLP vectorization,
> emitting the stmts at the start of the current bb doesn't help, because
> every stmt emits its own and thus it is vectorized with vector/vector
> shifts only anyway.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.
 
> 2013-05-17  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* tree-vect-patterns.c (vect_recog_rotate_pattern): For
> 	vect_external_def oprnd1 with loop_vinfo, try to emit
> 	optional cast, negation and and stmts on the loop preheader
> 	edge instead of into the pattern def seq.
> 
> 	* gcc.target/i386/rotate-4.c: Compile only with -mavx
> 	instead of -mavx2, require only avx instead of avx2.
> 	* gcc.target/i386/rotate-4a.c: Include avx-check.h instead
> 	of avx2-check.h and turn into an avx runtime test instead of
> 	avx2 runtime test.
> 
> --- gcc/tree-vect-patterns.c.jj	2013-05-16 13:56:08.000000000 +0200
> +++ gcc/tree-vect-patterns.c	2013-05-16 15:27:00.565143478 +0200
> @@ -1494,6 +1494,7 @@ vect_recog_rotate_pattern (vec<gimple> *
>    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
>    enum vect_def_type dt;
>    optab optab1, optab2;
> +  edge ext_def = NULL;
>  
>    if (!is_gimple_assign (last_stmt))
>      return NULL;
> @@ -1574,6 +1575,21 @@ vect_recog_rotate_pattern (vec<gimple> *
>    if (*type_in == NULL_TREE)
>      return NULL;
>  
> +  if (dt == vect_external_def
> +      && TREE_CODE (oprnd1) == SSA_NAME
> +      && loop_vinfo)
> +    {
> +      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> +      ext_def = loop_preheader_edge (loop);
> +      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
> +	{
> +	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
> +	  if (bb == NULL
> +	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
> +	    ext_def = NULL;
> +	}
> +    }
> +
>    def = NULL_TREE;
>    if (TREE_CODE (oprnd1) == INTEGER_CST
>        || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
> @@ -1593,7 +1609,14 @@ vect_recog_rotate_pattern (vec<gimple> *
>        def = vect_recog_temp_ssa_var (type, NULL);
>        def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
>  					       NULL_TREE);
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      if (ext_def)
> +	{
> +	  basic_block new_bb
> +	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
> +	  gcc_assert (!new_bb);
> +	}
> +      else
> +	append_pattern_def_seq (stmt_vinfo, def_stmt);
>      }
>    stype = TREE_TYPE (def);
>  
> @@ -1618,11 +1641,19 @@ vect_recog_rotate_pattern (vec<gimple> *
>        def2 = vect_recog_temp_ssa_var (stype, NULL);
>        def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
>  					       NULL_TREE);
> -      def_stmt_vinfo
> -	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> -      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> -      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      if (ext_def)
> +	{
> +	  basic_block new_bb
> +	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
> +	  gcc_assert (!new_bb);
> +	}
> +      else
> +	{
> +	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> +	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> +	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> +	  append_pattern_def_seq (stmt_vinfo, def_stmt);
> +	}
>  
>        def2 = vect_recog_temp_ssa_var (stype, NULL);
>        tree mask
> @@ -1630,11 +1661,19 @@ vect_recog_rotate_pattern (vec<gimple> *
>        def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
>  					       gimple_assign_lhs (def_stmt),
>  					       mask);
> -      def_stmt_vinfo
> -	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> -      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> -      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      if (ext_def)
> +	{
> +	  basic_block new_bb
> +	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
> +	  gcc_assert (!new_bb);
> +	}
> +      else
> +	{
> +	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> +	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> +	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> +	  append_pattern_def_seq (stmt_vinfo, def_stmt);
> +	}
>      }
>  
>    var1 = vect_recog_temp_ssa_var (type, NULL);
> --- gcc/testsuite/gcc.target/i386/rotate-4.c.jj	2013-05-16 13:50:14.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/rotate-4.c	2013-05-16 15:23:32.729313026 +0200
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
> -/* { dg-require-effective-target avx2 } */
> -/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O3 -mavx -fdump-tree-vect-details" } */
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */
>  
> --- gcc/testsuite/gcc.target/i386/rotate-4a.c.jj	2013-05-16 14:00:33.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/rotate-4a.c	2013-05-16 15:23:44.791247428 +0200
> @@ -1,14 +1,14 @@
>  /* { dg-do run } */
> -/* { dg-require-effective-target avx2 } */
> -/* { dg-options "-O3 -mavx2" } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O3 -mavx" } */
>  
> -#include "avx2-check.h"
> +#include "avx-check.h"
>  
>  #include "rotate-4.c"
>  
>  static void
>  __attribute__((noinline))
> -avx2_test (void)
> +avx_test (void)
>  {
>    int i;
>    for (i = 0; i < 1024; i++)
> 
> 
> 	Jakub
> 
>

Patch

--- gcc/tree-vect-patterns.c.jj	2013-05-16 13:56:08.000000000 +0200
+++ gcc/tree-vect-patterns.c	2013-05-16 15:27:00.565143478 +0200
@@ -1494,6 +1494,7 @@  vect_recog_rotate_pattern (vec<gimple> *
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
   enum vect_def_type dt;
   optab optab1, optab2;
+  edge ext_def = NULL;
 
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -1574,6 +1575,21 @@  vect_recog_rotate_pattern (vec<gimple> *
   if (*type_in == NULL_TREE)
     return NULL;
 
+  if (dt == vect_external_def
+      && TREE_CODE (oprnd1) == SSA_NAME
+      && loop_vinfo)
+    {
+      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+      ext_def = loop_preheader_edge (loop);
+      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
+	{
+	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
+	  if (bb == NULL
+	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
+	    ext_def = NULL;
+	}
+    }
+
   def = NULL_TREE;
   if (TREE_CODE (oprnd1) == INTEGER_CST
       || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
@@ -1593,7 +1609,14 @@  vect_recog_rotate_pattern (vec<gimple> *
       def = vect_recog_temp_ssa_var (type, NULL);
       def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
 					       NULL_TREE);
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
+      if (ext_def)
+	{
+	  basic_block new_bb
+	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
+	  gcc_assert (!new_bb);
+	}
+      else
+	append_pattern_def_seq (stmt_vinfo, def_stmt);
     }
   stype = TREE_TYPE (def);
 
@@ -1618,11 +1641,19 @@  vect_recog_rotate_pattern (vec<gimple> *
       def2 = vect_recog_temp_ssa_var (stype, NULL);
       def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
 					       NULL_TREE);
-      def_stmt_vinfo
-	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
+      if (ext_def)
+	{
+	  basic_block new_bb
+	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
+	  gcc_assert (!new_bb);
+	}
+      else
+	{
+	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
+	  append_pattern_def_seq (stmt_vinfo, def_stmt);
+	}
 
       def2 = vect_recog_temp_ssa_var (stype, NULL);
       tree mask
@@ -1630,11 +1661,19 @@  vect_recog_rotate_pattern (vec<gimple> *
       def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
 					       gimple_assign_lhs (def_stmt),
 					       mask);
-      def_stmt_vinfo
-	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
+      if (ext_def)
+	{
+	  basic_block new_bb
+	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
+	  gcc_assert (!new_bb);
+	}
+      else
+	{
+	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
+	  append_pattern_def_seq (stmt_vinfo, def_stmt);
+	}
     }
 
   var1 = vect_recog_temp_ssa_var (type, NULL);
--- gcc/testsuite/gcc.target/i386/rotate-4.c.jj	2013-05-16 13:50:14.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-4.c	2013-05-16 15:23:32.729313026 +0200
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
-/* { dg-require-effective-target avx2 } */
-/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx -fdump-tree-vect-details" } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- gcc/testsuite/gcc.target/i386/rotate-4a.c.jj	2013-05-16 14:00:33.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-4a.c	2013-05-16 15:23:44.791247428 +0200
@@ -1,14 +1,14 @@ 
 /* { dg-do run } */
-/* { dg-require-effective-target avx2 } */
-/* { dg-options "-O3 -mavx2" } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx" } */
 
-#include "avx2-check.h"
+#include "avx-check.h"
 
 #include "rotate-4.c"
 
 static void
 __attribute__((noinline))
-avx2_test (void)
+avx_test (void)
 {
   int i;
   for (i = 0; i < 1024; i++)