diff mbox

Fix simd clone vectorization (PR tree-optimization/64024)

Message ID 20141126185711.GR1669@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Nov. 26, 2014, 6:57 p.m. UTC
Hi!

As discussed in the PR and on IRC, the problem here is that, after peeling
for alignment, a linear argument that passed simple_iv during vect analysis
may no longer pass it during the vect transform phase.

So, to fix this, this patch remembers the base and step values from
simple_iv during vect analysis and uses them during the transform phase
(with the base biased by however far peeling for alignment has advanced it,
i.e. the change in the loop's iteration count multiplied by the step).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2014-11-26  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/64024
	* tree-vectorizer.h (struct _stmt_vec_info): Remove simd_clone_fndecl
	field.  Add simd_clone_info field.
	(STMT_VINFO_SIMD_CLONE_FNDECL): Remove.
	(STMT_VINFO_SIMD_CLONE_INFO): Define.
	* tree-vect-stmts.c (vectorizable_simd_clone_call): Adjust for
	STMT_VINFO_SIMD_CLONE_FNDECL becoming first element of
	STMT_VINFO_SIMD_CLONE_INFO vector.  For linear arguments, remember
	base and linear_step from analysis phase and use it during transform
	phase, biased by the difference between LOOP_VINFO_NITERS{_UNCHANGED,}
	multiplied by linear_step.
	(free_stmt_vec_info): Release STMT_VINFO_SIMD_CLONE_INFO.

	* gcc.dg/vect/vect-simd-clone-13.c: New test.
	* gcc.dg/vect/vect-simd-clone-14.c: New test.


	Jakub

Comments

Richard Biener Nov. 27, 2014, 10:01 a.m. UTC | #1
On Wed, 26 Nov 2014, Jakub Jelinek wrote:

> Hi!
> 
> As discussed in the PR and on IRC, the problem here is that peeling
> for alignment can for some linear argument that during vect analysis
> passed simple_iv no longer pass it during vect transform phase.
> 
> So, to fix this, this patch remembers the base and step values from
> simple_iv during vect analysis and uses them during transform phase
> (biased by what the peeling for alignment advanced of course).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2014-11-26  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/64024
> 	* tree-vectorizer.h (struct _stmt_vec_info): Remove simd_clone_fndecl
> 	field.  Add simd_clone_info field.
> 	(STMT_VINFO_SIMD_CLONE_FNDECL): Remove.
> 	(STMT_VINFO_SIMD_CLONE_INFO): Define.
> 	* tree-vect-stmts.c (vectorizable_simd_clone_call): Adjust for
> 	STMT_VINFO_SIMD_CLONE_FNDECL becoming first element of
> 	STMT_VINFO_SIMD_CLONE_INFO vector.  For linear arguments, remember
> 	base and linear_step from analysis phase and use it during transform
> 	phase, biased by the difference between LOOP_VINFO_NITERS{_UNCHANGED,}
> 	multiplied by linear_step.
> 	(free_stmt_vec_info): Release STMT_VINFO_SIMD_CLONE_INFO.
> 
> 	* gcc.dg/vect/vect-simd-clone-13.c: New test.
> 	* gcc.dg/vect/vect-simd-clone-14.c: New test.
> 
> --- gcc/tree-vectorizer.h.jj	2014-11-19 18:48:07.000000000 +0100
> +++ gcc/tree-vectorizer.h	2014-11-26 12:56:00.899824766 +0100
> @@ -602,8 +602,10 @@ typedef struct _stmt_vec_info {
>       of this stmt.  */
>    vec<dr_p> same_align_refs;
>  
> -  /* Selected SIMD clone's function decl.  */
> -  tree simd_clone_fndecl;
> +  /* Selected SIMD clone's function info.  First vector element
> +     is SIMD clone's function decl, followed by a pair of trees (base + step)
> +     for linear arguments (pair of NULLs for other arguments).  */
> +  vec<tree> simd_clone_info;
>  
>    /* Classify the def of this stmt.  */
>    enum vect_def_type def_type;
> @@ -677,7 +679,7 @@ typedef struct _stmt_vec_info {
>  #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
>  #define STMT_VINFO_PATTERN_DEF_SEQ(S)      (S)->pattern_def_seq
>  #define STMT_VINFO_SAME_ALIGN_REFS(S)      (S)->same_align_refs
> -#define STMT_VINFO_SIMD_CLONE_FNDECL(S)	   (S)->simd_clone_fndecl
> +#define STMT_VINFO_SIMD_CLONE_INFO(S)	   (S)->simd_clone_info
>  #define STMT_VINFO_DEF_TYPE(S)             (S)->def_type
>  #define STMT_VINFO_GROUP_FIRST_ELEMENT(S)  (S)->first_element
>  #define STMT_VINFO_GROUP_NEXT_ELEMENT(S)   (S)->next_element
> --- gcc/tree-vect-stmts.c.jj	2014-11-19 18:47:59.000000000 +0100
> +++ gcc/tree-vect-stmts.c	2014-11-26 15:38:59.883409014 +0100
> @@ -2715,12 +2715,40 @@ vectorizable_simd_clone_call (gimple stm
>        else
>  	gcc_assert (thisarginfo.vectype != NULL_TREE);
>  
> -      if (thisarginfo.dt != vect_constant_def
> -	  && thisarginfo.dt != vect_external_def
> -	  && loop_vinfo
> -	  && TREE_CODE (op) == SSA_NAME
> -	  && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
> -	  && tree_fits_shwi_p (iv.step))
> +      /* For linear arguments, the analyze phase should have saved
> +	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
> +      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
> +	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
> +	{
> +	  gcc_assert (vec_stmt);
> +	  thisarginfo.linear_step
> +	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
> +	  thisarginfo.op
> +	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
> +	  /* If loop has been peeled for alignment, we need to adjust it.  */
> +	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
> +	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
> +	  if (n1 != n2)
> +	    {
> +	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
> +	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
> +	      tree opt = TREE_TYPE (thisarginfo.op);
> +	      bias = fold_convert (TREE_TYPE (step), bias);
> +	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
> +	      thisarginfo.op
> +		= fold_build2 (POINTER_TYPE_P (opt)
> +			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
> +			       thisarginfo.op, bias);
> +	    }
> +	}
> +      else if (!vec_stmt
> +	       && thisarginfo.dt != vect_constant_def
> +	       && thisarginfo.dt != vect_external_def
> +	       && loop_vinfo
> +	       && TREE_CODE (op) == SSA_NAME
> +	       && simple_iv (loop, loop_containing_stmt (stmt), op,
> +			     &iv, false)
> +	       && tree_fits_shwi_p (iv.step))
>  	{
>  	  thisarginfo.linear_step = tree_to_shwi (iv.step);
>  	  thisarginfo.op = iv.base;
> @@ -2735,8 +2763,8 @@ vectorizable_simd_clone_call (gimple stm
>  
>    unsigned int badness = 0;
>    struct cgraph_node *bestn = NULL;
> -  if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
> -    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
> +  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
> +    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
>    else
>      for (struct cgraph_node *n = node->simd_clones; n != NULL;
>  	 n = n->simdclone->next_clone)
> @@ -2855,7 +2883,19 @@ vectorizable_simd_clone_call (gimple stm
>  
>    if (!vec_stmt) /* transformation not required.  */
>      {
> -      STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
> +      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
> +      for (i = 0; i < nargs; i++)
> +	if (bestn->simdclone->args[i].arg_type
> +	    == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
> +	  {
> +	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
> +									+ 1);
> +	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
> +	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
> +		       ? size_type_node : TREE_TYPE (arginfo[i].op);
> +	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
> +	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
> +	  }
>        STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
>        if (dump_enabled_p ())
>  	dump_printf_loc (MSG_NOTE, vect_location,
> @@ -7479,6 +7519,7 @@ free_stmt_vec_info (gimple stmt)
>      }
>  
>    STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
> +  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
>    set_vinfo_for_stmt (stmt, NULL);
>    free (stmt_info);
>  }
> --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-13.c.jj	2014-11-26 15:42:26.162690785 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-13.c	2014-11-26 15:42:49.252278876 +0100
> @@ -0,0 +1,7 @@
> +/* { dg-require-effective-target vect_simd_clones } */
> +/* { dg-additional-options "-fopenmp-simd -fcommon" } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +
> +#include "vect-simd-clone-6.c"
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-14.c.jj	2014-11-26 15:43:09.522919202 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-14.c	2014-11-26 15:43:24.566652273 +0100
> @@ -0,0 +1,7 @@
> +/* { dg-require-effective-target vect_simd_clones } */
> +/* { dg-additional-options "-fopenmp-simd -fcommon" } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +
> +#include "vect-simd-clone-11.c"
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> 
> 	Jakub
> 
>
diff mbox

Patch

--- gcc/tree-vectorizer.h.jj	2014-11-19 18:48:07.000000000 +0100
+++ gcc/tree-vectorizer.h	2014-11-26 12:56:00.899824766 +0100
@@ -602,8 +602,10 @@  typedef struct _stmt_vec_info {
      of this stmt.  */
   vec<dr_p> same_align_refs;
 
-  /* Selected SIMD clone's function decl.  */
-  tree simd_clone_fndecl;
+  /* Selected SIMD clone's function info.  First vector element
+     is SIMD clone's function decl, followed by a pair of trees (base + step)
+     for linear arguments (pair of NULLs for other arguments).  */
+  vec<tree> simd_clone_info;
 
   /* Classify the def of this stmt.  */
   enum vect_def_type def_type;
@@ -677,7 +679,7 @@  typedef struct _stmt_vec_info {
 #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
 #define STMT_VINFO_PATTERN_DEF_SEQ(S)      (S)->pattern_def_seq
 #define STMT_VINFO_SAME_ALIGN_REFS(S)      (S)->same_align_refs
-#define STMT_VINFO_SIMD_CLONE_FNDECL(S)	   (S)->simd_clone_fndecl
+#define STMT_VINFO_SIMD_CLONE_INFO(S)	   (S)->simd_clone_info
 #define STMT_VINFO_DEF_TYPE(S)             (S)->def_type
 #define STMT_VINFO_GROUP_FIRST_ELEMENT(S)  (S)->first_element
 #define STMT_VINFO_GROUP_NEXT_ELEMENT(S)   (S)->next_element
--- gcc/tree-vect-stmts.c.jj	2014-11-19 18:47:59.000000000 +0100
+++ gcc/tree-vect-stmts.c	2014-11-26 15:38:59.883409014 +0100
@@ -2715,12 +2715,40 @@  vectorizable_simd_clone_call (gimple stm
       else
 	gcc_assert (thisarginfo.vectype != NULL_TREE);
 
-      if (thisarginfo.dt != vect_constant_def
-	  && thisarginfo.dt != vect_external_def
-	  && loop_vinfo
-	  && TREE_CODE (op) == SSA_NAME
-	  && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
-	  && tree_fits_shwi_p (iv.step))
+      /* For linear arguments, the analyze phase should have saved
+	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
+      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
+	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
+	{
+	  gcc_assert (vec_stmt);
+	  thisarginfo.linear_step
+	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
+	  thisarginfo.op
+	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
+	  /* If loop has been peeled for alignment, we need to adjust it.  */
+	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
+	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
+	  if (n1 != n2)
+	    {
+	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
+	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
+	      tree opt = TREE_TYPE (thisarginfo.op);
+	      bias = fold_convert (TREE_TYPE (step), bias);
+	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
+	      thisarginfo.op
+		= fold_build2 (POINTER_TYPE_P (opt)
+			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
+			       thisarginfo.op, bias);
+	    }
+	}
+      else if (!vec_stmt
+	       && thisarginfo.dt != vect_constant_def
+	       && thisarginfo.dt != vect_external_def
+	       && loop_vinfo
+	       && TREE_CODE (op) == SSA_NAME
+	       && simple_iv (loop, loop_containing_stmt (stmt), op,
+			     &iv, false)
+	       && tree_fits_shwi_p (iv.step))
 	{
 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
 	  thisarginfo.op = iv.base;
@@ -2735,8 +2763,8 @@  vectorizable_simd_clone_call (gimple stm
 
   unsigned int badness = 0;
   struct cgraph_node *bestn = NULL;
-  if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
-    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
+  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
+    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
   else
     for (struct cgraph_node *n = node->simd_clones; n != NULL;
 	 n = n->simdclone->next_clone)
@@ -2855,7 +2883,19 @@  vectorizable_simd_clone_call (gimple stm
 
   if (!vec_stmt) /* transformation not required.  */
     {
-      STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
+      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
+      for (i = 0; i < nargs; i++)
+	if (bestn->simdclone->args[i].arg_type
+	    == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
+	  {
+	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
+									+ 1);
+	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
+	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
+		       ? size_type_node : TREE_TYPE (arginfo[i].op);
+	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
+	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
+	  }
       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
@@ -7479,6 +7519,7 @@  free_stmt_vec_info (gimple stmt)
     }
 
   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
+  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
   set_vinfo_for_stmt (stmt, NULL);
   free (stmt_info);
 }
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-13.c.jj	2014-11-26 15:42:26.162690785 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-13.c	2014-11-26 15:42:49.252278876 +0100
@@ -0,0 +1,7 @@ 
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fcommon" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "vect-simd-clone-6.c"
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-14.c.jj	2014-11-26 15:43:09.522919202 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-14.c	2014-11-26 15:43:24.566652273 +0100
@@ -0,0 +1,7 @@ 
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fcommon" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "vect-simd-clone-11.c"
+
+/* { dg-final { cleanup-tree-dump "vect" } } */