Patchwork Allow vectorization of IVs with non-constant step (PR tree-optimization/57705)

login
register
mail settings
Submitter Jakub Jelinek
Date June 25, 2013, 11:56 a.m.
Message ID <20130625115629.GN2336@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/254129/
State New
Headers show

Comments

Jakub Jelinek - June 25, 2013, 11:56 a.m.
Hi!

While hacking on the gomp4 simd clauses stuff, I've noticed that
we don't vectorize IVs with non-constant step.  The following patch
implements it, basically we just can't rely that the step or VF * step
is a constant and instead for VF * step need to emit a temporary with
scalar VF * step value before adding a vector temporary.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

I wasn't sure about nested loop vectorization, so I've disabled it for that
case, perhaps with a testcase that exhibits it could be replaced just with a
test that the step, if SSA_NAME, doesn't have definition even in the outer
loop, because we still put the vector constants before the outer loop.

2013-06-25  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/57705
	* tree-vect-loop.c (vect_is_simple_iv_evolution): Allow
	SSA_NAME step, provided that it is not defined inside the loop.
	(vect_analyze_scalar_cycles_1): Disallow SSA_NAME step in nested
	loop.
	(get_initial_def_for_induction): Handle SSA_NAME IV step.

	* gcc.dg/vect/pr57705.c: New test.
	* gcc.dg/vect/vect-iv-7.c: Add noclone attribute, remove xfail.


	Jakub
Richard Guenther - June 25, 2013, 12:33 p.m.
Jakub Jelinek <jakub@redhat.com> wrote:

>Hi!
>
>While hacking on the gomp4 simd clauses stuff, I've noticed that
>we don't vectorize IVs with non-constant step.  The following patch
>implements it, basically we just can't rely that the step or VF * step
>is a constant and instead for VF * step need to emit a temporary with
>scalar VF * step value before adding a vector temporary.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

>I wasn't sure about nested loop vectorization, so I've disabled it for
>that
>case, perhaps with a testcase that exhibits it could be replaced just
>with a
>test that the step, if SSA_NAME, doesn't have definition even in the
>outer
>loop, because we still put the vector constants before the outer loop.
>
>2013-06-25  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/57705
>	* tree-vect-loop.c (vect_is_simple_iv_evolution): Allow
>	SSA_NAME step, provided that it is not defined inside the loop.
>	(vect_analyze_scalar_cycles_1): Disallow SSA_NAME step in nested
>	loop.
>	(get_initial_def_for_induction): Handle SSA_NAME IV step.
>
>	* gcc.dg/vect/pr57705.c: New test.
>	* gcc.dg/vect/vect-iv-7.c: Add noclone attribute, remove xfail.
>
>--- gcc/tree-vect-loop.c.jj	2013-06-06 08:39:24.000000000 +0200
>+++ gcc/tree-vect-loop.c	2013-06-25 11:17:02.947602981 +0200
>@@ -500,7 +500,7 @@ vect_determine_vectorization_factor (loo
> /* Function vect_is_simple_iv_evolution.
> 
>    FORNOW: A simple evolution of an induction variables in the loop is
>-   considered a polynomial evolution with constant step.  */
>+   considered a polynomial evolution.  */
> 
> static bool
>vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree *
>init,
>@@ -509,6 +509,7 @@ vect_is_simple_iv_evolution (unsigned lo
>   tree init_expr;
>   tree step_expr;
> tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
>+  basic_block bb;
> 
>   /* When there is no evolution in this loop, the evolution function
>      is not "simple".  */
>@@ -534,7 +535,10 @@ vect_is_simple_iv_evolution (unsigned lo
>   *init = init_expr;
>   *step = step_expr;
> 
>-  if (TREE_CODE (step_expr) != INTEGER_CST)
>+  if (TREE_CODE (step_expr) != INTEGER_CST
>+      && (TREE_CODE (step_expr) != SSA_NAME
>+	  || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
>+	      && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))))
>     {
>       if (dump_enabled_p ())
>         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>@@ -556,7 +560,7 @@ static void
>vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop
>*loop)
> {
>   basic_block bb = loop->header;
>-  tree dumy;
>+  tree init, step;
>   vec<gimple> worklist;
>   worklist.create (64);
>   gimple_stmt_iterator gsi;
>@@ -605,7 +609,9 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
> 	}
> 
>       if (!access_fn
>-	  || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy,
>&dumy))
>+	  || !vect_is_simple_iv_evolution (loop->num, access_fn, &init,
>&step)
>+	  || (LOOP_VINFO_LOOP (loop_vinfo) != loop
>+	      && TREE_CODE (step) != INTEGER_CST))
> 	{
> 	  worklist.safe_push (phi);
> 	  continue;
>@@ -3273,10 +3279,14 @@ get_initial_def_for_induction (gimple iv
>       expr = build_int_cst (TREE_TYPE (step_expr), vf);
>       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
> 			      expr, step_expr);
>+      if (TREE_CODE (step_expr) == SSA_NAME)
>+	new_name = vect_init_vector (iv_phi, new_name,
>+				     TREE_TYPE (step_expr), NULL);
>     }
> 
>   t = unshare_expr (new_name);
>-  gcc_assert (CONSTANT_CLASS_P (new_name));
>+  gcc_assert (CONSTANT_CLASS_P (new_name)
>+	      || TREE_CODE (new_name) == SSA_NAME);
>   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
>   gcc_assert (stepvectype);
>   new_vec = build_vector_from_val (stepvectype, t);
>@@ -3332,8 +3342,12 @@ get_initial_def_for_induction (gimple iv
>       expr = build_int_cst (TREE_TYPE (step_expr), nunits);
>       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
> 			      expr, step_expr);
>+      if (TREE_CODE (step_expr) == SSA_NAME)
>+	new_name = vect_init_vector (iv_phi, new_name,
>+				     TREE_TYPE (step_expr), NULL);
>       t = unshare_expr (new_name);
>-      gcc_assert (CONSTANT_CLASS_P (new_name));
>+      gcc_assert (CONSTANT_CLASS_P (new_name)
>+		  || TREE_CODE (new_name) == SSA_NAME);
>       new_vec = build_vector_from_val (stepvectype, t);
>      vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
> 
>--- gcc/testsuite/gcc.dg/vect/pr57705.c.jj	2013-06-25
>10:15:21.037092153 +0200
>+++ gcc/testsuite/gcc.dg/vect/pr57705.c	2013-06-25 10:15:02.239733689
>+0200
>@@ -0,0 +1,65 @@
>+/* { dg-do run } */
>+
>+#include "tree-vect.h"
>+
>+int a[1024];
>+unsigned char b[1024];
>+
>+extern void abort (void);
>+
>+__attribute__((noinline, noclone)) void
>+foo (int k, int m)
>+{
>+  int i, k2 = k;
>+  for (i = 0; i < 1024; i++)
>+    {
>+      a[i] = k2;
>+      k2 += m + 1;
>+    }
>+}
>+
>+__attribute__((noinline, noclone)) void
>+bar (int k, int m)
>+{
>+  int i, k2 = k;
>+  for (i = 0; i < 1024; i++)
>+    {
>+      k2 += m + 1;
>+      a[i] = k2;
>+    }
>+}
>+
>+__attribute__((noinline, noclone)) void
>+baz (int k, int m)
>+{
>+  int i, k2 = k;
>+  for (i = 0; i < 1024; i++)
>+    {
>+      a[i] = k2;
>+      b[i] = i;
>+      k2 += m + 1;
>+    }
>+}
>+
>+int
>+main ()
>+{
>+  int i;
>+  check_vect ();
>+  foo (5, 3);
>+  for (i = 0; i < 1024; i++)
>+    if (a[i] != 5 + 4 * i)
>+      abort ();
>+  bar (5, 3);
>+  for (i = 0; i < 1024; i++)
>+    if (a[i] != 9 + 4 * i)
>+      abort ();
>+  baz (5, 3);
>+  for (i = 0; i < 1024; i++)
>+    if (a[i] != 5 + 4 * i || b[i] != (unsigned char) i)
>+      abort ();
>+  return 0;
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 3 "vect" } }
>*/
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>--- gcc/testsuite/gcc.dg/vect/vect-iv-7.c.jj	2008-09-05
>12:54:35.000000000 +0200
>+++ gcc/testsuite/gcc.dg/vect/vect-iv-7.c	2013-06-25 10:37:35.864287369
>+0200
>@@ -6,7 +6,7 @@
> #define N 16
>int result[N] = {8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34,
>36, 38};
>  
>-__attribute__ ((noinline)) int main1 (int X)
>+__attribute__ ((noinline, noclone)) int main1 (int X)
> {  
>   int arr[N];
>   int k = 3;
>@@ -38,5 +38,5 @@ int main (void)
>   return main1 (2);
> } 
> 
>-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
>xfail *-*-* } } } */
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } }
>*/
> /* { dg-final { cleanup-tree-dump "vect" } } */
>
>	Jakub

Patch

--- gcc/tree-vect-loop.c.jj	2013-06-06 08:39:24.000000000 +0200
+++ gcc/tree-vect-loop.c	2013-06-25 11:17:02.947602981 +0200
@@ -500,7 +500,7 @@  vect_determine_vectorization_factor (loo
 /* Function vect_is_simple_iv_evolution.
 
    FORNOW: A simple evolution of an induction variables in the loop is
-   considered a polynomial evolution with constant step.  */
+   considered a polynomial evolution.  */
 
 static bool
 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
@@ -509,6 +509,7 @@  vect_is_simple_iv_evolution (unsigned lo
   tree init_expr;
   tree step_expr;
   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
+  basic_block bb;
 
   /* When there is no evolution in this loop, the evolution function
      is not "simple".  */
@@ -534,7 +535,10 @@  vect_is_simple_iv_evolution (unsigned lo
   *init = init_expr;
   *step = step_expr;
 
-  if (TREE_CODE (step_expr) != INTEGER_CST)
+  if (TREE_CODE (step_expr) != INTEGER_CST
+      && (TREE_CODE (step_expr) != SSA_NAME
+	  || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
+	      && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -556,7 +560,7 @@  static void
 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 {
   basic_block bb = loop->header;
-  tree dumy;
+  tree init, step;
   vec<gimple> worklist;
   worklist.create (64);
   gimple_stmt_iterator gsi;
@@ -605,7 +609,9 @@  vect_analyze_scalar_cycles_1 (loop_vec_i
 	}
 
       if (!access_fn
-	  || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
+	  || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
+	  || (LOOP_VINFO_LOOP (loop_vinfo) != loop
+	      && TREE_CODE (step) != INTEGER_CST))
 	{
 	  worklist.safe_push (phi);
 	  continue;
@@ -3273,10 +3279,14 @@  get_initial_def_for_induction (gimple iv
       expr = build_int_cst (TREE_TYPE (step_expr), vf);
       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
 			      expr, step_expr);
+      if (TREE_CODE (step_expr) == SSA_NAME)
+	new_name = vect_init_vector (iv_phi, new_name,
+				     TREE_TYPE (step_expr), NULL);
     }
 
   t = unshare_expr (new_name);
-  gcc_assert (CONSTANT_CLASS_P (new_name));
+  gcc_assert (CONSTANT_CLASS_P (new_name)
+	      || TREE_CODE (new_name) == SSA_NAME);
   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
   gcc_assert (stepvectype);
   new_vec = build_vector_from_val (stepvectype, t);
@@ -3332,8 +3342,12 @@  get_initial_def_for_induction (gimple iv
       expr = build_int_cst (TREE_TYPE (step_expr), nunits);
       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
 			      expr, step_expr);
+      if (TREE_CODE (step_expr) == SSA_NAME)
+	new_name = vect_init_vector (iv_phi, new_name,
+				     TREE_TYPE (step_expr), NULL);
       t = unshare_expr (new_name);
-      gcc_assert (CONSTANT_CLASS_P (new_name));
+      gcc_assert (CONSTANT_CLASS_P (new_name)
+		  || TREE_CODE (new_name) == SSA_NAME);
       new_vec = build_vector_from_val (stepvectype, t);
       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
 
--- gcc/testsuite/gcc.dg/vect/pr57705.c.jj	2013-06-25 10:15:21.037092153 +0200
+++ gcc/testsuite/gcc.dg/vect/pr57705.c	2013-06-25 10:15:02.239733689 +0200
@@ -0,0 +1,65 @@ 
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+int a[1024];
+unsigned char b[1024];
+
+extern void abort (void);
+
+__attribute__((noinline, noclone)) void
+foo (int k, int m)
+{
+  int i, k2 = k;
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] = k2;
+      k2 += m + 1;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+bar (int k, int m)
+{
+  int i, k2 = k;
+  for (i = 0; i < 1024; i++)
+    {
+      k2 += m + 1;
+      a[i] = k2;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+baz (int k, int m)
+{
+  int i, k2 = k;
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] = k2;
+      b[i] = i;
+      k2 += m + 1;
+    }
+}
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  foo (5, 3);
+  for (i = 0; i < 1024; i++)
+    if (a[i] != 5 + 4 * i)
+      abort ();
+  bar (5, 3);
+  for (i = 0; i < 1024; i++)
+    if (a[i] != 9 + 4 * i)
+      abort ();
+  baz (5, 3);
+  for (i = 0; i < 1024; i++)
+    if (a[i] != 5 + 4 * i || b[i] != (unsigned char) i)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 3 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/vect-iv-7.c.jj	2008-09-05 12:54:35.000000000 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-iv-7.c	2013-06-25 10:37:35.864287369 +0200
@@ -6,7 +6,7 @@ 
 #define N 16
 int result[N] = {8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38};
  
-__attribute__ ((noinline)) int main1 (int X)
+__attribute__ ((noinline, noclone)) int main1 (int X)
 {  
   int arr[N];
   int k = 3;
@@ -38,5 +38,5 @@  int main (void)
   return main1 (2);
 } 
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */