diff mbox

Fix non-INTEGER_CST step vectorization (PR tree-optimization/57741)

Message ID 20130628062629.GK2336@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek June 28, 2013, 6:26 a.m. UTC
Hi!

My recent patch introduced a regression, as shown by the attached
testcases.  There was nothing actually checking the type of the induction;
integral/pointer types were only implied by the fact that we allowed only
INTEGER_CST steps.  With -ffast-math I don't see a reason why we can't handle
floating point types the same way, so the patch adds type checks to
vect_is_simple_iv_evolution so that we won't be surprised by fixed point,
vector (and whatever else comes later) inductions, and handles the REAL_TYPE
steps in get_initial_def_for_induction.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2013-06-28  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/57741
	* tree-vect-loop.c (vect_is_simple_iv_evolution): Disallow
	non-INTEGRAL_TYPE_P non-SCALAR_FLOAT_TYPE_P SSA_NAME step_exprs,
	or SCALAR_FLOAT_TYPE_P SSA_NAMEs if !flag_associative_math.
	Allow REAL_CST step_exprs if flag_associative_math.
	(get_initial_def_for_induction): Handle SCALAR_FLOAT_TYPE_P step_expr.

	* gcc.dg/vect/pr57741-1.c: New test.
	* gcc.dg/vect/pr57741-2.c: New test.
	* gcc.dg/vect/pr57741-3.c: New test.


	Jakub

Comments

Richard Biener July 2, 2013, 11:21 a.m. UTC | #1
Jakub Jelinek <jakub@redhat.com> wrote:

>Hi!
>
>My recent patch introduced a regression as shown by the attached
>testcases, there was nothing actually checking the type of the induction,
>but because we allowed only INTEGER_CST steps that implied integral/pointer
>types only.  With -ffast-math I don't see a reason why we can't handle
>floating point types the same, so the patch adds type checks to
>vect_is_simple_iv_evolution so that we won't be surprised by fixed point,
>vector (and whatever else comes later) inductions, and handles the REAL_TYPE
>steps in get_initial_def_for_induction.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

>2013-06-28  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/57741
>	* tree-vect-loop.c (vect_is_simple_iv_evolution): Disallow
>	non-INTEGRAL_TYPE_P non-SCALAR_FLOAT_TYPE_P SSA_NAME step_exprs,
>	or SCALAR_FLOAT_TYPE_P SSA_NAMEs if !flag_associative_math.
>	Allow REAL_CST step_exprs if flag_associative_math.
>	(get_initial_def_for_induction): Handle SCALAR_FLOAT_TYPE_P step_expr.
>
>	* gcc.dg/vect/pr57741-1.c: New test.
>	* gcc.dg/vect/pr57741-2.c: New test.
>	* gcc.dg/vect/pr57741-3.c: New test.
>
>--- gcc/tree-vect-loop.c.jj	2013-06-25 14:34:33.000000000 +0200
>+++ gcc/tree-vect-loop.c	2013-06-28 01:07:42.524553908 +0200
>@@ -538,7 +538,12 @@ vect_is_simple_iv_evolution (unsigned lo
>   if (TREE_CODE (step_expr) != INTEGER_CST
>       && (TREE_CODE (step_expr) != SSA_NAME
> 	  || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
>-	      && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))))
>+	      && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
>+	  || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
>+	      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
>+		  || !flag_associative_math)))
>+      && (TREE_CODE (step_expr) != REAL_CST
>+	  || !flag_associative_math))
>     {
>       if (dump_enabled_p ())
>         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>@@ -3276,7 +3281,13 @@ get_initial_def_for_induction (gimple iv
>     {
>       /* iv_loop is the loop to be vectorized. Generate:
> 	  vec_step = [VF*S, VF*S, VF*S, VF*S]  */
>-      expr = build_int_cst (TREE_TYPE (step_expr), vf);
>+      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
>+	{
>+	  expr = build_int_cst (integer_type_node, vf);
>+	  expr = fold_convert (TREE_TYPE (step_expr), expr);
>+	}
>+      else
>+	expr = build_int_cst (TREE_TYPE (step_expr), vf);
>       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
> 			      expr, step_expr);
>       if (TREE_CODE (step_expr) == SSA_NAME)
>@@ -3339,7 +3350,13 @@ get_initial_def_for_induction (gimple iv
>       gcc_assert (!nested_in_vect_loop);
> 
>       /* Create the vector that holds the step of the induction.  */
>-      expr = build_int_cst (TREE_TYPE (step_expr), nunits);
>+      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
>+	{
>+	  expr = build_int_cst (integer_type_node, nunits);
>+	  expr = fold_convert (TREE_TYPE (step_expr), expr);
>+	}
>+      else
>+	expr = build_int_cst (TREE_TYPE (step_expr), nunits);
>       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
> 			      expr, step_expr);
>       if (TREE_CODE (step_expr) == SSA_NAME)
>--- gcc/testsuite/gcc.dg/vect/pr57741-1.c.jj	2013-06-27 23:29:07.322359740 +0200
>+++ gcc/testsuite/gcc.dg/vect/pr57741-1.c	2013-06-27 23:54:18.216533899 +0200
>@@ -0,0 +1,21 @@
>+/* PR tree-optimization/57741 */
>+/* { dg-do compile } */
>+
>+void
>+foo (float *p, float *q, float x)
>+{
>+  int i;
>+  float f = 1.0f, g = 2.0f;
>+  for (i = 0; i < 1024; i++)
>+    {
>+      *p++ = f;
>+      f += x;
>+    }
>+  for (i = 0; i < 1024; i++)
>+    {
>+      *q++ = g;
>+      g += 0.5f;
>+    }
>+}
>+
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>--- gcc/testsuite/gcc.dg/vect/pr57741-2.c.jj	2013-06-27 23:44:40.846538237 +0200
>+++ gcc/testsuite/gcc.dg/vect/pr57741-2.c	2013-06-27 23:50:23.552498840 +0200
>@@ -0,0 +1,44 @@
>+/* PR tree-optimization/57741 */
>+/* { dg-do run } */
>+/* { dg-additional-options "-ffast-math" } */
>+
>+#include "tree-vect.h"
>+
>+extern void abort (void);
>+
>+__attribute__((noinline, noclone)) void
>+foo (float *p, float *q, float x)
>+{
>+  int i;
>+  p = (float *) __builtin_assume_aligned (p, 32);
>+  q = (float *) __builtin_assume_aligned (q, 32);
>+  float f = 1.0f, g = 2.0f;
>+  for (i = 0; i < 1024; i++)
>+    {
>+      *p++ = f;
>+      f += x;
>+    }
>+  for (i = 0; i < 1024; i++)
>+    {
>+      *q++ = g;
>+      g += 0.5f;
>+    }
>+}
>+
>+float p[1024] __attribute__((aligned (32))) = { 17.0f };
>+float q[1024] __attribute__((aligned (32))) = { 17.0f };
>+
>+int
>+main ()
>+{
>+  int i;
>+  check_vect ();
>+  foo (p, q, 1.5f);
>+  for (i = 0; i < 1024; i++)
>+    if (p[i] != 1.0f + i * 1.5f || q[i] != 2.0f + i * 0.5f)
>+      abort ();
>+  return 0;
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 2 loop" 1 "vect" } } */
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>--- gcc/testsuite/gcc.dg/vect/pr57741-3.c.jj	2013-06-28 01:08:29.530334797 +0200
>+++ gcc/testsuite/gcc.dg/vect/pr57741-3.c	2013-06-28 01:11:31.365141459 +0200
>@@ -0,0 +1,42 @@
>+/* PR tree-optimization/57741 */
>+/* { dg-do run } */
>+/* { dg-additional-options "-ffast-math" } */
>+
>+#include "tree-vect.h"
>+
>+extern void abort (void);
>+
>+float p[1024] __attribute__((aligned (32))) = { 17.0f };
>+float q[1024] __attribute__((aligned (32))) = { 17.0f };
>+char r[1024] __attribute__((aligned (32))) = { 1 };
>+
>+__attribute__((noinline, noclone)) void
>+foo (float x)
>+{
>+  int i;
>+  float f = 1.0f, g = 2.0f;
>+  for (i = 0; i < 1024; i++)
>+    {
>+      p[i] = f;
>+      f += x;
>+      q[i] = g;
>+      g += 0.5f;
>+      r[i]++;
>+    }
>+}
>+
>+int
>+main ()
>+{
>+  int i;
>+  check_vect ();
>+  r[0] = 0;
>+  foo (1.5f);
>+  for (i = 0; i < 1024; i++)
>+    if (p[i] != 1.0f + i * 1.5f || q[i] != 2.0f + i * 0.5f || r[i] != 1)
>+      abort ();
>+  return 0;
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>
>	Jakub
diff mbox

Patch

--- gcc/tree-vect-loop.c.jj	2013-06-25 14:34:33.000000000 +0200
+++ gcc/tree-vect-loop.c	2013-06-28 01:07:42.524553908 +0200
@@ -538,7 +538,12 @@  vect_is_simple_iv_evolution (unsigned lo
   if (TREE_CODE (step_expr) != INTEGER_CST
       && (TREE_CODE (step_expr) != SSA_NAME
 	  || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
-	      && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))))
+	      && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
+	  || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
+	      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
+		  || !flag_associative_math)))
+      && (TREE_CODE (step_expr) != REAL_CST
+	  || !flag_associative_math))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3276,7 +3281,13 @@  get_initial_def_for_induction (gimple iv
     {
       /* iv_loop is the loop to be vectorized. Generate:
 	  vec_step = [VF*S, VF*S, VF*S, VF*S]  */
-      expr = build_int_cst (TREE_TYPE (step_expr), vf);
+      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+	{
+	  expr = build_int_cst (integer_type_node, vf);
+	  expr = fold_convert (TREE_TYPE (step_expr), expr);
+	}
+      else
+	expr = build_int_cst (TREE_TYPE (step_expr), vf);
       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
 			      expr, step_expr);
       if (TREE_CODE (step_expr) == SSA_NAME)
@@ -3339,7 +3350,13 @@  get_initial_def_for_induction (gimple iv
       gcc_assert (!nested_in_vect_loop);
 
       /* Create the vector that holds the step of the induction.  */
-      expr = build_int_cst (TREE_TYPE (step_expr), nunits);
+      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+	{
+	  expr = build_int_cst (integer_type_node, nunits);
+	  expr = fold_convert (TREE_TYPE (step_expr), expr);
+	}
+      else
+	expr = build_int_cst (TREE_TYPE (step_expr), nunits);
       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
 			      expr, step_expr);
       if (TREE_CODE (step_expr) == SSA_NAME)
--- gcc/testsuite/gcc.dg/vect/pr57741-1.c.jj	2013-06-27 23:29:07.322359740 +0200
+++ gcc/testsuite/gcc.dg/vect/pr57741-1.c	2013-06-27 23:54:18.216533899 +0200
@@ -0,0 +1,21 @@ 
+/* PR tree-optimization/57741 */
+/* { dg-do compile } */
+
+void
+foo (float *p, float *q, float x)
+{
+  int i;
+  float f = 1.0f, g = 2.0f;
+  for (i = 0; i < 1024; i++)
+    {
+      *p++ = f;
+      f += x;
+    }
+  for (i = 0; i < 1024; i++)
+    {
+      *q++ = g;
+      g += 0.5f;
+    }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/pr57741-2.c.jj	2013-06-27 23:44:40.846538237 +0200
+++ gcc/testsuite/gcc.dg/vect/pr57741-2.c	2013-06-27 23:50:23.552498840 +0200
@@ -0,0 +1,44 @@ 
+/* PR tree-optimization/57741 */
+/* { dg-do run } */
+/* { dg-additional-options "-ffast-math" } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+__attribute__((noinline, noclone)) void
+foo (float *p, float *q, float x)
+{
+  int i;
+  p = (float *) __builtin_assume_aligned (p, 32);
+  q = (float *) __builtin_assume_aligned (q, 32);
+  float f = 1.0f, g = 2.0f;
+  for (i = 0; i < 1024; i++)
+    {
+      *p++ = f;
+      f += x;
+    }
+  for (i = 0; i < 1024; i++)
+    {
+      *q++ = g;
+      g += 0.5f;
+    }
+}
+
+float p[1024] __attribute__((aligned (32))) = { 17.0f };
+float q[1024] __attribute__((aligned (32))) = { 17.0f };
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  foo (p, q, 1.5f);
+  for (i = 0; i < 1024; i++)
+    if (p[i] != 1.0f + i * 1.5f || q[i] != 2.0f + i * 0.5f)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loop" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/pr57741-3.c.jj	2013-06-28 01:08:29.530334797 +0200
+++ gcc/testsuite/gcc.dg/vect/pr57741-3.c	2013-06-28 01:11:31.365141459 +0200
@@ -0,0 +1,42 @@ 
+/* PR tree-optimization/57741 */
+/* { dg-do run } */
+/* { dg-additional-options "-ffast-math" } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+float p[1024] __attribute__((aligned (32))) = { 17.0f };
+float q[1024] __attribute__((aligned (32))) = { 17.0f };
+char r[1024] __attribute__((aligned (32))) = { 1 };
+
+__attribute__((noinline, noclone)) void
+foo (float x)
+{
+  int i;
+  float f = 1.0f, g = 2.0f;
+  for (i = 0; i < 1024; i++)
+    {
+      p[i] = f;
+      f += x;
+      q[i] = g;
+      g += 0.5f;
+      r[i]++;
+    }
+}
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  r[0] = 0;
+  foo (1.5f);
+  for (i = 0; i < 1024; i++)
+    if (p[i] != 1.0f + i * 1.5f || q[i] != 2.0f + i * 0.5f || r[i] != 1)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */