Patchwork Handle vectorization of invariant loads (PR46787)

login
register
mail settings
Submitter Richard Guenther
Date June 29, 2011, 11:19 a.m.
Message ID <alpine.LNX.2.00.1106291310490.810@zhemvz.fhfr.qr>
Download mbox | patch
Permalink /patch/102565/
State New
Headers show

Comments

Richard Guenther - June 29, 2011, 11:19 a.m.
The following patch makes us handle invariant loads during vectorization.
Dependence analysis currently isn't clever enough to disambiguate them
thus we insert versioning-for-alias checks.  For the testcase hoisting
the load is still always possible though, and for a read-after-write
dependence it would be possible for the vectorized loop copy as the
may-aliasing write is varying by the scalar variable size.

The existing code for vectorizing invariant accesses looks very
suspicious - it generates a vector load at the scalar address
to then just extract the first vector element.  Huh.  IMHO this
can be simplified as done, by just re-using the scalar load result.
But maybe this code was supposed to deal with something entirely
Ira Rosen - June 29, 2011, 12:11 p.m.
Richard Guenther <rguenther@suse.de> wrote on 29/06/2011 02:19:40 PM:

>
> The following patch makes us handle invariant loads during vectorization.
> Dependence analysis currently isn't clever enough to disambiguate them
> thus we insert versioning-for-alias checks.  For the testcase hoisting
> the load is still always possible though, and for a read-after-write
> dependence it would be possible for the vectorized loop copy as the
> may-aliasing write is varying by the scalar variable size.
>
> The existing code for vectorizing invariant accesses looks very
> suspicious - it generates a vector load at the scalar address
> to then just extract the first vector element.  Huh.  IMHO this
> can be simplified as done, by just re-using the scalar load result.
> But maybe this code was supposed to deal with something entirely
> different?

This code was added to support outer loop vectorization:
http://gcc.gnu.org/ml/gcc-patches/2007-08/msg00729.html (with an intention
to be improved...). I think your version is fine for the outer loops as
well. But with this patch an unused vector load is still created (it will
probably be removed later though).

Ira

>
> This patch gives a 33% speedup to the phoronix himeno testcase
> if you bump the maximum alias versioning checks we want to insert.
>
> I'm currently re-bootstrapping & testing this but an earlier version
> was ok on x86_64-unknown-linux-gnu.
>
> 2011-06-29  Richard Guenther  <rguenther@suse.de>
>
>    PR tree-optimization/46787
>    * tree-data-ref.c (dr_address_invariant_p): Remove.
>    (find_data_references_in_stmt): Invariant accesses are ok now.
>    * tree-vect-stmts.c (vectorizable_load): Handle invariant
>    loads.
>    * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
>    invariant loads.
>
>    * gcc.dg/vect/vect-121.c: New testcase.
>
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c   (revision 175531)
> +++ gcc/tree-data-ref.c   (working copy)
> @@ -919,21 +919,6 @@ dr_analyze_alias (struct data_reference
>      }
>  }
>
> -/* Returns true if the address of DR is invariant.  */
> -
> -static bool
> -dr_address_invariant_p (struct data_reference *dr)
> -{
> -  unsigned i;
> -  tree idx;
> -
> -  FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
> -    if (tree_contains_chrecs (idx, NULL))
> -      return false;
> -
> -  return true;
> -}
> -
>  /* Frees data reference DR.  */
>
>  void
> @@ -4228,19 +4213,6 @@ find_data_references_in_stmt (struct loo
>        dr = create_data_ref (nest, loop_containing_stmt (stmt),
>               *ref->pos, stmt, ref->is_read);
>        gcc_assert (dr != NULL);
> -
> -      /* FIXME -- data dependence analysis does not work correctly
> for objects
> -         with invariant addresses in loop nests.  Let us fail here until
the
> -    problem is fixed.  */
> -      if (dr_address_invariant_p (dr) && nest)
> -   {
> -     free_data_ref (dr);
> -     if (dump_file && (dump_flags & TDF_DETAILS))
> -       fprintf (dump_file, "\tFAILED as dr address is invariant\n");
> -     ret = false;
> -     break;
> -   }
> -
>        VEC_safe_push (data_reference_p, heap, *datarefs, dr);
>      }
>    VEC_free (data_ref_loc, heap, references);
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c   (revision 175531)
> +++ gcc/tree-vect-stmts.c   (working copy)
> @@ -4076,7 +4076,8 @@ vectorizable_load (gimple stmt, gimple_s
>        && code != COMPONENT_REF
>        && code != IMAGPART_EXPR
>        && code != REALPART_EXPR
> -      && code != MEM_REF)
> +      && code != MEM_REF
> +      && TREE_CODE_CLASS (code) != tcc_declaration)
>      return false;
>
>    if (!STMT_VINFO_DATA_REF (stmt_info))
> @@ -4527,30 +4528,14 @@ vectorizable_load (gimple stmt, gimple_s
>           if (inv_p && !bb_vinfo)
>        {
>          gcc_assert (!strided_load);
> -        gcc_assert (nested_in_vect_loop_p (loop, stmt));
>          if (j == 0)
>            {
> -            int k;
> -            tree t = NULL_TREE;
> -            tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
> -
> -            /* CHECKME: bitpos depends on endianess?  */
> -            bitpos = bitsize_zero_node;
> -            vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
> -               bitsize, bitpos);
> -            vec_dest = vect_create_destination_var (scalar_dest,
> -                           NULL_TREE);
> -            new_stmt = gimple_build_assign (vec_dest, vec_inv);
> -            new_temp = make_ssa_name (vec_dest, new_stmt);
> -            gimple_assign_set_lhs (new_stmt, new_temp);
> -            vect_finish_stmt_generation (stmt, new_stmt, gsi);
> -
> -            for (k = nunits - 1; k >= 0; --k)
> -         t = tree_cons (NULL_TREE, new_temp, t);
> -            /* FIXME: use build_constructor directly.  */
> -            vec_inv = build_constructor_from_list (vectype, t);
> +            tree vec_inv;
> +            gimple_stmt_iterator gsi2 = *gsi;
> +            gsi_next (&gsi2);
> +            vec_inv = build_vector_from_val (vectype, scalar_dest);
>              new_temp = vect_init_vector (stmt, vec_inv,
> -                     vectype, gsi);
> +                     vectype, &gsi2);
>              new_stmt = SSA_NAME_DEF_STMT (new_temp);
>            }
>          else
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c   (revision 175531)
> +++ gcc/tree-vect-data-refs.c   (working copy)
> @@ -2302,9 +2302,9 @@ vect_analyze_data_ref_access (struct dat
>        return false;
>      }
>
> -  /* Don't allow invariant accesses in loops.  */
> +  /* Allow invariant loads in loops.  */
>    if (loop_vinfo && dr_step == 0)
> -    return false;
> +    return DR_IS_READ (dr);
>
>    if (loop && nested_in_vect_loop_p (loop, stmt))
>      {
> Index: gcc/testsuite/gcc.dg/vect/vect-121.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/vect-121.c   (revision 0)
> +++ gcc/testsuite/gcc.dg/vect/vect-121.c   (revision 0)
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_float } */
> +
> +float *x;
> +float parm;
> +float
> +test (int start, int end)
> +{
> +  int i;
> +  for (i = start; i < end; ++i)
> +    {
> +      float tem = x[i];
> +      x[i] = parm * tem;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
H.J. Lu - July 4, 2011, 1:28 p.m.
On Wed, Jun 29, 2011 at 4:19 AM, Richard Guenther <rguenther@suse.de> wrote:
>
> The following patch makes us handle invariant loads during vectorization.
> Dependence analysis currently isn't clever enough to disambiguate them
> thus we insert versioning-for-alias checks.  For the testcase hoisting
> the load is still always possible though, and for a read-after-write
> dependence it would be possible for the vectorized loop copy as the
> may-aliasing write is varying by the scalar variable size.
>
> The existing code for vectorizing invariant accesses looks very
> suspicious - it generates a vector load at the scalar address
> to then just extract the first vector element.  Huh.  IMHO this
> can be simplified as done, by just re-using the scalar load result.
> But maybe this code was supposed to deal with something entirely
> different?
>
> This patch gives a 33% speedup to the phoronix himeno testcase
> if you bump the maximum alias versioning checks we want to insert.
>
> I'm currently re-bootstrapping & testing this but an earlier version
> was ok on x86_64-unknown-linux-gnu.
>
> 2011-06-29  Richard Guenther  <rguenther@suse.de>
>
>        PR tree-optimization/46787
>        * tree-data-ref.c (dr_address_invariant_p): Remove.
>        (find_data_references_in_stmt): Invariant accesses are ok now.
>        * tree-vect-stmts.c (vectorizable_load): Handle invariant
>        loads.
>        * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
>        invariant loads.
>
>        * gcc.dg/vect/vect-121.c: New testcase.
>

This also caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49628
H.J. Lu - July 29, 2011, 2:57 a.m.
On Mon, Jul 4, 2011 at 6:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Jun 29, 2011 at 4:19 AM, Richard Guenther <rguenther@suse.de> wrote:
>>
>> The following patch makes us handle invariant loads during vectorization.
>> Dependence analysis currently isn't clever enough to disambiguate them
>> thus we insert versioning-for-alias checks.  For the testcase hoisting
>> the load is still always possible though, and for a read-after-write
>> dependence it would be possible for the vectorized loop copy as the
>> may-aliasing write is varying by the scalar variable size.
>>
>> The existing code for vectorizing invariant accesses looks very
>> suspicious - it generates a vector load at the scalar address
>> to then just extract the first vector element.  Huh.  IMHO this
>> can be simplified as done, by just re-using the scalar load result.
>> But maybe this code was supposed to deal with something entirely
>> different?
>>
>> This patch gives a 33% speedup to the phoronix himeno testcase
>> if you bump the maximum alias versioning checks we want to insert.
>>
>> I'm currently re-bootstrapping & testing this but an earlier version
>> was ok on x86_64-unknown-linux-gnu.
>>
>> 2011-06-29  Richard Guenther  <rguenther@suse.de>
>>
>>        PR tree-optimization/46787
>>        * tree-data-ref.c (dr_address_invariant_p): Remove.
>>        (find_data_references_in_stmt): Invariant accesses are ok now.
>>        * tree-vect-stmts.c (vectorizable_load): Handle invariant
>>        loads.
>>        * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
>>        invariant loads.
>>
>>        * gcc.dg/vect/vect-121.c: New testcase.
>>
>
> This also caused:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49628
>
>

It also caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49893

Patch

different?

This patch gives a 33% speedup to the phoronix himeno testcase
if you bump the maximum alias versioning checks we want to insert.

I'm currently re-bootstrapping & testing this but an earlier version
was ok on x86_64-unknown-linux-gnu.

2011-06-29  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/46787
	* tree-data-ref.c (dr_address_invariant_p): Remove.
	(find_data_references_in_stmt): Invariant accesses are ok now.
	* tree-vect-stmts.c (vectorizable_load): Handle invariant
	loads.
	* tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
	invariant loads.

	* gcc.dg/vect/vect-121.c: New testcase.

Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c	(revision 175531)
+++ gcc/tree-data-ref.c	(working copy)
@@ -919,21 +919,6 @@  dr_analyze_alias (struct data_reference
     }
 }
 
-/* Returns true if the address of DR is invariant.  */
-
-static bool
-dr_address_invariant_p (struct data_reference *dr)
-{
-  unsigned i;
-  tree idx;
-
-  FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
-    if (tree_contains_chrecs (idx, NULL))
-      return false;
-
-  return true;
-}
-
 /* Frees data reference DR.  */
 
 void
@@ -4228,19 +4213,6 @@  find_data_references_in_stmt (struct loo
       dr = create_data_ref (nest, loop_containing_stmt (stmt),
 			    *ref->pos, stmt, ref->is_read);
       gcc_assert (dr != NULL);
-
-      /* FIXME -- data dependence analysis does not work correctly for objects
-         with invariant addresses in loop nests.  Let us fail here until the
-	 problem is fixed.  */
-      if (dr_address_invariant_p (dr) && nest)
-	{
-	  free_data_ref (dr);
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "\tFAILED as dr address is invariant\n");
-	  ret = false;
-	  break;
-	}
-
       VEC_safe_push (data_reference_p, heap, *datarefs, dr);
     }
   VEC_free (data_ref_loc, heap, references);
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 175531)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -4076,7 +4076,8 @@  vectorizable_load (gimple stmt, gimple_s
       && code != COMPONENT_REF
       && code != IMAGPART_EXPR
       && code != REALPART_EXPR
-      && code != MEM_REF)
+      && code != MEM_REF
+      && TREE_CODE_CLASS (code) != tcc_declaration)
     return false;
 
   if (!STMT_VINFO_DATA_REF (stmt_info))
@@ -4527,30 +4528,14 @@  vectorizable_load (gimple stmt, gimple_s
 	      if (inv_p && !bb_vinfo)
 		{
 		  gcc_assert (!strided_load);
-		  gcc_assert (nested_in_vect_loop_p (loop, stmt));
 		  if (j == 0)
 		    {
-		      int k;
-		      tree t = NULL_TREE;
-		      tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
-
-		      /* CHECKME: bitpos depends on endianess?  */
-		      bitpos = bitsize_zero_node;
-		      vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
-					bitsize, bitpos);
-		      vec_dest = vect_create_destination_var (scalar_dest,
-							      NULL_TREE);
-		      new_stmt = gimple_build_assign (vec_dest, vec_inv);
-		      new_temp = make_ssa_name (vec_dest, new_stmt);
-		      gimple_assign_set_lhs (new_stmt, new_temp);
-		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-		      for (k = nunits - 1; k >= 0; --k)
-			t = tree_cons (NULL_TREE, new_temp, t);
-		      /* FIXME: use build_constructor directly.  */
-		      vec_inv = build_constructor_from_list (vectype, t);
+		      tree vec_inv;
+		      gimple_stmt_iterator gsi2 = *gsi;
+		      gsi_next (&gsi2);
+		      vec_inv = build_vector_from_val (vectype, scalar_dest);
 		      new_temp = vect_init_vector (stmt, vec_inv,
-						   vectype, gsi);
+						   vectype, &gsi2);
 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
 		    }
 		  else
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	(revision 175531)
+++ gcc/tree-vect-data-refs.c	(working copy)
@@ -2302,9 +2302,9 @@  vect_analyze_data_ref_access (struct dat
       return false;
     }
 
-  /* Don't allow invariant accesses in loops.  */
+  /* Allow invariant loads in loops.  */
   if (loop_vinfo && dr_step == 0)
-    return false;
+    return DR_IS_READ (dr);
 
   if (loop && nested_in_vect_loop_p (loop, stmt))
     {
Index: gcc/testsuite/gcc.dg/vect/vect-121.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/vect-121.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vect/vect-121.c	(revision 0)
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+
+float *x;
+float parm;
+float
+test (int start, int end)
+{
+  int i;
+  for (i = start; i < end; ++i)
+    {
+      float tem = x[i];
+      x[i] = parm * tem;
+    }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */