Patchwork [gomp4] Basic vectorization enablement for #pragma omp simd

login
register
mail settings
Submitter Jakub Jelinek
Date May 14, 2013, 1:36 p.m.
Message ID <20130514133650.GQ1377@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/243706/
State New
Headers show

Comments

Jakub Jelinek - May 14, 2013, 1:36 p.m.
On Tue, May 14, 2013 at 12:16:07PM +0200, Richard Biener wrote:
> Works for me.

...

Ok, here is what I've committed to gomp-4_0-branch.
tree-vect-data-refs.c was kept (almost) unchanged, as per IRC discussion;
that is something to add to the todo list for the future.

2013-05-14  Jakub Jelinek  <jakub@redhat.com>

	* cfgloop.h (struct loop): Add safelen and force_vect fields.
	* function.h (struct function): Add has_force_vect_loops field.
	* omp-low.c (expand_omp_simd): If !broken_loop, create loop for
	the simd region and set safelen and force_vect fields in it.
	* tree-vectorizer.c (vectorize_loops): If loop has force_vect set,
	vectorize it even if flag_vectorize isn't set.  Clear loop->force_vect
	after vectorization.
	* tree-ssa-loop.c (gate_tree_vectorize): Return true even if
	cfun->has_force_vect_loops.
	* tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely_1): Don't
	unroll loops with loop->force_vect.
	* tree-vect-data-refs.c (vect_analyze_data_ref_dependence): For
	unknown or bad data dependency, if loop->safelen is at least 2, just
	decrease *max_vf to loop->safelen if needed and return false.
	* tree-if-conv.c (main_tree_if_conversion): If-convert also loops with
	loop->force_vect.
	(gate_tree_if_conversion): Return true even if
	cfun->has_force_vect_loops.



	Jakub

Patch

--- gcc/cfgloop.h.jj	2013-05-13 16:49:44.000000000 +0200
+++ gcc/cfgloop.h	2013-05-14 13:59:47.179036079 +0200
@@ -168,6 +168,15 @@  struct GTY ((chain_next ("%h.next"))) lo
      describes what is the state of the estimation.  */
   enum loop_estimation estimate_state;
 
+  /* If > 0, an integer, where the user asserted that for any
+     I in [ 0, nb_iterations ) and for any J in
+     [ I, min ( I + safelen, nb_iterations ) ), the Ith and Jth iterations
+     of the loop can be safely evaluated concurrently.  */
+  int safelen;
+
+  /* True if we should try harder to vectorize this loop.  */
+  bool force_vect;
+
   /* Upper bound on number of iterations of a loop.  */
   struct nb_iter_bound *bounds;
 
--- gcc/function.h.jj	2013-05-13 16:49:03.000000000 +0200
+++ gcc/function.h	2013-05-14 14:06:31.102720074 +0200
@@ -641,6 +641,10 @@  struct GTY(()) function {
      adjusts one of its arguments and forwards to another
      function.  */
   unsigned int is_thunk : 1;
+
+  /* Nonzero if the current function contains any loops with
+     loop->force_vect set.  */
+  unsigned int has_force_vect_loops : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
--- gcc/omp-low.c.jj	2013-05-13 16:37:05.000000000 +0200
+++ gcc/omp-low.c	2013-05-14 14:54:43.154188242 +0200
@@ -4960,6 +4960,8 @@  expand_omp_simd (struct omp_region *regi
   edge e, ne;
   tree *counts = NULL;
   int i;
+  tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
+				  OMP_CLAUSE_SAFELEN);
 
   type = TREE_TYPE (fd->loop.v);
   entry_bb = region->entry;
@@ -5157,6 +5159,34 @@  expand_omp_simd (struct omp_region *regi
   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l1_bb);
   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
+
+  if (!broken_loop)
+    {
+      struct loop *loop = alloc_loop ();
+      loop->header = l1_bb;
+      loop->latch = e->dest;
+      add_loop (loop, l1_bb->loop_father);
+      if (safelen == NULL_TREE)
+	loop->safelen = INT_MAX;
+      else
+	{
+	  safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
+	  if (!host_integerp (safelen, 1)
+	      || (unsigned HOST_WIDE_INT) tree_low_cst (safelen, 1)
+		 > INT_MAX)
+	    loop->safelen = INT_MAX;
+	  else
+	    loop->safelen = tree_low_cst (safelen, 1);
+	}
+      /* If not -fno-tree-vectorize, hint that we want to vectorize
+	 the loop.  */
+      if (flag_tree_vectorize
+	  || !global_options_set.x_flag_tree_vectorize)
+	{
+	  loop->force_vect = true;
+	  cfun->has_force_vect_loops = true;
+	}
+    }
 }
 
 
--- gcc/tree-vectorizer.c.jj	2013-05-13 16:49:03.000000000 +0200
+++ gcc/tree-vectorizer.c	2013-05-14 14:13:43.434236251 +0200
@@ -101,7 +101,8 @@  vectorize_loops (void)
      than all previously defined loops.  This fact allows us to run
      only over initial loops skipping newly generated ones.  */
   FOR_EACH_LOOP (li, loop, 0)
-    if (optimize_loop_nest_for_speed_p (loop))
+    if ((flag_tree_vectorize && optimize_loop_nest_for_speed_p (loop))
+	|| loop->force_vect)
       {
 	loop_vec_info loop_vinfo;
 	vect_location = find_loop_location (loop);
@@ -122,6 +123,9 @@  vectorize_loops (void)
                        LOC_FILE (vect_location), LOC_LINE (vect_location));
 	vect_transform_loop (loop_vinfo);
 	num_vectorized_loops++;
+	/* Now that the loop has been vectorized, allow it to be unrolled
+	   etc.  */
+	loop->force_vect = false;
       }
 
   vect_location = UNKNOWN_LOC;
--- gcc/tree-ssa-loop.c.jj	2013-05-13 16:46:36.000000000 +0200
+++ gcc/tree-ssa-loop.c	2013-05-14 14:14:27.320984029 +0200
@@ -225,7 +225,7 @@  tree_vectorize (void)
 static bool
 gate_tree_vectorize (void)
 {
-  return flag_tree_vectorize;
+  return flag_tree_vectorize || cfun->has_force_vect_loops;
 }
 
 struct gimple_opt_pass pass_vectorize =
--- gcc/tree-ssa-loop-ivcanon.c.jj	2013-05-13 16:46:36.000000000 +0200
+++ gcc/tree-ssa-loop-ivcanon.c	2013-05-14 14:14:07.088100214 +0200
@@ -1123,6 +1123,11 @@  tree_unroll_loops_completely_1 (bool may
   if (changed)
     return true;
 
+  /* Don't unroll #pragma omp simd loops until the vectorizer
+     attempts to vectorize those.  */
+  if (loop->force_vect)
+    return false;
+
   /* Try to unroll this loop.  */
   loop_father = loop_outer (loop);
   if (!loop_father)
--- gcc/tree-vect-data-refs.c.jj	2013-05-13 16:49:08.000000000 +0200
+++ gcc/tree-vect-data-refs.c	2013-05-14 14:26:06.972967958 +0200
@@ -255,6 +255,15 @@  vect_analyze_data_ref_dependence (struct
   /* Unknown data dependence.  */
   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
     {
+      /* If user asserted safelen consecutive iterations can be
+	 executed concurrently, assume independence.  */
+      if (loop->safelen >= 2)
+	{
+	  if (loop->safelen < *max_vf)
+	    *max_vf = loop->safelen;
+	  return false;
+	}
+
       if (STMT_VINFO_GATHER_P (stmtinfo_a)
 	  || STMT_VINFO_GATHER_P (stmtinfo_b))
 	{
@@ -291,6 +300,15 @@  vect_analyze_data_ref_dependence (struct
   /* Known data dependence.  */
   if (DDR_NUM_DIST_VECTS (ddr) == 0)
     {
+      /* If user asserted safelen consecutive iterations can be
+	 executed concurrently, assume independence.  */
+      if (loop->safelen >= 2)
+	{
+	  if (loop->safelen < *max_vf)
+	    *max_vf = loop->safelen;
+	  return false;
+	}
+
       if (STMT_VINFO_GATHER_P (stmtinfo_a)
 	  || STMT_VINFO_GATHER_P (stmtinfo_b))
 	{
--- gcc/tree-if-conv.c.jj	2013-05-13 16:49:06.000000000 +0200
+++ gcc/tree-if-conv.c	2013-05-14 14:38:16.928803349 +0200
@@ -1822,6 +1822,10 @@  main_tree_if_conversion (void)
     return 0;
 
   FOR_EACH_LOOP (li, loop, 0)
+    if (flag_tree_loop_if_convert == 1
+	|| flag_tree_loop_if_convert_stores == 1
+	|| flag_tree_vectorize
+	|| loop->force_vect)
     changed |= tree_if_conversion (loop);
 
   if (changed)
@@ -1848,7 +1852,8 @@  main_tree_if_conversion (void)
 static bool
 gate_tree_if_conversion (void)
 {
-  return ((flag_tree_vectorize && flag_tree_loop_if_convert != 0)
+  return (((flag_tree_vectorize || cfun->has_force_vect_loops)
+	   && flag_tree_loop_if_convert != 0)
 	  || flag_tree_loop_if_convert == 1
 	  || flag_tree_loop_if_convert_stores == 1);
 }