diff mbox series

Handle LOOP_DIST_ALIAS ifns in move_sese_region_to_fn (PR tree-optimization/83359)

Message ID 20171211211051.GR2353@tucnak
State New
Headers show
Series Handle LOOP_DIST_ALIAS ifns in move_sese_region_to_fn (PR tree-optimization/83359) | expand

Commit Message

Jakub Jelinek Dec. 11, 2017, 9:10 p.m. UTC
Hi!

Unlike LOOP_VECTORIZED ifns, LOOP_DIST_ALIAS is added by the ldist pass
and needs to be maintained until the vectorizer, including across parloops,
which runs in between.  Earlier I added code to update or drop orig_loop_num
during move_sese_region_to_fn, but that is not sufficient.  If we move
the whole pair of loops with the associated LOOP_DIST_ALIAS call into
the outlined loopfn, we need to update the call's first argument, as
orig_loop_num is likely changing.  If the whole triplet (two loops with
orig_loop_num and LOOP_DIST_ALIAS with the same first argument) stays in the
parent function, we don't need to adjust it.  In all other cases, this patch
folds the LOOP_DIST_ALIAS ifn to its second argument, like the vectorizer
does if it fails to vectorize the loop.

Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux,
bootstrapped on powerpc64-linux, regtest there pending.  Ok for trunk?

2017-12-11  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/83359
	* tree-cfg.h (fold_loop_internal_call): Declare.
	* tree-vectorizer.c (fold_loop_internal_call): Moved to ...
	* tree-cfg.c (fold_loop_internal_call): ... here.  No longer static.
	(find_loop_dist_alias): New function.
	(move_sese_region_to_fn): If any dloop->orig_loop_num value is
	updated, also adjust any corresponding LOOP_DIST_ALIAS internal
	calls.

	* gcc.dg/graphite/pr83359.c: New test.


	Jakub

Comments

Richard Biener Dec. 12, 2017, 9:15 a.m. UTC | #1
On Mon, 11 Dec 2017, Jakub Jelinek wrote:

> Hi!
> 
> Unlike LOOP_VECTORIZED ifns, LOOP_DIST_ALIAS is added by the ldist pass
> and needs to be maintained until the vectorizer, and parloops in between
> that.  Earlier I've added code to update or drop orig_loop_num during
> move_sese_region_to_fn, but that is not sufficient.  If we move
> the whole pair of loops with the associated LOOP_DIST_ALIAS call into
> the outlined loopfn, we need to update the first argument, as orig_loop_num
> is likely changing.  If the whole triplet (two loops with orig_loop_num
> and LOOP_DIST_ALIAS with the same first argument) stays in parent function,
> we don't need to adjust it.  In all other cases, this patch folds the
> LOOP_DIST_ALIAS ifn to the second argument, like the vectorizer does if
> it fails to vectorize it.
> 
> Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux,
> bootstrapped on powerpc64-linux, regtest there pending.  Ok for trunk?

Ok.

Thanks,
Richard.

> 2017-12-11  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/83359
> 	* tree-cfg.h (fold_loop_internal_call): Declare.
> 	* tree-vectorizer.c (fold_loop_internal_call): Moved to ...
> 	* tree-cfg.c (fold_loop_internal_call): ... here.  No longer static.
> 	(find_loop_dist_alias): New function.
> 	(move_sese_region_to_fn): If any dloop->orig_loop_num value is
> 	updated, also adjust any corresponding LOOP_DIST_ALIAS internal
> 	calls.
> 
> 	* gcc.dg/graphite/pr83359.c: New test.
> 
> --- gcc/tree-cfg.h.jj	2017-09-05 23:28:14.000000000 +0200
> +++ gcc/tree-cfg.h	2017-12-11 12:35:24.284777550 +0100
> @@ -77,6 +77,7 @@ extern void gather_blocks_in_sese_region
>  					  vec<basic_block> *bbs_p);
>  extern void verify_sese (basic_block, basic_block, vec<basic_block> *);
>  extern bool gather_ssa_name_hash_map_from (tree const &, tree const &, void *);
> +extern void fold_loop_internal_call (gimple *, tree);
>  extern basic_block move_sese_region_to_fn (struct function *, basic_block,
>  				           basic_block, tree);
>  extern void dump_function_to_file (tree, FILE *, dump_flags_t);
> --- gcc/tree-vectorizer.c.jj	2017-09-01 09:26:37.000000000 +0200
> +++ gcc/tree-vectorizer.c	2017-12-11 12:33:41.436055580 +0100
> @@ -464,27 +464,6 @@ vect_loop_vectorized_call (struct loop *
>    return NULL;
>  }
>  
> -/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS
> -   to VALUE and update any immediate uses of it's LHS.  */
> -
> -static void
> -fold_loop_internal_call (gimple *g, tree value)
> -{
> -  tree lhs = gimple_call_lhs (g);
> -  use_operand_p use_p;
> -  imm_use_iterator iter;
> -  gimple *use_stmt;
> -  gimple_stmt_iterator gsi = gsi_for_stmt (g);
> -
> -  update_call_from_tree (&gsi, value);
> -  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
> -    {
> -      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
> -	SET_USE (use_p, value);
> -      update_stmt (use_stmt);
> -    }
> -}
> -
>  /* If LOOP has been versioned during loop distribution, return the gurading
>     internal call.  */
>  
> --- gcc/tree-cfg.c.jj	2017-12-07 18:05:30.000000000 +0100
> +++ gcc/tree-cfg.c	2017-12-11 12:34:55.054140750 +0100
> @@ -7337,6 +7337,47 @@ gather_ssa_name_hash_map_from (tree cons
>    return true;
>  }
>  
> +/* Return LOOP_DIST_ALIAS call if present in BB.  */
> +
> +static gimple *
> +find_loop_dist_alias (basic_block bb)
> +{
> +  gimple *g = last_stmt (bb);
> +  if (g == NULL || gimple_code (g) != GIMPLE_COND)
> +    return NULL;
> +
> +  gimple_stmt_iterator gsi = gsi_for_stmt (g);
> +  gsi_prev (&gsi);
> +  if (gsi_end_p (gsi))
> +    return NULL;
> +
> +  g = gsi_stmt (gsi);
> +  if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS))
> +    return g;
> +  return NULL;
> +}
> +
> +/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS
> +   to VALUE and update any immediate uses of it's LHS.  */
> +
> +void
> +fold_loop_internal_call (gimple *g, tree value)
> +{
> +  tree lhs = gimple_call_lhs (g);
> +  use_operand_p use_p;
> +  imm_use_iterator iter;
> +  gimple *use_stmt;
> +  gimple_stmt_iterator gsi = gsi_for_stmt (g);
> +
> +  update_call_from_tree (&gsi, value);
> +  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
> +    {
> +      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
> +	SET_USE (use_p, value);
> +      update_stmt (use_stmt);
> +    }
> +}
> +
>  /* Move a single-entry, single-exit region delimited by ENTRY_BB and
>     EXIT_BB to function DEST_CFUN.  The whole region is replaced by a
>     single basic block in the original CFG and the new basic block is
> @@ -7510,7 +7551,6 @@ move_sese_region_to_fn (struct function
>  	  }
>      }
>  
> -
>    /* Adjust the number of blocks in the tree root of the outlined part.  */
>    get_loop (dest_cfun, 0)->num_nodes = bbs.length () + 2;
>  
> @@ -7521,19 +7561,77 @@ move_sese_region_to_fn (struct function
>    /* Fix up orig_loop_num.  If the block referenced in it has been moved
>       to dest_cfun, update orig_loop_num field, otherwise clear it.  */
>    struct loop *dloop;
> +  signed char *moved_orig_loop_num = NULL;
>    FOR_EACH_LOOP_FN (dest_cfun, dloop, 0)
>      if (dloop->orig_loop_num)
>        {
> +	if (moved_orig_loop_num == NULL)
> +	  moved_orig_loop_num
> +	    = XCNEWVEC (signed char, vec_safe_length (larray));
>  	if ((*larray)[dloop->orig_loop_num] != NULL
>  	    && get_loop (saved_cfun, dloop->orig_loop_num) == NULL)
> -	  dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;
> +	  {
> +	    if (moved_orig_loop_num[dloop->orig_loop_num] >= 0
> +		&& moved_orig_loop_num[dloop->orig_loop_num] < 2)
> +	      moved_orig_loop_num[dloop->orig_loop_num]++;
> +	    dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;
> +	  }
>  	else
> -	  dloop->orig_loop_num = 0;
> +	  {
> +	    moved_orig_loop_num[dloop->orig_loop_num] = -1;
> +	    dloop->orig_loop_num = 0;
> +	  }
>        }
> -  ggc_free (larray);
> -
>    pop_cfun ();
>  
> +  if (moved_orig_loop_num)
> +    {
> +      FOR_EACH_VEC_ELT (bbs, i, bb)
> +	{
> +	  gimple *g = find_loop_dist_alias (bb);
> +	  if (g == NULL)
> +	    continue;
> +
> +	  int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));
> +	  gcc_assert (orig_loop_num
> +		      && (unsigned) orig_loop_num < vec_safe_length (larray));
> +	  if (moved_orig_loop_num[orig_loop_num] == 2)
> +	    {
> +	      /* If we have moved both loops with this orig_loop_num into
> +		 dest_cfun and the LOOP_DIST_ALIAS call is being moved there
> +		 too, update the first argument.  */
> +	      gcc_assert ((*larray)[dloop->orig_loop_num] != NULL
> +			  && (get_loop (saved_cfun, dloop->orig_loop_num)
> +			      == NULL));
> +	      tree t = build_int_cst (integer_type_node,
> +				      (*larray)[dloop->orig_loop_num]->num);
> +	      gimple_call_set_arg (g, 0, t);
> +	      update_stmt (g);
> +	      /* Make sure the following loop will not update it.  */
> +	      moved_orig_loop_num[orig_loop_num] = 0;
> +	    }
> +	  else
> +	    /* Otherwise at least one of the loops stayed in saved_cfun.
> +	       Remove the LOOP_DIST_ALIAS call.  */
> +	    fold_loop_internal_call (g, gimple_call_arg (g, 1));
> +	}
> +      FOR_EACH_BB_FN (bb, saved_cfun)
> +	{
> +	  gimple *g = find_loop_dist_alias (bb);
> +	  if (g == NULL)
> +	    continue;
> +	  int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));
> +	  gcc_assert (orig_loop_num
> +		      && (unsigned) orig_loop_num < vec_safe_length (larray));
> +	  if (moved_orig_loop_num[orig_loop_num])
> +	    /* LOOP_DIST_ALIAS call remained in saved_cfun, if at least one
> +	       of the corresponding loops was moved, remove it.  */
> +	    fold_loop_internal_call (g, gimple_call_arg (g, 1));
> +	}
> +      XDELETEVEC (moved_orig_loop_num);
> +    }
> +  ggc_free (larray);
> +
>    /* Move blocks from BBS into DEST_CFUN.  */
>    gcc_assert (bbs.length () >= 2);
>    after = dest_cfun->cfg->x_entry_block_ptr;
> --- gcc/testsuite/gcc.dg/graphite/pr83359.c.jj	2017-12-11 11:43:10.433737382 +0100
> +++ gcc/testsuite/gcc.dg/graphite/pr83359.c	2017-12-11 11:43:01.000000000 +0100
> @@ -0,0 +1,40 @@
> +/* PR tree-optimization/83359 */
> +/* { dg-do compile { target pthread } } */
> +/* { dg-options "-O3 -floop-parallelize-all -ftree-parallelize-loops=2" } */
> +
> +int a, b, c;
> +
> +void
> +foo (int x, int y)
> +{
> +  int *d = &a;
> +  int *e = &x;
> +
> +  for (a = 0; a < 1; ++a)
> +    d = &x;
> +
> +  while (b < 10)
> +    {
> +      for (b = 0; b < 1; ++b)
> +        if (x == 0)
> +          while (x < 1)
> +            ++x;
> +        else
> +          while (x < 1)
> +            {
> +              d = &y;
> +              ++x;
> +            }
> +      ++b;
> +    }
> +
> +  for (;;)
> +    for (c = 0; c < 2; ++c)
> +      {
> +        if (*d != 0)
> +          a = *e;
> +
> +        e = &b;
> +        y = 0;
> +      }
> +}
> 
> 	Jakub
> 
>
diff mbox series

Patch

--- gcc/tree-cfg.h.jj	2017-09-05 23:28:14.000000000 +0200
+++ gcc/tree-cfg.h	2017-12-11 12:35:24.284777550 +0100
@@ -77,6 +77,7 @@  extern void gather_blocks_in_sese_region
 					  vec<basic_block> *bbs_p);
 extern void verify_sese (basic_block, basic_block, vec<basic_block> *);
 extern bool gather_ssa_name_hash_map_from (tree const &, tree const &, void *);
+extern void fold_loop_internal_call (gimple *, tree);
 extern basic_block move_sese_region_to_fn (struct function *, basic_block,
 				           basic_block, tree);
 extern void dump_function_to_file (tree, FILE *, dump_flags_t);
--- gcc/tree-vectorizer.c.jj	2017-09-01 09:26:37.000000000 +0200
+++ gcc/tree-vectorizer.c	2017-12-11 12:33:41.436055580 +0100
@@ -464,27 +464,6 @@  vect_loop_vectorized_call (struct loop *
   return NULL;
 }
 
-/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS
-   to VALUE and update any immediate uses of it's LHS.  */
-
-static void
-fold_loop_internal_call (gimple *g, tree value)
-{
-  tree lhs = gimple_call_lhs (g);
-  use_operand_p use_p;
-  imm_use_iterator iter;
-  gimple *use_stmt;
-  gimple_stmt_iterator gsi = gsi_for_stmt (g);
-
-  update_call_from_tree (&gsi, value);
-  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
-    {
-      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
-	SET_USE (use_p, value);
-      update_stmt (use_stmt);
-    }
-}
-
 /* If LOOP has been versioned during loop distribution, return the gurading
    internal call.  */
 
--- gcc/tree-cfg.c.jj	2017-12-07 18:05:30.000000000 +0100
+++ gcc/tree-cfg.c	2017-12-11 12:34:55.054140750 +0100
@@ -7337,6 +7337,47 @@  gather_ssa_name_hash_map_from (tree cons
   return true;
 }
 
+/* Return LOOP_DIST_ALIAS call just before BB's final GIMPLE_COND, or NULL.  */
+
+static gimple *
+find_loop_dist_alias (basic_block bb)
+{
+  gimple *g = last_stmt (bb);
+  if (g == NULL || gimple_code (g) != GIMPLE_COND)
+    return NULL;
+
+  gimple_stmt_iterator gsi = gsi_for_stmt (g);
+  gsi_prev (&gsi);
+  if (gsi_end_p (gsi))
+    return NULL;
+
+  g = gsi_stmt (gsi);
+  if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS))
+    return g;
+  return NULL;
+}
+
+/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS
+   to VALUE and update any immediate uses of its LHS.  */
+
+void
+fold_loop_internal_call (gimple *g, tree value)
+{
+  tree lhs = gimple_call_lhs (g);
+  use_operand_p use_p;
+  imm_use_iterator iter;
+  gimple *use_stmt;
+  gimple_stmt_iterator gsi = gsi_for_stmt (g);
+
+  update_call_from_tree (&gsi, value);
+  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+    {
+      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+	SET_USE (use_p, value);
+      update_stmt (use_stmt);
+    }
+}
+
 /* Move a single-entry, single-exit region delimited by ENTRY_BB and
    EXIT_BB to function DEST_CFUN.  The whole region is replaced by a
    single basic block in the original CFG and the new basic block is
@@ -7510,7 +7551,6 @@  move_sese_region_to_fn (struct function
 	  }
     }
 
-
   /* Adjust the number of blocks in the tree root of the outlined part.  */
   get_loop (dest_cfun, 0)->num_nodes = bbs.length () + 2;
 
@@ -7521,19 +7561,77 @@  move_sese_region_to_fn (struct function
   /* Fix up orig_loop_num.  If the block referenced in it has been moved
      to dest_cfun, update orig_loop_num field, otherwise clear it.  */
   struct loop *dloop;
+  signed char *moved_orig_loop_num = NULL;
   FOR_EACH_LOOP_FN (dest_cfun, dloop, 0)
     if (dloop->orig_loop_num)
       {
+	if (moved_orig_loop_num == NULL)
+	  moved_orig_loop_num
+	    = XCNEWVEC (signed char, vec_safe_length (larray));
 	if ((*larray)[dloop->orig_loop_num] != NULL
 	    && get_loop (saved_cfun, dloop->orig_loop_num) == NULL)
-	  dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;
+	  {
+	    if (moved_orig_loop_num[dloop->orig_loop_num] >= 0
+		&& moved_orig_loop_num[dloop->orig_loop_num] < 2)
+	      moved_orig_loop_num[dloop->orig_loop_num]++;
+	    dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;
+	  }
 	else
-	  dloop->orig_loop_num = 0;
+	  {
+	    moved_orig_loop_num[dloop->orig_loop_num] = -1;
+	    dloop->orig_loop_num = 0;
+	  }
       }
-  ggc_free (larray);
-
   pop_cfun ();
 
+  if (moved_orig_loop_num)
+    {
+      FOR_EACH_VEC_ELT (bbs, i, bb)
+	{
+	  gimple *g = find_loop_dist_alias (bb);
+	  if (g == NULL)
+	    continue;
+
+	  int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));
+	  gcc_assert (orig_loop_num
+		      && (unsigned) orig_loop_num < vec_safe_length (larray));
+	  if (moved_orig_loop_num[orig_loop_num] == 2)
+	    {
+	      /* Both loops with this orig_loop_num and the LOOP_DIST_ALIAS
+		 call move to dest_cfun: renumber the call's first argument.
+		 Index by the call's ORIG_LOOP_NUM; DLOOP is stale here.  */
+	      gcc_assert ((*larray)[orig_loop_num] != NULL
+			  && (get_loop (saved_cfun, orig_loop_num)
+			      == NULL));
+	      tree t = build_int_cst (integer_type_node,
+				      (*larray)[orig_loop_num]->num);
+	      gimple_call_set_arg (g, 0, t);
+	      update_stmt (g);
+	      /* Make sure the following loop will not update it.  */
+	      moved_orig_loop_num[orig_loop_num] = 0;
+	    }
+	  else
+	    /* Otherwise at least one of the loops stayed in saved_cfun.
+	       Remove the LOOP_DIST_ALIAS call.  */
+	    fold_loop_internal_call (g, gimple_call_arg (g, 1));
+	}
+      FOR_EACH_BB_FN (bb, saved_cfun)
+	{
+	  gimple *g = find_loop_dist_alias (bb);
+	  if (g == NULL)
+	    continue;
+	  int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));
+	  gcc_assert (orig_loop_num
+		      && (unsigned) orig_loop_num < vec_safe_length (larray));
+	  if (moved_orig_loop_num[orig_loop_num])
+	    /* LOOP_DIST_ALIAS call remained in saved_cfun, if at least one
+	       of the corresponding loops was moved, remove it.  */
+	    fold_loop_internal_call (g, gimple_call_arg (g, 1));
+	}
+      XDELETEVEC (moved_orig_loop_num);
+    }
+  ggc_free (larray);
+
   /* Move blocks from BBS into DEST_CFUN.  */
   gcc_assert (bbs.length () >= 2);
   after = dest_cfun->cfg->x_entry_block_ptr;
--- gcc/testsuite/gcc.dg/graphite/pr83359.c.jj	2017-12-11 11:43:10.433737382 +0100
+++ gcc/testsuite/gcc.dg/graphite/pr83359.c	2017-12-11 11:43:01.000000000 +0100
@@ -0,0 +1,40 @@ 
+/* PR tree-optimization/83359 */
+/* { dg-do compile { target pthread } } */
+/* { dg-options "-O3 -floop-parallelize-all -ftree-parallelize-loops=2" } */
+
+int a, b, c;
+
+void
+foo (int x, int y)
+{
+  int *d = &a;
+  int *e = &x;
+
+  for (a = 0; a < 1; ++a)
+    d = &x;
+
+  while (b < 10)
+    {
+      for (b = 0; b < 1; ++b)
+        if (x == 0)
+          while (x < 1)
+            ++x;
+        else
+          while (x < 1)
+            {
+              d = &y;
+              ++x;
+            }
+      ++b;
+    }
+
+  for (;;)
+    for (c = 0; c < 2; ++c)
+      {
+        if (*d != 0)
+          a = *e;
+
+        e = &b;
+        y = 0;
+      }
+}