diff mbox

[2/2,PR66642] Add empty loop exit block in transform_to_exit_first_loop_alt

Message ID 559269FF.4070400@mentor.com
State New
Headers show

Commit Message

Tom de Vries June 30, 2015, 10:05 a.m. UTC
On 25/06/15 09:43, Tom de Vries wrote:
> Hi,
>
> I ran into a failure with parloops for reduction loop testcase
> libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c.  When we
> exercise the low iteration count loop, the test-case fails.
>
> To understand the problem, let's first look at what happens when we use
> transform_to_exit_first_loop (the original one) instead of
> transform_to_exit_first_loop_alt (the alternative one, which is
> currently used, and causing the failure).
>
> Before transform_to_exit_first_loop, the low iteration count loop and
> the main loop share the loop exit block. After
> transform_to_exit_first_loop, that's not the case anymore, the main loop
> now has an exit block with a single predecessor. Subsequently,
> separate_decls_in_region inserts code in the main loop exit block, which
> is only triggered upon exit of the main loop.
>
> However, transform_to_exit_first_loop_alt does not insert such an exit
> block, and the code inserted by separate_decls_in_region is also active
> for the low iteration count loop, which results in an incorrect
> reduction result when the low iteration count loop is used.
>
>
> This patch fixes the problem by making sure
> transform_to_exit_first_loop_alt adds a new exit block inbetween the
> main loop header and the old exit block.
>
>

Updated test-cases after commit of fix for PR66652, reposting.

> Bootstrapped and reg-tested on x86_64.
>
> OK for trunk?
>

Thanks,
- Tom
diff mbox

Patch

Add empty loop exit block in transform_to_exit_first_loop_alt

2015-06-24  Tom de Vries  <tom@codesourcery.com>

	PR tree-optimization/66642
	* tree-parloops.c (transform_to_exit_first_loop_alt): Update function
	header comment.  Rename split_edge variable to edge_at_split.  Split
	exit edge to create new loop exit bb.  Insert loop exit phis in new loop
	exit bb.

	* testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c (main): Test low
	iteration count case.
	* testsuite/libgomp.c/parloops-exit-first-loop-alt.c (init): New
	function, factor out of ...
	(main): ... here.  Test low iteration count case.
---
 gcc/tree-parloops.c                                | 45 ++++++++++++++++------
 .../libgomp.c/parloops-exit-first-loop-alt-3.c     |  5 +++
 .../libgomp.c/parloops-exit-first-loop-alt.c       | 28 +++++++++++++-
 3 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 21ed17b..19c1aa5 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1535,7 +1535,7 @@  replace_uses_in_bbs_by (tree name, tree val, bitmap bbs)
      goto <bb header>
 
      <bb exit>:
-     sum_z = PHI <sum_b (cond[1])>
+     sum_z = PHI <sum_b (cond[1]), ...>
 
      [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
 	 that's <bb header>.
@@ -1562,14 +1562,17 @@  replace_uses_in_bbs_by (tree name, tree val, bitmap bbs)
      if (ivtmp_c < n + 1)
        goto <bb header>;
      else
-       goto <bb exit>;
+       goto <bb newexit>;
 
      <bb latch>:
      ivtmp_b = ivtmp_a + 1;
      goto <bb newheader>
 
+     <bb newexit>:
+     sum_y = PHI <sum_c (newheader)>
+
      <bb exit>:
-     sum_z = PHI <sum_c (newheader)>
+     sum_z = PHI <sum_y (newexit), ...>
 
 
    In unified diff format:
@@ -1606,9 +1609,12 @@  replace_uses_in_bbs_by (tree name, tree val, bitmap bbs)
 -     goto <bb header>
 +     goto <bb newheader>
 
++    <bb newexit>:
++    sum_y = PHI <sum_c (newheader)>
+
       <bb exit>:
--     sum_z = PHI <sum_b (cond[1])>
-+     sum_z = PHI <sum_c (newheader)>
+-     sum_z = PHI <sum_b (cond[1]), ...>
++     sum_z = PHI <sum_y (newexit), ...>
 
    Note: the example does not show any virtual phis, but these are handled more
    or less as reductions.
@@ -1646,7 +1652,7 @@  transform_to_exit_first_loop_alt (struct loop *loop,
 
   /* Create the new_header block.  */
   basic_block new_header = split_block_before_cond_jump (exit->src);
-  edge split_edge = single_pred_edge (new_header);
+  edge edge_at_split = single_pred_edge (new_header);
 
   /* Redirect entry edge to new_header.  */
   edge entry = loop_preheader_edge (loop);
@@ -1663,9 +1669,9 @@  transform_to_exit_first_loop_alt (struct loop *loop,
   e = redirect_edge_and_branch (post_cond_edge, header);
   gcc_assert (e == post_cond_edge);
 
-  /* Redirect split_edge to latch.  */
-  e = redirect_edge_and_branch (split_edge, latch);
-  gcc_assert (e == split_edge);
+  /* Redirect edge_at_split to latch.  */
+  e = redirect_edge_and_branch (edge_at_split, latch);
+  gcc_assert (e == edge_at_split);
 
   /* Set the new loop bound.  */
   gimple_cond_set_rhs (cond_stmt, bound);
@@ -1718,21 +1724,36 @@  transform_to_exit_first_loop_alt (struct loop *loop,
   /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
   flush_pending_stmts (post_inc_edge);
 
-  /* Register the reduction exit phis.  */
+  /* Create a new empty exit block, inbetween the new loop header and the old
+     exit block.  The function separate_decls_in_region needs this block to
+     insert code that is active on loop exit, but not any other path.  */
+  basic_block new_exit_block = split_edge (exit);
+
+  /* Insert and register the reduction exit phis.  */
   for (gphi_iterator gsi = gsi_start_phis (exit_block);
        !gsi_end_p (gsi);
        gsi_next (&gsi))
     {
       gphi *phi = gsi.phi ();
       tree res_z = PHI_RESULT (phi);
+
+      /* Now that we have a new exit block, duplicate the phi of the old exit
+	 block in the new exit block to preserve loop-closed ssa.  */
+      edge succ_new_exit_block = single_succ_edge (new_exit_block);
+      edge pred_new_exit_block = single_pred_edge (new_exit_block);
+      tree res_y = copy_ssa_name (res_z, phi);
+      gphi *nphi = create_phi_node (res_y, new_exit_block);
+      tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
+      add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
+      add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
+
       if (virtual_operand_p (res_z))
 	continue;
 
-      tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
       gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
       struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
       if (red != NULL)
-	red->keep_res = phi;
+	red->keep_res = nphi;
     }
 
   /* We're going to cancel the loop at the end of gen_parallel_loop, but until
diff --git a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c
index 7de1377..78365e8 100644
--- a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c
+++ b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c
@@ -36,5 +36,10 @@  main (void)
   if (res != 11995)
     abort ();
 
+  /* Test low iteration count case.  */
+  res = f (10, a);
+  if (res != 25)
+    abort ();
+
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c
index 07468a9..a49454b 100644
--- a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c
+++ b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c
@@ -22,8 +22,8 @@  f (unsigned int n, unsigned int *__restrict__ a, unsigned int *__restrict__ b,
     c[i] = a[i] + b[i];
 }
 
-int
-main (void)
+static void __attribute__((noclone,noinline))
+init (void)
 {
   int i, j;
 
@@ -36,6 +36,14 @@  main (void)
 	b[k] = (k * 3) % 7;
 	c[k] = k * 2;
       }
+}
+
+int
+main (void)
+{
+  int i;
+
+  init ();
 
   f (N, a, b, c);
 
@@ -47,5 +55,21 @@  main (void)
 	abort ();
     }
 
+  /* Test low iteration count case.  */
+
+  init ();
+
+  f (10, a, b, c);
+
+  for (i = 0; i < N; i++)
+    {
+      unsigned int actual = c[i];
+      unsigned int expected = (i < 10
+			       ? i + ((i * 3) % 7)
+			       : i * 2);
+      if (actual != expected)
+	abort ();
+    }
+
   return 0;
 }
-- 
1.9.1