diff mbox series

tree-optimization/113373 - add missing LC PHIs for live operations

Message ID 20240119135540.875441388C@imap1.dmz-prg2.suse.org
State New
Headers show
Series tree-optimization/113373 - add missing LC PHIs for live operations | expand

Commit Message

Richard Biener Jan. 19, 2024, 1:55 p.m. UTC
The following makes reduction epilogue code generation happy by properly
adding LC PHIs to the exit blocks for multiple exit vectorized loops.

Some refactoring might make the flow easier to follow but I've refrained
from doing that with this patch.

I've kept some fixes in reduction epilogue generation from the earlier
attempt at fixing this PR.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.  I'm
waiting for the linaro CI and on Monday will follow up with some
refactoring.

Richard.

	PR tree-optimization/113373
	* tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
	Create LC PHIs in the exit blocks where necessary.
	* tree-vect-loop.cc (vectorizable_live_operation): Do not try
	to handle missing LC PHIs.
	(find_connected_edge): Remove.
	(vect_create_epilog_for_reduction): Cleanup use of auto_vec.

	* gcc.dg/vect/vect-early-break_104-pr113373.c: New testcase.
---
 .../vect/vect-early-break_104-pr113373.c      | 19 ++++++++
 gcc/tree-vect-loop-manip.cc                   | 34 ++++++++++++--
 gcc/tree-vect-loop.cc                         | 46 +++++--------------
 3 files changed, 60 insertions(+), 39 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_104-pr113373.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_104-pr113373.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_104-pr113373.c
new file mode 100644
index 00000000000..1601aafb3e6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_104-pr113373.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+
+struct asCArray { /* Pointer-plus-length pair passed by value to the test function.  */
+  unsigned *array; /* Elements read as b.array[num] in the loop below.  */
+  int length; /* Compared against the loop index before each element access.  */
+};
+unsigned asCReaderTranslateFunction(struct asCArray b, unsigned t) /* Accumulates b.array[num] while num < t and returns the sum.  */
+{
+  int size = 0;
+  for (unsigned num; num < t; num++) /* NOTE(review): num has no initializer; presumably kept as-is from the reduced PR testcase -- confirm before changing.  */
+  {
+    if (num >= b.length) /* Additional way out of the loop besides num < t: the call below aborts.  */
+      __builtin_abort();
+    size += b.array[num]; /* Running sum; its final value is used after the loop.  */
+  }
+  return size;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 1477906e96e..eacbc022549 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1696,7 +1696,8 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
 	      /* Check if we've already created a new phi node during edge
 		 redirection.  If we have, only propagate the value
 		 downwards in case there is no merge block.  */
-	      if (tree *res = new_phi_args.get (new_arg))
+	      tree *res;
+	      if ((res = new_phi_args.get (new_arg)))
 		{
 		  if (multiple_exits_p)
 		    new_arg = *res;
@@ -1717,7 +1718,7 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
 		  /* Similar to the single exit case, If we have an existing
 		     LCSSA variable thread through the original value otherwise
 		     skip it and directly use the final value.  */
-		  if (tree *res = new_phi_args.get (tmp_arg))
+		  if ((res = new_phi_args.get (tmp_arg)))
 		    new_arg = *res;
 		  else if (!virtual_operand_p (new_arg))
 		    new_arg = tmp_arg;
@@ -1728,9 +1729,20 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
 
 	      /* Otherwise, main loop exit should use the final iter value.  */
 	      if (multiple_exits_p)
-		SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi,
-					 single_succ_edge (main_loop_exit_block),
-					 new_arg);
+		{
+		  /* Create a LC PHI if it doesn't already exist.  */
+		  if (!virtual_operand_p (new_arg) && !res)
+		    {
+		      tree new_def = copy_ssa_name (new_arg);
+		      gphi *lc_phi
+			= create_phi_node (new_def, main_loop_exit_block);
+		      SET_PHI_ARG_DEF (lc_phi, 0, new_arg);
+		      new_arg = new_def;
+		    }
+		  SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi,
+					   single_succ_edge (main_loop_exit_block),
+					   new_arg);
+		}
 	      else
 		SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, loop_exit, new_arg);
 
@@ -1766,6 +1778,18 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
 		      if (vphi)
 			alt_arg = gimple_phi_result (vphi);
 		    }
+		  /* For other live args we didn't create LC PHI nodes.
+		     Do so here.  */
+		  else
+		    {
+		      tree alt_def = copy_ssa_name (alt_arg);
+		      gphi *lc_phi
+			= create_phi_node (alt_def, alt_loop_exit_block);
+		      for (unsigned i = 0; i < gimple_phi_num_args (lc_phi);
+			   ++i)
+			SET_PHI_ARG_DEF (lc_phi, i, alt_arg);
+		      alt_arg = alt_def;
+		    }
 		  edge main_e = single_succ_edge (alt_loop_exit_block);
 		  SET_PHI_ARG_DEF_ON_EDGE (to_phi, main_e, alt_arg);
 		}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 4769d6f53e4..fe631252dc2 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6017,7 +6017,6 @@  vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
   int j, i;
   vec<tree> &scalar_results = reduc_info->reduc_scalar_results;
   unsigned int group_size = 1, k;
-  auto_vec<gimple *> phis;
   /* SLP reduction without reduction chain, e.g.,
      # a1 = phi <a2, a0>
      # b1 = phi <b2, b0>
@@ -6930,12 +6929,12 @@  vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
           use <s_out4> */
 
   gcc_assert (live_out_stmts.size () == scalar_results.length ());
+  auto_vec<gimple *> phis;
   for (k = 0; k < live_out_stmts.size (); k++)
     {
       stmt_vec_info scalar_stmt_info = vect_orig_stmt (live_out_stmts[k]);
       scalar_dest = gimple_get_lhs (scalar_stmt_info->stmt);
 
-      phis.create (3);
       /* Find the loop-closed-use at the loop exit of the original scalar
          result.  (The reduction result is expected to have two immediate uses,
          one at the latch block, and one at the loop exit).  For double
@@ -6988,7 +6987,7 @@  vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
 	    }
         }
 
-      phis.release ();
+      phis.truncate (0);
     }
 }
 
@@ -10710,18 +10709,6 @@  vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
   return new_tree;
 }
 
-/* Find the edge that's the final one in the path from SRC to DEST and
-   return it.  This edge must exist in at most one forwarder edge between.  */
-
-static edge
-find_connected_edge (edge src, basic_block dest)
-{
-   if (src->dest == dest)
-     return src;
-
-  return find_edge (src->dest, dest);
-}
-
 /* Function vectorizable_live_operation.
 
    STMT_INFO computes a value that is used outside the loop.  Check if
@@ -10964,13 +10951,8 @@  vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
 	    {
 	      edge e = gimple_phi_arg_edge (as_a <gphi *> (use_stmt),
 					   phi_arg_index_from_use (use_p));
-	      bool main_exit_edge = e == main_e
-				    || find_connected_edge (main_e, e->src);
-
-	      /* Early exits have an merge block, we want the merge block itself
-		 so use ->src.  For main exit the merge block is the
-		 destination.  */
-	      basic_block dest = main_exit_edge ? main_e->dest : e->src;
+	      gcc_assert (loop_exit_edge_p (loop, e));
+	      bool main_exit_edge = e == main_e;
 	      tree tmp_vec_lhs = vec_lhs;
 	      tree tmp_bitstart = bitstart;
 
@@ -10988,22 +10970,18 @@  vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
 	      gimple_stmt_iterator exit_gsi;
 	      tree new_tree
 		= vectorizable_live_operation_1 (loop_vinfo, stmt_info,
-						 dest, vectype, ncopies,
+						 e->dest, vectype, ncopies,
 						 slp_node, bitsize,
 						 tmp_bitstart, tmp_vec_lhs,
 						 lhs_type, &exit_gsi);
 
-	      if (gimple_phi_num_args (use_stmt) == 1)
-		{
-		  auto gsi = gsi_for_stmt (use_stmt);
-		  remove_phi_node (&gsi, false);
-		  tree lhs_phi = gimple_phi_result (use_stmt);
-		  gimple *copy = gimple_build_assign (lhs_phi, new_tree);
-		  gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT);
-		}
-	      else
-		SET_PHI_ARG_DEF (use_stmt, e->dest_idx, new_tree);
-	  }
+	      auto gsi = gsi_for_stmt (use_stmt);
+	      remove_phi_node (&gsi, false);
+	      tree lhs_phi = gimple_phi_result (use_stmt);
+	      gimple *copy = gimple_build_assign (lhs_phi, new_tree);
+	      gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT);
+	      break;
+	    }
 
       /* There a no further out-of-loop uses of lhs by LC-SSA construction.  */
       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)