diff mbox series

[1/2] tree-optimization/114068 - missed virtual LC PHI after vect peeling

Message ID 20240226113510.776E63858C33@sourceware.org
State New
Headers show
Series [1/2] tree-optimization/114068 - missed virtual LC PHI after vect peeling | expand

Commit Message

Richard Biener Feb. 26, 2024, 11:34 a.m. UTC
When the IV exit we choose leads to a path with no virtual use, we
end up without a virtual LC PHI even though one is needed for the
epilog entry.  The following makes sure to create it so that later
updating works.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

	PR tree-optimization/114068
	* tree-vect-loop-manip.cc (get_live_virtual_operand_on_edge):
	New function.
	(slpeel_tree_duplicate_loop_to_edge_cfg): Add a virtual LC PHI
	on the main exit if needed.  Remove band-aid for the case
	it was missing.

	* gcc.dg/vect/vect-early-break_118-pr114068.c: New testcase.
	* gcc.dg/vect/vect-early-break_119-pr114068.c: Likewise.
---
 .../vect/vect-early-break_118-pr114068.c      | 23 ++++++++
 .../vect/vect-early-break_119-pr114068.c      | 25 +++++++++
 gcc/tree-vect-loop-manip.cc                   | 52 ++++++++++++++-----
 3 files changed, 87 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
new file mode 100644
index 00000000000..b462a464b66
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+struct h {
+  int b;
+  int f;
+} k;
+
+void n(int m) {
+  struct h a = k;
+  for (int o = m; o; ++o) {
+    if (a.f)
+      __builtin_unreachable();
+    if (o > 1)
+      __builtin_unreachable();
+    *(&k.b + o) = 1;
+  }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
new file mode 100644
index 00000000000..a65ef7b8c49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+struct h {
+  int b;
+  int c;
+  int f;
+} k;
+
+void n(int m) {
+  struct h a = k;
+  for (int o = m; o; ++o) {
+    if (a.f)
+      __builtin_unreachable();
+    if (o > 1)
+      __builtin_unreachable();
+    *(&k.b + o) = 1;
+    *(&k.c + o*m) = 2;
+  }
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 3f974d6d839..39bac1e99ef 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1429,6 +1429,32 @@  vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo
 		     (gimple *) cond_stmt);
 }
 
+/* Return the virtual operand live on edge E.  Preconditions: immediate
+   dominators are valid and a virtual definition dominates E.  */
+/* ???  Costly band-aid.  For the use in question we can populate a
+   live-on-exit/end-of-BB virtual operand when copying stmts.  */
+
+static tree
+get_live_virtual_operand_on_edge (edge e)
+{
+  basic_block bb = e->src;  /* The search starts in E's source block.  */
+  do
+    {
+      for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+	  if (gimple_vdef (stmt))  /* A stmt's VDEF is preferred to its VUSE.  */
+	    return gimple_vdef (stmt);
+	  if (gimple_vuse (stmt))
+	    return gimple_vuse (stmt);
+	}
+      if (gphi *vphi = get_virtual_phi (bb))
+	return gimple_phi_result (vphi);
+      bb = get_immediate_dominator (CDI_DOMINATORS, bb);
+    }
+  while (1);  /* Left only through one of the returns above.  */
+}
+
 /* Given LOOP this function generates a new copy of it and puts it
    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
@@ -1595,6 +1621,18 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
       flush_pending_stmts (loop_exit);
       set_immediate_dominator (CDI_DOMINATORS, new_preheader, loop_exit->src);
 
+      /* If we ended up choosing an exit leading to a path not using memory
+	 we can end up without a virtual LC PHI.  Create it when it is
+	 needed because of the epilog loop continuation.  */
+      if (need_virtual_phi && !get_virtual_phi (loop_exit->dest))
+	{
+	  tree header_def = gimple_phi_result (get_virtual_phi (loop->header));
+	  gphi *vphi = create_phi_node (copy_ssa_name (header_def),
+					new_preheader);
+	  add_phi_arg (vphi, get_live_virtual_operand_on_edge (loop_exit),
+		       loop_exit, UNKNOWN_LOCATION);
+	}
+
       bool multiple_exits_p = loop_exits.length () > 1;
       basic_block main_loop_exit_block = new_preheader;
       basic_block alt_loop_exit_block = NULL;
@@ -1711,19 +1749,7 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
 		    {
 		      /* Use the existing virtual LC SSA from exit block.  */
 		      gphi *vphi = get_virtual_phi (main_loop_exit_block);
-		      /* ???  When the exit yields to a path without
-			 any virtual use we can miss a LC PHI for the
-			 live virtual operand.  Simply choosing the
-			 one live at the start of the loop header isn't
-			 correct, but we should get here only with
-			 early-exit vectorization which will move all
-			 defs after the main exit, so leave a temporarily
-			 wrong virtual operand in place.  This happens
-			 for gcc.dg/pr113659.c.  */
-		      if (vphi)
-			new_arg = gimple_phi_result (vphi);
-		      else
-			new_arg = gimple_phi_result (from_phi);
+		      new_arg = gimple_phi_result (vphi);
 		    }
 		  else if ((res = new_phi_args.get (new_arg)))
 		    new_arg = *res;