diff mbox series

tree-optimization/112961 - include latch in if-conversion CSE

Message ID 20231212141358.C89FC385703F@sourceware.org
State New
Headers show
Series tree-optimization/112961 - include latch in if-conversion CSE | expand

Commit Message

Richard Biener Dec. 12, 2023, 2:12 p.m. UTC
The following makes sure to also process the (empty) latch when
performing CSE on the if-converted loop body.  That's important
to get all uses of copies propagated out on the backedge as well.
To avoid CSE on the PHI nodes themselves, which is prohibitive
(see PR90402), this temporarily adds a fake entry edge to the loop.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

	PR tree-optimization/112961
	* tree-if-conv.cc (tree_if_conversion): Instead of excluding
	the latch block from VN, add a fake entry edge.

	* g++.dg/vect/pr112961.cc: New testcase.
---
 gcc/testsuite/g++.dg/vect/pr112961.cc | 17 +++++++++++++++++
 gcc/tree-if-conv.cc                   |  9 +++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/vect/pr112961.cc
diff mbox series

Patch

diff --git a/gcc/testsuite/g++.dg/vect/pr112961.cc b/gcc/testsuite/g++.dg/vect/pr112961.cc
new file mode 100644
index 00000000000..52759e180fb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr112961.cc
@@ -0,0 +1,17 @@ 
+// { dg-do compile }
+// { dg-require-effective-target vect_int }
+
+inline const int& maxx (const int& a, const int &b)
+{
+  return a > b ? a : b;
+}
+
+int foo(int *a)
+{
+  int max = 0;
+  for (int i = 0; i < 1024; ++i)
+    max = maxx(max, a[i]);
+  return max;
+}
+
+// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail vect_no_int_min_max } } }
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 0bde281c246..f9fd0149937 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -3734,7 +3734,7 @@  tree_if_conversion (class loop *loop, vec<gimple *> *preds)
   auto_vec <gassign *, 4> reads_to_lower;
   auto_vec <gassign *, 4> writes_to_lower;
   bitmap exit_bbs;
-  edge pe;
+  edge pe, e;
   auto_vec<data_reference_p, 10> refs;
   bool loop_versioned;
 
@@ -3894,11 +3894,13 @@  tree_if_conversion (class loop *loop, vec<gimple *> *preds)
   /* Perform local CSE, this esp. helps the vectorizer analysis if loads
      and stores are involved.  CSE only the loop body, not the entry
      PHIs, those are to be kept in sync with the non-if-converted copy.
+     Do this by adding a fake entry edge - we do want to include the
+     latch as otherwise copies on a reduction path cannot be propagated out.
      ???  We'll still keep dead stores though.  */
+  e = make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), loop->header, EDGE_FAKE);
   exit_bbs = BITMAP_ALLOC (NULL);
   for (edge exit : get_loop_exit_edges (loop))
     bitmap_set_bit (exit_bbs, exit->dest->index);
-  bitmap_set_bit (exit_bbs, loop->latch->index);
 
   std::pair <tree, tree> *name_pair;
   unsigned ssa_names_idx;
@@ -3908,6 +3910,9 @@  tree_if_conversion (class loop *loop, vec<gimple *> *preds)
 
   todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
 
+  /* Remove the fake edge again.  */
+  remove_edge (e);
+
   /* Delete dead predicate computations.  */
   ifcvt_local_dce (loop);
   BITMAP_FREE (exit_bbs);