diff mbox series

Fix PR90402

Message ID alpine.LSU.2.20.1905091345590.10704@zhemvz.fhfr.qr
State New
Headers show
Series Fix PR90402 | expand

Commit Message

Richard Biener May 9, 2019, 11:49 a.m. UTC
The following fixes PR90402 by avoiding value-numbering bring the
loop header PHIs of if-converted and non-if-converted loop copy
out-of-sync for the vectorizer.

This is done by teaching region VN to handle multiple entries
into the entry block which allows us to value-number a cycles
body without considering the backedge (just forcing all the
entry PHIs varying).  Single BB cycles probably still cannot
be handled since they'd have entry->dest == exit_bb, but I
didn't actually try...

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Richard.

2019-05-09  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/90402
	* tree-if-conv.c (tree_if_conversion): Value number only
	the loop body by making the latch an exit of the region
	as well.
	* tree-ssa-sccvn.c (process_bb): Add flag whether to skip
	processing PHIs.
	(do_rpo_vn): Deal with multiple edges into the entry block
	that are not backedges inside the region by skipping PHIs
	of the entry block.

	* gcc.dg/torture/pr90402-1.c: New testcase.
diff mbox series

Patch

Index: gcc/tree-if-conv.c
===================================================================
--- gcc/tree-if-conv.c	(revision 271030)
+++ gcc/tree-if-conv.c	(working copy)
@@ -3066,10 +3066,12 @@  tree_if_conversion (struct loop *loop, v
   ifcvt_local_dce (loop->header);
 
   /* Perform local CSE, this esp. helps the vectorizer analysis if loads
-     and stores are involved.
+     and stores are involved.  CSE only the loop body, not the entry
+     PHIs, those are to be kept in sync with the non-if-converted copy.
      ???  We'll still keep dead stores though.  */
   exit_bbs = BITMAP_ALLOC (NULL);
   bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
+  bitmap_set_bit (exit_bbs, loop->latch->index);
   todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
   BITMAP_FREE (exit_bbs);
 
Index: gcc/tree-ssa-sccvn.c
===================================================================
--- gcc/tree-ssa-sccvn.c	(revision 271030)
+++ gcc/tree-ssa-sccvn.c	(working copy)
@@ -5978,7 +5978,7 @@  insert_related_predicates_on_edge (enum
 static unsigned
 process_bb (rpo_elim &avail, basic_block bb,
 	    bool bb_visited, bool iterate_phis, bool iterate, bool eliminate,
-	    bool do_region, bitmap exit_bbs)
+	    bool do_region, bitmap exit_bbs, bool skip_phis)
 {
   unsigned todo = 0;
   edge_iterator ei;
@@ -5989,7 +5989,8 @@  process_bb (rpo_elim &avail, basic_block
   /* If we are in loop-closed SSA preserve this state.  This is
      relevant when called on regions from outside of FRE/PRE.  */
   bool lc_phi_nodes = false;
-  if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
+  if (!skip_phis
+      && loops_state_satisfies_p (LOOP_CLOSED_SSA))
     FOR_EACH_EDGE (e, ei, bb->preds)
       if (e->src->loop_father != e->dest->loop_father
 	  && flow_loop_nested_p (e->dest->loop_father,
@@ -6010,67 +6011,68 @@  process_bb (rpo_elim &avail, basic_block
     }
 
   /* Value-number all defs in the basic-block.  */
-  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
-       gsi_next (&gsi))
-    {
-      gphi *phi = gsi.phi ();
-      tree res = PHI_RESULT (phi);
-      vn_ssa_aux_t res_info = VN_INFO (res);
-      if (!bb_visited)
-	{
-	  gcc_assert (!res_info->visited);
-	  res_info->valnum = VN_TOP;
-	  res_info->visited = true;
-	}
-
-      /* When not iterating force backedge values to varying.  */
-      visit_stmt (phi, !iterate_phis);
-      if (virtual_operand_p (res))
-	continue;
-
-      /* Eliminate */
-      /* The interesting case is gcc.dg/tree-ssa/pr22230.c for correctness
-	 how we handle backedges and availability.
-	 And gcc.dg/tree-ssa/ssa-sccvn-2.c for optimization.  */
-      tree val = res_info->valnum;
-      if (res != val && !iterate && eliminate)
-	{
-	  if (tree leader = avail.eliminate_avail (bb, res))
-	    {
-	      if (leader != res
-		  /* Preserve loop-closed SSA form.  */
-		  && (! lc_phi_nodes
-		      || is_gimple_min_invariant (leader)))
-		{
-		  if (dump_file && (dump_flags & TDF_DETAILS))
-		    {
-		      fprintf (dump_file, "Replaced redundant PHI node "
-			       "defining ");
-		      print_generic_expr (dump_file, res);
-		      fprintf (dump_file, " with ");
-		      print_generic_expr (dump_file, leader);
-		      fprintf (dump_file, "\n");
-		    }
-		  avail.eliminations++;
+  if (!skip_phis)
+    for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
+	 gsi_next (&gsi))
+      {
+	gphi *phi = gsi.phi ();
+	tree res = PHI_RESULT (phi);
+	vn_ssa_aux_t res_info = VN_INFO (res);
+	if (!bb_visited)
+	  {
+	    gcc_assert (!res_info->visited);
+	    res_info->valnum = VN_TOP;
+	    res_info->visited = true;
+	  }
 
-		  if (may_propagate_copy (res, leader))
-		    {
-		      /* Schedule for removal.  */
-		      avail.to_remove.safe_push (phi);
-		      continue;
-		    }
-		  /* ???  Else generate a copy stmt.  */
-		}
-	    }
-	}
-      /* Only make defs available that not already are.  But make
-	 sure loop-closed SSA PHI node defs are picked up for
-	 downstream uses.  */
-      if (lc_phi_nodes
-	  || res == val
-	  || ! avail.eliminate_avail (bb, res))
-	avail.eliminate_push_avail (bb, res);
-    }
+	/* When not iterating force backedge values to varying.  */
+	visit_stmt (phi, !iterate_phis);
+	if (virtual_operand_p (res))
+	  continue;
+
+	/* Eliminate */
+	/* The interesting case is gcc.dg/tree-ssa/pr22230.c for correctness
+	   how we handle backedges and availability.
+	   And gcc.dg/tree-ssa/ssa-sccvn-2.c for optimization.  */
+	tree val = res_info->valnum;
+	if (res != val && !iterate && eliminate)
+	  {
+	    if (tree leader = avail.eliminate_avail (bb, res))
+	      {
+		if (leader != res
+		    /* Preserve loop-closed SSA form.  */
+		    && (! lc_phi_nodes
+			|| is_gimple_min_invariant (leader)))
+		  {
+		    if (dump_file && (dump_flags & TDF_DETAILS))
+		      {
+			fprintf (dump_file, "Replaced redundant PHI node "
+				 "defining ");
+			print_generic_expr (dump_file, res);
+			fprintf (dump_file, " with ");
+			print_generic_expr (dump_file, leader);
+			fprintf (dump_file, "\n");
+		      }
+		    avail.eliminations++;
+
+		    if (may_propagate_copy (res, leader))
+		      {
+			/* Schedule for removal.  */
+			avail.to_remove.safe_push (phi);
+			continue;
+		      }
+		    /* ???  Else generate a copy stmt.  */
+		  }
+	      }
+	  }
+	/* Only make defs available that not already are.  But make
+	   sure loop-closed SSA PHI node defs are picked up for
+	   downstream uses.  */
+	if (lc_phi_nodes
+	    || res == val
+	    || ! avail.eliminate_avail (bb, res))
+	  avail.eliminate_push_avail (bb, res);
+      }
 
   /* For empty BBs mark outgoing edges executable.  For non-empty BBs
      we do this when processing the last stmt as we have to do this
@@ -6414,6 +6416,13 @@  do_rpo_vn (function *fn, edge entry, bit
       bitmap_set_bit (exit_bbs, EXIT_BLOCK);
     }
 
+  /* Clear EDGE_DFS_BACK on "all" entry edges, RPO order compute will
+     re-mark those that are contained in the region.  */
+  edge_iterator ei;
+  edge e;
+  FOR_EACH_EDGE (e, ei, entry->dest->preds)
+    e->flags &= ~EDGE_DFS_BACK;
+
   int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fn) - NUM_FIXED_BLOCKS);
   int n = rev_post_order_and_mark_dfs_back_seme
     (fn, entry, exit_bbs, !loops_state_satisfies_p (LOOPS_NEED_FIXUP), rpo);
@@ -6424,6 +6433,18 @@  do_rpo_vn (function *fn, edge entry, bit
   if (!do_region)
     BITMAP_FREE (exit_bbs);
 
+  /* If there are any non-DFS_BACK edges into entry->dest skip
+     processing PHI nodes for that block.  This supports
+     value-numbering loop bodies w/o the actual loop.  */
+  FOR_EACH_EDGE (e, ei, entry->dest->preds)
+    if (e != entry
+	&& !(e->flags & EDGE_DFS_BACK))
+      break;
+  bool skip_entry_phis = e != NULL;
+  if (skip_entry_phis && dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Region does not contain all edges into "
+	     "the entry block, skipping its PHIs.\n");
+
   int *bb_to_rpo = XNEWVEC (int, last_basic_block_for_fn (fn));
   for (int i = 0; i < n; ++i)
     bb_to_rpo[rpo[i]] = i;
@@ -6453,7 +6474,9 @@  do_rpo_vn (function *fn, edge entry, bit
 	  edge e;
 	  edge_iterator ei;
 	  FOR_EACH_EDGE (e, ei, bb->preds)
-	    gcc_assert (e == entry || (e->src->flags & bb_in_region));
+	    gcc_assert (e == entry
+			|| (skip_entry_phis && bb == entry->dest)
+			|| (e->src->flags & bb_in_region));
 	}
       for (int i = 0; i < n; ++i)
 	{
@@ -6497,7 +6520,7 @@  do_rpo_vn (function *fn, edge entry, bit
 	  if (e->flags & EDGE_DFS_BACK)
 	    has_backedges = true;
 	  e->flags &= ~EDGE_EXECUTABLE;
-	  if (iterate || e == entry)
+	  if (iterate || e == entry || (skip_entry_phis && bb == entry->dest))
 	    continue;
 	  if (bb_to_rpo[e->src->index] > i)
 	    {
@@ -6530,7 +6553,7 @@  do_rpo_vn (function *fn, edge entry, bit
 	  edge_iterator ei;
 	  FOR_EACH_EDGE (e, ei, bb->preds)
 	    {
-	      if (e == entry)
+	      if (e == entry || (skip_entry_phis && bb == entry->dest))
 		continue;
 	      int max_rpo = MAX (rpo_state[i].max_rpo,
 				 rpo_state[bb_to_rpo[e->src->index]].max_rpo);
@@ -6619,7 +6642,7 @@  do_rpo_vn (function *fn, edge entry, bit
 	todo |= process_bb (avail, bb,
 			    rpo_state[idx].visited != 0,
 			    rpo_state[idx].iterate,
-			    iterate, eliminate, do_region, exit_bbs);
+			    iterate, eliminate, do_region, exit_bbs, false);
 	rpo_state[idx].visited++;
 
 	/* Verify if changed values flow over executable outgoing backedges
@@ -6717,8 +6740,10 @@  do_rpo_vn (function *fn, edge entry, bit
 	  edge e;
 	  FOR_EACH_EDGE (e, ei, bb->preds)
 	    if (!(e->flags & EDGE_EXECUTABLE)
-		&& !rpo_state[bb_to_rpo[e->src->index]].visited
-		&& rpo_state[bb_to_rpo[e->src->index]].max_rpo >= (int)idx)
+		&& (bb == entry->dest
+		    || (!rpo_state[bb_to_rpo[e->src->index]].visited
+			&& (rpo_state[bb_to_rpo[e->src->index]].max_rpo
+			    >= (int)idx))))
 	      {
 		if (dump_file && (dump_flags & TDF_DETAILS))
 		  fprintf (dump_file, "Cannot trust state of predecessor "
@@ -6729,7 +6754,8 @@  do_rpo_vn (function *fn, edge entry, bit
 
 	  nblk++;
 	  todo |= process_bb (avail, bb, false, false, false, eliminate,
-			      do_region, exit_bbs);
+			      do_region, exit_bbs,
+			      skip_entry_phis && bb == entry->dest);
 	  rpo_state[idx].visited++;
 
 	  FOR_EACH_EDGE (e, ei, bb->succs)
@@ -6811,7 +6837,9 @@  do_rpo_vn (function *fn, edge entry, bit
 }
 
 /* Region-based entry for RPO VN.  Performs value-numbering and elimination
-   on the SEME region specified by ENTRY and EXIT_BBS.  */
+   on the SEME region specified by ENTRY and EXIT_BBS.  If ENTRY is not
+   the only edge into the region at ENTRY->dest PHI nodes in ENTRY->dest
+   are not considered.  */
 
 unsigned
 do_rpo_vn (function *fn, edge entry, bitmap exit_bbs)
Index: gcc/testsuite/gcc.dg/torture/pr90402-1.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr90402-1.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr90402-1.c	(working copy)
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-mavx" { target x86_64-*-* i?86-*-* } } */
+
+int kn, ha;
+
+int
+c7 (void)
+{
+}
+
+void
+ul (int w3)
+{
+  kn = c7 ();
+
+  while (w3 < 1)
+    {
+      ha += !!kn ? 1 : w3;
+
+      for (kn = 0; kn < 2; ++kn)
+	{
+	}
+
+      ++w3;
+    }
+}