Patchwork Fix PR56756

login
register
mail settings
Submitter Richard Guenther
Date March 28, 2013, 3:15 p.m.
Message ID <alpine.LNX.2.00.1303281611520.21094@zhemvz.fhfr.qr>
Download mbox | patch
Permalink /patch/232060/
State New
Headers show

Comments

Richard Guenther - March 28, 2013, 3:15 p.m.
The domwalker fix to order dom children after inverted postorder
exposed the latent issue that LIM relies on domwalk to walk
all blocks defining SSA names before all blocks using them ...
which is what the following patch tries to fix using the
dependency information it already tracks (but incompletely so,
thus the fixes).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I'm not entirely happy with this - it should use a worklist
of stmts instead of recursing and handling basic-blocks.
But well ...

Leaving for comments.  (inverted_post_order_compute visits
loop nodes in "weird" order because it visits loop exit
nodes last)

Richard.

2013-03-28  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/56756
	* tree-ssa-loop-im.c (outermost_invariant_loop): More
	properly handle not yet processed def stmts.
	(add_dependency): Simplify.
	(move_computations_stmt): Rename to ...
	(move_computations_bb): ... this.  Process blocks with
	dependencies recursively.
	(move_computations): Instead of a dominator walk process
	basic-blocks in order determined by dependences of stmts
	we want to hoist.
	(force_move_till_op): Add dependencies.
	(struct fmt_data): Add dependence storage.
	(execute_sm): Adjust.

	* gcc.dg/torture/pr56756.c: New testcase.
Richard Guenther - April 16, 2013, 12:02 p.m.
On Thu, 28 Mar 2013, Richard Biener wrote:

> 
> The domwalker fix to order dom children after inverted postorder
> exposed the latent issue that LIM relies on domwalk to walk
> all blocks defining SSA names before all blocks using them ...
> which is what the following patch tries to fix using the
> dependency information it already tracks (but incompletely so,
> thus the fixes).
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> I'm not entirely happy with this - it should use a worklist
> of stmts instead of recursing and handling basic-blocks.
> But well ...
> 
> Leaving for comments.  (inverted_post_order_compute visits
> loop nodes in "weird" order because it visits loop exit
> nodes last)

Ok, I wasn't really happy with the above.  The following simpler
patch avoids creating intermediate IL that has SSA uses where
their definition does not dominate the use.  Simply by placing
the store-motion load before a random previous load/store where
obviously all SSA names used to compute the load/store address
have dominating definitions.  That isn't guaranteed when
inserting into the loop latch block as the testcase shows.

Bootstrap and regtest pending on x86_64-unknown-linux-gnu.

Richard.

2013-04-16  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/56756
	* tree-ssa-loop-im.c (struct first_mem_ref_loc_1): New functor.
	(first_mem_ref_loc): New.
	(execute_sm): Place the load temporarily before a previous
	access instead of in the latch edge to ensure its SSA dependencies
	are defined at points dominating the load.

	* gcc.dg/torture/pr56756.c: New testcase.

Index: gcc/tree-ssa-loop-im.c
===================================================================
*** gcc/tree-ssa-loop-im.c	(revision 197997)
--- gcc/tree-ssa-loop-im.c	(working copy)
*************** rewrite_mem_refs (struct loop *loop, mem
*** 1718,1723 ****
--- 1718,1749 ----
    for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
  }
  
+ /* Stores the first reference location in LOCP.  */
+ 
+ struct first_mem_ref_loc_1
+ {
+   first_mem_ref_loc_1 (mem_ref_loc_p *locp_) : locp (locp_) {}
+   bool operator()(mem_ref_loc_p loc);
+   mem_ref_loc_p *locp;
+ };
+ 
+ bool
+ first_mem_ref_loc_1::operator()(mem_ref_loc_p loc)
+ {
+   *locp = loc;
+   return true;
+ }
+ 
+ /* Returns the first reference location to REF in LOOP.  */
+ 
+ static mem_ref_loc_p
+ first_mem_ref_loc (struct loop *loop, mem_ref_p ref)
+ {
+   mem_ref_loc_p locp = NULL;
+   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
+   return locp;
+ }
+ 
  /* The name and the length of the currently generated variable
     for lsm.  */
  #define MAX_LSM_NAME_LENGTH 40
*************** execute_sm (struct loop *loop, vec<edge>
*** 2022,2030 ****
    unsigned i;
    gimple load;
    struct fmt_data fmt_data;
!   edge ex, latch_edge;
    struct lim_aux_data *lim_data;
    bool multi_threaded_model_p = false;
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
--- 2048,2057 ----
    unsigned i;
    gimple load;
    struct fmt_data fmt_data;
!   edge ex;
    struct lim_aux_data *lim_data;
    bool multi_threaded_model_p = false;
+   gimple_stmt_iterator gsi;
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
*************** execute_sm (struct loop *loop, vec<edge>
*** 2049,2057 ****
  
    rewrite_mem_refs (loop, ref, tmp_var);
  
!   /* Emit the load code into the latch, so that we are sure it will
!      be processed after all dependencies.  */
!   latch_edge = loop_latch_edge (loop);
  
    /* FIXME/TODO: For the multi-threaded variant, we could avoid this
       load altogether, since the store is predicated by a flag.  We
--- 2076,2085 ----
  
    rewrite_mem_refs (loop, ref, tmp_var);
  
!   /* Emit the load code on a random exit edge or into the latch if
!      the loop does not exit, so that we are sure it will be processed
!      by move_computations after all dependencies.  */
!   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
  
    /* FIXME/TODO: For the multi-threaded variant, we could avoid this
       load altogether, since the store is predicated by a flag.  We
*************** execute_sm (struct loop *loop, vec<edge>
*** 2060,2066 ****
    lim_data = init_lim_data (load);
    lim_data->max_loop = loop;
    lim_data->tgt_loop = loop;
!   gsi_insert_on_edge (latch_edge, load);
  
    if (multi_threaded_model_p)
      {
--- 2088,2094 ----
    lim_data = init_lim_data (load);
    lim_data->max_loop = loop;
    lim_data->tgt_loop = loop;
!   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
  
    if (multi_threaded_model_p)
      {
*************** execute_sm (struct loop *loop, vec<edge>
*** 2068,2074 ****
        lim_data = init_lim_data (load);
        lim_data->max_loop = loop;
        lim_data->tgt_loop = loop;
!       gsi_insert_on_edge (latch_edge, load);
      }
  
    /* Sink the store to every exit from the loop.  */
--- 2096,2102 ----
        lim_data = init_lim_data (load);
        lim_data->max_loop = loop;
        lim_data->tgt_loop = loop;
!       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
      }
  
    /* Sink the store to every exit from the loop.  */
Index: gcc/testsuite/gcc.dg/torture/pr56756.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr56756.c	(revision 0)
--- gcc/testsuite/gcc.dg/torture/pr56756.c	(working copy)
***************
*** 0 ****
--- 1,28 ----
+ /* { dg-do compile } */
+ 
+ int a, *b;
+ 
+ void f(void)
+ {
+   if(a)
+     {
+       int k;
+ 
+       for(a = 0; a < 1; a++)
+ 	{
+ 	  int **q;
+ 	  f();
+ 
+ 	  for(; **q; ++**q)
+ 	    lbl:
+ 		if(a)
+ 		  {
+ 		    a = 0;
+ 		    goto lbl;
+ 		  }
+ 
+ 	  b = &k;
+ 	}
+     }
+   goto lbl;
+ }

Patch

Index: gcc/tree-ssa-loop-im.c
===================================================================
--- gcc/tree-ssa-loop-im.c	(revision 197188)
+++ gcc/tree-ssa-loop-im.c	(working copy)
@@ -417,7 +418,7 @@  outermost_invariant_loop (tree def, stru
 {
   gimple def_stmt;
   basic_block def_bb;
-  struct loop *max_loop;
+  struct loop *def_loop;
   struct lim_aux_data *lim_data;
 
   if (!def)
@@ -434,17 +435,22 @@  outermost_invariant_loop (tree def, stru
   if (!def_bb)
     return superloop_at_depth (loop, 1);
 
-  max_loop = find_common_loop (loop, def_bb->loop_father);
-
+  def_loop = def_bb->loop_father;
   lim_data = get_lim_data (def_stmt);
-  if (lim_data != NULL && lim_data->max_loop != NULL)
-    max_loop = find_common_loop (max_loop,
-				 loop_outer (lim_data->max_loop));
-  if (max_loop == loop)
-    return NULL;
-  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
+  if (lim_data == NULL
+      || lim_data->max_loop == NULL)
+    {
+      if (def_loop == loop)
+	return NULL;
+      if (!flow_loop_nested_p (def_loop, loop))
+	return NULL;
+      return superloop_at_depth (loop, loop_depth (def_loop) + 1);
+    }
 
-  return max_loop;
+  if (lim_data->max_loop != loop
+      && !flow_loop_nested_p (lim_data->max_loop, loop))
+    return NULL;
+  return lim_data->max_loop;
 }
 
 /* DATA is a structure containing information associated with a statement
@@ -466,7 +472,6 @@  add_dependency (tree def, struct lim_aux
   gimple def_stmt = SSA_NAME_DEF_STMT (def);
   basic_block def_bb = gimple_bb (def_stmt);
   struct loop *max_loop;
-  struct lim_aux_data *def_data;
 
   if (!def_bb)
     return true;
@@ -478,17 +483,13 @@  add_dependency (tree def, struct lim_aux
   if (flow_loop_nested_p (data->max_loop, max_loop))
     data->max_loop = max_loop;
 
-  def_data = get_lim_data (def_stmt);
-  if (!def_data)
-    return true;
-
   if (add_cost
       /* Only add the cost if the statement defining DEF is inside LOOP,
 	 i.e. if it is likely that by moving the invariants dependent
 	 on it, we will be able to avoid creating a new register for
 	 it (since it will be only used in these dependent invariants).  */
       && def_bb->loop_father == loop)
-    data->cost += def_data->cost;
+    data->cost += get_lim_data (def_stmt)->cost;
 
   data->depends.safe_push (def_stmt);
 
@@ -1174,18 +1175,20 @@  determine_invariantness (void)
    data stored in LIM_DATA structures associated with each statement.  Callback
    for walk_dominator_tree.  */
 
-static void
-move_computations_stmt (struct dom_walk_data *dw_data,
-			basic_block bb)
+static unsigned
+move_computations_bb (basic_block bb, sbitmap visited)
 {
   struct loop *level;
   gimple_stmt_iterator bsi;
   gimple stmt;
   unsigned cost = 0;
   struct lim_aux_data *lim_data;
+  unsigned todo = 0;
+  unsigned i;
+  gimple dep_stmt;
 
   if (!loop_outer (bb->loop_father))
-    return;
+    return 0;
 
   for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
     {
@@ -1201,14 +1204,25 @@  move_computations_stmt (struct dom_walk_
 
       cost = lim_data->cost;
       level = lim_data->tgt_loop;
-      clear_lim_data (stmt);
 
       if (!level)
 	{
+	  clear_lim_data (stmt);
 	  gsi_next (&bsi);
 	  continue;
 	}
 
+      /* Make sure to process blocks with stmts we depend on first.  */
+      FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
+	if (gimple_bb (dep_stmt)
+	    && !bitmap_bit_p (visited, gimple_bb (dep_stmt)->index))
+	  {
+	    bitmap_set_bit (visited, gimple_bb (dep_stmt)->index);
+	    todo |= move_computations_bb (gimple_bb (dep_stmt), visited);
+	  }
+
+      clear_lim_data (stmt);
+
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
 	  fprintf (dump_file, "Moving PHI node\n");
@@ -1240,7 +1254,7 @@  move_computations_stmt (struct dom_walk_
 						   gimple_phi_result (stmt),
 						   t, arg0, arg1);
 	  SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
-	  *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg;
+	  todo |= TODO_cleanup_cfg;
 	}
       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
       remove_phi_node (&bsi, false);
@@ -1261,14 +1275,25 @@  move_computations_stmt (struct dom_walk_
 
       cost = lim_data->cost;
       level = lim_data->tgt_loop;
-      clear_lim_data (stmt);
 
       if (!level)
 	{
+	  clear_lim_data (stmt);
 	  gsi_next (&bsi);
 	  continue;
 	}
 
+      /* Make sure to process blocks with stmts we depend on first.  */
+      FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
+	if (gimple_bb (dep_stmt)
+	    && !bitmap_bit_p (visited, gimple_bb (dep_stmt)->index))
+	  {
+	    bitmap_set_bit (visited, gimple_bb (dep_stmt)->index);
+	    todo |= move_computations_bb (gimple_bb (dep_stmt), visited);
+	  }
+
+      clear_lim_data (stmt);
+
       /* We do not really want to move conditionals out of the loop; we just
 	 placed it here to force its operands to be moved if necessary.  */
       if (gimple_code (stmt) == GIMPLE_COND)
@@ -1303,6 +1328,8 @@  move_computations_stmt (struct dom_walk_
       gsi_remove (&bsi, false);
       gsi_insert_on_edge (e, stmt);
     }
+
+  return todo;
 }
 
 /* Hoist the statements out of the loops prescribed by data stored in
@@ -1311,17 +1338,19 @@  move_computations_stmt (struct dom_walk_
 static unsigned int
 move_computations (void)
 {
-  struct dom_walk_data walk_data;
-  unsigned int todo = 0;
+  basic_block bb;
+  unsigned todo = 0;
+  sbitmap visited = sbitmap_alloc (last_basic_block);
+  bitmap_clear (visited);
 
-  memset (&walk_data, 0, sizeof (struct dom_walk_data));
-  walk_data.global_data = &todo;
-  walk_data.dom_direction = CDI_DOMINATORS;
-  walk_data.before_dom_children = move_computations_stmt;
+  FOR_EACH_BB (bb)
+    if (!bitmap_bit_p (visited, bb->index))
+      {
+	bitmap_set_bit (visited, bb->index);
+	todo |= move_computations_bb (bb, visited);
+      }
 
-  init_walk_dominator_tree (&walk_data);
-  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
-  fini_walk_dominator_tree (&walk_data);
+  sbitmap_free (visited);
 
   gsi_commit_edge_inserts ();
   if (need_ssa_update_p (cfun))
@@ -1365,7 +1394,8 @@  may_move_till (tree ref, tree *index, vo
    moved out of the LOOP.  ORIG_LOOP is the loop in that EXPR is used.  */
 
 static void
-force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
+force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop,
+		    struct lim_aux_data *lim_data)
 {
   gimple stmt;
 
@@ -1380,6 +1410,7 @@  force_move_till_op (tree op, struct loop
     return;
 
   set_level (stmt, orig_loop, loop);
+  lim_data->depends.safe_push (stmt);
 }
 
 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
@@ -1390,6 +1421,7 @@  struct fmt_data
 {
   struct loop *loop;
   struct loop *orig_loop;
+  struct lim_aux_data *lim_data;
 };
 
 static bool
@@ -1402,11 +1434,14 @@  force_move_till (tree ref, tree *index,
       tree step = TREE_OPERAND (ref, 3);
       tree lbound = TREE_OPERAND (ref, 2);
 
-      force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
-      force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
+      force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop,
+			  fmt_data->lim_data);
+      force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop,
+			  fmt_data->lim_data);
     }
 
-  force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
+  force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop,
+		      fmt_data->lim_data);
 
   return true;
 }
@@ -2036,10 +2071,6 @@  execute_sm (struct loop *loop, vec<edge>
   tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
 			    get_lsm_tmp_name (ref->mem.ref, ~0));
 
-  fmt_data.loop = loop;
-  fmt_data.orig_loop = loop;
-  for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
-
   if (block_in_transaction (loop_preheader_edge (loop)->src)
       || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
     multi_threaded_model_p = true;
@@ -2062,6 +2093,11 @@  execute_sm (struct loop *loop, vec<edge>
   lim_data->tgt_loop = loop;
   gsi_insert_on_edge (latch_edge, load);
 
+  fmt_data.loop = loop;
+  fmt_data.orig_loop = loop;
+  fmt_data.lim_data = lim_data;
+  for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
+
   if (multi_threaded_model_p)
     {
       load = gimple_build_assign (store_flag, boolean_false_node);
Index: gcc/testsuite/gcc.dg/torture/pr56756.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr56756.c	(revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr56756.c	(working copy)
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+
+int a, *b;
+
+void f(void)
+{
+  if(a)
+    {
+      int k;
+
+      for(a = 0; a < 1; a++)
+	{
+	  int **q;
+	  f();
+
+	  for(; **q; ++**q)
+	    lbl:
+		if(a)
+		  {
+		    a = 0;
+		    goto lbl;
+		  }
+
+	  b = &k;
+	}
+    }
+  goto lbl;
+}