Patchwork [1/4] Add flag -ftree-loop-if-convert-memory-writes.

login
register
mail settings
Submitter Sebastian Pop
Date Aug. 13, 2010, 9:55 p.m.
Message ID <AANLkTikcJOR40zBJLFrTUwzkWvL03aCZNy6UoFmY49iQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/61720/
State New
Headers show

Comments

Sebastian Pop - Aug. 13, 2010, 9:55 p.m.
On Fri, Aug 13, 2010 at 03:54, Richard Guenther <rguenther@suse.de> wrote:
>> +static void
>> +predicate_mem_writes (loop_p loop)
>> +{
>> +  unsigned int i, orig_loop_num_nodes = loop->num_nodes;
>> +
>> +  for (i = 1; i < orig_loop_num_nodes; i++)
>> +    {
>> +      gimple_stmt_iterator gsi;
>> +      basic_block bb = ifc_bbs[i];
>> +      tree cond = bb_predicate (bb);
>> +
>> +      if (is_true_predicate (cond))
>> +     continue;
>> +
>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>
> So this is another loop over all loops and statements.  Why not
> do this loop once and dispatch to mem/scalar if-conversion there?
>
>> @@ -1178,7 +1270,10 @@ combine_blocks (struct loop *loop)
>>
>>    remove_conditions_and_labels (loop);
>>    insert_gimplified_predicates (loop);
>> -  ifconvert_phi_nodes (loop);
>> +  predicate_all_scalar_phis (loop);
>> +
>> +  if (flag_tree_loop_if_convert_memory_writes)
>> +    predicate_mem_writes (loop);
>
> As suggested above, predicate_all_scalar_phis and predicate_mem_writes
> should loop over all loops/bbs once.
>

The only thing that predicate_all_scalar_phis and predicate_mem_writes
have in common is that they iterate over all the basic blocks of an
innermost loop.

predicate_mem_writes iterates over all the statements of the BB.

predicate_all_scalar_phis iterates over all the phi nodes of the BB.

Should I go ahead and merge these two functions as asked?
I still find the implementation more clear in the separated form.

Please find attached a preliminary patch (passed compile and
make -k check RUNTESTFLAGS=tree-ssa.exp and vect.exp) on top of
the previous one, that shows the corrections for all your comments
(except this last point).  I will submit the combined patch +
corrections once it passes bootstrap and test.

Sebastian
Richard Guenther - Aug. 16, 2010, 9:46 a.m.
On Fri, 13 Aug 2010, Sebastian Pop wrote:

> On Fri, Aug 13, 2010 at 03:54, Richard Guenther <rguenther@suse.de> wrote:
> >> +static void
> >> +predicate_mem_writes (loop_p loop)
> >> +{
> >> +  unsigned int i, orig_loop_num_nodes = loop->num_nodes;
> >> +
> >> +  for (i = 1; i < orig_loop_num_nodes; i++)
> >> +    {
> >> +      gimple_stmt_iterator gsi;
> >> +      basic_block bb = ifc_bbs[i];
> >> +      tree cond = bb_predicate (bb);
> >> +
> >> +      if (is_true_predicate (cond))
> >> +     continue;
> >> +
> >> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> >
> > So this is another loop over all loops and statements.  Why not
> > do this loop once and dispatch to mem/scalar if-conversion there?
> >
> >> @@ -1178,7 +1270,10 @@ combine_blocks (struct loop *loop)
> >>
> >>    remove_conditions_and_labels (loop);
> >>    insert_gimplified_predicates (loop);
> >> -  ifconvert_phi_nodes (loop);
> >> +  predicate_all_scalar_phis (loop);
> >> +
> >> +  if (flag_tree_loop_if_convert_memory_writes)
> >> +    predicate_mem_writes (loop);
> >
> > As suggested above, predicate_all_scalar_phis and predicate_mem_writes
> > should loop over all loops/bbs once.
> >
> 
> The only thing that predicate_all_scalar_phis and predicate_mem_writes
> have in common is that they iterate over all the basic blocks of an
> innermost loop.
> 
> predicate_mem_writes iterates over all the statements of the BB.
> 
> predicate_all_scalar_phis iterates over all the phi nodes of the BB.
> 
> Should I go ahead and merge these two functions as asked?
> I still find the implementation more clear in the separated form.

Yeah, thinking again it's ok as-is.

> Please find attached a preliminary patch (passed compile and
> make -k check RUNTESTFLAGS=tree-ssa.exp and vect.exp) on top of
> the previous one, that shows the corrections for all your comments
> (except this last point).  I will submit the combined patch +
> corrections once it passes bootstrap and test.

-                 || gimple_code (gsi_stmt (gsi)) == GIMPLE_COND)
+                 || gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
+                 || stmt_ends_bb_p (gsi_stmt (gsi)))

GIMPLE_CONDs already are handled in stmt_ends_bb_p, so the check
for GIMPLE_COND can be removed.

@@ -1628,7 +1716,7 @@ main_tree_if_conversion (void)
     changed |= tree_if_conversion (loop);

   if (changed && flag_tree_loop_if_convert_memory_writes)
-    update_ssa (TODO_update_ssa);
+    return TODO_update_ssa_only_virtuals;

   return changed ? TODO_cleanup_cfg : 0;

So you skip cfg_cleanup in that case.  I'd have done

  unsigned todo = 0;

...

   if (changed)
     todo |= TODO_cleanup_cfg;
   if (changed && flag_tree_loop_if_convert_memory_writes)
     todo |= TODO_update_ssa_only_virtuals;

   return todo;

I'll wait for a new combined patch and have a quick look at it.

Richard.

Patch

diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 51319d7..017c895 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -213,11 +213,13 @@  reset_bb_predicate (basic_block bb)
   init_bb_predicate (bb);
 }
 
-/* Create a new temp variable of type TYPE.  Add GIMPLE_ASSIGN to assign EXP
-   to the new variable.  */
+/* Returns a new SSA_NAME of type TYPE that is assigned the value of
+   the expression EXPR.  Inserts the statement created for this
+   computation before GSI and leaves the iterator GSI at the same
+   statement.  */
 
-static gimple
-ifc_temp_var (tree type, tree exp)
+static tree
+ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
 {
   const char *name = "_ifc_";
   tree var, new_name;
@@ -227,8 +229,8 @@  ifc_temp_var (tree type, tree exp)
   var = create_tmp_var (type, name);
   add_referenced_var (var);
 
-  /* Build new statement to assign EXP to new variable.  */
-  stmt = gimple_build_assign (var, exp);
+  /* Build new statement to assign EXPR to new variable.  */
+  stmt = gimple_build_assign (var, expr);
 
   /* Get SSA name for the new variable and set make new statement
      its definition statement.  */
@@ -237,7 +239,8 @@  ifc_temp_var (tree type, tree exp)
   SSA_NAME_DEF_STMT (new_name) = stmt;
   update_stmt (stmt);
 
-  return stmt;
+  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+  return gimple_assign_lhs (stmt);
 }
 
 /* Return true when COND is a true predicate.  */
@@ -1205,13 +1208,7 @@  find_phi_replacement_condition (struct loop *loop,
 				    false, NULL_TREE,
 				    true, GSI_SAME_STMT);
   if (!is_gimple_reg (*cond) && !is_gimple_condexpr (*cond))
-    {
-      gimple new_stmt;
-
-      new_stmt = ifc_temp_var (TREE_TYPE (*cond), unshare_expr (*cond));
-      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
-      *cond = gimple_assign_lhs (new_stmt);
-    }
+    *cond = ifc_temp_var (TREE_TYPE (*cond), unshare_expr (*cond), gsi);
 
   gcc_assert (*cond);
 
@@ -1368,7 +1365,8 @@  insert_gimplified_predicates (loop_p loop)
 	      gimple_stmt_iterator gsi = gsi_last_bb (bb);
 
 	      if (gsi_end_p (gsi)
-		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_COND)
+		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
+		  || stmt_ends_bb_p (gsi_stmt (gsi)))
 		gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
 	      else
 		gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
@@ -1382,9 +1380,110 @@  insert_gimplified_predicates (loop_p loop)
 
 /* Predicate each write to memory in LOOP.
 
-   Replace a statement "A[i] = foo" with "A[i] = cond ? foo : A[i]"
-   with the condition COND determined from the predicate of the basic
-   block containing the statement.  */
+   This function transforms control flow constructs containing memory
+   writes of the form:
+
+   | for (i = 0; i < N; i++)
+   |   if (cond)
+   |     A[i] = expr;
+
+   into the following form that does not contain control flow:
+
+   | for (i = 0; i < N; i++)
+   |   A[i] = cond ? expr : A[i];
+
+   The original CFG looks like this:
+
+   | bb_0
+   |   i = 0
+   | end_bb_0
+   |
+   | bb_1
+   |   if (i < N) goto bb_5 else goto bb_2
+   | end_bb_1
+   |
+   | bb_2
+   |   cond = some_computation;
+   |   if (cond) goto bb_3 else goto bb_4
+   | end_bb_2
+   |
+   | bb_3
+   |   A[i] = expr;
+   |   goto bb_4
+   | end_bb_3
+   |
+   | bb_4
+   |   goto bb_1
+   | end_bb_4
+
+   insert_gimplified_predicates inserts the computation of the COND
+   expression at the beginning of the destination basic block:
+
+   | bb_0
+   |   i = 0
+   | end_bb_0
+   |
+   | bb_1
+   |   if (i < N) goto bb_5 else goto bb_2
+   | end_bb_1
+   |
+   | bb_2
+   |   cond = some_computation;
+   |   if (cond) goto bb_3 else goto bb_4
+   | end_bb_2
+   |
+   | bb_3
+   |   cond = some_computation;
+   |   A[i] = expr;
+   |   goto bb_4
+   | end_bb_3
+   |
+   | bb_4
+   |   goto bb_1
+   | end_bb_4
+
+   predicate_mem_writes is then predicating the memory write as follows:
+
+   | bb_0
+   |   i = 0
+   | end_bb_0
+   |
+   | bb_1
+   |   if (i < N) goto bb_5 else goto bb_2
+   | end_bb_1
+   |
+   | bb_2
+   |   if (cond) goto bb_3 else goto bb_4
+   | end_bb_2
+   |
+   | bb_3
+   |   cond = some_computation;
+   |   A[i] = cond ? expr : A[i];
+   |   goto bb_4
+   | end_bb_3
+   |
+   | bb_4
+   |   goto bb_1
+   | end_bb_4
+
+   and finally combine_blocks removes the basic block boundaries making
+   the loop vectorizable:
+
+   | bb_0
+   |   i = 0
+   |   if (i < N) goto bb_5 else goto bb_1
+   | end_bb_0
+   |
+   | bb_1
+   |   cond = some_computation;
+   |   A[i] = cond ? expr : A[i];
+   |   if (i < N) goto bb_5 else goto bb_4
+   | end_bb_1
+   |
+   | bb_4
+   |   goto bb_1
+   | end_bb_4
+*/
 
 static void
 predicate_mem_writes (loop_p loop)
@@ -1396,35 +1495,24 @@  predicate_mem_writes (loop_p loop)
       gimple_stmt_iterator gsi;
       basic_block bb = ifc_bbs[i];
       tree cond = bb_predicate (bb);
+      gimple stmt;
 
       if (is_true_predicate (cond))
 	continue;
 
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	if (is_gimple_assign (gsi_stmt (gsi))
-	    && gimple_vdef (gsi_stmt (gsi)))
+	if ((stmt = gsi_stmt (gsi))
+	    && gimple_assign_single_p (stmt)
+	    && gimple_vdef (stmt))
 	  {
-	    gimple new_stmt, lhs_stmt, rhs_stmt;
-	    gimple stmt = gsi_stmt (gsi);
-	    tree lhs = gimple_get_lhs (stmt);
-	    tree rhs = gimple_op (stmt, 1);
-
-	    gcc_assert (gimple_num_ops (stmt) == 2);
-
-	    rhs_stmt = ifc_temp_var (TREE_TYPE (rhs), unshare_expr (rhs));
-	    gsi_insert_before (&gsi, rhs_stmt, GSI_SAME_STMT);
-	    rhs = gimple_get_lhs (rhs_stmt);
-
-	    lhs_stmt = ifc_temp_var (TREE_TYPE (lhs), unshare_expr (lhs));
-	    gsi_insert_before (&gsi, lhs_stmt, GSI_SAME_STMT);
-	    lhs = gimple_get_lhs (lhs_stmt);
-
-	    cond = unshare_expr (cond);
-	    rhs = build3 (COND_EXPR, TREE_TYPE (lhs), cond, rhs, lhs);
-
-	    new_stmt = ifc_temp_var (TREE_TYPE (lhs), rhs);
-	    gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
-	    gimple_set_op (stmt, 1, gimple_get_lhs (new_stmt));
+	    tree lhs = gimple_assign_lhs (stmt);
+	    tree rhs = gimple_assign_rhs1 (stmt);
+	    tree type = TREE_TYPE (lhs);
+
+	    lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
+	    rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
+	    rhs = build3 (COND_EXPR, type, unshare_expr (cond), rhs, lhs);
+	    gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
 	    update_stmt (stmt);
 	  }
     }
@@ -1628,7 +1716,7 @@  main_tree_if_conversion (void)
     changed |= tree_if_conversion (loop);
 
   if (changed && flag_tree_loop_if_convert_memory_writes)
-    update_ssa (TODO_update_ssa);
+    return TODO_update_ssa_only_virtuals;
 
   return changed ? TODO_cleanup_cfg : 0;
 }