
Minimize downward code motion during reassociation

Message ID CAPK5YPYUsvoskSJTfqYn+FPyTY0qQqzSJQ05B4dAzt0oefn-Ug@mail.gmail.com
State New

Commit Message

Easwaran Raman Oct. 18, 2012, 10:36 p.m. UTC
Hi,

During expression reassociation, statements are conservatively moved
downwards to ensure that dependences are correctly satisfied after
reassociation. This could lead to lengthening of live ranges. This
patch moves statements only to the extent necessary. Bootstraps with
no test regressions on x86_64/linux. OK for trunk?
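
To illustrate with a minimal sketch (not from an actual test case):

  t1 = a + b
  <several unrelated statements>
  t5 = t1 + c

today the rewrite moves the defining statements of the whole chain down
to just before t5, so t1 = a + b ends up below all the unrelated
statements and the live ranges of a and b are stretched across them;
with this patch, t1 is moved only far enough that the definition of c
is available.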

Thanks,
Easwaran

2012-10-18  Easwaran Raman  <eraman@google.com>
* tree-ssa-reassoc.c (assign_uids): New function.
(assign_uids_in_relevant_bbs): Likewise.
(ensure_ops_are_available): Likewise.
(rewrite_expr_tree): Do not move statements beyond what is
necessary. Remove call to swap_ops_for_binary_stmt...
(reassociate_bb): ... and move it here.



 /* Recursively rewrite our linearized statements so that the operators
    match those in OPS[OPINDEX], putting the computation in rank
    order.  */
@@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
   tree rhs2 = gimple_assign_rhs2 (stmt);
   operand_entry_t oe;

-  /* If we have three operands left, then we want to make sure the ones
-     that get the double binary op are chosen wisely.  */
-  if (opindex + 3 == VEC_length (operand_entry_t, ops))
-    swap_ops_for_binary_stmt (ops, opindex, stmt);
-
   /* The final recursion case for this function is that you have
      exactly two operations left.
      If we had exactly one op in the entire list to start with, we
@@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
     {
       if (!moved)
  {
-  gimple_stmt_iterator gsinow, gsirhs1;
-  gimple stmt1 = stmt, stmt2;
-  unsigned int count;
+  gimple stmt1 = stmt;
+  unsigned int count, i = 1;

-  gsinow = gsi_for_stmt (stmt);
   count = VEC_length (operand_entry_t, ops) - opindex - 2;
-  while (count-- != 0)
+  while (i <= count)
     {
-      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
-      gsirhs1 = gsi_for_stmt (stmt2);
-      gsi_move_before (&gsirhs1, &gsinow);
-      gsi_prev (&gsinow);
-      stmt1 = stmt2;
+      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
+              /* Ensure that STMT1 is moved to a place where all operands in
+                 OPS[opindex + i...] are available.  */
+              ensure_ops_are_available (stmt1, ops, opindex + i);
+              i++;
     }
   moved = true;
  }
@@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
       && VEC_length (operand_entry_t, ops) > 3)
     rewrite_expr_tree_parallel (stmt, width, ops);
   else
-    rewrite_expr_tree (stmt, 0, ops, false);
+                    {
+                      /* When there are three operands left, we want
+                         to make sure the ones that get the double
+                         binary op are chosen wisely.  */
+                      int len = VEC_length (operand_entry_t, ops);
+                      if (len >= 3)
+                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

+                      assign_uids_in_relevant_bbs (ops);
+      rewrite_expr_tree (stmt, 0, ops, false);
+                    }
+
   /* If we combined some repeated factors into a
      __builtin_powi call, multiply that result by the
      reassociated operands.  */

Comments

Xinliang David Li Oct. 20, 2012, 12:13 a.m. UTC | #1
On Thu, Oct 18, 2012 at 3:36 PM, Easwaran Raman <eraman@google.com> wrote:
> Hi,
>
> During expression reassociation, statements are conservatively moved
> downwards to ensure that dependences are correctly satisfied after
> reassociation. This could lead to lengthening of live ranges. This
> patch moves statements only to the extent necessary. Bootstraps with
> no test regressions on x86_64/linux. OK for trunk?
>
> Thanks,
> Easwaran
>
> 2012-10-18   Easwaran Raman  <eraman@google.com>
> * tree-ssa-reassoc.c (assign_uids): New function.
> (assign_uids_in_relevant_bbs): Likewise.
> (ensure_ops_are_available): Likewise.
> (rewrite_expr_tree): Do not move statements beyond what is
> necessary. Remove call to swap_ops_for_binary_stmt...
> (reassociate_bb): ... and move it here.
>
>
>
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c (revision 192487)
> +++ gcc/tree-ssa-reassoc.c (working copy)
> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>      }
>  }
>
> +/* Assign UIDs to statements in basic block BB.  */
> +
> +static void
> +assign_uids (basic_block bb)
> +{
> +  unsigned uid = 0;
> +  gimple_stmt_iterator gsi;
> +  /* First assign uids to phis.  */
> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      gimple stmt = gsi_stmt (gsi);
> +      gimple_set_uid (stmt, uid++);
> +    }
> +
> +  /* Then assign uids to stmts.  */
> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      gimple stmt = gsi_stmt (gsi);
> +      gimple_set_uid (stmt, uid++);
> +    }
> +}
> +
> +/* For each operand in OPS, find the basic block that contains the statement
> +   which defines the operand. For all such basic blocks, assign UIDs.  */
> +
> +static void
> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
> +{
> +  operand_entry_t oe;
> +  int i;
> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
> +
> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
> +    {
> +      gimple def_stmt;
> +      basic_block bb;
> +      if (TREE_CODE (oe->op) != SSA_NAME)
> +        continue;
> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
> +      bb = gimple_bb (def_stmt);
> +      if (!pointer_set_contains (seen_bbs, bb))
> +        {
> +          assign_uids (bb);
> +          pointer_set_insert (seen_bbs, bb);
> +        }
> +    }
> +  pointer_set_destroy (seen_bbs);
> +}
> +
> +/* Ensure that operands in the OPS vector starting from OPINDEXth
> entry are live
> +   at STMT.  This is accomplished by moving STMT if needed.  */
> +
> +static void
> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *
> ops, int opindex)

format issue?

> +{
> +  int i;
> +  int len = VEC_length (operand_entry_t, ops);
> +  gimple insert_stmt = stmt;
> +  basic_block insert_bb = gimple_bb (stmt);
> +  gimple_stmt_iterator gsi_insert, gsistmt;
> +  for (i = opindex; i < len; i++)
> +    {
> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
> +      gimple def_stmt;
> +      basic_block def_bb;
> +      /* Ignore constants and operands with default definitions.  */
> +      if (TREE_CODE (oe->op) != SSA_NAME
> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
> +        continue;
> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
> +      def_bb = gimple_bb (def_stmt);
> +      if (def_bb != insert_bb
> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
> +        {
> +          insert_bb = def_bb;
> +          insert_stmt = def_stmt;
> +        }
> +      else if (def_bb == insert_bb
> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
> +        insert_stmt = def_stmt;
> +    }
> +  if (insert_stmt == stmt)
> +    return;
> +  gsistmt = gsi_for_stmt (stmt);
> +  /* If GSI_STMT is a phi node, then do not insert just after that statement.

GSI_STMT --> INSERT_STMT?

> +     Instead, find the first non-label gimple statement in BB and insert before
> +     that.  */
> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
> +    {
> +      gsi_insert = gsi_after_labels (insert_bb);
> +      gsi_move_before (&gsistmt, &gsi_insert);
> +    }
> +  /* Statements marked for throw can not be in the middle of a basic block. So
> +     we can not insert a statement (not marked for throw) immediately
> after.  */
> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0
> +           && stmt_can_throw_internal (insert_stmt))




> +    {
> +      edge e, succ_edge = NULL;
> +      edge_iterator ei;
> +
> +      /* There should be exactly one normal edge out of the basic block.  */
> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
> +        {
> +          if (!(e->flags & EDGE_COMPLEX))
> +            {
> +              gcc_assert (succ_edge == NULL);
> +              succ_edge = e;
> +            }
> +        }
> +      /* Insert STMT at the beginning of the successor basic block.  */
> +      insert_bb = succ_edge->dest;
> +      gsi_insert = gsi_after_labels (insert_bb);
> +      gsi_move_before (&gsistmt, &gsi_insert);
> +    }
> +  else
> +    {
> +      gsi_insert = gsi_for_stmt (insert_stmt);
> +      gsi_move_after (&gsistmt, &gsi_insert);
> +    }
> +
> +}
> +
>  /* Recursively rewrite our linearized statements so that the operators
>     match those in OPS[OPINDEX], putting the computation in rank
>     order.  */
> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>    tree rhs2 = gimple_assign_rhs2 (stmt);
>    operand_entry_t oe;
>
> -  /* If we have three operands left, then we want to make sure the ones
> -     that get the double binary op are chosen wisely.  */
> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
> -

Why moving the swap down ?


>    /* The final recursion case for this function is that you have
>       exactly two operations left.
>       If we had exactly one op in the entire list to start with, we
> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>      {
>        if (!moved)
>   {
> -  gimple_stmt_iterator gsinow, gsirhs1;
> -  gimple stmt1 = stmt, stmt2;
> -  unsigned int count;
> +  gimple stmt1 = stmt;
> +  unsigned int count, i = 1;
>
> -  gsinow = gsi_for_stmt (stmt);
>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
> -  while (count-- != 0)
> +  while (i <= count)
>      {
> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
> -      gsirhs1 = gsi_for_stmt (stmt2);
> -      gsi_move_before (&gsirhs1, &gsinow);
> -      gsi_prev (&gsinow);
> -      stmt1 = stmt2;
> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
> +              /* Ensure that STMT1 is moved to a place where all operands in
> +                 OPS[opindex + i...] are available.  */
> +              ensure_ops_are_available (stmt1, ops, opindex + i);
> +              i++;
>      }
>    moved = true;
>   }
> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>        && VEC_length (operand_entry_t, ops) > 3)
>      rewrite_expr_tree_parallel (stmt, width, ops);
>    else
> -    rewrite_expr_tree (stmt, 0, ops, false);
> +                    {
> +                      /* When there are three operands left, we want
> +                         to make sure the ones that get the double
> +                         binary op are chosen wisely.  */
> +                      int len = VEC_length (operand_entry_t, ops);
> +                      if (len >= 3)
> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>
> +                      assign_uids_in_relevant_bbs (ops);
> +      rewrite_expr_tree (stmt, 0, ops, false);
> +                    }

Fix indentation.


David
> +
>    /* If we combined some repeated factors into a
>       __builtin_powi call, multiply that result by the
>       reassociated operands.  */
Easwaran Raman Oct. 20, 2012, 1:16 a.m. UTC | #2
On Fri, Oct 19, 2012 at 5:13 PM, Xinliang David Li <davidxl@google.com> wrote:
> On Thu, Oct 18, 2012 at 3:36 PM, Easwaran Raman <eraman@google.com> wrote:
>> Hi,
>>
>> During expression reassociation, statements are conservatively moved
>> downwards to ensure that dependences are correctly satisfied after
>> reassociation. This could lead to lengthening of live ranges. This
>> patch moves statements only to the extent necessary. Bootstraps with
>> no test regressions on x86_64/linux. OK for trunk?
>>
>> Thanks,
>> Easwaran
>>
>> 2012-10-18   Easwaran Raman  <eraman@google.com>
>> * tree-ssa-reassoc.c (assign_uids): New function.
>> (assign_uids_in_relevant_bbs): Likewise.
>> (ensure_ops_are_available): Likewise.
>> (rewrite_expr_tree): Do not move statements beyond what is
>> necessary. Remove call to swap_ops_for_binary_stmt...
>> (reassociate_bb): ... and move it here.
>>
>>
>>
>> Index: gcc/tree-ssa-reassoc.c
>> ===================================================================
>> --- gcc/tree-ssa-reassoc.c (revision 192487)
>> +++ gcc/tree-ssa-reassoc.c (working copy)
>> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>      }
>>  }
>>
>> +/* Assign UIDs to statements in basic block BB.  */
>> +
>> +static void
>> +assign_uids (basic_block bb)
>> +{
>> +  unsigned uid = 0;
>> +  gimple_stmt_iterator gsi;
>> +  /* First assign uids to phis.  */
>> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> +    {
>> +      gimple stmt = gsi_stmt (gsi);
>> +      gimple_set_uid (stmt, uid++);
>> +    }
>> +
>> +  /* Then assign uids to stmts.  */
>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> +    {
>> +      gimple stmt = gsi_stmt (gsi);
>> +      gimple_set_uid (stmt, uid++);
>> +    }
>> +}
>> +
>> +/* For each operand in OPS, find the basic block that contains the statement
>> +   which defines the operand. For all such basic blocks, assign UIDs.  */
>> +
>> +static void
>> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
>> +{
>> +  operand_entry_t oe;
>> +  int i;
>> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
>> +
>> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>> +    {
>> +      gimple def_stmt;
>> +      basic_block bb;
>> +      if (TREE_CODE (oe->op) != SSA_NAME)
>> +        continue;
>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>> +      bb = gimple_bb (def_stmt);
>> +      if (!pointer_set_contains (seen_bbs, bb))
>> +        {
>> +          assign_uids (bb);
>> +          pointer_set_insert (seen_bbs, bb);
>> +        }
>> +    }
>> +  pointer_set_destroy (seen_bbs);
>> +}
>> +
>> +/* Ensure that operands in the OPS vector starting from OPINDEXth
>> entry are live
>> +   at STMT.  This is accomplished by moving STMT if needed.  */
>> +
>> +static void
>> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *
>> ops, int opindex)
>
> format issue?
>
>> +{
>> +  int i;
>> +  int len = VEC_length (operand_entry_t, ops);
>> +  gimple insert_stmt = stmt;
>> +  basic_block insert_bb = gimple_bb (stmt);
>> +  gimple_stmt_iterator gsi_insert, gsistmt;
>> +  for (i = opindex; i < len; i++)
>> +    {
>> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
>> +      gimple def_stmt;
>> +      basic_block def_bb;
>> +      /* Ignore constants and operands with default definitions.  */
>> +      if (TREE_CODE (oe->op) != SSA_NAME
>> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
>> +        continue;
>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>> +      def_bb = gimple_bb (def_stmt);
>> +      if (def_bb != insert_bb
>> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
>> +        {
>> +          insert_bb = def_bb;
>> +          insert_stmt = def_stmt;
>> +        }
>> +      else if (def_bb == insert_bb
>> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
>> +        insert_stmt = def_stmt;
>> +    }
>> +  if (insert_stmt == stmt)
>> +    return;
>> +  gsistmt = gsi_for_stmt (stmt);
>> +  /* If GSI_STMT is a phi node, then do not insert just after that statement.
>
> GSI_STMT --> INSERT_STMT?

Ok.
>> +     Instead, find the first non-label gimple statement in BB and insert before
>> +     that.  */
>> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
>> +    {
>> +      gsi_insert = gsi_after_labels (insert_bb);
>> +      gsi_move_before (&gsistmt, &gsi_insert);
>> +    }
>> +  /* Statements marked for throw can not be in the middle of a basic block. So
>> +     we can not insert a statement (not marked for throw) immediately
>> after.  */
>> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0
>> +           && stmt_can_throw_internal (insert_stmt))
>
>
>
>
>> +    {
>> +      edge e, succ_edge = NULL;
>> +      edge_iterator ei;
>> +
>> +      /* There should be exactly one normal edge out of the basic block.  */
>> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
>> +        {
>> +          if (!(e->flags & EDGE_COMPLEX))
>> +            {
>> +              gcc_assert (succ_edge == NULL);
>> +              succ_edge = e;
>> +            }
>> +        }
>> +      /* Insert STMT at the beginning of the successor basic block.  */
>> +      insert_bb = succ_edge->dest;
>> +      gsi_insert = gsi_after_labels (insert_bb);
>> +      gsi_move_before (&gsistmt, &gsi_insert);
>> +    }
>> +  else
>> +    {
>> +      gsi_insert = gsi_for_stmt (insert_stmt);
>> +      gsi_move_after (&gsistmt, &gsi_insert);
>> +    }
>> +
>> +}
>> +
>>  /* Recursively rewrite our linearized statements so that the operators
>>     match those in OPS[OPINDEX], putting the computation in rank
>>     order.  */
>> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>    operand_entry_t oe;
>>
>> -  /* If we have three operands left, then we want to make sure the ones
>> -     that get the double binary op are chosen wisely.  */
>> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
>> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
>> -
>
> Why moving the swap down ?
Otherwise, when I call ensure_ops_are_available on the leaf statement
(last statement in the chain), I might end up using the wrong
operands. ensure_ops_are_available makes sure that the last stmt comes
after ops[len-2] and ops[len-1]. If I call swap_ops_for_binary_stmt
after this, we might have the wrong dependences.
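
Concretely, as a sketch: with ops = {a, b, c}, ensure_ops_are_available
places the leaf statement after the definitions of b and c (ops[len-2]
and ops[len-1]).  If swap_ops_for_binary_stmt then exchanged, say, a
and c, the leaf statement would be using a while possibly still
sitting above a's definition.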

>
>>    /* The final recursion case for this function is that you have
>>       exactly two operations left.
>>       If we had exactly one op in the entire list to start with, we
>> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>      {
>>        if (!moved)
>>   {
>> -  gimple_stmt_iterator gsinow, gsirhs1;
>> -  gimple stmt1 = stmt, stmt2;
>> -  unsigned int count;
>> +  gimple stmt1 = stmt;
>> +  unsigned int count, i = 1;
>>
>> -  gsinow = gsi_for_stmt (stmt);
>>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
>> -  while (count-- != 0)
>> +  while (i <= count)
>>      {
>> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>> -      gsirhs1 = gsi_for_stmt (stmt2);
>> -      gsi_move_before (&gsirhs1, &gsinow);
>> -      gsi_prev (&gsinow);
>> -      stmt1 = stmt2;
>> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>> +              /* Ensure that STMT1 is moved to a place where all operands in
>> +                 OPS[opindex + i...] are available.  */
>> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>> +              i++;
>>      }
>>    moved = true;
>>   }
>> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>>        && VEC_length (operand_entry_t, ops) > 3)
>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>    else
>> -    rewrite_expr_tree (stmt, 0, ops, false);
>> +                    {
>> +                      /* When there are three operands left, we want
>> +                         to make sure the ones that get the double
>> +                         binary op are chosen wisely.  */
>> +                      int len = VEC_length (operand_entry_t, ops);
>> +                      if (len >= 3)
>> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>>
>> +                      assign_uids_in_relevant_bbs (ops);
>> +      rewrite_expr_tree (stmt, 0, ops, false);
>> +                    }
>
> Fix indentation.
>
Ok.

- Easwaran
> David
>> +
>>    /* If we combined some repeated factors into a
>>       __builtin_powi call, multiply that result by the
>>       reassociated operands.  */
Richard Biener Oct. 22, 2012, 7:59 a.m. UTC | #3
On Fri, Oct 19, 2012 at 12:36 AM, Easwaran Raman <eraman@google.com> wrote:
> Hi,
>
> During expression reassociation, statements are conservatively moved
> downwards to ensure that dependences are correctly satisfied after
> reassociation. This could lead to lengthening of live ranges. This
> patch moves statements only to the extent necessary. Bootstraps with
> no test regressions on x86_64/linux. OK for trunk?
>
> Thanks,
> Easwaran
>
> 2012-10-18   Easwaran Raman  <eraman@google.com>
> * tree-ssa-reassoc.c (assign_uids): New function.
> (assign_uids_in_relevant_bbs): Likewise.
> (ensure_ops_are_available): Likewise.
> (rewrite_expr_tree): Do not move statements beyond what is
> necessary. Remove call to swap_ops_for_binary_stmt...
> (reassociate_bb): ... and move it here.
>
>
>
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c (revision 192487)
> +++ gcc/tree-ssa-reassoc.c (working copy)
> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>      }
>  }
>
> +/* Assign UIDs to statements in basic block BB.  */
> +
> +static void
> +assign_uids (basic_block bb)
> +{
> +  unsigned uid = 0;
> +  gimple_stmt_iterator gsi;
> +  /* First assign uids to phis.  */
> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      gimple stmt = gsi_stmt (gsi);
> +      gimple_set_uid (stmt, uid++);
> +    }
> +
> +  /* Then assign uids to stmts.  */
> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      gimple stmt = gsi_stmt (gsi);
> +      gimple_set_uid (stmt, uid++);
> +    }
> +}
> +
> +/* For each operand in OPS, find the basic block that contains the statement
> +   which defines the operand. For all such basic blocks, assign UIDs.  */
> +
> +static void
> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
> +{
> +  operand_entry_t oe;
> +  int i;
> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
> +
> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
> +    {
> +      gimple def_stmt;
> +      basic_block bb;
> +      if (TREE_CODE (oe->op) != SSA_NAME)
> +        continue;
> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
> +      bb = gimple_bb (def_stmt);
> +      if (!pointer_set_contains (seen_bbs, bb))
> +        {
> +          assign_uids (bb);
> +          pointer_set_insert (seen_bbs, bb);
> +        }
> +    }
> +  pointer_set_destroy (seen_bbs);
> +}

Please assign UIDs once using the existing renumber_gimple_stmt_uids ().
You seem to call the above multiple times and thus do work bigger than
O(number of basic blocks).

> +/* Ensure that operands in the OPS vector starting from OPINDEXth
> entry are live
> +   at STMT.  This is accomplished by moving STMT if needed.  */
> +
> +static void
> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *
> ops, int opindex)
> +{
> +  int i;
> +  int len = VEC_length (operand_entry_t, ops);
> +  gimple insert_stmt = stmt;
> +  basic_block insert_bb = gimple_bb (stmt);
> +  gimple_stmt_iterator gsi_insert, gsistmt;
> +  for (i = opindex; i < len; i++)
> +    {

Likewise you call this for each call to rewrite_expr_tree, so it seems to me
this is quadratic in the number of ops in the op vector.

Why make this all so complicated?  It seems to me that we should
fixup stmt order only after the whole ops vector has been materialized.

> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
> +      gimple def_stmt;
> +      basic_block def_bb;
> +      /* Ignore constants and operands with default definitions.  */
> +      if (TREE_CODE (oe->op) != SSA_NAME
> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
> +        continue;
> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
> +      def_bb = gimple_bb (def_stmt);
> +      if (def_bb != insert_bb
> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
> +        {
> +          insert_bb = def_bb;
> +          insert_stmt = def_stmt;
> +        }
> +      else if (def_bb == insert_bb
> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
> +        insert_stmt = def_stmt;
> +    }
> +  if (insert_stmt == stmt)
> +    return;
> +  gsistmt = gsi_for_stmt (stmt);
> +  /* If GSI_STMT is a phi node, then do not insert just after that statement.
> +     Instead, find the first non-label gimple statement in BB and insert before
> +     that.  */
> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
> +    {
> +      gsi_insert = gsi_after_labels (insert_bb);
> +      gsi_move_before (&gsistmt, &gsi_insert);
> +    }
> +  /* Statements marked for throw can not be in the middle of a basic block. So
> +     we can not insert a statement (not marked for throw) immediately
> after.  */
> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0

that's already performed by stmt_can_throw_internal

> +           && stmt_can_throw_internal (insert_stmt))

But all this should be a non-issue as re-assoc should never assign an ops
vector entry for such stmts (but it could have leafs defined by such stmts).
If you only ever move definitions down, like the present code does,
caring about leafs should not be necessary.

Richard.

> +    {
> +      edge e, succ_edge = NULL;
> +      edge_iterator ei;
> +
> +      /* There should be exactly one normal edge out of the basic block.  */
> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
> +        {
> +          if (!(e->flags & EDGE_COMPLEX))
> +            {
> +              gcc_assert (succ_edge == NULL);
> +              succ_edge = e;
> +            }
> +        }
> +      /* Insert STMT at the beginning of the successor basic block.  */
> +      insert_bb = succ_edge->dest;
> +      gsi_insert = gsi_after_labels (insert_bb);
> +      gsi_move_before (&gsistmt, &gsi_insert);
> +    }
> +  else
> +    {
> +      gsi_insert = gsi_for_stmt (insert_stmt);
> +      gsi_move_after (&gsistmt, &gsi_insert);
> +    }
> +
> +}
> +
>  /* Recursively rewrite our linearized statements so that the operators
>     match those in OPS[OPINDEX], putting the computation in rank
>     order.  */
> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>    tree rhs2 = gimple_assign_rhs2 (stmt);
>    operand_entry_t oe;
>
> -  /* If we have three operands left, then we want to make sure the ones
> -     that get the double binary op are chosen wisely.  */
> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
> -
>    /* The final recursion case for this function is that you have
>       exactly two operations left.
>       If we had exactly one op in the entire list to start with, we
> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>      {
>        if (!moved)
>   {
> -  gimple_stmt_iterator gsinow, gsirhs1;
> -  gimple stmt1 = stmt, stmt2;
> -  unsigned int count;
> +  gimple stmt1 = stmt;
> +  unsigned int count, i = 1;
>
> -  gsinow = gsi_for_stmt (stmt);
>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
> -  while (count-- != 0)
> +  while (i <= count)
>      {
> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
> -      gsirhs1 = gsi_for_stmt (stmt2);
> -      gsi_move_before (&gsirhs1, &gsinow);
> -      gsi_prev (&gsinow);
> -      stmt1 = stmt2;
> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
> +              /* Ensure that STMT1 is moved to a place where all operands in
> +                 OPS[opindex + i...] are available.  */
> +              ensure_ops_are_available (stmt1, ops, opindex + i);
> +              i++;
>      }
>    moved = true;
>   }
> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>        && VEC_length (operand_entry_t, ops) > 3)
>      rewrite_expr_tree_parallel (stmt, width, ops);
>    else
> -    rewrite_expr_tree (stmt, 0, ops, false);
> +                    {
> +                      /* When there are three operands left, we want
> +                         to make sure the ones that get the double
> +                         binary op are chosen wisely.  */
> +                      int len = VEC_length (operand_entry_t, ops);
> +                      if (len >= 3)
> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>
> +                      assign_uids_in_relevant_bbs (ops);
> +      rewrite_expr_tree (stmt, 0, ops, false);
> +                    }
> +
>    /* If we combined some repeated factors into a
>       __builtin_powi call, multiply that result by the
>       reassociated operands.  */
Easwaran Raman Oct. 22, 2012, 6:31 p.m. UTC | #4
On Mon, Oct 22, 2012 at 12:59 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Fri, Oct 19, 2012 at 12:36 AM, Easwaran Raman <eraman@google.com> wrote:
>> Hi,
>>
>> During expression reassociation, statements are conservatively moved
>> downwards to ensure that dependences are correctly satisfied after
>> reassociation. This could lead to lengthening of live ranges. This
>> patch moves statements only to the extent necessary. Bootstraps with
>> no test regressions on x86_64/linux. OK for trunk?
>>
>> Thanks,
>> Easwaran
>>
>> 2012-10-18   Easwaran Raman  <eraman@google.com>
>> * tree-ssa-reassoc.c (assign_uids): New function.
>> (assign_uids_in_relevant_bbs): Likewise.
>> (ensure_ops_are_available): Likewise.
>> (rewrite_expr_tree): Do not move statements beyond what is
>> necessary. Remove call to swap_ops_for_binary_stmt...
>> (reassociate_bb): ... and move it here.
>>
>>
>>
>> Index: gcc/tree-ssa-reassoc.c
>> ===================================================================
>> --- gcc/tree-ssa-reassoc.c (revision 192487)
>> +++ gcc/tree-ssa-reassoc.c (working copy)
>> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>      }
>>  }
>>
>> +/* Assign UIDs to statements in basic block BB.  */
>> +
>> +static void
>> +assign_uids (basic_block bb)
>> +{
>> +  unsigned uid = 0;
>> +  gimple_stmt_iterator gsi;
>> +  /* First assign uids to phis.  */
>> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> +    {
>> +      gimple stmt = gsi_stmt (gsi);
>> +      gimple_set_uid (stmt, uid++);
>> +    }
>> +
>> +  /* Then assign uids to stmts.  */
>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> +    {
>> +      gimple stmt = gsi_stmt (gsi);
>> +      gimple_set_uid (stmt, uid++);
>> +    }
>> +}
>> +
>> +/* For each operand in OPS, find the basic block that contains the statement
>> +   which defines the operand. For all such basic blocks, assign UIDs.  */
>> +
>> +static void
>> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
>> +{
>> +  operand_entry_t oe;
>> +  int i;
>> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
>> +
>> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>> +    {
>> +      gimple def_stmt;
>> +      basic_block bb;
>> +      if (TREE_CODE (oe->op) != SSA_NAME)
>> +        continue;
>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>> +      bb = gimple_bb (def_stmt);
>> +      if (!pointer_set_contains (seen_bbs, bb))
>> +        {
>> +          assign_uids (bb);
>> +          pointer_set_insert (seen_bbs, bb);
>> +        }
>> +    }
>> +  pointer_set_destroy (seen_bbs);
>> +}
>
> Please assign UIDs once using the existing renumber_gimple_stmt_uids ().
> You seem to call the above multiple times and thus do work bigger than
> O(number of basic blocks).

The reason I call the above multiple times is that gsi_move_before
might get called between two calls to the above. For instance, after
rewrite_expr_tree is called once, the following sequence of calls
could happen:
reassociate_bb -> linearize_expr_tree -> linearize_expr ->
gsi_move_before.  So it is not sufficient to call
renumber_gimple_stmt_uids once per do_reassoc. Or do you want me to
use renumber_gimple_stmt_uids_in_blocks instead of
assign_uids_in_relevant_bbs?



>> +/* Ensure that operands in the OPS vector starting from OPINDEXth
>> entry are live
>> +   at STMT.  This is accomplished by moving STMT if needed.  */
>> +
>> +static void
>> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *
>> ops, int opindex)
>> +{
>> +  int i;
>> +  int len = VEC_length (operand_entry_t, ops);
>> +  gimple insert_stmt = stmt;
>> +  basic_block insert_bb = gimple_bb (stmt);
>> +  gimple_stmt_iterator gsi_insert, gsistmt;
>> +  for (i = opindex; i < len; i++)
>> +    {
>
> Likewise you call this for each call to rewrite_expr_tree, so it seems to me
> this is quadratic in the number of ops in the op vector.

The call to ensure_ops_are_available inside rewrite_expr_tree is
guarded by if (!moved) and I am setting moved = true there to ensure
that ensure_ops_are_available is called once per reassociation of an
expression tree.

>
> Why make this all so complicated?  It seems to me that we should
> fixup stmt order only after the whole ops vector has been materialized.


>
>> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
>> +      gimple def_stmt;
>> +      basic_block def_bb;
>> +      /* Ignore constants and operands with default definitions.  */
>> +      if (TREE_CODE (oe->op) != SSA_NAME
>> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
>> +        continue;
>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>> +      def_bb = gimple_bb (def_stmt);
>> +      if (def_bb != insert_bb
>> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
>> +        {
>> +          insert_bb = def_bb;
>> +          insert_stmt = def_stmt;
>> +        }
>> +      else if (def_bb == insert_bb
>> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
>> +        insert_stmt = def_stmt;
>> +    }
>> +  if (insert_stmt == stmt)
>> +    return;
>> +  gsistmt = gsi_for_stmt (stmt);
>> +  /* If GSI_STMT is a phi node, then do not insert just after that statement.
>> +     Instead, find the first non-label gimple statement in BB and insert before
>> +     that.  */
>> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
>> +    {
>> +      gsi_insert = gsi_after_labels (insert_bb);
>> +      gsi_move_before (&gsistmt, &gsi_insert);
>> +    }
>> +  /* Statements marked for throw can not be in the middle of a basic block. So
>> +     we can not insert a statement (not marked for throw) immediately
>> after.  */
>> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0
>
> that's already performed by stmt_can_throw_internal
Ok. I was hit by the "statement marked for throw in middle of block"
error in verify_gimple_in_cfg  and simply copied the sequence of
checks that led to that.

>
>> +           && stmt_can_throw_internal (insert_stmt))
>
> But all this should be a non-issue as re-assoc should never assign an ops
> vector entry for such stmts (but it could have leafs defined by such stmts).
>> If you only ever move definitions down, like the present code does,
>> caring about leafs should not be necessary.

I added this as I got an ICE in verify_gimple_in_cfg. So it seems like
such statements get assigned into the ops vector.

Thanks,
Easwaran
>
> Richard.
>
>> +    {
>> +      edge e, succ_edge = NULL;
>> +      edge_iterator ei;
>> +
>> +      /* There should be exactly one normal edge out of the basic block.  */
>> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
>> +        {
>> +          if (!(e->flags & EDGE_COMPLEX))
>> +            {
>> +              gcc_assert (succ_edge == NULL);
>> +              succ_edge = e;
>> +            }
>> +        }
>> +      /* Insert STMT at the beginning of the successor basic block.  */
>> +      insert_bb = succ_edge->dest;
>> +      gsi_insert = gsi_after_labels (insert_bb);
>> +      gsi_move_before (&gsistmt, &gsi_insert);
>> +    }
>> +  else
>> +    {
>> +      gsi_insert = gsi_for_stmt (insert_stmt);
>> +      gsi_move_after (&gsistmt, &gsi_insert);
>> +    }
>> +
>> +}
>> +
>>  /* Recursively rewrite our linearized statements so that the operators
>>     match those in OPS[OPINDEX], putting the computation in rank
>>     order.  */
>> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>    operand_entry_t oe;
>>
>> -  /* If we have three operands left, then we want to make sure the ones
>> -     that get the double binary op are chosen wisely.  */
>> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
>> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
>> -
>>    /* The final recursion case for this function is that you have
>>       exactly two operations left.
>>       If we had exactly one op in the entire list to start with, we
>> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>      {
>>        if (!moved)
>>   {
>> -  gimple_stmt_iterator gsinow, gsirhs1;
>> -  gimple stmt1 = stmt, stmt2;
>> -  unsigned int count;
>> +  gimple stmt1 = stmt;
>> +  unsigned int count, i = 1;
>>
>> -  gsinow = gsi_for_stmt (stmt);
>>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
>> -  while (count-- != 0)
>> +  while (i <= count)
>>      {
>> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>> -      gsirhs1 = gsi_for_stmt (stmt2);
>> -      gsi_move_before (&gsirhs1, &gsinow);
>> -      gsi_prev (&gsinow);
>> -      stmt1 = stmt2;
>> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>> +              /* Ensure that STMT1 is moved to a place where all operands in
>> +                 OPS[opindex + i...] are available.  */
>> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>> +              i++;
>>      }
>>    moved = true;
>>   }
>> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>>        && VEC_length (operand_entry_t, ops) > 3)
>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>    else
>> -    rewrite_expr_tree (stmt, 0, ops, false);
>> +                    {
>> +                      /* When there are three operands left, we want
>> +                         to make sure the ones that get the double
>> +                         binary op are chosen wisely.  */
>> +                      int len = VEC_length (operand_entry_t, ops);
>> +                      if (len >= 3)
>> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>>
>> +                      assign_uids_in_relevant_bbs (ops);
>> +      rewrite_expr_tree (stmt, 0, ops, false);
>> +                    }
>> +
>>    /* If we combined some repeated factors into a
>>       __builtin_powi call, multiply that result by the
>>       reassociated operands.  */
Richard Biener Oct. 23, 2012, 9:52 a.m. UTC | #5
On Mon, Oct 22, 2012 at 8:31 PM, Easwaran Raman <eraman@google.com> wrote:
> On Mon, Oct 22, 2012 at 12:59 AM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Fri, Oct 19, 2012 at 12:36 AM, Easwaran Raman <eraman@google.com> wrote:
>>> Hi,
>>>
>>> During expression reassociation, statements are conservatively moved
>>> downwards to ensure that dependences are correctly satisfied after
>>> reassociation. This could lead to lengthening of live ranges. This
>>> patch moves statements only to the extent necessary. Bootstraps with
>>> no test regressions on x86_64/linux. OK for trunk?
>>>
>>> Thanks,
>>> Easwaran
>>>
>>> 2012-10-18   Easwaran Raman  <eraman@google.com>
>>> * tree-ssa-reassoc.c (assign_uids): New function.
>>> (assign_uids_in_relevant_bbs): Likewise.
>>> (ensure_ops_are_available): Likewise.
>>> (rewrite_expr_tree): Do not move statements beyond what is
>>> necessary. Remove call to swap_ops_for_binary_stmt...
>>> (reassociate_bb): ... and move it here.
>>>
>>>
>>>
>>> Index: gcc/tree-ssa-reassoc.c
>>> ===================================================================
>>> --- gcc/tree-ssa-reassoc.c (revision 192487)
>>> +++ gcc/tree-ssa-reassoc.c (working copy)
>>> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>>      }
>>>  }
>>>
>>> +/* Assign UIDs to statements in basic block BB.  */
>>> +
>>> +static void
>>> +assign_uids (basic_block bb)
>>> +{
>>> +  unsigned uid = 0;
>>> +  gimple_stmt_iterator gsi;
>>> +  /* First assign uids to phis.  */
>>> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>> +    {
>>> +      gimple stmt = gsi_stmt (gsi);
>>> +      gimple_set_uid (stmt, uid++);
>>> +    }
>>> +
>>> +  /* Then assign uids to stmts.  */
>>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>> +    {
>>> +      gimple stmt = gsi_stmt (gsi);
>>> +      gimple_set_uid (stmt, uid++);
>>> +    }
>>> +}
>>> +
>>> +/* For each operand in OPS, find the basic block that contains the statement
>>> +   which defines the operand. For all such basic blocks, assign UIDs.  */
>>> +
>>> +static void
>>> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
>>> +{
>>> +  operand_entry_t oe;
>>> +  int i;
>>> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
>>> +
>>> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>>> +    {
>>> +      gimple def_stmt;
>>> +      basic_block bb;
>>> +      if (TREE_CODE (oe->op) != SSA_NAME)
>>> +        continue;
>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>> +      bb = gimple_bb (def_stmt);
>>> +      if (!pointer_set_contains (seen_bbs, bb))
>>> +        {
>>> +          assign_uids (bb);
>>> +          pointer_set_insert (seen_bbs, bb);
>>> +        }
>>> +    }
>>> +  pointer_set_destroy (seen_bbs);
>>> +}
>>
>> Please assign UIDs once using the existing renumber_gimple_stmt_uids ().
>> You seem to call the above multiple times and thus do work bigger than
>> O(number of basic blocks).
>
> The reason I call the above multiple times is that gsi_move_before
> might get called between two calls to the above. For instance, after
> rewrite_expr_tree is called once, the following sequence of calls
> could happen:
> reassociate_bb -> linearize_expr_tree -> linearize_expr ->
> gsi_move_before.  So it is not sufficient to call
> renumber_gimple_stmt_uids once per do_reassoc. Or do you want me to
> use renumber_gimple_stmt_uids_in_blocks instead of
> assign_uids_in_relevant_bbs?

It's sufficient to call it once if you conservatively update UIDs at stmt
move / insert time (assign the same UID as the stmt before/after).
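
A minimal sketch of that scheme (the helper below is hypothetical and
only illustrates the suggestion; it assumes UIDs are only ever compared
for relative order within one block):

  /* Reuse the UID of NEIGHBOR, the statement NEW_STMT was moved or
     inserted next to, so that uid-based ordering checks in the block
     stay usable without renumbering the whole block.  */
  static void
  copy_uid_from_neighbor (gimple new_stmt, gimple neighbor)
  {
    gimple_set_uid (new_stmt, gimple_uid (neighbor));
  }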

>
>
>>> +/* Ensure that operands in the OPS vector starting from OPINDEXth
>>> entry are live
>>> +   at STMT.  This is accomplished by moving STMT if needed.  */
>>> +
>>> +static void
>>> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *
>>> ops, int opindex)
>>> +{
>>> +  int i;
>>> +  int len = VEC_length (operand_entry_t, ops);
>>> +  gimple insert_stmt = stmt;
>>> +  basic_block insert_bb = gimple_bb (stmt);
>>> +  gimple_stmt_iterator gsi_insert, gsistmt;
>>> +  for (i = opindex; i < len; i++)
>>> +    {
>>
>> Likewise you call this for each call to rewrite_expr_tree, so it seems to me
>> this is quadratic in the number of ops in the op vector.
>
> The call to ensure_ops_are_available inside rewrite_expr_tree is
> guarded by if (!moved) and I am setting moved = true there to ensure
>> that ensure_ops_are_available is called once per reassociation of an
>> expression tree.

It would be a lot easier to understand this function if you call it once
after rewrite_expr_tree finished.

>>
>> Why make this all so complicated?  It seems to me that we should
>> fixup stmt order only after the whole ops vector has been materialized.
>
>
>>
>>> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
>>> +      gimple def_stmt;
>>> +      basic_block def_bb;
>>> +      /* Ignore constants and operands with default definitions.  */
>>> +      if (TREE_CODE (oe->op) != SSA_NAME
>>> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
>>> +        continue;
>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>> +      def_bb = gimple_bb (def_stmt);
>>> +      if (def_bb != insert_bb
>>> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
>>> +        {
>>> +          insert_bb = def_bb;
>>> +          insert_stmt = def_stmt;
>>> +        }
>>> +      else if (def_bb == insert_bb
>>> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
>>> +        insert_stmt = def_stmt;
>>> +    }
>>> +  if (insert_stmt == stmt)
>>> +    return;
>>> +  gsistmt = gsi_for_stmt (stmt);
>>> +  /* If GSI_STMT is a phi node, then do not insert just after that statement.
>>> +     Instead, find the first non-label gimple statement in BB and insert before
>>> +     that.  */
>>> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
>>> +    {
>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>> +    }
>>> +  /* Statements marked for throw can not be in the middle of a basic block. So
>>> +     we can not insert a statement (not marked for throw) immediately
>>> after.  */
>>> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0
>>
>> that's already performed by stmt_can_throw_internal
> Ok. I was hit by the "statement marked for throw in middle of block"
> error in verify_gimple_in_cfg  and simply copied the sequence of
> checks that led to that.
>
>>
>>> +           && stmt_can_throw_internal (insert_stmt))
>>
>> But all this should be a non-issue as re-assoc should never assign an ops
>> vector entry for such stmts (but it could have leafs defined by such stmts).
>> If you only ever move definitions down, like the present code does,
>> caring about leafs should not be necessary.
>
> I added this as I got an ICE in verify_gimple_in_cfg. So it seems like
> such statements get assigned into the ops vector.

That would be a bug.  See how linearize_expr_tree checks whether
the defining stmt of an op may throw.  That is, it _does_ add the
SSA def to the vector but does not recurse on its defining stmt operands.

Richard.

> Thanks,
> Easwaran
>>
>> Richard.
>>
>>> +    {
>>> +      edge e, succ_edge = NULL;
>>> +      edge_iterator ei;
>>> +
>>> +      /* There should be exactly one normal edge out of the basic block.  */
>>> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
>>> +        {
>>> +          if (!(e->flags & EDGE_COMPLEX))
>>> +            {
>>> +              gcc_assert (succ_edge == NULL);
>>> +              succ_edge = e;
>>> +            }
>>> +        }
>>> +      /* Insert STMT at the beginning of the successor basic block.  */
>>> +      insert_bb = succ_edge->dest;
>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>> +    }
>>> +  else
>>> +    {
>>> +      gsi_insert = gsi_for_stmt (insert_stmt);
>>> +      gsi_move_after (&gsistmt, &gsi_insert);
>>> +    }
>>> +
>>> +}
>>> +
>>>  /* Recursively rewrite our linearized statements so that the operators
>>>     match those in OPS[OPINDEX], putting the computation in rank
>>>     order.  */
>>> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>>    operand_entry_t oe;
>>>
>>> -  /* If we have three operands left, then we want to make sure the ones
>>> -     that get the double binary op are chosen wisely.  */
>>> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
>>> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
>>> -
>>>    /* The final recursion case for this function is that you have
>>>       exactly two operations left.
>>>       If we had exactly one op in the entire list to start with, we
>>> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>      {
>>>        if (!moved)
>>>   {
>>> -  gimple_stmt_iterator gsinow, gsirhs1;
>>> -  gimple stmt1 = stmt, stmt2;
>>> -  unsigned int count;
>>> +  gimple stmt1 = stmt;
>>> +  unsigned int count, i = 1;
>>>
>>> -  gsinow = gsi_for_stmt (stmt);
>>>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
>>> -  while (count-- != 0)
>>> +  while (i <= count)
>>>      {
>>> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>> -      gsirhs1 = gsi_for_stmt (stmt2);
>>> -      gsi_move_before (&gsirhs1, &gsinow);
>>> -      gsi_prev (&gsinow);
>>> -      stmt1 = stmt2;
>>> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>> +              /* Ensure that STMT1 is moved to a place where all operands in
>>> +                 OPS[opindex + i...] are available.  */
>>> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>>> +              i++;
>>>      }
>>>    moved = true;
>>>   }
>>> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>>>        && VEC_length (operand_entry_t, ops) > 3)
>>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>>    else
>>> -    rewrite_expr_tree (stmt, 0, ops, false);
>>> +                    {
>>> +                      /* When there are three operands left, we want
>>> +                         to make sure the ones that get the double
>>> +                         binary op are chosen wisely.  */
>>> +                      int len = VEC_length (operand_entry_t, ops);
>>> +                      if (len >= 3)
>>> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>>>
>>> +                      assign_uids_in_relevant_bbs (ops);
>>> +      rewrite_expr_tree (stmt, 0, ops, false);
>>> +                    }
>>> +
>>>    /* If we combined some repeated factors into a
>>>       __builtin_powi call, multiply that result by the
>>>       reassociated operands.  */
Easwaran Raman Oct. 24, 2012, 2:02 a.m. UTC | #6
On Tue, Oct 23, 2012 at 2:52 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Mon, Oct 22, 2012 at 8:31 PM, Easwaran Raman <eraman@google.com> wrote:
>> On Mon, Oct 22, 2012 at 12:59 AM, Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>> On Fri, Oct 19, 2012 at 12:36 AM, Easwaran Raman <eraman@google.com> wrote:
>>>> Hi,
>>>>
>>>> During expression reassociation, statements are conservatively moved
>>>> downwards to ensure that dependences are correctly satisfied after
>>>> reassociation. This could lead to lengthening of live ranges. This
>>>> patch moves statements only to the extent necessary. Bootstraps with
>>>> no test regressions on x86_64/linux. OK for trunk?
>>>>
>>>> Thanks,
>>>> Easwaran
>>>>
>>>> 2012-10-18   Easwaran Raman  <eraman@google.com>
>>>> * tree-ssa-reassoc.c (assign_uids): New function.
>>>> (assign_uids_in_relevant_bbs): Likewise.
>>>> (ensure_ops_are_available): Likewise.
>>>> (rewrite_expr_tree): Do not move statements beyond what is
>>>> necessary. Remove call to swap_ops_for_binary_stmt...
>>>> (reassociate_bb): ... and move it here.
>>>>
>>>>
>>>>
>>>> Index: gcc/tree-ssa-reassoc.c
>>>> ===================================================================
>>>> --- gcc/tree-ssa-reassoc.c (revision 192487)
>>>> +++ gcc/tree-ssa-reassoc.c (working copy)
>>>> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>>>      }
>>>>  }
>>>>
>>>> +/* Assign UIDs to statements in basic block BB.  */
>>>> +
>>>> +static void
>>>> +assign_uids (basic_block bb)
>>>> +{
>>>> +  unsigned uid = 0;
>>>> +  gimple_stmt_iterator gsi;
>>>> +  /* First assign uids to phis.  */
>>>> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>> +    {
>>>> +      gimple stmt = gsi_stmt (gsi);
>>>> +      gimple_set_uid (stmt, uid++);
>>>> +    }
>>>> +
>>>> +  /* Then assign uids to stmts.  */
>>>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>> +    {
>>>> +      gimple stmt = gsi_stmt (gsi);
>>>> +      gimple_set_uid (stmt, uid++);
>>>> +    }
>>>> +}
>>>> +
>>>> +/* For each operand in OPS, find the basic block that contains the statement
>>>> +   which defines the operand. For all such basic blocks, assign UIDs.  */
>>>> +
>>>> +static void
>>>> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
>>>> +{
>>>> +  operand_entry_t oe;
>>>> +  int i;
>>>> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
>>>> +
>>>> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>>>> +    {
>>>> +      gimple def_stmt;
>>>> +      basic_block bb;
>>>> +      if (TREE_CODE (oe->op) != SSA_NAME)
>>>> +        continue;
>>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>>> +      bb = gimple_bb (def_stmt);
>>>> +      if (!pointer_set_contains (seen_bbs, bb))
>>>> +        {
>>>> +          assign_uids (bb);
>>>> +          pointer_set_insert (seen_bbs, bb);
>>>> +        }
>>>> +    }
>>>> +  pointer_set_destroy (seen_bbs);
>>>> +}
>>>
>>> Please assign UIDs once using the existing renumber_gimple_stmt_uids ().
>>> You seem to call the above multiple times and thus do work bigger than
>>> O(number of basic blocks).
>>
>> The reason I call the above multiple times is that gsi_move_before
>> might get called between two calls to the above. For instance, after
>> rewrite_expr_tree is called once, the following sequence of calls
>> could happen:
>> reassociate_bb -> linearize_expr_tree -> linearize_expr ->
>> gsi_move_before.  So it is not sufficient to call
>> renumber_gimple_stmt_uids once per do_reassoc. Or do you want me to
>> use renumber_gimple_stmt_uids_in_blocks instead of
>> assign_uids_in_relevant_bbs?
>
> It's sufficient to call it once if you conservatively update UIDs at stmt
> move / insert time (assign the same UID as the stmt before/after).

I was worried that that would make the code brittle, but looking at
the places where a new statement is added or moved, I think it is
fine.  I have made the change in the new patch.

>>
>>
>>>> +/* Ensure that operands in the OPS vector starting from OPINDEXth
>>>> entry are live
>>>> +   at STMT.  This is accomplished by moving STMT if needed.  */
>>>> +
>>>> +static void
>>>> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *
>>>> ops, int opindex)
>>>> +{
>>>> +  int i;
>>>> +  int len = VEC_length (operand_entry_t, ops);
>>>> +  gimple insert_stmt = stmt;
>>>> +  basic_block insert_bb = gimple_bb (stmt);
>>>> +  gimple_stmt_iterator gsi_insert, gsistmt;
>>>> +  for (i = opindex; i < len; i++)
>>>> +    {
>>>
>>> Likewise you call this for each call to rewrite_expr_tree, so it seems to me
>>> this is quadratic in the number of ops in the op vector.
>>
>> The call to ensure_ops_are_available inside rewrite_expr_tree is
>> guarded by if (!moved) and I am setting moved = true there to ensure
>> that ensure_ops_are_available is called once per reassociation of an
>> expression tree.
>
> It would be a lot easier to understand this function if you call it once
> after rewrite_expr_tree finished.
I think you meant "before rewrite_expr_tree". One thing to note here
is that this function is called only when we first see a statement
whose operand changes after reassociation. That's the reason the
moving of statements happens inside rewrite_expr_tree.
>
>>>
>>> Why make this all so complicated?  It seems to me that we should
>>> fixup stmt order only after the whole ops vector has been materialized.
>>
>>
>>>
>>>> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
>>>> +      gimple def_stmt;
>>>> +      basic_block def_bb;
>>>> +      /* Ignore constants and operands with default definitions.  */
>>>> +      if (TREE_CODE (oe->op) != SSA_NAME
>>>> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
>>>> +        continue;
>>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>>> +      def_bb = gimple_bb (def_stmt);
>>>> +      if (def_bb != insert_bb
>>>> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
>>>> +        {
>>>> +          insert_bb = def_bb;
>>>> +          insert_stmt = def_stmt;
>>>> +        }
>>>> +      else if (def_bb == insert_bb
>>>> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
>>>> +        insert_stmt = def_stmt;
>>>> +    }
>>>> +  if (insert_stmt == stmt)
>>>> +    return;
>>>> +  gsistmt = gsi_for_stmt (stmt);
>>>> +  /* If GSI_STMT is a phi node, then do not insert just after that statement.
>>>> +     Instead, find the first non-label gimple statement in BB and insert before
>>>> +     that.  */
>>>> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
>>>> +    {
>>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>>> +    }
>>>> +  /* Statements marked for throw can not be in the middle of a basic block. So
>>>> +     we can not insert a statement (not marked for throw) immediately
>>>> after.  */
>>>> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0
>>>
>>> that's already performed by stmt_can_throw_internal
>> Ok. I was hit by the "statement marked for throw in middle of block"
>> error in verify_gimple_in_cfg  and simply copied the sequence of
>> checks that led to that.
>>
>>>
>>>> +           && stmt_can_throw_internal (insert_stmt))
>>>
>>> But all this should be a non-issue as re-assoc should never assign an ops
>>> vector entry for such stmts (but it could have leafs defined by such stmts).
>>> If you only ever move definitions down, like the present code does, caring
>>> about leafs should not be necessary.
>>
>> I added this as I got an ICE in verify_gimple_in_cfg. So it seems like
>> such statements get assigned into the ops vector.
>
> That would be a bug.  See how linearize_expr_tree checks whether
> the defining stmt of an op may throw.  That is, it _does_ add the
> SSA def to the vector but does not recurse on its defining stmt operands.

But as long as the SSA def of a throwing statement is added to the ops
vector, I need to add the check, even if it doesn't recurse on its
operands. Consider the following

<bb1>:
t1 = a + b
c = ... // c might throw

<bb2>:
t2 = t1 + c

The expression could be rewritten as (a+c) + b.

Since c's definition follows t1's, t1 = a+b has to be moved after c =
... That's why I need the check.
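Spelling out the end state for that example, assuming the rewrite picks
(a+c) + b:

<bb1>:
c = ... // c might throw

<bb2>:
t1 = a + c
t2 = t1 + b

c's definition can throw internally and so must stay at the end of bb1,
which is why t1 has to be inserted at the start of the fall-thru
successor bb2 rather than immediately after c.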

Thanks,
Easwaran

> Richard.
>
>> Thanks,
>> Easwaran
>>>
>>> Richard.
>>>
>>>> +    {
>>>> +      edge e, succ_edge = NULL;
>>>> +      edge_iterator ei;
>>>> +
>>>> +      /* There should be exactly one normal edge out of the basic block.  */
>>>> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
>>>> +        {
>>>> +          if (!(e->flags & EDGE_COMPLEX))
>>>> +            {
>>>> +              gcc_assert (succ_edge == NULL);
>>>> +              succ_edge = e;
>>>> +            }
>>>> +        }
>>>> +      /* Insert STMT at the beginning of the successor basic block.  */
>>>> +      insert_bb = succ_edge->dest;
>>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>>> +    }
>>>> +  else
>>>> +    {
>>>> +      gsi_insert = gsi_for_stmt (insert_stmt);
>>>> +      gsi_move_after (&gsistmt, &gsi_insert);
>>>> +    }
>>>> +
>>>> +}
>>>> +
>>>>  /* Recursively rewrite our linearized statements so that the operators
>>>>     match those in OPS[OPINDEX], putting the computation in rank
>>>>     order.  */
>>>> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>>>    operand_entry_t oe;
>>>>
>>>> -  /* If we have three operands left, then we want to make sure the ones
>>>> -     that get the double binary op are chosen wisely.  */
>>>> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
>>>> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
>>>> -
>>>>    /* The final recursion case for this function is that you have
>>>>       exactly two operations left.
>>>>       If we had one exactly one op in the entire list to start with, we
>>>> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>      {
>>>>        if (!moved)
>>>>   {
>>>> -  gimple_stmt_iterator gsinow, gsirhs1;
>>>> -  gimple stmt1 = stmt, stmt2;
>>>> -  unsigned int count;
>>>> +  gimple stmt1 = stmt;
>>>> +  unsigned int count, i = 1;
>>>>
>>>> -  gsinow = gsi_for_stmt (stmt);
>>>>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
>>>> -  while (count-- != 0)
>>>> +  while (i <= count)
>>>>      {
>>>> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>>> -      gsirhs1 = gsi_for_stmt (stmt2);
>>>> -      gsi_move_before (&gsirhs1, &gsinow);
>>>> -      gsi_prev (&gsinow);
>>>> -      stmt1 = stmt2;
>>>> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>>> +              /* Ensure that STMT1 is moved to a place where all operands in
>>>> +                 OPS[opindex + i...] are available.  */
>>>> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>>>> +              i++;
>>>>      }
>>>>    moved = true;
>>>>   }
>>>> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>>>>        && VEC_length (operand_entry_t, ops) > 3)
>>>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>>>    else
>>>> -    rewrite_expr_tree (stmt, 0, ops, false);
>>>> +                    {
>>>> +                      /* When there are three operands left, we want
>>>> +                         to make sure the ones that get the double
>>>> +                         binary op are chosen wisely.  */
>>>> +                      int len = VEC_length (operand_entry_t, ops);
>>>> +                      if (len >= 3)
>>>> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>>>>
>>>> +                      assign_uids_in_relevant_bbs (ops);
>>>> +      rewrite_expr_tree (stmt, 0, ops, false);
>>>> +                    }
>>>> +
>>>>    /* If we combined some repeated factors into a
>>>>       __builtin_powi call, multiply that result by the
>>>>       reassociated operands.  */
Richard Biener Oct. 31, 2012, 11:36 a.m. UTC | #7
On Wed, Oct 24, 2012 at 4:02 AM, Easwaran Raman <eraman@google.com> wrote:
> On Tue, Oct 23, 2012 at 2:52 AM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Mon, Oct 22, 2012 at 8:31 PM, Easwaran Raman <eraman@google.com> wrote:
>>> On Mon, Oct 22, 2012 at 12:59 AM, Richard Biener
>>> <richard.guenther@gmail.com> wrote:
>>>> On Fri, Oct 19, 2012 at 12:36 AM, Easwaran Raman <eraman@google.com> wrote:
>>>>> Hi,
>>>>>
>>>>> During expression reassociation, statements are conservatively moved
>>>>> downwards to ensure that dependences are correctly satisfied after
>>>>> reassocation. This could lead to lengthening of live ranges. This
>>>>> patch moves statements only to the extent necessary. Bootstraps and no
>>>>> test regression on x86_64/linux. OK for trunk?
>>>>>
>>>>> Thanks,
>>>>> Easwaran
>>>>>
>>>>> 2012-10-18   Easwaran Raman  <eraman@google.com>
>>>>> * tree-ssa-reassoc.c(assign_uids): New function.
>>>>> (assign_uids_in_relevant_bbs): Likewise.
>>>>> (ensure_ops_are_available): Likewise.
>>>>> (rewrite_expr_tree): Do not move statements beyond what is
>>>>> necessary. Remove call to swap_ops_for_binary_stmt...
>>>>> (reassociate_bb): ... and move it here.
>>>>>
>>>>>
>>>>>
>>>>> Index: gcc/tree-ssa-reassoc.c
>>>>> ===================================================================
>>>>> --- gcc/tree-ssa-reassoc.c (revision 192487)
>>>>> +++ gcc/tree-ssa-reassoc.c (working copy)
>>>>> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>>>>      }
>>>>>  }
>>>>>
>>>>> +/* Assign UIDs to statements in basic block BB.  */
>>>>> +
>>>>> +static void
>>>>> +assign_uids (basic_block bb)
>>>>> +{
>>>>> +  unsigned uid = 0;
>>>>> +  gimple_stmt_iterator gsi;
>>>>> +  /* First assign uids to phis.  */
>>>>> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>> +    {
>>>>> +      gimple stmt = gsi_stmt (gsi);
>>>>> +      gimple_set_uid (stmt, uid++);
>>>>> +    }
>>>>> +
>>>>> +  /* Then assign uids to stmts.  */
>>>>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>> +    {
>>>>> +      gimple stmt = gsi_stmt (gsi);
>>>>> +      gimple_set_uid (stmt, uid++);
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +/* For each operand in OPS, find the basic block that contains the statement
>>>>> +   which defines the operand. For all such basic blocks, assign UIDs.  */
>>>>> +
>>>>> +static void
>>>>> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
>>>>> +{
>>>>> +  operand_entry_t oe;
>>>>> +  int i;
>>>>> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
>>>>> +
>>>>> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>>>>> +    {
>>>>> +      gimple def_stmt;
>>>>> +      basic_block bb;
>>>>> +      if (TREE_CODE (oe->op) != SSA_NAME)
>>>>> +        continue;
>>>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>>>> +      bb = gimple_bb (def_stmt);
>>>>> +      if (!pointer_set_contains (seen_bbs, bb))
>>>>> +        {
>>>>> +          assign_uids (bb);
>>>>> +          pointer_set_insert (seen_bbs, bb);
>>>>> +        }
>>>>> +    }
>>>>> +  pointer_set_destroy (seen_bbs);
>>>>> +}
>>>>
>>>> Please assign UIDs once using the existing renumber_gimple_stmt_uids ().
>>>> You seem to call the above multiple times and thus do work bigger than
>>>> O(number of basic blocks).
>>>
>>> The reason I call the above multiple times is that gsi_move_before
>>> might get called between two calls to the above. For instance, after
>>> rewrite_expr_tree is called once, the following sequence of calls
>>> could happen:
>>> reassociate_bb -> linearize_expr_tree -> linearize_expr ->
>>> gsi_move_before.  So it is not sufficient to call
>>> renumber_gimple_stmt_uids once per do_reassoc. Or do you want me to
>>> use renumber_gimple_stmt_uids_in_blocks instead of
>>> assign_uids_in_relevant_bbs?
>>
>> It's sufficient to call it once if you conservatively update UIDs at stmt
>> move / insert time (assign the same UID as the stmt before/after).
>
> I was worried that that would make the code brittle, but looking at
> the places where a new statement is added or moved, I think it is
> fine.  I have made the change in the new patch.

Thanks.

>>>
>>>
>>>>> +/* Ensure that operands in the OPS vector starting from OPINDEXth entry are live
>>>>> +   at STMT.  This is accomplished by moving STMT if needed.  */
>>>>> +
>>>>> +static void
>>>>> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *ops, int opindex)
>>>>> +{
>>>>> +  int i;
>>>>> +  int len = VEC_length (operand_entry_t, ops);
>>>>> +  gimple insert_stmt = stmt;
>>>>> +  basic_block insert_bb = gimple_bb (stmt);
>>>>> +  gimple_stmt_iterator gsi_insert, gsistmt;
>>>>> +  for (i = opindex; i < len; i++)
>>>>> +    {
>>>>
>>>> Likewise you call this for each call to rewrite_expr_tree, so it seems to me
>>>> this is quadratic in the number of ops in the op vector.
>>>
>>> The call to ensure_ops_are_available inside rewrite_expr_tree is
>>> guarded by if (!moved), and I am setting moved = true there to ensure
>>> that ensure_ops_are_available is called only once per reassociation
>>> of an expression tree.
>>
>> It would be a lot easier to understand this function if you call it once
>> after rewrite_expr_tree finished.
> I think you meant "before rewrite_expr_tree". One thing to note here
> is that this function is called only when we first see a statement whose
> operand changes after reassociation. That's the reason the moving of
> statements happens inside rewrite_expr_tree.

I meant after because rewrite_expr_tree ends up swapping things.  But
I note you changed that in your revised patch - good ;)

So now that rewrite_expr_tree no longer changes the order of the ops
vector, we can indeed move stmts before it, but backwards.  Thus,
instead of walking forward and

+              /* Ensure that STMT1 is moved to a place where all operands in
+                 OPS[opindex + i...] are available.  */
+              ensure_ops_are_available (stmt1, ops, opindex + i);

we can walk backward and only need to consider the immediate next operand
because we know all further operands are available at the place the immediate
next operand is defined.

Which should remove the need for the loop in ensure_ops_are_available

+  for (i = opindex; i < len; i++)
+    {

Correct?  (We may need to special-case the last (two?) stmts).

>>
>>>>
>>>> Why make this all so complicated?  It seems to me that we should
>>>> fixup stmt order only after the whole ops vector has been materialized.
>>>
>>>
>>>>
>>>>> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
>>>>> +      gimple def_stmt;
>>>>> +      basic_block def_bb;
>>>>> +      /* Ignore constants and operands with default definitions.  */
>>>>> +      if (TREE_CODE (oe->op) != SSA_NAME
>>>>> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
>>>>> +        continue;
>>>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>>>> +      def_bb = gimple_bb (def_stmt);
>>>>> +      if (def_bb != insert_bb
>>>>> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
>>>>> +        {
>>>>> +          insert_bb = def_bb;
>>>>> +          insert_stmt = def_stmt;
>>>>> +        }
>>>>> +      else if (def_bb == insert_bb
>>>>> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
>>>>> +        insert_stmt = def_stmt;
>>>>> +    }
>>>>> +  if (insert_stmt == stmt)
>>>>> +    return;
>>>>> +  gsistmt = gsi_for_stmt (stmt);
>>>>> +  /* If GSI_STMT is a phi node, then do not insert just after that statement.
>>>>> +     Instead, find the first non-label gimple statement in BB and insert before
>>>>> +     that.  */
>>>>> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
>>>>> +    {
>>>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>>>> +    }
>>>>> +  /* Statements marked for throw can not be in the middle of a basic block. So
>>>>> +     we can not insert a statement (not marked for throw) immediately after.  */
>>>>> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0
>>>>
>>>> that's already performed by stmt_can_throw_internal
>>> Ok. I was hit by the "statement marked for throw in middle of block"
>>> error in verify_gimple_in_cfg  and simply copied the sequence of
>>> checks that led to that.
>>>
>>>>
>>>>> +           && stmt_can_throw_internal (insert_stmt))
>>>>
>>>> But all this should be a non-issue as re-assoc should never assign an ops
>>>> vector entry for such stmts (but it could have leafs defined by such stmts).
>>>> If you only ever move definitions down, like the present code does, caring
>>>> about leafs should not be necessary.
>>>
>>> I added this as I got an ICE in verify_gimple_in_cfg. So it seems like
>>> such statements get assigned into the ops vector.
>>
>> That would be a bug.  See how linearize_expr_tree checks whether
>> the defining stmt of an op may throw.  That is, it _does_ add the
>> SSA def to the vector but does not recurse on its defining stmt operands.
>
> But as long as the SSA def of a throwing statement is added to the ops
> vector, I need to add the check, even if it doesn't recurse on its
> operands. Consider the following
>
> <bb1>:
> t1 = a + b
> c = ... // c might throw
>
> <bb2>:
> t2 = t1 + c
>
> The expression could be rewritten as (a+c) + b.
>
> Since c's definition follows t1's, t1 = a+b has to be moved after c =
> ... That's why I need the check.

Hmm, indeed.

+      /* There should be exactly one normal edge out of the basic block.  */
+      FOR_EACH_EDGE (e, ei, insert_bb->succs)
+        {
+          if (!(e->flags & EDGE_COMPLEX))
+            {
+              gcc_assert (succ_edge == NULL);
+              succ_edge = e;
+            }
+        }

I think we have sth like find_fallthru_edge for this.
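That is, assuming the one normal outgoing edge of a block ending in a
throwing stmt is the fallthru edge, the loop plus assert could
presumably shrink to just (sketch):

  succ_edge = find_fallthru_edge (insert_bb->succs);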

Thanks,
Richard.

> Thanks,
> Easwaran
>
>> Richard.
>>
>>> Thanks,
>>> Easwaran
>>>>
>>>> Richard.
>>>>
>>>>> +    {
>>>>> +      edge e, succ_edge = NULL;
>>>>> +      edge_iterator ei;
>>>>> +
>>>>> +      /* There should be exactly one normal edge out of the basic block.  */
>>>>> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
>>>>> +        {
>>>>> +          if (!(e->flags & EDGE_COMPLEX))
>>>>> +            {
>>>>> +              gcc_assert (succ_edge == NULL);
>>>>> +              succ_edge = e;
>>>>> +            }
>>>>> +        }
>>>>> +      /* Insert STMT at the beginning of the successor basic block.  */
>>>>> +      insert_bb = succ_edge->dest;
>>>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>>>> +    }
>>>>> +  else
>>>>> +    {
>>>>> +      gsi_insert = gsi_for_stmt (insert_stmt);
>>>>> +      gsi_move_after (&gsistmt, &gsi_insert);
>>>>> +    }
>>>>> +
>>>>> +}
>>>>> +
>>>>>  /* Recursively rewrite our linearized statements so that the operators
>>>>>     match those in OPS[OPINDEX], putting the computation in rank
>>>>>     order.  */
>>>>> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>>>>    operand_entry_t oe;
>>>>>
>>>>> -  /* If we have three operands left, then we want to make sure the ones
>>>>> -     that get the double binary op are chosen wisely.  */
>>>>> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
>>>>> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
>>>>> -
>>>>>    /* The final recursion case for this function is that you have
>>>>>       exactly two operations left.
>>>>>       If we had one exactly one op in the entire list to start with, we
>>>>> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>>      {
>>>>>        if (!moved)
>>>>>   {
>>>>> -  gimple_stmt_iterator gsinow, gsirhs1;
>>>>> -  gimple stmt1 = stmt, stmt2;
>>>>> -  unsigned int count;
>>>>> +  gimple stmt1 = stmt;
>>>>> +  unsigned int count, i = 1;
>>>>>
>>>>> -  gsinow = gsi_for_stmt (stmt);
>>>>>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
>>>>> -  while (count-- != 0)
>>>>> +  while (i <= count)
>>>>>      {
>>>>> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>>>> -      gsirhs1 = gsi_for_stmt (stmt2);
>>>>> -      gsi_move_before (&gsirhs1, &gsinow);
>>>>> -      gsi_prev (&gsinow);
>>>>> -      stmt1 = stmt2;
>>>>> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>>>> +              /* Ensure that STMT1 is moved to a place where all operands in
>>>>> +                 OPS[opindex + i...] are available.  */
>>>>> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>>>>> +              i++;
>>>>>      }
>>>>>    moved = true;
>>>>>   }
>>>>> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>>>>>        && VEC_length (operand_entry_t, ops) > 3)
>>>>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>>>>    else
>>>>> -    rewrite_expr_tree (stmt, 0, ops, false);
>>>>> +                    {
>>>>> +                      /* When there are three operands left, we want
>>>>> +                         to make sure the ones that get the double
>>>>> +                         binary op are chosen wisely.  */
>>>>> +                      int len = VEC_length (operand_entry_t, ops);
>>>>> +                      if (len >= 3)
>>>>> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>>>>>
>>>>> +                      assign_uids_in_relevant_bbs (ops);
>>>>> +      rewrite_expr_tree (stmt, 0, ops, false);
>>>>> +                    }
>>>>> +
>>>>>    /* If we combined some repeated factors into a
>>>>>       __builtin_powi call, multiply that result by the
>>>>>       reassociated operands.  */
Easwaran Raman Oct. 31, 2012, 7:16 p.m. UTC | #8
On Wed, Oct 31, 2012 at 4:36 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Wed, Oct 24, 2012 at 4:02 AM, Easwaran Raman <eraman@google.com> wrote:
>> On Tue, Oct 23, 2012 at 2:52 AM, Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>> On Mon, Oct 22, 2012 at 8:31 PM, Easwaran Raman <eraman@google.com> wrote:
>>>> On Mon, Oct 22, 2012 at 12:59 AM, Richard Biener
>>>> <richard.guenther@gmail.com> wrote:
>>>>> On Fri, Oct 19, 2012 at 12:36 AM, Easwaran Raman <eraman@google.com> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> During expression reassociation, statements are conservatively moved
>>>>>> downwards to ensure that dependences are correctly satisfied after
>>>>>> reassocation. This could lead to lengthening of live ranges. This
>>>>>> patch moves statements only to the extent necessary. Bootstraps and no
>>>>>> test regression on x86_64/linux. OK for trunk?
>>>>>>
>>>>>> Thanks,
>>>>>> Easwaran
>>>>>>
>>>>>> 2012-10-18   Easwaran Raman  <eraman@google.com>
>>>>>> * tree-ssa-reassoc.c(assign_uids): New function.
>>>>>> (assign_uids_in_relevant_bbs): Likewise.
>>>>>> (ensure_ops_are_available): Likewise.
>>>>>> (rewrite_expr_tree): Do not move statements beyond what is
>>>>>> necessary. Remove call to swap_ops_for_binary_stmt...
>>>>>> (reassociate_bb): ... and move it here.
>>>>>>
>>>>>>
>>>>>>
>>>>>> Index: gcc/tree-ssa-reassoc.c
>>>>>> ===================================================================
>>>>>> --- gcc/tree-ssa-reassoc.c (revision 192487)
>>>>>> +++ gcc/tree-ssa-reassoc.c (working copy)
>>>>>> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>>>>>      }
>>>>>>  }
>>>>>>
>>>>>> +/* Assign UIDs to statements in basic block BB.  */
>>>>>> +
>>>>>> +static void
>>>>>> +assign_uids (basic_block bb)
>>>>>> +{
>>>>>> +  unsigned uid = 0;
>>>>>> +  gimple_stmt_iterator gsi;
>>>>>> +  /* First assign uids to phis.  */
>>>>>> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>>> +    {
>>>>>> +      gimple stmt = gsi_stmt (gsi);
>>>>>> +      gimple_set_uid (stmt, uid++);
>>>>>> +    }
>>>>>> +
>>>>>> +  /* Then assign uids to stmts.  */
>>>>>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>>> +    {
>>>>>> +      gimple stmt = gsi_stmt (gsi);
>>>>>> +      gimple_set_uid (stmt, uid++);
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +/* For each operand in OPS, find the basic block that contains the statement
>>>>>> +   which defines the operand. For all such basic blocks, assign UIDs.  */
>>>>>> +
>>>>>> +static void
>>>>>> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
>>>>>> +{
>>>>>> +  operand_entry_t oe;
>>>>>> +  int i;
>>>>>> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
>>>>>> +
>>>>>> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>>>>>> +    {
>>>>>> +      gimple def_stmt;
>>>>>> +      basic_block bb;
>>>>>> +      if (TREE_CODE (oe->op) != SSA_NAME)
>>>>>> +        continue;
>>>>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>>>>> +      bb = gimple_bb (def_stmt);
>>>>>> +      if (!pointer_set_contains (seen_bbs, bb))
>>>>>> +        {
>>>>>> +          assign_uids (bb);
>>>>>> +          pointer_set_insert (seen_bbs, bb);
>>>>>> +        }
>>>>>> +    }
>>>>>> +  pointer_set_destroy (seen_bbs);
>>>>>> +}
>>>>>
>>>>> Please assign UIDs once using the existing renumber_gimple_stmt_uids ().
>>>>> You seem to call the above multiple times and thus do work bigger than
>>>>> O(number of basic blocks).
>>>>
>>>> The reason I call the above multiple times is that gsi_move_before
>>>> might get called between two calls to the above. For instance, after
>>>> rewrite_expr_tree is called once, the following sequence of calls
>>>> could happen:
>>>> reassociate_bb -> linearize_expr_tree -> linearize_expr ->
>>>> gsi_move_before.  So it is not sufficient to call
>>>> renumber_gimple_stmt_uids once per do_reassoc. Or do you want me to
>>>> use renumber_gimple_stmt_uids_in_blocks instead of
>>>> assign_uids_in_relevant_bbs?
>>>
>>> It's sufficient to call it once if you conservatively update UIDs at stmt
>>> move / insert time (assign the same UID as the stmt before/after).
>>
>> I was worried that that would make the code brittle, but looking at
>> the places where a new statement is added or moved, I think it is
>> fine.  I have made the change in the new patch.
>
> Thanks.
>
>>>>
>>>>
>>>>>> +/* Ensure that operands in the OPS vector starting from OPINDEXth entry are live
>>>>>> +   at STMT.  This is accomplished by moving STMT if needed.  */
>>>>>> +
>>>>>> +static void
>>>>>> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *ops, int opindex)
>>>>>> +{
>>>>>> +  int i;
>>>>>> +  int len = VEC_length (operand_entry_t, ops);
>>>>>> +  gimple insert_stmt = stmt;
>>>>>> +  basic_block insert_bb = gimple_bb (stmt);
>>>>>> +  gimple_stmt_iterator gsi_insert, gsistmt;
>>>>>> +  for (i = opindex; i < len; i++)
>>>>>> +    {
>>>>>
>>>>> Likewise you call this for each call to rewrite_expr_tree, so it seems to me
>>>>> this is quadratic in the number of ops in the op vector.
>>>>
>>>> The call to ensure_ops_are_available inside rewrite_expr_tree is
>>>> guarded by if (!moved), and I am setting moved = true there to ensure
>>>> that ensure_ops_are_available is called only once per reassociation
>>>> of an expression tree.
>>>
>>> It would be a lot easier to understand this function if you call it once
>>> after rewrite_expr_tree finished.
>> I think you meant "before rewrite_expr_tree". One thing to note here
>> is that this function is called only when we first see a statement whose
>> operand changes after reassociation. That's the reason the moving of
>> statements happens inside rewrite_expr_tree.
>
> I meant after because rewrite_expr_tree ends up swapping things.  But
> I note you changed that in your revised patch - good ;)
>
> So now that rewrite_expr_tree no longer changes the order of the ops
> vector, we can indeed move stmts before it, but backwards.  Thus,
> instead of walking forward and
>
> +              /* Ensure that STMT1 is moved to a place where all operands in
> +                 OPS[opindex + i...] are available.  */
> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>
> we can walk backward and only need to consider the immediate next operand
> because we know all further operands are available at the place the immediate
> next operand is defined.
>
> Which should remove the need for the loop in ensure_ops_are_available
>
> +  for (i = opindex; i < len; i++)
> +    {
>
> Correct?  (We may need to special-case the last (two?) stmts).

Sorry, I am not able to understand your proposal here. The confusion
relates to the use of walking forward vs backward. In the current
patch, I start from the last statement (root of the expression tree)
and follow the chain of defs using SSA_NAME_DEF_STMT (which I consider
as walking backward). Do you want to start from the leaf of the
expression and follow the chain in the opposite direction till the
stmt defining the root of the tree is reached? That would indeed avoid
comparing each statement against all statements that define the ops
array as it is sufficient to compare against the statements that
define the RHS operands. If that is indeed what you are proposing, I
tried that approach and ran into issues. Doing this would mean that
the IR is in an inconsistent state during intermediate stages. For
instance, transforming

t1 = a + b
t2 = t1 + c
d = ...
t3 = t2 + d

into

t1 = a + d
t2 = t1 + c
d = ...
t3 = t2 + b

would result in an intermediate state where t1 = a + d uses d before its
definition, and moving t1 = a + d below d's definition would make
t2 = t1 + c appear before t1's. I don't know if that is the root cause,
but I noticed that doing so resulted in verify_ssa complaining about DEBUG
statements whose operands appeared before their defs. If this is not what
you mean by
walking backwards, could you please explain with a simple example?

Thanks,
Easwaran


>>>
>>>>>
>>>>> Why make this all so complicated?  It seems to me that we should
>>>>> fixup stmt order only after the whole ops vector has been materialized.
>>>>
>>>>
>>>>>
>>>>>> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
>>>>>> +      gimple def_stmt;
>>>>>> +      basic_block def_bb;
>>>>>> +      /* Ignore constants and operands with default definitions.  */
>>>>>> +      if (TREE_CODE (oe->op) != SSA_NAME
>>>>>> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
>>>>>> +        continue;
>>>>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>>>>> +      def_bb = gimple_bb (def_stmt);
>>>>>> +      if (def_bb != insert_bb
>>>>>> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
>>>>>> +        {
>>>>>> +          insert_bb = def_bb;
>>>>>> +          insert_stmt = def_stmt;
>>>>>> +        }
>>>>>> +      else if (def_bb == insert_bb
>>>>>> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
>>>>>> +        insert_stmt = def_stmt;
>>>>>> +    }
>>>>>> +  if (insert_stmt == stmt)
>>>>>> +    return;
>>>>>> +  gsistmt = gsi_for_stmt (stmt);
>>>>>> +  /* If GSI_STMT is a phi node, then do not insert just after that statement.
>>>>>> +     Instead, find the first non-label gimple statement in BB and insert before
>>>>>> +     that.  */
>>>>>> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
>>>>>> +    {
>>>>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>>>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>>>>> +    }
>>>>>> +  /* Statements marked for throw can not be in the middle of a basic block. So
>>>>>> +     we can not insert a statement (not marked for throw) immediately after.  */
>>>>>> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0
>>>>>
>>>>> that's already performed by stmt_can_throw_internal
>>>> Ok. I was hit by the "statement marked for throw in middle of block"
>>>> error in verify_gimple_in_cfg  and simply copied the sequence of
>>>> checks that led to that.
>>>>
>>>>>
>>>>>> +           && stmt_can_throw_internal (insert_stmt))
>>>>>
>>>>> But all this should be a non-issue as re-assoc should never assign an ops
>>>>> vector entry for such stmts (but it could have leafs defined by such stmts).
>>>>> If you only ever move definitions down, like the present code does, caring
>>>>> about leafs should not be necessary.
>>>>
>>>> I added this as I got an ICE in verify_gimple_in_cfg. So it seems like
>>>> such statements get assigned into the ops vector.
>>>
>>> That would be a bug.  See how linearize_expr_tree checks whether
>>> the defining stmt of an op may throw.  That is, it _does_ add the
>>> SSA def to the vector but does not recurse on its defining stmt operands.
>>
>> But as long as the SSA def of a throwing statement is added to the ops
>> vector, I need to add the check, even if it doesn't recurse on its
>> operands. Consider the following
>>
>> <bb1>:
>> t1 = a + b
>> c = ... // c might throw
>>
>> <bb2>:
>> t2 = t1 + c
>>
>> The expression could be rewritten as (a+c) + b.
>>
>> Since c's definition follows t1's, t1 = a+b has to be moved after c =
>> ... That's why I need the check.
>
> Hmm, indeed.
>
> +      /* There should be exactly one normal edge out of the basic block.  */
> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
> +        {
> +          if (!(e->flags & EDGE_COMPLEX))
> +            {
> +              gcc_assert (succ_edge == NULL);
> +              succ_edge = e;
> +            }
> +        }
>
> I think we have sth like find_fallthru_edge for this.
>
> Thanks,
> Richard.
>
>> Thanks,
>> Easwaran
>>
>>> Richard.
>>>
>>>> Thanks,
>>>> Easwaran
>>>>>
>>>>> Richard.
>>>>>
>>>>>> +    {
>>>>>> +      edge e, succ_edge = NULL;
>>>>>> +      edge_iterator ei;
>>>>>> +
>>>>>> +      /* There should be exactly one normal edge out of the basic block.  */
>>>>>> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
>>>>>> +        {
>>>>>> +          if (!(e->flags & EDGE_COMPLEX))
>>>>>> +            {
>>>>>> +              gcc_assert (succ_edge == NULL);
>>>>>> +              succ_edge = e;
>>>>>> +            }
>>>>>> +        }
>>>>>> +      /* Insert STMT at the beginning of the successor basic block.  */
>>>>>> +      insert_bb = succ_edge->dest;
>>>>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>>>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>>>>> +    }
>>>>>> +  else
>>>>>> +    {
>>>>>> +      gsi_insert = gsi_for_stmt (insert_stmt);
>>>>>> +      gsi_move_after (&gsistmt, &gsi_insert);
>>>>>> +    }
>>>>>> +
>>>>>> +}
>>>>>> +
>>>>>>  /* Recursively rewrite our linearized statements so that the operators
>>>>>>     match those in OPS[OPINDEX], putting the computation in rank
>>>>>>     order.  */
>>>>>> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>>>>>    operand_entry_t oe;
>>>>>>
>>>>>> -  /* If we have three operands left, then we want to make sure the ones
>>>>>> -     that get the double binary op are chosen wisely.  */
>>>>>> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
>>>>>> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
>>>>>> -
>>>>>>    /* The final recursion case for this function is that you have
>>>>>>       exactly two operations left.
>>>>>>       If we had one exactly one op in the entire list to start with, we
>>>>>> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>>>      {
>>>>>>        if (!moved)
>>>>>>   {
>>>>>> -  gimple_stmt_iterator gsinow, gsirhs1;
>>>>>> -  gimple stmt1 = stmt, stmt2;
>>>>>> -  unsigned int count;
>>>>>> +  gimple stmt1 = stmt;
>>>>>> +  unsigned int count, i = 1;
>>>>>>
>>>>>> -  gsinow = gsi_for_stmt (stmt);
>>>>>>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
>>>>>> -  while (count-- != 0)
>>>>>> +  while (i <= count)
>>>>>>      {
>>>>>> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>>>>> -      gsirhs1 = gsi_for_stmt (stmt2);
>>>>>> -      gsi_move_before (&gsirhs1, &gsinow);
>>>>>> -      gsi_prev (&gsinow);
>>>>>> -      stmt1 = stmt2;
>>>>>> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>>>>> +              /* Ensure that STMT1 is moved to a place where all operands in
>>>>>> +                 OPS[opindex + i...] are available.  */
>>>>>> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>>>>>> +              i++;
>>>>>>      }
>>>>>>    moved = true;
>>>>>>   }
>>>>>> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>>>>>>        && VEC_length (operand_entry_t, ops) > 3)
>>>>>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>>>>>    else
>>>>>> -    rewrite_expr_tree (stmt, 0, ops, false);
>>>>>> +                    {
>>>>>> +                      /* When there are three operands left, we want
>>>>>> +                         to make sure the ones that get the double
>>>>>> +                         binary op are chosen wisely.  */
>>>>>> +                      int len = VEC_length (operand_entry_t, ops);
>>>>>> +                      if (len >= 3)
>>>>>> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>>>>>>
>>>>>> +                      assign_uids_in_relevant_bbs (ops);
>>>>>> +      rewrite_expr_tree (stmt, 0, ops, false);
>>>>>> +                    }
>>>>>> +
>>>>>>    /* If we combined some repeated factors into a
>>>>>>       __builtin_powi call, multiply that result by the
>>>>>>       reassociated operands.  */
Easwaran Raman Nov. 2, 2012, 8:41 p.m. UTC | #9
Ping.

- Easwaran

On Wed, Oct 31, 2012 at 12:16 PM, Easwaran Raman <eraman@google.com> wrote:
> On Wed, Oct 31, 2012 at 4:36 AM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Wed, Oct 24, 2012 at 4:02 AM, Easwaran Raman <eraman@google.com> wrote:
>>> On Tue, Oct 23, 2012 at 2:52 AM, Richard Biener
>>> <richard.guenther@gmail.com> wrote:
>>>> On Mon, Oct 22, 2012 at 8:31 PM, Easwaran Raman <eraman@google.com> wrote:
>>>>> On Mon, Oct 22, 2012 at 12:59 AM, Richard Biener
>>>>> <richard.guenther@gmail.com> wrote:
>>>>>> On Fri, Oct 19, 2012 at 12:36 AM, Easwaran Raman <eraman@google.com> wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> During expression reassociation, statements are conservatively moved
>>>>>>> downwards to ensure that dependences are correctly satisfied after
>>>>>>> reassocation. This could lead to lengthening of live ranges. This
>>>>>>> patch moves statements only to the extent necessary. Bootstraps and no
>>>>>>> test regression on x86_64/linux. OK for trunk?
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Easwaran
>>>>>>>
>>>>>>> 2012-10-18   Easwaran Raman  <eraman@google.com>
>>>>>>> * tree-ssa-reassoc.c(assign_uids): New function.
>>>>>>> (assign_uids_in_relevant_bbs): Likewise.
>>>>>>> (ensure_ops_are_available): Likewise.
>>>>>>> (rewrite_expr_tree): Do not move statements beyond what is
>>>>>>> necessary. Remove call to swap_ops_for_binary_stmt...
>>>>>>> (reassociate_bb): ... and move it here.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Index: gcc/tree-ssa-reassoc.c
>>>>>>> ===================================================================
>>>>>>> --- gcc/tree-ssa-reassoc.c (revision 192487)
>>>>>>> +++ gcc/tree-ssa-reassoc.c (working copy)
>>>>>>> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>>>>>>      }
>>>>>>>  }
>>>>>>>
>>>>>>> +/* Assign UIDs to statements in basic block BB.  */
>>>>>>> +
>>>>>>> +static void
>>>>>>> +assign_uids (basic_block bb)
>>>>>>> +{
>>>>>>> +  unsigned uid = 0;
>>>>>>> +  gimple_stmt_iterator gsi;
>>>>>>> +  /* First assign uids to phis.  */
>>>>>>> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>>>> +    {
>>>>>>> +      gimple stmt = gsi_stmt (gsi);
>>>>>>> +      gimple_set_uid (stmt, uid++);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +  /* Then assign uids to stmts.  */
>>>>>>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>>>> +    {
>>>>>>> +      gimple stmt = gsi_stmt (gsi);
>>>>>>> +      gimple_set_uid (stmt, uid++);
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>> +/* For each operand in OPS, find the basic block that contains the statement
>>>>>>> +   which defines the operand. For all such basic blocks, assign UIDs.  */
>>>>>>> +
>>>>>>> +static void
>>>>>>> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
>>>>>>> +{
>>>>>>> +  operand_entry_t oe;
>>>>>>> +  int i;
>>>>>>> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
>>>>>>> +
>>>>>>> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>>>>>>> +    {
>>>>>>> +      gimple def_stmt;
>>>>>>> +      basic_block bb;
>>>>>>> +      if (TREE_CODE (oe->op) != SSA_NAME)
>>>>>>> +        continue;
>>>>>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>>>>>> +      bb = gimple_bb (def_stmt);
>>>>>>> +      if (!pointer_set_contains (seen_bbs, bb))
>>>>>>> +        {
>>>>>>> +          assign_uids (bb);
>>>>>>> +          pointer_set_insert (seen_bbs, bb);
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +  pointer_set_destroy (seen_bbs);
>>>>>>> +}
>>>>>>
>>>>>> Please assign UIDs once using the existing renumber_gimple_stmt_uids ().
>>>>>> You seem to call the above multiple times and thus do work bigger than
>>>>>> O(number of basic blocks).
>>>>>
>>>>> The reason I call the above multiple times is that gsi_move_before
>>>>> might get called between two calls to the above. For instance, after
>>>>> rewrite_expr_tree is called once, the following sequence of calls
>>>>> could happen:
>>>>> reassociate_bb -> linearize_expr_tree -> linearize_expr ->
>>>>> gsi_move_before.  So it is not sufficient to call
>>>>> renumber_gimple_stmt_uids once per do_reassoc. Or do you want me to
>>>>> use renumber_gimple_stmt_uids_in_blocks instead of
>>>>> assign_uids_in_relevant_bbs?
>>>>
>>>> It's sufficient to call it once if you conservatively update UIDs at stmt
>>>> move / insert time (assign the same UID as the stmt before/after).
>>>
>>> I was worried that that would make the code brittle, but looking at
>>> the places where a new statement is added or moved, I think it is
>>> fine.  I have made the change in the new patch.
>>
>> Thanks.
>>
>>>>>
>>>>>
>>>>>>> +/* Ensure that operands in the OPS vector starting from OPINDEXth entry are live
>>>>>>> +   at STMT.  This is accomplished by moving STMT if needed.  */
>>>>>>> +
>>>>>>> +static void
>>>>>>> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *ops, int opindex)
>>>>>>> +{
>>>>>>> +  int i;
>>>>>>> +  int len = VEC_length (operand_entry_t, ops);
>>>>>>> +  gimple insert_stmt = stmt;
>>>>>>> +  basic_block insert_bb = gimple_bb (stmt);
>>>>>>> +  gimple_stmt_iterator gsi_insert, gsistmt;
>>>>>>> +  for (i = opindex; i < len; i++)
>>>>>>> +    {
>>>>>>
>>>>>> Likewise you call this for each call to rewrite_expr_tree, so it seems to me
>>>>>> this is quadratic in the number of ops in the op vector.
>>>>>
>>>>> The call to ensure_ops_are_available inside rewrite_expr_tree is
>>>>> guarded by if (!moved), and I am setting moved = true there to ensure
>>>>> that ensure_ops_are_available is called only once per reassociation
>>>>> of an expression tree.
>>>>
>>>> It would be a lot easier to understand this function if you call it once
>>>> after rewrite_expr_tree finished.
>>> I think you meant "before rewrite_expr_tree". One thing to note here
>>> is that this function is called only when we first see a statement whose
>>> operand changes after reassociation. That's the reason the moving of
>>> statements happens inside rewrite_expr_tree.
>>
>> I meant after because rewrite_expr_tree ends up swapping things.  But
>> I note you changed that in your revised patch - good ;)
>>
>> So now that rewrite_expr_tree no longer changes the order of the ops
>> vector, we can indeed move stmts before it, but backwards.  Thus,
>> instead of walking forward and
>>
>> +              /* Ensure that STMT1 is moved to a place where all operands in
>> +                 OPS[opindex + i...] are available.  */
>> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>>
>> we can walk backward and only need to consider the immediate next operand
>> because we know all further operands are available at the place the immediate
>> next operand is defined.
>>
>> Which should remove the need for the loop in ensure_ops_are_available
>>
>> +  for (i = opindex; i < len; i++)
>> +    {
>>
>> Correct?  (We may need to special-case the last (two?) stmts).
>
> Sorry, I am not able to understand your proposal here. The confusion
> relates to the use of walking forward vs backward. In the current
> patch, I start from the last statement (root of the expression tree)
> and follow the chain of defs using SSA_NAME_DEF_STMT (which I consider
> as walking backward). Do you want to start from the leaf of the
> expression and follow the chain in the opposite direction till the
> stmt defining the root of the tree is reached? That would indeed avoid
> comparing each statement against all statements that define the ops
> array as it is sufficient to compare against the statements that
> define the RHS operands. If that is indeed what you are proposing, I
> tried that approach and ran into issues. Doing this would mean that
> the IR is in an inconsistent state during intermediate stages. For
> instance, transforming
>
> t1 = a + b
> t2 = t1 + c
> d = ...
> t3 = t2 + d
>
> into
>
> t1 = a + d
> t2 = t1 + c
> d = ...
> t3 = t2 + b
>
> would result in an intermediate state where t1 = a + d uses d before its
> definition, and moving t1 = a + d below d's definition would make
> t2 = t1 + c appear before t1's. I don't know if that is the root cause,
> but I noticed that doing so resulted in verify_ssa complaining about DEBUG
> statements whose operands appeared before their defs. If this is not what
> you mean by
> walking backwards, could you please explain with a simple example?
>
> Thanks,
> Easwaran
>
>
>>>>
>>>>>>
>>>>>> Why make this all so complicated?  It seems to me that we should
>>>>>> fixup stmt order only after the whole ops vector has been materialized.
>>>>>
>>>>>
>>>>>>
>>>>>>> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
>>>>>>> +      gimple def_stmt;
>>>>>>> +      basic_block def_bb;
>>>>>>> +      /* Ignore constants and operands with default definitions.  */
>>>>>>> +      if (TREE_CODE (oe->op) != SSA_NAME
>>>>>>> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
>>>>>>> +        continue;
>>>>>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>>>>>> +      def_bb = gimple_bb (def_stmt);
>>>>>>> +      if (def_bb != insert_bb
>>>>>>> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
>>>>>>> +        {
>>>>>>> +          insert_bb = def_bb;
>>>>>>> +          insert_stmt = def_stmt;
>>>>>>> +        }
>>>>>>> +      else if (def_bb == insert_bb
>>>>>>> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
>>>>>>> +        insert_stmt = def_stmt;
>>>>>>> +    }
>>>>>>> +  if (insert_stmt == stmt)
>>>>>>> +    return;
>>>>>>> +  gsistmt = gsi_for_stmt (stmt);
>>>>>>> +  /* If GSI_STMT is a phi node, then do not insert just after that statement.
>>>>>>> +     Instead, find the first non-label gimple statement in BB and insert before
>>>>>>> +     that.  */
>>>>>>> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
>>>>>>> +    {
>>>>>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>>>>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>>>>>> +    }
>>>>>>> +  /* Statements marked for throw can not be in the middle of a basic block. So
>>>>>>> +     we can not insert a statement (not marked for throw) immediately after.  */
>>>>>>> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0
>>>>>>
>>>>>> that's already performed by stmt_can_throw_internal
>>>>> Ok. I was hit by the "statement marked for throw in middle of block"
>>>>> error in verify_gimple_in_cfg  and simply copied the sequence of
>>>>> checks that led to that.
>>>>>
>>>>>>
>>>>>>> +           && stmt_can_throw_internal (insert_stmt))
>>>>>>
>>>>>> But all this should be a non-issue as re-assoc should never assign an ops
>>>>>> vector entry for such stmts (but it could have leafs defined by such stmts).
>>>>>> If you only ever move definitions down, like the present code does, caring
>>>>>> about leafs should not be necessary.
>>>>>
>>>>> I added this as I got an ICE in verify_gimple_in_cfg. So it seems like
>>>>> such statements get assigned into the ops vector.
>>>>
>>>> That would be a bug.  See how linearize_expr_tree checks whether
>>>> the defining stmt of an op may throw.  That is, it _does_ add the
>>>> SSA def to the vector but does not recurse on its defining stmt operands.
>>>
>>> But as long as the SSA def of a throwing statement is added to the ops
>>> vector, I need to add the check, even if it doesn't recurse on its
>>> operands. Consider the following
>>>
>>> <bb1>:
>>> t1 = a + b
>>> c = ... // c might throw
>>>
>>> <bb2>:
>>> t2 = t1 + c
>>>
>>> The expression could be rewritten as (a+c) + b.
>>>
>>> Since c's definition follows t1's, t1 = a+b has to be moved after c =
>>> ... That's why I need the check.
>>
>> Hmm, indeed.
>>
>> +      /* There should be exactly one normal edge out of the basic block.  */
>> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
>> +        {
>> +          if (!(e->flags & EDGE_COMPLEX))
>> +            {
>> +              gcc_assert (succ_edge == NULL);
>> +              succ_edge = e;
>> +            }
>> +        }
>>
>> I think we have sth like find_fallthru_edge for this.
>>
>> Thanks,
>> Richard.
>>
>>> Thanks,
>>> Easwaran
>>>
>>>> Richard.
>>>>
>>>>> Thanks,
>>>>> Easwaran
>>>>>>
>>>>>> Richard.
>>>>>>
>>>>>>> +    {
>>>>>>> +      edge e, succ_edge = NULL;
>>>>>>> +      edge_iterator ei;
>>>>>>> +
>>>>>>> +      /* There should be exactly one normal edge out of the basic block.  */
>>>>>>> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
>>>>>>> +        {
>>>>>>> +          if (!(e->flags & EDGE_COMPLEX))
>>>>>>> +            {
>>>>>>> +              gcc_assert (succ_edge == NULL);
>>>>>>> +              succ_edge = e;
>>>>>>> +            }
>>>>>>> +        }
>>>>>>> +      /* Insert STMT at the beginning of the successor basic block.  */
>>>>>>> +      insert_bb = succ_edge->dest;
>>>>>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>>>>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>>>>>> +    }
>>>>>>> +  else
>>>>>>> +    {
>>>>>>> +      gsi_insert = gsi_for_stmt (insert_stmt);
>>>>>>> +      gsi_move_after (&gsistmt, &gsi_insert);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +}
>>>>>>> +
>>>>>>>  /* Recursively rewrite our linearized statements so that the operators
>>>>>>>     match those in OPS[OPINDEX], putting the computation in rank
>>>>>>>     order.  */
>>>>>>> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>>>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>>>>>>    operand_entry_t oe;
>>>>>>>
>>>>>>> -  /* If we have three operands left, then we want to make sure the ones
>>>>>>> -     that get the double binary op are chosen wisely.  */
>>>>>>> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
>>>>>>> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
>>>>>>> -
>>>>>>>    /* The final recursion case for this function is that you have
>>>>>>>       exactly two operations left.
>>>>>>>       If we had one exactly one op in the entire list to start with, we
>>>>>>> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>>>>      {
>>>>>>>        if (!moved)
>>>>>>>   {
>>>>>>> -  gimple_stmt_iterator gsinow, gsirhs1;
>>>>>>> -  gimple stmt1 = stmt, stmt2;
>>>>>>> -  unsigned int count;
>>>>>>> +  gimple stmt1 = stmt;
>>>>>>> +  unsigned int count, i = 1;
>>>>>>>
>>>>>>> -  gsinow = gsi_for_stmt (stmt);
>>>>>>>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
>>>>>>> -  while (count-- != 0)
>>>>>>> +  while (i <= count)
>>>>>>>      {
>>>>>>> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>>>>>> -      gsirhs1 = gsi_for_stmt (stmt2);
>>>>>>> -      gsi_move_before (&gsirhs1, &gsinow);
>>>>>>> -      gsi_prev (&gsinow);
>>>>>>> -      stmt1 = stmt2;
>>>>>>> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>>>>>> +              /* Ensure that STMT1 is moved to a place where all operands in
>>>>>>> +                 OPS[opindex + i...] are available.  */
>>>>>>> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>>>>>>> +              i++;
>>>>>>>      }
>>>>>>>    moved = true;
>>>>>>>   }
>>>>>>> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>>>>>>>        && VEC_length (operand_entry_t, ops) > 3)
>>>>>>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>>>>>>    else
>>>>>>> -    rewrite_expr_tree (stmt, 0, ops, false);
>>>>>>> +                    {
>>>>>>> +                      /* When there are three operands left, we want
>>>>>>> +                         to make sure the ones that get the double
>>>>>>> +                         binary op are chosen wisely.  */
>>>>>>> +                      int len = VEC_length (operand_entry_t, ops);
>>>>>>> +                      if (len >= 3)
>>>>>>> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>>>>>>>
>>>>>>> +                      assign_uids_in_relevant_bbs (ops);
>>>>>>> +      rewrite_expr_tree (stmt, 0, ops, false);
>>>>>>> +                    }
>>>>>>> +
>>>>>>>    /* If we combined some repeated factors into a
>>>>>>>       __builtin_powi call, multiply that result by the
>>>>>>>       reassociated operands.  */
Richard Biener Nov. 4, 2012, 5:10 p.m. UTC | #10
On Wed, Oct 31, 2012 at 8:16 PM, Easwaran Raman <eraman@google.com> wrote:
> On Wed, Oct 31, 2012 at 4:36 AM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Wed, Oct 24, 2012 at 4:02 AM, Easwaran Raman <eraman@google.com> wrote:
>>> On Tue, Oct 23, 2012 at 2:52 AM, Richard Biener
>>> <richard.guenther@gmail.com> wrote:
>>>> On Mon, Oct 22, 2012 at 8:31 PM, Easwaran Raman <eraman@google.com> wrote:
>>>>> On Mon, Oct 22, 2012 at 12:59 AM, Richard Biener
>>>>> <richard.guenther@gmail.com> wrote:
>>>>>> On Fri, Oct 19, 2012 at 12:36 AM, Easwaran Raman <eraman@google.com> wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> During expression reassociation, statements are conservatively moved
>>>>>>> downwards to ensure that dependences are correctly satisfied after
>>>>>>> reassocation. This could lead to lengthening of live ranges. This
>>>>>>> patch moves statements only to the extent necessary. Bootstraps and no
>>>>>>> test regression on x86_64/linux. OK for trunk?
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Easwaran
>>>>>>>
>>>>>>> 2012-10-18   Easwaran Raman  <eraman@google.com>
>>>>>>> * tree-ssa-reassoc.c(assign_uids): New function.
>>>>>>> (assign_uids_in_relevant_bbs): Likewise.
>>>>>>> (ensure_ops_are_available): Likewise.
>>>>>>> (rewrite_expr_tree): Do not move statements beyond what is
>>>>>>> necessary. Remove call to swap_ops_for_binary_stmt...
>>>>>>> (reassociate_bb): ... and move it here.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Index: gcc/tree-ssa-reassoc.c
>>>>>>> ===================================================================
>>>>>>> --- gcc/tree-ssa-reassoc.c (revision 192487)
>>>>>>> +++ gcc/tree-ssa-reassoc.c (working copy)
>>>>>>> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>>>>>>      }
>>>>>>>  }
>>>>>>>
>>>>>>> +/* Assign UIDs to statements in basic block BB.  */
>>>>>>> +
>>>>>>> +static void
>>>>>>> +assign_uids (basic_block bb)
>>>>>>> +{
>>>>>>> +  unsigned uid = 0;
>>>>>>> +  gimple_stmt_iterator gsi;
>>>>>>> +  /* First assign uids to phis.  */
>>>>>>> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>>>> +    {
>>>>>>> +      gimple stmt = gsi_stmt (gsi);
>>>>>>> +      gimple_set_uid (stmt, uid++);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +  /* Then assign uids to stmts.  */
>>>>>>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>>>> +    {
>>>>>>> +      gimple stmt = gsi_stmt (gsi);
>>>>>>> +      gimple_set_uid (stmt, uid++);
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>> +/* For each operand in OPS, find the basic block that contains the statement
>>>>>>> +   which defines the operand. For all such basic blocks, assign UIDs.  */
>>>>>>> +
>>>>>>> +static void
>>>>>>> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
>>>>>>> +{
>>>>>>> +  operand_entry_t oe;
>>>>>>> +  int i;
>>>>>>> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
>>>>>>> +
>>>>>>> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>>>>>>> +    {
>>>>>>> +      gimple def_stmt;
>>>>>>> +      basic_block bb;
>>>>>>> +      if (TREE_CODE (oe->op) != SSA_NAME)
>>>>>>> +        continue;
>>>>>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>>>>>> +      bb = gimple_bb (def_stmt);
>>>>>>> +      if (!pointer_set_contains (seen_bbs, bb))
>>>>>>> +        {
>>>>>>> +          assign_uids (bb);
>>>>>>> +          pointer_set_insert (seen_bbs, bb);
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +  pointer_set_destroy (seen_bbs);
>>>>>>> +}
>>>>>>
>>>>>> Please assign UIDs once using the existing renumber_gimple_stmt_uids ().
>>>>>> You seem to call the above multiple times and thus do work bigger than
>>>>>> O(number of basic blocks).
>>>>>
>>>>> The reason I call the above multiple times is that gsi_move_before
>>>>> might get called between two calls to the above. For instance, after
>>>>> rewrite_expr_tree is called once, the following sequence of calls
>>>>> could happen:
>>>>> reassociate_bb -> linearize_expr_tree -> linearize_expr ->
>>>>> gsi_move_before.  So it is not sufficient to call
>>>>> renumber_gimple_stmt_uids once per do_reassoc. Or do you want me to
>>>>> use renumber_gimple_stmt_uids_in_blocks instead of
>>>>> assign_uids_in_relevant_bbs?
>>>>
>>>> It's sufficient to call it once if you conservatively update UIDs at stmt
>>>> move / insert time (assign the same UID as the stmt before/after).
>>>
>>> I was worried that that would make the code brittle, but looking at
>>> the places where a new statement is added or moved, I think it is
>>> fine.  I have made the change in the new patch.
>>
>> Thanks.
>>
>>>>>
>>>>>
>>>>>>> +/* Ensure that operands in the OPS vector starting from OPINDEXth entry are live
>>>>>>> +   at STMT.  This is accomplished by moving STMT if needed.  */
>>>>>>> +
>>>>>>> +static void
>>>>>>> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) *ops, int opindex)
>>>>>>> +{
>>>>>>> +  int i;
>>>>>>> +  int len = VEC_length (operand_entry_t, ops);
>>>>>>> +  gimple insert_stmt = stmt;
>>>>>>> +  basic_block insert_bb = gimple_bb (stmt);
>>>>>>> +  gimple_stmt_iterator gsi_insert, gsistmt;
>>>>>>> +  for (i = opindex; i < len; i++)
>>>>>>> +    {
>>>>>>
>>>>>> Likewise you call this for each call to rewrite_expr_tree, so it seems to me
>>>>>> this is quadratic in the number of ops in the op vector.
>>>>>
>>>>> The call to ensure_ops_are_available inside rewrite_expr_tree is
>>>>> guarded by if (!moved), and I am setting moved = true there to ensure
>>>>> that ensure_ops_are_available is called only once per reassociation
>>>>> of an expression tree.
>>>>
>>>> It would be a lot easier to understand this function if you call it once
>>>> after rewrite_expr_tree finished.
>>> I think you meant "before rewrite_expr_tree". One thing to note here
>>> is that this function is called only when we first see a statement whose
>>> operand changes after reassociation. That's the reason the moving of
>>> statements happens inside rewrite_expr_tree.
>>
>> I meant after because rewrite_expr_tree ends up swapping things.  But
>> I note you changed that in your revised patch - good ;)
>>
>> So now that rewrite_expr_tree no longer changes the order of the ops
>> vector, we can indeed move stmts before it, but backwards.  Thus,
>> instead of walking forward and
>>
>> +              /* Ensure that STMT1 is moved to a place where all operands in
>> +                 OPS[opindex + i...] are available.  */
>> +              ensure_ops_are_available (stmt1, ops, opindex + i);
>>
>> we can walk backward and only need to consider the immediate next operand
>> because we know all further operands are available at the place the immediate
>> next operand is defined.
>>
>> Which should remove the need for the loop in ensure_ops_are_available
>>
>> +  for (i = opindex; i < len; i++)
>> +    {
>>
>> Correct?  (We may need to special-case the last (two?) stmts).
>
> Sorry, I am not able to understand your proposal here. The confusion
> relates to the use of walking forward vs backward. In the current
> patch, I start from the last statement (root of the expression tree)
> and follow the chain of defs using SSA_NAME_DEF_STMT (which I consider
> as walking backward). Do you want to start from the leaf of the
> expression and follow the chain in the opposite direction till the
> stmt defining the root of the tree is reached? That would indeed avoid
> comparing each statement against all statements that define the ops
> array as it is sufficient to compare against the statements that
> define the RHS operands. If that is indeed what you are proposing, I
> tried that approach and ran into issues. Doing this would mean that
> the IR is in an inconsistent state during intermediate stages. For
> instance, transforming
>
> t1 = a + b
> t2 = t1 + c
> d = ...
> t3 = t2 + d
>
> into
>
> t1 = a + d
> t2 = t1 + c
> d = ...
> t3 = t2 + b
>
> would result in a state where t2 = t1 + c appears before t1 = a + b.
> I don't know if that is the root cause, but I noticed that doing so
> resulted in ssa_verify complaining about DEBUG statements whose
> operands appeared before their defs. If this is not what you mean by
> walking backwards, could you please explain with a simple example?

Yes, that's what I mean by walking backwards.  DEBUG stmts need
to be reset when they are no longer valid.  reassoc suffers from
many (existing) debug issues.

Richard.
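
[Sketch: resetting the debug uses of a moved definition, roughly the
shape it takes in the patch reviewed below; MOVED_STMT is a
hypothetical name for the statement being sunk.

  imm_use_iterator iter;
  gimple use_stmt;
  tree lhs = gimple_get_lhs (moved_stmt);

  /* Invalidate debug binds that would otherwise read LHS before its
     new definition point.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    if (is_gimple_debug (use_stmt))
      {
        gimple_debug_bind_reset_value (use_stmt);
        update_stmt (use_stmt);
      }]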

Easwaran Raman Nov. 6, 2012, 12:54 a.m. UTC | #11
I am unable to figure out the right way to handle the debug
statements. What I tried was to find the debug statements that use the
SSA name defined by the statement I moved (using SSA_NAME_IMM_USE_NODE)
and then move them to the right place as well. Thus, if I have to
move t1 = a + b down (after the definition of 'd'), I also moved all
debug statements that use t1 to after the new position of t1. That
still caused use-before-def problems in ssa_verify. I noticed that the
debug statements got modified behind the scenes, causing these issues.
Any hints on the right way to handle the debug statements would be
very helpful.

Thanks,
Easwaran

Richard Biener Dec. 6, 2012, 9:10 a.m. UTC | #12
On Tue, Nov 6, 2012 at 1:54 AM, Easwaran Raman <eraman@google.com> wrote:
> I am unable to figure out the right way to handle the debug
> statements. What I tried was to find debug statements that use the SSA
> name defined by the statement I moved (using SSA_NAME_IMM_USE_NODE)
> and then moved them as well at the right place. Thus, if I have to
> move t1 = a + b down (after the definition of 'd'), I also moved all
> debug statements that use t1 after the new position of t1. That still
> caused use-before-def problems in ssa_verify. I noticed that the debug
> statements got modified behind the scenes causing these issues. Any
> hints on what is the right way to handle the debug statements would be
> very helpful.

I think you cannot (and should not) move debug statements.  Instead you
have to invalidate them.  Otherwise you'll introduce confusion as debug
info cannot handle overlapping live ranges.

But maybe Alex can clarify.

Richard.
Easwaran Raman Dec. 7, 2012, 8:01 p.m. UTC | #13
It seems I need to reset the debug uses of a statement before moving
the statement itself. The attached patch walks the tree to be
reassociated from leaf to root and places each statement at the point
where its dependences will be met after reassociation. This bootstraps
and I am running the tests. Ok if there are no test failures?

Thanks,
Easwaran

2012-12-07   Easwaran Raman  <eraman@google.com>
* tree-ssa-reassoc.c(find_insert_point): New function.
(insert_stmt_after): Likewise.
(get_def_stmt): Likewise.
(ensure_ops_are_available): Likewise.
(rewrite_expr_tree): Do not move statements beyond what is
necessary. Remove call to swap_ops_for_binary_stmt...
(reassociate_bb): ... and move it here.
(build_and_add_sum): Assign UIDs for new statements.
(linearize_expr): Likewise.
(do_reassoc): Renumber gimple statement UIDs.
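
[Sketch: the leaf-to-root placement in outline; chain_parent and
place_after_operand_defs are hypothetical names, not identifiers from
the patch.

  /* Walk the linearized chain from the leaf statement up to the root;
     each statement is sunk only as far as the latest definition among
     the operands it will consume after reassociation.  */
  for (stmt = leaf; stmt != root; stmt = chain_parent (stmt))
    place_after_operand_defs (stmt, ops);]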



Alexandre Oliva Dec. 7, 2012, 9:36 p.m. UTC | #14
On Dec  6, 2012, Richard Biener <richard.guenther@gmail.com> wrote:


> I think you cannot (and should not) move debug statements.

Yup.

If you must move a DEF past a debug stmt that USEs it, resetting the
debug stmt is a (poor) option; a better one is to insert a debug temp
bound to the expression being moved down, and to replace the use in the
debug stmt with a use of the debug temp.  I.e., given:

  x_? = <expr>
  # DEBUG y => x_?
  ...

change that to:

  # DEBUG T.?? => <expr>
  # DEBUG y => T.??
  ...
  x_? = <expr>
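
[Applied to the earlier t1 = a + b example, assuming t1's definition
has to move past the definition of 'd', this would give roughly:

  # DEBUG D#1 => a + b
  # DEBUG y => D#1
  d = ...
  t1 = a + b

so the debug bind of y survives via the debug temp instead of being
reset.]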
Easwaran Raman April 24, 2013, 11:03 p.m. UTC | #15
I want to resend this patch for consideration. I applied the patch to
trunk and confirmed that it bootstraps and doesn't cause test
regressions. Is this ok for trunk?

Thanks,
Easwaran

Richard Biener April 25, 2013, 11:42 a.m. UTC | #16
On Fri, Dec 7, 2012 at 9:01 PM, Easwaran Raman <eraman@google.com> wrote:
> It seems I need to reset the debug uses of a statement before moving
> the statement itself. The attached patch starts from the leaf to root
> of the tree to be reassociated and places them at the point where
> their dependences will be met after reassociation. This bootstraps and
> I am running the tests. Ok if there are no test failures?

+  if ((dep_bb != insert_bb
+       && !dominated_by_p (CDI_DOMINATORS, insert_bb, dep_bb))
+      || (dep_bb == insert_bb
+          && gimple_uid (insert_stmt) < gimple_uid (dep_stmt)))
+    {
+      insert_stmt = dep_stmt;
+    }

superfluous {}

+  /* If there are any debug uses of LHS, reset them.  */
+  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+    {
+      if (is_gimple_debug (use_stmt))
+        {
+          gimple_debug_bind_reset_value (use_stmt);

that's only needed for debug stmts that are not dominated by insert_point, no?

+  /* Statements marked for throw can not be in the middle of a basic block. So
+     we can not insert a statement (not marked for throw) immediately after.  */
+  else if (stmt_can_throw_internal (insert_point))
+    {

use stmt_ends_bb_p (insert_point) instead

+      edge succ_edge = find_fallthru_edge (insert_bb->succs);
+      insert_bb = succ_edge->dest;
+      /* Insert STMT at the beginning of the successor basic block.  */
+      gsi_insert = gsi_after_labels (insert_bb);
+      gsi_move_before (&gsistmt, &gsi_insert);

are you sure this is a proper insert location?  How would you even arrive in
this situation given find_insert_point dominance checks?

Otherwise the patch looks ok - can you add one or two testcases please?

Thanks,
Richard.

Easwaran Raman May 18, 2013, 1:36 a.m. UTC | #17
On Thu, Apr 25, 2013 at 4:42 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Fri, Dec 7, 2012 at 9:01 PM, Easwaran Raman <eraman@google.com> wrote:
>> It seems I need to reset the debug uses of a statement before moving
>> the statement itself. The attached patch starts from the leaf to root
>> of the tree to be reassociated and places them at the point where
>> their dependences will be met after reassociation. This bootstraps and
>> I am running the tests. Ok if there are no test failures?
>
> +  if ((dep_bb != insert_bb
> +       && !dominated_by_p (CDI_DOMINATORS, insert_bb, dep_bb))
> +      || (dep_bb == insert_bb
> +          && gimple_uid (insert_stmt) < gimple_uid (dep_stmt)))
> +    {
> +      insert_stmt = dep_stmt;
> +    }
>
> superfluous {}
Removed.
>
> +  /* If there are any debug uses of LHS, reset them.  */
> +  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
> +    {
> +      if (is_gimple_debug (use_stmt))
> +        {
> +          gimple_debug_bind_reset_value (use_stmt);
>
> that's only needed for debug stmts that are not dominated by insert_point, no?
You are right. I have added the check.

>
> +  /* Statements marked for throw can not be in the middle of a basic block. So
> +     we can not insert a statement (not marked for throw) immediately
> after.  */
> +  else if (stmt_can_throw_internal (insert_point))
> +    {
>
> use stmt_ends_bb_p (insert_point) instead
>
> +      edge succ_edge = find_fallthru_edge (insert_bb->succs);
> +      insert_bb = succ_edge->dest;
> +      /* Insert STMT at the beginning of the successor basic block.  */
> +      gsi_insert = gsi_after_labels (insert_bb);
> +      gsi_move_before (&gsistmt, &gsi_insert);
>
> are you sure this is a proper insert location?  How would you even arrive in
> this situation given find_insert_point dominance checks?

Let's say the last statement in a BB that can throw feeds into a
statement that is to be reassociated. Then the insert point would be
that throwing statement. Instead, you can always insert at the
beginning of its (lone) successor. This is independent of the
find_insert_point checks.
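
[Sketch of that case in GIMPLE form, with foo as a placeholder call:

  <bb1>:
    t1 = a + b
    c = foo ();   // may throw, so it must end bb1
  <bb2>:
    t2 = t1 + c

If reassociation rewrites the tree as (a + c) + b, t1's new operand c
forces t1 below c's definition; the insert point is the throwing call,
so t1 is instead placed at the beginning of bb1's lone fallthru
successor:

  <bb1>:
    c = foo ();   // may throw, ends bb1
  <bb2>:
    t1 = a + c    // inserted after labels
    t2 = t1 + b]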

>
> Otherwise the patch looks ok - can you add one or two testcases please?
I have added a test case and submitted at r199048.

Thanks,
Easwaran


diff mbox

Patch

Index: gcc/tree-ssa-reassoc.c
===================================================================
--- gcc/tree-ssa-reassoc.c (revision 192487)
+++ gcc/tree-ssa-reassoc.c (working copy)
@@ -2250,6 +2250,128 @@  swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
     }
 }

+/* Assign UIDs to statements in basic block BB.  */
+
+static void
+assign_uids (basic_block bb)
+{
+  unsigned uid = 0;
+  gimple_stmt_iterator gsi;
+  /* First assign uids to phis.  */
+  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+      gimple_set_uid (stmt, uid++);
+    }
+
+  /* Then assign uids to stmts.  */
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+      gimple_set_uid (stmt, uid++);
+    }
+}
+
+/* For each operand in OPS, find the basic block that contains the statement
+   which defines the operand. For all such basic blocks, assign UIDs.  */
+
+static void
+assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
+{
+  operand_entry_t oe;
+  int i;
+  struct pointer_set_t *seen_bbs = pointer_set_create ();
+
+  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
+    {
+      gimple def_stmt;
+      basic_block bb;
+      if (TREE_CODE (oe->op) != SSA_NAME)
+        continue;
+      def_stmt = SSA_NAME_DEF_STMT (oe->op);
+      bb = gimple_bb (def_stmt);
+      if (!pointer_set_contains (seen_bbs, bb))
+        {
+          assign_uids (bb);
+          pointer_set_insert (seen_bbs, bb);
+        }
+    }
+  pointer_set_destroy (seen_bbs);
+}
+
+/* Ensure that operands in the OPS vector starting from OPINDEXth entry are live
+   at STMT.  This is accomplished by moving STMT if needed.  */
+
+static void
+ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) * ops, int opindex)
+{
+  int i;
+  int len = VEC_length (operand_entry_t, ops);
+  gimple insert_stmt = stmt;
+  basic_block insert_bb = gimple_bb (stmt);
+  gimple_stmt_iterator gsi_insert, gsistmt;
+  for (i = opindex; i < len; i++)
+    {
+      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
+      gimple def_stmt;
+      basic_block def_bb;
+      /* Ignore constants and operands with default definitions.  */
+      if (TREE_CODE (oe->op) != SSA_NAME
+          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
+        continue;
+      def_stmt = SSA_NAME_DEF_STMT (oe->op);
+      def_bb = gimple_bb (def_stmt);
+      if (def_bb != insert_bb
+          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
+        {
+          insert_bb = def_bb;
+          insert_stmt = def_stmt;
+        }
+      else if (def_bb == insert_bb
+               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
+        insert_stmt = def_stmt;
+    }
+  if (insert_stmt == stmt)
+    return;
+  gsistmt = gsi_for_stmt (stmt);
+  /* If INSERT_STMT is a phi node, then do not insert just after that statement.
+     Instead, find the first non-label gimple statement in BB and insert before
+     that.  */
+  if (gimple_code (insert_stmt) == GIMPLE_PHI)
+    {
+      gsi_insert = gsi_after_labels (insert_bb);
+      gsi_move_before (&gsistmt, &gsi_insert);
+    }
+  /* Statements marked for throw can not be in the middle of a basic block. So
+     we can not insert a statement (not marked for throw) immediately after.  */
+  else if (lookup_stmt_eh_lp (insert_stmt) > 0
+           && stmt_can_throw_internal (insert_stmt))
+    {
+      edge e, succ_edge = NULL;
+      edge_iterator ei;
+
+      /* There should be exactly one normal edge out of the basic block.  */
+      FOR_EACH_EDGE (e, ei, insert_bb->succs)
+        {
+          if (!(e->flags & EDGE_COMPLEX))
+            {
+              gcc_assert (succ_edge == NULL);
+              succ_edge = e;
+            }
+        }
+      /* Insert STMT at the beginning of the successor basic block.  */
+      insert_bb = succ_edge->dest;
+      gsi_insert = gsi_after_labels (insert_bb);
+      gsi_move_before (&gsistmt, &gsi_insert);
+    }
+  else
+    {
+      gsi_insert = gsi_for_stmt (insert_stmt);
+      gsi_move_after (&gsistmt, &gsi_insert);
+    }
+
+}
+
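
[To summarize the intended effect with the running example: given

  t1 = a + b
  t2 = t1 + c
  d = ...
  t3 = t2 + d

the old code moved t1 and t2 all the way down next to t3 whenever the
operand list changed, lengthening the live ranges of a, b and c.  With
this patch each intermediate statement is sunk only far enough that the
operands it ends up with are available, so t1 and t2 stay where they
are unless reassociation actually hands them an operand such as d.]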