Patchwork Move statements upwards after reassociation

login
register
mail settings
Submitter Easwaran Raman
Date Oct. 11, 2012, 1:52 a.m.
Message ID <CAPK5YPZmV7PYKLxxXoGUzrrzOu3P04Mk-daOZhfvfLRY53Vtbg@mail.gmail.com>
Download mbox | patch
Permalink /patch/190778/
State New
Headers show

Comments

Easwaran Raman - Oct. 11, 2012, 1:52 a.m.
Hi,
 In the expression reassociation pass, statements might get moved
downwards to ensure that dependences are not violated after
reassociation. This can increase the live range and, in a tight loop,
result in spills.  This patch simply does a code motion of those
statements involved in reassociation and pushes them upwards in the BB
as far as possible without violating dependences. Bootstraps and no
test regressions on an x86_64 machine running linux. Ok for trunk?


- Easwaran

-----------
2012-10-10  Easwaran Raman  <eraman@google.com>

               * tree-ssa-reassoc.c (move_stmt_upwards): New function.
                 (rewrite_expr_tree): Record statements to be moved.
                 (reassociate_bb): Move statements affected by reassociation
                 as early as possible.
Xinliang David Li - Oct. 11, 2012, 6:49 a.m.
On Wed, Oct 10, 2012 at 6:52 PM, Easwaran Raman <eraman@google.com> wrote:
> Hi,
>  In the expression reassociation pass, statements might get moved
> downwards to ensure that dependences are not violated after
> reassociation. This can increase the live range and, in a tight loop,
> result in spills.  This patch simply does a code motion of those
> statements involved in reassociation and pushes them upwards in the BB
> as far as possible without violating dependences. Bootstraps and no
> tests regressions on a x86_64 machine running linux. Ok for trunk?

Can you post an example such as adler32 or a loop in ffmpeg?

The increased register pressure by re-association is a long standing
issue. There was an earlier attempt to address it:
http://gcc.1065356.n5.nabble.com/A-new-gimple-pass-LRS-live-range-shrinking-to-reduce-register-pressure-td495858.html


>
>
> - Easwaran
>
> -----------
> 2012-10-10  Easwaran Raman  <eraman@google.com>
>
>                * tree-ssa-reassoc.c (move_stmt_upwards): New function.
>                  (rewrite_expr_tree): Record statements to be moved.
>                  (reassociate_bb): Move statements affected by reassociation
>                  as early as possible.
>
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c (revision 191879)
> +++ gcc/tree-ssa-reassoc.c (working copy)
> @@ -2250,13 +2250,51 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>      }
>  }
>
> +/* Move STMT up within its BB until it can not be moved any further.  */
> +
> +static void move_stmt_upwards (gimple stmt)

static void
move_stmt_upwards (...)

> +{
> +  gimple_stmt_iterator gsi, gsistmt;
> +  tree rhs1, rhs2;
> +  gimple rhs1def = NULL, rhs2def = NULL;

New line needed.

> +  rhs1 = gimple_assign_rhs1 (stmt);
> +  rhs2 = gimple_assign_rhs2 (stmt);
> +  gcc_assert (rhs1);
> +  if (TREE_CODE (rhs1) == SSA_NAME)
> +    rhs1def = SSA_NAME_DEF_STMT (rhs1);
> +  else if (TREE_CODE (rhs1) != REAL_CST
> +           && TREE_CODE (rhs1) != INTEGER_CST)
> +    return;
> +  if (rhs2)
> +    {
> +      if (TREE_CODE (rhs2) == SSA_NAME)
> +        rhs2def = SSA_NAME_DEF_STMT (rhs2);
> +      else if (TREE_CODE (rhs1) != REAL_CST
> +               && TREE_CODE (rhs1) != INTEGER_CST)
> +        return;
> +    }

It is possible to handle stmts with memory operations too, but that
probably won't fit into the re-association pass.

> +  gsi = gsi_for_stmt (stmt);
> +  gsistmt = gsi;
> +  gsi_prev (&gsi);
> +  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
> +    {
> +      gimple curr_stmt = gsi_stmt (gsi);
> +      if (curr_stmt == rhs1def || curr_stmt == rhs2def) {
> +        gsi_move_after (&gsistmt, &gsi);
> +        return;
> +      }
> +    }
> +
> +}
> +
>  /* Recursively rewrite our linearized statements so that the operators
>     match those in OPS[OPINDEX], putting the computation in rank
>     order.  */

Document STMTS_TO_MOVE.

>
>  static void
>  rewrite_expr_tree (gimple stmt, unsigned int opindex,
> -   VEC(operand_entry_t, heap) * ops, bool moved)
> +   VEC(operand_entry_t, heap) * ops, bool moved,
> +                   VEC(gimple, heap) **stmts_to_move)
>  {
>    tree rhs1 = gimple_assign_rhs1 (stmt);
>    tree rhs2 = gimple_assign_rhs2 (stmt);
> @@ -2299,6 +2337,7 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>        print_gimple_stmt (dump_file, stmt, 0, 0);
>      }
>   }
> +      VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>        return;
>      }
>
> @@ -2346,7 +2385,9 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>      }
>    /* Recurse on the LHS of the binary operator, which is guaranteed to
>       be the non-leaf side.  */
> -  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
> +  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved,
> +                     stmts_to_move);
> +  VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>  }
>
>  /* Find out how many cycles we need to compute statements chain.
> @@ -3427,6 +3468,9 @@ reassociate_bb (basic_block bb)
>  {
>    gimple_stmt_iterator gsi;
>    basic_block son;
> +  VEC(gimple, heap) *stmts_to_move = NULL;
> +  gimple stmt;
> +  int i;
>
>    for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
>      {
> @@ -3542,7 +3586,7 @@ reassociate_bb (basic_block bb)
>        && VEC_length (operand_entry_t, ops) > 3)
>      rewrite_expr_tree_parallel (stmt, width, ops);
>    else
> -    rewrite_expr_tree (stmt, 0, ops, false);
> +    rewrite_expr_tree (stmt, 0, ops, false, &stmts_to_move);
>
>    /* If we combined some repeated factors into a
>       __builtin_powi call, multiply that result by the
> @@ -3560,6 +3604,7 @@ reassociate_bb (basic_block bb)
>         target_ssa);
>        gimple_set_location (mul_stmt, gimple_location (stmt));
>        gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
> +                      VEC_safe_push (gimple, heap, stmts_to_move, mul_stmt);
>      }
>   }
>
> @@ -3567,6 +3612,11 @@ reassociate_bb (basic_block bb)
>      }
>   }
>      }
> +
> +  FOR_EACH_VEC_ELT (gimple, stmts_to_move, i, stmt)
> +    move_stmt_upwards (stmt);
> +  VEC_free (gimple, heap, stmts_to_move);
> +

>    for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
>         son;
>         son = next_dom_son (CDI_POST_DOMINATORS, son))
Richard Guenther - Oct. 11, 2012, 1:16 p.m.
On Thu, Oct 11, 2012 at 3:52 AM, Easwaran Raman <eraman@google.com> wrote:
> Hi,
>  In the expression reassociation pass, statements might get moved
> downwards to ensure that dependences are not violated after
> reassociation. This can increase the live range and, in a tight loop,
> result in spills.  This patch simply does a code motion of those
> statements involved in reassociation and pushes them upwards in the BB
> as far as possible without violating dependences. Bootstraps and no
> tests regressions on a x86_64 machine running linux. Ok for trunk?

I don't think reassoc is the right place to do this.  There was the idea
of a tree-level "scheduling" pass, some of which (and I suppose some
of your changes) TER later happily will un-do.

Few comments:

>
> - Easwaran
>
> -----------
> 2012-10-10  Easwaran Raman  <eraman@google.com>
>
>                * tree-ssa-reassoc.c (move_stmt_upwards): New function.
>                  (rewrite_expr_tree): Record statements to be moved.
>                  (reassociate_bb): Move statements affected by reassociation
>                  as early as possible.
>
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c (revision 191879)
> +++ gcc/tree-ssa-reassoc.c (working copy)
> @@ -2250,13 +2250,51 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>      }
>  }
>
> +/* Move STMT up within its BB until it can not be moved any further.  */
> +
> +static void move_stmt_upwards (gimple stmt)
> +{
> +  gimple_stmt_iterator gsi, gsistmt;
> +  tree rhs1, rhs2;
> +  gimple rhs1def = NULL, rhs2def = NULL;
> +  rhs1 = gimple_assign_rhs1 (stmt);
> +  rhs2 = gimple_assign_rhs2 (stmt);
> +  gcc_assert (rhs1);

Please no such senseless asserts.  The following line will segfault anyway
if rhs1 is NULL and with a debugger an assert doesn't add any useful
information.

> +  if (TREE_CODE (rhs1) == SSA_NAME)
> +    rhs1def = SSA_NAME_DEF_STMT (rhs1);
> +  else if (TREE_CODE (rhs1) != REAL_CST
> +           && TREE_CODE (rhs1) != INTEGER_CST)
> +    return;
> +  if (rhs2)

You may not access gimple_assign_rhs2 if it is not there.  So you have
to check whether you have an unary, binary or ternary (yes) operation.

> +    {
> +      if (TREE_CODE (rhs2) == SSA_NAME)
> +        rhs2def = SSA_NAME_DEF_STMT (rhs2);
> +      else if (TREE_CODE (rhs1) != REAL_CST
> +               && TREE_CODE (rhs1) != INTEGER_CST)
> +        return;
> +    }
> +  gsi = gsi_for_stmt (stmt);
> +  gsistmt = gsi;
> +  gsi_prev (&gsi);
> +  for (; !gsi_end_p (gsi); gsi_prev (&gsi))

????

This doesn't make much sense.  You can move a stmt to the nearest
common post-dominator.  Assuming you only want to handle placing
it after rhs1def or after rhs2def(?) you don't need any loop, just
two dominator queries and an insertion after one of the definition
stmts.

But this code should consider BBs.  And I don't see why more optimal
placement cannot be done during rewrite_expr_tree itself.

NB, the whole reassoc code needs a re-write to avoid the excessive
stmt modifications when it does nothing.  So I'd very much rather avoid
adding anything to reassoc until that rewrite happened.

Richard.

> +    {
> +      gimple curr_stmt = gsi_stmt (gsi);
> +      if (curr_stmt == rhs1def || curr_stmt == rhs2def) {
> +        gsi_move_after (&gsistmt, &gsi);
> +        return;
> +      }
> +    }
> +
> +}
> +
>  /* Recursively rewrite our linearized statements so that the operators
>     match those in OPS[OPINDEX], putting the computation in rank
>     order.  */
>
>  static void
>  rewrite_expr_tree (gimple stmt, unsigned int opindex,
> -   VEC(operand_entry_t, heap) * ops, bool moved)
> +   VEC(operand_entry_t, heap) * ops, bool moved,
> +                   VEC(gimple, heap) **stmts_to_move)
>  {
>    tree rhs1 = gimple_assign_rhs1 (stmt);
>    tree rhs2 = gimple_assign_rhs2 (stmt);
> @@ -2299,6 +2337,7 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>        print_gimple_stmt (dump_file, stmt, 0, 0);
>      }
>   }
> +      VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>        return;
>      }
>
> @@ -2346,7 +2385,9 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>      }
>    /* Recurse on the LHS of the binary operator, which is guaranteed to
>       be the non-leaf side.  */
> -  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
> +  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved,
> +                     stmts_to_move);
> +  VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>  }
>
>  /* Find out how many cycles we need to compute statements chain.
> @@ -3427,6 +3468,9 @@ reassociate_bb (basic_block bb)
>  {
>    gimple_stmt_iterator gsi;
>    basic_block son;
> +  VEC(gimple, heap) *stmts_to_move = NULL;
> +  gimple stmt;
> +  int i;
>
>    for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
>      {
> @@ -3542,7 +3586,7 @@ reassociate_bb (basic_block bb)
>        && VEC_length (operand_entry_t, ops) > 3)
>      rewrite_expr_tree_parallel (stmt, width, ops);
>    else
> -    rewrite_expr_tree (stmt, 0, ops, false);
> +    rewrite_expr_tree (stmt, 0, ops, false, &stmts_to_move);
>
>    /* If we combined some repeated factors into a
>       __builtin_powi call, multiply that result by the
> @@ -3560,6 +3604,7 @@ reassociate_bb (basic_block bb)
>         target_ssa);
>        gimple_set_location (mul_stmt, gimple_location (stmt));
>        gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
> +                      VEC_safe_push (gimple, heap, stmts_to_move, mul_stmt);
>      }
>   }
>
> @@ -3567,6 +3612,11 @@ reassociate_bb (basic_block bb)
>      }
>   }
>      }
> +
> +  FOR_EACH_VEC_ELT (gimple, stmts_to_move, i, stmt)
> +    move_stmt_upwards (stmt);
> +  VEC_free (gimple, heap, stmts_to_move);
> +
>    for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
>         son;
>         son = next_dom_son (CDI_POST_DOMINATORS, son))
Xinliang David Li - Oct. 11, 2012, 3:19 p.m.
On Thu, Oct 11, 2012 at 6:16 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Thu, Oct 11, 2012 at 3:52 AM, Easwaran Raman <eraman@google.com> wrote:
>> Hi,
>>  In the expression reassociation pass, statements might get moved
>> downwards to ensure that dependences are not violated after
>> reassociation. This can increase the live range and, in a tight loop,
>> result in spills.  This patch simply does a code motion of those
>> statements involved in reassociation and pushes them upwards in the BB
>> as far as possible without violating dependences. Bootstraps and no
>> tests regressions on a x86_64 machine running linux. Ok for trunk?
>
> I don't think reassoc is the right place to do this.  There was the idea
> of a tree-level "scheduling" pass, some of which (and I suppose some
> of your changes) TER later happily will un-do.

Until the tree scheduling pass exists, I think the small local change
in the patch is the right step forward.

>
> Few comments:
>
>>
>> - Easwaran
>>
>> -----------
>> 2012-10-10  Easwaran Raman  <eraman@google.com>
>>
>>                * tree-ssa-reassoc.c (move_stmt_upwards): New function.
>>                  (rewrite_expr_tree): Record statements to be moved.
>>                  (reassociate_bb): Move statements affected by reassociation
>>                  as early as possible.
>>
>> Index: gcc/tree-ssa-reassoc.c
>> ===================================================================
>> --- gcc/tree-ssa-reassoc.c (revision 191879)
>> +++ gcc/tree-ssa-reassoc.c (working copy)
>> @@ -2250,13 +2250,51 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>      }
>>  }
>>
>> +/* Move STMT up within its BB until it can not be moved any further.  */
>> +
>> +static void move_stmt_upwards (gimple stmt)
>> +{
>> +  gimple_stmt_iterator gsi, gsistmt;
>> +  tree rhs1, rhs2;
>> +  gimple rhs1def = NULL, rhs2def = NULL;
>> +  rhs1 = gimple_assign_rhs1 (stmt);
>> +  rhs2 = gimple_assign_rhs2 (stmt);
>> +  gcc_assert (rhs1);
>
> Please no such senseless asserts.  The following line will segfault anyway
> if rhs1 is NULL and with a debugger an assert doesn't add any useful
> information.

Did Ian add the stack trace support?

>
>> +  if (TREE_CODE (rhs1) == SSA_NAME)
>> +    rhs1def = SSA_NAME_DEF_STMT (rhs1);
>> +  else if (TREE_CODE (rhs1) != REAL_CST
>> +           && TREE_CODE (rhs1) != INTEGER_CST)
>> +    return;
>> +  if (rhs2)
>
> You may not access gimple_assign_rhs2 if it is not there.  So you have
> to check whether you have an unary, binary or ternary (yes) operation.
>
>> +    {
>> +      if (TREE_CODE (rhs2) == SSA_NAME)
>> +        rhs2def = SSA_NAME_DEF_STMT (rhs2);
>> +      else if (TREE_CODE (rhs1) != REAL_CST
>> +               && TREE_CODE (rhs1) != INTEGER_CST)
>> +        return;
>> +    }
>> +  gsi = gsi_for_stmt (stmt);
>> +  gsistmt = gsi;
>> +  gsi_prev (&gsi);
>> +  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
>
> ????
>
> This doesn't make much sense.  You can move a stmt to the nearest
> common post-dominator.  Assuming you only want to handle placing
> it after rhs1def or after rhs2def(?) you don't need any loop, just
> two dominator queries and an insertion after one of the definition
> stmts.

The loop is used to do intra-bb code motion for all stmts forming a
linearized expression (which has been sunk down). A 'dominance' query
is not any cheaper within a BB.  It is true that re-association can
sink stmts across BB boundaries.

>
> But this code should consider BBs.  And I don't see why more optimal
> placement cannot be done during rewrite_expr_tree itself.
>

I suppose the patch is not trying to do optimal placement, but undo
some of the code motions re-association does that have bad side
effects.

> NB, the whole reassoc code needs a re-write to avoid the excessive
> stmt modifications when it does nothing.  So I'd very much rather avoid
> adding anything to reassoc until that rewrite happened.

When will that happen?

thanks,

David

>
> Richard.
>
>> +    {
>> +      gimple curr_stmt = gsi_stmt (gsi);
>> +      if (curr_stmt == rhs1def || curr_stmt == rhs2def) {
>> +        gsi_move_after (&gsistmt, &gsi);
>> +        return;
>> +      }
>> +    }
>> +
>> +}
>> +
>>  /* Recursively rewrite our linearized statements so that the operators
>>     match those in OPS[OPINDEX], putting the computation in rank
>>     order.  */
>>
>>  static void
>>  rewrite_expr_tree (gimple stmt, unsigned int opindex,
>> -   VEC(operand_entry_t, heap) * ops, bool moved)
>> +   VEC(operand_entry_t, heap) * ops, bool moved,
>> +                   VEC(gimple, heap) **stmts_to_move)
>>  {
>>    tree rhs1 = gimple_assign_rhs1 (stmt);
>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>> @@ -2299,6 +2337,7 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>        print_gimple_stmt (dump_file, stmt, 0, 0);
>>      }
>>   }
>> +      VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>        return;
>>      }
>>
>> @@ -2346,7 +2385,9 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>      }
>>    /* Recurse on the LHS of the binary operator, which is guaranteed to
>>       be the non-leaf side.  */
>> -  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
>> +  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved,
>> +                     stmts_to_move);
>> +  VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>  }
>>
>>  /* Find out how many cycles we need to compute statements chain.
>> @@ -3427,6 +3468,9 @@ reassociate_bb (basic_block bb)
>>  {
>>    gimple_stmt_iterator gsi;
>>    basic_block son;
>> +  VEC(gimple, heap) *stmts_to_move = NULL;
>> +  gimple stmt;
>> +  int i;
>>
>>    for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
>>      {
>> @@ -3542,7 +3586,7 @@ reassociate_bb (basic_block bb)
>>        && VEC_length (operand_entry_t, ops) > 3)
>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>    else
>> -    rewrite_expr_tree (stmt, 0, ops, false);
>> +    rewrite_expr_tree (stmt, 0, ops, false, &stmts_to_move);
>>
>>    /* If we combined some repeated factors into a
>>       __builtin_powi call, multiply that result by the
>> @@ -3560,6 +3604,7 @@ reassociate_bb (basic_block bb)
>>         target_ssa);
>>        gimple_set_location (mul_stmt, gimple_location (stmt));
>>        gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
>> +                      VEC_safe_push (gimple, heap, stmts_to_move, mul_stmt);
>>      }
>>   }
>>
>> @@ -3567,6 +3612,11 @@ reassociate_bb (basic_block bb)
>>      }
>>   }
>>      }
>> +
>> +  FOR_EACH_VEC_ELT (gimple, stmts_to_move, i, stmt)
>> +    move_stmt_upwards (stmt);
>> +  VEC_free (gimple, heap, stmts_to_move);
>> +
>>    for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
>>         son;
>>         son = next_dom_son (CDI_POST_DOMINATORS, son))
Steven Bosscher - Oct. 11, 2012, 8:25 p.m.
On Thu, Oct 11, 2012 at 3:16 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> NB, the whole reassoc code needs a re-write to avoid the excessive
> stmt modifications when it does nothing.  So I'd very much rather avoid
> adding anything to reassoc until that rewrite happened.

IMHO it's fair to Easwaran to hold up a patch for a hypothetical
rewrite. Do you have a plan for this rewrite in mind? If not, and
Easwaran's patch does something good (I haven't looked at it to tell)
then it seems to me it should be considered for including.

Ciao!
Steven
Ian Taylor - Oct. 11, 2012, 10 p.m.
On Thu, Oct 11, 2012 at 1:25 PM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> On Thu, Oct 11, 2012 at 3:16 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> NB, the whole reassoc code needs a re-write to avoid the excessive
>> stmt modifications when it does nothing.  So I'd very much rather avoid
>> adding anything to reassoc until that rewrite happened.
>
> IMHO it's fair to Easwaran to hold up a patch for a hypothetical
> rewrite. Do you have a plan for this rewrite in mind? If not, and
> Easwaran's patch does something good (I haven't looked at it to tell)
> then it seems to me it should be considered for including.

Did you mean "fair" or "unfair"?

Ian
Steven Bosscher - Oct. 11, 2012, 10:01 p.m.
On Fri, Oct 12, 2012 at 12:00 AM, Ian Lance Taylor wrote:
> On Thu, Oct 11, 2012 at 1:25 PM, Steven Bosscher wrote:
>> On Thu, Oct 11, 2012 at 3:16 PM, Richard Biener wrote:
>>> NB, the whole reassoc code needs a re-write to avoid the excessive
>>> stmt modifications when it does nothing.  So I'd very much rather avoid
>>> adding anything to reassoc until that rewrite happened.
>>
>> IMHO it's fair to Easwaran to hold up a patch for a hypothetical
>> rewrite. Do you have a plan for this rewrite in mind? If not, and
>> Easwaran's patch does something good (I haven't looked at it to tell)
>> then it seems to me it should be considered for including.
>
> Did you mean "fair" or "unfair"?

Oops!

I meant "not fair".

Ciao!
Steven
Easwaran Raman - Oct. 12, 2012, 1:09 a.m.
Thanks for the comments. As David wrote, the intent of the patch is
not to do a general purpose scheduling, but to compensate for the
possible live range lengthening introduced by reassociation.


On Thu, Oct 11, 2012 at 6:16 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Thu, Oct 11, 2012 at 3:52 AM, Easwaran Raman <eraman@google.com> wrote:
>>
>> +/* Move STMT up within its BB until it can not be moved any further.  */
>> +
>> +static void move_stmt_upwards (gimple stmt)
>> +{
>> +  gimple_stmt_iterator gsi, gsistmt;
>> +  tree rhs1, rhs2;
>> +  gimple rhs1def = NULL, rhs2def = NULL;
>> +  rhs1 = gimple_assign_rhs1 (stmt);
>> +  rhs2 = gimple_assign_rhs2 (stmt);
>> +  gcc_assert (rhs1);
>
> Please no such senseless asserts.  The following line will segfault anyway
> if rhs1 is NULL and with a debugger an assert doesn't add any useful
> information.
Ok.
>
>> +  if (TREE_CODE (rhs1) == SSA_NAME)
>> +    rhs1def = SSA_NAME_DEF_STMT (rhs1);
>> +  else if (TREE_CODE (rhs1) != REAL_CST
>> +           && TREE_CODE (rhs1) != INTEGER_CST)
>> +    return;
>> +  if (rhs2)
>
> You may not access gimple_assign_rhs2 if it is not there.  So you have
> to check whether you have an unary, binary or ternary (yes) operation.

gimple_assign_rhs2 returns NULL_TREE if the RHS of an assignment
has less than two operands.  Regarding the check for ternary
operation, I believe it is not necessary. A statement is considered
for reassociation only if the RHS class is GIMPLE_BINARY_RHS.
Subsequently, for rhs1 and rhs2, it checks if the def statements also
have the same code and so it seems to me that a statement with a
ternary operator in the RHS will never be considered in
rewrite_expr_tree.


>
>> +    {
>> +      if (TREE_CODE (rhs2) == SSA_NAME)
>> +        rhs2def = SSA_NAME_DEF_STMT (rhs2);
>> +      else if (TREE_CODE (rhs1) != REAL_CST
>> +               && TREE_CODE (rhs1) != INTEGER_CST)
>> +        return;
>> +    }
>> +  gsi = gsi_for_stmt (stmt);
>> +  gsistmt = gsi;
>> +  gsi_prev (&gsi);
>> +  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
>
> ????
>
> This doesn't make much sense.  You can move a stmt to the nearest
> common post-dominator.  Assuming you only want to handle placing
> it after rhs1def or after rhs2def(?) you don't need any loop, just
> two dominator queries and an insertion after one of the definition
> stmts.

Within a BB isn't that still O(size of BB)?

> But this code should consider BBs.
For reassociation to look across BBs, the code should look something like this:

L1 :
   a_2 = a_1 + 10
   jc L3
L2:
  a_3 = a_2 + 20

- L1 should dominate L2 (otherwise there will be a phi node at L2 and
the reassociation of a_3 will not consider the definition of a_2)
- There are no other uses of a_2 other than the one in L2.

After reassociation, the stmt defining a_2 would be moved to L2.  In
that case, the downward code motion of a_2 = a_1 + 10 to L2 is
beneficial (one less instruction if the branch is taken). It is not
obvious to me that moving  it to L1 (or whereever a_1 is defined) is
beneficial.

>  And I don't see why more optimal
> placement cannot be done during rewrite_expr_tree itself.

I started with that idea, but my current approach looks more simpler.


Thanks,
Easwaran
>
> NB, the whole reassoc code needs a re-write to avoid the excessive
> stmt modifications when it does nothing.  So I'd very much rather avoid
> adding anything to reassoc until that rewrite happened.
>
> Richard.
>
>> +    {
>> +      gimple curr_stmt = gsi_stmt (gsi);
>> +      if (curr_stmt == rhs1def || curr_stmt == rhs2def) {
>> +        gsi_move_after (&gsistmt, &gsi);
>> +        return;
>> +      }
>> +    }
>> +
>> +}
>> +
>>  /* Recursively rewrite our linearized statements so that the operators
>>     match those in OPS[OPINDEX], putting the computation in rank
>>     order.  */
>>
>>  static void
>>  rewrite_expr_tree (gimple stmt, unsigned int opindex,
>> -   VEC(operand_entry_t, heap) * ops, bool moved)
>> +   VEC(operand_entry_t, heap) * ops, bool moved,
>> +                   VEC(gimple, heap) **stmts_to_move)
>>  {
>>    tree rhs1 = gimple_assign_rhs1 (stmt);
>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>> @@ -2299,6 +2337,7 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>        print_gimple_stmt (dump_file, stmt, 0, 0);
>>      }
>>   }
>> +      VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>        return;
>>      }
>>
>> @@ -2346,7 +2385,9 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>      }
>>    /* Recurse on the LHS of the binary operator, which is guaranteed to
>>       be the non-leaf side.  */
>> -  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
>> +  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved,
>> +                     stmts_to_move);
>> +  VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>  }
>>
>>  /* Find out how many cycles we need to compute statements chain.
>> @@ -3427,6 +3468,9 @@ reassociate_bb (basic_block bb)
>>  {
>>    gimple_stmt_iterator gsi;
>>    basic_block son;
>> +  VEC(gimple, heap) *stmts_to_move = NULL;
>> +  gimple stmt;
>> +  int i;
>>
>>    for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
>>      {
>> @@ -3542,7 +3586,7 @@ reassociate_bb (basic_block bb)
>>        && VEC_length (operand_entry_t, ops) > 3)
>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>    else
>> -    rewrite_expr_tree (stmt, 0, ops, false);
>> +    rewrite_expr_tree (stmt, 0, ops, false, &stmts_to_move);
>>
>>    /* If we combined some repeated factors into a
>>       __builtin_powi call, multiply that result by the
>> @@ -3560,6 +3604,7 @@ reassociate_bb (basic_block bb)
>>         target_ssa);
>>        gimple_set_location (mul_stmt, gimple_location (stmt));
>>        gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
>> +                      VEC_safe_push (gimple, heap, stmts_to_move, mul_stmt);
>>      }
>>   }
>>
>> @@ -3567,6 +3612,11 @@ reassociate_bb (basic_block bb)
>>      }
>>   }
>>      }
>> +
>> +  FOR_EACH_VEC_ELT (gimple, stmts_to_move, i, stmt)
>> +    move_stmt_upwards (stmt);
>> +  VEC_free (gimple, heap, stmts_to_move);
>> +
>>    for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
>>         son;
>>         son = next_dom_son (CDI_POST_DOMINATORS, son))
Richard Guenther - Oct. 12, 2012, 8:45 a.m.
On Fri, Oct 12, 2012 at 3:09 AM, Easwaran Raman <eraman@google.com> wrote:
> Thanks for the comments. As David wrote, the intent of the patch is
> not to do a general purpose scheduling, but to compensate for the
> possible live range lengthening introduced by reassociation.
>
>
> On Thu, Oct 11, 2012 at 6:16 AM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Thu, Oct 11, 2012 at 3:52 AM, Easwaran Raman <eraman@google.com> wrote:
>>>
>>> +/* Move STMT up within its BB until it can not be moved any further.  */
>>> +
>>> +static void move_stmt_upwards (gimple stmt)
>>> +{
>>> +  gimple_stmt_iterator gsi, gsistmt;
>>> +  tree rhs1, rhs2;
>>> +  gimple rhs1def = NULL, rhs2def = NULL;
>>> +  rhs1 = gimple_assign_rhs1 (stmt);
>>> +  rhs2 = gimple_assign_rhs2 (stmt);
>>> +  gcc_assert (rhs1);
>>
>> Please no such senseless asserts.  The following line will segfault anyway
>> if rhs1 is NULL and with a debugger an assert doesn't add any useful
>> information.
> Ok.
>>
>>> +  if (TREE_CODE (rhs1) == SSA_NAME)
>>> +    rhs1def = SSA_NAME_DEF_STMT (rhs1);
>>> +  else if (TREE_CODE (rhs1) != REAL_CST
>>> +           && TREE_CODE (rhs1) != INTEGER_CST)
>>> +    return;
>>> +  if (rhs2)
>>
>> You may not access gimple_assign_rhs2 if it is not there.  So you have
>> to check whether you have an unary, binary or ternary (yes) operation.
>
> gimple_assign_rhs2 returns NULL_TREE if it the RHS of an assignment
> has less than two operands.  Regarding the check for ternary
> operation, I believe it is not necessary. A statement is considered
> for reassociation only if the RHS class is GIMPLE_BINARY_RHS.
> Subsequently, for rhs1 and rhs2, it checks if the def statements also
> have the same code and so it seems to me that a statement with a
> ternary operator in the RHS will never be considered in
> rewrite_expr_tree.
>
>
>>
>>> +    {
>>> +      if (TREE_CODE (rhs2) == SSA_NAME)
>>> +        rhs2def = SSA_NAME_DEF_STMT (rhs2);
>>> +      else if (TREE_CODE (rhs1) != REAL_CST
>>> +               && TREE_CODE (rhs1) != INTEGER_CST)
>>> +        return;
>>> +    }
>>> +  gsi = gsi_for_stmt (stmt);
>>> +  gsistmt = gsi;
>>> +  gsi_prev (&gsi);
>>> +  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
>>
>> ????
>>
>> This doesn't make much sense.  You can move a stmt to the nearest
>> common post-dominator.  Assuming you only want to handle placing
>> it after rhs1def or after rhs2def(?) you don't need any loop, just
>> two dominator queries and an insertion after one of the definition
>> stmts.
>
> Within a BB isn't that still O(size of BB)?

Please document the fact that both stmts are in the same BB.
And no, it isn't, it is O (size of BB ^ 2).  You don't need a loop.
operand rank should reflect the dominance relation inside the BB.
If that doesn't work simply assign UIDs to the stmts first.

>> But this code should consider BBs.
> For reassociation to look across BBs, the code should look something like this:
>
> L1 :
>    a_2 = a_1 + 10
>    jc L3
> L2:
>   a_3 = a_2 + 20
>
> - L1 should dominate L2 (otherwise there will be a phi node at L2 and
> the reassociation of a_3 will not consider the definition of a_2)
> - There are no other uses of a_2 other than the one in L3.
>
> After reassociation, the stmt defining a_2 would be moved to L2.  In
> that case, the downward code motion of a_2 = a_1 + 10 to L2 is
> beneficial (one less instruction if the branch is taken). It is not
> obvious to me that moving  it to L1 (or whereever a_1 is defined) is
> beneficial.

In this case it doesn't matter whether a1 lives through a3 or if a2 does.
But moving the stmt is not necessary, so why not simply avoid it.
You cannot undo it with your patch anyway.

>>  And I don't see why more optimal
>> placement cannot be done during rewrite_expr_tree itself.
>
> I started with that idea, but my current approach looks more simpler.

Simpler, but it's a hack.

So, the only place we "move" stmts in rewrite_expr_tree is here:

      if (!moved)
        {
          gimple_stmt_iterator gsinow, gsirhs1;
          gimple stmt1 = stmt, stmt2;
          unsigned int count;

          gsinow = gsi_for_stmt (stmt);
          count = VEC_length (operand_entry_t, ops) - opindex - 2;
          while (count-- != 0)
            {
              stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
              gsirhs1 = gsi_for_stmt (stmt2);
              gsi_move_before (&gsirhs1, &gsinow);

that moving is done unconditionally, even if stmt2 already dominates stmt1.
Why not improve that instead?

Richard.


>
> Thanks,
> Easwaran
>>
>> NB, the whole reassoc code needs a re-write to avoid the excessive
>> stmt modifications when it does nothing.  So I'd very much rather avoid
>> adding anything to reassoc until that rewrite happened.
>>
>> Richard.
>>
>>> +    {
>>> +      gimple curr_stmt = gsi_stmt (gsi);
>>> +      if (curr_stmt == rhs1def || curr_stmt == rhs2def) {
>>> +        gsi_move_after (&gsistmt, &gsi);
>>> +        return;
>>> +      }
>>> +    }
>>> +
>>> +}
>>> +
>>>  /* Recursively rewrite our linearized statements so that the operators
>>>     match those in OPS[OPINDEX], putting the computation in rank
>>>     order.  */
>>>
>>>  static void
>>>  rewrite_expr_tree (gimple stmt, unsigned int opindex,
>>> -   VEC(operand_entry_t, heap) * ops, bool moved)
>>> +   VEC(operand_entry_t, heap) * ops, bool moved,
>>> +                   VEC(gimple, heap) **stmts_to_move)
>>>  {
>>>    tree rhs1 = gimple_assign_rhs1 (stmt);
>>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>> @@ -2299,6 +2337,7 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>        print_gimple_stmt (dump_file, stmt, 0, 0);
>>>      }
>>>   }
>>> +      VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>>        return;
>>>      }
>>>
>>> @@ -2346,7 +2385,9 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>      }
>>>    /* Recurse on the LHS of the binary operator, which is guaranteed to
>>>       be the non-leaf side.  */
>>> -  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
>>> +  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved,
>>> +                     stmts_to_move);
>>> +  VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>>  }
>>>
>>>  /* Find out how many cycles we need to compute statements chain.
>>> @@ -3427,6 +3468,9 @@ reassociate_bb (basic_block bb)
>>>  {
>>>    gimple_stmt_iterator gsi;
>>>    basic_block son;
>>> +  VEC(gimple, heap) *stmts_to_move = NULL;
>>> +  gimple stmt;
>>> +  int i;
>>>
>>>    for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
>>>      {
>>> @@ -3542,7 +3586,7 @@ reassociate_bb (basic_block bb)
>>>        && VEC_length (operand_entry_t, ops) > 3)
>>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>>    else
>>> -    rewrite_expr_tree (stmt, 0, ops, false);
>>> +    rewrite_expr_tree (stmt, 0, ops, false, &stmts_to_move);
>>>
>>>    /* If we combined some repeated factors into a
>>>       __builtin_powi call, multiply that result by the
>>> @@ -3560,6 +3604,7 @@ reassociate_bb (basic_block bb)
>>>         target_ssa);
>>>        gimple_set_location (mul_stmt, gimple_location (stmt));
>>>        gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
>>> +                      VEC_safe_push (gimple, heap, stmts_to_move, mul_stmt);
>>>      }
>>>   }
>>>
>>> @@ -3567,6 +3612,11 @@ reassociate_bb (basic_block bb)
>>>      }
>>>   }
>>>      }
>>> +
>>> +  FOR_EACH_VEC_ELT (gimple, stmts_to_move, i, stmt)
>>> +    move_stmt_upwards (stmt);
>>> +  VEC_free (gimple, heap, stmts_to_move);
>>> +
>>>    for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
>>>         son;
>>>         son = next_dom_son (CDI_POST_DOMINATORS, son))
Easwaran Raman - Oct. 12, 2012, 6:18 p.m.
On Fri, Oct 12, 2012 at 1:45 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Fri, Oct 12, 2012 at 3:09 AM, Easwaran Raman <eraman@google.com> wrote:
>> Thanks for the comments. As David wrote, the intent of the patch is
>> not to do a general purpose scheduling, but to compensate for the
>> possible live range lengthening introduced by reassociation.
>>
>>
>> On Thu, Oct 11, 2012 at 6:16 AM, Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>> On Thu, Oct 11, 2012 at 3:52 AM, Easwaran Raman <eraman@google.com> wrote:
>>>>
>>>> +/* Move STMT up within its BB until it can not be moved any further.  */
>>>> +
>>>> +static void move_stmt_upwards (gimple stmt)
>>>> +{
>>>> +  gimple_stmt_iterator gsi, gsistmt;
>>>> +  tree rhs1, rhs2;
>>>> +  gimple rhs1def = NULL, rhs2def = NULL;
>>>> +  rhs1 = gimple_assign_rhs1 (stmt);
>>>> +  rhs2 = gimple_assign_rhs2 (stmt);
>>>> +  gcc_assert (rhs1);
>>>
>>> Please no such senseless asserts.  The following line will segfault anyway
>>> if rhs1 is NULL and with a debugger an assert doesn't add any useful
>>> information.
>> Ok.
>>>
>>>> +  if (TREE_CODE (rhs1) == SSA_NAME)
>>>> +    rhs1def = SSA_NAME_DEF_STMT (rhs1);
>>>> +  else if (TREE_CODE (rhs1) != REAL_CST
>>>> +           && TREE_CODE (rhs1) != INTEGER_CST)
>>>> +    return;
>>>> +  if (rhs2)
>>>
>>> You may not access gimple_assign_rhs2 if it is not there.  So you have
>>> to check whether you have an unary, binary or ternary (yes) operation.
>>
>> gimple_assign_rhs2 returns NULL_TREE if the RHS of an assignment
>> has less than two operands.  Regarding the check for ternary
>> operation, I believe it is not necessary. A statement is considered
>> for reassociation only if the RHS class is GIMPLE_BINARY_RHS.
>> Subsequently, for rhs1 and rhs2, it checks if the def statements also
>> have the same code and so it seems to me that a statement with a
>> ternary operator in the RHS will never be considered in
>> rewrite_expr_tree.
>>
>>
>>>
>>>> +    {
>>>> +      if (TREE_CODE (rhs2) == SSA_NAME)
>>>> +        rhs2def = SSA_NAME_DEF_STMT (rhs2);
>>>> +      else if (TREE_CODE (rhs1) != REAL_CST
>>>> +               && TREE_CODE (rhs1) != INTEGER_CST)
>>>> +        return;
>>>> +    }
>>>> +  gsi = gsi_for_stmt (stmt);
>>>> +  gsistmt = gsi;
>>>> +  gsi_prev (&gsi);
>>>> +  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
>>>
>>> ????
>>>
>>> This doesn't make much sense.  You can move a stmt to the nearest
>>> common post-dominator.  Assuming you only want to handle placing
>>> it after rhs1def or after rhs2def(?) you don't need any loop, just
>>> two dominator queries and an insertion after one of the definition
>>> stmts.
>>
>> Within a BB isn't that still O(size of BB)?
>
> Please document the fact that both stmts are in the same BB.
Ok.
> And no, it isn't, it is O (size of BB ^ 2).  You don't need a loop.
> operand rank should reflect the dominance relation inside the BB.

The rank of phi definitions would mess this up.

> If that doesn't work simply assign UIDs to the stmts first.
Ok.

>
>>> But this code should consider BBs.
>> For reassociation to look across BBs, the code should look something like this:
>>
>> L1 :
>>    a_2 = a_1 + 10
>>    jc L3
>> L2:
>>   a_3 = a_2 + 20
>>
>> - L1 should dominate L2 (otherwise there will be a phi node at L2 and
>> the reassociation of a_3 will not consider the definition of a_2)
>> - There are no other uses of a_2 other than the one in L2.
>>
>> After reassociation, the stmt defining a_2 would be moved to L2.  In
>> that case, the downward code motion of a_2 = a_1 + 10 to L2 is
>> beneficial (one less instruction if the branch is taken). It is not
>> obvious to me that moving it to L1 (or wherever a_1 is defined) is
>> beneficial.
>
> In this case it doesn't matter whether a1 lives through a3 or if a2 does.
> But moving the stmt is not necessary, so why not simply avoid it.
I used that example to show that the current downward motion has a
useful side effect and this patch preserves it. Yes, in this example
the downward motion can be avoided but in general it may not be
possible. I agree with you that there is unnecessary code motion in
many cases.

> You cannot undo it with your patch anyway.
>
>>>  And I don't see why more optimal
>>> placement cannot be done during rewrite_expr_tree itself.
>>
>> I started with that idea, but my current approach looks simpler.
>
> Simpler, but it's a hack.
>
> So, the only place we "move" stmts in rewrite_expr_tree is here:
>
>       if (!moved)
>         {
>           gimple_stmt_iterator gsinow, gsirhs1;
>           gimple stmt1 = stmt, stmt2;
>           unsigned int count;
>
>           gsinow = gsi_for_stmt (stmt);
>           count = VEC_length (operand_entry_t, ops) - opindex - 2;
>           while (count-- != 0)
>             {
>               stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>               gsirhs1 = gsi_for_stmt (stmt2);
>               gsi_move_before (&gsirhs1, &gsinow);
>
> that moving is done unconditionally, even if stmt2 already dominates stmt1.
> Why not improve that instead?

stmt2 does dominate stmt1. The issue is it unconditionally moves stmt2
downwards even if the statement that defines ops[op_index+1] (the new
RHS of stmt2 after reassociation) already dominates stmt2 or moves it
more than necessary. One complication to fixing it here is that there
is a call to swap_ops_for_binary_stmt that could alter the contents of
the ops vector.

- Easwaran

>
> Richard.
>
>
>>
>> Thanks,
>> Easwaran
>>>
>>> NB, the whole reassoc code needs a re-write to avoid the excessive
>>> stmt modifications when it does nothing.  So I'd very much rather avoid
>>> adding anything to reassoc until that rewrite happened.
>>>
>>> Richard.
>>>
>>>> +    {
>>>> +      gimple curr_stmt = gsi_stmt (gsi);
>>>> +      if (curr_stmt == rhs1def || curr_stmt == rhs2def) {
>>>> +        gsi_move_after (&gsistmt, &gsi);
>>>> +        return;
>>>> +      }
>>>> +    }
>>>> +
>>>> +}
>>>> +
>>>>  /* Recursively rewrite our linearized statements so that the operators
>>>>     match those in OPS[OPINDEX], putting the computation in rank
>>>>     order.  */
>>>>
>>>>  static void
>>>>  rewrite_expr_tree (gimple stmt, unsigned int opindex,
>>>> -   VEC(operand_entry_t, heap) * ops, bool moved)
>>>> +   VEC(operand_entry_t, heap) * ops, bool moved,
>>>> +                   VEC(gimple, heap) **stmts_to_move)
>>>>  {
>>>>    tree rhs1 = gimple_assign_rhs1 (stmt);
>>>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>>> @@ -2299,6 +2337,7 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>        print_gimple_stmt (dump_file, stmt, 0, 0);
>>>>      }
>>>>   }
>>>> +      VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>>>        return;
>>>>      }
>>>>
>>>> @@ -2346,7 +2385,9 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>      }
>>>>    /* Recurse on the LHS of the binary operator, which is guaranteed to
>>>>       be the non-leaf side.  */
>>>> -  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
>>>> +  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved,
>>>> +                     stmts_to_move);
>>>> +  VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>>>  }
>>>>
>>>>  /* Find out how many cycles we need to compute statements chain.
>>>> @@ -3427,6 +3468,9 @@ reassociate_bb (basic_block bb)
>>>>  {
>>>>    gimple_stmt_iterator gsi;
>>>>    basic_block son;
>>>> +  VEC(gimple, heap) *stmts_to_move = NULL;
>>>> +  gimple stmt;
>>>> +  int i;
>>>>
>>>>    for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
>>>>      {
>>>> @@ -3542,7 +3586,7 @@ reassociate_bb (basic_block bb)
>>>>        && VEC_length (operand_entry_t, ops) > 3)
>>>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>>>    else
>>>> -    rewrite_expr_tree (stmt, 0, ops, false);
>>>> +    rewrite_expr_tree (stmt, 0, ops, false, &stmts_to_move);
>>>>
>>>>    /* If we combined some repeated factors into a
>>>>       __builtin_powi call, multiply that result by the
>>>> @@ -3560,6 +3604,7 @@ reassociate_bb (basic_block bb)
>>>>         target_ssa);
>>>>        gimple_set_location (mul_stmt, gimple_location (stmt));
>>>>        gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
>>>> +                      VEC_safe_push (gimple, heap, stmts_to_move, mul_stmt);
>>>>      }
>>>>   }
>>>>
>>>> @@ -3567,6 +3612,11 @@ reassociate_bb (basic_block bb)
>>>>      }
>>>>   }
>>>>      }
>>>> +
>>>> +  FOR_EACH_VEC_ELT (gimple, stmts_to_move, i, stmt)
>>>> +    move_stmt_upwards (stmt);
>>>> +  VEC_free (gimple, heap, stmts_to_move);
>>>> +
>>>>    for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
>>>>         son;
>>>>         son = next_dom_son (CDI_POST_DOMINATORS, son))
Richard Guenther - Oct. 15, 2012, 8:41 a.m.
On Fri, Oct 12, 2012 at 8:18 PM, Easwaran Raman <eraman@google.com> wrote:
> On Fri, Oct 12, 2012 at 1:45 AM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Fri, Oct 12, 2012 at 3:09 AM, Easwaran Raman <eraman@google.com> wrote:
>>> Thanks for the comments. As David wrote, the intent of the patch is
>>> not to do a general purpose scheduling, but to compensate for the
>>> possible live range lengthening introduced by reassociation.
>>>
>>>
>>> On Thu, Oct 11, 2012 at 6:16 AM, Richard Biener
>>> <richard.guenther@gmail.com> wrote:
>>>> On Thu, Oct 11, 2012 at 3:52 AM, Easwaran Raman <eraman@google.com> wrote:
>>>>>
>>>>> +/* Move STMT up within its BB until it can not be moved any further.  */
>>>>> +
>>>>> +static void move_stmt_upwards (gimple stmt)
>>>>> +{
>>>>> +  gimple_stmt_iterator gsi, gsistmt;
>>>>> +  tree rhs1, rhs2;
>>>>> +  gimple rhs1def = NULL, rhs2def = NULL;
>>>>> +  rhs1 = gimple_assign_rhs1 (stmt);
>>>>> +  rhs2 = gimple_assign_rhs2 (stmt);
>>>>> +  gcc_assert (rhs1);
>>>>
>>>> Please no such senseless asserts.  The following line will segfault anyway
>>>> if rhs1 is NULL and with a debugger an assert doesn't add any useful
>>>> information.
>>> Ok.
>>>>
>>>>> +  if (TREE_CODE (rhs1) == SSA_NAME)
>>>>> +    rhs1def = SSA_NAME_DEF_STMT (rhs1);
>>>>> +  else if (TREE_CODE (rhs1) != REAL_CST
>>>>> +           && TREE_CODE (rhs1) != INTEGER_CST)
>>>>> +    return;
>>>>> +  if (rhs2)
>>>>
>>>> You may not access gimple_assign_rhs2 if it is not there.  So you have
>>>> to check whether you have an unary, binary or ternary (yes) operation.
>>>
>>> gimple_assign_rhs2 returns NULL_TREE if the RHS of an assignment
>>> has less than two operands.  Regarding the check for ternary
>>> operation, I believe it is not necessary. A statement is considered
>>> for reassociation only if the RHS class is GIMPLE_BINARY_RHS.
>>> Subsequently, for rhs1 and rhs2, it checks if the def statements also
>>> have the same code and so it seems to me that a statement with a
>>> ternary operator in the RHS will never be considered in
>>> rewrite_expr_tree.
>>>
>>>
>>>>
>>>>> +    {
>>>>> +      if (TREE_CODE (rhs2) == SSA_NAME)
>>>>> +        rhs2def = SSA_NAME_DEF_STMT (rhs2);
>>>>> +      else if (TREE_CODE (rhs1) != REAL_CST
>>>>> +               && TREE_CODE (rhs1) != INTEGER_CST)
>>>>> +        return;
>>>>> +    }
>>>>> +  gsi = gsi_for_stmt (stmt);
>>>>> +  gsistmt = gsi;
>>>>> +  gsi_prev (&gsi);
>>>>> +  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
>>>>
>>>> ????
>>>>
>>>> This doesn't make much sense.  You can move a stmt to the nearest
>>>> common post-dominator.  Assuming you only want to handle placing
>>>> it after rhs1def or after rhs2def(?) you don't need any loop, just
>>>> two dominator queries and an insertion after one of the definition
>>>> stmts.
>>>
>>> Within a BB isn't that still O(size of BB)?
>>
>> Please document the fact that both stmts are in the same BB.
> Ok.
>> And no, it isn't, it is O (size of BB ^ 2).  You don't need a loop.
>> operand rank should reflect the dominance relation inside the BB.
>
> The rank of phi definitions would mess this up.
>
>> If that doesn't work simply assign UIDs to the stmts first.
> Ok.
>
>>
>>>> But this code should consider BBs.
>>> For reassociation to look across BBs, the code should look something like this:
>>>
>>> L1 :
>>>    a_2 = a_1 + 10
>>>    jc L3
>>> L2:
>>>   a_3 = a_2 + 20
>>>
>>> - L1 should dominate L2 (otherwise there will be a phi node at L2 and
>>> the reassociation of a_3 will not consider the definition of a_2)
>>> - There are no other uses of a_2 other than the one in L2.
>>>
>>> After reassociation, the stmt defining a_2 would be moved to L2.  In
>>> that case, the downward code motion of a_2 = a_1 + 10 to L2 is
>>> beneficial (one less instruction if the branch is taken). It is not
>>> obvious to me that moving it to L1 (or wherever a_1 is defined) is
>>> beneficial.
>>
>> In this case it doesn't matter whether a1 lives through a3 or if a2 does.
>> But moving the stmt is not necessary, so why not simply avoid it.
> I used that example to show that the current downward motion has a
> useful side effect and this patch preserves it. Yes, in this example
> the downward motion can be avoided but in general it may not be
> possible. I agree with you that there is unnecessary code motion in
> many cases.
>
>> You cannot undo it with your patch anyway.
>>
>>>>  And I don't see why more optimal
>>>> placement cannot be done during rewrite_expr_tree itself.
>>>
>>> I started with that idea, but my current approach looks simpler.
>>
>> Simpler, but it's a hack.
>>
>> So, the only place we "move" stmts in rewrite_expr_tree is here:
>>
>>       if (!moved)
>>         {
>>           gimple_stmt_iterator gsinow, gsirhs1;
>>           gimple stmt1 = stmt, stmt2;
>>           unsigned int count;
>>
>>           gsinow = gsi_for_stmt (stmt);
>>           count = VEC_length (operand_entry_t, ops) - opindex - 2;
>>           while (count-- != 0)
>>             {
>>               stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>               gsirhs1 = gsi_for_stmt (stmt2);
>>               gsi_move_before (&gsirhs1, &gsinow);
>>
>> that moving is done unconditionally, even if stmt2 already dominates stmt1.
>> Why not improve that instead?
>
> stmt2 does dominate stmt1. The issue is it unconditionally moves stmt2
> downwards even if the statement that defines ops[op_index+1] (the new
> RHS of stmt2 after reassociation) already dominates stmt2 or moves it
> more than necessary.

Yes, and that's what I ask you to fix _first_, because it is clearly bogus.

> One complication to fixing it here is that there
> is a call to swap_ops_for_binary_stmt that could alter the contents of
> the ops vector.

Sure, but why does this complicate things here?  It may just cause stmt
moves to become necessary, but that is for optimization.

So, please first fix the above code to not move stmts when not necessary.

Thanks,
Richard.

> - Easwaran
>
>>
>> Richard.
>>
>>
>>>
>>> Thanks,
>>> Easwaran
>>>>
>>>> NB, the whole reassoc code needs a re-write to avoid the excessive
>>>> stmt modifications when it does nothing.  So I'd very much rather avoid
>>>> adding anything to reassoc until that rewrite happened.
>>>>
>>>> Richard.
>>>>
>>>>> +    {
>>>>> +      gimple curr_stmt = gsi_stmt (gsi);
>>>>> +      if (curr_stmt == rhs1def || curr_stmt == rhs2def) {
>>>>> +        gsi_move_after (&gsistmt, &gsi);
>>>>> +        return;
>>>>> +      }
>>>>> +    }
>>>>> +
>>>>> +}
>>>>> +
>>>>>  /* Recursively rewrite our linearized statements so that the operators
>>>>>     match those in OPS[OPINDEX], putting the computation in rank
>>>>>     order.  */
>>>>>
>>>>>  static void
>>>>>  rewrite_expr_tree (gimple stmt, unsigned int opindex,
>>>>> -   VEC(operand_entry_t, heap) * ops, bool moved)
>>>>> +   VEC(operand_entry_t, heap) * ops, bool moved,
>>>>> +                   VEC(gimple, heap) **stmts_to_move)
>>>>>  {
>>>>>    tree rhs1 = gimple_assign_rhs1 (stmt);
>>>>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>>>> @@ -2299,6 +2337,7 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>>        print_gimple_stmt (dump_file, stmt, 0, 0);
>>>>>      }
>>>>>   }
>>>>> +      VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>>>>        return;
>>>>>      }
>>>>>
>>>>> @@ -2346,7 +2385,9 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>>>      }
>>>>>    /* Recurse on the LHS of the binary operator, which is guaranteed to
>>>>>       be the non-leaf side.  */
>>>>> -  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
>>>>> +  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved,
>>>>> +                     stmts_to_move);
>>>>> +  VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
>>>>>  }
>>>>>
>>>>>  /* Find out how many cycles we need to compute statements chain.
>>>>> @@ -3427,6 +3468,9 @@ reassociate_bb (basic_block bb)
>>>>>  {
>>>>>    gimple_stmt_iterator gsi;
>>>>>    basic_block son;
>>>>> +  VEC(gimple, heap) *stmts_to_move = NULL;
>>>>> +  gimple stmt;
>>>>> +  int i;
>>>>>
>>>>>    for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
>>>>>      {
>>>>> @@ -3542,7 +3586,7 @@ reassociate_bb (basic_block bb)
>>>>>        && VEC_length (operand_entry_t, ops) > 3)
>>>>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>>>>    else
>>>>> -    rewrite_expr_tree (stmt, 0, ops, false);
>>>>> +    rewrite_expr_tree (stmt, 0, ops, false, &stmts_to_move);
>>>>>
>>>>>    /* If we combined some repeated factors into a
>>>>>       __builtin_powi call, multiply that result by the
>>>>> @@ -3560,6 +3604,7 @@ reassociate_bb (basic_block bb)
>>>>>         target_ssa);
>>>>>        gimple_set_location (mul_stmt, gimple_location (stmt));
>>>>>        gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
>>>>> +                      VEC_safe_push (gimple, heap, stmts_to_move, mul_stmt);
>>>>>      }
>>>>>   }
>>>>>
>>>>> @@ -3567,6 +3612,11 @@ reassociate_bb (basic_block bb)
>>>>>      }
>>>>>   }
>>>>>      }
>>>>> +
>>>>> +  FOR_EACH_VEC_ELT (gimple, stmts_to_move, i, stmt)
>>>>> +    move_stmt_upwards (stmt);
>>>>> +  VEC_free (gimple, heap, stmts_to_move);
>>>>> +
>>>>>    for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
>>>>>         son;
>>>>>         son = next_dom_son (CDI_POST_DOMINATORS, son))

Patch

Index: gcc/tree-ssa-reassoc.c
===================================================================
--- gcc/tree-ssa-reassoc.c (revision 191879)
+++ gcc/tree-ssa-reassoc.c (working copy)
@@ -2250,13 +2250,51 @@  swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
     }
 }

+/* Move STMT up within its BB until it can not be moved any further.  */
+
+static void move_stmt_upwards (gimple stmt)
+{
+  gimple_stmt_iterator gsi, gsistmt;
+  tree rhs1, rhs2;
+  gimple rhs1def = NULL, rhs2def = NULL;
+  rhs1 = gimple_assign_rhs1 (stmt);
+  rhs2 = gimple_assign_rhs2 (stmt);
+  gcc_assert (rhs1);
+  if (TREE_CODE (rhs1) == SSA_NAME)
+    rhs1def = SSA_NAME_DEF_STMT (rhs1);
+  else if (TREE_CODE (rhs1) != REAL_CST
+           && TREE_CODE (rhs1) != INTEGER_CST)
+    return;
+  if (rhs2)
+    {
+      if (TREE_CODE (rhs2) == SSA_NAME)
+        rhs2def = SSA_NAME_DEF_STMT (rhs2);
+      else if (TREE_CODE (rhs1) != REAL_CST
+               && TREE_CODE (rhs1) != INTEGER_CST)
+        return;
+    }
+  gsi = gsi_for_stmt (stmt);
+  gsistmt = gsi;
+  gsi_prev (&gsi);
+  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
+    {
+      gimple curr_stmt = gsi_stmt (gsi);
+      if (curr_stmt == rhs1def || curr_stmt == rhs2def) {
+        gsi_move_after (&gsistmt, &gsi);
+        return;
+      }
+    }
+
+}
+
 /* Recursively rewrite our linearized statements so that the operators
    match those in OPS[OPINDEX], putting the computation in rank
    order.  */

 static void
 rewrite_expr_tree (gimple stmt, unsigned int opindex,
-   VEC(operand_entry_t, heap) * ops, bool moved)
+   VEC(operand_entry_t, heap) * ops, bool moved,
+                   VEC(gimple, heap) **stmts_to_move)
 {
   tree rhs1 = gimple_assign_rhs1 (stmt);
   tree rhs2 = gimple_assign_rhs2 (stmt);
@@ -2299,6 +2337,7 @@  rewrite_expr_tree (gimple stmt, unsigned int opind
       print_gimple_stmt (dump_file, stmt, 0, 0);
     }
  }
+      VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
       return;
     }

@@ -2346,7 +2385,9 @@  rewrite_expr_tree (gimple stmt, unsigned int opind
     }
   /* Recurse on the LHS of the binary operator, which is guaranteed to
      be the non-leaf side.  */
-  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
+  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved,
+                     stmts_to_move);
+  VEC_safe_push (gimple, heap, *stmts_to_move, stmt);
 }

 /* Find out how many cycles we need to compute statements chain.
@@ -3427,6 +3468,9 @@  reassociate_bb (basic_block bb)
 {
   gimple_stmt_iterator gsi;
   basic_block son;
+  VEC(gimple, heap) *stmts_to_move = NULL;
+  gimple stmt;
+  int i;

   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
     {
@@ -3542,7 +3586,7 @@  reassociate_bb (basic_block bb)
       && VEC_length (operand_entry_t, ops) > 3)
     rewrite_expr_tree_parallel (stmt, width, ops);
   else
-    rewrite_expr_tree (stmt, 0, ops, false);
+    rewrite_expr_tree (stmt, 0, ops, false, &stmts_to_move);

   /* If we combined some repeated factors into a
      __builtin_powi call, multiply that result by the
@@ -3560,6 +3604,7 @@  reassociate_bb (basic_block bb)
        target_ssa);
       gimple_set_location (mul_stmt, gimple_location (stmt));
       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
+                      VEC_safe_push (gimple, heap, stmts_to_move, mul_stmt);
     }
  }

@@ -3567,6 +3612,11 @@  reassociate_bb (basic_block bb)
     }
  }
     }
+
+  FOR_EACH_VEC_ELT (gimple, stmts_to_move, i, stmt)
+    move_stmt_upwards (stmt);
+  VEC_free (gimple, heap, stmts_to_move);
+
   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
        son;
        son = next_dom_son (CDI_POST_DOMINATORS, son))