diff mbox

Optimize n?rotate(x,n):x

Message ID alpine.DEB.2.10.1404141703140.3714@laptop-mg.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse April 14, 2014, 4:40 p.m. UTC
On Mon, 14 Apr 2014, Richard Biener wrote:

>> +  /* If the special case has a high probability, keep it.  */
>> +  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
>
> I suppose Honza has a comment on how to test this properly
> (not sure if ->probability or ->frequency is always initialized properly).
> for example single_likely_edge tests profile_status_for_fn !=
> PROFILE_ABSENT (and uses a fixed probability value ...).
> Anyway, the comparison looks backwards to me, but maybe I'm
> missing sth - I'd use >= PROB_LIKELY ;)

Maybe the comment is confusing? middle_bb contains the expensive operation 
(say a/b) that the special case skips entirely. If the division happens in 
less than 50% of cases (that's the proba of the edge going from cond to 
middle_bb), then doing the comparison+jump may be cheaper and I abort the 
optimization. At least the testcase with __builtin_expect should prove 
that I didn't do it backwards.

value-prof seems to use 50% as the cut-off where it may become interesting 
to special case division, hence my choice of PROB_EVEN. I am not sure 
which way you want to use PROB_LIKELY (80%). If we have more than 80% 
chances of executing the division, always perform it? Or if we have more 
than 80% chances of skipping the division, keep the branch?

Attached is the latest version (passed the testsuite).

Comments

Richard Biener April 15, 2014, 7:54 a.m. UTC | #1
On Mon, Apr 14, 2014 at 6:40 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 14 Apr 2014, Richard Biener wrote:
>
>>> +  /* If the special case has a high probability, keep it.  */
>>> +  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
>>
>>
>> I suppose Honza has a comment on how to test this properly
>> (not sure if ->probability or ->frequency is always initialized properly).
>> for example single_likely_edge tests profile_status_for_fn !=
>> PROFILE_ABSENT (and uses a fixed probability value ...).
>> Anyway, the comparison looks backwards to me, but maybe I'm
>> missing sth - I'd use >= PROB_LIKELY ;)
>
>
> Maybe the comment is confusing? middle_bb contains the expensive operation
> (say a/b) that the special case skips entirely. If the division happens in
> less than 50% of cases (that's the proba of the edge going from cond to
> middle_bb), then doing the comparison+jump may be cheaper and I abort the
> optimization. At least the testcase with __builtin_expect should prove that
> I didn't do it backwards.

Ah, indeed.  My mistake.

> value-prof seems to use 50% as the cut-off where it may become interesting
> to special case division, hence my choice of PROB_EVEN. I am not sure which
> way you want to use PROB_LIKELY (80%). If we have more than 80% chances of
> executing the division, always perform it? Or if we have more than 80%
> chances of skipping the division, keep the branch?

Ok, if it's from value-prof then that's fine.

The patch is ok if Honza doesn't have any comments on whether it's ok
to look at ->probability unconditionally.

Thanks,
Richard.

> Attached is the latest version (passed the testsuite).
> Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-phiopt1" } */
> +
> +int f(int a, int b, int c) {
> +  if (c > 5) return c;
> +  if (a == 0) return b;
> +  return a + b;
> +}
> +
> +unsigned rot(unsigned x, int n) {
> +  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
> +  return (n == 0) ? x : ((x << n) | (x >> (bits - n)));
> +}
> +
> +unsigned m(unsigned a, unsigned b) {
> +  if (a == 0)
> +    return 0;
> +  else
> +    return a & b;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "goto" 2 "phiopt1" } } */
> +/* { dg-final { cleanup-tree-dump "phiopt1" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (working copy)
> @@ -0,0 +1,19 @@
> +/* { dg-do compile { target x86_64-*-* } } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +int f(int a, int b) {
> +  if (__builtin_expect(a == 0, 1)) return b;
> +  return a + b;
> +}
> +
> +// optab_handler can handle if(b==1) but not a/b
> +// so we consider a/b too expensive.
> +unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) {
> +  if (b == 1)
> +    return a;
> +  else
> +    return a / b;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "goto " 4 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/tree-ssa-phiopt.c
> ===================================================================
> --- gcc/tree-ssa-phiopt.c       (revision 209353)
> +++ gcc/tree-ssa-phiopt.c       (working copy)
> @@ -140,20 +140,37 @@ static bool gate_hoist_loads (void);
>         x = PHI (CONST, a)
>
>     Gets replaced with:
>       bb0:
>       bb2:
>         t1 = a == CONST;
>         t2 = b > c;
>         t3 = t1 & t2;
>         x = a;
>
> +
> +   It also replaces
> +
> +     bb0:
> +       if (a != 0) goto bb1; else goto bb2;
> +     bb1:
> +       c = a + b;
> +     bb2:
> +       x = PHI <c (bb1), b (bb0), ...>;
> +
> +   with
> +
> +     bb0:
> +       c = a + b;
> +     bb2:
> +       x = PHI <c (bb0), ...>;
> +
>     ABS Replacement
>     ---------------
>
>     This transformation, implemented in abs_replacement, replaces
>
>       bb0:
>         if (a >= 0) goto bb2; else goto bb1;
>       bb1:
>         x = -a;
>       bb2:
> @@ -809,20 +826,103 @@ operand_equal_for_value_replacement (con
>    if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
>      return true;
>
>    tmp = gimple_assign_rhs2 (def);
>    if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
>      return true;
>
>    return false;
>  }
>
> +/* Returns true if ARG is a neutral element for operation CODE
> +   on the RIGHT side.  */
> +
> +static bool
> +neutral_element_p (tree_code code, tree arg, bool right)
> +{
> +  switch (code)
> +    {
> +    case PLUS_EXPR:
> +    case BIT_IOR_EXPR:
> +    case BIT_XOR_EXPR:
> +      return integer_zerop (arg);
> +
> +    case LROTATE_EXPR:
> +    case RROTATE_EXPR:
> +    case LSHIFT_EXPR:
> +    case RSHIFT_EXPR:
> +    case MINUS_EXPR:
> +    case POINTER_PLUS_EXPR:
> +      return right && integer_zerop (arg);
> +
> +    case MULT_EXPR:
> +      return integer_onep (arg);
> +
> +    case TRUNC_DIV_EXPR:
> +    case CEIL_DIV_EXPR:
> +    case FLOOR_DIV_EXPR:
> +    case ROUND_DIV_EXPR:
> +    case EXACT_DIV_EXPR:
> +      return right && integer_onep (arg);
> +
> +    case BIT_AND_EXPR:
> +      return integer_all_onesp (arg);
> +
> +    default:
> +      return false;
> +    }
> +}
> +
> +/* Returns true if ARG is an absorbing element for operation CODE.  */
> +
> +static bool
> +absorbing_element_p (tree_code code, tree arg)
> +{
> +  switch (code)
> +    {
> +    case BIT_IOR_EXPR:
> +      return integer_all_onesp (arg);
> +
> +    case MULT_EXPR:
> +    case BIT_AND_EXPR:
> +      return integer_zerop (arg);
> +
> +    default:
> +      return false;
> +    }
> +}
> +
> +/* Returns true if the statement is cheap. The simple heuristic used here
> +   is that anything the optab knows is cheap.  */
> +
> +static bool
> +is_cheap_stmt (gimple stmt)
> +{
> +  tree type;
> +  if (is_gimple_assign (stmt))
> +    {
> +      type = TREE_TYPE (gimple_assign_rhs1 (stmt));
> +      enum tree_code code = gimple_assign_rhs_code (stmt);
> +      optab op = optab_for_tree_code (code, type, optab_scalar);
> +      return (op != unknown_optab
> +             && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing);
> +    }
> +  else if (gimple_code (stmt) == GIMPLE_COND)
> +    {
> +      type = TREE_TYPE (gimple_cond_lhs (stmt));
> +      enum rtx_code code = (gimple_cond_code (stmt) == EQ_EXPR) ? EQ : NE;
> +      return can_compare_p (code, TYPE_MODE (type), ccp_jump);
> +    }
> +  else
> +    gcc_unreachable ();
> +}
> +
>  /*  The function value_replacement does the main work of doing the value
>      replacement.  Return non-zero if the replacement is done.  Otherwise
> return
>      0.  If we remove the middle basic block, return 2.
>      BB is the basic block where the replacement is going to be done on.
> ARG0
>      is argument 0 from the PHI.  Likewise for ARG1.  */
>
>  static int
>  value_replacement (basic_block cond_bb, basic_block middle_bb,
>                    edge e0, edge e1, gimple phi,
>                    tree arg0, tree arg1)
> @@ -833,23 +933,21 @@ value_replacement (basic_block cond_bb,
>    enum tree_code code;
>    bool emtpy_or_with_defined_p = true;
>
>    /* If the type says honor signed zeros we cannot do this
>       optimization.  */
>    if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
>      return 0;
>
>    /* If there is a statement in MIDDLE_BB that defines one of the PHI
>       arguments, then adjust arg0 or arg1.  */
> -  gsi = gsi_after_labels (middle_bb);
> -  if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
> -    gsi_next_nondebug (&gsi);
> +  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
>    while (!gsi_end_p (gsi))
>      {
>        gimple stmt = gsi_stmt (gsi);
>        tree lhs;
>        gsi_next_nondebug (&gsi);
>        if (!is_gimple_assign (stmt))
>         {
>           emtpy_or_with_defined_p = false;
>           continue;
>         }
> @@ -931,20 +1029,67 @@ value_replacement (basic_block cond_bb,
>               print_generic_expr (dump_file, gimple_phi_result (phi), 0);
>               fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
>                        cond_bb->index);
>               print_generic_expr (dump_file, arg, 0);
>               fprintf (dump_file, ".\n");
>              }
>            return 1;
>         }
>
>      }
> +
> +  /* Now optimize (x != 0) ? x + y : y to just y.
> +     The following condition is too restrictive, there can easily be
> another
> +     stmt in middle_bb, for instance a CONVERT_EXPR for the second
> argument.  */
> +  gimple assign = last_and_only_stmt (middle_bb);
> +  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
> +      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
> +      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
> +         && !POINTER_TYPE_P (TREE_TYPE (arg0))))
> +    return 0;
> +
> +  /* assign may call a libgcc routine, which is slow.  */
> +  if (!is_cheap_stmt (assign) && is_cheap_stmt (cond))
> +    return 0;
> +
> +  /* If the special case has a high probability, keep it.  */
> +  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
> +    return 0;
> +
> +  tree lhs = gimple_assign_lhs (assign);
> +  tree rhs1 = gimple_assign_rhs1 (assign);
> +  tree rhs2 = gimple_assign_rhs2 (assign);
> +  enum tree_code code_def = gimple_assign_rhs_code (assign);
> +  tree cond_lhs = gimple_cond_lhs (cond);
> +  tree cond_rhs = gimple_cond_rhs (cond);
> +
> +  if (((code == NE_EXPR && e1 == false_edge)
> +       || (code == EQ_EXPR && e1 == true_edge))
> +      && arg0 == lhs
> +      && ((arg1 == rhs1
> +          && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
> +          && neutral_element_p (code_def, cond_rhs, true))
> +         || (arg1 == rhs2
> +             && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
> +             && neutral_element_p (code_def, cond_rhs, false))
> +         || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
> +             && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
> +                 || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
> +             && absorbing_element_p (code_def, cond_rhs))))
> +    {
> +      gsi = gsi_for_stmt (cond);
> +      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
> +      gsi_move_before (&gsi_from, &gsi);
> +      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
> +      return 2;
> +    }
> +
>    return 0;
>  }
>
>  /*  The function minmax_replacement does the main work of doing the minmax
>      replacement.  Return true if the replacement is done.  Otherwise return
>      false.
>      BB is the basic block where the replacement is going to be done on.
> ARG0
>      is argument 0 from the PHI.  Likewise for ARG1.  */
>
>  static bool
>
Marc Glisse April 23, 2014, 6:25 p.m. UTC | #2
Honza, any comment on Richard's question?

On Tue, 15 Apr 2014, Richard Biener wrote:

> On Mon, Apr 14, 2014 at 6:40 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> On Mon, 14 Apr 2014, Richard Biener wrote:
>>
>>>> +  /* If the special case has a high probability, keep it.  */
>>>> +  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
>>>
>>>
>>> I suppose Honza has a comment on how to test this properly
>>> (not sure if ->probability or ->frequency is always initialized properly).
>>> for example single_likely_edge tests profile_status_for_fn !=
>>> PROFILE_ABSENT (and uses a fixed probability value ...).
>>> Anyway, the comparison looks backwards to me, but maybe I'm
>>> missing sth - I'd use >= PROB_LIKELY ;)
>>
>>
>> Maybe the comment is confusing? middle_bb contains the expensive operation
>> (say a/b) that the special case skips entirely. If the division happens in
>> less than 50% of cases (that's the proba of the edge going from cond to
>> middle_bb), then doing the comparison+jump may be cheaper and I abort the
>> optimization. At least the testcase with __builtin_expect should prove that
>> I didn't do it backwards.
>
> Ah, indeed.  My mistake.
>
>> value-prof seems to use 50% as the cut-off where it may become interesting
>> to special case division, hence my choice of PROB_EVEN. I am not sure which
>> way you want to use PROB_LIKELY (80%). If we have more than 80% chances of
>> executing the division, always perform it? Or if we have more than 80%
>> chances of skipping the division, keep the branch?
>
> Ok, if it's from value-prof then that's fine.
>
> The patch is ok if Honza doesn't have any comments on whether it's ok
> to look at ->probability unconditionally.
>
> Thanks,
> Richard.
>
>> Attached is the latest version (passed the testsuite).
>> Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
>> +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
>> @@ -0,0 +1,23 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O -fdump-tree-phiopt1" } */
>> +
>> +int f(int a, int b, int c) {
>> +  if (c > 5) return c;
>> +  if (a == 0) return b;
>> +  return a + b;
>> +}
>> +
>> +unsigned rot(unsigned x, int n) {
>> +  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
>> +  return (n == 0) ? x : ((x << n) | (x >> (bits - n)));
>> +}
>> +
>> +unsigned m(unsigned a, unsigned b) {
>> +  if (a == 0)
>> +    return 0;
>> +  else
>> +    return a & b;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "goto" 2 "phiopt1" } } */
>> +/* { dg-final { cleanup-tree-dump "phiopt1" } } */
>> Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (revision 0)
>> +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (working copy)
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile { target x86_64-*-* } } */
>> +/* { dg-options "-O2 -fdump-tree-optimized" } */
>> +
>> +int f(int a, int b) {
>> +  if (__builtin_expect(a == 0, 1)) return b;
>> +  return a + b;
>> +}
>> +
>> +// optab_handler can handle if(b==1) but not a/b
>> +// so we consider a/b too expensive.
>> +unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) {
>> +  if (b == 1)
>> +    return a;
>> +  else
>> +    return a / b;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "goto " 4 "optimized" } } */
>> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>> Index: gcc/tree-ssa-phiopt.c
>> ===================================================================
>> --- gcc/tree-ssa-phiopt.c       (revision 209353)
>> +++ gcc/tree-ssa-phiopt.c       (working copy)
>> @@ -140,20 +140,37 @@ static bool gate_hoist_loads (void);
>>         x = PHI (CONST, a)
>>
>>     Gets replaced with:
>>       bb0:
>>       bb2:
>>         t1 = a == CONST;
>>         t2 = b > c;
>>         t3 = t1 & t2;
>>         x = a;
>>
>> +
>> +   It also replaces
>> +
>> +     bb0:
>> +       if (a != 0) goto bb1; else goto bb2;
>> +     bb1:
>> +       c = a + b;
>> +     bb2:
>> +       x = PHI <c (bb1), b (bb0), ...>;
>> +
>> +   with
>> +
>> +     bb0:
>> +       c = a + b;
>> +     bb2:
>> +       x = PHI <c (bb0), ...>;
>> +
>>     ABS Replacement
>>     ---------------
>>
>>     This transformation, implemented in abs_replacement, replaces
>>
>>       bb0:
>>         if (a >= 0) goto bb2; else goto bb1;
>>       bb1:
>>         x = -a;
>>       bb2:
>> @@ -809,20 +826,103 @@ operand_equal_for_value_replacement (con
>>    if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
>>      return true;
>>
>>    tmp = gimple_assign_rhs2 (def);
>>    if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
>>      return true;
>>
>>    return false;
>>  }
>>
>> +/* Returns true if ARG is a neutral element for operation CODE
>> +   on the RIGHT side.  */
>> +
>> +static bool
>> +neutral_element_p (tree_code code, tree arg, bool right)
>> +{
>> +  switch (code)
>> +    {
>> +    case PLUS_EXPR:
>> +    case BIT_IOR_EXPR:
>> +    case BIT_XOR_EXPR:
>> +      return integer_zerop (arg);
>> +
>> +    case LROTATE_EXPR:
>> +    case RROTATE_EXPR:
>> +    case LSHIFT_EXPR:
>> +    case RSHIFT_EXPR:
>> +    case MINUS_EXPR:
>> +    case POINTER_PLUS_EXPR:
>> +      return right && integer_zerop (arg);
>> +
>> +    case MULT_EXPR:
>> +      return integer_onep (arg);
>> +
>> +    case TRUNC_DIV_EXPR:
>> +    case CEIL_DIV_EXPR:
>> +    case FLOOR_DIV_EXPR:
>> +    case ROUND_DIV_EXPR:
>> +    case EXACT_DIV_EXPR:
>> +      return right && integer_onep (arg);
>> +
>> +    case BIT_AND_EXPR:
>> +      return integer_all_onesp (arg);
>> +
>> +    default:
>> +      return false;
>> +    }
>> +}
>> +
>> +/* Returns true if ARG is an absorbing element for operation CODE.  */
>> +
>> +static bool
>> +absorbing_element_p (tree_code code, tree arg)
>> +{
>> +  switch (code)
>> +    {
>> +    case BIT_IOR_EXPR:
>> +      return integer_all_onesp (arg);
>> +
>> +    case MULT_EXPR:
>> +    case BIT_AND_EXPR:
>> +      return integer_zerop (arg);
>> +
>> +    default:
>> +      return false;
>> +    }
>> +}
>> +
>> +/* Returns true if the statement is cheap. The simple heuristic used here
>> +   is that anything the optab knows is cheap.  */
>> +
>> +static bool
>> +is_cheap_stmt (gimple stmt)
>> +{
>> +  tree type;
>> +  if (is_gimple_assign (stmt))
>> +    {
>> +      type = TREE_TYPE (gimple_assign_rhs1 (stmt));
>> +      enum tree_code code = gimple_assign_rhs_code (stmt);
>> +      optab op = optab_for_tree_code (code, type, optab_scalar);
>> +      return (op != unknown_optab
>> +             && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing);
>> +    }
>> +  else if (gimple_code (stmt) == GIMPLE_COND)
>> +    {
>> +      type = TREE_TYPE (gimple_cond_lhs (stmt));
>> +      enum rtx_code code = (gimple_cond_code (stmt) == EQ_EXPR) ? EQ : NE;
>> +      return can_compare_p (code, TYPE_MODE (type), ccp_jump);
>> +    }
>> +  else
>> +    gcc_unreachable ();
>> +}
>> +
>>  /*  The function value_replacement does the main work of doing the value
>>      replacement.  Return non-zero if the replacement is done.  Otherwise
>> return
>>      0.  If we remove the middle basic block, return 2.
>>      BB is the basic block where the replacement is going to be done on.
>> ARG0
>>      is argument 0 from the PHI.  Likewise for ARG1.  */
>>
>>  static int
>>  value_replacement (basic_block cond_bb, basic_block middle_bb,
>>                    edge e0, edge e1, gimple phi,
>>                    tree arg0, tree arg1)
>> @@ -833,23 +933,21 @@ value_replacement (basic_block cond_bb,
>>    enum tree_code code;
>>    bool emtpy_or_with_defined_p = true;
>>
>>    /* If the type says honor signed zeros we cannot do this
>>       optimization.  */
>>    if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
>>      return 0;
>>
>>    /* If there is a statement in MIDDLE_BB that defines one of the PHI
>>       arguments, then adjust arg0 or arg1.  */
>> -  gsi = gsi_after_labels (middle_bb);
>> -  if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
>> -    gsi_next_nondebug (&gsi);
>> +  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
>>    while (!gsi_end_p (gsi))
>>      {
>>        gimple stmt = gsi_stmt (gsi);
>>        tree lhs;
>>        gsi_next_nondebug (&gsi);
>>        if (!is_gimple_assign (stmt))
>>         {
>>           emtpy_or_with_defined_p = false;
>>           continue;
>>         }
>> @@ -931,20 +1029,67 @@ value_replacement (basic_block cond_bb,
>>               print_generic_expr (dump_file, gimple_phi_result (phi), 0);
>>               fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
>>                        cond_bb->index);
>>               print_generic_expr (dump_file, arg, 0);
>>               fprintf (dump_file, ".\n");
>>              }
>>            return 1;
>>         }
>>
>>      }
>> +
>> +  /* Now optimize (x != 0) ? x + y : y to just y.
>> +     The following condition is too restrictive, there can easily be
>> another
>> +     stmt in middle_bb, for instance a CONVERT_EXPR for the second
>> argument.  */
>> +  gimple assign = last_and_only_stmt (middle_bb);
>> +  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
>> +      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
>> +      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
>> +         && !POINTER_TYPE_P (TREE_TYPE (arg0))))
>> +    return 0;
>> +
>> +  /* assign may call a libgcc routine, which is slow.  */
>> +  if (!is_cheap_stmt (assign) && is_cheap_stmt (cond))
>> +    return 0;
>> +
>> +  /* If the special case has a high probability, keep it.  */
>> +  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
>> +    return 0;
>> +
>> +  tree lhs = gimple_assign_lhs (assign);
>> +  tree rhs1 = gimple_assign_rhs1 (assign);
>> +  tree rhs2 = gimple_assign_rhs2 (assign);
>> +  enum tree_code code_def = gimple_assign_rhs_code (assign);
>> +  tree cond_lhs = gimple_cond_lhs (cond);
>> +  tree cond_rhs = gimple_cond_rhs (cond);
>> +
>> +  if (((code == NE_EXPR && e1 == false_edge)
>> +       || (code == EQ_EXPR && e1 == true_edge))
>> +      && arg0 == lhs
>> +      && ((arg1 == rhs1
>> +          && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
>> +          && neutral_element_p (code_def, cond_rhs, true))
>> +         || (arg1 == rhs2
>> +             && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
>> +             && neutral_element_p (code_def, cond_rhs, false))
>> +         || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
>> +             && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
>> +                 || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
>> +             && absorbing_element_p (code_def, cond_rhs))))
>> +    {
>> +      gsi = gsi_for_stmt (cond);
>> +      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
>> +      gsi_move_before (&gsi_from, &gsi);
>> +      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
>> +      return 2;
>> +    }
>> +
>>    return 0;
>>  }
>>
>>  /*  The function minmax_replacement does the main work of doing the minmax
>>      replacement.  Return true if the replacement is done.  Otherwise return
>>      false.
>>      BB is the basic block where the replacement is going to be done on.
>> ARG0
>>      is argument 0 from the PHI.  Likewise for ARG1.  */
>>
>>  static bool
Jan Hubicka April 23, 2014, 9:19 p.m. UTC | #3
> Honza, any comment on Richard's question?
> 
> On Tue, 15 Apr 2014, Richard Biener wrote:
> 
> >On Mon, Apr 14, 2014 at 6:40 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> >>On Mon, 14 Apr 2014, Richard Biener wrote:
> >>
> >>>>+  /* If the special case has a high probability, keep it.  */
> >>>>+  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
> >>>
> >>>
> >>>I suppose Honza has a comment on how to test this properly
> >>>(not sure if ->probability or ->frequency is always initialized properly).
> >>>for example single_likely_edge tests profile_status_for_fn !=
> >>>PROFILE_ABSENT (and uses a fixed probability value ...).
> >>>Anyway, the comparison looks backwards to me, but maybe I'm
> >>>missing sth - I'd use >= PROB_LIKELY ;)
> >>
> >>
> >>Maybe the comment is confusing? middle_bb contains the expensive operation
> >>(say a/b) that the special case skips entirely. If the division happens in
> >>less than 50% of cases (that's the proba of the edge going from cond to
> >>middle_bb), then doing the comparison+jump may be cheaper and I abort the
> >>optimization. At least the testcase with __builtin_expect should prove that
> >>I didn't do it backwards.
> >
> >Ah, indeed.  My mistake.
> >
> >>value-prof seems to use 50% as the cut-off where it may become interesting
> >>to special case division, hence my choice of PROB_EVEN. I am not sure which
> >>way you want to use PROB_LIKELY (80%). If we have more than 80% chances of
> >>executing the division, always perform it? Or if we have more than 80%
> >>chances of skipping the division, keep the branch?
> >
> >Ok, if it's from value-prof then that's fine.
> >
> >The patch is ok if Honza doesn't have any comments on whether it's ok
> >to look at ->probability unconditionally.
> >
> >Thanks,
> >Richard.
> >
> >>+
> >>+  /* Now optimize (x != 0) ? x + y : y to just y.
> >>+     The following condition is too restrictive, there can easily be
> >>another
> >>+     stmt in middle_bb, for instance a CONVERT_EXPR for the second
> >>argument.  */
> >>+  gimple assign = last_and_only_stmt (middle_bb);
> >>+  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
> >>+      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
> >>+      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
> >>+         && !POINTER_TYPE_P (TREE_TYPE (arg0))))
> >>+    return 0;
> >>+
> >>+  /* assign may call a libgcc routine, which is slow.  */
> >>+  if (!is_cheap_stmt (assign) && is_cheap_stmt (cond))
> >>+    return 0;
> >>+
> >>+  /* If the special case has a high probability, keep it.  */
> >>+  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
> >>+    return 0;

Well, I would expect this transformation to be win always, + operation
is virtually for free.

Concerning profile - you must consider two cases.  If the profile is guessed
then the probability is most probably 71% to the non-zero case unless user used
an expect (since I would expect only PRED_OPCODE_NONEQUAL to match).  This is
data dependent branch unless this sits in a loop and those are badly predicted
statically.

If the probability is read, then probabilities over (say) 10% and less than 90%
means that the branch is likely not very well predictable (it still may be on
advanced architectures if it is predictable from context) and getting rid of it
is a good idea.

So if you want to disable the optimization for x being likely zero, I guess
testing that probability is over PROV_LIKELY is a resonable threshold - it will
handle both builtin_expect and the (very) badly predictable branches.
Maybe for FDO it should be higher, but we would need to do some research on it
that is probably not worth the effort.

The division transformation is bit different story, since cost of division is
more than cost of branch and the 50% threshold is a limit for one value
counter to be reliable.

Honza
Marc Glisse April 23, 2014, 10:18 p.m. UTC | #4
On Wed, 23 Apr 2014, Jan Hubicka wrote:

>>>> +  /* Now optimize (x != 0) ? x + y : y to just y.
>>>> +     The following condition is too restrictive, there can easily be
>>>> another
>>>> +     stmt in middle_bb, for instance a CONVERT_EXPR for the second
>>>> argument.  */
>>>> +  gimple assign = last_and_only_stmt (middle_bb);
>>>> +  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
>>>> +      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
>>>> +      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
>>>> +         && !POINTER_TYPE_P (TREE_TYPE (arg0))))
>>>> +    return 0;
>>>> +
>>>> +  /* assign may call a libgcc routine, which is slow.  */
>>>> +  if (!is_cheap_stmt (assign) && is_cheap_stmt (cond))
>>>> +    return 0;
>>>> +
>>>> +  /* If the special case has a high probability, keep it.  */
>>>> +  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
>>>> +    return 0;
>
> Well, I would expect this transformation to be win always, + operation
> is virtually for free.
>
> Concerning profile - you must consider two cases.  If the profile is guessed
> then the probability is most probably 71% to the non-zero case unless user
> used an expect (since I would expect only PRED_OPCODE_NONEQUAL to match).
> This is data dependent branch unless this sits in a loop and those are badly
> predicted statically.
>
> If the probability is read, then probabilities over (say) 10% and less than
> 90% means that the branch is likely not very well predictable (it still may
> be on advanced architectures if it is predictable from context) and getting
> rid of it is a good idea.
>
> So if you want to disable the optimization for x being likely zero, I guess
> testing that probability is over PROV_LIKELY is a resonable threshold - it
> will handle both builtin_expect and the (very) badly predictable branches.
> Maybe for FDO it should be higher, but we would need to do some research on
> it that is probably not worth the effort.
>
> The division transformation is bit different story, since cost of division
> is more than cost of branch and the 50% threshold is a limit for one value
> counter to be reliable.

Thank you for the comments. If I understand correctly:

- it is always ok to look at edge->probability (no need to check that the
   probabilities are available as Richard feared)
- PROB_EVEN is an ok threshold for division (not sure I understood your last
   sentence)
- for cheaper operations like addition, I should be less conservative and do
   the transformation always, or use a threshold of PROB_UNLIKELY.

Are there other operations than division (among those listed in
neutral_element_p or absorbing_element_p) that fall in the "expensive"
category? I guess there are some platforms where multiplication is
expensive, but few, and querying the target for the cost of operations
seems exagerated. So I would go with:

[move definition of code_def]
if ((code_def == TRUNC_DIV_EXPR || code_def == CEIL_DIV_EXPR
      || code_def == FLOOR_DIV_EXPR || code_def == ROUND_DIV_EXPR
      || code_def == EXACT_DIV_EXPR)
     && EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
   return 0;

(and change the testsuite example with __builtin_expect to use division)

or (my favorite):

[move definition of code_def]
int threshold = PROB_UNLIKELY;
if (code_def == TRUNC_DIV_EXPR || code_def == CEIL_DIV_EXPR
     || code_def == FLOOR_DIV_EXPR || code_def == ROUND_DIV_EXPR
     || code_def == EXACT_DIV_EXPR)
   threshold = PROB_EVEN;
if (EDGE_PRED (middle_bb, 0)->probability < threshold)
   return 0;


Is that ok, after re-testing?
Jan Hubicka April 23, 2014, 10:28 p.m. UTC | #5
> 
> Thank you for the comments. If I understand correctly:
> 
> - it is always ok to look at edge->probability (no need to check that the
>   probabilities are available as Richard feared)

Actually with -fno-guess-branch-probabilities (default only at -O0) the probability
is not computed.  You can check profile_status >= PROFILE_GUESSED

> - PROB_EVEN is an ok threshold for division (not sure I understood your last
>   sentence)
> - for cheaper operations like addition, I should be less conservative and do
>   the transformation always, or use a threshold of PROB_UNLIKELY.

PROB_EVEN is really coming from the way one value counter is implemented.
If its probability is less than 50%, the value predicted may be wrong and may
not be the most common value.

It does not apply for your case. If you always eliminate branch, I would always
disable the transformation only if the faster path you are going to slow down
is more than PROB_LIKELY.
For probabilities PROB_LIKELY...PROB_EVEN you are going to slow down the common
path, but you will eliminate branch that is most likely data dependent and not
well predictable.

For really expensive operations (division by non-constant) you probably want
to use PROB_EVEN. But perhaps for start you don't need to make a difference and
lets see.
> 
> Are there other operations than division (among those listed in
> neutral_element_p or absorbing_element_p) that fall in the "expensive"
> category? I guess there are some platforms where multiplication is
> expensive, but few, and querying the target for the cost of operations
> seems exagerated. So I would go with:
> 
> [move definition of code_def]
> if ((code_def == TRUNC_DIV_EXPR || code_def == CEIL_DIV_EXPR
>      || code_def == FLOOR_DIV_EXPR || code_def == ROUND_DIV_EXPR
>      || code_def == EXACT_DIV_EXPR)
>     && EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
>   return 0;
> 
> (and change the testsuite example with __builtin_expect to use division)
> 
> or (my favorite):
> 
> [move definition of code_def]
> int threshold = PROB_UNLIKELY;
> if (code_def == TRUNC_DIV_EXPR || code_def == CEIL_DIV_EXPR
>     || code_def == FLOOR_DIV_EXPR || code_def == ROUND_DIV_EXPR
>     || code_def == EXACT_DIV_EXPR)
>   threshold = PROB_EVEN;
> if (EDGE_PRED (middle_bb, 0)->probability < threshold)
>   return 0;

You may probably ask time estimate of estimate_num_insns for expensive
operations. It is not terribly smarter than what you propose (modulo
special casing the constant divisor), but this way we will have all
the magic at one place that is good for maintanibility.

With these changes it is OK.
Honza
> 
> 
> Is that ok, after re-testing?
> 
> -- 
> Marc Glisse
diff mbox

Patch

Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c	(working copy)
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-phiopt1" } */
+
+int f(int a, int b, int c) {
+  if (c > 5) return c;
+  if (a == 0) return b;
+  return a + b;
+}
+
+unsigned rot(unsigned x, int n) {
+  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
+  return (n == 0) ? x : ((x << n) | (x >> (bits - n)));
+}
+
+unsigned m(unsigned a, unsigned b) {
+  if (a == 0)
+    return 0;
+  else
+    return a & b;
+}
+
+/* { dg-final { scan-tree-dump-times "goto" 2 "phiopt1" } } */
+/* { dg-final { cleanup-tree-dump "phiopt1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c	(working copy)
@@ -0,0 +1,19 @@ 
+/* { dg-do compile { target x86_64-*-* } } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int f(int a, int b) {
+  if (__builtin_expect(a == 0, 1)) return b;
+  return a + b;
+}
+
+// optab_handler can handle if(b==1) but not a/b
+// so we consider a/b too expensive.
+unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) {
+  if (b == 1)
+    return a;
+  else
+    return a / b;
+}
+
+/* { dg-final { scan-tree-dump-times "goto " 4 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/tree-ssa-phiopt.c
===================================================================
--- gcc/tree-ssa-phiopt.c	(revision 209353)
+++ gcc/tree-ssa-phiopt.c	(working copy)
@@ -140,20 +140,37 @@  static bool gate_hoist_loads (void);
        x = PHI (CONST, a)
 
    Gets replaced with:
      bb0:
      bb2:
        t1 = a == CONST;
        t2 = b > c;
        t3 = t1 & t2;
        x = a;
 
+
+   It also replaces
+
+     bb0:
+       if (a != 0) goto bb1; else goto bb2;
+     bb1:
+       c = a + b;
+     bb2:
+       x = PHI <c (bb1), b (bb0), ...>;
+
+   with
+
+     bb0:
+       c = a + b;
+     bb2:
+       x = PHI <c (bb0), ...>;
+
    ABS Replacement
    ---------------
 
    This transformation, implemented in abs_replacement, replaces
 
      bb0:
        if (a >= 0) goto bb2; else goto bb1;
      bb1:
        x = -a;
      bb2:
@@ -809,20 +826,103 @@  operand_equal_for_value_replacement (con
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
     return true;
 
   tmp = gimple_assign_rhs2 (def);
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
     return true;
 
   return false;
 }
 
+/* Returns true if ARG is a neutral element for operation CODE
+   on the RIGHT side.  */
+
+static bool
+neutral_element_p (tree_code code, tree arg, bool right)
+{
+  switch (code)
+    {
+    case PLUS_EXPR:
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+      return integer_zerop (arg);
+
+    case LROTATE_EXPR:
+    case RROTATE_EXPR:
+    case LSHIFT_EXPR:
+    case RSHIFT_EXPR:
+    case MINUS_EXPR:
+    case POINTER_PLUS_EXPR:
+      return right && integer_zerop (arg);
+
+    case MULT_EXPR:
+      return integer_onep (arg);
+
+    case TRUNC_DIV_EXPR:
+    case CEIL_DIV_EXPR:
+    case FLOOR_DIV_EXPR:
+    case ROUND_DIV_EXPR:
+    case EXACT_DIV_EXPR:
+      return right && integer_onep (arg);
+
+    case BIT_AND_EXPR:
+      return integer_all_onesp (arg);
+
+    default:
+      return false;
+    }
+}
+
+/* Returns true if ARG is an absorbing element for operation CODE.  */
+
+static bool
+absorbing_element_p (tree_code code, tree arg)
+{
+  switch (code)
+    {
+    case BIT_IOR_EXPR:
+      return integer_all_onesp (arg);
+
+    case MULT_EXPR:
+    case BIT_AND_EXPR:
+      return integer_zerop (arg);
+
+    default:
+      return false;
+    }
+}
+
+/* Returns true if the statement is cheap. The simple heuristic used here
+   is that anything the optab knows is cheap.  */
+
+static bool
+is_cheap_stmt (gimple stmt)
+{
+  tree type;
+  if (is_gimple_assign (stmt))
+    {
+      type = TREE_TYPE (gimple_assign_rhs1 (stmt));
+      enum tree_code code = gimple_assign_rhs_code (stmt);
+      optab op = optab_for_tree_code (code, type, optab_scalar);
+      return (op != unknown_optab
+	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing);
+    }
+  else if (gimple_code (stmt) == GIMPLE_COND)
+    {
+      type = TREE_TYPE (gimple_cond_lhs (stmt));
+      enum rtx_code code = (gimple_cond_code (stmt) == EQ_EXPR) ? EQ : NE;
+      return can_compare_p (code, TYPE_MODE (type), ccp_jump);
+    }
+  else
+    gcc_unreachable ();
+}
+
 /*  The function value_replacement does the main work of doing the value
     replacement.  Return non-zero if the replacement is done.  Otherwise return
     0.  If we remove the middle basic block, return 2.
     BB is the basic block where the replacement is going to be done on.  ARG0
     is argument 0 from the PHI.  Likewise for ARG1.  */
 
 static int
 value_replacement (basic_block cond_bb, basic_block middle_bb,
 		   edge e0, edge e1, gimple phi,
 		   tree arg0, tree arg1)
@@ -833,23 +933,21 @@  value_replacement (basic_block cond_bb,
   enum tree_code code;
   bool emtpy_or_with_defined_p = true;
 
   /* If the type says honor signed zeros we cannot do this
      optimization.  */
   if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
     return 0;
 
   /* If there is a statement in MIDDLE_BB that defines one of the PHI
      arguments, then adjust arg0 or arg1.  */
-  gsi = gsi_after_labels (middle_bb);
-  if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
-    gsi_next_nondebug (&gsi);
+  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
   while (!gsi_end_p (gsi))
     {
       gimple stmt = gsi_stmt (gsi);
       tree lhs;
       gsi_next_nondebug (&gsi);
       if (!is_gimple_assign (stmt))
 	{
 	  emtpy_or_with_defined_p = false;
 	  continue;
 	}
@@ -931,20 +1029,67 @@  value_replacement (basic_block cond_bb,
 	      print_generic_expr (dump_file, gimple_phi_result (phi), 0);
 	      fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
 		       cond_bb->index);
 	      print_generic_expr (dump_file, arg, 0);
 	      fprintf (dump_file, ".\n");
             }
           return 1;
 	}
 
     }
+
+  /* Now optimize (x != 0) ? x + y : y to just y.
+     The following condition is too restrictive, there can easily be another
+     stmt in middle_bb, for instance a CONVERT_EXPR for the second argument.  */
+  gimple assign = last_and_only_stmt (middle_bb);
+  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
+      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
+      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
+	  && !POINTER_TYPE_P (TREE_TYPE (arg0))))
+    return 0;
+
+  /* assign may call a libgcc routine, which is slow.  */
+  if (!is_cheap_stmt (assign) && is_cheap_stmt (cond))
+    return 0;
+
+  /* If the special case has a high probability, keep it.  */
+  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
+    return 0;
+
+  tree lhs = gimple_assign_lhs (assign);
+  tree rhs1 = gimple_assign_rhs1 (assign);
+  tree rhs2 = gimple_assign_rhs2 (assign);
+  enum tree_code code_def = gimple_assign_rhs_code (assign);
+  tree cond_lhs = gimple_cond_lhs (cond);
+  tree cond_rhs = gimple_cond_rhs (cond);
+
+  if (((code == NE_EXPR && e1 == false_edge)
+	|| (code == EQ_EXPR && e1 == true_edge))
+      && arg0 == lhs
+      && ((arg1 == rhs1
+	   && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
+	   && neutral_element_p (code_def, cond_rhs, true))
+	  || (arg1 == rhs2
+	      && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
+	      && neutral_element_p (code_def, cond_rhs, false))
+	  || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
+	      && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
+		  || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
+	      && absorbing_element_p (code_def, cond_rhs))))
+    {
+      gsi = gsi_for_stmt (cond);
+      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
+      gsi_move_before (&gsi_from, &gsi);
+      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
+      return 2;
+    }
+
   return 0;
 }
 
 /*  The function minmax_replacement does the main work of doing the minmax
     replacement.  Return true if the replacement is done.  Otherwise return
     false.
     BB is the basic block where the replacement is going to be done on.  ARG0
     is argument 0 from the PHI.  Likewise for ARG1.  */
 
 static bool