
Move some comparison simplifications to match.pd

Message ID alpine.DEB.2.02.1508300944340.4656@laptop-mg.saclay.inria.fr
State New

Commit Message

Marc Glisse Aug. 30, 2015, 7:57 a.m. UTC
Hello,

just trying to shrink fold-const.c a bit more.

initializer_zerop is close to what I was looking for with zerop, but I 
wasn't sure if it would be safe (it accepts some CONSTRUCTOR and 
STRING_CST). At some point I tried using sign_bit_p, but using the return 
of that function in the simplification confused the machinery too much. I 
added an "overload" of element_precision like the one in element_mode, for 
convenience.

Bootstrap+testsuite on ppc64le-redhat-linux.


2015-08-31  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* tree.h (zerop): New function.
 	* tree.c (zerop): Likewise.
 	(element_precision): Handle expressions.
 	* match.pd (define_predicates): Add zerop.
 	(x <= +Inf): Fix comment.
 	(abs (x) == 0, A & C == C, A & C != 0): Converted from ...
 	* fold-const.c (fold_binary_loc): ... here. Remove.

gcc/testsuite/
 	* gcc.dg/tree-ssa/cmp-1.c: New file.

Comments

Richard Biener Aug. 31, 2015, 12:29 p.m. UTC | #1
On Sun, Aug 30, 2015 at 9:57 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> just trying to shrink fold-const.c a bit more.
>
> initializer_zerop is close to what I was looking for with zerop, but I
> wasn't sure if it would be safe (it accepts some CONSTRUCTOR and
> STRING_CST). At some point I tried using sign_bit_p, but using the return of
> that function in the simplification confused the machinery too much. I added
> an "overload" of element_precision like the one in element_mode, for
> convenience.
>
> Bootstrap+testsuite on ppc64le-redhat-linux.

Ok.

Thanks,
Richard.

>
> 2015-08-31  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         * tree.h (zerop): New function.
>         * tree.c (zerop): Likewise.
>         (element_precision): Handle expressions.
>         * match.pd (define_predicates): Add zerop.
>         (x <= +Inf): Fix comment.
>         (abs (x) == 0, A & C == C, A & C != 0): Converted from ...
>         * fold-const.c (fold_binary_loc): ... here. Remove.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/cmp-1.c: New file.
>
Kyrylo Tkachov Oct. 27, 2015, 3:33 p.m. UTC | #2
Hi Marc,

On 30/08/15 08:57, Marc Glisse wrote:
> Hello,
>
> just trying to shrink fold-const.c a bit more.
>
> initializer_zerop is close to what I was looking for with zerop, but I
> wasn't sure if it would be safe (it accepts some CONSTRUCTOR and
> STRING_CST). At some point I tried using sign_bit_p, but using the return
> of that function in the simplification confused the machinery too much. I
> added an "overload" of element_precision like the one in element_mode, for
> convenience.
>
> Bootstrap+testsuite on ppc64le-redhat-linux.
>
>
> 2015-08-31  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>   	* tree.h (zerop): New function.
>   	* tree.c (zerop): Likewise.
>   	(element_precision): Handle expressions.
>   	* match.pd (define_predicates): Add zerop.
>   	(x <= +Inf): Fix comment.
>   	(abs (x) == 0, A & C == C, A & C != 0): Converted from ...
>   	* fold-const.c (fold_binary_loc): ... here. Remove.
>
> gcc/testsuite/
>   	* gcc.dg/tree-ssa/cmp-1.c: New file.
>

+/* If we have (A & C) != 0 where C is the sign bit of A, convert
+   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
+(for cmp (eq ne)
+     ncmp (ge lt)
+ (simplify
+  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && (TYPE_PRECISION (TREE_TYPE (@0))
+	   == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
+       && element_precision (@2) >= element_precision (@0)
+       && wi::only_sign_bit_p (@1, element_precision (@0)))
+   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+    (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
+

With this patch, and this pattern in particular, I've seen some code quality regressions on aarch64.
I'm still trying to reduce a testcase to demonstrate the issue, but it seems to involve
introducing extra conversions from unsigned to signed values. If I gate this pattern on
!TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve.
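
For reference, the gate sits next to the existing INTEGRAL_TYPE_P check in
the pattern quoted above, roughly like this (a sketch of the experiment, not
a tested patch):

  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
       && !TYPE_UNSIGNED (TREE_TYPE (@0))
       /* ... remaining conditions as in the patch ... */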

Any thoughts?
Thanks,
Kyrill
Marc Glisse Oct. 27, 2015, 3:57 p.m. UTC | #3
On Tue, 27 Oct 2015, Kyrill Tkachov wrote:

> Hi Marc,
>
> On 30/08/15 08:57, Marc Glisse wrote:
>> Hello,
>> 
>> just trying to shrink fold-const.c a bit more.
>> 
>> initializer_zerop is close to what I was looking for with zerop, but I
>> wasn't sure if it would be safe (it accepts some CONSTRUCTOR and
>> STRING_CST). At some point I tried using sign_bit_p, but using the return
>> of that function in the simplification confused the machinery too much. I
>> added an "overload" of element_precision like the one in element_mode, for
>> convenience.
>> 
>> Bootstrap+testsuite on ppc64le-redhat-linux.
>> 
>> 
>> 2015-08-31  Marc Glisse  <marc.glisse@inria.fr>
>> 
>> gcc/
>>   	* tree.h (zerop): New function.
>>   	* tree.c (zerop): Likewise.
>>   	(element_precision): Handle expressions.
>>   	* match.pd (define_predicates): Add zerop.
>>   	(x <= +Inf): Fix comment.
>>   	(abs (x) == 0, A & C == C, A & C != 0): Converted from ...
>>   	* fold-const.c (fold_binary_loc): ... here. Remove.
>> 
>> gcc/testsuite/
>>   	* gcc.dg/tree-ssa/cmp-1.c: New file.
>> 
>
> +/* If we have (A & C) != 0 where C is the sign bit of A, convert
> +   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
> +(for cmp (eq ne)
> +     ncmp (ge lt)
> + (simplify
> +  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +       && (TYPE_PRECISION (TREE_TYPE (@0))
> +	   == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
> +       && element_precision (@2) >= element_precision (@0)
> +       && wi::only_sign_bit_p (@1, element_precision (@0)))

This condition is a bit strict when @0 is signed: for an int32_t i,
(i & (5LL << 42)) != 0 would work as well thanks to sign extension.
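
Something like the following (a hypothetical example assuming 32-bit int and
64-bit long long, not from the testsuite): after sign extension the mask only
selects copies of i's sign bit, so the test is equivalent to i < 0, yet
wi::only_sign_bit_p rejects it:

  int f (int i)
  {
    /* Bits 42 and 44 of the sign-extended i are copies of the sign bit,
       so this is i < 0, but the pattern does not catch it.  */
    return (i & (5LL << 42)) != 0;
  }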

> +   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
> +    (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
> +
>
> With this patch, and this pattern in particular, I've seen some code
> quality regressions on aarch64. I'm still trying to reduce a testcase to
> demonstrate the issue, but it seems to involve introducing extra
> conversions from unsigned to signed values. If I gate this pattern on
> !TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve.
>
> Any thoughts?

Hmm, my first thoughts would be:
* conversion from unsigned to signed is a NOP,
* if (a & signbit) != 0 is faster to compute than a < 0, that's how a < 0 
should be expanded by the target,
* so the problem is probably something else, maybe the bit_and combined 
better with another operation, or the cast obfuscates things enough to 
confuse a later pass. Or maybe something related to @2.

An example would help understand what we are talking about...
Kyrylo Tkachov Oct. 27, 2015, 4:10 p.m. UTC | #4
On 27/10/15 15:57, Marc Glisse wrote:
> On Tue, 27 Oct 2015, Kyrill Tkachov wrote:
>
>> Hi Marc,
>>
>> On 30/08/15 08:57, Marc Glisse wrote:
>>> Hello,
>>>
>>> just trying to shrink fold-const.c a bit more.
>>>
>>> initializer_zerop is close to what I was looking for with zerop, but I
>>> wasn't sure if it would be safe (it accepts some CONSTRUCTOR and
>>> STRING_CST). At some point I tried using sign_bit_p, but using the return
>>> of that function in the simplification confused the machinery too much. I
>>> added an "overload" of element_precision like the one in element_mode, for
>>> convenience.
>>>
>>> Bootstrap+testsuite on ppc64le-redhat-linux.
>>>
>>>
>>> 2015-08-31  Marc Glisse  <marc.glisse@inria.fr>
>>>
>>> gcc/
>>>       * tree.h (zerop): New function.
>>>       * tree.c (zerop): Likewise.
>>>       (element_precision): Handle expressions.
>>>       * match.pd (define_predicates): Add zerop.
>>>       (x <= +Inf): Fix comment.
>>>       (abs (x) == 0, A & C == C, A & C != 0): Converted from ...
>>>       * fold-const.c (fold_binary_loc): ... here. Remove.
>>>
>>> gcc/testsuite/
>>>       * gcc.dg/tree-ssa/cmp-1.c: New file.
>>>
>>
>> +/* If we have (A & C) != 0 where C is the sign bit of A, convert
>> +   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
>> +(for cmp (eq ne)
>> +     ncmp (ge lt)
>> + (simplify
>> +  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
>> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
>> +       && (TYPE_PRECISION (TREE_TYPE (@0))
>> +       == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
>> +       && element_precision (@2) >= element_precision (@0)
>> +       && wi::only_sign_bit_p (@1, element_precision (@0)))
>
> This condition is a bit strict when @0 is signed: for an int32_t i, (i & (5LL << 42)) != 0 would work as well thanks to sign extension.
>
>> +   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
>> +    (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
>> +
>>
>> With this patch, and this pattern in particular, I've seen some code quality regressions on aarch64.
>> I'm still trying to reduce a testcase to demonstrate the issue, but it seems to involve
>> introducing extra conversions from unsigned to signed values. If I gate this pattern on
>> !TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve.
>>
>> Any thoughts?
>
> Hmm, my first thoughts would be:
> * conversion from unsigned to signed is a NOP,
> * if (a & signbit) != 0 is faster to compute than a < 0, that's how a < 0 should be expanded by the target,
> * so the problem is probably something else, maybe the bit_and combined better with another operation, or the cast obfuscates things enough to confuse a later pass. Or maybe something related to @2.
>

Thanks,
So here the types are shorts and unsigned shorts. On aarch64 these are HImode values and there are no direct arithmetic
operations on them, so they have to be extended to SImode and truncated back.
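
The kind of code involved is roughly (a made-up illustration, not the
testcase I'm still reducing):

  int f (unsigned short x)
  {
    /* 0x8000 is the sign bit of the 16-bit type.  */
    return (x & 0x8000) != 0;
  }

where the new pattern rewrites the test as (short) x < 0 and the HImode
value then needs an extra extension.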

> An example would help understand what we are talking about...

Working on it. I'm trying to introduce gcc_unreachable calls into the compiler when the bad situation happens
and to reduce the original file, but I think I'm not capturing the conditions that trigger this behaviour
exactly right :(

Kyrill
Marc Glisse Oct. 27, 2015, 4:29 p.m. UTC | #5
On Tue, 27 Oct 2015, Kyrill Tkachov wrote:

> So here the types are shorts and unsigned shorts. On aarch64 these are 
> HImode values and there are no direct arithmetic operations on them, so 
> they have to be extended to SImode and truncated back.

Ah ok, that makes sense. I expect it is in the case where @0 is shorter 
than a word and @2 is longer than @0. I wouldn't expect the signedness to 
matter that much, but maybe one of sign/zero-extension simplifies better 
than the other.

This problem would happen because we are still missing a pass that handles 
type promotion.

It means we could consider gating the transformation by the existence of 
lt in the mode of @0, but I don't see any query of optabs in match.pd 
yet...

Still, a testcase would be necessary to go with whatever patch, and I may 
be completely on the wrong track.

Patch

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 227316)
+++ gcc/fold-const.c	(working copy)
@@ -10761,25 +10761,20 @@  fold_binary_loc (location_t loc,
 									1)),
 			      arg1, 0)
 	  && wi::extract_uhwi (TREE_OPERAND (arg0, 0), 0, 1) == 1)
 	{
 	  return omit_two_operands_loc (loc, type,
 				    code == NE_EXPR
 				    ? boolean_true_node : boolean_false_node,
 				    TREE_OPERAND (arg0, 1), arg1);
 	}
 
-      /* Convert ABS_EXPR<x> == 0 or ABS_EXPR<x> != 0 to x == 0 or x != 0.  */
-      if (TREE_CODE (arg0) == ABS_EXPR
-	  && (integer_zerop (arg1) || real_zerop (arg1)))
-	return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), arg1);
-
       /* If this is an EQ or NE comparison with zero and ARG0 is
 	 (1 << foo) & bar, convert it to (bar >> foo) & 1.  Both require
 	 two operations, but the latter can be done in one less insn
 	 on machines that have only two-operand insns or on which a
 	 constant cannot be the first operand.  */
       if (TREE_CODE (arg0) == BIT_AND_EXPR
 	  && integer_zerop (arg1))
 	{
 	  tree arg00 = TREE_OPERAND (arg0, 0);
 	  tree arg01 = TREE_OPERAND (arg0, 1);
@@ -10868,35 +10863,20 @@  fold_binary_loc (location_t loc,
 		 ((X >> C1) & C2) != 0 is rewritten as (X,false), and
 		 ((X >> C1) & C2) == 0 is rewritten as (X,true).  */
 	      else
 		return omit_one_operand_loc (loc, type,
 					 code == EQ_EXPR ? integer_one_node
 							 : integer_zero_node,
 					 arg000);
 	    }
 	}
 
-      /* If we have (A & C) == C where C is a power of 2, convert this into
-	 (A & C) != 0.  Similarly for NE_EXPR.  */
-      if (TREE_CODE (arg0) == BIT_AND_EXPR
-	  && integer_pow2p (TREE_OPERAND (arg0, 1))
-	  && operand_equal_p (TREE_OPERAND (arg0, 1), arg1, 0))
-	return fold_build2_loc (loc, code == EQ_EXPR ? NE_EXPR : EQ_EXPR, type,
-			    arg0, fold_convert_loc (loc, TREE_TYPE (arg0),
-						    integer_zero_node));
-
-      /* If we have (A & C) != 0 or (A & C) == 0 and C is the sign
-	 bit, then fold the expression into A < 0 or A >= 0.  */
-      tem = fold_single_bit_test_into_sign_test (loc, code, arg0, arg1, type);
-      if (tem)
-	return tem;
-
       /* If we have (A & C) == D where D & ~C != 0, convert this into 0.
 	 Similarly for NE_EXPR.  */
       if (TREE_CODE (arg0) == BIT_AND_EXPR
 	  && TREE_CODE (arg1) == INTEGER_CST
 	  && TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST)
 	{
 	  tree notc = fold_build1_loc (loc, BIT_NOT_EXPR,
 				   TREE_TYPE (TREE_OPERAND (arg0, 1)),
 				   TREE_OPERAND (arg0, 1));
 	  tree dandnotc
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	(revision 227316)
+++ gcc/match.pd	(working copy)
@@ -21,20 +21,21 @@  for more details.
 You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
 
 /* Generic tree predicates we inherit.  */
 (define_predicates
    integer_onep integer_zerop integer_all_onesp integer_minus_onep
    integer_each_onep integer_truep integer_nonzerop
    real_zerop real_onep real_minus_onep
+   zerop
    CONSTANT_CLASS_P
    tree_expr_nonnegative_p
    integer_pow2p
    HONOR_NANS)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
   lt   le   eq ne ge   gt   unordered ordered   unlt unle ungt unge uneq ltgt)
 (define_operator_list inverted_tcc_comparison
   ge   gt   ne eq lt   le   ordered   unordered ge   gt   le   lt   ltgt uneq)
@@ -1570,21 +1571,21 @@  along with GCC; see the file COPYING3.
      }
      (switch
       /* x > +Inf is always false, if with ignore sNANs.  */
       (if (code == GT_EXPR
 	   && ! HONOR_SNANS (@0))
        { constant_boolean_node (false, type); })
       (if (code == LE_EXPR)
        /* x <= +Inf is always true, if we don't case about NaNs.  */
        (if (! HONOR_NANS (@0))
 	{ constant_boolean_node (true, type); }
-	/* x <= +Inf is the same as x == x, i.e. isfinite(x).  */
+	/* x <= +Inf is the same as x == x, i.e. !isnan(x).  */
 	(eq @0 @0)))
       /* x == +Inf and x >= +Inf are always equal to x > DBL_MAX.  */
       (if (code == EQ_EXPR || code == GE_EXPR)
        (with { real_maxval (&max, neg, TYPE_MODE (TREE_TYPE (@0))); }
 	(if (neg)
 	 (lt @0 { build_real (TREE_TYPE (@0), max); })
 	 (gt @0 { build_real (TREE_TYPE (@0), max); }))))
       /* x < +Inf is always equal to x <= DBL_MAX.  */
       (if (code == LT_EXPR)
        (with { real_maxval (&max, neg, TYPE_MODE (TREE_TYPE (@0))); }
@@ -1727,20 +1728,26 @@  along with GCC; see the file COPYING3.
    (scmp @0 @1)))
  (simplify
   (cmp (negate @0) CONSTANT_CLASS_P@1)
   (if (FLOAT_TYPE_P (TREE_TYPE (@0))
        || (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
 	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))))
    (with { tree tem = fold_unary (NEGATE_EXPR, TREE_TYPE (@0), @1); }
     (if (tem && !TREE_OVERFLOW (tem))
      (scmp @0 { tem; }))))))
 
+/* Convert ABS_EXPR<x> == 0 or ABS_EXPR<x> != 0 to x == 0 or x != 0.  */
+(for op (eq ne)
+ (simplify
+  (op (abs @0) zerop@1)
+  (op @0 @1)))
+
 /* From fold_sign_changed_comparison and fold_widened_comparison.  */
 (for cmp (simple_comparison)
  (simplify
   (cmp (convert@0 @00) (convert?@1 @10))
   (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
        /* Disable this optimization if we're casting a function pointer
 	  type on targets that require function pointer canonicalization.  */
        && !(targetm.have_canonicalize_funcptr_for_compare ()
 	    && TREE_CODE (TREE_TYPE (@00)) == POINTER_TYPE
 	    && TREE_CODE (TREE_TYPE (TREE_TYPE (@00))) == FUNCTION_TYPE)
@@ -1833,20 +1840,42 @@  along with GCC; see the file COPYING3.
  (simplify
   (cmp (convert?@3 (bit_xor @0 INTEGER_CST@1)) INTEGER_CST@2)
   (if (tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0)))
    (cmp @0 (bit_xor @1 (convert @2)))))
 
  (simplify
   (cmp (convert? addr@0) integer_zerop)
   (if (tree_single_nonzero_warnv_p (@0, NULL))
    { constant_boolean_node (cmp == NE_EXPR, type); })))
 
+/* If we have (A & C) == C where C is a power of 2, convert this into
+   (A & C) != 0.  Similarly for NE_EXPR.  */
+(for cmp (eq ne)
+     icmp (ne eq)
+ (simplify
+  (cmp (bit_and@2 @0 integer_pow2p@1) @1)
+  (icmp @2 { build_zero_cst (TREE_TYPE (@0)); })))
+ 
+/* If we have (A & C) != 0 where C is the sign bit of A, convert
+   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
+(for cmp (eq ne)
+     ncmp (ge lt)
+ (simplify
+  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && (TYPE_PRECISION (TREE_TYPE (@0))
+	   == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
+       && element_precision (@2) >= element_precision (@0)
+       && wi::only_sign_bit_p (@1, element_precision (@0)))
+   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+    (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
+
 /* When the addresses are not directly of decls compare base and offset.
    This implements some remaining parts of fold_comparison address
    comparisons but still no complete part of it.  Still it is good
    enough to make fold_stmt not regress when not dispatching to fold_binary.  */
 (for cmp (simple_comparison)
  (simplify
   (cmp (convert1?@2 addr@0) (convert2? addr@1))
   (with
    {
      HOST_WIDE_INT off0, off1;
Index: gcc/testsuite/gcc.dg/tree-ssa/cmp-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/cmp-1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/cmp-1.c	(working copy)
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-gimple -fdump-tree-optimized" } */
+
+int f(int a){
+  int b = -__INT_MAX__-1;
+  a &= b;
+  return a == b;
+}
+int g(int x){
+    x = x < 0 ? -x : x;
+    return x == 0;
+}
+
+/* This should work even if int is not 32 bits, it is just not meaningful in
+   that case.  */
+/* { dg-final { scan-tree-dump-not "-2147483648" "optimized"} } */
+/* { dg-final { scan-tree-dump " < 0" "optimized"} } */
+/* { dg-final { scan-tree-dump "ABS_EXPR" "gimple"} } */
+/* { dg-final { scan-tree-dump-not "ABS_EXPR" "optimized"} } */
Index: gcc/tree.c
===================================================================
--- gcc/tree.c	(revision 227316)
+++ gcc/tree.c	(working copy)
@@ -2208,20 +2208,31 @@  grow_tree_vec_stat (tree v, int len MEM_
 
   record_node_allocation_statistics (TREE_VEC, length - oldlength);
 
   v = (tree) ggc_realloc (v, length PASS_MEM_STAT);
 
   TREE_VEC_LENGTH (v) = len;
 
   return v;
 }
 
+/* Return 1 if EXPR is the constant zero, whether it is integral, float or
+   fixed, and scalar, complex or vector.  */
+
+int
+zerop (const_tree expr)
+{
+  return (integer_zerop (expr)
+	  || real_zerop (expr)
+	  || fixed_zerop (expr));
+}
+
 /* Return 1 if EXPR is the integer constant zero or a complex constant
    of zero.  */
 
 int
 integer_zerop (const_tree expr)
 {
   STRIP_NOPS (expr);
 
   switch (TREE_CODE (expr))
     {
@@ -7505,20 +7516,22 @@  valid_constant_size_p (const_tree size)
     return false;
   return true;
 }
 
 /* Return the precision of the type, or for a complex or vector type the
    precision of the type of its elements.  */
 
 unsigned int
 element_precision (const_tree type)
 {
+  if (!TYPE_P (type))
+    type = TREE_TYPE (type);
   enum tree_code code = TREE_CODE (type);
   if (code == COMPLEX_TYPE || code == VECTOR_TYPE)
     type = TREE_TYPE (type);
 
   return TYPE_PRECISION (type);
 }
 
 /* Return true if CODE represents an associative tree code.  Otherwise
    return false.  */
 bool
Index: gcc/tree.h
===================================================================
--- gcc/tree.h	(revision 227316)
+++ gcc/tree.h	(working copy)
@@ -4102,20 +4102,24 @@  extern bool initializer_zerop (const_tre
 
 /* Given a vector VEC, return its first element if all elements are
    the same.  Otherwise return NULL_TREE.  */
 
 extern tree uniform_vector_p (const_tree);
 
 /* Given a CONSTRUCTOR CTOR, return the element values as a vector.  */
 
 extern vec<tree, va_gc> *ctor_to_vec (tree);
 
+/* zerop (tree x) is nonzero if X is a constant of value 0.  */
+
+extern int zerop (const_tree);
+
 /* integer_zerop (tree x) is nonzero if X is an integer constant of value 0.  */
 
 extern int integer_zerop (const_tree);
 
 /* integer_onep (tree x) is nonzero if X is an integer constant of value 1.  */
 
 extern int integer_onep (const_tree);
 
 /* integer_onep (tree x) is nonzero if X is an integer constant of value 1, or
    a vector or complex where each part is 1.  */