diff mbox

[match.pd,c,c++] : Ignore results of 'shorten_compare' and move missing patterns in match.pd

Message ID CAEwic4ZDJHyst_kgSgMxZLtRtHEbyFUoCS1g5CPGkf6isR8GAA@mail.gmail.com
State New
Headers show

Commit Message

Kai Tietz Sept. 8, 2015, 11:17 a.m. UTC
Hi,

This patch is the first part of obsoleting 'shorten_compare' function
for folding.
It adjusts the uses of 'shorten_compare' to ignore folding returned by
it, and adds
missing pattterns to match.pd to allow full bootstrap of C/C++ without
regressions.
Due we are using 'shorten_compare' for some diagnostic we can't simply
remove it.  So if this patch gets approved, the next step will be to
rename the function to something like 'check_compare', and adjust its
arguments and inner logic to reflect that we don't modify
arguments/expression anymore within that function.

Bootstrap just show 2 regressions within gcc.dg testsuite due patterns
matched are folded more early by forward-propagation.  I adjusted
them, and added them to patch, too.

I did regression-testing for x86_64-unknown-linux-gnu.

ChangeLog

2015-09-08  Kai Tietz  <ktietz@redhat.com>

    * match.pd: Add missing patterns from shorten_compare.
    * c/c-typeck.c (build_binary_op): Discard foldings of shorten_compare.
    * cp/typeck.c (cp_build_binary_op): Likewise.

2015-09-08  Kai Tietz  <ktietz@redhat.com>

    * gcc.dg/tree-ssa/vrp23.c: Adjust testcase to reflect that
    pattern is matching now already within forward-propagation pass.
    * gcc.dg/tree-ssa/vrp24.c: Likewise.

Comments

Kai Tietz Sept. 11, 2015, 8:02 a.m. UTC | #1
Ping.

2015-09-08 13:17 GMT+02:00 Kai Tietz <ktietz70@googlemail.com>:
> Hi,
>
> This patch is the first part of obsoleting 'shorten_compare' function
> for folding.
> It adjusts the uses of 'shorten_compare' to ignore folding returned by
> it, and adds
> missing pattterns to match.pd to allow full bootstrap of C/C++ without
> regressions.
> Due we are using 'shorten_compare' for some diagnostic we can't simply
> remove it.  So if this patch gets approved, the next step will be to
> rename the function to something like 'check_compare', and adjust its
> arguments and inner logic to reflect that we don't modify
> arguments/expression anymore within that function.
>
> Bootstrap just show 2 regressions within gcc.dg testsuite due patterns
> matched are folded more early by forward-propagation.  I adjusted
> them, and added them to patch, too.
>
> I did regression-testing for x86_64-unknown-linux-gnu.
>
> ChangeLog
>
> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>
>     * match.pd: Add missing patterns from shorten_compare.
>     * c/c-typeck.c (build_binary_op): Discard foldings of shorten_compare.
>     * cp/typeck.c (cp_build_binary_op): Likewise.
>
> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>
>     * gcc.dg/tree-ssa/vrp23.c: Adjust testcase to reflect that
>     pattern is matching now already within forward-propagation pass.
>     * gcc.dg/tree-ssa/vrp24.c: Likewise.
>
> Index: match.pd
> ===================================================================
> --- match.pd    (Revision 227528)
> +++ match.pd    (Arbeitskopie)
> @@ -1786,6 +1786,45 @@ along with GCC; see the file COPYING3.  If not see
>    (op (abs @0) zerop@1)
>    (op @0 @1)))
>
> +/* Simplify '((type) X) cmp ((type) Y' to shortest possible types, of X and Y,
> +   if type's precision is wider then precision of X's and Y's type.
> +   Logic taken from shorten_compare function.  */
> +(for op (tcc_comparison)
> +  (simplify
> +    (op (convert@0 @1) (convert@3 @2))
> +    (if ((TREE_CODE (TREE_TYPE (@1)) == REAL_TYPE)
> +         == (TREE_CODE (TREE_TYPE (@2)) == REAL_TYPE)
> +         && (TREE_CODE (TREE_TYPE (@1)) == REAL_TYPE)
> +            == (TREE_CODE (TREE_TYPE (@0)) == REAL_TYPE)
> +         && single_use (@1)
> +         && single_use (@3)
> +         && TYPE_UNSIGNED (TREE_TYPE (@1)) == TYPE_UNSIGNED (TREE_TYPE (@2))
> +         && !((TREE_CODE (TREE_TYPE (@1)) == REAL_TYPE
> +               && DECIMAL_FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (@1))))
> +              || (TREE_CODE (TREE_TYPE (@2)) == REAL_TYPE
> +                  && DECIMAL_FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (@2)))))
> +         && TYPE_PRECISION (TREE_TYPE (@1)) < TYPE_PRECISION (TREE_TYPE (@0))
> +         && TYPE_PRECISION (TREE_TYPE (@2)) < TYPE_PRECISION (TREE_TYPE (@0))
> +         )
> +       (with {
> +         tree comtype = TYPE_PRECISION (TREE_TYPE (@1))
> +                 < TYPE_PRECISION (TREE_TYPE (@2)) ? TREE_TYPE (@2)
> +                                   : TREE_TYPE (@1);
> +         if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
> +           {
> +             if (TYPE_UNSIGNED (TREE_TYPE (@1)) || TYPE_UNSIGNED
> (TREE_TYPE (@0)))
> +               comtype = unsigned_type_for (comtype);
> +             else
> +               comtype = signed_type_for (comtype);
> +           }
> +         }
> +            (op (convert:comtype @1) (convert:comtype @2))
> +       )
> +     )
> +  )
> +)
> +
> +
>  /* From fold_sign_changed_comparison and fold_widened_comparison.  */
>  (for cmp (simple_comparison)
>   (simplify
> @@ -2046,7 +2085,43 @@ along with GCC; see the file COPYING3.  If not see
>          (if (cmp == LE_EXPR)
>       (ge (convert:st @0) { build_zero_cst (st); })
>       (lt (convert:st @0) { build_zero_cst (st); }))))))))))
> -
> +
> +/* Simplify '(type) X cmp CST' to 'X cmp (type-of-X) CST', if
> +   CST fits into the type of X.  */
> +(for cmp (simple_comparison)
> +  (simplify
> +    (cmp (convert@2 @0) INTEGER_CST@1)
> +    (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +         && TYPE_PRECISION (TREE_TYPE (@1)) > TYPE_PRECISION (TREE_TYPE (@0))
> +         && single_use (@2)
> +         && (TYPE_UNSIGNED (TREE_TYPE (@0))
> +             || TYPE_UNSIGNED (TREE_TYPE (@0)) == TYPE_UNSIGNED
> (TREE_TYPE (@1))
> +             || cmp == NE_EXPR || cmp == EQ_EXPR)
> +         && !POINTER_TYPE_P (TREE_TYPE (@0))
> +         && int_fits_type_p (@1, TREE_TYPE (@0)))
> +      (with { tree optype = TREE_TYPE (@0); }
> +        (cmp @0 (convert:optype @1))
> +      )
> +    )
> +  )
> +)
> +
> +/* See if '(type) X ==/!= CST' represents a condition,
> +   which is always true, or false due CST's value.  */
> +(for cmp (ne eq)
> +  (simplify
> +    (cmp (convert@2 @0) INTEGER_CST@1)
> +    (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +         && TYPE_PRECISION (TREE_TYPE (@1)) >= TYPE_PRECISION (TREE_TYPE (@0))
> +         && single_use (@2)
> +         && !POINTER_TYPE_P (TREE_TYPE (@0))
> +         && !int_fits_type_p (@1, TREE_TYPE (@0))
> +         && TYPE_UNSIGNED (TREE_TYPE (@0)) == TYPE_UNSIGNED (TREE_TYPE (@1)))
> +      { constant_boolean_node (cmp == NE_EXPR, type); }
> +    )
> +  )
> +)
> +
>  (for cmp (unordered ordered unlt unle ungt unge uneq ltgt)
>   /* If the second operand is NaN, the result is constant.  */
>   (simplify
> Index: cp/typeck.c
> ===================================================================
> --- cp/typeck.c    (Revision 227528)
> +++ cp/typeck.c    (Arbeitskopie)
> @@ -5000,14 +5000,8 @@ cp_build_binary_op (location_t location,
>           pass the copies by reference, then copy them back afterward.  */
>        tree xop0 = op0, xop1 = op1, xresult_type = result_type;
>        enum tree_code xresultcode = resultcode;
> -      tree val
> -        = shorten_compare (location, &xop0, &xop1, &xresult_type,
> -                   &xresultcode);
> -      if (val != 0)
> -        return cp_convert (boolean_type_node, val, complain);
> -      op0 = xop0, op1 = xop1;
> -      converted = 1;
> -      resultcode = xresultcode;
> +      shorten_compare (location, &xop0, &xop1, &xresult_type,
> +               &xresultcode);
>      }
>
>        if ((short_compare || code == MIN_EXPR || code == MAX_EXPR)
> Index: c/c-typeck.c
> ===================================================================
> --- c/c-typeck.c    (Revision 227528)
> +++ c/c-typeck.c    (Arbeitskopie)
> @@ -11193,20 +11193,9 @@ build_binary_op (location_t location, enum tree_co
>           pass the copies by reference, then copy them back afterward.  */
>        tree xop0 = op0, xop1 = op1, xresult_type = result_type;
>        enum tree_code xresultcode = resultcode;
> -      tree val
> -        = shorten_compare (location, &xop0, &xop1, &xresult_type,
> -                   &xresultcode);
> +      shorten_compare (location, &xop0, &xop1, &xresult_type,
> +               &xresultcode);
>
> -      if (val != 0)
> -        {
> -          ret = val;
> -          goto return_build_binary_op;
> -        }
> -
> -      op0 = xop0, op1 = xop1;
> -      converted = 1;
> -      resultcode = xresultcode;
> -
>        if (c_inhibit_evaluation_warnings == 0)
>          {
>            bool op0_maybe_const = true;

Make sure to split patch here.  Above patch and this can't be applied together.

> Index: gcc.dg/tree-ssa/vrp23.c
> ===================================================================
> --- gcc.dg/tree-ssa/vrp23.c    (Revision 227528)
> +++ gcc.dg/tree-ssa/vrp23.c    (Arbeitskopie)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
> +/* { dg-options "-O2 -fdump-tree-vrp1-details -fdump-tree-forwprop1" } */
>
>  void aa (void);
>  void aos (void);
> @@ -44,6 +44,10 @@ L8:
>
>  /* The n_sets > 0 test can be simplified into n_sets == 1 since the
>     only way to reach the test is when n_sets <= 1, and the only value
> -   which satisfies both conditions is n_sets == 1.  */
> -/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */
> +   which satisfies both conditions is n_sets == 1.
> +   We catch this pattern for this testcase already in forward-propagation
> +   pass, as use of n_sets becomes single one.  Therefore we expect that
> +   vrp won't find this pattern anymore.  */
> +/* { dg-final { scan-tree-dump-times "Simplified relational" 0 "vrp1" } } */
> +/* { dg-final { scan-tree-dump-times "Replaced" 3 "forwprop1" } } */
>
> Index: gcc.dg/tree-ssa/vrp24.c
> ===================================================================
> --- gcc.dg/tree-ssa/vrp24.c    (Revision 227528)
> +++ gcc.dg/tree-ssa/vrp24.c    (Arbeitskopie)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
> +/* { dg-options "-O2 -fdump-tree-vrp1-details -fdump-tree-forwprop1"  } */
>
>
>  struct rtx_def;
> @@ -90,6 +90,9 @@ L7:
>
>     The second n_sets > 0 test can also be simplified into n_sets == 1
>     as the only way to reach the tests is when n_sets <= 1 and the only
> -   value which satisfies both conditions is n_sets == 1.  */
> -/* { dg-final { scan-tree-dump-times "Simplified relational" 2 "vrp1" } } */
> +   value which satisfies both conditions is n_sets == 1.
> +   We catch one of the simplifications already in forward-propagation pass.
> +   Therefore we exprect just one simplified relational detected
> within vrp1.  */
> +/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */
> +/* { dg-final { scan-tree-dump-times "Replaced" 2 "forwprop1" } } */
Jeff Law Sept. 14, 2015, 10:45 p.m. UTC | #2
On 09/08/2015 05:17 AM, Kai Tietz wrote:
> Hi,
>
> This patch is the first part of obsoleting 'shorten_compare' function
> for folding.
> It adjusts the uses of 'shorten_compare' to ignore folding returned by
> it, and adds
> missing pattterns to match.pd to allow full bootstrap of C/C++ without
> regressions.
> Due we are using 'shorten_compare' for some diagnostic we can't simply
> remove it.  So if this patch gets approved, the next step will be to
> rename the function to something like 'check_compare', and adjust its
> arguments and inner logic to reflect that we don't modify
> arguments/expression anymore within that function.
>
> Bootstrap just show 2 regressions within gcc.dg testsuite due patterns
> matched are folded more early by forward-propagation.  I adjusted
> them, and added them to patch, too.
>
> I did regression-testing for x86_64-unknown-linux-gnu.
>
> ChangeLog
>
> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>
>      * match.pd: Add missing patterns from shorten_compare.
>      * c/c-typeck.c (build_binary_op): Discard foldings of shorten_compare.
>      * cp/typeck.c (cp_build_binary_op): Likewise.
>
> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>
>      * gcc.dg/tree-ssa/vrp23.c: Adjust testcase to reflect that
>      pattern is matching now already within forward-propagation pass.
>      * gcc.dg/tree-ssa/vrp24.c: Likewise.
So for the new patterns, I would have expected testcases to ensure 
they're getting used.

The fact that we're not regressing with the front-end specific 
shortening disabled like this is probably more of an artifact of lack of 
testing of this feature than anything.

In *theory* one ought to be able to look at the dumps or .s files before 
after this patch for a blob of tests and see that nothing significant 
has changed.  Unfortunately, so much changes that it's hard to evaluate 
if this patch is a step forward or a step back.

I wonder if we'd do better to first add new match.pd patterns, one at a 
time, with tests, and evaluating them along the way by looking at the 
dumps or .s files across many systems.  Then when we think we've got the 
right set, then look at what happens to those dumps/.s files if we make 
the changes so that shorten_compare really just emits warnings.

My worry is that we get part way through the conversion and end up with 
the match.pd patterns without actually getting shorten_compare cleaned 
up and turned into just a warning generator.

jeff
Kai Tietz Sept. 15, 2015, 9:42 a.m. UTC | #3
2015-09-15 0:45 GMT+02:00 Jeff Law <law@redhat.com>:
> On 09/08/2015 05:17 AM, Kai Tietz wrote:
>>
>> Hi,
>>
>> This patch is the first part of obsoleting 'shorten_compare' function
>> for folding.
>> It adjusts the uses of 'shorten_compare' to ignore folding returned by
>> it, and adds
>> missing pattterns to match.pd to allow full bootstrap of C/C++ without
>> regressions.
>> Due we are using 'shorten_compare' for some diagnostic we can't simply
>> remove it.  So if this patch gets approved, the next step will be to
>> rename the function to something like 'check_compare', and adjust its
>> arguments and inner logic to reflect that we don't modify
>> arguments/expression anymore within that function.
>>
>> Bootstrap just show 2 regressions within gcc.dg testsuite due patterns
>> matched are folded more early by forward-propagation.  I adjusted
>> them, and added them to patch, too.
>>
>> I did regression-testing for x86_64-unknown-linux-gnu.
>>
>> ChangeLog
>>
>> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>>
>>      * match.pd: Add missing patterns from shorten_compare.
>>      * c/c-typeck.c (build_binary_op): Discard foldings of
>> shorten_compare.
>>      * cp/typeck.c (cp_build_binary_op): Likewise.
>>
>> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>>
>>      * gcc.dg/tree-ssa/vrp23.c: Adjust testcase to reflect that
>>      pattern is matching now already within forward-propagation pass.
>>      * gcc.dg/tree-ssa/vrp24.c: Likewise.
>
> So for the new patterns, I would have expected testcases to ensure they're
> getting used.

We were talking about that.  My approach was to disable shortening
code and see what regressions we get.  For C++ our test-coverage isn't
that good, as we didn't had here any regressions, but for C testcases
we got some.  Eg the testcase vrp25.c is one of them

> The fact that we're not regressing with the front-end specific shortening
> disabled like this is probably more of an artifact of lack of testing of
> this feature than anything.

This is just true for C++.  For C case we see additional fallout also
in gcc.target specific patterns in i386.  They were mainly about
"blend", and some AVX-testcases.

By disabling "shorten_compare" one of most simple testcases, which is
failing, is:

int foo (short s, short t)
{
  int a = (int) s;
  int b = (int) t;

  return a >= b;
}

Where we miss to do narrow down SSA-tree comparison to simply s >= t;
If we disable fre-pass ( -fno-tree-fre) for current trunk, we can see
that this pattern gets resolved in forward-propagation pass due
invocation of shorten_compare.

Another simple one (with disabled "shorten_compare") is:

The following testcase:

int foo (unsigned short a)
{
  unsigned int b = 0;
  return (unsigned int) a) < b;
}

Here we lacked the detection of ((unsigned int) a) < b being a < 0
(which is always false).

> In *theory* one ought to be able to look at the dumps or .s files before
> after this patch for a blob of tests and see that nothing significant has
> changed.  Unfortunately, so much changes that it's hard to evaluate if this
> patch is a step forward or a step back.

Hmm, actually we deal here with obvious patterns from
"shorten_compare".  Of interest are here the narrowing-lines on top of
this function, which we need to reflect in match.pd too, before we can
deprecate it. I don't get that there are so much changes?  This are in
fact just 3 basic patterns '(convert) X <cmp> (convert) Y',
'((convert) X) <cmp> CST', and a more specialized variant for the last
pattern for '==/!=' case.

> I wonder if we'd do better to first add new match.pd patterns, one at a
> time, with tests, and evaluating them along the way by looking at the dumps
> or .s files across many systems.  Then when we think we've got the right
> set, then look at what happens to those dumps/.s files if we make the
> changes so that shorten_compare really just emits warnings.

As the shorten_compare function covers those transformations, I don't
see that this is something leading to something useful.  As long as we
call shorten_compare on folding in FEs, we won't see those patterns in
ME that easy.  And even more important is, that patterns getting
resolved if function gets invoked.

So I would suggest here to disable shorten_compare invocation and
cleanup with fallout detectable in C-FE's testsuite.

> My worry is that we get part way through the conversion and end up with the
> match.pd patterns without actually getting shorten_compare cleaned up and
> turned into just a warning generator.

This worry I share, therefore I see it as mandatory to remove with
initial patch the use of result of shorten_compare, and better cleanup
possible detected additional fallout for other targets.  I just did
regression-testing for Intel-architecture (32-bit and 64-bit).

> jeff
>

Kai
Richard Biener Sept. 15, 2015, 10:06 a.m. UTC | #4
On Tue, Sep 8, 2015 at 1:17 PM, Kai Tietz <ktietz70@googlemail.com> wrote:
> Hi,
>
> This patch is the first part of obsoleting 'shorten_compare' function
> for folding.
> It adjusts the uses of 'shorten_compare' to ignore folding returned by
> it, and adds
> missing pattterns to match.pd to allow full bootstrap of C/C++ without
> regressions.
> Due we are using 'shorten_compare' for some diagnostic we can't simply
> remove it.  So if this patch gets approved, the next step will be to
> rename the function to something like 'check_compare', and adjust its
> arguments and inner logic to reflect that we don't modify
> arguments/expression anymore within that function.
>
> Bootstrap just show 2 regressions within gcc.dg testsuite due patterns
> matched are folded more early by forward-propagation.  I adjusted
> them, and added them to patch, too.

Some comments on the patterns you added below

> I did regression-testing for x86_64-unknown-linux-gnu.
>
> ChangeLog
>
> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>
>     * match.pd: Add missing patterns from shorten_compare.
>     * c/c-typeck.c (build_binary_op): Discard foldings of shorten_compare.
>     * cp/typeck.c (cp_build_binary_op): Likewise.
>
> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>
>     * gcc.dg/tree-ssa/vrp23.c: Adjust testcase to reflect that
>     pattern is matching now already within forward-propagation pass.
>     * gcc.dg/tree-ssa/vrp24.c: Likewise.
>
> Index: match.pd
> ===================================================================
> --- match.pd    (Revision 227528)
> +++ match.pd    (Arbeitskopie)
> @@ -1786,6 +1786,45 @@ along with GCC; see the file COPYING3.  If not see
>    (op (abs @0) zerop@1)
>    (op @0 @1)))
>
> +/* Simplify '((type) X) cmp ((type) Y' to shortest possible types, of X and Y,
> +   if type's precision is wider then precision of X's and Y's type.
> +   Logic taken from shorten_compare function.  */
> +(for op (tcc_comparison)
> +  (simplify
> +    (op (convert@0 @1) (convert@3 @2))
> +    (if ((TREE_CODE (TREE_TYPE (@1)) == REAL_TYPE)
> +         == (TREE_CODE (TREE_TYPE (@2)) == REAL_TYPE)
> +         && (TREE_CODE (TREE_TYPE (@1)) == REAL_TYPE)
> +            == (TREE_CODE (TREE_TYPE (@0)) == REAL_TYPE)

I think these are always true, to/from REAL_TYPE is FIX_TRUNC /
FLOAT_EXPR.  What you
might get is conversion to/from decimal FP from/to non-decimal FP.

> +         && single_use (@1)
> +         && single_use (@3)

We have :s now, I'd like to see comments before any explicit
single_use () uses as to why
they were added.  Also why @1 and @3?  Either @0 and @3 or @1 and @2?

> +         && TYPE_UNSIGNED (TREE_TYPE (@1)) == TYPE_UNSIGNED (TREE_TYPE (@2))
> +         && !((TREE_CODE (TREE_TYPE (@1)) == REAL_TYPE
> +               && DECIMAL_FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (@1))))
> +              || (TREE_CODE (TREE_TYPE (@2)) == REAL_TYPE
> +                  && DECIMAL_FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (@2)))))

We have DECIMAL_FLOAT_TYPE_P () for this.

> +         && TYPE_PRECISION (TREE_TYPE (@1)) < TYPE_PRECISION (TREE_TYPE (@0))
> +         && TYPE_PRECISION (TREE_TYPE (@2)) < TYPE_PRECISION (TREE_TYPE (@0))

copy-paste error, @3 (ok, it shouldn't really matter as @0 and @3
should be the same types).

> +         )

Closing parens always go to the previous line everywhere else.

> +       (with {
> +         tree comtype = TYPE_PRECISION (TREE_TYPE (@1))
> +                 < TYPE_PRECISION (TREE_TYPE (@2)) ? TREE_TYPE (@2)
> +                                   : TREE_TYPE (@1);
> +         if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
> +           {
> +             if (TYPE_UNSIGNED (TREE_TYPE (@1)) || TYPE_UNSIGNED
> (TREE_TYPE (@0)))
> +               comtype = unsigned_type_for (comtype);
> +             else
> +               comtype = signed_type_for (comtype);
> +           }
> +         }

I think fold-const.c or tree.c has a helper for this, if not we should
add one.  I suppose
you copied the above from some frontend code which should also use that helper.

> +            (op (convert:comtype @1) (convert:comtype @2))
> +       )
> +     )
> +  )
> +)

See above for closing parens.  I wonder if we can't merge the above
with the existing
simplification of widened compares which also "merges" the pattern you add for
the 2nd operand not being converted (the constant case).

> +
> +
>  /* From fold_sign_changed_comparison and fold_widened_comparison.  */
>  (for cmp (simple_comparison)
>   (simplify
> @@ -2046,7 +2085,43 @@ along with GCC; see the file COPYING3.  If not see
>          (if (cmp == LE_EXPR)
>       (ge (convert:st @0) { build_zero_cst (st); })
>       (lt (convert:st @0) { build_zero_cst (st); }))))))))))

at least it's odd you place your patterns before and after this existing one...

> -
> +
> +/* Simplify '(type) X cmp CST' to 'X cmp (type-of-X) CST', if
> +   CST fits into the type of X.  */
> +(for cmp (simple_comparison)
> +  (simplify
> +    (cmp (convert@2 @0) INTEGER_CST@1)

what about REAL_CSTs?

> +    (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +         && TYPE_PRECISION (TREE_TYPE (@1)) > TYPE_PRECISION (TREE_TYPE (@0))
> +         && single_use (@2)
> +         && (TYPE_UNSIGNED (TREE_TYPE (@0))
> +             || TYPE_UNSIGNED (TREE_TYPE (@0)) == TYPE_UNSIGNED
> (TREE_TYPE (@1))
> +             || cmp == NE_EXPR || cmp == EQ_EXPR)
> +         && !POINTER_TYPE_P (TREE_TYPE (@0))
> +         && int_fits_type_p (@1, TREE_TYPE (@0)))
> +      (with { tree optype = TREE_TYPE (@0); }
> +        (cmp @0 (convert:optype @1))
> +      )
> +    )
> +  )
> +)
> +
> +/* See if '(type) X ==/!= CST' represents a condition,
> +   which is always true, or false due CST's value.  */
> +(for cmp (ne eq)
> +  (simplify
> +    (cmp (convert@2 @0) INTEGER_CST@1)
> +    (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +         && TYPE_PRECISION (TREE_TYPE (@1)) >= TYPE_PRECISION (TREE_TYPE (@0))
> +         && single_use (@2)
> +         && !POINTER_TYPE_P (TREE_TYPE (@0))
> +         && !int_fits_type_p (@1, TREE_TYPE (@0))
> +         && TYPE_UNSIGNED (TREE_TYPE (@0)) == TYPE_UNSIGNED (TREE_TYPE (@1)))
> +      { constant_boolean_node (cmp == NE_EXPR, type); }
> +    )
> +  )
> +)

The existing pattern should already handle this even for other comparison codes:

      (if (TREE_CODE (@10) == INTEGER_CST
           && TREE_CODE (TREE_TYPE (@00)) == INTEGER_TYPE
           && !int_fits_type_p (@10, TREE_TYPE (@00)))
       (with
        {
          tree min = lower_bound_in_type (TREE_TYPE (@10), TREE_TYPE (@00));
          tree max = upper_bound_in_type (TREE_TYPE (@10), TREE_TYPE (@00));
          bool above = integer_nonzerop (const_binop (LT_EXPR, type, max, @10));
          bool below = integer_nonzerop (const_binop (LT_EXPR, type, @10, min));
        }
        (if (above || below)
         (if (cmp == EQ_EXPR || cmp == NE_EXPR)
          { constant_boolean_node (cmp == EQ_EXPR ? false : true, type); }
          (if (cmp == LT_EXPR || cmp == LE_EXPR)
           { constant_boolean_node (above ? true : false, type); }
           (if (cmp == GT_EXPR || cmp == GE_EXPR)
            { constant_boolean_node (above ? false : true, type); }))))))))))))

Richard.

> +
>  (for cmp (unordered ordered unlt unle ungt unge uneq ltgt)
>   /* If the second operand is NaN, the result is constant.  */
>   (simplify
> Index: cp/typeck.c
> ===================================================================
> --- cp/typeck.c    (Revision 227528)
> +++ cp/typeck.c    (Arbeitskopie)
> @@ -5000,14 +5000,8 @@ cp_build_binary_op (location_t location,
>           pass the copies by reference, then copy them back afterward.  */
>        tree xop0 = op0, xop1 = op1, xresult_type = result_type;
>        enum tree_code xresultcode = resultcode;
> -      tree val
> -        = shorten_compare (location, &xop0, &xop1, &xresult_type,
> -                   &xresultcode);
> -      if (val != 0)
> -        return cp_convert (boolean_type_node, val, complain);
> -      op0 = xop0, op1 = xop1;
> -      converted = 1;
> -      resultcode = xresultcode;
> +      shorten_compare (location, &xop0, &xop1, &xresult_type,
> +               &xresultcode);
>      }
>
>        if ((short_compare || code == MIN_EXPR || code == MAX_EXPR)
> Index: c/c-typeck.c
> ===================================================================
> --- c/c-typeck.c    (Revision 227528)
> +++ c/c-typeck.c    (Arbeitskopie)
> @@ -11193,20 +11193,9 @@ build_binary_op (location_t location, enum tree_co
>           pass the copies by reference, then copy them back afterward.  */
>        tree xop0 = op0, xop1 = op1, xresult_type = result_type;
>        enum tree_code xresultcode = resultcode;
> -      tree val
> -        = shorten_compare (location, &xop0, &xop1, &xresult_type,
> -                   &xresultcode);
> +      shorten_compare (location, &xop0, &xop1, &xresult_type,
> +               &xresultcode);
>
> -      if (val != 0)
> -        {
> -          ret = val;
> -          goto return_build_binary_op;
> -        }
> -
> -      op0 = xop0, op1 = xop1;
> -      converted = 1;
> -      resultcode = xresultcode;
> -
>        if (c_inhibit_evaluation_warnings == 0)
>          {
>            bool op0_maybe_const = true;
> Index: gcc.dg/tree-ssa/vrp23.c
> ===================================================================
> --- gcc.dg/tree-ssa/vrp23.c    (Revision 227528)
> +++ gcc.dg/tree-ssa/vrp23.c    (Arbeitskopie)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
> +/* { dg-options "-O2 -fdump-tree-vrp1-details -fdump-tree-forwprop1" } */
>
>  void aa (void);
>  void aos (void);
> @@ -44,6 +44,10 @@ L8:
>
>  /* The n_sets > 0 test can be simplified into n_sets == 1 since the
>     only way to reach the test is when n_sets <= 1, and the only value
> -   which satisfies both conditions is n_sets == 1.  */
> -/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */
> +   which satisfies both conditions is n_sets == 1.
> +   We catch this pattern for this testcase already in forward-propagation
> +   pass, as use of n_sets becomes single one.  Therefore we expect that
> +   vrp won't find this pattern anymore.  */
> +/* { dg-final { scan-tree-dump-times "Simplified relational" 0 "vrp1" } } */
> +/* { dg-final { scan-tree-dump-times "Replaced" 3 "forwprop1" } } */
>
> Index: gcc.dg/tree-ssa/vrp24.c
> ===================================================================
> --- gcc.dg/tree-ssa/vrp24.c    (Revision 227528)
> +++ gcc.dg/tree-ssa/vrp24.c    (Arbeitskopie)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
> +/* { dg-options "-O2 -fdump-tree-vrp1-details -fdump-tree-forwprop1"  } */
>
>
>  struct rtx_def;
> @@ -90,6 +90,9 @@ L7:
>
>     The second n_sets > 0 test can also be simplified into n_sets == 1
>     as the only way to reach the tests is when n_sets <= 1 and the only
> -   value which satisfies both conditions is n_sets == 1.  */
> -/* { dg-final { scan-tree-dump-times "Simplified relational" 2 "vrp1" } } */
> +   value which satisfies both conditions is n_sets == 1.
> +   We catch one of the simplifications already in forward-propagation pass.
> +   Therefore we exprect just one simplified relational detected
> within vrp1.  */
> +/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */
> +/* { dg-final { scan-tree-dump-times "Replaced" 2 "forwprop1" } } */
Jeff Law Sept. 15, 2015, 3:18 p.m. UTC | #5
On 09/15/2015 04:06 AM, Richard Biener wrote:
> On Tue, Sep 8, 2015 at 1:17 PM, Kai Tietz <ktietz70@googlemail.com> wrote:
>> Hi,
>>
>> This patch is the first part of obsoleting 'shorten_compare' function
>> for folding.
>> It adjusts the uses of 'shorten_compare' to ignore folding returned by
>> it, and adds
>> missing pattterns to match.pd to allow full bootstrap of C/C++ without
>> regressions.
>> Due we are using 'shorten_compare' for some diagnostic we can't simply
>> remove it.  So if this patch gets approved, the next step will be to
>> rename the function to something like 'check_compare', and adjust its
>> arguments and inner logic to reflect that we don't modify
>> arguments/expression anymore within that function.
>>
>> Bootstrap just show 2 regressions within gcc.dg testsuite due patterns
>> matched are folded more early by forward-propagation.  I adjusted
>> them, and added them to patch, too.
>
> Some comments on the patterns you added below
>
>> I did regression-testing for x86_64-unknown-linux-gnu.
>>
>> ChangeLog
>>
>> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>>
>>      * match.pd: Add missing patterns from shorten_compare.
>>      * c/c-typeck.c (build_binary_op): Discard foldings of shorten_compare.
>>      * cp/typeck.c (cp_build_binary_op): Likewise.
>>
>> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>>
>>      * gcc.dg/tree-ssa/vrp23.c: Adjust testcase to reflect that
>>      pattern is matching now already within forward-propagation pass.
>>      * gcc.dg/tree-ssa/vrp24.c: Likewise.
>>
[ snip ]
> I think these are always true, to/from REAL_TYPE is FIX_TRUNC /
> FLOAT_EXPR.  What you
> might get is conversion to/from decimal FP from/to non-decimal FP.
>
>> +         && single_use (@1)
>> +         && single_use (@3)
>
> We have :s now, I'd like to see comments before any explicit
> single_use () uses as to why
> they were added.  Also why @1 and @3?  Either @0 and @3 or @1 and @2?
Yea, I was looking at that as well when I was trying to understand why 
so much changed with/without this patch.

It's probably good policy that anything that uses :s/single_use ought to 
give some kind of justification, preferably with a testcase which shows 
why the :s/single_use is needed.
Jeff Law Sept. 16, 2015, 7:51 p.m. UTC | #6
On 09/15/2015 03:42 AM, Kai Tietz wrote:
>>> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>>>
>>>       * gcc.dg/tree-ssa/vrp23.c: Adjust testcase to reflect that
>>>       pattern is matching now already within forward-propagation pass.
>>>       * gcc.dg/tree-ssa/vrp24.c: Likewise.
>>
>> So for the new patterns, I would have expected testcases to ensure they're
>> getting used.
>
> We were talking about that.  My approach was to disable shortening
> code and see what regressions we get.  For C++ our test-coverage isn't
> that good, as we didn't had here any regressions, but for C testcases
> we got some.  Eg the testcase vrp25.c is one of them
But as I noted, I think that simply looking at testsuite regressions 
after disabling shorten_compare is not sufficient as I don't think we 
have significant coverage of this code.

>
> By disabling "shorten_compare" one of most simple testcases, which is
> failing, is:
>
> int foo (short s, short t)
> {
>    int a = (int) s;
>    int b = (int) t;
>
>    return a >= b;
> }
And I would argue it's precisely this kind of stuff we should be 
building tests around and resolving prior to actually disabling 
shorten_compare.


>
> Where we miss to do narrow down SSA-tree comparison to simply s >= t;
> If we disable fre-pass ( -fno-tree-fre) for current trunk, we can see
> that this pattern gets resolved in forward-propagation pass due
> invocation of shorten_compare.
>
> Another simple one (with disabled "shorten_compare") is:
>
> The following testcase:
>
> int foo (unsigned short a)
> {
>    unsigned int b = 0;
>    return (unsigned int) a) < b;
> }
>
> Here we lacked the detection of ((unsigned int) a) < b being a < 0
> (which is always false).
And again, this deserves a test and resolving prior to disabling 
shorten_compare.



>
>> In *theory* one ought to be able to look at the dumps or .s files before
>> after this patch for a blob of tests and see that nothing significant has
>> changed.  Unfortunately, so much changes that it's hard to evaluate if this
>> patch is a step forward or a step back.
>
> Hmm, actually we deal here with obvious patterns from
> "shorten_compare".  Of interest are here the narrowing-lines on top of
> this function, which we need to reflect in match.pd too, before we can
> deprecate it. I don't get that there are so much changes?  This are in
> fact just 3 basic patterns '(convert) X <cmp> (convert) Y',
> '((convert) X) <cmp> CST', and a more specialized variant for the last
> pattern for '==/!=' case.
>
>> I wonder if we'd do better to first add new match.pd patterns, one at a
>> time, with tests, and evaluating them along the way by looking at the dumps
>> or .s files across many systems.  Then when we think we've got the right
>> set, then look at what happens to those dumps/.s files if we make the
>> changes so that shorten_compare really just emits warnings.
>
> As the shorten_compare function covers those transformations, I don't
> see that this is something leading to something useful.  As long as we
> call shorten_compare on folding in FEs, we won't see those patterns in
> ME that easy.  And even more important is, that patterns getting
> resolved if function gets invoked.
It will tell you what will be missed from a code generation standpoint 
if you disable shorten_compare.  And that is the best proxy we have for 
performance regressions related to disabling shorten_compare.

ie, in a local tree, you disable shorten_compare.  Then compare the code 
we generate with and without shorten compare.  Reduce to a simple 
testcase.  Resolve the issue with a match.pd pattern (most likely) and 
repeat the process.  In theory at each step the  number of things to 
deeply investigate should be diminishing while at the same time you're 
building match.pd patterns and tests we can include in the testsuite. 
And each step is likely a new patch in the patch series -- the final 
patch of which disables shorten_compare.

It's a lot of work, but IMHO, it's necessary to help ensure we don't 
have code generation regressions.




>
> So I would suggest here to disable shorten_compare invocation and
> cleanup with fallout detectable in C-FE's testsuite.
But without deeper analysis at the start & patches+testcases for those 
issues, we have a real risk of regressing the code we generate.

>
>> My worry is that we get part way through the conversion and end up with the
>> match.pd patterns without actually getting shorten_compare cleaned up and
>> turned into just a warning generator.
>
> This worry I share, therefore I see it as mandatory to remove with
> initial patch the use of result of shorten_compare, and better cleanup
> possible detected additional fallout for other targets.  I just did
> regression-testing for Intel-architecture (32-bit and 64-bit).
I would disagree that removing shorten_compare should come first.  I 
think you have to address the code generation regressions.    And I 
think that the patches to address the code generation regressions need 
to be a series of patches with testcases.

What I don't want is a mega-patch that adds a ton of new patterns to 
match.pd with minimal/no tests and disables shorten_compare.  We should 
be able to build up an incremental series of patches that ends with 
disabling of shorten_compare and with some degree of confidence that 
we're not badly regressing the code generation.

jeff
Richard Biener Sept. 17, 2015, 9:12 a.m. UTC | #7
On Wed, Sep 16, 2015 at 9:51 PM, Jeff Law <law@redhat.com> wrote:
> On 09/15/2015 03:42 AM, Kai Tietz wrote:
>>>>
>>>> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>>>>
>>>>       * gcc.dg/tree-ssa/vrp23.c: Adjust testcase to reflect that
>>>>       pattern is matching now already within forward-propagation pass.
>>>>       * gcc.dg/tree-ssa/vrp24.c: Likewise.
>>>
>>>
>>> So for the new patterns, I would have expected testcases to ensure
>>> they're
>>> getting used.
>>
>>
>> We were talking about that.  My approach was to disable shortening
>> code and see what regressions we get.  For C++ our test-coverage isn't
>> that good, as we didn't had here any regressions, but for C testcases
>> we got some.  Eg the testcase vrp25.c is one of them
>
> But as I noted, I think that simply looking at testsuite regressions after
> disabling shorten_compare is not sufficient as I don't think we have
> significant coverage of this code.
>
>>
>> By disabling "shorten_compare" one of most simple testcases, which is
>> failing, is:
>>
>> int foo (short s, short t)
>> {
>>    int a = (int) s;
>>    int b = (int) t;
>>
>>    return a >= b;
>> }
>
> And I would argue it's precisely this kind of stuff we should be building
> tests around and resolving prior to actually disabling shorten_compare.
>
>
>>
>> Where we miss to do narrow down SSA-tree comparison to simply s >= t;
>> If we disable fre-pass ( -fno-tree-fre) for current trunk, we can see
>> that this pattern gets resolved in forward-propagation pass due
>> invocation of shorten_compare.
>>
>> Another simple one (with disabled "shorten_compare") is:
>>
>> The following testcase:
>>
>> int foo (unsigned short a)
>> {
>>    unsigned int b = 0;
>>    return (unsigned int) a) < b;
>> }
>>
>> Here we lacked the detection of ((unsigned int) a) < b being a < 0
>> (which is always false).
>
> And again, this deserves a test and resolving prior to disabling
> shorten_compare.
>
>
>
>>
>>> In *theory* one ought to be able to look at the dumps or .s files before
>>> after this patch for a blob of tests and see that nothing significant has
>>> changed.  Unfortunately, so much changes that it's hard to evaluate if
>>> this
>>> patch is a step forward or a step back.
>>
>>
>> Hmm, actually we deal here with obvious patterns from
>> "shorten_compare".  Of interest are here the narrowing-lines on top of
>> this function, which we need to reflect in match.pd too, before we can
>> deprecate it. I don't get that there are so much changes?  This are in
>> fact just 3 basic patterns '(convert) X <cmp> (convert) Y',
>> '((convert) X) <cmp> CST', and a more specialized variant for the last
>> pattern for '==/!=' case.
>>
>>> I wonder if we'd do better to first add new match.pd patterns, one at a
>>> time, with tests, and evaluating them along the way by looking at the
>>> dumps
>>> or .s files across many systems.  Then when we think we've got the right
>>> set, then look at what happens to those dumps/.s files if we make the
>>> changes so that shorten_compare really just emits warnings.
>>
>>
>> As the shorten_compare function covers those transformations, I don't
>> see that this is something leading to something useful.  As long as we
>> call shorten_compare on folding in FEs, we won't see those patterns in
>> ME that easy.  And even more important is, that patterns getting
>> resolved if function gets invoked.
>
> It will tell you what will be missed from a code generation standpoint if
> you disable shorten_compare.  And that is the best proxy we have for
> performance regressions related to disabling shorten_compare.
>
> ie, in a local tree, you disable shorten_compare.  Then compare the code we
> generate with and without shorten compare.  Reduce to a simple testcase.
> Resolve the issue with a match.pd pattern (most likely) and repeat the
> process.  In theory at each step the  number of things to deeply investigate
> should be diminishing while at the same time you're building match.pd
> patterns and tests we can include in the testsuite. And each step is likely
> a new patch in the patch series -- the final patch of which disables
> shorten_compare.
>
> It's a lot of work, but IMHO, it's necessary to help ensure we don't have
> code generation regressions.

As said in the other reply I successfully used gcc_unreachable ()s in
code in intend to remove and then inspect/fix all fallout from that during
bootstrap & test ...

Yeah, it's a lot of work (sometimes).  But it's way easier than to investigate
code changes (which may also miss cases as followup transforms may end
up causing the same code to be generated).

Richard.

>
>
>
>>
>> So I would suggest here to disable shorten_compare invocation and
>> cleanup with fallout detectable in C-FE's testsuite.
>
> But without deeper analysis at the start & patches+testcases for those
> issues, we have a real risk of regressing the code we generate.
>
>>
>>> My worry is that we get part way through the conversion and end up with
>>> the
>>> match.pd patterns without actually getting shorten_compare cleaned up and
>>> turned into just a warning generator.
>>
>>
>> This worry I share, therefore I see it as mandatory to remove with
>> initial patch the use of result of shorten_compare, and better cleanup
>> possible detected additional fallout for other targets.  I just did
>> regression-testing for Intel-architecture (32-bit and 64-bit).
>
> I would disagree that removing shorten_compare should come first.  I think
> you have to address the code generation regressions.    And I think that the
> patches to address the code generation regressions need to be a series of
> patches with testcases.
>
> What I don't want is a mega-patch that adds a ton of new patterns to
> match.pd with minimal/no tests and disables shorten_compare.  We should be
> able to build up an incremental series of patches that ends with disabling
> of shorten_compare and with some degree of confidence that we're not badly
> regressing the code generation.
>
> jeff
Kai Tietz Sept. 17, 2015, 9:32 a.m. UTC | #8
2015-09-17 11:12 GMT+02:00 Richard Biener <richard.guenther@gmail.com>:
> On Wed, Sep 16, 2015 at 9:51 PM, Jeff Law <law@redhat.com> wrote:
>> On 09/15/2015 03:42 AM, Kai Tietz wrote:
>>>>>
>>>>> 2015-09-08  Kai Tietz  <ktietz@redhat.com>
>>>>>
>>>>>       * gcc.dg/tree-ssa/vrp23.c: Adjust testcase to reflect that
>>>>>       pattern is matching now already within forward-propagation pass.
>>>>>       * gcc.dg/tree-ssa/vrp24.c: Likewise.
>>>>
>>>>
>>>> So for the new patterns, I would have expected testcases to ensure
>>>> they're
>>>> getting used.
>>>
>>>
>>> We were talking about that.  My approach was to disable shortening
>>> code and see what regressions we get.  For C++ our test-coverage isn't
>>> that good, as we didn't had here any regressions, but for C testcases
>>> we got some.  Eg the testcase vrp25.c is one of them
>>
>> But as I noted, I think that simply looking at testsuite regressions after
>> disabling shorten_compare is not sufficient as I don't think we have
>> significant coverage of this code.
>>
>>>
>>> By disabling "shorten_compare" one of most simple testcases, which is
>>> failing, is:
>>>
>>> int foo (short s, short t)
>>> {
>>>    int a = (int) s;
>>>    int b = (int) t;
>>>
>>>    return a >= b;
>>> }
>>
>> And I would argue it's precisely this kind of stuff we should be building
>> tests around and resolving prior to actually disabling shorten_compare.
>>
>>
>>>
>>> Where we miss to do narrow down SSA-tree comparison to simply s >= t;
>>> If we disable fre-pass ( -fno-tree-fre) for current trunk, we can see
>>> that this pattern gets resolved in forward-propagation pass due
>>> invocation of shorten_compare.
>>>
>>> Another simple one (with disabled "shorten_compare") is:
>>>
>>> The following testcase:
>>>
>>> int foo (unsigned short a)
>>> {
>>>    unsigned int b = 0;
>>>    return (unsigned int) a) < b;
>>> }
>>>
>>> Here we lacked the detection of ((unsigned int) a) < b being a < 0
>>> (which is always false).
>>
>> And again, this deserves a test and resolving prior to disabling
>> shorten_compare.
>>
>>
>>
>>>
>>>> In *theory* one ought to be able to look at the dumps or .s files before
>>>> after this patch for a blob of tests and see that nothing significant has
>>>> changed.  Unfortunately, so much changes that it's hard to evaluate if
>>>> this
>>>> patch is a step forward or a step back.
>>>
>>>
>>> Hmm, actually we deal here with obvious patterns from
>>> "shorten_compare".  Of interest are here the narrowing-lines on top of
>>> this function, which we need to reflect in match.pd too, before we can
>>> deprecate it. I don't get that there are so much changes?  This are in
>>> fact just 3 basic patterns '(convert) X <cmp> (convert) Y',
>>> '((convert) X) <cmp> CST', and a more specialized variant for the last
>>> pattern for '==/!=' case.
>>>
>>>> I wonder if we'd do better to first add new match.pd patterns, one at a
>>>> time, with tests, and evaluating them along the way by looking at the
>>>> dumps
>>>> or .s files across many systems.  Then when we think we've got the right
>>>> set, then look at what happens to those dumps/.s files if we make the
>>>> changes so that shorten_compare really just emits warnings.
>>>
>>>
>>> As the shorten_compare function covers those transformations, I don't
>>> see that this is something leading to something useful.  As long as we
>>> call shorten_compare on folding in FEs, we won't see those patterns in
>>> ME that easy.  And even more important is, that patterns getting
>>> resolved if function gets invoked.
>>
>> It will tell you what will be missed from a code generation standpoint if
>> you disable shorten_compare.  And that is the best proxy we have for
>> performance regressions related to disabling shorten_compare.
>>
>> ie, in a local tree, you disable shorten_compare.  Then compare the code we
>> generate with and without shorten compare.  Reduce to a simple testcase.
>> Resolve the issue with a match.pd pattern (most likely) and repeat the
>> process.  In theory at each step the  number of things to deeply investigate
>> should be diminishing while at the same time you're building match.pd
>> patterns and tests we can include in the testsuite. And each step is likely
>> a new patch in the patch series -- the final patch of which disables
>> shorten_compare.
>>
>> It's a lot of work, but IMHO, it's necessary to help ensure we don't have
>> code generation regressions.
>
> As said in the other reply I successfully used gcc_unreachable ()s in
> code in intend to remove and then inspect/fix all fallout from that during
> bootstrap & test ...

Yes, that would be fine, if shorten_compare would be part of classical
fold-machinery.  But it use isn't there.  It gets just used in
frontend's binary-operation generation in C/C++'s build_binary_op
function.
As we don't want (and can) remove shorten_compare completely, due we
still need atleast its diagnostics,  using of gcc_unreachable ()
doesn't look suitable.
Also the request to remove this function later makes testing of
standard AST-cases not much easy, as we won't see in GIMPLE patterns
already handled by it before.

> Yeah, it's a lot of work (sometimes).  But it's way easier than to investigate
> code changes (which may also miss cases as followup transforms may end
> up causing the same code to be generated).

It would be indeed easier to have such way, but for shorten_compare
(and same applies somewhat to shorten_binary too) this isn't working.

> Richard.

I can add some explicit testcases handled by shorten_compare right
now.  I can model for some cases testcases doing same transformation
in ME (normally done now by vrp/fre), and see that we match them in
forward-propagation already.


Kai

>>
>>
>>
>>>
>>> So I would suggest here to disable shorten_compare invocation and
>>> cleanup with fallout detectable in C-FE's testsuite.
>>
>> But without deeper analysis at the start & patches+testcases for those
>> issues, we have a real risk of regressing the code we generate.
>>
>>>
>>>> My worry is that we get part way through the conversion and end up with
>>>> the
>>>> match.pd patterns without actually getting shorten_compare cleaned up and
>>>> turned into just a warning generator.
>>>
>>>
>>> This worry I share, therefore I see it as mandatory to remove with
>>> initial patch the use of result of shorten_compare, and better cleanup
>>> possible detected additional fallout for other targets.  I just did
>>> regression-testing for Intel-architecture (32-bit and 64-bit).
>>
>> I would disagree that removing shorten_compare should come first.  I think
>> you have to address the code generation regressions.    And I think that the
>> patches to address the code generation regressions need to be a series of
>> patches with testcases.
>>
>> What I don't want is a mega-patch that adds a ton of new patterns to
>> match.pd with minimal/no tests and disables shorten_compare.  We should be
>> able to build up an incremental series of patches that ends with disabling
>> of shorten_compare and with some degree of confidence that we're not badly
>> regressing the code generation.
>>
>> jeff
Jeff Law Sept. 17, 2015, 5:12 p.m. UTC | #9
On 09/17/2015 03:12 AM, Richard Biener wrote:
>>>> I wonder if we'd do better to first add new match.pd patterns, one at a
>>>> time, with tests, and evaluating them along the way by looking at the
>>>> dumps
>>>> or .s files across many systems.  Then when we think we've got the right
>>>> set, then look at what happens to those dumps/.s files if we make the
>>>> changes so that shorten_compare really just emits warnings.
>>>
>>>
>>> As the shorten_compare function covers those transformations, I don't
>>> see that this is something leading to something useful.  As long as we
>>> call shorten_compare on folding in FEs, we won't see those patterns in
>>> ME that easy.  And even more important is, that patterns getting
>>> resolved if function gets invoked.
>>
>> It will tell you what will be missed from a code generation standpoint if
>> you disable shorten_compare.  And that is the best proxy we have for
>> performance regressions related to disabling shorten_compare.
>>
>> ie, in a local tree, you disable shorten_compare.  Then compare the code we
>> generate with and without shorten compare.  Reduce to a simple testcase.
>> Resolve the issue with a match.pd pattern (most likely) and repeat the
>> process.  In theory at each step the  number of things to deeply investigate
>> should be diminishing while at the same time you're building match.pd
>> patterns and tests we can include in the testsuite. And each step is likely
>> a new patch in the patch series -- the final patch of which disables
>> shorten_compare.
>>
>> It's a lot of work, but IMHO, it's necessary to help ensure we don't have
>> code generation regressions.
>
> As said in the other reply I successfully used gcc_unreachable ()s in
> code in intend to remove and then inspect/fix all fallout from that during
> bootstrap & test ...
>
> Yeah, it's a lot of work (sometimes).  But it's way easier than to investigate
> code changes (which may also miss cases as followup transforms may end
> up causing the same code to be generated).
That'd be a fine approach as well, though it may not work in this case 
as shorten_compare would get called prior to the transformations in 
match.pd.  Though I would certainly agree in general that your approach 
is a good one.

jeff
diff mbox

Patch

Index: match.pd
===================================================================
--- match.pd    (Revision 227528)
+++ match.pd    (Arbeitskopie)
@@ -1786,6 +1786,45 @@  along with GCC; see the file COPYING3.  If not see
   (op (abs @0) zerop@1)
   (op @0 @1)))

+/* Simplify '((type) X) cmp ((type) Y' to shortest possible types, of X and Y,
+   if type's precision is wider then precision of X's and Y's type.
+   Logic taken from shorten_compare function.  */
+(for op (tcc_comparison)
+  (simplify
+    (op (convert@0 @1) (convert@3 @2))
+    (if ((TREE_CODE (TREE_TYPE (@1)) == REAL_TYPE)
+         == (TREE_CODE (TREE_TYPE (@2)) == REAL_TYPE)
+         && (TREE_CODE (TREE_TYPE (@1)) == REAL_TYPE)
+            == (TREE_CODE (TREE_TYPE (@0)) == REAL_TYPE)
+         && single_use (@1)
+         && single_use (@3)
+         && TYPE_UNSIGNED (TREE_TYPE (@1)) == TYPE_UNSIGNED (TREE_TYPE (@2))
+         && !((TREE_CODE (TREE_TYPE (@1)) == REAL_TYPE
+               && DECIMAL_FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (@1))))
+              || (TREE_CODE (TREE_TYPE (@2)) == REAL_TYPE
+                  && DECIMAL_FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (@2)))))
+         && TYPE_PRECISION (TREE_TYPE (@1)) < TYPE_PRECISION (TREE_TYPE (@0))
+         && TYPE_PRECISION (TREE_TYPE (@2)) < TYPE_PRECISION (TREE_TYPE (@0))
+         )
+       (with {
+         tree comtype = TYPE_PRECISION (TREE_TYPE (@1))
+                 < TYPE_PRECISION (TREE_TYPE (@2)) ? TREE_TYPE (@2)
+                                   : TREE_TYPE (@1);
+         if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
+           {
+             if (TYPE_UNSIGNED (TREE_TYPE (@1)) || TYPE_UNSIGNED
(TREE_TYPE (@0)))
+               comtype = unsigned_type_for (comtype);
+             else
+               comtype = signed_type_for (comtype);
+           }
+         }
+            (op (convert:comtype @1) (convert:comtype @2))
+       )
+     )
+  )
+)
+
+
 /* From fold_sign_changed_comparison and fold_widened_comparison.  */
 (for cmp (simple_comparison)
  (simplify
@@ -2046,7 +2085,43 @@  along with GCC; see the file COPYING3.  If not see
         (if (cmp == LE_EXPR)
      (ge (convert:st @0) { build_zero_cst (st); })
      (lt (convert:st @0) { build_zero_cst (st); }))))))))))
-
+
+/* Simplify '(type) X cmp CST' to 'X cmp (type-of-X) CST', if
+   CST fits into the type of X.  */
+(for cmp (simple_comparison)
+  (simplify
+    (cmp (convert@2 @0) INTEGER_CST@1)
+    (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+         && TYPE_PRECISION (TREE_TYPE (@1)) > TYPE_PRECISION (TREE_TYPE (@0))
+         && single_use (@2)
+         && (TYPE_UNSIGNED (TREE_TYPE (@0))
+             || TYPE_UNSIGNED (TREE_TYPE (@0)) == TYPE_UNSIGNED
(TREE_TYPE (@1))
+             || cmp == NE_EXPR || cmp == EQ_EXPR)
+         && !POINTER_TYPE_P (TREE_TYPE (@0))
+         && int_fits_type_p (@1, TREE_TYPE (@0)))
+      (with { tree optype = TREE_TYPE (@0); }
+        (cmp @0 (convert:optype @1))
+      )
+    )
+  )
+)
+
+/* See if '(type) X ==/!= CST' represents a condition,
+   which is always true, or false due CST's value.  */
+(for cmp (ne eq)
+  (simplify
+    (cmp (convert@2 @0) INTEGER_CST@1)
+    (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+         && TYPE_PRECISION (TREE_TYPE (@1)) >= TYPE_PRECISION (TREE_TYPE (@0))
+         && single_use (@2)
+         && !POINTER_TYPE_P (TREE_TYPE (@0))
+         && !int_fits_type_p (@1, TREE_TYPE (@0))
+         && TYPE_UNSIGNED (TREE_TYPE (@0)) == TYPE_UNSIGNED (TREE_TYPE (@1)))
+      { constant_boolean_node (cmp == NE_EXPR, type); }
+    )
+  )
+)
+
 (for cmp (unordered ordered unlt unle ungt unge uneq ltgt)
  /* If the second operand is NaN, the result is constant.  */
  (simplify
Index: cp/typeck.c
===================================================================
--- cp/typeck.c    (Revision 227528)
+++ cp/typeck.c    (Arbeitskopie)
@@ -5000,14 +5000,8 @@  cp_build_binary_op (location_t location,
          pass the copies by reference, then copy them back afterward.  */
       tree xop0 = op0, xop1 = op1, xresult_type = result_type;
       enum tree_code xresultcode = resultcode;
-      tree val
-        = shorten_compare (location, &xop0, &xop1, &xresult_type,
-                   &xresultcode);
-      if (val != 0)
-        return cp_convert (boolean_type_node, val, complain);
-      op0 = xop0, op1 = xop1;
-      converted = 1;
-      resultcode = xresultcode;
+      shorten_compare (location, &xop0, &xop1, &xresult_type,
+               &xresultcode);
     }

       if ((short_compare || code == MIN_EXPR || code == MAX_EXPR)
Index: c/c-typeck.c
===================================================================
--- c/c-typeck.c    (Revision 227528)
+++ c/c-typeck.c    (Arbeitskopie)
@@ -11193,20 +11193,9 @@  build_binary_op (location_t location, enum tree_co
          pass the copies by reference, then copy them back afterward.  */
       tree xop0 = op0, xop1 = op1, xresult_type = result_type;
       enum tree_code xresultcode = resultcode;
-      tree val
-        = shorten_compare (location, &xop0, &xop1, &xresult_type,
-                   &xresultcode);
+      shorten_compare (location, &xop0, &xop1, &xresult_type,
+               &xresultcode);

-      if (val != 0)
-        {
-          ret = val;
-          goto return_build_binary_op;
-        }
-
-      op0 = xop0, op1 = xop1;
-      converted = 1;
-      resultcode = xresultcode;
-
       if (c_inhibit_evaluation_warnings == 0)
         {
           bool op0_maybe_const = true;
Index: gcc.dg/tree-ssa/vrp23.c
===================================================================
--- gcc.dg/tree-ssa/vrp23.c    (Revision 227528)
+++ gcc.dg/tree-ssa/vrp23.c    (Arbeitskopie)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
+/* { dg-options "-O2 -fdump-tree-vrp1-details -fdump-tree-forwprop1" } */

 void aa (void);
 void aos (void);
@@ -44,6 +44,10 @@  L8:

 /* The n_sets > 0 test can be simplified into n_sets == 1 since the
    only way to reach the test is when n_sets <= 1, and the only value
-   which satisfies both conditions is n_sets == 1.  */
-/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */
+   which satisfies both conditions is n_sets == 1.
+   We catch this pattern for this testcase already in forward-propagation
+   pass, as use of n_sets becomes single one.  Therefore we expect that
+   vrp won't find this pattern anymore.  */
+/* { dg-final { scan-tree-dump-times "Simplified relational" 0 "vrp1" } } */
+/* { dg-final { scan-tree-dump-times "Replaced" 3 "forwprop1" } } */

Index: gcc.dg/tree-ssa/vrp24.c
===================================================================
--- gcc.dg/tree-ssa/vrp24.c    (Revision 227528)
+++ gcc.dg/tree-ssa/vrp24.c    (Arbeitskopie)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
+/* { dg-options "-O2 -fdump-tree-vrp1-details -fdump-tree-forwprop1"  } */


 struct rtx_def;
@@ -90,6 +90,9 @@  L7:

    The second n_sets > 0 test can also be simplified into n_sets == 1
    as the only way to reach the tests is when n_sets <= 1 and the only
-   value which satisfies both conditions is n_sets == 1.  */
-/* { dg-final { scan-tree-dump-times "Simplified relational" 2 "vrp1" } } */
+   value which satisfies both conditions is n_sets == 1.
+   We catch one of the simplifications already in forward-propagation pass.
+   Therefore we exprect just one simplified relational detected
within vrp1.  */
+/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */
+/* { dg-final { scan-tree-dump-times "Replaced" 2 "forwprop1" } } */