Message ID | CO2PR07MB2694EE44E62E416C28F07DF78E970@CO2PR07MB2694.namprd07.prod.outlook.com |
---|---|
State | New |
Headers | show |
On Thu, 14 Apr 2016, Hurugalawadi, Naveen wrote: >>> I think we should handle at least INTEGER_CST and SSA_NAME >>> with VRP, and it seems natural to add a VRP check > > The check should be added in the tree_single_nonzero_warnv_p > for SSA_NAME case for tree_expr_nonzero_p. I think so. > However, for tree_expr_nonnegative_p, its been handled in a > different way. Ah, indeed. > Should we combine this check with the existing one? What do you mean exactly? > + (if (!tree_expr_nonnegative_p (@1)) > + (cmp @2 @0)))))) > >>> Ideally, you would call tree_expr_nonpositive_p, except that that >>> function doesn't exist yet. So for now, I guess we > > Would the tree_expr_nonpositive_p function be helpful for other cases > as well, I would try to add it if its useful. My "ideally" was wrong. Ideally for this pattern, we would have a function negative_p (since we don't want the zero value). But I don't know how useful it would be elsewhere. I was playing with this at some point, but I don't know if that's a good idea... (match negative_p INTEGER_CST@0 (if (wi::lt_p (@0, 0, TYPE_SIGN (TREE_TYPE (@0)))))) (match negative_p SSA_NAME@0 (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))) (with { wide_int min, max; value_range_type rtype = get_range_info (@0, &min, &max); } (if (rtype == VR_RANGE && wi::lt_p (max, 0, TYPE_SIGN (TREE_TYPE (@0)))))))) > Please find attached the modified patch as per the suggestions and > let me know if its fine? Looks ok to me (not a reviewer), though you could now move the test tree_expr_nonzero_p next to tree_expr_nonnegative_p (it is redundant for the last case).
On 04/14/2016 12:45 AM, Hurugalawadi, Naveen wrote: > Hi, > >>> >> I think we should handle at least INTEGER_CST and SSA_NAME >>> >> with VRP, and it seems natural to add a VRP check > The check should be added in the tree_single_nonzero_warnv_p > for SSA_NAME case for tree_expr_nonzero_p. > However, for tree_expr_nonnegative_p, its been handled in a > different way. Should we combine this check with the existing one? > > + (if (!tree_expr_nonnegative_p (@1)) > + (cmp @2 @0)))))) > >>> >> Ideally, you would call tree_expr_nonpositive_p, except that that >>> >> function doesn't exist yet. So for now, I guess we > Would the tree_expr_nonpositive_p function be helpful for other cases > as well, I would try to add it if its useful. > > Please find attached the modified patch as per the suggestions and > let me know if its fine? > > Thanks, > Naveen > > > pr31096-4.patch > > > diff --git a/gcc/fold-const.c b/gcc/fold-const.c > index 0f4bf7e..5922dbd 100644 > --- a/gcc/fold-const.c > +++ b/gcc/fold-const.c > @@ -9177,7 +9177,7 @@ tree_expr_nonzero_warnv_p (tree t, bool *strict_overflow_p) > /* Return true when T is an address and is known to be nonzero. > Handle warnings about undefined signed overflow. */ > > -static bool > +bool > tree_expr_nonzero_p (tree t) > { > bool ret, strict_overflow_p; > diff --git a/gcc/fold-const.h b/gcc/fold-const.h > index 02f4270..8579622 100644 > --- a/gcc/fold-const.h > +++ b/gcc/fold-const.h > @@ -167,6 +167,7 @@ extern tree size_diffop_loc (location_t, tree, tree); > #define non_lvalue(T) non_lvalue_loc (UNKNOWN_LOCATION, T) > extern tree non_lvalue_loc (location_t, tree); > > +extern bool tree_expr_nonzero_p (tree); > extern bool tree_expr_nonnegative_p (tree); > extern bool tree_expr_nonnegative_warnv_p (tree, bool *, int = 0); > extern tree make_range (tree, int *, tree *, tree *, bool *); > diff --git a/gcc/match.pd b/gcc/match.pd > index 75aa601..6655a3c 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see > zerop > CONSTANT_CLASS_P > tree_expr_nonnegative_p > + tree_expr_nonzero_p > integer_valued_real_p > integer_pow2p > HONOR_NANS) > @@ -894,7 +895,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > && tree_nop_conversion_p (type, TREE_TYPE (@1))) > (convert (bit_and (bit_not @1) @0)))) > > +/* Fold A * 10 == B * 10 into A == B. */ > +(for cmp (eq ne) > + (simplify > + (cmp (mult:c @0 @1) (mult:c @2 @1)) > + (if (INTEGRAL_TYPE_P (TREE_TYPE (@1)) > + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)) > + && tree_expr_nonzero_p (@1)) > + (cmp @0 @2)))) Rather than refer to an explicit constant (10), I'd write the comment as /* For integral types with undefined overflow and C != 0 fold x * C EQ/NE y * C into x EQ/NE y. */ We commonly use "C" to refer to an arbitrary constant in comments throughout GCC. I think my version is significantly clearer. > > +/* Fold A * 10 < B * 10 into A < B. */ I think we want to do a similar kind of fix to the comment here. Except you want to lay out the different transformations based on the value of the constant. So something like; /* For integral types with undefined overflow and C != 0 fold x * C RELOP y * C into: x RELOP y for nonnegative C y RELOP x for negative C */ > > /* ((X inner_op C0) outer_op C1) > With X being a tree where value_range has reasoned certain bits to always be > diff --git a/gcc/testsuite/gcc.dg/pr31096.c b/gcc/testsuite/gcc.dg/pr31096.c > new file mode 100644 > index 0000000..72446bc > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr31096.c > @@ -0,0 +1,41 @@ > +/* PR middle-end/31096 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > + > +int > +f (int a, int b) > +{ > + return a * 67 == b * 67; > +} > + > +int > +f1 (int a, int b) > +{ > + return a * -42 != b * -42; > +} > + > +int > +f2 (int a, int b) > +{ > + return a * 10 >= b * 10; > +} > + > +int > +f3 (int a, int b) > +{ > + return a * -4 < b * -4; > +} > + > +int > +f4 (unsigned int a, unsigned int b) > +{ > + return a * 10 == b * 10; > +} > + > +int > +f5 (unsigned int a, unsigned int b) > +{ > + return a * -42 < b * -42; > +} > + > +/* { dg-final { scan-tree-dump-times "\\(D\\) \\*" 4 "optimized" } } */ So the problem I see here is it's not obvious what your scanning for. Often just a comment can really help here. I would suggest tests when C is zero and verify this transformation doesn't fire on that case. I would suggest verifying that the operand orders change appropriately when dealing with a negative constant. You might want to verify nothing happens with floating point or vector types. If you wanted to be extra thorough you could iterate over the operators. ie, testing == and !=, then <, <=, >, >= It sounds a bit like overkill, but we've often found subtle cases where we wouldn't optimize one case when we expected it to be optimized. So overall, I think the transformations are fine and just need updated comments. The tests need a bit more work. Can you please update and resubmit -- I think this is pretty close to ready. Thanks for your patience, jeff I would suggest splitting this into multiple tests -- even if it's just cases you're optimizing vs cases you're not optimizing that would still be a significant improvement. >
diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 0f4bf7e..5922dbd 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -9177,7 +9177,7 @@ tree_expr_nonzero_warnv_p (tree t, bool *strict_overflow_p) /* Return true when T is an address and is known to be nonzero. Handle warnings about undefined signed overflow. */ -static bool +bool tree_expr_nonzero_p (tree t) { bool ret, strict_overflow_p; diff --git a/gcc/fold-const.h b/gcc/fold-const.h index 02f4270..8579622 100644 --- a/gcc/fold-const.h +++ b/gcc/fold-const.h @@ -167,6 +167,7 @@ extern tree size_diffop_loc (location_t, tree, tree); #define non_lvalue(T) non_lvalue_loc (UNKNOWN_LOCATION, T) extern tree non_lvalue_loc (location_t, tree); +extern bool tree_expr_nonzero_p (tree); extern bool tree_expr_nonnegative_p (tree); extern bool tree_expr_nonnegative_warnv_p (tree, bool *, int = 0); extern tree make_range (tree, int *, tree *, tree *, bool *); diff --git a/gcc/match.pd b/gcc/match.pd index 75aa601..6655a3c 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see zerop CONSTANT_CLASS_P tree_expr_nonnegative_p + tree_expr_nonzero_p integer_valued_real_p integer_pow2p HONOR_NANS) @@ -894,7 +895,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && tree_nop_conversion_p (type, TREE_TYPE (@1))) (convert (bit_and (bit_not @1) @0)))) +/* Fold A * 10 == B * 10 into A == B. */ +(for cmp (eq ne) + (simplify + (cmp (mult:c @0 @1) (mult:c @2 @1)) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@1)) + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)) + && tree_expr_nonzero_p (@1)) + (cmp @0 @2)))) +/* Fold A * 10 < B * 10 into A < B. */ +(for cmp (lt gt le ge) + (simplify + (cmp (mult:c @0 @1) (mult:c @2 @1)) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@1)) + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)) + && tree_expr_nonzero_p (@1)) + (if (tree_expr_nonnegative_p (@1)) + (cmp @0 @2) + (if (TREE_CODE (@1) == INTEGER_CST + && wi::lt_p (@1, 0, TYPE_SIGN (TREE_TYPE (@1)))) + (cmp @2 @0)))))) /* ((X inner_op C0) outer_op C1) With X being a tree where value_range has reasoned certain bits to always be diff --git a/gcc/testsuite/gcc.dg/pr31096.c b/gcc/testsuite/gcc.dg/pr31096.c new file mode 100644 index 0000000..72446bc --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr31096.c @@ -0,0 +1,41 @@ +/* PR middle-end/31096 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int +f (int a, int b) +{ + return a * 67 == b * 67; +} + +int +f1 (int a, int b) +{ + return a * -42 != b * -42; +} + +int +f2 (int a, int b) +{ + return a * 10 >= b * 10; +} + +int +f3 (int a, int b) +{ + return a * -4 < b * -4; +} + +int +f4 (unsigned int a, unsigned int b) +{ + return a * 10 == b * 10; +} + +int +f5 (unsigned int a, unsigned int b) +{ + return a * -42 < b * -42; +} + +/* { dg-final { scan-tree-dump-times "\\(D\\) \\*" 4 "optimized" } } */