Message ID | 20230921024313.1941378-1-apinski@marvell.com |
---|---|
State | New |
Headers | show |
Series | MATCH: Simplify `(A ==/!= B) &/| (((cast)A) CMP C)` | expand |
On Thu, Sep 21, 2023 at 4:43 AM Andrew Pinski <apinski@marvell.com> wrote: > > This patch adds support to the pattern for `(A == B) &/| (A CMP C)` > where the second A could be casted to a different type. > Some were handled correctly if using seperate `if` statements > but not if combined with BIT_AND/BIT_IOR. > In the case of pr111456-1.c, the testcase would pass if > `--param=logical-op-non-short-circuit=0` was used but now > can be optimized always. > > OK? Bootstrapped and tested on x86_64-linux-gnu. OK. > PR tree-optimization/106164 > PR tree-optimization/111456 > > gcc/ChangeLog: > > * match.pd (`(A ==/!= B) & (A CMP C)`): > Support an optional cast on the second A. > (`(A ==/!= B) | (A CMP C)`): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/cmpbit-6.c: New test. > * gcc.dg/tree-ssa/cmpbit-7.c: New test. > * gcc.dg/tree-ssa/pr111456-1.c: New test. > --- > gcc/match.pd | 76 +++++++++++++--------- > gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c | 22 +++++++ > gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c | 28 ++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c | 43 ++++++++++++ > 4 files changed, 139 insertions(+), 30 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index a37af05f873..0bf91bde486 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -2973,7 +2973,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@1))) > (gt @0 (minus @1 { build_int_cst (TREE_TYPE (@1), 1); })))) > > -/* Convert (X == CST1) && (X OP2 CST2) to a known value > +/* Convert (X == CST1) && ((other)X OP2 CST2) to a known value > based on CST1 OP2 CST2. Similarly for (X != CST1). */ > /* Convert (X == Y) && (X OP2 Y) to a known value if X is an integral type. > Similarly for (X != Y). */ > @@ -2981,26 +2981,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (for code1 (eq ne) > (for code2 (eq ne lt gt le ge) > (simplify > - (bit_and:c (code1@3 @0 @1) (code2@4 @0 @2)) > + (bit_and:c (code1:c@3 @0 @1) (code2:c@4 (convert?@c0 @0) @2)) > (if ((TREE_CODE (@1) == INTEGER_CST > && TREE_CODE (@2) == INTEGER_CST) > || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) > || POINTER_TYPE_P (TREE_TYPE (@1))) > - && operand_equal_p (@1, @2))) > + && bitwise_equal_p (@1, @2))) > (with > { > bool one_before = false; > bool one_after = false; > int cmp = 0; > + bool allbits = true; > if (TREE_CODE (@1) == INTEGER_CST > && TREE_CODE (@2) == INTEGER_CST) > { > - cmp = tree_int_cst_compare (@1, @2); > + allbits = TYPE_PRECISION (TREE_TYPE (@1)) <= TYPE_PRECISION (TREE_TYPE (@2)); > + auto t1 = wi::to_wide (fold_convert (TREE_TYPE (@2), @1)); > + auto t2 = wi::to_wide (@2); > + cmp = wi::cmp (t1, t2, TYPE_SIGN (TREE_TYPE (@2))); > if (cmp < 0 > - && wi::to_wide (@1) == wi::to_wide (@2) - 1) > + && t1 == t2 - 1) > one_before = true; > if (cmp > 0 > - && wi::to_wide (@1) == wi::to_wide (@2) + 1) > + && t1 == t2 + 1) > one_after = true; > } > bool val; > @@ -3018,25 +3022,29 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (switch > (if (code1 == EQ_EXPR && val) @3) > (if (code1 == EQ_EXPR && !val) { constant_boolean_node (false, type); }) > - (if (code1 == NE_EXPR && !val) @4) > + (if (code1 == NE_EXPR && !val && allbits) @4) > (if (code1 == NE_EXPR > && code2 == GE_EXPR > - && cmp == 0) > - (gt @0 @1)) > + && cmp == 0 > + && allbits) > + (gt @c0 (convert @1))) > (if (code1 == NE_EXPR > && code2 == LE_EXPR > - && cmp == 0) > - (lt @0 @1)) > + && cmp == 0 > + && allbits) > + (lt @c0 (convert @1))) > /* (a != (b+1)) & (a > b) -> a > (b+1) */ > (if (code1 == NE_EXPR > && code2 == GT_EXPR > - && one_after) > - (gt @0 @1)) > + && one_after > + && allbits) > + (gt @c0 (convert @1))) > /* (a != (b-1)) & (a < b) -> a < (b-1) */ > (if (code1 == NE_EXPR > && code2 == LT_EXPR > - && one_before) > - (lt @0 @1)) > + && one_before > + && allbits) > + (lt @c0 (convert @1))) > ) > ) > ) > @@ -3100,26 +3108,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (for code1 (eq ne) > (for code2 (eq ne lt gt le ge) > (simplify > - (bit_ior:c (code1@3 @0 @1) (code2@4 @0 @2)) > + (bit_ior:c (code1:c@3 @0 @1) (code2:c@4 (convert?@c0 @0) @2)) > (if ((TREE_CODE (@1) == INTEGER_CST > && TREE_CODE (@2) == INTEGER_CST) > || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) > || POINTER_TYPE_P (TREE_TYPE (@1))) > - && operand_equal_p (@1, @2))) > + && bitwise_equal_p (@1, @2))) > (with > { > bool one_before = false; > bool one_after = false; > int cmp = 0; > + bool allbits = true; > if (TREE_CODE (@1) == INTEGER_CST > && TREE_CODE (@2) == INTEGER_CST) > { > - cmp = tree_int_cst_compare (@1, @2); > + allbits = TYPE_PRECISION (TREE_TYPE (@1)) <= TYPE_PRECISION (TREE_TYPE (@2)); > + auto t1 = wi::to_wide (fold_convert (TREE_TYPE (@2), @1)); > + auto t2 = wi::to_wide (@2); > + cmp = wi::cmp (t1, t2, TYPE_SIGN (TREE_TYPE (@2))); > if (cmp < 0 > - && wi::to_wide (@1) == wi::to_wide (@2) - 1) > + && t1 == t2 - 1) > one_before = true; > if (cmp > 0 > - && wi::to_wide (@1) == wi::to_wide (@2) + 1) > + && t1 == t2 + 1) > one_after = true; > } > bool val; > @@ -3136,26 +3148,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > } > (switch > (if (code1 == EQ_EXPR && val) @4) > - (if (code1 == NE_EXPR && val) { constant_boolean_node (true, type); }) > - (if (code1 == NE_EXPR && !val) @3) > + (if (code1 == NE_EXPR && val && allbits) { constant_boolean_node (true, type); }) > + (if (code1 == NE_EXPR && !val && allbits) @3) > (if (code1 == EQ_EXPR > && code2 == GT_EXPR > - && cmp == 0) > - (ge @0 @1)) > + && cmp == 0 > + && allbits) > + (ge @c0 @2)) > (if (code1 == EQ_EXPR > && code2 == LT_EXPR > - && cmp == 0) > - (le @0 @1)) > + && cmp == 0 > + && allbits) > + (le @c0 @2)) > /* (a == (b-1)) | (a >= b) -> a >= (b-1) */ > (if (code1 == EQ_EXPR > && code2 == GE_EXPR > - && one_before) > - (ge @0 @1)) > + && one_before > + && allbits) > + (ge @c0 (convert @1))) > /* (a == (b+1)) | (a <= b) -> a <= (b-1) */ > (if (code1 == EQ_EXPR > && code2 == LE_EXPR > - && one_after) > - (le @0 @1)) > + && one_after > + && allbits) > + (le @c0 (convert @1))) > ) > ) > ) > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c > new file mode 100644 > index 00000000000..4ec73f030bb > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > +/* PR tree-optimization/106164 */ > +/* PR tree-optimization/111456 */ > + > +_Bool iu(int a) > +{ > + _Bool t = a == 0; > + unsigned t1 = a; > + _Bool t2 = t1 >= 3; > + return t & t2; > +} > + > +_Bool is(int a) > +{ > + _Bool t = a == 0; > + short t1 = a; > + _Bool t2 = t1 >= 3; > + return t & t2; > +} > + > +/* { dg-final { scan-tree-dump-times "return 0" 2 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c > new file mode 100644 > index 00000000000..ee04ebbaaed > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c > @@ -0,0 +1,28 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-forwprop1" } */ > +/* PR tree-optimization/106164 */ > +/* PR tree-optimization/111456 */ > + > +_Bool f(int a) > +{ > + _Bool t = a == 3; > + unsigned t1 = a; > + _Bool t2 = t1 >= 3; > + return t | t2; > +} > + > +/* Should be able to optimize down to just `a > 2` during forwprop1 */ > +/* { dg-final { scan-tree-dump-not "a_\[0-9\]+.D. == 3" "forwprop1" } } */ > + > +_Bool f1(int b) > +{ > + _Bool t = b == 3; > + short t1 = b; > + _Bool t2 = t1 >= 3; > + return t | t2; > +} > + > +/* Should be able to optimize down to just `a > 2` during forwprop1 as `((short)a) >= 3` is > + true already when `a == 3`. */ > +/* { dg-final { scan-tree-dump-not "b_\[0-9\]+.D. == 3" "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "_\[0-9\]+ > 2" 2 "forwprop1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c > new file mode 100644 > index 00000000000..8a2f730b387 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c > @@ -0,0 +1,43 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > +/* PR tree-optimization/111456 */ > + > +void foo(void); > +static int i; > +static int *j = &i; > +static char l; > +static void(a)(char) {} > +static short(b)(short c, short d) { return c - d; } > +static short(e)(short f, int g) { > + return f < 0 || g < 0 || g >= 32 ? f : f >> g; > +} > +static short(h)(short f, int g) { return g >= 2 ?: f >> g; } > +static char k(char m, short n) { > + short o; > + int *p = &i; > + if (!(((m) >= 1) && ((m) <= 1))) { > + __builtin_unreachable(); > + } > + o = e(i, i); > + if (h(1, o)) > + ; > + else { > + m = 0; > + for (; m >= -20; m = b(m, 9)) > + if (a(i), n) { > + if (*p) > + ; > + else > + foo(); > + ; > + } else > + return l; > + } > + return i; > +} > +int main() { k(0 <= 0 > *j, i); } > + > + > +/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */ > +/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */ > + > -- > 2.31.1 >
diff --git a/gcc/match.pd b/gcc/match.pd index a37af05f873..0bf91bde486 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2973,7 +2973,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@1))) (gt @0 (minus @1 { build_int_cst (TREE_TYPE (@1), 1); })))) -/* Convert (X == CST1) && (X OP2 CST2) to a known value +/* Convert (X == CST1) && ((other)X OP2 CST2) to a known value based on CST1 OP2 CST2. Similarly for (X != CST1). */ /* Convert (X == Y) && (X OP2 Y) to a known value if X is an integral type. Similarly for (X != Y). */ @@ -2981,26 +2981,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (for code1 (eq ne) (for code2 (eq ne lt gt le ge) (simplify - (bit_and:c (code1@3 @0 @1) (code2@4 @0 @2)) + (bit_and:c (code1:c@3 @0 @1) (code2:c@4 (convert?@c0 @0) @2)) (if ((TREE_CODE (@1) == INTEGER_CST && TREE_CODE (@2) == INTEGER_CST) || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) || POINTER_TYPE_P (TREE_TYPE (@1))) - && operand_equal_p (@1, @2))) + && bitwise_equal_p (@1, @2))) (with { bool one_before = false; bool one_after = false; int cmp = 0; + bool allbits = true; if (TREE_CODE (@1) == INTEGER_CST && TREE_CODE (@2) == INTEGER_CST) { - cmp = tree_int_cst_compare (@1, @2); + allbits = TYPE_PRECISION (TREE_TYPE (@1)) <= TYPE_PRECISION (TREE_TYPE (@2)); + auto t1 = wi::to_wide (fold_convert (TREE_TYPE (@2), @1)); + auto t2 = wi::to_wide (@2); + cmp = wi::cmp (t1, t2, TYPE_SIGN (TREE_TYPE (@2))); if (cmp < 0 - && wi::to_wide (@1) == wi::to_wide (@2) - 1) + && t1 == t2 - 1) one_before = true; if (cmp > 0 - && wi::to_wide (@1) == wi::to_wide (@2) + 1) + && t1 == t2 + 1) one_after = true; } bool val; @@ -3018,25 +3022,29 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (switch (if (code1 == EQ_EXPR && val) @3) (if (code1 == EQ_EXPR && !val) { constant_boolean_node (false, type); }) - (if (code1 == NE_EXPR && !val) @4) + (if (code1 == NE_EXPR && !val && allbits) @4) (if (code1 == NE_EXPR && code2 == GE_EXPR - && cmp == 0) - (gt @0 @1)) + && cmp == 0 + && allbits) + (gt @c0 (convert @1))) (if (code1 == NE_EXPR && code2 == LE_EXPR - && cmp == 0) - (lt @0 @1)) + && cmp == 0 + && allbits) + (lt @c0 (convert @1))) /* (a != (b+1)) & (a > b) -> a > (b+1) */ (if (code1 == NE_EXPR && code2 == GT_EXPR - && one_after) - (gt @0 @1)) + && one_after + && allbits) + (gt @c0 (convert @1))) /* (a != (b-1)) & (a < b) -> a < (b-1) */ (if (code1 == NE_EXPR && code2 == LT_EXPR - && one_before) - (lt @0 @1)) + && one_before + && allbits) + (lt @c0 (convert @1))) ) ) ) @@ -3100,26 +3108,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (for code1 (eq ne) (for code2 (eq ne lt gt le ge) (simplify - (bit_ior:c (code1@3 @0 @1) (code2@4 @0 @2)) + (bit_ior:c (code1:c@3 @0 @1) (code2:c@4 (convert?@c0 @0) @2)) (if ((TREE_CODE (@1) == INTEGER_CST && TREE_CODE (@2) == INTEGER_CST) || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) || POINTER_TYPE_P (TREE_TYPE (@1))) - && operand_equal_p (@1, @2))) + && bitwise_equal_p (@1, @2))) (with { bool one_before = false; bool one_after = false; int cmp = 0; + bool allbits = true; if (TREE_CODE (@1) == INTEGER_CST && TREE_CODE (@2) == INTEGER_CST) { - cmp = tree_int_cst_compare (@1, @2); + allbits = TYPE_PRECISION (TREE_TYPE (@1)) <= TYPE_PRECISION (TREE_TYPE (@2)); + auto t1 = wi::to_wide (fold_convert (TREE_TYPE (@2), @1)); + auto t2 = wi::to_wide (@2); + cmp = wi::cmp (t1, t2, TYPE_SIGN (TREE_TYPE (@2))); if (cmp < 0 - && wi::to_wide (@1) == wi::to_wide (@2) - 1) + && t1 == t2 - 1) one_before = true; if (cmp > 0 - && wi::to_wide (@1) == wi::to_wide (@2) + 1) + && t1 == t2 + 1) one_after = true; } bool val; @@ -3136,26 +3148,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (switch (if (code1 == EQ_EXPR && val) @4) - (if (code1 == NE_EXPR && val) { constant_boolean_node (true, type); }) - (if (code1 == NE_EXPR && !val) @3) + (if (code1 == NE_EXPR && val && allbits) { constant_boolean_node (true, type); }) + (if (code1 == NE_EXPR && !val && allbits) @3) (if (code1 == EQ_EXPR && code2 == GT_EXPR - && cmp == 0) - (ge @0 @1)) + && cmp == 0 + && allbits) + (ge @c0 @2)) (if (code1 == EQ_EXPR && code2 == LT_EXPR - && cmp == 0) - (le @0 @1)) + && cmp == 0 + && allbits) + (le @c0 @2)) /* (a == (b-1)) | (a >= b) -> a >= (b-1) */ (if (code1 == EQ_EXPR && code2 == GE_EXPR - && one_before) - (ge @0 @1)) + && one_before + && allbits) + (ge @c0 (convert @1))) /* (a == (b+1)) | (a <= b) -> a <= (b-1) */ (if (code1 == EQ_EXPR && code2 == LE_EXPR - && one_after) - (le @0 @1)) + && one_after + && allbits) + (le @c0 (convert @1))) ) ) ) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c new file mode 100644 index 00000000000..4ec73f030bb --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* PR tree-optimization/106164 */ +/* PR tree-optimization/111456 */ + +_Bool iu(int a) +{ + _Bool t = a == 0; + unsigned t1 = a; + _Bool t2 = t1 >= 3; + return t & t2; +} + +_Bool is(int a) +{ + _Bool t = a == 0; + short t1 = a; + _Bool t2 = t1 >= 3; + return t & t2; +} + +/* { dg-final { scan-tree-dump-times "return 0" 2 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c new file mode 100644 index 00000000000..ee04ebbaaed --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop1" } */ +/* PR tree-optimization/106164 */ +/* PR tree-optimization/111456 */ + +_Bool f(int a) +{ + _Bool t = a == 3; + unsigned t1 = a; + _Bool t2 = t1 >= 3; + return t | t2; +} + +/* Should be able to optimize down to just `a > 2` during forwprop1 */ +/* { dg-final { scan-tree-dump-not "a_\[0-9\]+.D. == 3" "forwprop1" } } */ + +_Bool f1(int b) +{ + _Bool t = b == 3; + short t1 = b; + _Bool t2 = t1 >= 3; + return t | t2; +} + +/* Should be able to optimize down to just `a > 2` during forwprop1 as `((short)a) >= 3` is + true already when `a == 3`. */ +/* { dg-final { scan-tree-dump-not "b_\[0-9\]+.D. == 3" "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "_\[0-9\]+ > 2" 2 "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c new file mode 100644 index 00000000000..8a2f730b387 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* PR tree-optimization/111456 */ + +void foo(void); +static int i; +static int *j = &i; +static char l; +static void(a)(char) {} +static short(b)(short c, short d) { return c - d; } +static short(e)(short f, int g) { + return f < 0 || g < 0 || g >= 32 ? f : f >> g; +} +static short(h)(short f, int g) { return g >= 2 ?: f >> g; } +static char k(char m, short n) { + short o; + int *p = &i; + if (!(((m) >= 1) && ((m) <= 1))) { + __builtin_unreachable(); + } + o = e(i, i); + if (h(1, o)) + ; + else { + m = 0; + for (; m >= -20; m = b(m, 9)) + if (a(i), n) { + if (*p) + ; + else + foo(); + ; + } else + return l; + } + return i; +} +int main() { k(0 <= 0 > *j, i); } + + +/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */ +/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */ +