Message ID | 20230712160802.998150-1-apinski@marvell.com |
---|---|
State | New |
Headers | show |
Series | Fix part of PR 110293: `A NEEQ (A NEEQ CST)` part | expand |
On Wed, Jul 12, 2023 at 6:09 PM Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > This fixes part of PR 110293, for the outer comparison case > being `!=` or `==`. In turn PR 110539 is able to be optimized > again as the if statement for `(a&1) == ((a & 1) != 0)` gets optimized > to `false` early enough to allow FRE/DOM to do a CSE for memory store/load. > > OK? Bootstrapped and tested on x86_64-linux with no regressions. OK. Thanks, Richard. > gcc/ChangeLog: > > PR tree-optimization/110293 > PR tree-optimization/110539 > * match.pd: Expand the `x != (typeof x)(x == 0)` > pattern to handle where the inner and outer comparisons > are either `!=` or `==` and handle other constants > than 0. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/pr110293-1.c: New test. > * gcc.dg/tree-ssa/pr110539-1.c: New test. > * gcc.dg/tree-ssa/pr110539-2.c: New test. > * gcc.dg/tree-ssa/pr110539-3.c: New test. > * gcc.dg/tree-ssa/pr110539-4.c: New test. > --- > gcc/match.pd | 39 ++++++++-- > gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c | 58 +++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c | 12 ++++ > gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c | 12 ++++ > gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c | 75 ++++++++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c | 82 ++++++++++++++++++++++ > 6 files changed, 274 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 8543f777a28..351d9285e92 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -6429,10 +6429,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (TYPE_UNSIGNED (TREE_TYPE (@0))) > { constant_boolean_node (false, type); })) > > -/* x != (typeof x)(x == 
0) is always true. */ > -(simplify > - (ne:c @0 (convert (eq @0 integer_zerop))) > - { constant_boolean_node (true, type); }) > +/* x != (typeof x)(x == CST) -> CST == 0 ? 1 : (CST == 1 ? (x!=0&&x!=1) : x != 0) */ > +/* x != (typeof x)(x != CST) -> CST == 1 ? 1 : (CST == 0 ? (x!=0&&x!=1) : x != 1) */ > +/* x == (typeof x)(x == CST) -> CST == 0 ? 0 : (CST == 1 ? (x==0||x==1) : x == 0) */ > +/* x == (typeof x)(x != CST) -> CST == 1 ? 0 : (CST == 0 ? (x==0||x==1) : x == 1) */ > +(for outer (ne eq) > + (for inner (ne eq) > + (simplify > + (outer:c @0 (convert (inner @0 INTEGER_CST@1))) > + (with { > + bool cst1 = integer_onep (@1); > + bool cst0 = integer_zerop (@1); > + bool innereq = inner == EQ_EXPR; > + bool outereq = outer == EQ_EXPR; > + } > + (switch > + (if (innereq ? cst0 : cst1) > + { constant_boolean_node (!outereq, type); }) > + (if (innereq ? cst1 : cst0) > + (with { > + tree utype = unsigned_type_for (TREE_TYPE (@0)); > + tree ucst1 = build_one_cst (utype); > + } > + (if (!outereq) > + (gt (convert:utype @0) { ucst1; }) > + (le (convert:utype @0) { ucst1; }) > + ) > + ) > + ) > + (if (innereq) > + (outer @0 { build_zero_cst (TREE_TYPE (@0)); })) > + (outer @0 { build_one_cst (TREE_TYPE (@0)); })) > + ) > + ) > + ) > +) > > (for cmp (unordered ordered unlt unle ungt unge uneq ltgt) > /* If the second operand is NaN, the result is constant. 
*/ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c > new file mode 100644 > index 00000000000..24aea1a2d03 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c > @@ -0,0 +1,58 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */ > + > +_Bool eqeq0(unsigned x) > +{ > + return x == (x == 0); > +} > +_Bool eqeq1(unsigned x) > +{ > + return x == (x == 1); > +} > +_Bool eqeq2(unsigned x) > +{ > + return x == (x == 2); > +} > + > +_Bool neeq0(unsigned x) > +{ > + return x != (x == 0); > +} > +_Bool neeq1(unsigned x) > +{ > + return x != (x == 1); > +} > +_Bool neeq2(unsigned x) > +{ > + return x != (x == 2); > +} > + > +_Bool eqne0(unsigned x) > +{ > + return x == (x != 0); > +} > +_Bool eqne1(unsigned x) > +{ > + return x == (x != 1); > +} > +_Bool eqne2(unsigned x) > +{ > + return x == (x != 2); > +} > + > +_Bool nene0(unsigned x) > +{ > + return x != (x != 0); > +} > +_Bool nene1(unsigned x) > +{ > + return x != (x != 1); > +} > +_Bool nene2(unsigned x) > +{ > + return x != (x != 2); > +} > + > +/* All of these functions should have removed the inner most comparison which > + means all of the conversions from bool to unsigned should have been removed too. 
*/ > +/* { dg-final { scan-tree-dump-not "nop_expr," "optimized"} } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c > new file mode 100644 > index 00000000000..6ba864cdd13 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-optimized" } */ > +int f(int a) > +{ > + int b = a & 1; > + int c = b != 0; > + return c == b; > +} > + > +/* This should be optimized to just return 1; */ > +/* { dg-final { scan-tree-dump-not " == " "optimized"} } */ > +/* { dg-final { scan-tree-dump "return 1;" "optimized"} } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c > new file mode 100644 > index 00000000000..17874d349ef > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-optimized" } */ > +int f(int a) > +{ > + int b = a & 1; > + int c = b == 0; > + return c == b; > +} > + > +/* This should be optimized to just return 0; */ > +/* { dg-final { scan-tree-dump-not " == " "optimized"} } */ > +/* { dg-final { scan-tree-dump "return 0;" "optimized"} } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c > new file mode 100644 > index 00000000000..e2bd4dfb45d > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c > @@ -0,0 +1,75 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > + > +void foo(void); > +static int a, c = 1; > +static short b; > +static int *d = &c, *e = &a; > +static int **f = &d; > +void __assert_fail() __attribute__((__noreturn__)); > +static void g(short h) { > + if (*d) > + ; > + else { > + if (e) __assert_fail(); > + if (a) { > + __builtin_unreachable(); > + } else > + __assert_fail(); > + } > + if ((((0, 0) || h) == h) + b) *f = 0; > +} > +int main() { > + int i = 0 != 
10 & a; > + g(i); > + *e = 9; > + e = 0; > + if (d == 0) > + ; > + else > + foo(); > + ; > +} > +/* The call to foo should be optimized away. */ > +/* The missed optimization at -O2 here was: > + int b = a & 1; > + int c = b != 0; > + int d = c == b; > + not being optimized to 1 early enough, it is done in vrp2 but > + that is too late. > + In phiopt2 we got: > + _17 = i_7 != 0; > + _12 = (int) _17; > + if (i_7 == _12) > + goto <bb 9>; [50.00%] > + else > + goto <bb 10>; [50.00%] > + > + <bb 9> [local count: 268435456]: > + d = 0B; > + > + <bb 10> [local count: 536870913]: > + e.1_3 = e; > + *e.1_3 = 9; > + e = 0B; > + d.2_4 = d; > + if (d.2_4 == 0B) > + > + The first if is not optimized before, until vrp2 which is > + too late as there are no passes which will then find the > + load of d in `d.2_4 = d;` was `0B` after vrp2. > + > + Now in forwprop3 (after phiopt2), we optimize: > + _17 = i_7 != 0; > + _12 = (int) _17; > + if (i_7 == _12) > + into just: > + _t = (unsigned)i_7; > + if (_t <= 1) > + > + And then during ccp3, that is optimized away and that is optimized > + early enough now that the load `d.2_4 = d;` is optimized to just > + `d.2_4 = 0B;` > + */ > + > +/* { dg-final { scan-tree-dump-not "foo \\(\\)" "optimized"} } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c > new file mode 100644 > index 00000000000..2c03dcc87fa > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c > @@ -0,0 +1,82 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fstrict-aliasing -fdump-tree-optimized" } */ > + > +/* This is a small variant of pr110539-3.c using -O1 -fstrict-aliasing > + rather than -O2. Just to show VRP and PRE are not needed to optimize > + the call to foo away. 
*/ > + > + > +void foo(void); > +static int a, c = 1; > +static short b; > +static int *d = &c, *e = &a; > +static int **f = &d; > +void __assert_fail() __attribute__((__noreturn__)); > +static void g(int h) { > + if (*d) > + ; > + else { > + if (e) __assert_fail(); > + if (a) { > + __builtin_unreachable(); > + } else > + __assert_fail(); > + } > + if (((h!=0) == h) + b) *f = 0; > +} > + > +int main() { > + int i = 0 != 10 & a; > + g(i); > + *e = 9; > + e = 0; > + if (d == 0) > + ; > + else > + foo(); > + ; > +} > + > +/* The call to foo should be optimized away. */ > +/* The missed optimization at -O1 here was: > + int b = a & 1; > + int c = b != 0; > + int d = c == b; > + not being optimized to 1 early enough, it is done in vrp2 but > + that is too late. > + In phiopt2 we got: > + _17 = i_7 != 0; > + _12 = (int) _17; > + if (i_7 == _12) > + goto <bb 9>; [50.00%] > + else > + goto <bb 10>; [50.00%] > + > + <bb 9> [local count: 268435456]: > + d = 0B; > + > + <bb 10> [local count: 536870913]: > + e.1_3 = e; > + *e.1_3 = 9; > + e = 0B; > + d.2_4 = d; > + if (d.2_4 == 0B) > + > + The first if is not optimized before, until vrp2 which is > + too late as there are no passes which will then find the > + load of d in `d.2_4 = d;` was `0B` after vrp2. > + > + Now in forwprop3 (after phiopt2), we optimize: > + _17 = i_7 != 0; > + _12 = (int) _17; > + if (i_7 == _12) > + into just: > + _t = (unsigned)i_7; > + if (_t <= 1) > + > + And then during ccp3, that is optimized away and that is optimized > + early enough now that the load `d.2_4 = d;` is optimized to just > + `d.2_4 = 0B;` > + */ > + > +/* { dg-final { scan-tree-dump-not "foo \\(\\)" "optimized"} } */ > -- > 2.31.1 >
diff --git a/gcc/match.pd b/gcc/match.pd index 8543f777a28..351d9285e92 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6429,10 +6429,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (TYPE_UNSIGNED (TREE_TYPE (@0))) { constant_boolean_node (false, type); })) -/* x != (typeof x)(x == 0) is always true. */ -(simplify - (ne:c @0 (convert (eq @0 integer_zerop))) - { constant_boolean_node (true, type); }) +/* x != (typeof x)(x == CST) -> CST == 0 ? 1 : (CST == 1 ? (x!=0&&x!=1) : x != 0) */ +/* x != (typeof x)(x != CST) -> CST == 1 ? 1 : (CST == 0 ? (x!=0&&x!=1) : x != 1) */ +/* x == (typeof x)(x == CST) -> CST == 0 ? 0 : (CST == 1 ? (x==0||x==1) : x == 0) */ +/* x == (typeof x)(x != CST) -> CST == 1 ? 0 : (CST == 0 ? (x==0||x==1) : x == 1) */ +(for outer (ne eq) + (for inner (ne eq) + (simplify + (outer:c @0 (convert (inner @0 INTEGER_CST@1))) + (with { + bool cst1 = integer_onep (@1); + bool cst0 = integer_zerop (@1); + bool innereq = inner == EQ_EXPR; + bool outereq = outer == EQ_EXPR; + } + (switch + (if (innereq ? cst0 : cst1) + { constant_boolean_node (!outereq, type); }) + (if (innereq ? cst1 : cst0) + (with { + tree utype = unsigned_type_for (TREE_TYPE (@0)); + tree ucst1 = build_one_cst (utype); + } + (if (!outereq) + (gt (convert:utype @0) { ucst1; }) + (le (convert:utype @0) { ucst1; }) + ) + ) + ) + (if (innereq) + (outer @0 { build_zero_cst (TREE_TYPE (@0)); })) + (outer @0 { build_one_cst (TREE_TYPE (@0)); })) + ) + ) + ) +) (for cmp (unordered ordered unlt unle ungt unge uneq ltgt) /* If the second operand is NaN, the result is constant. 
*/ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c new file mode 100644 index 00000000000..24aea1a2d03 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */ + +_Bool eqeq0(unsigned x) +{ + return x == (x == 0); +} +_Bool eqeq1(unsigned x) +{ + return x == (x == 1); +} +_Bool eqeq2(unsigned x) +{ + return x == (x == 2); +} + +_Bool neeq0(unsigned x) +{ + return x != (x == 0); +} +_Bool neeq1(unsigned x) +{ + return x != (x == 1); +} +_Bool neeq2(unsigned x) +{ + return x != (x == 2); +} + +_Bool eqne0(unsigned x) +{ + return x == (x != 0); +} +_Bool eqne1(unsigned x) +{ + return x == (x != 1); +} +_Bool eqne2(unsigned x) +{ + return x == (x != 2); +} + +_Bool nene0(unsigned x) +{ + return x != (x != 0); +} +_Bool nene1(unsigned x) +{ + return x != (x != 1); +} +_Bool nene2(unsigned x) +{ + return x != (x != 2); +} + +/* All of these functions should have removed the inner most comparison which + means all of the conversions from bool to unsigned should have been removed too. 
*/ +/* { dg-final { scan-tree-dump-not "nop_expr," "optimized"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c new file mode 100644 index 00000000000..6ba864cdd13 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ +int f(int a) +{ + int b = a & 1; + int c = b != 0; + return c == b; +} + +/* This should be optimized to just return 1; */ +/* { dg-final { scan-tree-dump-not " == " "optimized"} } */ +/* { dg-final { scan-tree-dump "return 1;" "optimized"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c new file mode 100644 index 00000000000..17874d349ef --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ +int f(int a) +{ + int b = a & 1; + int c = b == 0; + return c == b; +} + +/* This should be optimized to just return 0; */ +/* { dg-final { scan-tree-dump-not " == " "optimized"} } */ +/* { dg-final { scan-tree-dump "return 0;" "optimized"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c new file mode 100644 index 00000000000..e2bd4dfb45d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c @@ -0,0 +1,75 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +void foo(void); +static int a, c = 1; +static short b; +static int *d = &c, *e = &a; +static int **f = &d; +void __assert_fail() __attribute__((__noreturn__)); +static void g(short h) { + if (*d) + ; + else { + if (e) __assert_fail(); + if (a) { + __builtin_unreachable(); + } else + __assert_fail(); + } + if ((((0, 0) || h) == h) + b) *f = 0; +} +int main() { + int i = 0 != 10 & a; + g(i); + *e = 9; + e = 0; + if (d == 0) + ; + else + foo(); + ; +} +/* The call to foo should be optimized away. 
*/ +/* The missed optimization at -O2 here was: + int b = a & 1; + int c = b != 0; + int d = c == b; + not being optimized to 1 early enough, it is done in vrp2 but + that is too late. + In phiopt2 we got: + _17 = i_7 != 0; + _12 = (int) _17; + if (i_7 == _12) + goto <bb 9>; [50.00%] + else + goto <bb 10>; [50.00%] + + <bb 9> [local count: 268435456]: + d = 0B; + + <bb 10> [local count: 536870913]: + e.1_3 = e; + *e.1_3 = 9; + e = 0B; + d.2_4 = d; + if (d.2_4 == 0B) + + The first if is not optimized before, until vrp2 which is + too late as there are no passes which will then find the + load of d in `d.2_4 = d;` was `0B` after vrp2. + + Now in forwprop3 (after phiopt2), we optimize: + _17 = i_7 != 0; + _12 = (int) _17; + if (i_7 == _12) + into just: + _t = (unsigned)i_7; + if (_t <= 1) + + And then during ccp3, that is optimized away and that is optimized + early enough now that the load `d.2_4 = d;` is optimized to just + `d.2_4 = 0B;` + */ + +/* { dg-final { scan-tree-dump-not "foo \\(\\)" "optimized"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c new file mode 100644 index 00000000000..2c03dcc87fa --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c @@ -0,0 +1,82 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fstrict-aliasing -fdump-tree-optimized" } */ + +/* This is a small variant of pr110539-3.c using -O1 -fstrict-aliasing + rather than -O2. Just to show VRP and PRE are not needed to optimize + the call to foo away. 
*/ + + +void foo(void); +static int a, c = 1; +static short b; +static int *d = &c, *e = &a; +static int **f = &d; +void __assert_fail() __attribute__((__noreturn__)); +static void g(int h) { + if (*d) + ; + else { + if (e) __assert_fail(); + if (a) { + __builtin_unreachable(); + } else + __assert_fail(); + } + if (((h!=0) == h) + b) *f = 0; +} + +int main() { + int i = 0 != 10 & a; + g(i); + *e = 9; + e = 0; + if (d == 0) + ; + else + foo(); + ; +} + +/* The call to foo should be optimized away. */ +/* The missed optimization at -O1 here was: + int b = a & 1; + int c = b != 0; + int d = c == b; + not being optimized to 1 early enough, it is done in vrp2 but + that is too late. + In phiopt2 we got: + _17 = i_7 != 0; + _12 = (int) _17; + if (i_7 == _12) + goto <bb 9>; [50.00%] + else + goto <bb 10>; [50.00%] + + <bb 9> [local count: 268435456]: + d = 0B; + + <bb 10> [local count: 536870913]: + e.1_3 = e; + *e.1_3 = 9; + e = 0B; + d.2_4 = d; + if (d.2_4 == 0B) + + The first if is not optimized before, until vrp2 which is + too late as there are no passes which will then find the + load of d in `d.2_4 = d;` was `0B` after vrp2. + + Now in forwprop3 (after phiopt2), we optimize: + _17 = i_7 != 0; + _12 = (int) _17; + if (i_7 == _12) + into just: + _t = (unsigned)i_7; + if (_t <= 1) + + And then during ccp3, that is optimized away and that is optimized + early enough now that the load `d.2_4 = d;` is optimized to just + `d.2_4 = 0B;` + */ + +/* { dg-final { scan-tree-dump-not "foo \\(\\)" "optimized"} } */