diff mbox series

Match: optimize `a == CST & unary(a)` [PR111487]

Message ID 20240513152438.4132420-1-quic_apinski@quicinc.com
State New
Headers show
Series Match: optimize `a == CST & unary(a)` [PR111487] | expand

Commit Message

Andrew Pinski May 13, 2024, 3:24 p.m. UTC
This is an expansion of the optimize `a == CST & a`
to handle more than just casts. It adds optimization
for unary<a>.
The patch for binary operators will come later.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

	PR tree-optimization/111487
gcc/ChangeLog:

	* match.pd (tcc_int_unary): New operator list.
	(`a == CST & unary(a)`): New pattern.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/and-unary-1.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/match.pd                                | 12 ++++
 gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c | 61 +++++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c

Comments

Richard Biener May 28, 2024, 8:31 a.m. UTC | #1
On Mon, May 13, 2024 at 5:25 PM Andrew Pinski <quic_apinski@quicinc.com> wrote:
>
> This is an expansion of the optimize `a == CST & a`
> to handle more than just casts. It adds optimization
> for unary<a>.
> The patch for binary operators will come later.
>
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
>         PR tree-optimization/111487
> gcc/ChangeLog:
>
>         * match.pd (tcc_int_unary): New operator list.
>         (`a == CST & unary(a)`): New pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/and-unary-1.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> ---
>  gcc/match.pd                                | 12 ++++
>  gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c | 61 +++++++++++++++++++++
>  2 files changed, 73 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 07e743ae464..3ee28a3d8fc 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -57,6 +57,10 @@ along with GCC; see the file COPYING3.  If not see
>
>  #include "cfn-operators.pd"
>
> +/* integer unary operators that return the same type. */
> +(define_operator_list tcc_int_unary
> + abs absu negate bit_not BSWAP POPCOUNT CTZ CLZ PARITY)
> +

FFS and CLRSB (what's that...) missing at least.  CTZ and friends do
not return the same type as the argument (nor does absu).  So the
comment needs some work and the tcc_int_unary is a bad name unless
the comments means "Unary operators with integer operand and result type"?

>  /* Define operand lists for math rounding functions {,i,l,ll}FN,
>     where the versions prefixed with "i" return an int, those prefixed with
>     "l" return a long and those prefixed with "ll" return a long long.
> @@ -5451,6 +5455,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    @2
>    { build_zero_cst (type); }))
>
> +/* `(a == CST) & unary(a)` can be simplified to `(a == CST) & unary(CST)`. */
> +(simplify
> + (bit_and:c (convert@2 (eq @0 INTEGER_CST@1))
> +            (convert? (tcc_int_unary @3)))
> + (if (bitwise_equal_p (@0, @3))

I know you like it - but the testcase doesn't seem to exercise
bitwise_equal_p here?
(I don't like it too much - it makes matching expensive)

OK with matching @0 to @3 or some arguing from your side (and maybe
testsuite coverage).

> +  (with { tree  inner_type = TREE_TYPE (@3); }
> +   (bit_and @2 (convert (tcc_int_unary (convert:inner_type @1)))))))

So I suppose the bitwise_equal_p "hides" the check against
truncation/bogus extension of @1?

For (a == INT_MIN) & -a we end up constant folding -INT_MIN which
invokes UB and thus might not actually fold.  As & is not short-circuiting
it's probably "safe" in some sense, but should we worry here?  I was
thinking of doing (tcc_int_unary! (convert:inner_type! @1)) as we expect
to constant fold.

> +
>  /* Optimize
>     # x_5 in range [cst1, cst2] where cst2 = cst1 + 1
>     x_5 == cstN ? cst4 : cst3
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c b/gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c
> new file mode 100644
> index 00000000000..c157bc11b00
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c
> @@ -0,0 +1,61 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -fdump-tree-optimized-raw" } */
> +/* unary part of PR tree-optimization/111487 */
> +
> +int abs1(int a)
> +{
> +  int b = __builtin_abs(a);
> +  return (a == 1) & b;
> +}
> +int absu1(int a)
> +{
> +  int b;
> +  b = a > 0 ? -a:a;
> +  b = -b;
> +        return (a == 1) & b;
> +}
> +
> +int bswap1(int a)
> +{
> +  int b = __builtin_bswap32(a);
> +  return (a == 1) & b;
> +}
> +
> +int ctz1(int a)
> +{
> +  int b = __builtin_ctz(a);
> +  return (a == 1) & b;
> +}
> +int pop1(int a)
> +{
> +  int b = __builtin_popcount(a);
> +  return (a == 1) & b;
> +}
> +int neg1(int a)
> +{
> +  int b = -(a);
> +  return (a == 1) & b;
> +}
> +int not1(int a)
> +{
> +  int b = ~(a);
> +  return (a == 1) & b;
> +}
> +int partity1(int a)
> +{
> +  int b = __builtin_parity(a);
> +  return (a == 1) & b;
> +}
> +
> +
> +/* We should optimize out the unary operator for each.
> +   For ctz we can optimize directly to `return 0`.
> +   For bswap1 and not1, we can do the same but not until after forwprop1.  */
> +/* { dg-final { scan-tree-dump-times "eq_expr, " 7 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "eq_expr, " 5 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "abs_expr, "  "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-not "absu_expr, "  "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-not "bit_not_expr, "  "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-not "negate_expr, "  "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-not "gimple_call <"  "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-not "bit_and_expr,  "  "forwprop1" } } */
> --
> 2.34.1
>
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 07e743ae464..3ee28a3d8fc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -57,6 +57,10 @@  along with GCC; see the file COPYING3.  If not see
 
 #include "cfn-operators.pd"
 
+/* integer unary operators that return the same type. */
+(define_operator_list tcc_int_unary
+ abs absu negate bit_not BSWAP POPCOUNT CTZ CLZ PARITY)
+
 /* Define operand lists for math rounding functions {,i,l,ll}FN,
    where the versions prefixed with "i" return an int, those prefixed with
    "l" return a long and those prefixed with "ll" return a long long.
@@ -5451,6 +5455,14 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   @2
   { build_zero_cst (type); }))
 
+/* `(a == CST) & unary(a)` can be simplified to `(a == CST) & unary(CST)`. */
+(simplify
+ (bit_and:c (convert@2 (eq @0 INTEGER_CST@1))
+            (convert? (tcc_int_unary @3)))
+ (if (bitwise_equal_p (@0, @3))
+  (with { tree  inner_type = TREE_TYPE (@3); }
+   (bit_and @2 (convert (tcc_int_unary (convert:inner_type @1)))))))
+
 /* Optimize
    # x_5 in range [cst1, cst2] where cst2 = cst1 + 1
    x_5 == cstN ? cst4 : cst3
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c b/gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c
new file mode 100644
index 00000000000..c157bc11b00
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/and-unary-1.c
@@ -0,0 +1,61 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-forwprop1-raw -fdump-tree-optimized-raw" } */
+/* unary part of PR tree-optimization/111487 */
+
+int abs1(int a)
+{
+  int b = __builtin_abs(a);
+  return (a == 1) & b;
+}
+int absu1(int a)
+{
+  int b;
+  b = a > 0 ? -a:a;
+  b = -b;
+        return (a == 1) & b;
+}
+
+int bswap1(int a)
+{
+  int b = __builtin_bswap32(a);
+  return (a == 1) & b;
+}
+
+int ctz1(int a)
+{
+  int b = __builtin_ctz(a);
+  return (a == 1) & b;
+}
+int pop1(int a)
+{
+  int b = __builtin_popcount(a);
+  return (a == 1) & b;
+}
+int neg1(int a)
+{
+  int b = -(a);
+  return (a == 1) & b;
+}
+int not1(int a)
+{
+  int b = ~(a);
+  return (a == 1) & b;
+}
+int partity1(int a)
+{
+  int b = __builtin_parity(a);
+  return (a == 1) & b;
+}
+
+
+/* We should optimize out the unary operator for each.
+   For ctz we can optimize directly to `return 0`.
+   For bswap1 and not1, we can do the same but not until after forwprop1.  */
+/* { dg-final { scan-tree-dump-times "eq_expr, " 7 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "eq_expr, " 5 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "abs_expr, "  "forwprop1" } } */
+/* { dg-final { scan-tree-dump-not "absu_expr, "  "forwprop1" } } */
+/* { dg-final { scan-tree-dump-not "bit_not_expr, "  "forwprop1" } } */
+/* { dg-final { scan-tree-dump-not "negate_expr, "  "forwprop1" } } */
+/* { dg-final { scan-tree-dump-not "gimple_call <"  "forwprop1" } } */
+/* { dg-final { scan-tree-dump-not "bit_and_expr,  "  "forwprop1" } } */