diff mbox series

MATCH: Simplify `COND_ADD (a > 0, b, a, b)` to `b + MAX_EXPR <a, 0>`

Message ID 20260309092012.2404529-1-eikagupt@qti.qualcomm.com
State New
Headers show
Series MATCH: Simplify `COND_ADD (a > 0, b, a, b)` to `b + MAX_EXPR <a, 0>` | expand

Commit Message

Eikansh Gupta March 9, 2026, 9:20 a.m. UTC
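Add two match.pd patterns that simplify COND_ADD when the mask is
a strict greater-than comparison against zero and the else value equals
one of the addition operands.  Both patterns are restricted to
floating-point types for which NaNs and signed zeros need not be honored: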

  COND_ADD (a > 0, b, a, b)  ->  b + MAX_EXPR <a, 0>
  COND_ADD (a > 0, a, b, b)  ->  b + MAX_EXPR <a, 0>

	PR 124097

gcc/ChangeLog:

	* match.pd: Simplify COND_ADD (a > 0, b, a, b) and
	COND_ADD (a > 0, a, b, b) to b + MAX_EXPR <a, 0>.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/pr124097.c: New test.

Signed-off-by: Eikansh Gupta <eikagupt@qti.qualcomm.com>
---
 gcc/match.pd                             | 15 +++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr124097.c | 17 +++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr124097.c

Comments

Richard Biener March 9, 2026, 9:47 a.m. UTC | #1
On Mon, Mar 9, 2026 at 10:22 AM Eikansh Gupta <eikagupt@qti.qualcomm.com> wrote:
>
> Add two match.pd patterns that simplify COND_ADD when the mask is
> a strict greater-than comparison against zero and the else value equals
> one of the addition operands:
>
>   COND_ADD (a > 0, b, a, b)  ->  b + MAX_EXPR <a, 0>
>   COND_ADD (a > 0, a, b, b)  ->  b + MAX_EXPR <a, 0>

I'll note this has to wait for stage1.

Also I had expected a predicated add to be faster than a separate
max + add?  It's definitely more canonical on GIMPLE given it's a
single stmt vs. two.  Both suggest that match.pd isn't a good place
to do such a transform.

Richard.

>         PR 124097
>
> gcc/ChangeLog:
>
>         * match.pd (COND_ADD (a > 0, a, b, b)  ->  b + MAX_EXPR <a, 0>): New pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/pr124097.c: New test.
>
> Signed-off-by: Eikansh Gupta <eikagupt@qti.qualcomm.com>
> ---
>  gcc/match.pd                             | 15 +++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr124097.c | 17 +++++++++++++++++
>  2 files changed, 32 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 7f16fd4e081..2bb4f425895 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -11323,6 +11323,21 @@ and,
>             && fold_real_zero_addition_p (type, NULL_TREE, @4, 0)))
>     (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1)))
>
> +/* COND_ADD (a > 0, b, a, b) -> b + max (a, 0)
> +   COND_ADD (a > 0, a, b, b) -> b + max (a, 0) */
> +(simplify
> + (IFN_COND_ADD (gt @0 zerop@1) @2 @0 @2)
> + (if (FLOAT_TYPE_P (type)
> +      && !HONOR_NANS (type)
> +      && !HONOR_SIGNED_ZEROS (type))
> +  (plus @2 (max @0 @1))))
> +(simplify
> + (IFN_COND_ADD (gt @0 zerop@1) @0 @2 @2)
> + (if (FLOAT_TYPE_P (type)
> +      && !HONOR_NANS (type)
> +      && !HONOR_SIGNED_ZEROS (type))
> +  (plus @2 (max @0 @1))))
> +
>  /* Detect simplication for a conditional length reduction where
>
>     a = mask ? b : 0
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c b/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
> new file mode 100644
> index 00000000000..5769a2ca5d0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -ffast-math -march=armv9-a -fdump-tree-optimized" } */
> +
> +float __attribute__ ((noipa))
> +foo (float *a)
> +{
> +  float sum = 0.;
> +  for (int i = 0; i < 32000; i++)
> +    if (a[i] > (float) 0.)
> +      sum += a[i];
> +  return sum;
> +}
> +
> +/* The main vectorized loop should have COND_ADD simplified to MAX_EXPR + plus.
> +   The SVE epilogue retains COND_ADD */
> +/* { dg-final { scan-tree-dump-times { MAX_EXPR } 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times { \.COND_ADD } 2 "optimized" } } */
> --
> 2.34.1
>
Richard Sandiford March 10, 2026, 12:20 p.m. UTC | #2
Richard Biener <richard.guenther@gmail.com> writes:
> On Mon, Mar 9, 2026 at 10:22 AM Eikansh Gupta <eikagupt@qti.qualcomm.com> wrote:
>>
>> Add two match.pd patterns that simplify COND_ADD when the mask is
>> a strict greater-than comparison against zero and the else value equals
>> one of the addition operands:
>>
>>   COND_ADD (a > 0, b, a, b)  ->  b + MAX_EXPR <a, 0>
>>   COND_ADD (a > 0, a, b, b)  ->  b + MAX_EXPR <a, 0>
>
> I'll note this has to wait for stage1.
>
> Also I had expected a predicated add to be faster than a separate max
> + add?  It's
> definitely more canonical on GIMPLE given it's a single stmt vs. two.
> Both suggest
> that match.pd isn't a good place to do such transform.

If the gt is single-use then I suppose it's gt + cond_add vs max + add.
And add is arguably simpler than cond_add.  Would adding :s to the gt
make it more acceptable?

That said, having rules in this direction (conditional op to multiple
unconditional ops) sounds like it might risk cycles...

Richard

> Richard.
>
>>         PR 124097
>>
>> gcc/ChangeLog:
>>
>>         * match.pd (COND_ADD (a > 0, a, b, b)  ->  b + MAX_EXPR <a, 0>): New pattern.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.dg/tree-ssa/pr124097.c: New test.
>>
>> Signed-off-by: Eikansh Gupta <eikagupt@qti.qualcomm.com>
>> ---
>>  gcc/match.pd                             | 15 +++++++++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/pr124097.c | 17 +++++++++++++++++
>>  2 files changed, 32 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
>>
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index 7f16fd4e081..2bb4f425895 100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -11323,6 +11323,21 @@ and,
>>             && fold_real_zero_addition_p (type, NULL_TREE, @4, 0)))
>>     (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1)))
>>
>> +/* COND_ADD (a > 0, b, a, b) -> b + max (a, 0)
>> +   COND_ADD (a > 0, a, b, b) -> b + max (a, 0) */
>> +(simplify
>> + (IFN_COND_ADD (gt @0 zerop@1) @2 @0 @2)
>> + (if (FLOAT_TYPE_P (type)
>> +      && !HONOR_NANS (type)
>> +      && !HONOR_SIGNED_ZEROS (type))
>> +  (plus @2 (max @0 @1))))
>> +(simplify
>> + (IFN_COND_ADD (gt @0 zerop@1) @0 @2 @2)
>> + (if (FLOAT_TYPE_P (type)
>> +      && !HONOR_NANS (type)
>> +      && !HONOR_SIGNED_ZEROS (type))
>> +  (plus @2 (max @0 @1))))
>> +
>>  /* Detect simplication for a conditional length reduction where
>>
>>     a = mask ? b : 0
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c b/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
>> new file mode 100644
>> index 00000000000..5769a2ca5d0
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
>> @@ -0,0 +1,17 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3 -ffast-math -march=armv9-a -fdump-tree-optimized" } */
>> +
>> +float __attribute__ ((noipa))
>> +foo (float *a)
>> +{
>> +  float sum = 0.;
>> +  for (int i = 0; i < 32000; i++)
>> +    if (a[i] > (float) 0.)
>> +      sum += a[i];
>> +  return sum;
>> +}
>> +
>> +/* The main vectorized loop should have COND_ADD simplified to MAX_EXPR + plus.
>> +   The SVE epilogue retains COND_ADD */
>> +/* { dg-final { scan-tree-dump-times { MAX_EXPR } 2 "optimized" } } */
>> +/* { dg-final { scan-tree-dump-times { \.COND_ADD } 2 "optimized" } } */
>> --
>> 2.34.1
>>
Richard Biener March 10, 2026, 12:31 p.m. UTC | #3
On Tue, Mar 10, 2026 at 1:20 PM Richard Sandiford
<rdsandiford@googlemail.com> wrote:
>
> Richard Biener <richard.guenther@gmail.com> writes:
> > On Mon, Mar 9, 2026 at 10:22 AM Eikansh Gupta <eikagupt@qti.qualcomm.com> wrote:
> >>
> >> Add two match.pd patterns that simplify COND_ADD when the mask is
> >> a strict greater-than comparison against zero and the else value equals
> >> one of the addition operands:
> >>
> >>   COND_ADD (a > 0, b, a, b)  ->  b + MAX_EXPR <a, 0>
> >>   COND_ADD (a > 0, a, b, b)  ->  b + MAX_EXPR <a, 0>
> >
> > I'll note this has to wait for stage1.
> >
> > Also I had expected a predicated add to be faster than a separate max
> > + add?  It's
> > definitely more canonical on GIMPLE given it's a single stmt vs. two.
> > Both suggest
> > that match.pd isn't a good place to do such transform.
>
> If the gt is single-use then I suppose it's gt + cond_add vs max + add.
> And add is arguably simpler than cond_add.  Would adding :s to the gt
> make it more acceptable?

Yes, this would at least clarify the intent.  The pattern also lacks
a check that unconditional add and max are supported by the target.

> That said, having rules in this direction (condition op to multiple
> unconditional ops) sounds like it might risk cycles...

If we also have the reverse, then yes.

Richard.

> Richard
>
> > Richard.
> >
> >>         PR 124097
> >>
> >> gcc/ChangeLog:
> >>
> >>         * match.pd (COND_ADD (a > 0, a, b, b)  ->  b + MAX_EXPR <a, 0>): New pattern.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>         * gcc.dg/tree-ssa/pr124097.c: New test.
> >>
> >> Signed-off-by: Eikansh Gupta <eikagupt@qti.qualcomm.com>
> >> ---
> >>  gcc/match.pd                             | 15 +++++++++++++++
> >>  gcc/testsuite/gcc.dg/tree-ssa/pr124097.c | 17 +++++++++++++++++
> >>  2 files changed, 32 insertions(+)
> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
> >>
> >> diff --git a/gcc/match.pd b/gcc/match.pd
> >> index 7f16fd4e081..2bb4f425895 100644
> >> --- a/gcc/match.pd
> >> +++ b/gcc/match.pd
> >> @@ -11323,6 +11323,21 @@ and,
> >>             && fold_real_zero_addition_p (type, NULL_TREE, @4, 0)))
> >>     (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1)))
> >>
> >> +/* COND_ADD (a > 0, b, a, b) -> b + max (a, 0)
> >> +   COND_ADD (a > 0, a, b, b) -> b + max (a, 0) */
> >> +(simplify
> >> + (IFN_COND_ADD (gt @0 zerop@1) @2 @0 @2)
> >> + (if (FLOAT_TYPE_P (type)
> >> +      && !HONOR_NANS (type)
> >> +      && !HONOR_SIGNED_ZEROS (type))
> >> +  (plus @2 (max @0 @1))))
> >> +(simplify
> >> + (IFN_COND_ADD (gt @0 zerop@1) @0 @2 @2)
> >> + (if (FLOAT_TYPE_P (type)
> >> +      && !HONOR_NANS (type)
> >> +      && !HONOR_SIGNED_ZEROS (type))
> >> +  (plus @2 (max @0 @1))))
> >> +
> >>  /* Detect simplication for a conditional length reduction where
> >>
> >>     a = mask ? b : 0
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c b/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
> >> new file mode 100644
> >> index 00000000000..5769a2ca5d0
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
> >> @@ -0,0 +1,17 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O3 -ffast-math -march=armv9-a -fdump-tree-optimized" } */
> >> +
> >> +float __attribute__ ((noipa))
> >> +foo (float *a)
> >> +{
> >> +  float sum = 0.;
> >> +  for (int i = 0; i < 32000; i++)
> >> +    if (a[i] > (float) 0.)
> >> +      sum += a[i];
> >> +  return sum;
> >> +}
> >> +
> >> +/* The main vectorized loop should have COND_ADD simplified to MAX_EXPR + plus.
> >> +   The SVE epilogue retains COND_ADD */
> >> +/* { dg-final { scan-tree-dump-times { MAX_EXPR } 2 "optimized" } } */
> >> +/* { dg-final { scan-tree-dump-times { \.COND_ADD } 2 "optimized" } } */
> >> --
> >> 2.34.1
> >>
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 7f16fd4e081..2bb4f425895 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -11323,6 +11323,21 @@  and,
 	    && fold_real_zero_addition_p (type, NULL_TREE, @4, 0)))
    (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1)))
 
+/* COND_ADD (a > 0, b, a, b) -> b + max (a, 0)
+   COND_ADD (a > 0, a, b, b) -> b + max (a, 0) */
+(simplify
+ (IFN_COND_ADD (gt @0 zerop@1) @2 @0 @2)
+ (if (FLOAT_TYPE_P (type)
+      && !HONOR_NANS (type)
+      && !HONOR_SIGNED_ZEROS (type))
+  (plus @2 (max @0 @1))))
+(simplify
+ (IFN_COND_ADD (gt @0 zerop@1) @0 @2 @2)
+ (if (FLOAT_TYPE_P (type)
+      && !HONOR_NANS (type)
+      && !HONOR_SIGNED_ZEROS (type))
+  (plus @2 (max @0 @1))))
+
 /* Detect simplication for a conditional length reduction where
 
    a = mask ? b : 0
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c b/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
new file mode 100644
index 00000000000..5769a2ca5d0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr124097.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -ffast-math -march=armv9-a -fdump-tree-optimized" } */
+
+float __attribute__ ((noipa))
+foo (float *a)
+{
+  float sum = 0.;
+  for (int i = 0; i < 32000; i++)
+    if (a[i] > (float) 0.)
+      sum += a[i];
+  return sum;
+}
+
+/* The main vectorized loop should have COND_ADD simplified to MAX_EXPR + plus.
+   The SVE epilogue retains COND_ADD */
+/* { dg-final { scan-tree-dump-times { MAX_EXPR } 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times { \.COND_ADD } 2 "optimized" } } */