diff mbox series

match.pd: rewrite select to branchless expression

Message ID adca186b-24e1-20da-9e4d-0acb6754f133@rivosinc.com
State New
Headers show
Series match.pd: rewrite select to branchless expression | expand

Commit Message

Michael Collison Nov. 8, 2022, 8:02 p.m. UTC
This patches transforms (cond (and (x , 0x1) == 0), y, (z op y)) into 
(-(and (x , 0x1)) & z ) op y, where op is a '^' or a '|'. It also 
transforms (cond (and (x , 0x1) != 0), (z op y), y ) into (-(and (x , 
0x1)) & z ) op y.

Matching this patterns allows GCC to generate branchless code for one of 
the functions in coremark.

Bootstrapped and tested on x86 and RISC-V. Okay?

Michael.

2022-11-08  Michael Collison  <collison@rivosinc.com>

     * match.pd ((cond (and (x , 0x1) == 0), y, (z op y) )
     -> (-(and (x , 0x1)) & z ) op y)

2022-11-08  Michael Collison  <collison@rivosinc.com>

     * gcc.dg/tree-ssa/branchless-cond.c: New test.

---
  gcc/match.pd                                  | 22 ++++++++++++++++
  .../gcc.dg/tree-ssa/branchless-cond.c         | 26 +++++++++++++++++++
  2 files changed, 48 insertions(+)
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c

Comments

Andrew Pinski Nov. 8, 2022, 8:15 p.m. UTC | #1
On Tue, Nov 8, 2022 at 12:02 PM Michael Collison <collison@rivosinc.com> wrote:
>
> This patches transforms (cond (and (x , 0x1) == 0), y, (z op y)) into
> (-(and (x , 0x1)) & z ) op y, where op is a '^' or a '|'. It also
> transforms (cond (and (x , 0x1) != 0), (z op y), y ) into (-(and (x ,
> 0x1)) & z ) op y.
>
> Matching this patterns allows GCC to generate branchless code for one of
> the functions in coremark.
>
> Bootstrapped and tested on x86 and RISC-V. Okay?

This seems like a (much) reduced (simplified?) version of
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/584411.html .
I have not had time for the last year to go through the comments on
that patch and resubmit it though.
It seems like you are aiming for one specific case in coremarks rather
than a more generic fix too.

Thanks,
Andrew Pinski

>
> Michael.
>
> 2022-11-08  Michael Collison  <collison@rivosinc.com>
>
>      * match.pd ((cond (and (x , 0x1) == 0), y, (z op y) )
>      -> (-(and (x , 0x1)) & z ) op y)
>
> 2022-11-08  Michael Collison  <collison@rivosinc.com>
>
>      * gcc.dg/tree-ssa/branchless-cond.c: New test.
>
> ---
>   gcc/match.pd                                  | 22 ++++++++++++++++
>   .../gcc.dg/tree-ssa/branchless-cond.c         | 26 +++++++++++++++++++
>   2 files changed, 48 insertions(+)
>   create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 194ba8f5188..722f517ac6d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3486,6 +3486,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
>     (max @2 @1))
>
> +/* (cond (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z )
> ^ y */
> +(for op (bit_xor bit_ior)
> + (simplify
> +  (cond (eq (bit_and @0 integer_onep@1)
> +            integer_zerop)
> +        @2
> +        (op:c @3 @2))
> +  (if (INTEGRAL_TYPE_P (type)
> +       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
> +       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
> +
> +/* (cond (and (x , 0x1) != 0), (z ^ y), y ) -> (-(and (x , 0x1)) & z )
> ^ y */
> +(for op (bit_xor bit_ior)
> + (simplify
> +  (cond (ne (bit_and @0 integer_onep@1)
> +            integer_zerop)
> +    (op:c @3 @2)
> +        @2)
> +  (if (INTEGRAL_TYPE_P (type)
> +       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
> +       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
> +
>   /* Simplifications of shift and rotates.  */
>
>   (for rotate (lrotate rrotate)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> new file mode 100644
> index 00000000000..68087ae6568
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +int f1(unsigned int x, unsigned int y, unsigned int z)
> +{
> +  return ((x & 1) == 0) ? y : z ^ y;
> +}
> +
> +int f2(unsigned int x, unsigned int y, unsigned int z)
> +{
> +  return ((x & 1) != 0) ? z ^ y : y;
> +}
> +
> +int f3(unsigned int x, unsigned int y, unsigned int z)
> +{
> +  return ((x & 1) == 0) ? y : z | y;
> +}
> +
> +int f4(unsigned int x, unsigned int y, unsigned int z)
> +{
> +  return ((x & 1) != 0) ? z | y : y;
> +}
> +
> +/* { dg-final { scan-tree-dump-times " -" 4 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " & " 8 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "if" "optimized" } } */
> --
> 2.34.1
>
>
>
>
Richard Biener Nov. 9, 2022, 7:41 a.m. UTC | #2
On Tue, Nov 8, 2022 at 9:02 PM Michael Collison <collison@rivosinc.com> wrote:
>
> This patches transforms (cond (and (x , 0x1) == 0), y, (z op y)) into
> (-(and (x , 0x1)) & z ) op y, where op is a '^' or a '|'. It also
> transforms (cond (and (x , 0x1) != 0), (z op y), y ) into (-(and (x ,
> 0x1)) & z ) op y.
>
> Matching this patterns allows GCC to generate branchless code for one of
> the functions in coremark.
>
> Bootstrapped and tested on x86 and RISC-V. Okay?
>
> Michael.
>
> 2022-11-08  Michael Collison  <collison@rivosinc.com>
>
>      * match.pd ((cond (and (x , 0x1) == 0), y, (z op y) )
>      -> (-(and (x , 0x1)) & z ) op y)
>
> 2022-11-08  Michael Collison  <collison@rivosinc.com>
>
>      * gcc.dg/tree-ssa/branchless-cond.c: New test.
>
> ---
>   gcc/match.pd                                  | 22 ++++++++++++++++
>   .../gcc.dg/tree-ssa/branchless-cond.c         | 26 +++++++++++++++++++
>   2 files changed, 48 insertions(+)
>   create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 194ba8f5188..722f517ac6d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3486,6 +3486,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
>     (max @2 @1))
>
> +/* (cond (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z )
> ^ y */

Please write the match as a C expression in the comment, as present
it's a weird mix.  So x & 0x1 == 0 ? y : z <op> y -> (-(typeof(y))(x &
0x1) & z) <op> y

> +(for op (bit_xor bit_ior)
> + (simplify
> +  (cond (eq (bit_and @0 integer_onep@1)
> +            integer_zerop)
> +        @2
> +        (op:c @3 @2))
> +  (if (INTEGRAL_TYPE_P (type)
> +       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
> +       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))

Since you are literally keeping (bit_and @0 @1) and not matching @0 with
anything I suspect you could instead use

 (simplify (cond (eq zero_one_valued_p@0 integer_zerop) ...

eventually extending that to cover bit_and with one.  Do you need to guard
this against 'type' being a signed/unsigned 1-bit precision integer?

> +
> +/* (cond (and (x , 0x1) != 0), (z ^ y), y ) -> (-(and (x , 0x1)) & z )
> ^ y */
> +(for op (bit_xor bit_ior)
> + (simplify
> +  (cond (ne (bit_and @0 integer_onep@1)
> +            integer_zerop)
> +    (op:c @3 @2)
> +        @2)
> +  (if (INTEGRAL_TYPE_P (type)
> +       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
> +       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
> +
>   /* Simplifications of shift and rotates.  */
>
>   (for rotate (lrotate rrotate)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> new file mode 100644
> index 00000000000..68087ae6568
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +int f1(unsigned int x, unsigned int y, unsigned int z)
> +{
> +  return ((x & 1) == 0) ? y : z ^ y;
> +}
> +
> +int f2(unsigned int x, unsigned int y, unsigned int z)
> +{
> +  return ((x & 1) != 0) ? z ^ y : y;
> +}
> +
> +int f3(unsigned int x, unsigned int y, unsigned int z)
> +{
> +  return ((x & 1) == 0) ? y : z | y;
> +}
> +
> +int f4(unsigned int x, unsigned int y, unsigned int z)
> +{
> +  return ((x & 1) != 0) ? z | y : y;
> +}
> +
> +/* { dg-final { scan-tree-dump-times " -" 4 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " & " 8 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "if" "optimized" } } */
> --
> 2.34.1
>
>
>
>
Michael Collison Nov. 9, 2022, 9:06 p.m. UTC | #3
Richard,

Thanks for your feedback. I want to make sure I am following what you 
are recommending. Are you suggesting changing:

(for op (bit_xor bit_ior)
(simplify
(cond (eq (bit_and @0 integer_onep@1)
integer_zerop)
@2
(op:c @3 @2))
(if (INTEGRAL_TYPE_P (type)
&& (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
(op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))


to

(for op (bit_xor bit_ior)
  (simplify
   (cond (eq zero_one_valued_p@0
             integer_zerop)
         @1
         (op:c @2 @1))
   (if (INTEGRAL_TYPE_P (type)
        && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
        (op (bit_and (negate (convert:type (bit_and @0 { build_one_cst 
(type); }))) @2) @1))))


On 11/9/22 02:41, Richard Biener wrote:
> On Tue, Nov 8, 2022 at 9:02 PM Michael Collison <collison@rivosinc.com> wrote:
>> This patches transforms (cond (and (x , 0x1) == 0), y, (z op y)) into
>> (-(and (x , 0x1)) & z ) op y, where op is a '^' or a '|'. It also
>> transforms (cond (and (x , 0x1) != 0), (z op y), y ) into (-(and (x ,
>> 0x1)) & z ) op y.
>>
>> Matching this patterns allows GCC to generate branchless code for one of
>> the functions in coremark.
>>
>> Bootstrapped and tested on x86 and RISC-V. Okay?
>>
>> Michael.
>>
>> 2022-11-08  Michael Collison  <collison@rivosinc.com>
>>
>>       * match.pd ((cond (and (x , 0x1) == 0), y, (z op y) )
>>       -> (-(and (x , 0x1)) & z ) op y)
>>
>> 2022-11-08  Michael Collison  <collison@rivosinc.com>
>>
>>       * gcc.dg/tree-ssa/branchless-cond.c: New test.
>>
>> ---
>>    gcc/match.pd                                  | 22 ++++++++++++++++
>>    .../gcc.dg/tree-ssa/branchless-cond.c         | 26 +++++++++++++++++++
>>    2 files changed, 48 insertions(+)
>>    create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
>>
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index 194ba8f5188..722f517ac6d 100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -3486,6 +3486,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>      (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
>>      (max @2 @1))
>>
>> +/* (cond (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z )
>> ^ y */
> Please write the match as a C expression in the comment, as present
> it's a weird mix.  So x & 0x1 == 0 ? y : z <op> y -> (-(typeof(y))(x &
> 0x1) & z) <op> y
>
>> +(for op (bit_xor bit_ior)
>> + (simplify
>> +  (cond (eq (bit_and @0 integer_onep@1)
>> +            integer_zerop)
>> +        @2
>> +        (op:c @3 @2))
>> +  (if (INTEGRAL_TYPE_P (type)
>> +       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
>> +       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
> Since you are literally keeping (bit_and @0 @1) and not matching @0 with
> anything I suspect you could instead use
>
>   (simplify (cond (eq zero_one_valued_p@0 integer_zerop) ...
>
> eventually extending that to cover bit_and with one.  Do you need to guard
> this against 'type' being a signed/unsigned 1-bit precision integer?
>
>> +
>> +/* (cond (and (x , 0x1) != 0), (z ^ y), y ) -> (-(and (x , 0x1)) & z )
>> ^ y */
>> +(for op (bit_xor bit_ior)
>> + (simplify
>> +  (cond (ne (bit_and @0 integer_onep@1)
>> +            integer_zerop)
>> +    (op:c @3 @2)
>> +        @2)
>> +  (if (INTEGRAL_TYPE_P (type)
>> +       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
>> +       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
>> +
>>    /* Simplifications of shift and rotates.  */
>>
>>    (for rotate (lrotate rrotate)
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
>> new file mode 100644
>> index 00000000000..68087ae6568
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
>> @@ -0,0 +1,26 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-optimized" } */
>> +
>> +int f1(unsigned int x, unsigned int y, unsigned int z)
>> +{
>> +  return ((x & 1) == 0) ? y : z ^ y;
>> +}
>> +
>> +int f2(unsigned int x, unsigned int y, unsigned int z)
>> +{
>> +  return ((x & 1) != 0) ? z ^ y : y;
>> +}
>> +
>> +int f3(unsigned int x, unsigned int y, unsigned int z)
>> +{
>> +  return ((x & 1) == 0) ? y : z | y;
>> +}
>> +
>> +int f4(unsigned int x, unsigned int y, unsigned int z)
>> +{
>> +  return ((x & 1) != 0) ? z | y : y;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times " -" 4 "optimized" } } */
>> +/* { dg-final { scan-tree-dump-times " & " 8 "optimized" } } */
>> +/* { dg-final { scan-tree-dump-not "if" "optimized" } } */
>> --
>> 2.34.1
>>
>>
>>
>>
Richard Biener Nov. 10, 2022, 9:03 a.m. UTC | #4
On Wed, Nov 9, 2022 at 10:06 PM Michael Collison <collison@rivosinc.com> wrote:
>
> Richard,
>
> Thanks for your feedback. I want to make sure I am following what you
> are recommending. Are you suggesting changing:
>
> (for op (bit_xor bit_ior)
> (simplify
> (cond (eq (bit_and @0 integer_onep@1)
> integer_zerop)
> @2
> (op:c @3 @2))
> (if (INTEGRAL_TYPE_P (type)
> && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
> (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
>
>
> to
>
> (for op (bit_xor bit_ior)
>   (simplify
>    (cond (eq zero_one_valued_p@0
>              integer_zerop)
>          @1
>          (op:c @2 @1))
>    (if (INTEGRAL_TYPE_P (type)
>         && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
>         (op (bit_and (negate (convert:type (bit_and @0 { build_one_cst
> (type); }))) @2) @1))))

in the replacement you'd simply use

     (op (bit_and (negate (convert:type @0)) @2) @1)))

that is, convert the [0,1] valued @0 to 'type' directly.  At least I can't see
how that can go wrong?

>
>
> On 11/9/22 02:41, Richard Biener wrote:
> > On Tue, Nov 8, 2022 at 9:02 PM Michael Collison <collison@rivosinc.com> wrote:
> >> This patches transforms (cond (and (x , 0x1) == 0), y, (z op y)) into
> >> (-(and (x , 0x1)) & z ) op y, where op is a '^' or a '|'. It also
> >> transforms (cond (and (x , 0x1) != 0), (z op y), y ) into (-(and (x ,
> >> 0x1)) & z ) op y.
> >>
> >> Matching this patterns allows GCC to generate branchless code for one of
> >> the functions in coremark.
> >>
> >> Bootstrapped and tested on x86 and RISC-V. Okay?
> >>
> >> Michael.
> >>
> >> 2022-11-08  Michael Collison  <collison@rivosinc.com>
> >>
> >>       * match.pd ((cond (and (x , 0x1) == 0), y, (z op y) )
> >>       -> (-(and (x , 0x1)) & z ) op y)
> >>
> >> 2022-11-08  Michael Collison  <collison@rivosinc.com>
> >>
> >>       * gcc.dg/tree-ssa/branchless-cond.c: New test.
> >>
> >> ---
> >>    gcc/match.pd                                  | 22 ++++++++++++++++
> >>    .../gcc.dg/tree-ssa/branchless-cond.c         | 26 +++++++++++++++++++
> >>    2 files changed, 48 insertions(+)
> >>    create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> >>
> >> diff --git a/gcc/match.pd b/gcc/match.pd
> >> index 194ba8f5188..722f517ac6d 100644
> >> --- a/gcc/match.pd
> >> +++ b/gcc/match.pd
> >> @@ -3486,6 +3486,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >>      (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
> >>      (max @2 @1))
> >>
> >> +/* (cond (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z )
> >> ^ y */
> > Please write the match as a C expression in the comment, as present
> > it's a weird mix.  So x & 0x1 == 0 ? y : z <op> y -> (-(typeof(y))(x &
> > 0x1) & z) <op> y
> >
> >> +(for op (bit_xor bit_ior)
> >> + (simplify
> >> +  (cond (eq (bit_and @0 integer_onep@1)
> >> +            integer_zerop)
> >> +        @2
> >> +        (op:c @3 @2))
> >> +  (if (INTEGRAL_TYPE_P (type)
> >> +       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
> >> +       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
> > Since you are literally keeping (bit_and @0 @1) and not matching @0 with
> > anything I suspect you could instead use
> >
> >   (simplify (cond (eq zero_one_valued_p@0 integer_zerop) ...
> >
> > eventually extending that to cover bit_and with one.  Do you need to guard
> > this against 'type' being a signed/unsigned 1-bit precision integer?
> >
> >> +
> >> +/* (cond (and (x , 0x1) != 0), (z ^ y), y ) -> (-(and (x , 0x1)) & z )
> >> ^ y */
> >> +(for op (bit_xor bit_ior)
> >> + (simplify
> >> +  (cond (ne (bit_and @0 integer_onep@1)
> >> +            integer_zerop)
> >> +    (op:c @3 @2)
> >> +        @2)
> >> +  (if (INTEGRAL_TYPE_P (type)
> >> +       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
> >> +       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
> >> +
> >>    /* Simplifications of shift and rotates.  */
> >>
> >>    (for rotate (lrotate rrotate)
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> >> b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> >> new file mode 100644
> >> index 00000000000..68087ae6568
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
> >> @@ -0,0 +1,26 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> >> +
> >> +int f1(unsigned int x, unsigned int y, unsigned int z)
> >> +{
> >> +  return ((x & 1) == 0) ? y : z ^ y;
> >> +}
> >> +
> >> +int f2(unsigned int x, unsigned int y, unsigned int z)
> >> +{
> >> +  return ((x & 1) != 0) ? z ^ y : y;
> >> +}
> >> +
> >> +int f3(unsigned int x, unsigned int y, unsigned int z)
> >> +{
> >> +  return ((x & 1) == 0) ? y : z | y;
> >> +}
> >> +
> >> +int f4(unsigned int x, unsigned int y, unsigned int z)
> >> +{
> >> +  return ((x & 1) != 0) ? z | y : y;
> >> +}
> >> +
> >> +/* { dg-final { scan-tree-dump-times " -" 4 "optimized" } } */
> >> +/* { dg-final { scan-tree-dump-times " & " 8 "optimized" } } */
> >> +/* { dg-final { scan-tree-dump-not "if" "optimized" } } */
> >> --
> >> 2.34.1
> >>
> >>
> >>
> >>
Jeff Law Nov. 17, 2022, 9:59 p.m. UTC | #5
On 11/8/22 13:15, Andrew Pinski via Gcc-patches wrote:
> On Tue, Nov 8, 2022 at 12:02 PM Michael Collison <collison@rivosinc.com> wrote:
>> This patches transforms (cond (and (x , 0x1) == 0), y, (z op y)) into
>> (-(and (x , 0x1)) & z ) op y, where op is a '^' or a '|'. It also
>> transforms (cond (and (x , 0x1) != 0), (z op y), y ) into (-(and (x ,
>> 0x1)) & z ) op y.
>>
>> Matching this patterns allows GCC to generate branchless code for one of
>> the functions in coremark.
>>
>> Bootstrapped and tested on x86 and RISC-V. Okay?
> This seems like a (much) reduced (simplified?) version of
> https://gcc.gnu.org/pipermail/gcc-patches/2021-November/584411.html .
> I have not had time for the last year to go through the comments on
> that patch and resubmit it though.
> It seems like you are aiming for one specific case in coremarks rather
> than a more generic fix too.

I'm fairly confident it was developed independently.  Michael did this 
transformation for LLVM and reached out to me a month or two ago for 
suggestions on the GCC implementation.


My recollection is I suggested phi-opt or match.pd with a slight 
preference for phi-opt as I wasn't offhand sure if we'd have a form 
suitable for match.pd.


THe pattern is just a conditional xor/ior with all is said and done.  
While the inspiration comes from coremark, I don't think it's supposed 
to be specific to coremark.


jeff
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 194ba8f5188..722f517ac6d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3486,6 +3486,28 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1)
    (max @2 @1))

+/* (cond (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) 
^ y */
+(for op (bit_xor bit_ior)
+ (simplify
+  (cond (eq (bit_and @0 integer_onep@1)
+            integer_zerop)
+        @2
+        (op:c @3 @2))
+  (if (INTEGRAL_TYPE_P (type)
+       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
+       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
+
+/* (cond (and (x , 0x1) != 0), (z ^ y), y ) -> (-(and (x , 0x1)) & z ) 
^ y */
+(for op (bit_xor bit_ior)
+ (simplify
+  (cond (ne (bit_and @0 integer_onep@1)
+            integer_zerop)
+    (op:c @3 @2)
+        @2)
+  (if (INTEGRAL_TYPE_P (type)
+       && (INTEGRAL_TYPE_P (TREE_TYPE (@0))))
+       (op (bit_and (negate (convert:type (bit_and @0 @1))) @3) @2))))
+
  /* Simplifications of shift and rotates.  */

  (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c 
b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
new file mode 100644
index 00000000000..68087ae6568
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int f1(unsigned int x, unsigned int y, unsigned int z)
+{
+  return ((x & 1) == 0) ? y : z ^ y;
+}
+
+int f2(unsigned int x, unsigned int y, unsigned int z)
+{
+  return ((x & 1) != 0) ? z ^ y : y;
+}
+
+int f3(unsigned int x, unsigned int y, unsigned int z)
+{
+  return ((x & 1) == 0) ? y : z | y;
+}
+
+int f4(unsigned int x, unsigned int y, unsigned int z)
+{
+  return ((x & 1) != 0) ? z | y : y;
+}
+
+/* { dg-final { scan-tree-dump-times " -" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " & " 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "if" "optimized" } } */