diff mbox series

[v3,2/4] ifcvt: Allow more operations in multiple set if conversion

Message ID 20230830101400.1539313-3-manolis.tsamis@vrull.eu
State New
Headers show
Series ifcvt: Allow if conversion of arithmetic in basic blocks with multiple sets | expand

Commit Message

Manolis Tsamis Aug. 30, 2023, 10:13 a.m. UTC
Currently the operations allowed for if conversion of a basic block with
multiple sets are few, namely REG, SUBREG and CONST_INT (as controlled by
bb_ok_for_noce_convert_multiple_sets).

This commit allows more operations (arithmetic, compare, etc) to participate
in if conversion. The target's profitability hook and ifcvt's costing is
expected to reject sequences that are unprofitable.

This is especially useful for targets which provide a rich selection of
conditional instructions (like aarch64 which has cinc, csneg, csinv, ccmp, ...)
which are currently not used in basic blocks with more than a single set.

gcc/ChangeLog:

	* ifcvt.cc (try_emit_cmove_seq): Modify comments.
	(noce_convert_multiple_sets_1): Modify comments.
	(bb_ok_for_noce_convert_multiple_sets): Allow more operations.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/ifcvt_multiple_sets_arithm.c: New test.

Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
---

Changes in v3:
        - Add SCALAR_INT_MODE_P check in bb_ok_for_noce_convert_multiple_sets.
        - Allow rewiring of multiple regs.
        - Refactor code with noce_multiple_sets_info.
        - Remove old code for subregs.

 gcc/ifcvt.cc                                  | 63 ++++++++++-----
 .../aarch64/ifcvt_multiple_sets_arithm.c      | 79 +++++++++++++++++++
 2 files changed, 123 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c

Comments

Richard Sandiford Oct. 19, 2023, 7:46 p.m. UTC | #1
Manolis Tsamis <manolis.tsamis@vrull.eu> writes:
> Currently the operations allowed for if conversion of a basic block with
> multiple sets are few, namely REG, SUBREG and CONST_INT (as controlled by
> bb_ok_for_noce_convert_multiple_sets).
>
> This commit allows more operations (arithmetic, compare, etc) to participate
> in if conversion. The target's profitability hook and ifcvt's costing is
> expected to reject sequences that are unprofitable.
>
> This is especially useful for targets which provide a rich selection of
> conditional instructions (like aarch64 which has cinc, csneg, csinv, ccmp, ...)
> which are currently not used in basic blocks with more than a single set.
>
> gcc/ChangeLog:
>
> 	* ifcvt.cc (try_emit_cmove_seq): Modify comments.
> 	(noce_convert_multiple_sets_1): Modify comments.
> 	(bb_ok_for_noce_convert_multiple_sets): Allow more operations.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/ifcvt_multiple_sets_arithm.c: New test.
>
> Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> ---
>
> Changes in v3:
>         - Add SCALAR_INT_MODE_P check in bb_ok_for_noce_convert_multiple_sets.
>         - Allow rewiring of multiple regs.
>         - Refactor code with noce_multiple_sets_info.
>         - Remove old code for subregs.
>
>  gcc/ifcvt.cc                                  | 63 ++++++++++-----
>  .../aarch64/ifcvt_multiple_sets_arithm.c      | 79 +++++++++++++++++++
>  2 files changed, 123 insertions(+), 19 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
>
> diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> index 3273aeca125..efe8ab1577a 100644
> --- a/gcc/ifcvt.cc
> +++ b/gcc/ifcvt.cc
> @@ -3215,13 +3215,13 @@ try_emit_cmove_seq (struct noce_if_info *if_info, rtx temp,
>  /* We have something like:
>  
>       if (x > y)
> -       { i = a; j = b; k = c; }
> +       { i = EXPR_A; j = EXPR_B; k = EXPR_C; }
>  
>     Make it:
>  
> -     tmp_i = (x > y) ? a : i;
> -     tmp_j = (x > y) ? b : j;
> -     tmp_k = (x > y) ? c : k;
> +     tmp_i = (x > y) ? EXPR_A : i;
> +     tmp_j = (x > y) ? EXPR_B : j;
> +     tmp_k = (x > y) ? EXPR_C : k;
>       i = tmp_i;
>       j = tmp_j;
>       k = tmp_k;
> @@ -3637,11 +3637,10 @@ noce_convert_multiple_sets_1 (struct noce_if_info *if_info,
>  
>  
>  
> -/* Return true iff basic block TEST_BB is comprised of only
> -   (SET (REG) (REG)) insns suitable for conversion to a series
> -   of conditional moves.  Also check that we have more than one set
> -   (other routines can handle a single set better than we would), and
> -   fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
> +/* Return true iff basic block TEST_BB is suitable for conversion to a
> +   series of conditional moves.  Also check that we have more than one
> +   set (other routines can handle a single set better than we would),
> +   and fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
>     through the insns store the sum of their potential costs in COST.  */
>  
>  static bool
> @@ -3667,20 +3666,46 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
>        rtx dest = SET_DEST (set);
>        rtx src = SET_SRC (set);
>  
> -      /* We can possibly relax this, but for now only handle REG to REG
> -	 (including subreg) moves.  This avoids any issues that might come
> -	 from introducing loads/stores that might violate data-race-freedom
> -	 guarantees.  */
> -      if (!REG_P (dest))
> +      /* Do not handle anything involving memory loads/stores since it might
> +	 violate data-race-freedom guarantees.  */
> +      if (!REG_P (dest) || contains_mem_rtx_p (src))
> +	return false;
> +
> +      if (!SCALAR_INT_MODE_P (GET_MODE (src)))
>  	return false;
>  
> -      if (!((REG_P (src) || CONSTANT_P (src))
> -	    || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
> -	      && subreg_lowpart_p (src))))
> +      /* Allow a wide range of operations and let the costing function decide
> +	 if the conversion is worth it later.  */
> +      enum rtx_code code = GET_CODE (src);
> +      if (!(CONSTANT_P (src)
> +	    || code == REG
> +	    || code == SUBREG
> +	    || code == ZERO_EXTEND
> +	    || code == SIGN_EXTEND
> +	    || code == NOT
> +	    || code == NEG
> +	    || code == PLUS
> +	    || code == MINUS
> +	    || code == AND
> +	    || code == IOR
> +	    || code == MULT
> +	    || code == ASHIFT
> +	    || code == ASHIFTRT
> +	    || code == NE
> +	    || code == EQ
> +	    || code == GE
> +	    || code == GT
> +	    || code == LE
> +	    || code == LT
> +	    || code == GEU
> +	    || code == GTU
> +	    || code == LEU
> +	    || code == LTU
> +	    || code == COMPARE))
>  	return false;

I'm nervous about lists of operations like these, for two reasons:

(1) It isn't obvious what criteria are used to select the codes.

(2) It requires the top-level code to belong to a given set, but it
    allows subrtxes of src to be arbitrarily complex.  E.g. (to pick
    a silly example) a toplevel (popcount ...) would be rejected, but
    (plus (popcount ...) (const_int 1)) would be OK.

Could we just remove this condition instead?

Thanks,
Richard

>  
> -      /* Destination must be appropriate for a conditional write.  */
> -      if (!noce_operand_ok (dest))
> +      /* Destination and source must be appropriate.  */
> +      if (!noce_operand_ok (dest) || !noce_operand_ok (src))
>  	return false;
>  
>        /* We must be able to conditionally move in this mode.  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> new file mode 100644
> index 00000000000..d977f4d62ec
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> @@ -0,0 +1,79 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-rtl-ce1" } */
> +
> +void sink2(int, int);
> +void sink3(int, int, int);
> +
> +void cond1(int cond, int x, int y)
> +{
> +  if (cond)
> +    {
> +      x = x << 4;
> +      y = 1;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +void cond2(int cond, int x, int y)
> +{
> +  if (cond)
> +    {
> +      x++;
> +      y++;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +void cond3(int cond, int x1, int x2, int x3)
> +{
> +  if (cond)
> +    {
> +      x1++;
> +      x2++;
> +      x3++;
> +    }
> +
> +  sink3(x1, x2, x3);
> +}
> +
> +void cond4(int cond, int x, int y)
> +{
> +  if (cond)
> +    {
> +      x += 2;
> +      y += 3;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +void cond5(int cond, int x, int y, int r1, int r2)
> +{
> +  if (cond)
> +    {
> +      x = r1 + 2;
> +      y = r2 - 34;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +void cond6(int cond, int x, int y)
> +{
> +  if (cond)
> +    {
> +      x = -x;
> +      y = ~y;
> +    }
> +
> +  sink2(x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "cinc\t" 5 } } */
> +/* { dg-final { scan-assembler-times "csneg\t" 1 } } */
> +/* { dg-final { scan-assembler-times "csinv\t" 1 } } */
> +/* { dg-final { scan-assembler "csel\t" } } */
> +
> +/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_convert_multiple_sets" 6 "ce1" } } */
> \ No newline at end of file
Jeff Law Nov. 10, 2023, 9:53 p.m. UTC | #2
On 10/19/23 13:46, Richard Sandiford wrote:
>> +      /* Allow a wide range of operations and let the costing function decide
>> +	 if the conversion is worth it later.  */
>> +      enum rtx_code code = GET_CODE (src);
>> +      if (!(CONSTANT_P (src)
>> +	    || code == REG
>> +	    || code == SUBREG
>> +	    || code == ZERO_EXTEND
>> +	    || code == SIGN_EXTEND
>> +	    || code == NOT
>> +	    || code == NEG
>> +	    || code == PLUS
>> +	    || code == MINUS
>> +	    || code == AND
>> +	    || code == IOR
>> +	    || code == MULT
>> +	    || code == ASHIFT
>> +	    || code == ASHIFTRT
>> +	    || code == NE
>> +	    || code == EQ
>> +	    || code == GE
>> +	    || code == GT
>> +	    || code == LE
>> +	    || code == LT
>> +	    || code == GEU
>> +	    || code == GTU
>> +	    || code == LEU
>> +	    || code == LTU
>> +	    || code == COMPARE))
>>   	return false;
> 
> I'm nervous about lists of operations like these, for two reasons:
> 
> (1) It isn't obvious what criteria are used to select the codes.
> 
> (2) It requires the top-level code to belong to a given set, but it
>      allows subrtxes of src to be arbitrarily complex.  E.g. (to pick
>      a silly example) a toplevel (popcount ...) would be rejected, but
>      (plus (popcount ...) (const_int 1)) would be OK.
> 
> Could we just remove this condition instead?
I'd be all for that.    We've actually got a similar problem in Joern's 
ext-dce code that I'm working on.  At least in that case I think we'll 
be able to enumerate how/why things are on the list if we still need it 
after the cleanup phase.

So I think the guidance on patch #2 would be to remove the list entirely 
if we can.

jeff
Manolis Tsamis Nov. 13, 2023, 12:47 p.m. UTC | #3
On Thu, Oct 19, 2023 at 10:46 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Manolis Tsamis <manolis.tsamis@vrull.eu> writes:
> > Currently the operations allowed for if conversion of a basic block with
> > multiple sets are few, namely REG, SUBREG and CONST_INT (as controlled by
> > bb_ok_for_noce_convert_multiple_sets).
> >
> > This commit allows more operations (arithmetic, compare, etc) to participate
> > in if conversion. The target's profitability hook and ifcvt's costing is
> > expected to reject sequences that are unprofitable.
> >
> > This is especially useful for targets which provide a rich selection of
> > conditional instructions (like aarch64 which has cinc, csneg, csinv, ccmp, ...)
> > which are currently not used in basic blocks with more than a single set.
> >
> > gcc/ChangeLog:
> >
> >       * ifcvt.cc (try_emit_cmove_seq): Modify comments.
> >       (noce_convert_multiple_sets_1): Modify comments.
> >       (bb_ok_for_noce_convert_multiple_sets): Allow more operations.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       * gcc.target/aarch64/ifcvt_multiple_sets_arithm.c: New test.
> >
> > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> > ---
> >
> > Changes in v3:
> >         - Add SCALAR_INT_MODE_P check in bb_ok_for_noce_convert_multiple_sets.
> >         - Allow rewiring of multiple regs.
> >         - Refactor code with noce_multiple_sets_info.
> >         - Remove old code for subregs.
> >
> >  gcc/ifcvt.cc                                  | 63 ++++++++++-----
> >  .../aarch64/ifcvt_multiple_sets_arithm.c      | 79 +++++++++++++++++++
> >  2 files changed, 123 insertions(+), 19 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> >
> > diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> > index 3273aeca125..efe8ab1577a 100644
> > --- a/gcc/ifcvt.cc
> > +++ b/gcc/ifcvt.cc
> > @@ -3215,13 +3215,13 @@ try_emit_cmove_seq (struct noce_if_info *if_info, rtx temp,
> >  /* We have something like:
> >
> >       if (x > y)
> > -       { i = a; j = b; k = c; }
> > +       { i = EXPR_A; j = EXPR_B; k = EXPR_C; }
> >
> >     Make it:
> >
> > -     tmp_i = (x > y) ? a : i;
> > -     tmp_j = (x > y) ? b : j;
> > -     tmp_k = (x > y) ? c : k;
> > +     tmp_i = (x > y) ? EXPR_A : i;
> > +     tmp_j = (x > y) ? EXPR_B : j;
> > +     tmp_k = (x > y) ? EXPR_C : k;
> >       i = tmp_i;
> >       j = tmp_j;
> >       k = tmp_k;
> > @@ -3637,11 +3637,10 @@ noce_convert_multiple_sets_1 (struct noce_if_info *if_info,
> >
> >
> >
> > -/* Return true iff basic block TEST_BB is comprised of only
> > -   (SET (REG) (REG)) insns suitable for conversion to a series
> > -   of conditional moves.  Also check that we have more than one set
> > -   (other routines can handle a single set better than we would), and
> > -   fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
> > +/* Return true iff basic block TEST_BB is suitable for conversion to a
> > +   series of conditional moves.  Also check that we have more than one
> > +   set (other routines can handle a single set better than we would),
> > +   and fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
> >     through the insns store the sum of their potential costs in COST.  */
> >
> >  static bool
> > @@ -3667,20 +3666,46 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
> >        rtx dest = SET_DEST (set);
> >        rtx src = SET_SRC (set);
> >
> > -      /* We can possibly relax this, but for now only handle REG to REG
> > -      (including subreg) moves.  This avoids any issues that might come
> > -      from introducing loads/stores that might violate data-race-freedom
> > -      guarantees.  */
> > -      if (!REG_P (dest))
> > +      /* Do not handle anything involving memory loads/stores since it might
> > +      violate data-race-freedom guarantees.  */
> > +      if (!REG_P (dest) || contains_mem_rtx_p (src))
> > +     return false;
> > +
> > +      if (!SCALAR_INT_MODE_P (GET_MODE (src)))
> >       return false;
> >
> > -      if (!((REG_P (src) || CONSTANT_P (src))
> > -         || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
> > -           && subreg_lowpart_p (src))))
> > +      /* Allow a wide range of operations and let the costing function decide
> > +      if the conversion is worth it later.  */
> > +      enum rtx_code code = GET_CODE (src);
> > +      if (!(CONSTANT_P (src)
> > +         || code == REG
> > +         || code == SUBREG
> > +         || code == ZERO_EXTEND
> > +         || code == SIGN_EXTEND
> > +         || code == NOT
> > +         || code == NEG
> > +         || code == PLUS
> > +         || code == MINUS
> > +         || code == AND
> > +         || code == IOR
> > +         || code == MULT
> > +         || code == ASHIFT
> > +         || code == ASHIFTRT
> > +         || code == NE
> > +         || code == EQ
> > +         || code == GE
> > +         || code == GT
> > +         || code == LE
> > +         || code == LT
> > +         || code == GEU
> > +         || code == GTU
> > +         || code == LEU
> > +         || code == LTU
> > +         || code == COMPARE))
> >       return false;
>
> I'm nervous about lists of operations like these, for two reasons:
>
> (1) It isn't obvious what criteria are used to select the codes.

Indeed; I was hoping to find some existing GCC predicate that accepts
'common binary ops' or similar, in order to avoid complicated
situations. But it looks like this is mostly unnecessary.
I'll try removing this list and see if there are any issues.

>
> (2) It requires the top-level code to belong to a given set, but it
>     allows subrtxes of src to be arbitrarily complex.  E.g. (to pick
>     a silly example) a toplevel (popcount ...) would be rejected, but
>     (plus (popcount ...) (const_int 1)) would be OK.
>

I have completely missed that, so thanks for mentioning.

> Could we just remove this condition instead?
>

So, yes, will do.

Thanks and sorry for the late reply,
Manolis


> Thanks,
> Richard
>
> >
> > -      /* Destination must be appropriate for a conditional write.  */
> > -      if (!noce_operand_ok (dest))
> > +      /* Destination and source must be appropriate.  */
> > +      if (!noce_operand_ok (dest) || !noce_operand_ok (src))
> >       return false;
> >
> >        /* We must be able to conditionally move in this mode.  */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> > new file mode 100644
> > index 00000000000..d977f4d62ec
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> > @@ -0,0 +1,79 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-rtl-ce1" } */
> > +
> > +void sink2(int, int);
> > +void sink3(int, int, int);
> > +
> > +void cond1(int cond, int x, int y)
> > +{
> > +  if (cond)
> > +    {
> > +      x = x << 4;
> > +      y = 1;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +void cond2(int cond, int x, int y)
> > +{
> > +  if (cond)
> > +    {
> > +      x++;
> > +      y++;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +void cond3(int cond, int x1, int x2, int x3)
> > +{
> > +  if (cond)
> > +    {
> > +      x1++;
> > +      x2++;
> > +      x3++;
> > +    }
> > +
> > +  sink3(x1, x2, x3);
> > +}
> > +
> > +void cond4(int cond, int x, int y)
> > +{
> > +  if (cond)
> > +    {
> > +      x += 2;
> > +      y += 3;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +void cond5(int cond, int x, int y, int r1, int r2)
> > +{
> > +  if (cond)
> > +    {
> > +      x = r1 + 2;
> > +      y = r2 - 34;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +void cond6(int cond, int x, int y)
> > +{
> > +  if (cond)
> > +    {
> > +      x = -x;
> > +      y = ~y;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "cinc\t" 5 } } */
> > +/* { dg-final { scan-assembler-times "csneg\t" 1 } } */
> > +/* { dg-final { scan-assembler-times "csinv\t" 1 } } */
> > +/* { dg-final { scan-assembler "csel\t" } } */
> > +
> > +/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_convert_multiple_sets" 6 "ce1" } } */
> > \ No newline at end of file
Manolis Tsamis April 23, 2024, 11 a.m. UTC | #4
On Thu, Oct 19, 2023 at 10:46 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Manolis Tsamis <manolis.tsamis@vrull.eu> writes:
> > Currently the operations allowed for if conversion of a basic block with
> > multiple sets are few, namely REG, SUBREG and CONST_INT (as controlled by
> > bb_ok_for_noce_convert_multiple_sets).
> >
> > This commit allows more operations (arithmetic, compare, etc) to participate
> > in if conversion. The target's profitability hook and ifcvt's costing is
> > expected to reject sequences that are unprofitable.
> >
> > This is especially useful for targets which provide a rich selection of
> > conditional instructions (like aarch64 which has cinc, csneg, csinv, ccmp, ...)
> > which are currently not used in basic blocks with more than a single set.
> >
> > gcc/ChangeLog:
> >
> >       * ifcvt.cc (try_emit_cmove_seq): Modify comments.
> >       (noce_convert_multiple_sets_1): Modify comments.
> >       (bb_ok_for_noce_convert_multiple_sets): Allow more operations.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       * gcc.target/aarch64/ifcvt_multiple_sets_arithm.c: New test.
> >
> > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> > ---
> >
> > Changes in v3:
> >         - Add SCALAR_INT_MODE_P check in bb_ok_for_noce_convert_multiple_sets.
> >         - Allow rewiring of multiple regs.
> >         - Refactor code with noce_multiple_sets_info.
> >         - Remove old code for subregs.
> >
> >  gcc/ifcvt.cc                                  | 63 ++++++++++-----
> >  .../aarch64/ifcvt_multiple_sets_arithm.c      | 79 +++++++++++++++++++
> >  2 files changed, 123 insertions(+), 19 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> >
> > diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> > index 3273aeca125..efe8ab1577a 100644
> > --- a/gcc/ifcvt.cc
> > +++ b/gcc/ifcvt.cc
> > @@ -3215,13 +3215,13 @@ try_emit_cmove_seq (struct noce_if_info *if_info, rtx temp,
> >  /* We have something like:
> >
> >       if (x > y)
> > -       { i = a; j = b; k = c; }
> > +       { i = EXPR_A; j = EXPR_B; k = EXPR_C; }
> >
> >     Make it:
> >
> > -     tmp_i = (x > y) ? a : i;
> > -     tmp_j = (x > y) ? b : j;
> > -     tmp_k = (x > y) ? c : k;
> > +     tmp_i = (x > y) ? EXPR_A : i;
> > +     tmp_j = (x > y) ? EXPR_B : j;
> > +     tmp_k = (x > y) ? EXPR_C : k;
> >       i = tmp_i;
> >       j = tmp_j;
> >       k = tmp_k;
> > @@ -3637,11 +3637,10 @@ noce_convert_multiple_sets_1 (struct noce_if_info *if_info,
> >
> >
> >
> > -/* Return true iff basic block TEST_BB is comprised of only
> > -   (SET (REG) (REG)) insns suitable for conversion to a series
> > -   of conditional moves.  Also check that we have more than one set
> > -   (other routines can handle a single set better than we would), and
> > -   fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
> > +/* Return true iff basic block TEST_BB is suitable for conversion to a
> > +   series of conditional moves.  Also check that we have more than one
> > +   set (other routines can handle a single set better than we would),
> > +   and fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
> >     through the insns store the sum of their potential costs in COST.  */
> >
> >  static bool
> > @@ -3667,20 +3666,46 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
> >        rtx dest = SET_DEST (set);
> >        rtx src = SET_SRC (set);
> >
> > -      /* We can possibly relax this, but for now only handle REG to REG
> > -      (including subreg) moves.  This avoids any issues that might come
> > -      from introducing loads/stores that might violate data-race-freedom
> > -      guarantees.  */
> > -      if (!REG_P (dest))
> > +      /* Do not handle anything involving memory loads/stores since it might
> > +      violate data-race-freedom guarantees.  */
> > +      if (!REG_P (dest) || contains_mem_rtx_p (src))
> > +     return false;
> > +
> > +      if (!SCALAR_INT_MODE_P (GET_MODE (src)))
> >       return false;
> >
> > -      if (!((REG_P (src) || CONSTANT_P (src))
> > -         || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
> > -           && subreg_lowpart_p (src))))
> > +      /* Allow a wide range of operations and let the costing function decide
> > +      if the conversion is worth it later.  */
> > +      enum rtx_code code = GET_CODE (src);
> > +      if (!(CONSTANT_P (src)
> > +         || code == REG
> > +         || code == SUBREG
> > +         || code == ZERO_EXTEND
> > +         || code == SIGN_EXTEND
> > +         || code == NOT
> > +         || code == NEG
> > +         || code == PLUS
> > +         || code == MINUS
> > +         || code == AND
> > +         || code == IOR
> > +         || code == MULT
> > +         || code == ASHIFT
> > +         || code == ASHIFTRT
> > +         || code == NE
> > +         || code == EQ
> > +         || code == GE
> > +         || code == GT
> > +         || code == LE
> > +         || code == LT
> > +         || code == GEU
> > +         || code == GTU
> > +         || code == LEU
> > +         || code == LTU
> > +         || code == COMPARE))
> >       return false;
>
> I'm nervous about lists of operations like these, for two reasons:
>
> (1) It isn't obvious what criteria are used to select the codes.
>
> (2) It requires the top-level code to belong to a given set, but it
>     allows subrtxes of src to be arbitrarily complex.  E.g. (to pick
>     a silly example) a toplevel (popcount ...) would be rejected, but
>     (plus (popcount ...) (const_int 1)) would be OK.
>
> Could we just remove this condition instead?
>
True and done: https://gcc.gnu.org/pipermail/gcc-patches/2024-April/649899.html

I also removed the if (!SCALAR_INT_MODE_P (GET_MODE (src))) because it
was rejecting (const_int) apparently. The mode is checked in the rest
of the code so it wasn't needed in the first place.

Manolis

> Thanks,
> Richard
>
> >
> > -      /* Destination must be appropriate for a conditional write.  */
> > -      if (!noce_operand_ok (dest))
> > +      /* Destination and source must be appropriate.  */
> > +      if (!noce_operand_ok (dest) || !noce_operand_ok (src))
> >       return false;
> >
> >        /* We must be able to conditionally move in this mode.  */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> > new file mode 100644
> > index 00000000000..d977f4d62ec
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> > @@ -0,0 +1,79 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-rtl-ce1" } */
> > +
> > +void sink2(int, int);
> > +void sink3(int, int, int);
> > +
> > +void cond1(int cond, int x, int y)
> > +{
> > +  if (cond)
> > +    {
> > +      x = x << 4;
> > +      y = 1;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +void cond2(int cond, int x, int y)
> > +{
> > +  if (cond)
> > +    {
> > +      x++;
> > +      y++;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +void cond3(int cond, int x1, int x2, int x3)
> > +{
> > +  if (cond)
> > +    {
> > +      x1++;
> > +      x2++;
> > +      x3++;
> > +    }
> > +
> > +  sink3(x1, x2, x3);
> > +}
> > +
> > +void cond4(int cond, int x, int y)
> > +{
> > +  if (cond)
> > +    {
> > +      x += 2;
> > +      y += 3;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +void cond5(int cond, int x, int y, int r1, int r2)
> > +{
> > +  if (cond)
> > +    {
> > +      x = r1 + 2;
> > +      y = r2 - 34;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +void cond6(int cond, int x, int y)
> > +{
> > +  if (cond)
> > +    {
> > +      x = -x;
> > +      y = ~y;
> > +    }
> > +
> > +  sink2(x, y);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "cinc\t" 5 } } */
> > +/* { dg-final { scan-assembler-times "csneg\t" 1 } } */
> > +/* { dg-final { scan-assembler-times "csinv\t" 1 } } */
> > +/* { dg-final { scan-assembler "csel\t" } } */
> > +
> > +/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_convert_multiple_sets" 6 "ce1" } } */
> > \ No newline at end of file
diff mbox series

Patch

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 3273aeca125..efe8ab1577a 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -3215,13 +3215,13 @@  try_emit_cmove_seq (struct noce_if_info *if_info, rtx temp,
 /* We have something like:
 
      if (x > y)
-       { i = a; j = b; k = c; }
+       { i = EXPR_A; j = EXPR_B; k = EXPR_C; }
 
    Make it:
 
-     tmp_i = (x > y) ? a : i;
-     tmp_j = (x > y) ? b : j;
-     tmp_k = (x > y) ? c : k;
+     tmp_i = (x > y) ? EXPR_A : i;
+     tmp_j = (x > y) ? EXPR_B : j;
+     tmp_k = (x > y) ? EXPR_C : k;
      i = tmp_i;
      j = tmp_j;
      k = tmp_k;
@@ -3637,11 +3637,10 @@  noce_convert_multiple_sets_1 (struct noce_if_info *if_info,
 
 
 
-/* Return true iff basic block TEST_BB is comprised of only
-   (SET (REG) (REG)) insns suitable for conversion to a series
-   of conditional moves.  Also check that we have more than one set
-   (other routines can handle a single set better than we would), and
-   fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
+/* Return true iff basic block TEST_BB is suitable for conversion to a
+   series of conditional moves.  Also check that we have more than one
+   set (other routines can handle a single set better than we would),
+   and fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
    through the insns store the sum of their potential costs in COST.  */
 
 static bool
@@ -3667,20 +3666,46 @@  bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
       rtx dest = SET_DEST (set);
       rtx src = SET_SRC (set);
 
-      /* We can possibly relax this, but for now only handle REG to REG
-	 (including subreg) moves.  This avoids any issues that might come
-	 from introducing loads/stores that might violate data-race-freedom
-	 guarantees.  */
-      if (!REG_P (dest))
+      /* Do not handle anything involving memory loads/stores since it might
+	 violate data-race-freedom guarantees.  */
+      if (!REG_P (dest) || contains_mem_rtx_p (src))
+	return false;
+
+      if (!SCALAR_INT_MODE_P (GET_MODE (src)))
 	return false;
 
-      if (!((REG_P (src) || CONSTANT_P (src))
-	    || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
-	      && subreg_lowpart_p (src))))
+      /* Allow a wide range of operations and let the costing function decide
+	 if the conversion is worth it later.  */
+      enum rtx_code code = GET_CODE (src);
+      if (!(CONSTANT_P (src)
+	    || code == REG
+	    || code == SUBREG
+	    || code == ZERO_EXTEND
+	    || code == SIGN_EXTEND
+	    || code == NOT
+	    || code == NEG
+	    || code == PLUS
+	    || code == MINUS
+	    || code == AND
+	    || code == IOR
+	    || code == MULT
+	    || code == ASHIFT
+	    || code == ASHIFTRT
+	    || code == NE
+	    || code == EQ
+	    || code == GE
+	    || code == GT
+	    || code == LE
+	    || code == LT
+	    || code == GEU
+	    || code == GTU
+	    || code == LEU
+	    || code == LTU
+	    || code == COMPARE))
 	return false;
 
-      /* Destination must be appropriate for a conditional write.  */
-      if (!noce_operand_ok (dest))
+      /* Destination and source must be appropriate.  */
+      if (!noce_operand_ok (dest) || !noce_operand_ok (src))
 	return false;
 
       /* We must be able to conditionally move in this mode.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
new file mode 100644
index 00000000000..d977f4d62ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
@@ -0,0 +1,79 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ce1" } */
+
+void sink2(int, int);
+void sink3(int, int, int);
+
+void cond1(int cond, int x, int y)
+{
+  if (cond)
+    {
+      x = x << 4;
+      y = 1;
+    }
+
+  sink2(x, y);
+}
+
+void cond2(int cond, int x, int y)
+{
+  if (cond)
+    {
+      x++;
+      y++;
+    }
+
+  sink2(x, y);
+}
+
+void cond3(int cond, int x1, int x2, int x3)
+{
+  if (cond)
+    {
+      x1++;
+      x2++;
+      x3++;
+    }
+
+  sink3(x1, x2, x3);
+}
+
+void cond4(int cond, int x, int y)
+{
+  if (cond)
+    {
+      x += 2;
+      y += 3;
+    }
+
+  sink2(x, y);
+}
+
+void cond5(int cond, int x, int y, int r1, int r2)
+{
+  if (cond)
+    {
+      x = r1 + 2;
+      y = r2 - 34;
+    }
+
+  sink2(x, y);
+}
+
+void cond6(int cond, int x, int y)
+{
+  if (cond)
+    {
+      x = -x;
+      y = ~y;
+    }
+
+  sink2(x, y);
+}
+
+/* { dg-final { scan-assembler-times "cinc\t" 5 } } */
+/* { dg-final { scan-assembler-times "csneg\t" 1 } } */
+/* { dg-final { scan-assembler-times "csinv\t" 1 } } */
+/* { dg-final { scan-assembler "csel\t" } } */
+
+/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_convert_multiple_sets" 6 "ce1" } } */
\ No newline at end of file