Message ID | CAKdteOZoUXypZ4zjVvW_Y4prkufHbCYde+s36x3XmPokvWKgCQ@mail.gmail.com |
---|---|
State | New |
Headers | show |
Series | arm: Adjust cost of vector of constant zero | expand |
Hi Christophe, > -----Original Message----- > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of > Christophe Lyon via Gcc-patches > Sent: 26 January 2021 18:03 > To: gcc Patches <gcc-patches@gcc.gnu.org> > Subject: arm: Adjust cost of vector of constant zero > > Neon vector comparisons have a dedicated version when comparing with > constant zero: it means its cost is free. > > Adjust the cost in arm_rtx_costs_internal accordingly, for Neon only, > since MVE does not support this. I guess the other way to do this would be in the comparison code handling in this function where we could check for a const_vector of zeroes and a Neon mode and avoid recursing into the operands. That would avoid the extra switch statement in your patch. WDYT? Thanks, Kyrill > > 2021-01-26 Christophe Lyon <christophe.lyon@linaro.org> > > gcc/ > PR target/98730 > * config/arm/arm.c (arm_rtx_costs_internal): Adjust cost of vector > of constant zero for comparisons. > > gcc/testsuite/ > PR target/98730 > * gcc.target/arm/simd/vceqzq_p64.c: Update expected result. > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 4a5f265..9c5c0df 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -11544,7 +11544,28 @@ arm_rtx_costs_internal (rtx x, enum rtx_code > code, enum rtx_code outer_code, > && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE > (mode))) > || TARGET_HAVE_MVE) > && simd_immediate_valid_for_move (x, mode, NULL, NULL)) > - *cost = COSTS_N_INSNS (1); > + { > + *cost = COSTS_N_INSNS (1); > + > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > + vcgt, vcle and vclt). */ > + if (TARGET_NEON && (x == CONST0_RTX (mode))) > + { > + switch (outer_code) > + { > + case EQ: > + case GE: > + case GT: > + case LE: > + case LT: > + *cost = COSTS_N_INSNS (0); > + break; > + > + default: > + break; > + } > + } > + } > else > *cost = COSTS_N_INSNS (4); > return true; > diff --git a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > index 640754c..a99bb8a 100644 > --- a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > +++ b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > @@ -15,4 +15,4 @@ void func() > result2 = vceqzq_p64 (v2); > } > > -/* { dg-final { scan-assembler-times "vceq\.i32\[ > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 2 } } */ > +/* { dg-final { scan-assembler-times "vceq\.i32\[ > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, #0\n" 2 } } */
On Wed, 27 Jan 2021 at 10:15, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> wrote: > > Hi Christophe, > > > -----Original Message----- > > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of > > Christophe Lyon via Gcc-patches > > Sent: 26 January 2021 18:03 > > To: gcc Patches <gcc-patches@gcc.gnu.org> > > Subject: arm: Adjust cost of vector of constant zero > > > > Neon vector comparisons have a dedicated version when comparing with > > constant zero: it means its cost is free. > > > > Adjust the cost in arm_rtx_costs_internal accordingly, for Neon only, > > since MVE does not support this. > > I guess the other way to do this would be in the comparison code handling in this function where we could check for a const_vector of zeroes and a Neon mode and avoid recursing into the operands. > That would avoid the extra switch statement in your patch. > WDYT? Do you mean like so: diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4a5f265..542c15e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -11316,6 +11316,28 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code, *cost = 0; return true; } + /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt, + vcle and vclt). */ + else if (TARGET_NEON + && TARGET_HARD_FLOAT + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && (XEXP (x, 1) == CONST0_RTX (mode))) + { + switch (code) + { + case EQ: + case GE: + case GT: + case LE: + case LT: + *cost = 0; + return true; + + default: + break; + } + } + return false; I'm not sure I can remove the switch, since the other comparisons are not supported by Neon anyway. Thanks, Christophe > Thanks, > Kyrill > > > > > 2021-01-26 Christophe Lyon <christophe.lyon@linaro.org> > > > > gcc/ > > PR target/98730 > > * config/arm/arm.c (arm_rtx_costs_internal): Adjust cost of vector > > of constant zero for comparisons. > > > > gcc/testsuite/ > > PR target/98730 > > * gcc.target/arm/simd/vceqzq_p64.c: Update expected result. > > > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > > index 4a5f265..9c5c0df 100644 > > --- a/gcc/config/arm/arm.c > > +++ b/gcc/config/arm/arm.c > > @@ -11544,7 +11544,28 @@ arm_rtx_costs_internal (rtx x, enum rtx_code > > code, enum rtx_code outer_code, > > && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE > > (mode))) > > || TARGET_HAVE_MVE) > > && simd_immediate_valid_for_move (x, mode, NULL, NULL)) > > - *cost = COSTS_N_INSNS (1); > > + { > > + *cost = COSTS_N_INSNS (1); > > + > > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > > + vcgt, vcle and vclt). */ > > + if (TARGET_NEON && (x == CONST0_RTX (mode))) > > + { > > + switch (outer_code) > > + { > > + case EQ: > > + case GE: > > + case GT: > > + case LE: > > + case LT: > > + *cost = COSTS_N_INSNS (0); > > + break; > > + > > + default: > > + break; > > + } > > + } > > + } > > else > > *cost = COSTS_N_INSNS (4); > > return true; > > diff --git a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > index 640754c..a99bb8a 100644 > > --- a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > +++ b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > @@ -15,4 +15,4 @@ void func() > > result2 = vceqzq_p64 (v2); > > } > > > > -/* { dg-final { scan-assembler-times "vceq\.i32\[ > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 2 } } */ > > +/* { dg-final { scan-assembler-times "vceq\.i32\[ > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, #0\n" 2 } } */
> -----Original Message----- > From: Christophe Lyon <christophe.lyon@linaro.org> > Sent: 27 January 2021 13:12 > To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > Cc: Kyrylo Tkachov via Gcc-patches <gcc-patches@gcc.gnu.org> > Subject: Re: arm: Adjust cost of vector of constant zero > > On Wed, 27 Jan 2021 at 10:15, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > wrote: > > > > Hi Christophe, > > > > > -----Original Message----- > > > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of > > > Christophe Lyon via Gcc-patches > > > Sent: 26 January 2021 18:03 > > > To: gcc Patches <gcc-patches@gcc.gnu.org> > > > Subject: arm: Adjust cost of vector of constant zero > > > > > > Neon vector comparisons have a dedicated version when comparing with > > > constant zero: it means its cost is free. > > > > > > Adjust the cost in arm_rtx_costs_internal accordingly, for Neon only, > > > since MVE does not support this. > > > > I guess the other way to do this would be in the comparison code handling > in this function where we could check for a const_vector of zeroes and a > Neon mode and avoid recursing into the operands. > > That would avoid the extra switch statement in your patch. > > WDYT? > > Do you mean like so: > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 4a5f265..542c15e 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -11316,6 +11316,28 @@ arm_rtx_costs_internal (rtx x, enum rtx_code > code, enum rtx_code outer_code, > *cost = 0; > return true; > } > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > vcgt, > + vcle and vclt). */ > + else if (TARGET_NEON > + && TARGET_HARD_FLOAT > + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE > (mode)) > + && (XEXP (x, 1) == CONST0_RTX (mode))) > + { > + switch (code) > + { > + case EQ: > + case GE: > + case GT: > + case LE: > + case LT: > + *cost = 0; > + return true; > + > + default: > + break; > + } > + } > + > return false; > > I'm not sure I can remove the switch, since the other comparisons are > not supported by Neon anyway. > No, I mean where: case EQ: case NE: case LT: case LE: case GT: case GE: case LTU: case LEU: case GEU: case GTU: case ORDERED: case UNORDERED: case UNEQ: case UNLE: case UNLT: case UNGE: case UNGT: case LTGT: if (outer_code == SET) { /* Is it a store-flag operation? */ if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM you reorder the codes that are relevant to NEON, handle them for a vector zero argument (and the right target checks), and fall through to the rest if not. Kyrill > Thanks, > > Christophe > > > > Thanks, > > Kyrill > > > > > > > > 2021-01-26 Christophe Lyon <christophe.lyon@linaro.org> > > > > > > gcc/ > > > PR target/98730 > > > * config/arm/arm.c (arm_rtx_costs_internal): Adjust cost of vector > > > of constant zero for comparisons. > > > > > > gcc/testsuite/ > > > PR target/98730 > > > * gcc.target/arm/simd/vceqzq_p64.c: Update expected result. > > > > > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > > > index 4a5f265..9c5c0df 100644 > > > --- a/gcc/config/arm/arm.c > > > +++ b/gcc/config/arm/arm.c > > > @@ -11544,7 +11544,28 @@ arm_rtx_costs_internal (rtx x, enum > rtx_code > > > code, enum rtx_code outer_code, > > > && (VALID_NEON_DREG_MODE (mode) || > VALID_NEON_QREG_MODE > > > (mode))) > > > || TARGET_HAVE_MVE) > > > && simd_immediate_valid_for_move (x, mode, NULL, NULL)) > > > - *cost = COSTS_N_INSNS (1); > > > + { > > > + *cost = COSTS_N_INSNS (1); > > > + > > > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > > > + vcgt, vcle and vclt). */ > > > + if (TARGET_NEON && (x == CONST0_RTX (mode))) > > > + { > > > + switch (outer_code) > > > + { > > > + case EQ: > > > + case GE: > > > + case GT: > > > + case LE: > > > + case LT: > > > + *cost = COSTS_N_INSNS (0); > > > + break; > > > + > > > + default: > > > + break; > > > + } > > > + } > > > + } > > > else > > > *cost = COSTS_N_INSNS (4); > > > return true; > > > diff --git a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > index 640754c..a99bb8a 100644 > > > --- a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > +++ b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > @@ -15,4 +15,4 @@ void func() > > > result2 = vceqzq_p64 (v2); > > > } > > > > > > -/* { dg-final { scan-assembler-times "vceq\.i32\[ > > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 2 } } */ > > > +/* { dg-final { scan-assembler-times "vceq\.i32\[ > > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, #0\n" 2 } } */
On Wed, 27 Jan 2021 at 14:44, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> wrote: > > > > > -----Original Message----- > > From: Christophe Lyon <christophe.lyon@linaro.org> > > Sent: 27 January 2021 13:12 > > To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > > Cc: Kyrylo Tkachov via Gcc-patches <gcc-patches@gcc.gnu.org> > > Subject: Re: arm: Adjust cost of vector of constant zero > > > > On Wed, 27 Jan 2021 at 10:15, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > > wrote: > > > > > > Hi Christophe, > > > > > > > -----Original Message----- > > > > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of > > > > Christophe Lyon via Gcc-patches > > > > Sent: 26 January 2021 18:03 > > > > To: gcc Patches <gcc-patches@gcc.gnu.org> > > > > Subject: arm: Adjust cost of vector of constant zero > > > > > > > > Neon vector comparisons have a dedicated version when comparing with > > > > constant zero: it means its cost is free. > > > > > > > > Adjust the cost in arm_rtx_costs_internal accordingly, for Neon only, > > > > since MVE does not support this. > > > > > > I guess the other way to do this would be in the comparison code handling > > in this function where we could check for a const_vector of zeroes and a > > Neon mode and avoid recursing into the operands. > > > That would avoid the extra switch statement in your patch. > > > WDYT? > > > > Do you mean like so: > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > > index 4a5f265..542c15e 100644 > > --- a/gcc/config/arm/arm.c > > +++ b/gcc/config/arm/arm.c > > @@ -11316,6 +11316,28 @@ arm_rtx_costs_internal (rtx x, enum rtx_code > > code, enum rtx_code outer_code, > > *cost = 0; > > return true; > > } > > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > > vcgt, > > + vcle and vclt). */ > > + else if (TARGET_NEON > > + && TARGET_HARD_FLOAT > > + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE > > (mode)) > > + && (XEXP (x, 1) == CONST0_RTX (mode))) > > + { > > + switch (code) > > + { > > + case EQ: > > + case GE: > > + case GT: > > + case LE: > > + case LT: > > + *cost = 0; > > + return true; > > + > > + default: > > + break; > > + } > > + } > > + > > return false; > > > > I'm not sure I can remove the switch, since the other comparisons are > > not supported by Neon anyway. > > > > No, I mean where: > case EQ: > case NE: > case LT: > case LE: > case GT: > case GE: > case LTU: > case LEU: > case GEU: > case GTU: > case ORDERED: > case UNORDERED: > case UNEQ: > case UNLE: > case UNLT: > case UNGE: > case UNGT: > case LTGT: > if (outer_code == SET) > { > /* Is it a store-flag operation? */ > if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM > > you reorder the codes that are relevant to NEON, handle them for a vector zero argument (and the right target checks), and fall through to the rest if not. > OK, I didn't find reordering this appealing :-) Like so, then? diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4a5f265..88e398d 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -11211,11 +11211,23 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code, return true; case EQ: - case NE: - case LT: - case LE: - case GT: case GE: + case GT: + case LE: + case LT: + /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt, + vcle and vclt). */ + if (TARGET_NEON + && TARGET_HARD_FLOAT + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && (XEXP (x, 1) == CONST0_RTX (mode))) + { + *cost = 0; + return true; + } + + /* Fall through. */ + case NE: case LTU: case LEU: case GEU: Thanks, Christophe > Kyrill > > > Thanks, > > > > Christophe > > > > > > > Thanks, > > > Kyrill > > > > > > > > > > > 2021-01-26 Christophe Lyon <christophe.lyon@linaro.org> > > > > > > > > gcc/ > > > > PR target/98730 > > > > * config/arm/arm.c (arm_rtx_costs_internal): Adjust cost of vector > > > > of constant zero for comparisons. > > > > > > > > gcc/testsuite/ > > > > PR target/98730 > > > > * gcc.target/arm/simd/vceqzq_p64.c: Update expected result. > > > > > > > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > > > > index 4a5f265..9c5c0df 100644 > > > > --- a/gcc/config/arm/arm.c > > > > +++ b/gcc/config/arm/arm.c > > > > @@ -11544,7 +11544,28 @@ arm_rtx_costs_internal (rtx x, enum > > rtx_code > > > > code, enum rtx_code outer_code, > > > > && (VALID_NEON_DREG_MODE (mode) || > > VALID_NEON_QREG_MODE > > > > (mode))) > > > > || TARGET_HAVE_MVE) > > > > && simd_immediate_valid_for_move (x, mode, NULL, NULL)) > > > > - *cost = COSTS_N_INSNS (1); > > > > + { > > > > + *cost = COSTS_N_INSNS (1); > > > > + > > > > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > > > > + vcgt, vcle and vclt). */ > > > > + if (TARGET_NEON && (x == CONST0_RTX (mode))) > > > > + { > > > > + switch (outer_code) > > > > + { > > > > + case EQ: > > > > + case GE: > > > > + case GT: > > > > + case LE: > > > > + case LT: > > > > + *cost = COSTS_N_INSNS (0); > > > > + break; > > > > + > > > > + default: > > > > + break; > > > > + } > > > > + } > > > > + } > > > > else > > > > *cost = COSTS_N_INSNS (4); > > > > return true; > > > > diff --git a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > index 640754c..a99bb8a 100644 > > > > --- a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > +++ b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > @@ -15,4 +15,4 @@ void func() > > > > result2 = vceqzq_p64 (v2); > > > > } > > > > > > > > -/* { dg-final { scan-assembler-times "vceq\.i32\[ > > > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 2 } } */ > > > > +/* { dg-final { scan-assembler-times "vceq\.i32\[ > > > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, #0\n" 2 } } */
> -----Original Message----- > From: Christophe Lyon <christophe.lyon@linaro.org> > Sent: 27 January 2021 13:56 > To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > Cc: Kyrylo Tkachov via Gcc-patches <gcc-patches@gcc.gnu.org> > Subject: Re: arm: Adjust cost of vector of constant zero > > On Wed, 27 Jan 2021 at 14:44, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > wrote: > > > > > > > > > -----Original Message----- > > > From: Christophe Lyon <christophe.lyon@linaro.org> > > > Sent: 27 January 2021 13:12 > > > To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > > > Cc: Kyrylo Tkachov via Gcc-patches <gcc-patches@gcc.gnu.org> > > > Subject: Re: arm: Adjust cost of vector of constant zero > > > > > > On Wed, 27 Jan 2021 at 10:15, Kyrylo Tkachov > <Kyrylo.Tkachov@arm.com> > > > wrote: > > > > > > > > Hi Christophe, > > > > > > > > > -----Original Message----- > > > > > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf > Of > > > > > Christophe Lyon via Gcc-patches > > > > > Sent: 26 January 2021 18:03 > > > > > To: gcc Patches <gcc-patches@gcc.gnu.org> > > > > > Subject: arm: Adjust cost of vector of constant zero > > > > > > > > > > Neon vector comparisons have a dedicated version when comparing > with > > > > > constant zero: it means its cost is free. > > > > > > > > > > Adjust the cost in arm_rtx_costs_internal accordingly, for Neon only, > > > > > since MVE does not support this. > > > > > > > > I guess the other way to do this would be in the comparison code > handling > > > in this function where we could check for a const_vector of zeroes and a > > > Neon mode and avoid recursing into the operands. > > > > That would avoid the extra switch statement in your patch. > > > > WDYT? > > > > > > Do you mean like so: > > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > > > index 4a5f265..542c15e 100644 > > > --- a/gcc/config/arm/arm.c > > > +++ b/gcc/config/arm/arm.c > > > @@ -11316,6 +11316,28 @@ arm_rtx_costs_internal (rtx x, enum > rtx_code > > > code, enum rtx_code outer_code, > > > *cost = 0; > > > return true; > > > } > > > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > > > vcgt, > > > + vcle and vclt). */ > > > + else if (TARGET_NEON > > > + && TARGET_HARD_FLOAT > > > + && (VALID_NEON_DREG_MODE (mode) || > VALID_NEON_QREG_MODE > > > (mode)) > > > + && (XEXP (x, 1) == CONST0_RTX (mode))) > > > + { > > > + switch (code) > > > + { > > > + case EQ: > > > + case GE: > > > + case GT: > > > + case LE: > > > + case LT: > > > + *cost = 0; > > > + return true; > > > + > > > + default: > > > + break; > > > + } > > > + } > > > + > > > return false; > > > > > > I'm not sure I can remove the switch, since the other comparisons are > > > not supported by Neon anyway. > > > > > > > No, I mean where: > > case EQ: > > case NE: > > case LT: > > case LE: > > case GT: > > case GE: > > case LTU: > > case LEU: > > case GEU: > > case GTU: > > case ORDERED: > > case UNORDERED: > > case UNEQ: > > case UNLE: > > case UNLT: > > case UNGE: > > case UNGT: > > case LTGT: > > if (outer_code == SET) > > { > > /* Is it a store-flag operation? */ > > if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM > > > > you reorder the codes that are relevant to NEON, handle them for a vector > zero argument (and the right target checks), and fall through to the rest if > not. > > > > OK, I didn't find reordering this appealing :-) > > Like so, then? > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 4a5f265..88e398d 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -11211,11 +11211,23 @@ arm_rtx_costs_internal (rtx x, enum rtx_code > code, enum rtx_code outer_code, > return true; > > case EQ: > - case NE: > - case LT: > - case LE: > - case GT: > case GE: > + case GT: > + case LE: > + case LT: > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > vcgt, > + vcle and vclt). */ > + if (TARGET_NEON > + && TARGET_HARD_FLOAT > + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE > (mode)) > + && (XEXP (x, 1) == CONST0_RTX (mode))) > + { > + *cost = 0; > + return true; > + } > + > + /* Fall through. */ > + case NE: > case LTU: > case LEU: > case GEU: > I find it much cleaner, but I guess it's subjective 😊 Ok if it passes bootstrap and testing. Thanks, Kyrill > > Thanks, > > Christophe > > > Kyrill > > > > > Thanks, > > > > > > Christophe > > > > > > > > > > Thanks, > > > > Kyrill > > > > > > > > > > > > > > 2021-01-26 Christophe Lyon <christophe.lyon@linaro.org> > > > > > > > > > > gcc/ > > > > > PR target/98730 > > > > > * config/arm/arm.c (arm_rtx_costs_internal): Adjust cost of vector > > > > > of constant zero for comparisons. > > > > > > > > > > gcc/testsuite/ > > > > > PR target/98730 > > > > > * gcc.target/arm/simd/vceqzq_p64.c: Update expected result. > > > > > > > > > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > > > > > index 4a5f265..9c5c0df 100644 > > > > > --- a/gcc/config/arm/arm.c > > > > > +++ b/gcc/config/arm/arm.c > > > > > @@ -11544,7 +11544,28 @@ arm_rtx_costs_internal (rtx x, enum > > > rtx_code > > > > > code, enum rtx_code outer_code, > > > > > && (VALID_NEON_DREG_MODE (mode) || > > > VALID_NEON_QREG_MODE > > > > > (mode))) > > > > > || TARGET_HAVE_MVE) > > > > > && simd_immediate_valid_for_move (x, mode, NULL, NULL)) > > > > > - *cost = COSTS_N_INSNS (1); > > > > > + { > > > > > + *cost = COSTS_N_INSNS (1); > > > > > + > > > > > + /* Neon has special instructions when comparing with 0 (vceq, > vcge, > > > > > + vcgt, vcle and vclt). */ > > > > > + if (TARGET_NEON && (x == CONST0_RTX (mode))) > > > > > + { > > > > > + switch (outer_code) > > > > > + { > > > > > + case EQ: > > > > > + case GE: > > > > > + case GT: > > > > > + case LE: > > > > > + case LT: > > > > > + *cost = COSTS_N_INSNS (0); > > > > > + break; > > > > > + > > > > > + default: > > > > > + break; > > > > > + } > > > > > + } > > > > > + } > > > > > else > > > > > *cost = COSTS_N_INSNS (4); > > > > > return true; > > > > > diff --git a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > > b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > > index 640754c..a99bb8a 100644 > > > > > --- a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > > +++ b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > > @@ -15,4 +15,4 @@ void func() > > > > > result2 = vceqzq_p64 (v2); > > > > > } > > > > > > > > > > -/* { dg-final { scan-assembler-times "vceq\.i32\[ > > > > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 2 } } */ > > > > > +/* { dg-final { scan-assembler-times "vceq\.i32\[ > > > > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, #0\n" 2 } } */
On Wed, 27 Jan 2021 at 15:03, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> wrote: > > > > > -----Original Message----- > > From: Christophe Lyon <christophe.lyon@linaro.org> > > Sent: 27 January 2021 13:56 > > To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > > Cc: Kyrylo Tkachov via Gcc-patches <gcc-patches@gcc.gnu.org> > > Subject: Re: arm: Adjust cost of vector of constant zero > > > > On Wed, 27 Jan 2021 at 14:44, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > > wrote: > > > > > > > > > > > > > -----Original Message----- > > > > From: Christophe Lyon <christophe.lyon@linaro.org> > > > > Sent: 27 January 2021 13:12 > > > > To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > > > > Cc: Kyrylo Tkachov via Gcc-patches <gcc-patches@gcc.gnu.org> > > > > Subject: Re: arm: Adjust cost of vector of constant zero > > > > > > > > On Wed, 27 Jan 2021 at 10:15, Kyrylo Tkachov > > <Kyrylo.Tkachov@arm.com> > > > > wrote: > > > > > > > > > > Hi Christophe, > > > > > > > > > > > -----Original Message----- > > > > > > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf > > Of > > > > > > Christophe Lyon via Gcc-patches > > > > > > Sent: 26 January 2021 18:03 > > > > > > To: gcc Patches <gcc-patches@gcc.gnu.org> > > > > > > Subject: arm: Adjust cost of vector of constant zero > > > > > > > > > > > > Neon vector comparisons have a dedicated version when comparing > > with > > > > > > constant zero: it means its cost is free. > > > > > > > > > > > > Adjust the cost in arm_rtx_costs_internal accordingly, for Neon only, > > > > > > since MVE does not support this. > > > > > > > > > > I guess the other way to do this would be in the comparison code > > handling > > > > in this function where we could check for a const_vector of zeroes and a > > > > Neon mode and avoid recursing into the operands. > > > > > That would avoid the extra switch statement in your patch. > > > > > WDYT? > > > > > > > > Do you mean like so: > > > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > > > > index 4a5f265..542c15e 100644 > > > > --- a/gcc/config/arm/arm.c > > > > +++ b/gcc/config/arm/arm.c > > > > @@ -11316,6 +11316,28 @@ arm_rtx_costs_internal (rtx x, enum > > rtx_code > > > > code, enum rtx_code outer_code, > > > > *cost = 0; > > > > return true; > > > > } > > > > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > > > > vcgt, > > > > + vcle and vclt). */ > > > > + else if (TARGET_NEON > > > > + && TARGET_HARD_FLOAT > > > > + && (VALID_NEON_DREG_MODE (mode) || > > VALID_NEON_QREG_MODE > > > > (mode)) > > > > + && (XEXP (x, 1) == CONST0_RTX (mode))) > > > > + { > > > > + switch (code) > > > > + { > > > > + case EQ: > > > > + case GE: > > > > + case GT: > > > > + case LE: > > > > + case LT: > > > > + *cost = 0; > > > > + return true; > > > > + > > > > + default: > > > > + break; > > > > + } > > > > + } > > > > + > > > > return false; > > > > > > > > I'm not sure I can remove the switch, since the other comparisons are > > > > not supported by Neon anyway. > > > > > > > > > > No, I mean where: > > > case EQ: > > > case NE: > > > case LT: > > > case LE: > > > case GT: > > > case GE: > > > case LTU: > > > case LEU: > > > case GEU: > > > case GTU: > > > case ORDERED: > > > case UNORDERED: > > > case UNEQ: > > > case UNLE: > > > case UNLT: > > > case UNGE: > > > case UNGT: > > > case LTGT: > > > if (outer_code == SET) > > > { > > > /* Is it a store-flag operation? */ > > > if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM > > > > > > you reorder the codes that are relevant to NEON, handle them for a vector > > zero argument (and the right target checks), and fall through to the rest if > > not. > > > > > > > OK, I didn't find reordering this appealing :-) > > > > Like so, then? > > > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > > index 4a5f265..88e398d 100644 > > --- a/gcc/config/arm/arm.c > > +++ b/gcc/config/arm/arm.c > > @@ -11211,11 +11211,23 @@ arm_rtx_costs_internal (rtx x, enum rtx_code > > code, enum rtx_code outer_code, > > return true; > > > > case EQ: > > - case NE: > > - case LT: > > - case LE: > > - case GT: > > case GE: > > + case GT: > > + case LE: > > + case LT: > > + /* Neon has special instructions when comparing with 0 (vceq, vcge, > > vcgt, > > + vcle and vclt). */ > > + if (TARGET_NEON > > + && TARGET_HARD_FLOAT > > + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE > > (mode)) > > + && (XEXP (x, 1) == CONST0_RTX (mode))) > > + { > > + *cost = 0; > > + return true; > > + } > > + > > + /* Fall through. */ > > + case NE: > > case LTU: > > case LEU: > > case GEU: > > > > I find it much cleaner, but I guess it's subjective > Ok if it passes bootstrap and testing. > Thanks, > Kyrill > Pushed after successful bootstrap as 31a0ab9213f780d2fa1da6e4879df214c0f247f9 (r11-6961) Thanks, Christophe > > > > Thanks, > > > > Christophe > > > > > Kyrill > > > > > > > Thanks, > > > > > > > > Christophe > > > > > > > > > > > > > Thanks, > > > > > Kyrill > > > > > > > > > > > > > > > > > 2021-01-26 Christophe Lyon <christophe.lyon@linaro.org> > > > > > > > > > > > > gcc/ > > > > > > PR target/98730 > > > > > > * config/arm/arm.c (arm_rtx_costs_internal): Adjust cost of vector > > > > > > of constant zero for comparisons. > > > > > > > > > > > > gcc/testsuite/ > > > > > > PR target/98730 > > > > > > * gcc.target/arm/simd/vceqzq_p64.c: Update expected result. > > > > > > > > > > > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > > > > > > index 4a5f265..9c5c0df 100644 > > > > > > --- a/gcc/config/arm/arm.c > > > > > > +++ b/gcc/config/arm/arm.c > > > > > > @@ -11544,7 +11544,28 @@ arm_rtx_costs_internal (rtx x, enum > > > > rtx_code > > > > > > code, enum rtx_code outer_code, > > > > > > && (VALID_NEON_DREG_MODE (mode) || > > > > VALID_NEON_QREG_MODE > > > > > > (mode))) > > > > > > || TARGET_HAVE_MVE) > > > > > > && simd_immediate_valid_for_move (x, mode, NULL, NULL)) > > > > > > - *cost = COSTS_N_INSNS (1); > > > > > > + { > > > > > > + *cost = COSTS_N_INSNS (1); > > > > > > + > > > > > > + /* Neon has special instructions when comparing with 0 (vceq, > > vcge, > > > > > > + vcgt, vcle and vclt). */ > > > > > > + if (TARGET_NEON && (x == CONST0_RTX (mode))) > > > > > > + { > > > > > > + switch (outer_code) > > > > > > + { > > > > > > + case EQ: > > > > > > + case GE: > > > > > > + case GT: > > > > > > + case LE: > > > > > > + case LT: > > > > > > + *cost = COSTS_N_INSNS (0); > > > > > > + break; > > > > > > + > > > > > > + default: > > > > > > + break; > > > > > > + } > > > > > > + } > > > > > > + } > > > > > > else > > > > > > *cost = COSTS_N_INSNS (4); > > > > > > return true; > > > > > > diff --git a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > > > b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > > > index 640754c..a99bb8a 100644 > > > > > > --- a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > > > +++ b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c > > > > > > @@ -15,4 +15,4 @@ void func() > > > > > > result2 = vceqzq_p64 (v2); > > > > > > } > > > > > > > > > > > > -/* { dg-final { scan-assembler-times "vceq\.i32\[ > > > > > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 2 } } */ > > > > > > +/* { dg-final { scan-assembler-times "vceq\.i32\[ > > > > > > \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, #0\n" 2 } } */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4a5f265..9c5c0df 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -11544,7 +11544,28 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code, && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))) || TARGET_HAVE_MVE) && simd_immediate_valid_for_move (x, mode, NULL, NULL)) - *cost = COSTS_N_INSNS (1); + { + *cost = COSTS_N_INSNS (1); + + /* Neon has special instructions when comparing with 0 (vceq, vcge, + vcgt, vcle and vclt). */ + if (TARGET_NEON && (x == CONST0_RTX (mode))) + { + switch (outer_code) + { + case EQ: + case GE: + case GT: + case LE: + case LT: + *cost = COSTS_N_INSNS (0); + break; + + default: + break; + } + } + } else *cost = COSTS_N_INSNS (4); return true; diff --git a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c index 640754c..a99bb8a 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c +++ b/gcc/testsuite/gcc.target/arm/simd/vceqzq_p64.c @@ -15,4 +15,4 @@ void func() result2 = vceqzq_p64 (v2); } -/* { dg-final { scan-assembler-times "vceq\.i32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 2 } } */