Message ID | 20220423234448.393794-1-philipp.tomsich@vrull.eu |
---|---|
State | New |
Headers | show |
Series | [v1] RISC-V: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO | expand |
Kito, Did you have a chance to take a look at this one? I assume this will have to wait until we reopen for 13... OK for 13? Also: OK for a backport (once a branch for that exists)? Philipp. On Sun, 24 Apr 2022 at 01:44, Philipp Tomsich <philipp.tomsich@vrull.eu> wrote: > > The Zbb support has introduced ctz and clz to the backend, but some > transformations in GCC need to know what the value of c[lt]z at zero > is. This affects how the optab is generated and may suppress use of > CLZ/CTZ in tree passes. > > Among other things, this is needed for the transformation of > table-based ctz-implementations, such as in deepsjeng, to work > (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838). > > Prior to this change, the test case from PR90838 would compile to > on RISC-V targets with Zbb: > myctz: > lui a4,%hi(.LC0) > ld a4,%lo(.LC0)(a4) > neg a5,a0 > and a5,a5,a0 > mul a5,a5,a4 > lui a4,%hi(.LANCHOR0) > addi a4,a4,%lo(.LANCHOR0) > srli a5,a5,58 > sh2add a5,a5,a4 > lw a0,0(a5) > ret > > After this change, we get: > myctz: > ctz a0,a0 > andi a0,a0,63 > ret > > Testing this with deepsjeng_r (from SPEC 2017) against QEMU, this > shows a clear reduction in dynamic instruction count: > - before 1961888067076 > - after 1907928279874 (2.75% reduction) > > gcc/ChangeLog: > > * config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement. > (CTZ_DEFINED_VALUE_AT_ZERO): Same. > > gcc/testsuite/ChangeLog: > > * gcc.dg/pr90838.c: Add additional flags (dg-additional-options) > when compiling for riscv64. > * gcc.target/riscv/zbb-ctz.c: New test. 
> > Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > Co-developed-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > > --- > gcc/config/riscv/riscv.h | 5 ++ > gcc/testsuite/gcc.dg/pr90838.c | 2 + > gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c | 65 ++++++++++++++++++++ > gcc/testsuite/gcc.target/riscv/zbb-ctz.c | 66 +++++++++++++++++++++ > 4 files changed, 138 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz.c > > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > index 4210e252255..95f72e2fd3f 100644 > --- a/gcc/config/riscv/riscv.h > +++ b/gcc/config/riscv/riscv.h > @@ -1019,4 +1019,9 @@ extern void riscv_remove_unneeded_save_restore_calls (void); > > #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO) > > +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) > +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) > + > #endif /* ! 
GCC_RISCV_H */ > diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c > index 41c5dab9a5c..162bd6f51d0 100644 > --- a/gcc/testsuite/gcc.dg/pr90838.c > +++ b/gcc/testsuite/gcc.dg/pr90838.c > @@ -1,5 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */ > +/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */ > > int ctz1 (unsigned x) > { > @@ -57,3 +58,4 @@ int ctz4 (unsigned long x) > } > > /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */ > +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target riscv64*-*-* } } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > new file mode 100644 > index 00000000000..b903517197a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > @@ -0,0 +1,65 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */ > +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ > + > +int ctz1 (unsigned x) > +{ > + static const char table[32] = > + { > + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, > + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 > + }; > + > + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; > +} > + > +int ctz2 (unsigned x) > +{ > +#define u 0 > + static short table[64] = > + { > + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, > + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, > + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, > + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u > + }; > + > + x = (x & -x) * 0x0450FBAF; > + return table[x >> 26]; > +} > + > +int ctz3 (unsigned x) > +{ > + static int table[32] = > + { > + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, > + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 > + }; > + > + if (x == 0) return 32; > + x = (x & -x) * 0x04D7651F; > + return table[x >> 27]; > +} > + > +static const unsigned 
long long magic = 0x03f08c5392f756cdULL; > + > +static const char table[64] = { > + 0, 1, 12, 2, 13, 22, 17, 3, > + 14, 33, 23, 36, 18, 58, 28, 4, > + 62, 15, 34, 26, 24, 48, 50, 37, > + 19, 55, 59, 52, 29, 44, 39, 5, > + 63, 11, 21, 16, 32, 35, 57, 27, > + 61, 25, 47, 49, 54, 51, 43, 38, > + 10, 20, 31, 56, 60, 46, 53, 42, > + 9, 30, 45, 41, 8, 40, 7, 6, > +}; > + > +int ctz4 (unsigned long x) > +{ > + unsigned long lsb = x & -x; > + return table[(lsb * magic) >> 58]; > +} > + > +/* { dg-final { scan-assembler-times "ctz\t" 3 } } */ > +/* { dg-final { scan-assembler-times "andi\t" 1 } } */ > +/* { dg-final { scan-assembler-times "mul\t" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c > new file mode 100644 > index 00000000000..f9fbcb38dee > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c > @@ -0,0 +1,66 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */ > +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ > + > +int ctz1 (unsigned x) > +{ > + static const char table[32] = > + { > + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, > + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 > + }; > + > + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; > +} > + > +int ctz2 (unsigned x) > +{ > +#define u 0 > + static short table[64] = > + { > + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, > + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, > + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, > + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u > + }; > + > + x = (x & -x) * 0x0450FBAF; > + return table[x >> 26]; > +} > + > +int ctz3 (unsigned x) > +{ > + static int table[32] = > + { > + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, > + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 > + }; > + > + if (x == 0) return 32; > + x = (x & -x) * 0x04D7651F; > + return table[x >> 27]; > +} > + > +static const unsigned long long magic = 
0x03f08c5392f756cdULL; > + > +static const char table[64] = { > + 0, 1, 12, 2, 13, 22, 17, 3, > + 14, 33, 23, 36, 18, 58, 28, 4, > + 62, 15, 34, 26, 24, 48, 50, 37, > + 19, 55, 59, 52, 29, 44, 39, 5, > + 63, 11, 21, 16, 32, 35, 57, 27, > + 61, 25, 47, 49, 54, 51, 43, 38, > + 10, 20, 31, 56, 60, 46, 53, 42, > + 9, 30, 45, 41, 8, 40, 7, 6, > +}; > + > +int ctz4 (unsigned long x) > +{ > + unsigned long lsb = x & -x; > + return table[(lsb * magic) >> 58]; > +} > + > +/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */ > +/* { dg-final { scan-assembler-times "ctz\t" 1 } } */ > +/* { dg-final { scan-assembler-times "andi\t" 2 } } */ > +/* { dg-final { scan-assembler-not "mul" } } */ > -- > 2.34.1 >
On Thu, 28 Apr 2022 15:11:49 PDT (-0700), philipp.tomsich@vrull.eu wrote: > Kito, > > Did you have a chance to take a look at this one? > > I assume this will have to wait until we reopen for 13... > OK for 13? Also: OK for a backport (once a branch for that exists)? I'd assumed it was 13 material when it was sent, given the timing and that it didn't call out anything about being targeted at 12. IIUC we've also generally only backported regression fixes and that's the standard policy, but maybe I'm misunderstanding something? > > Philipp. > > > On Sun, 24 Apr 2022 at 01:44, Philipp Tomsich <philipp.tomsich@vrull.eu> wrote: >> >> The Zbb support has introduced ctz and clz to the backend, but some >> transformations in GCC need to know what the value of c[lt]z at zero >> is. This affects how the optab is generated and may suppress use of >> CLZ/CTZ in tree passes. >> >> Among other things, this is needed for the transformation of >> table-based ctz-implementations, such as in deepsjeng, to work >> (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838). >> >> Prior to this change, the test case from PR90838 would compile to >> on RISC-V targets with Zbb: >> myctz: >> lui a4,%hi(.LC0) >> ld a4,%lo(.LC0)(a4) >> neg a5,a0 >> and a5,a5,a0 >> mul a5,a5,a4 >> lui a4,%hi(.LANCHOR0) >> addi a4,a4,%lo(.LANCHOR0) >> srli a5,a5,58 >> sh2add a5,a5,a4 >> lw a0,0(a5) >> ret >> >> After this change, we get: >> myctz: >> ctz a0,a0 >> andi a0,a0,63 >> ret This one looks correct to me, but does it pass the rest of the test suite? I haven't been running regressions with the bitmanip extensions enabled, and I don't see any machinery in riscv-gnu-toolchain for B-extension-family regression runs (plus, my box is kind of tied up right now anyway trying to get other stuff fixed). 
>> >> Testing this with deepsjeng_r (from SPEC 2017) against QEMU, this >> shows a clear reduction in dynamic instruction count: >> - before 1961888067076 >> - after 1907928279874 (2.75% reduction) >> >> gcc/ChangeLog: >> >> * config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement. >> (CTZ_DEFINED_VALUE_AT_ZERO): Same. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.dg/pr90838.c: Add additional flags (dg-additional-options) >> when compiling for riscv64. >> * gcc.target/riscv/zbb-ctz.c: New test. >> >> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> >> Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> >> Co-developed-by: Manolis Tsamis <manolis.tsamis@vrull.eu> >> >> --- >> gcc/config/riscv/riscv.h | 5 ++ >> gcc/testsuite/gcc.dg/pr90838.c | 2 + >> gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c | 65 ++++++++++++++++++++ >> gcc/testsuite/gcc.target/riscv/zbb-ctz.c | 66 +++++++++++++++++++++ I was a bit worried about where the tests came from, but it looks like they're essentially the same as some arm64 and x86 tests so they should be fine. Might be good to refactor things to avoid the duplication, though. >> 4 files changed, 138 insertions(+) >> create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c >> create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz.c >> >> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h >> index 4210e252255..95f72e2fd3f 100644 >> --- a/gcc/config/riscv/riscv.h >> +++ b/gcc/config/riscv/riscv.h >> @@ -1019,4 +1019,9 @@ extern void riscv_remove_unneeded_save_restore_calls (void); >> >> #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO) >> >> +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ >> + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) >> +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ >> + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) IIUC this is correct: it expands to 8/16 for QImode/HImode, but as we don't have clz/ctz defined for those it's fine. 
I'll let Kito have some time to chime in, though. >> + >> #endif /* ! GCC_RISCV_H */ >> diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c >> index 41c5dab9a5c..162bd6f51d0 100644 >> --- a/gcc/testsuite/gcc.dg/pr90838.c >> +++ b/gcc/testsuite/gcc.dg/pr90838.c >> @@ -1,5 +1,6 @@ >> /* { dg-do compile } */ >> /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */ >> +/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */ >> >> int ctz1 (unsigned x) >> { >> @@ -57,3 +58,4 @@ int ctz4 (unsigned long x) >> } >> >> /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */ >> +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target riscv64*-*-* } } } */ >> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c >> new file mode 100644 >> index 00000000000..b903517197a >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c >> @@ -0,0 +1,65 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */ >> +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ >> + >> +int ctz1 (unsigned x) >> +{ >> + static const char table[32] = >> + { >> + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, >> + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 >> + }; >> + >> + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; >> +} >> + >> +int ctz2 (unsigned x) >> +{ >> +#define u 0 >> + static short table[64] = >> + { >> + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, >> + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, >> + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, >> + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u >> + }; >> + >> + x = (x & -x) * 0x0450FBAF; >> + return table[x >> 26]; >> +} >> + >> +int ctz3 (unsigned x) >> +{ >> + static int table[32] = >> + { >> + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, >> + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 >> 
+ }; >> + >> + if (x == 0) return 32; >> + x = (x & -x) * 0x04D7651F; >> + return table[x >> 27]; >> +} >> + >> +static const unsigned long long magic = 0x03f08c5392f756cdULL; >> + >> +static const char table[64] = { >> + 0, 1, 12, 2, 13, 22, 17, 3, >> + 14, 33, 23, 36, 18, 58, 28, 4, >> + 62, 15, 34, 26, 24, 48, 50, 37, >> + 19, 55, 59, 52, 29, 44, 39, 5, >> + 63, 11, 21, 16, 32, 35, 57, 27, >> + 61, 25, 47, 49, 54, 51, 43, 38, >> + 10, 20, 31, 56, 60, 46, 53, 42, >> + 9, 30, 45, 41, 8, 40, 7, 6, >> +}; >> + >> +int ctz4 (unsigned long x) >> +{ >> + unsigned long lsb = x & -x; >> + return table[(lsb * magic) >> 58]; >> +} >> + >> +/* { dg-final { scan-assembler-times "ctz\t" 3 } } */ >> +/* { dg-final { scan-assembler-times "andi\t" 1 } } */ >> +/* { dg-final { scan-assembler-times "mul\t" 1 } } */ >> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c >> new file mode 100644 >> index 00000000000..f9fbcb38dee >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c >> @@ -0,0 +1,66 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */ >> +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ >> + >> +int ctz1 (unsigned x) >> +{ >> + static const char table[32] = >> + { >> + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, >> + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 >> + }; >> + >> + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; >> +} >> + >> +int ctz2 (unsigned x) >> +{ >> +#define u 0 >> + static short table[64] = >> + { >> + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, >> + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, >> + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, >> + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u >> + }; >> + >> + x = (x & -x) * 0x0450FBAF; >> + return table[x >> 26]; >> +} >> + >> +int ctz3 (unsigned x) >> +{ >> + static int table[32] = >> + { >> + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, >> + 
31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 >> + }; >> + >> + if (x == 0) return 32; >> + x = (x & -x) * 0x04D7651F; >> + return table[x >> 27]; >> +} >> + >> +static const unsigned long long magic = 0x03f08c5392f756cdULL; >> + >> +static const char table[64] = { >> + 0, 1, 12, 2, 13, 22, 17, 3, >> + 14, 33, 23, 36, 18, 58, 28, 4, >> + 62, 15, 34, 26, 24, 48, 50, 37, >> + 19, 55, 59, 52, 29, 44, 39, 5, >> + 63, 11, 21, 16, 32, 35, 57, 27, >> + 61, 25, 47, 49, 54, 51, 43, 38, >> + 10, 20, 31, 56, 60, 46, 53, 42, >> + 9, 30, 45, 41, 8, 40, 7, 6, >> +}; >> + >> +int ctz4 (unsigned long x) >> +{ >> + unsigned long lsb = x & -x; >> + return table[(lsb * magic) >> 58]; >> +} >> + >> +/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */ >> +/* { dg-final { scan-assembler-times "ctz\t" 1 } } */ >> +/* { dg-final { scan-assembler-times "andi\t" 2 } } */ >> +/* { dg-final { scan-assembler-not "mul" } } */ >> -- >> 2.34.1 >>
Ack, I am OoO during 5/1-5/4, I'll start looking at those patches in the GCC 13 queue including this one :) On Fri, Apr 29, 2022 at 6:12 AM Philipp Tomsich <philipp.tomsich@vrull.eu> wrote: > > Kito, > > Did you have a chance to take a look at this one? > > I assume this will have to wait until we reopen for 13... > OK for 13? Also: OK for a backport (once a branch for that exists)? > > Philipp. > > > On Sun, 24 Apr 2022 at 01:44, Philipp Tomsich <philipp.tomsich@vrull.eu> wrote: > > > > The Zbb support has introduced ctz and clz to the backend, but some > > transformations in GCC need to know what the value of c[lt]z at zero > > is. This affects how the optab is generated and may suppress use of > > CLZ/CTZ in tree passes. > > > > Among other things, this is needed for the transformation of > > table-based ctz-implementations, such as in deepsjeng, to work > > (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838). > > > > Prior to this change, the test case from PR90838 would compile to > > on RISC-V targets with Zbb: > > myctz: > > lui a4,%hi(.LC0) > > ld a4,%lo(.LC0)(a4) > > neg a5,a0 > > and a5,a5,a0 > > mul a5,a5,a4 > > lui a4,%hi(.LANCHOR0) > > addi a4,a4,%lo(.LANCHOR0) > > srli a5,a5,58 > > sh2add a5,a5,a4 > > lw a0,0(a5) > > ret > > > > After this change, we get: > > myctz: > > ctz a0,a0 > > andi a0,a0,63 > > ret > > > > Testing this with deepsjeng_r (from SPEC 2017) against QEMU, this > > shows a clear reduction in dynamic instruction count: > > - before 1961888067076 > > - after 1907928279874 (2.75% reduction) > > > > gcc/ChangeLog: > > > > * config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement. > > (CTZ_DEFINED_VALUE_AT_ZERO): Same. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/pr90838.c: Add additional flags (dg-additional-options) > > when compiling for riscv64. > > * gcc.target/riscv/zbb-ctz.c: New test. 
> > > > Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > > Co-developed-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > > > > --- > > gcc/config/riscv/riscv.h | 5 ++ > > gcc/testsuite/gcc.dg/pr90838.c | 2 + > > gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c | 65 ++++++++++++++++++++ > > gcc/testsuite/gcc.target/riscv/zbb-ctz.c | 66 +++++++++++++++++++++ > > 4 files changed, 138 insertions(+) > > create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > > create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz.c > > > > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > > index 4210e252255..95f72e2fd3f 100644 > > --- a/gcc/config/riscv/riscv.h > > +++ b/gcc/config/riscv/riscv.h > > @@ -1019,4 +1019,9 @@ extern void riscv_remove_unneeded_save_restore_calls (void); > > > > #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO) > > > > +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > > + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) > > +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > > + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) > > + > > #endif /* ! 
GCC_RISCV_H */ > > diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c > > index 41c5dab9a5c..162bd6f51d0 100644 > > --- a/gcc/testsuite/gcc.dg/pr90838.c > > +++ b/gcc/testsuite/gcc.dg/pr90838.c > > @@ -1,5 +1,6 @@ > > /* { dg-do compile } */ > > /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */ > > +/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */ > > > > int ctz1 (unsigned x) > > { > > @@ -57,3 +58,4 @@ int ctz4 (unsigned long x) > > } > > > > /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */ > > +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target riscv64*-*-* } } } */ > > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > > new file mode 100644 > > index 00000000000..b903517197a > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > > @@ -0,0 +1,65 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */ > > +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ > > + > > +int ctz1 (unsigned x) > > +{ > > + static const char table[32] = > > + { > > + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, > > + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 > > + }; > > + > > + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; > > +} > > + > > +int ctz2 (unsigned x) > > +{ > > +#define u 0 > > + static short table[64] = > > + { > > + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, > > + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, > > + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, > > + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u > > + }; > > + > > + x = (x & -x) * 0x0450FBAF; > > + return table[x >> 26]; > > +} > > + > > +int ctz3 (unsigned x) > > +{ > > + static int table[32] = > > + { > > + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, > > + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 > > + }; > > + 
> > + if (x == 0) return 32; > > + x = (x & -x) * 0x04D7651F; > > + return table[x >> 27]; > > +} > > + > > +static const unsigned long long magic = 0x03f08c5392f756cdULL; > > + > > +static const char table[64] = { > > + 0, 1, 12, 2, 13, 22, 17, 3, > > + 14, 33, 23, 36, 18, 58, 28, 4, > > + 62, 15, 34, 26, 24, 48, 50, 37, > > + 19, 55, 59, 52, 29, 44, 39, 5, > > + 63, 11, 21, 16, 32, 35, 57, 27, > > + 61, 25, 47, 49, 54, 51, 43, 38, > > + 10, 20, 31, 56, 60, 46, 53, 42, > > + 9, 30, 45, 41, 8, 40, 7, 6, > > +}; > > + > > +int ctz4 (unsigned long x) > > +{ > > + unsigned long lsb = x & -x; > > + return table[(lsb * magic) >> 58]; > > +} > > + > > +/* { dg-final { scan-assembler-times "ctz\t" 3 } } */ > > +/* { dg-final { scan-assembler-times "andi\t" 1 } } */ > > +/* { dg-final { scan-assembler-times "mul\t" 1 } } */ > > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c > > new file mode 100644 > > index 00000000000..f9fbcb38dee > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c > > @@ -0,0 +1,66 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */ > > +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ > > + > > +int ctz1 (unsigned x) > > +{ > > + static const char table[32] = > > + { > > + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, > > + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 > > + }; > > + > > + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; > > +} > > + > > +int ctz2 (unsigned x) > > +{ > > +#define u 0 > > + static short table[64] = > > + { > > + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, > > + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, > > + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, > > + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u > > + }; > > + > > + x = (x & -x) * 0x0450FBAF; > > + return table[x >> 26]; > > +} > > + > > +int ctz3 (unsigned x) > > +{ > > + static int table[32] = > > + { > > 
+ 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, > > + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 > > + }; > > + > > + if (x == 0) return 32; > > + x = (x & -x) * 0x04D7651F; > > + return table[x >> 27]; > > +} > > + > > +static const unsigned long long magic = 0x03f08c5392f756cdULL; > > + > > +static const char table[64] = { > > + 0, 1, 12, 2, 13, 22, 17, 3, > > + 14, 33, 23, 36, 18, 58, 28, 4, > > + 62, 15, 34, 26, 24, 48, 50, 37, > > + 19, 55, 59, 52, 29, 44, 39, 5, > > + 63, 11, 21, 16, 32, 35, 57, 27, > > + 61, 25, 47, 49, 54, 51, 43, 38, > > + 10, 20, 31, 56, 60, 46, 53, 42, > > + 9, 30, 45, 41, 8, 40, 7, 6, > > +}; > > + > > +int ctz4 (unsigned long x) > > +{ > > + unsigned long lsb = x & -x; > > + return table[(lsb * magic) >> 58]; > > +} > > + > > +/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */ > > +/* { dg-final { scan-assembler-times "ctz\t" 1 } } */ > > +/* { dg-final { scan-assembler-times "andi\t" 2 } } */ > > +/* { dg-final { scan-assembler-not "mul" } } */ > > -- > > 2.34.1 > >
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 4210e252255..95f72e2fd3f 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -1019,4 +1019,9 @@ extern void riscv_remove_unneeded_save_restore_calls (void); #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO) +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) + #endif /* ! GCC_RISCV_H */ diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c index 41c5dab9a5c..162bd6f51d0 100644 --- a/gcc/testsuite/gcc.dg/pr90838.c +++ b/gcc/testsuite/gcc.dg/pr90838.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */ +/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */ int ctz1 (unsigned x) { @@ -57,3 +58,4 @@ int ctz4 (unsigned long x) } /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target riscv64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c new file mode 100644 index 00000000000..b903517197a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ + +int ctz1 (unsigned x) +{ + static const char table[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; +} + +int ctz2 (unsigned x) +{ +#define u 0 + static short table[64] = + { + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, + 31,11, 5, u, u, u, u, u, 9, u, u,24, 
u, u,20,26, + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u + }; + + x = (x & -x) * 0x0450FBAF; + return table[x >> 26]; +} + +int ctz3 (unsigned x) +{ + static int table[32] = + { + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 + }; + + if (x == 0) return 32; + x = (x & -x) * 0x04D7651F; + return table[x >> 27]; +} + +static const unsigned long long magic = 0x03f08c5392f756cdULL; + +static const char table[64] = { + 0, 1, 12, 2, 13, 22, 17, 3, + 14, 33, 23, 36, 18, 58, 28, 4, + 62, 15, 34, 26, 24, 48, 50, 37, + 19, 55, 59, 52, 29, 44, 39, 5, + 63, 11, 21, 16, 32, 35, 57, 27, + 61, 25, 47, 49, 54, 51, 43, 38, + 10, 20, 31, 56, 60, 46, 53, 42, + 9, 30, 45, 41, 8, 40, 7, 6, +}; + +int ctz4 (unsigned long x) +{ + unsigned long lsb = x & -x; + return table[(lsb * magic) >> 58]; +} + +/* { dg-final { scan-assembler-times "ctz\t" 3 } } */ +/* { dg-final { scan-assembler-times "andi\t" 1 } } */ +/* { dg-final { scan-assembler-times "mul\t" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c new file mode 100644 index 00000000000..f9fbcb38dee --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ + +int ctz1 (unsigned x) +{ + static const char table[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; +} + +int ctz2 (unsigned x) +{ +#define u 0 + static short table[64] = + { + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u + }; + + x = (x & -x) * 0x0450FBAF; + return table[x >> 26]; +} + +int ctz3 (unsigned x) +{ + static 
int table[32] = + { + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 + }; + + if (x == 0) return 32; + x = (x & -x) * 0x04D7651F; + return table[x >> 27]; +} + +static const unsigned long long magic = 0x03f08c5392f756cdULL; + +static const char table[64] = { + 0, 1, 12, 2, 13, 22, 17, 3, + 14, 33, 23, 36, 18, 58, 28, 4, + 62, 15, 34, 26, 24, 48, 50, 37, + 19, 55, 59, 52, 29, 44, 39, 5, + 63, 11, 21, 16, 32, 35, 57, 27, + 61, 25, 47, 49, 54, 51, 43, 38, + 10, 20, 31, 56, 60, 46, 53, 42, + 9, 30, 45, 41, 8, 40, 7, 6, +}; + +int ctz4 (unsigned long x) +{ + unsigned long lsb = x & -x; + return table[(lsb * magic) >> 58]; +} + +/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */ +/* { dg-final { scan-assembler-times "ctz\t" 1 } } */ +/* { dg-final { scan-assembler-times "andi\t" 2 } } */ +/* { dg-final { scan-assembler-not "mul" } } */