diff mbox series

[v1] RISC-V: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO

Message ID 20220423234448.393794-1-philipp.tomsich@vrull.eu
State New
Headers show
Series [v1] RISC-V: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO | expand

Commit Message

Philipp Tomsich April 23, 2022, 11:44 p.m. UTC
The Zbb support has introduced ctz and clz to the backend, but some
transformations in GCC need to know what the value of c[lt]z at zero
is. This affects how the optab is generated and may suppress use of
CLZ/CTZ in tree passes.

Among other things, this is needed for the transformation of
table-based ctz-implementations, such as in deepsjeng, to work
(see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838).

Prior to this change, the test case from PR90838 would compile to the
following on RISC-V targets with Zbb:
  myctz:
	lui	a4,%hi(.LC0)
	ld	a4,%lo(.LC0)(a4)
	neg	a5,a0
	and	a5,a5,a0
	mul	a5,a5,a4
	lui	a4,%hi(.LANCHOR0)
	addi	a4,a4,%lo(.LANCHOR0)
	srli	a5,a5,58
	sh2add	a5,a5,a4
	lw	a0,0(a5)
	ret

After this change, we get:
  myctz:
	ctz	a0,a0
	andi	a0,a0,63
	ret

Testing this with deepsjeng_r (from SPEC 2017) against QEMU, this
shows a clear reduction in dynamic instruction count:
 - before  1961888067076
 - after   1907928279874 (2.75% reduction)

gcc/ChangeLog:

	* config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement.
	(CTZ_DEFINED_VALUE_AT_ZERO): Same.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr90838.c: Add additional flags (dg-additional-options)
	  when compiling for riscv64.
	* gcc.target/riscv/zbb-ctz.c: New test.
	* gcc.target/riscv/zbb-ctz-32.c: New test.

Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
Co-developed-by: Manolis Tsamis <manolis.tsamis@vrull.eu>

---
 gcc/config/riscv/riscv.h                    |  5 ++
 gcc/testsuite/gcc.dg/pr90838.c              |  2 +
 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c | 65 ++++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/zbb-ctz.c    | 66 +++++++++++++++++++++
 4 files changed, 138 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz.c

Comments

Philipp Tomsich April 28, 2022, 10:11 p.m. UTC | #1
Kito,

Did you have a chance to take a look at this one?

I assume this will have to wait until we reopen for 13...
OK for 13?  Also: OK for a backport (once a branch for that exists)?

Philipp.


On Sun, 24 Apr 2022 at 01:44, Philipp Tomsich <philipp.tomsich@vrull.eu> wrote:
>
> The Zbb support has introduced ctz and clz to the backend, but some
> transformations in GCC need to know what the value of c[lt]z at zero
> is. This affects how the optab is generated and may suppress use of
> CLZ/CTZ in tree passes.
>
> Among other things, this is needed for the transformation of
> table-based ctz-implementations, such as in deepsjeng, to work
> (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838).
>
> Prior to this change, the test case from PR90838 would compile to
> on RISC-V targets with Zbb:
>   myctz:
>         lui     a4,%hi(.LC0)
>         ld      a4,%lo(.LC0)(a4)
>         neg     a5,a0
>         and     a5,a5,a0
>         mul     a5,a5,a4
>         lui     a4,%hi(.LANCHOR0)
>         addi    a4,a4,%lo(.LANCHOR0)
>         srli    a5,a5,58
>         sh2add  a5,a5,a4
>         lw      a0,0(a5)
>         ret
>
> After this change, we get:
>   myctz:
>         ctz     a0,a0
>         andi    a0,a0,63
>         ret
>
> Testing this with deepsjeng_r (from SPEC 2017) against QEMU, this
> shows a clear reduction in dynamic instruction count:
>  - before  1961888067076
>  - after   1907928279874 (2.75% reduction)
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement.
>         (CTZ_DEFINED_VALUE_AT_ZERO): Same.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/pr90838.c: Add additional flags (dg-additional-options)
>           when compiling for riscv64.
>         * gcc.target/riscv/zbb-ctz.c: New test.
>
> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
> Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> Co-developed-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
>
> ---
>  gcc/config/riscv/riscv.h                    |  5 ++
>  gcc/testsuite/gcc.dg/pr90838.c              |  2 +
>  gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c | 65 ++++++++++++++++++++
>  gcc/testsuite/gcc.target/riscv/zbb-ctz.c    | 66 +++++++++++++++++++++
>  4 files changed, 138 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz.c
>
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 4210e252255..95f72e2fd3f 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -1019,4 +1019,9 @@ extern void riscv_remove_unneeded_save_restore_calls (void);
>
>  #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO)
>
> +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
> +  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
> +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
> +  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
> +
>  #endif /* ! GCC_RISCV_H */
> diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
> index 41c5dab9a5c..162bd6f51d0 100644
> --- a/gcc/testsuite/gcc.dg/pr90838.c
> +++ b/gcc/testsuite/gcc.dg/pr90838.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */
> +/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */
>
>  int ctz1 (unsigned x)
>  {
> @@ -57,3 +58,4 @@ int ctz4 (unsigned long x)
>  }
>
>  /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */
> +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target riscv64*-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
> new file mode 100644
> index 00000000000..b903517197a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
> @@ -0,0 +1,65 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */
> +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
> +
> +int ctz1 (unsigned x)
> +{
> +  static const char table[32] =
> +    {
> +      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
> +      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
> +    };
> +
> +  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
> +}
> +
> +int ctz2 (unsigned x)
> +{
> +#define u 0
> +  static short table[64] =
> +    {
> +      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
> +      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
> +      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
> +      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
> +    };
> +
> +  x = (x & -x) * 0x0450FBAF;
> +  return table[x >> 26];
> +}
> +
> +int ctz3 (unsigned x)
> +{
> +  static int table[32] =
> +    {
> +      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
> +      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
> +    };
> +
> +  if (x == 0) return 32;
> +  x = (x & -x) * 0x04D7651F;
> +  return table[x >> 27];
> +}
> +
> +static const unsigned long long magic = 0x03f08c5392f756cdULL;
> +
> +static const char table[64] = {
> +     0,  1, 12,  2, 13, 22, 17,  3,
> +    14, 33, 23, 36, 18, 58, 28,  4,
> +    62, 15, 34, 26, 24, 48, 50, 37,
> +    19, 55, 59, 52, 29, 44, 39,  5,
> +    63, 11, 21, 16, 32, 35, 57, 27,
> +    61, 25, 47, 49, 54, 51, 43, 38,
> +    10, 20, 31, 56, 60, 46, 53, 42,
> +     9, 30, 45, 41,  8, 40,  7,  6,
> +};
> +
> +int ctz4 (unsigned long x)
> +{
> +  unsigned long lsb = x & -x;
> +  return table[(lsb * magic) >> 58];
> +}
> +
> +/* { dg-final { scan-assembler-times "ctz\t" 3 } } */
> +/* { dg-final { scan-assembler-times "andi\t" 1 } } */
> +/* { dg-final { scan-assembler-times "mul\t" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c
> new file mode 100644
> index 00000000000..f9fbcb38dee
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c
> @@ -0,0 +1,66 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
> +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
> +
> +int ctz1 (unsigned x)
> +{
> +  static const char table[32] =
> +    {
> +      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
> +      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
> +    };
> +
> +  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
> +}
> +
> +int ctz2 (unsigned x)
> +{
> +#define u 0
> +  static short table[64] =
> +    {
> +      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
> +      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
> +      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
> +      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
> +    };
> +
> +  x = (x & -x) * 0x0450FBAF;
> +  return table[x >> 26];
> +}
> +
> +int ctz3 (unsigned x)
> +{
> +  static int table[32] =
> +    {
> +      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
> +      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
> +    };
> +
> +  if (x == 0) return 32;
> +  x = (x & -x) * 0x04D7651F;
> +  return table[x >> 27];
> +}
> +
> +static const unsigned long long magic = 0x03f08c5392f756cdULL;
> +
> +static const char table[64] = {
> +     0,  1, 12,  2, 13, 22, 17,  3,
> +    14, 33, 23, 36, 18, 58, 28,  4,
> +    62, 15, 34, 26, 24, 48, 50, 37,
> +    19, 55, 59, 52, 29, 44, 39,  5,
> +    63, 11, 21, 16, 32, 35, 57, 27,
> +    61, 25, 47, 49, 54, 51, 43, 38,
> +    10, 20, 31, 56, 60, 46, 53, 42,
> +     9, 30, 45, 41,  8, 40,  7,  6,
> +};
> +
> +int ctz4 (unsigned long x)
> +{
> +  unsigned long lsb = x & -x;
> +  return table[(lsb * magic) >> 58];
> +}
> +
> +/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */
> +/* { dg-final { scan-assembler-times "ctz\t"  1 } } */
> +/* { dg-final { scan-assembler-times "andi\t" 2 } } */
> +/* { dg-final { scan-assembler-not "mul" } } */
> --
> 2.34.1
>
Palmer Dabbelt April 28, 2022, 11:01 p.m. UTC | #2
On Thu, 28 Apr 2022 15:11:49 PDT (-0700), philipp.tomsich@vrull.eu wrote:
> Kito,
>
> Did you have a chance to take a look at this one?
>
> I assume this will have to wait until we reopen for 13...
> OK for 13?  Also: OK for a backport (once a branch for that exists)?

I'd assumed it was 13 material when it was sent, given the timing and 
that it didn't call out anything about being targeted at 12.  IIUC we've 
also generally only backported regression fixes and that's the standard 
policy, but maybe I'm misunderstanding something?

>
> Philipp.
>
>
> On Sun, 24 Apr 2022 at 01:44, Philipp Tomsich <philipp.tomsich@vrull.eu> wrote:
>>
>> The Zbb support has introduced ctz and clz to the backend, but some
>> transformations in GCC need to know what the value of c[lt]z at zero
>> is. This affects how the optab is generated and may suppress use of
>> CLZ/CTZ in tree passes.
>>
>> Among other things, this is needed for the transformation of
>> table-based ctz-implementations, such as in deepsjeng, to work
>> (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838).
>>
>> Prior to this change, the test case from PR90838 would compile to
>> on RISC-V targets with Zbb:
>>   myctz:
>>         lui     a4,%hi(.LC0)
>>         ld      a4,%lo(.LC0)(a4)
>>         neg     a5,a0
>>         and     a5,a5,a0
>>         mul     a5,a5,a4
>>         lui     a4,%hi(.LANCHOR0)
>>         addi    a4,a4,%lo(.LANCHOR0)
>>         srli    a5,a5,58
>>         sh2add  a5,a5,a4
>>         lw      a0,0(a5)
>>         ret
>>
>> After this change, we get:
>>   myctz:
>>         ctz     a0,a0
>>         andi    a0,a0,63
>>         ret

This one looks correct to me, but does it pass the rest of the test 
suite?  I haven't been running regressions with the bitmanip extensions 
enabled, and I don't see any machinery in riscv-gnu-toolchain for 
B-extension-family regression runs (plus, my box is kind of tied up 
right now anyway trying to get other stuff fixed).

>>
>> Testing this with deepsjeng_r (from SPEC 2017) against QEMU, this
>> shows a clear reduction in dynamic instruction count:
>>  - before  1961888067076
>>  - after   1907928279874 (2.75% reduction)
>>
>> gcc/ChangeLog:
>>
>>         * config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement.
>>         (CTZ_DEFINED_VALUE_AT_ZERO): Same.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.dg/pr90838.c: Add additional flags (dg-additional-options)
>>           when compiling for riscv64.
>>         * gcc.target/riscv/zbb-ctz.c: New test.
>>
>> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
>> Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
>> Co-developed-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
>>
>> ---
>>  gcc/config/riscv/riscv.h                    |  5 ++
>>  gcc/testsuite/gcc.dg/pr90838.c              |  2 +
>>  gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c | 65 ++++++++++++++++++++
>>  gcc/testsuite/gcc.target/riscv/zbb-ctz.c    | 66 +++++++++++++++++++++

I was a bit worried about where the tests came from, but it looks like
they're essentially the same as some arm64 and x86 tests, so they should
be fine.  It might be good to refactor things to avoid the duplication,
though.

>>  4 files changed, 138 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz.c
>>
>> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
>> index 4210e252255..95f72e2fd3f 100644
>> --- a/gcc/config/riscv/riscv.h
>> +++ b/gcc/config/riscv/riscv.h
>> @@ -1019,4 +1019,9 @@ extern void riscv_remove_unneeded_save_restore_calls (void);
>>
>>  #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO)
>>
>> +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
>> +  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
>> +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
>> +  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)

IIUC this is correct: it expands to 8/16 for QImode/HImode, but as we
don't have clz/ctz defined for those it's fine.  I'll let Kito have some
time to chime in, though.

>> +
>>  #endif /* ! GCC_RISCV_H */
>> diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
>> index 41c5dab9a5c..162bd6f51d0 100644
>> --- a/gcc/testsuite/gcc.dg/pr90838.c
>> +++ b/gcc/testsuite/gcc.dg/pr90838.c
>> @@ -1,5 +1,6 @@
>>  /* { dg-do compile } */
>>  /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */
>> +/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */
>>
>>  int ctz1 (unsigned x)
>>  {
>> @@ -57,3 +58,4 @@ int ctz4 (unsigned long x)
>>  }
>>
>>  /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */
>> +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target riscv64*-*-* } } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
>> new file mode 100644
>> index 00000000000..b903517197a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
>> @@ -0,0 +1,65 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */
>> +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
>> +
>> +int ctz1 (unsigned x)
>> +{
>> +  static const char table[32] =
>> +    {
>> +      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
>> +      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
>> +    };
>> +
>> +  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
>> +}
>> +
>> +int ctz2 (unsigned x)
>> +{
>> +#define u 0
>> +  static short table[64] =
>> +    {
>> +      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
>> +      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
>> +      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
>> +      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
>> +    };
>> +
>> +  x = (x & -x) * 0x0450FBAF;
>> +  return table[x >> 26];
>> +}
>> +
>> +int ctz3 (unsigned x)
>> +{
>> +  static int table[32] =
>> +    {
>> +      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
>> +      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
>> +    };
>> +
>> +  if (x == 0) return 32;
>> +  x = (x & -x) * 0x04D7651F;
>> +  return table[x >> 27];
>> +}
>> +
>> +static const unsigned long long magic = 0x03f08c5392f756cdULL;
>> +
>> +static const char table[64] = {
>> +     0,  1, 12,  2, 13, 22, 17,  3,
>> +    14, 33, 23, 36, 18, 58, 28,  4,
>> +    62, 15, 34, 26, 24, 48, 50, 37,
>> +    19, 55, 59, 52, 29, 44, 39,  5,
>> +    63, 11, 21, 16, 32, 35, 57, 27,
>> +    61, 25, 47, 49, 54, 51, 43, 38,
>> +    10, 20, 31, 56, 60, 46, 53, 42,
>> +     9, 30, 45, 41,  8, 40,  7,  6,
>> +};
>> +
>> +int ctz4 (unsigned long x)
>> +{
>> +  unsigned long lsb = x & -x;
>> +  return table[(lsb * magic) >> 58];
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "ctz\t" 3 } } */
>> +/* { dg-final { scan-assembler-times "andi\t" 1 } } */
>> +/* { dg-final { scan-assembler-times "mul\t" 1 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c
>> new file mode 100644
>> index 00000000000..f9fbcb38dee
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c
>> @@ -0,0 +1,66 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
>> +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
>> +
>> +int ctz1 (unsigned x)
>> +{
>> +  static const char table[32] =
>> +    {
>> +      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
>> +      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
>> +    };
>> +
>> +  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
>> +}
>> +
>> +int ctz2 (unsigned x)
>> +{
>> +#define u 0
>> +  static short table[64] =
>> +    {
>> +      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
>> +      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
>> +      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
>> +      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
>> +    };
>> +
>> +  x = (x & -x) * 0x0450FBAF;
>> +  return table[x >> 26];
>> +}
>> +
>> +int ctz3 (unsigned x)
>> +{
>> +  static int table[32] =
>> +    {
>> +      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
>> +      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
>> +    };
>> +
>> +  if (x == 0) return 32;
>> +  x = (x & -x) * 0x04D7651F;
>> +  return table[x >> 27];
>> +}
>> +
>> +static const unsigned long long magic = 0x03f08c5392f756cdULL;
>> +
>> +static const char table[64] = {
>> +     0,  1, 12,  2, 13, 22, 17,  3,
>> +    14, 33, 23, 36, 18, 58, 28,  4,
>> +    62, 15, 34, 26, 24, 48, 50, 37,
>> +    19, 55, 59, 52, 29, 44, 39,  5,
>> +    63, 11, 21, 16, 32, 35, 57, 27,
>> +    61, 25, 47, 49, 54, 51, 43, 38,
>> +    10, 20, 31, 56, 60, 46, 53, 42,
>> +     9, 30, 45, 41,  8, 40,  7,  6,
>> +};
>> +
>> +int ctz4 (unsigned long x)
>> +{
>> +  unsigned long lsb = x & -x;
>> +  return table[(lsb * magic) >> 58];
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */
>> +/* { dg-final { scan-assembler-times "ctz\t"  1 } } */
>> +/* { dg-final { scan-assembler-times "andi\t" 2 } } */
>> +/* { dg-final { scan-assembler-not "mul" } } */
>> --
>> 2.34.1
>>
Kito Cheng May 2, 2022, 2:35 a.m. UTC | #3
Ack, I am OoO during 5/1-5/4, I'll start looking at those patches in
the GCC 13 queue including this one :)

On Fri, Apr 29, 2022 at 6:12 AM Philipp Tomsich
<philipp.tomsich@vrull.eu> wrote:
>
> Kito,
>
> Did you have a chance to take a look at this one?
>
> I assume this will have to wait until we reopen for 13...
> OK for 13?  Also: OK for a backport (once a branch for that exists)?
>
> Philipp.
>
>
> On Sun, 24 Apr 2022 at 01:44, Philipp Tomsich <philipp.tomsich@vrull.eu> wrote:
> >
> > The Zbb support has introduced ctz and clz to the backend, but some
> > transformations in GCC need to know what the value of c[lt]z at zero
> > is. This affects how the optab is generated and may suppress use of
> > CLZ/CTZ in tree passes.
> >
> > Among other things, this is needed for the transformation of
> > table-based ctz-implementations, such as in deepsjeng, to work
> > (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838).
> >
> > Prior to this change, the test case from PR90838 would compile to
> > on RISC-V targets with Zbb:
> >   myctz:
> >         lui     a4,%hi(.LC0)
> >         ld      a4,%lo(.LC0)(a4)
> >         neg     a5,a0
> >         and     a5,a5,a0
> >         mul     a5,a5,a4
> >         lui     a4,%hi(.LANCHOR0)
> >         addi    a4,a4,%lo(.LANCHOR0)
> >         srli    a5,a5,58
> >         sh2add  a5,a5,a4
> >         lw      a0,0(a5)
> >         ret
> >
> > After this change, we get:
> >   myctz:
> >         ctz     a0,a0
> >         andi    a0,a0,63
> >         ret
> >
> > Testing this with deepsjeng_r (from SPEC 2017) against QEMU, this
> > shows a clear reduction in dynamic instruction count:
> >  - before  1961888067076
> >  - after   1907928279874 (2.75% reduction)
> >
> > gcc/ChangeLog:
> >
> >         * config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement.
> >         (CTZ_DEFINED_VALUE_AT_ZERO): Same.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.dg/pr90838.c: Add additional flags (dg-additional-options)
> >           when compiling for riscv64.
> >         * gcc.target/riscv/zbb-ctz.c: New test.
> >
> > Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
> > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> > Co-developed-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> >
> > ---
> >  gcc/config/riscv/riscv.h                    |  5 ++
> >  gcc/testsuite/gcc.dg/pr90838.c              |  2 +
> >  gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c | 65 ++++++++++++++++++++
> >  gcc/testsuite/gcc.target/riscv/zbb-ctz.c    | 66 +++++++++++++++++++++
> >  4 files changed, 138 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz.c
> >
> > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> > index 4210e252255..95f72e2fd3f 100644
> > --- a/gcc/config/riscv/riscv.h
> > +++ b/gcc/config/riscv/riscv.h
> > @@ -1019,4 +1019,9 @@ extern void riscv_remove_unneeded_save_restore_calls (void);
> >
> >  #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO)
> >
> > +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
> > +  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
> > +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
> > +  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
> > +
> >  #endif /* ! GCC_RISCV_H */
> > diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
> > index 41c5dab9a5c..162bd6f51d0 100644
> > --- a/gcc/testsuite/gcc.dg/pr90838.c
> > +++ b/gcc/testsuite/gcc.dg/pr90838.c
> > @@ -1,5 +1,6 @@
> >  /* { dg-do compile } */
> >  /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */
> > +/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */
> >
> >  int ctz1 (unsigned x)
> >  {
> > @@ -57,3 +58,4 @@ int ctz4 (unsigned long x)
> >  }
> >
> >  /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */
> > +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target riscv64*-*-* } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
> > new file mode 100644
> > index 00000000000..b903517197a
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
> > @@ -0,0 +1,65 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */
> > +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
> > +
> > +int ctz1 (unsigned x)
> > +{
> > +  static const char table[32] =
> > +    {
> > +      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
> > +      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
> > +    };
> > +
> > +  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
> > +}
> > +
> > +int ctz2 (unsigned x)
> > +{
> > +#define u 0
> > +  static short table[64] =
> > +    {
> > +      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
> > +      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
> > +      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
> > +      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
> > +    };
> > +
> > +  x = (x & -x) * 0x0450FBAF;
> > +  return table[x >> 26];
> > +}
> > +
> > +int ctz3 (unsigned x)
> > +{
> > +  static int table[32] =
> > +    {
> > +      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
> > +      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
> > +    };
> > +
> > +  if (x == 0) return 32;
> > +  x = (x & -x) * 0x04D7651F;
> > +  return table[x >> 27];
> > +}
> > +
> > +static const unsigned long long magic = 0x03f08c5392f756cdULL;
> > +
> > +static const char table[64] = {
> > +     0,  1, 12,  2, 13, 22, 17,  3,
> > +    14, 33, 23, 36, 18, 58, 28,  4,
> > +    62, 15, 34, 26, 24, 48, 50, 37,
> > +    19, 55, 59, 52, 29, 44, 39,  5,
> > +    63, 11, 21, 16, 32, 35, 57, 27,
> > +    61, 25, 47, 49, 54, 51, 43, 38,
> > +    10, 20, 31, 56, 60, 46, 53, 42,
> > +     9, 30, 45, 41,  8, 40,  7,  6,
> > +};
> > +
> > +int ctz4 (unsigned long x)
> > +{
> > +  unsigned long lsb = x & -x;
> > +  return table[(lsb * magic) >> 58];
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "ctz\t" 3 } } */
> > +/* { dg-final { scan-assembler-times "andi\t" 1 } } */
> > +/* { dg-final { scan-assembler-times "mul\t" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c
> > new file mode 100644
> > index 00000000000..f9fbcb38dee
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c
> > @@ -0,0 +1,66 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
> > +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
> > +
> > +int ctz1 (unsigned x)
> > +{
> > +  static const char table[32] =
> > +    {
> > +      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
> > +      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
> > +    };
> > +
> > +  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
> > +}
> > +
> > +int ctz2 (unsigned x)
> > +{
> > +#define u 0
> > +  static short table[64] =
> > +    {
> > +      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
> > +      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
> > +      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
> > +      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
> > +    };
> > +
> > +  x = (x & -x) * 0x0450FBAF;
> > +  return table[x >> 26];
> > +}
> > +
> > +int ctz3 (unsigned x)
> > +{
> > +  static int table[32] =
> > +    {
> > +      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
> > +      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
> > +    };
> > +
> > +  if (x == 0) return 32;
> > +  x = (x & -x) * 0x04D7651F;
> > +  return table[x >> 27];
> > +}
> > +
> > +static const unsigned long long magic = 0x03f08c5392f756cdULL;
> > +
> > +static const char table[64] = {
> > +     0,  1, 12,  2, 13, 22, 17,  3,
> > +    14, 33, 23, 36, 18, 58, 28,  4,
> > +    62, 15, 34, 26, 24, 48, 50, 37,
> > +    19, 55, 59, 52, 29, 44, 39,  5,
> > +    63, 11, 21, 16, 32, 35, 57, 27,
> > +    61, 25, 47, 49, 54, 51, 43, 38,
> > +    10, 20, 31, 56, 60, 46, 53, 42,
> > +     9, 30, 45, 41,  8, 40,  7,  6,
> > +};
> > +
> > +int ctz4 (unsigned long x)
> > +{
> > +  unsigned long lsb = x & -x;
> > +  return table[(lsb * magic) >> 58];
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */
> > +/* { dg-final { scan-assembler-times "ctz\t"  1 } } */
> > +/* { dg-final { scan-assembler-times "andi\t" 2 } } */
> > +/* { dg-final { scan-assembler-not "mul" } } */
> > --
> > 2.34.1
> >
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 4210e252255..95f72e2fd3f 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -1019,4 +1019,9 @@  extern void riscv_remove_unneeded_save_restore_calls (void);
 
 #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO)
 
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+
 #endif /* ! GCC_RISCV_H */
diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
index 41c5dab9a5c..162bd6f51d0 100644
--- a/gcc/testsuite/gcc.dg/pr90838.c
+++ b/gcc/testsuite/gcc.dg/pr90838.c
@@ -1,5 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */
+/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */
 
 int ctz1 (unsigned x)
 {
@@ -57,3 +58,4 @@  int ctz4 (unsigned long x)
 }
 
 /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */
+/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target riscv64*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
new file mode 100644
index 00000000000..b903517197a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c
@@ -0,0 +1,65 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+int ctz1 (unsigned x)
+{
+  static const char table[32] =
+    {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
+}
+
+int ctz2 (unsigned x)
+{
+#define u 0
+  static short table[64] =
+    {
+      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
+      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
+      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
+      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
+    };
+
+  x = (x & -x) * 0x0450FBAF;
+  return table[x >> 26];
+}
+
+int ctz3 (unsigned x)
+{
+  static int table[32] =
+    {
+      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
+      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
+    };
+
+  if (x == 0) return 32;
+  x = (x & -x) * 0x04D7651F;
+  return table[x >> 27];
+}
+
+static const unsigned long long magic = 0x03f08c5392f756cdULL;
+
+static const char table[64] = {
+     0,  1, 12,  2, 13, 22, 17,  3,
+    14, 33, 23, 36, 18, 58, 28,  4,
+    62, 15, 34, 26, 24, 48, 50, 37,
+    19, 55, 59, 52, 29, 44, 39,  5,
+    63, 11, 21, 16, 32, 35, 57, 27,
+    61, 25, 47, 49, 54, 51, 43, 38,
+    10, 20, 31, 56, 60, 46, 53, 42,
+     9, 30, 45, 41,  8, 40,  7,  6,
+};
+
+int ctz4 (unsigned long x)
+{
+  unsigned long lsb = x & -x;
+  return table[(lsb * magic) >> 58];
+}
+
+/* { dg-final { scan-assembler-times "ctz\t" 3 } } */
+/* { dg-final { scan-assembler-times "andi\t" 1 } } */
+/* { dg-final { scan-assembler-times "mul\t" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c
new file mode 100644
index 00000000000..f9fbcb38dee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c
@@ -0,0 +1,66 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+int ctz1 (unsigned x)
+{
+  static const char table[32] =
+    {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
+}
+
+int ctz2 (unsigned x)
+{
+#define u 0
+  static short table[64] =
+    {
+      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
+      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
+      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
+      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
+    };
+
+  x = (x & -x) * 0x0450FBAF;
+  return table[x >> 26];
+}
+
+int ctz3 (unsigned x)
+{
+  static int table[32] =
+    {
+      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
+      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
+    };
+
+  if (x == 0) return 32;
+  x = (x & -x) * 0x04D7651F;
+  return table[x >> 27];
+}
+
+static const unsigned long long magic = 0x03f08c5392f756cdULL;
+
+static const char table[64] = {
+     0,  1, 12,  2, 13, 22, 17,  3,
+    14, 33, 23, 36, 18, 58, 28,  4,
+    62, 15, 34, 26, 24, 48, 50, 37,
+    19, 55, 59, 52, 29, 44, 39,  5,
+    63, 11, 21, 16, 32, 35, 57, 27,
+    61, 25, 47, 49, 54, 51, 43, 38,
+    10, 20, 31, 56, 60, 46, 53, 42,
+     9, 30, 45, 41,  8, 40,  7,  6,
+};
+
+int ctz4 (unsigned long x)
+{
+  unsigned long lsb = x & -x;
+  return table[(lsb * magic) >> 58];
+}
+
+/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */
+/* { dg-final { scan-assembler-times "ctz\t"  1 } } */
+/* { dg-final { scan-assembler-times "andi\t" 2 } } */
+/* { dg-final { scan-assembler-not "mul" } } */