Message ID | PAWPR08MB89829CCDE1529CE888C094AF83D4A@PAWPR08MB8982.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | AArch64: Improve immediate generation | expand |
On 19/10/2023 13:43, Wilco Dijkstra wrote: > Further improve immediate generation by adding support for 2-instruction > MOV/EOR bitmask immediates. This reduces the number of 3/4-instruction > immediates in SPECCPU2017 by ~2%. > > Passes regress, OK for commit? > > gcc/ChangeLog: > * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate) > Add support for immediates using MOV/EOR bitmask. > > gcc/testsuite: > * gcc.target/aarch64/imm_choice_comparison.c: Fix test. > * gcc.target/aarch64/moveor_imm.c: Add new test. > * gcc.target/aarch64/pr106583.c: Fix test. > > --- > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 578a253d6e0e133e19592553fc873b3e73f9f218..ed5be2b64c9a767d74e9d78415da964c669001aa 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -5748,6 +5748,26 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, > } > return 2; > } > + > + /* Try 2 bitmask immediates which are xor'd together. */ > + for (i = 0; i < 64; i += 16) > + { > + val2 = (val >> i) & mask; > + val2 |= val2 << 16; > + val2 |= val2 << 32; > + if (aarch64_bitmask_imm (val2) && aarch64_bitmask_imm (val ^ val2)) > + break; > + } > + > + if (i != 64) > + { > + if (generate) > + { > + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); > + emit_insn (gen_xordi3 (dest, dest, GEN_INT (val ^ val2))); > + } > + return 2; > + } > } > > /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. 
*/ > diff --git a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c > index ebc44d6dbc7287d907603d77d7b54496de177c4b..2434ca380ca2cad3e1e4181deeaad680f518b866 100644 > --- a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c > +++ b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c > @@ -6,7 +6,7 @@ > int > foo (long long x) > { > - return x <= 0x1999999999999998; > + return x <= 0x0000999999999998; > } > > int > diff --git a/gcc/testsuite/gcc.target/aarch64/moveor_imm.c b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c > new file mode 100644 > index 0000000000000000000000000000000000000000..5f4997b50398fdda5924610959e0c54967ad0735 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c > @@ -0,0 +1,31 @@ > +/* { dg-do assemble } */ > +/* { dg-options "-O2 --save-temps" } */ > + > +long f1 (void) > +{ > + return 0x2aaaaaaaaaaaaaab; > +} > + > +long f2 (void) > +{ > + return 0x10f0f0f0f0f0f0f1; > +} > + > +long f3 (void) > +{ > + return 0xccccccccccccccd; > +} > + > +long f4 (void) > +{ > + return 0x1999999999999998; > +} > + > +long f5 (void) > +{ > + return 0x3f3333333f333333; > +} > + > +/* { dg-final { scan-assembler-not {\tmovk\t} } } */ > +/* { dg-final { scan-assembler-times {\tmov\t} 5 } } */ > +/* { dg-final { scan-assembler-times {\teor\t} 5 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c > index 0f931580817d78dc1cc58f03b251bd21bec71f59..79ada5160ce059d66eeaee407ca02488b2a1f114 100644 > --- a/gcc/testsuite/gcc.target/aarch64/pr106583.c > +++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c > @@ -3,7 +3,7 @@ > > long f1 (void) > { > - return 0x7efefefefefefeff; > + return 0x75fefefefefefeff; > } > > long f2 (void) >

I think the tests should be converted to use check-function-bodies, rather than scanning for counts on the entire file. It makes it far more obvious what's changed if a test starts to fail. The functions are all trivial, so the test can be quite precise.

Otherwise, this LGTM.

R.
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 578a253d6e0e133e19592553fc873b3e73f9f218..ed5be2b64c9a767d74e9d78415da964c669001aa 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -5748,6 +5748,26 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, } return 2; } + + /* Try 2 bitmask immediates which are xor'd together. */ + for (i = 0; i < 64; i += 16) + { + val2 = (val >> i) & mask; + val2 |= val2 << 16; + val2 |= val2 << 32; + if (aarch64_bitmask_imm (val2) && aarch64_bitmask_imm (val ^ val2)) + break; + } + + if (i != 64) + { + if (generate) + { + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); + emit_insn (gen_xordi3 (dest, dest, GEN_INT (val ^ val2))); + } + return 2; + } } /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */ diff --git a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c index ebc44d6dbc7287d907603d77d7b54496de177c4b..2434ca380ca2cad3e1e4181deeaad680f518b866 100644 --- a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c +++ b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c @@ -6,7 +6,7 @@ int foo (long long x) { - return x <= 0x1999999999999998; + return x <= 0x0000999999999998; } int diff --git a/gcc/testsuite/gcc.target/aarch64/moveor_imm.c b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c new file mode 100644 index 0000000000000000000000000000000000000000..5f4997b50398fdda5924610959e0c54967ad0735 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c @@ -0,0 +1,31 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 --save-temps" } */ + +long f1 (void) +{ + return 0x2aaaaaaaaaaaaaab; +} + +long f2 (void) +{ + return 0x10f0f0f0f0f0f0f1; +} + +long f3 (void) +{ + return 0xccccccccccccccd; +} + +long f4 (void) +{ + return 0x1999999999999998; +} + +long f5 (void) +{ + return 0x3f3333333f333333; +} + +/* { dg-final { scan-assembler-not {\tmovk\t} } } */ 
+/* { dg-final { scan-assembler-times {\tmov\t} 5 } } */ +/* { dg-final { scan-assembler-times {\teor\t} 5 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c index 0f931580817d78dc1cc58f03b251bd21bec71f59..79ada5160ce059d66eeaee407ca02488b2a1f114 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr106583.c +++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c @@ -3,7 +3,7 @@ long f1 (void) { - return 0x7efefefefefefeff; + return 0x75fefefefefefeff; } long f2 (void)