Message ID | 1636227062-9149-1-git-send-email-apinski@marvell.com |
---|---|
State | New |
Headers | show |
Series | aarch64: [PR101529] Fix vector shuffle insertion expansion | expand |
apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > From: Andrew Pinski <apinski@marvell.com> > > The function aarch64_evpc_ins would reuse the target even though > it might be the same register as the two inputs. > Instead of checking to see if we can reuse the target, creating > a new register always is better. > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. > > PR target/101529 > > gcc/ChangeLog: > > * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target > as an input instead create a new reg. > > gcc/testsuite/ChangeLog: > > * c-c++-common/torture/builtin-convertvector-2.c: New test. > * c-c++-common/torture/builtin-shufflevector-2.c: New test. > --- > gcc/config/aarch64/aarch64.c | 8 ++++-- > .../torture/builtin-convertvector-2.c | 26 +++++++++++++++++++ > .../torture/builtin-shufflevector-2.c | 26 +++++++++++++++++++ > 3 files changed, 58 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 2c00583e12c..e4fc546fae7 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d) > } > gcc_assert (extractindex < nelt); > > - emit_move_insn (d->target, insv); > + /* Use a new reg instead of target as one of the > + operands might be target. */ > + rtx original = gen_reg_rtx (GET_MODE (d->target)); > + > + emit_move_insn (original, insv); Why can't we just remove the move and pass insv directly to create_input_operand? That'll encourage the RA to allocate insv and d->target to the same register where possible (but will reload where tying is not possible). OK with that change if it works. Thanks, Richard > insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode); > expand_operand ops[5]; > create_output_operand (&ops[0], d->target, mode); > - create_input_operand (&ops[1], d->target, mode); > + create_input_operand (&ops[1], original, mode); > create_integer_operand (&ops[2], 1 << idx); > create_input_operand (&ops[3], extractv, mode); > create_integer_operand (&ops[4], extractindex); > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > new file mode 100644 > index 00000000000..d88f6a72b5c > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > @@ -0,0 +1,26 @@ > +/* { dg-do run } */ > +/* PR target/101529 */ > + > +typedef unsigned char __attribute__((__vector_size__ (1))) W; > +typedef unsigned char __attribute__((__vector_size__ (8))) V; > +typedef unsigned short __attribute__((__vector_size__ (16))) U; > + > +unsigned short us; > + > +/* aarch64 used to miscompile foo to just return 0. */ > +W > +foo (unsigned char uc) > +{ > + V v = __builtin_convertvector ((U){ } >= us, V); > + return __builtin_shufflevector ((W){ }, v, 4) & uc; > +} > + > +int > +main (void) > +{ > + W x = foo (5); > + if (x[0] != 5) > + __builtin_abort(); > + return 0; > +} > + > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > new file mode 100644 > index 00000000000..7c4999ed4e9 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > @@ -0,0 +1,26 @@ > +/* { dg-do run} */ > +/* PR target/101529 */ > +typedef unsigned char C; > +typedef unsigned char __attribute__((__vector_size__ (8))) V; > +typedef unsigned char __attribute__((__vector_size__ (32))) U; > + > +C c; > + > +/* aarch64 used to miscompile foo to just return a vector of 0s */ > +V > +foo (V v) > +{ > + v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }), > + 0, 1, 8, 32, 8, 20, 36, 36); > + return v; > +} > + > +int > +main (void) > +{ > + V v = foo ((V) { }); > + for (unsigned i = 0; i < sizeof (v); i++) > + if (v[i] != (i >= 2 ? 0xff : 0)) > + __builtin_abort (); > + return 0; > +}
On Tue, Nov 9, 2021 at 6:51 AM Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > > From: Andrew Pinski <apinski@marvell.com> > > > > The function aarch64_evpc_ins would reuse the target even though > > it might be the same register as the two inputs. > > Instead of checking to see if we can reuse the target, creating > > a new register always is better. > > > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. > > > > PR target/101529 > > > > gcc/ChangeLog: > > > > * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target > > as an input instead create a new reg. > > > > gcc/testsuite/ChangeLog: > > > > * c-c++-common/torture/builtin-convertvector-2.c: New test. > > * c-c++-common/torture/builtin-shufflevector-2.c: New test. > > --- > > gcc/config/aarch64/aarch64.c | 8 ++++-- > > .../torture/builtin-convertvector-2.c | 26 +++++++++++++++++++ > > .../torture/builtin-shufflevector-2.c | 26 +++++++++++++++++++ > > 3 files changed, 58 insertions(+), 2 deletions(-) > > create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > > create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > > > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > > index 2c00583e12c..e4fc546fae7 100644 > > --- a/gcc/config/aarch64/aarch64.c > > +++ b/gcc/config/aarch64/aarch64.c > > @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d) > > } > > gcc_assert (extractindex < nelt); > > > > - emit_move_insn (d->target, insv); > > + /* Use a new reg instead of target as one of the > > + operands might be target. */ > > + rtx original = gen_reg_rtx (GET_MODE (d->target)); > > + > > + emit_move_insn (original, insv); > > Why can't we just remove the move and pass insv directly to > create_input_operand? That'll encourage the RA to allocate insv and > d->target to the same register where possible (but will reload where > tying is not possible). > > OK with that change if it works. Yes that worked, committed as r12-5078-g52fa771758 . Thanks, Andrew > > Thanks, > Richard > > > insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode); > > expand_operand ops[5]; > > create_output_operand (&ops[0], d->target, mode); > > - create_input_operand (&ops[1], d->target, mode); > > + create_input_operand (&ops[1], original, mode); > > create_integer_operand (&ops[2], 1 << idx); > > create_input_operand (&ops[3], extractv, mode); > > create_integer_operand (&ops[4], extractindex); > > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > > new file mode 100644 > > index 00000000000..d88f6a72b5c > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > > @@ -0,0 +1,26 @@ > > +/* { dg-do run } */ > > +/* PR target/101529 */ > > + > > +typedef unsigned char __attribute__((__vector_size__ (1))) W; > > +typedef unsigned char __attribute__((__vector_size__ (8))) V; > > +typedef unsigned short __attribute__((__vector_size__ (16))) U; > > + > > +unsigned short us; > > + > > +/* aarch64 used to miscompile foo to just return 0. */ > > +W > > +foo (unsigned char uc) > > +{ > > + V v = __builtin_convertvector ((U){ } >= us, V); > > + return __builtin_shufflevector ((W){ }, v, 4) & uc; > > +} > > + > > +int > > +main (void) > > +{ > > + W x = foo (5); > > + if (x[0] != 5) > > + __builtin_abort(); > > + return 0; > > +} > > + > > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > > new file mode 100644 > > index 00000000000..7c4999ed4e9 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > > @@ -0,0 +1,26 @@ > > +/* { dg-do run} */ > > +/* PR target/101529 */ > > +typedef unsigned char C; > > +typedef unsigned char __attribute__((__vector_size__ (8))) V; > > +typedef unsigned char __attribute__((__vector_size__ (32))) U; > > + > > +C c; > > + > > +/* aarch64 used to miscompile foo to just return a vector of 0s */ > > +V > > +foo (V v) > > +{ > > + v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }), > > + 0, 1, 8, 32, 8, 20, 36, 36); > > + return v; > > +} > > + > > +int > > +main (void) > > +{ > > + V v = foo ((V) { }); > > + for (unsigned i = 0; i < sizeof (v); i++) > > + if (v[i] != (i >= 2 ? 0xff : 0)) > > + __builtin_abort (); > > + return 0; > > +}
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 2c00583e12c..e4fc546fae7 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d) } gcc_assert (extractindex < nelt); - emit_move_insn (d->target, insv); + /* Use a new reg instead of target as one of the + operands might be target. */ + rtx original = gen_reg_rtx (GET_MODE (d->target)); + + emit_move_insn (original, insv); insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode); expand_operand ops[5]; create_output_operand (&ops[0], d->target, mode); - create_input_operand (&ops[1], d->target, mode); + create_input_operand (&ops[1], original, mode); create_integer_operand (&ops[2], 1 << idx); create_input_operand (&ops[3], extractv, mode); create_integer_operand (&ops[4], extractindex); diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c new file mode 100644 index 00000000000..d88f6a72b5c --- /dev/null +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* PR target/101529 */ + +typedef unsigned char __attribute__((__vector_size__ (1))) W; +typedef unsigned char __attribute__((__vector_size__ (8))) V; +typedef unsigned short __attribute__((__vector_size__ (16))) U; + +unsigned short us; + +/* aarch64 used to miscompile foo to just return 0. */ +W +foo (unsigned char uc) +{ + V v = __builtin_convertvector ((U){ } >= us, V); + return __builtin_shufflevector ((W){ }, v, 4) & uc; +} + +int +main (void) +{ + W x = foo (5); + if (x[0] != 5) + __builtin_abort(); + return 0; +} + diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c new file mode 100644 index 00000000000..7c4999ed4e9 --- /dev/null +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c @@ -0,0 +1,26 @@ +/* { dg-do run} */ +/* PR target/101529 */ +typedef unsigned char C; +typedef unsigned char __attribute__((__vector_size__ (8))) V; +typedef unsigned char __attribute__((__vector_size__ (32))) U; + +C c; + +/* aarch64 used to miscompile foo to just return a vector of 0s */ +V +foo (V v) +{ + v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }), + 0, 1, 8, 32, 8, 20, 36, 36); + return v; +} + +int +main (void) +{ + V v = foo ((V) { }); + for (unsigned i = 0; i < sizeof (v); i++) + if (v[i] != (i >= 2 ? 0xff : 0)) + __builtin_abort (); + return 0; +}
From: Andrew Pinski <apinski@marvell.com> The function aarch64_evpc_ins would reuse the target even though it might be the same register as the two inputs. Instead of checking to see if we can reuse the target, creating a new register always is better. OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. PR target/101529 gcc/ChangeLog: * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target as an input instead create a new reg. gcc/testsuite/ChangeLog: * c-c++-common/torture/builtin-convertvector-2.c: New test. * c-c++-common/torture/builtin-shufflevector-2.c: New test. --- gcc/config/aarch64/aarch64.c | 8 ++++-- .../torture/builtin-convertvector-2.c | 26 +++++++++++++++++++ .../torture/builtin-shufflevector-2.c | 26 +++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c