Message ID: 1641814452-23872-1-git-send-email-apinski@marvell.com
State: New
apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> From: Andrew Pinski <apinski@marvell.com>
>
> This is a simple patch which simplifies the __builtin_aarch64_sqrt* builtins
> into the internal function SQRT which allows for constant folding and other
> optimizations at the gimple level. It was originally suggested we do to
> __builtin_sqrt just for __builtin_aarch64_sqrtdf when -fno-math-errno
> but since r6-4969-g686ee9719a4 we have the internal function SQRT which does
> the same so it makes we don't need to check -fno-math-errno either now.
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
> 	PR target/64821
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-builtins.c
> 	(aarch64_general_gimple_fold_builtin): Handle
> 	__builtin_aarch64_sqrt* and simplify into SQRT internal
> 	function.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/vsqrt-1.c: New test.
> 	* gcc.target/aarch64/vsqrt-2.c: New test.
> ---
>  gcc/config/aarch64/aarch64-builtins.c      |  7 ++++++
>  gcc/testsuite/gcc.target/aarch64/vsqrt-1.c | 17 +++++++++++++
>  gcc/testsuite/gcc.target/aarch64/vsqrt-2.c | 28 ++++++++++++++++++++++
>  3 files changed, 52 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
> index 58bcbd9875f..1bf487477eb 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -2820,6 +2820,13 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
>        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
>        break;
>
> +      /* Lower sqrt builtins to gimple/internal function sqrt. */
> +    BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
> +      new_stmt = gimple_build_call_internal (IFN_SQRT,
> +                                             1, args[0]);

Sorry for the nit-pick, but: IMO it looks odd to split this over two lines.

> +      gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
> +      break;
> +
>      /*lower store and load neon builtins to gimple. */
>      BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
>      BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
> diff --git a/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c b/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
> new file mode 100644
> index 00000000000..3207c8774ca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
> @@ -0,0 +1,17 @@
> +/* PR target/64821 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* Check that we constant fold sqrt(4.0) into 2.0. */
> +/* { dg-final { scan-tree-dump-times ".SQRT" 0 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "2.0" 1 "optimized" } } */

I think these would be better as scan-tree-dump-not and scan-tree-dump
respectively. The number of 2.0s isn't important and 2.0 has the risk
of matching someone's directory name. Probably worth backslash-quoting
the "."s too.

OK with those changes, thanks.

Richard

> +/* { dg-final { scan-assembler-times "fsqrt" 0 } } */
> +/* We should produce a fmov to d0 with 2.0 but currently don't, see PR 103959. */
> +/* { dg-final { scan-assembler-times "\n\tfmov\td0, 2.0e.0" 1 { xfail *-*-* } } } */
> +
> +#include <arm_neon.h>
> +
> +float64x1_t f64(void)
> +{
> +  float64x1_t a = (float64x1_t){4.0};
> +  return vsqrt_f64 (a);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c b/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
> new file mode 100644
> index 00000000000..7465b79f3a7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
> @@ -0,0 +1,28 @@
> +/* PR target/64821 */
> +/* { dg-do compile } */
> +/* { dg-options "-fdump-tree-optimized" } */
> +#include <arm_neon.h>
> +
> +/* Check that we lower __builtin_aarch64_sqrt* into the internal function SQRT. */
> +/* { dg-final { scan-tree-dump-times "__builtin_aarch64" 0 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times ".SQRT" 4 "optimized" } } */
> +
> +float64x1_t f64(float64x1_t a)
> +{
> +  return vsqrt_f64 (a);
> +}
> +
> +float64x2_t f64q(float64x2_t a)
> +{
> +  return vsqrtq_f64 (a);
> +}
> +
> +float32x2_t f32(float32x2_t a)
> +{
> +  return vsqrt_f32 (a);
> +}
> +
> +float32x4_t f32q(float32x4_t a)
> +{
> +  return vsqrtq_f32 (a);
> +}
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 58bcbd9875f..1bf487477eb 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -2820,6 +2820,13 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
       gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
       break;
 
+      /* Lower sqrt builtins to gimple/internal function sqrt. */
+    BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
+      new_stmt = gimple_build_call_internal (IFN_SQRT,
+                                             1, args[0]);
+      gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
+      break;
+
     /*lower store and load neon builtins to gimple. */
     BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
     BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
diff --git a/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c b/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
new file mode 100644
index 00000000000..3207c8774ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
@@ -0,0 +1,17 @@
+/* PR target/64821 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* Check that we constant fold sqrt(4.0) into 2.0. */
+/* { dg-final { scan-tree-dump-times ".SQRT" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "2.0" 1 "optimized" } } */
+/* { dg-final { scan-assembler-times "fsqrt" 0 } } */
+/* We should produce a fmov to d0 with 2.0 but currently don't, see PR 103959. */
+/* { dg-final { scan-assembler-times "\n\tfmov\td0, 2.0e.0" 1 { xfail *-*-* } } } */
+
+#include <arm_neon.h>
+
+float64x1_t f64(void)
+{
+  float64x1_t a = (float64x1_t){4.0};
+  return vsqrt_f64 (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c b/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
new file mode 100644
index 00000000000..7465b79f3a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
@@ -0,0 +1,28 @@
+/* PR target/64821 */
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-optimized" } */
+#include <arm_neon.h>
+
+/* Check that we lower __builtin_aarch64_sqrt* into the internal function SQRT. */
+/* { dg-final { scan-tree-dump-times "__builtin_aarch64" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times ".SQRT" 4 "optimized" } } */
+
+float64x1_t f64(float64x1_t a)
+{
+  return vsqrt_f64 (a);
+}
+
+float64x2_t f64q(float64x2_t a)
+{
+  return vsqrtq_f64 (a);
+}
+
+float32x2_t f32(float32x2_t a)
+{
+  return vsqrt_f32 (a);
+}
+
+float32x4_t f32q(float32x4_t a)
+{
+  return vsqrtq_f32 (a);
+}
From: Andrew Pinski <apinski@marvell.com>

This is a simple patch which simplifies the __builtin_aarch64_sqrt* builtins
into the internal function SQRT, which allows for constant folding and other
optimizations at the gimple level. It was originally suggested we lower it to
__builtin_sqrt, just for __builtin_aarch64_sqrtdf and only when -fno-math-errno
is in effect, but since r6-4969-g686ee9719a4 we have the internal function SQRT
which does the same, so we no longer need to check -fno-math-errno either.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

	PR target/64821

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.c
	(aarch64_general_gimple_fold_builtin): Handle
	__builtin_aarch64_sqrt* and simplify into SQRT internal
	function.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/vsqrt-1.c: New test.
	* gcc.target/aarch64/vsqrt-2.c: New test.
---
 gcc/config/aarch64/aarch64-builtins.c      |  7 ++++++
 gcc/testsuite/gcc.target/aarch64/vsqrt-1.c | 17 +++++++++++++
 gcc/testsuite/gcc.target/aarch64/vsqrt-2.c | 28 ++++++++++++++++++++++
 3 files changed, 52 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
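
To illustrate the effect the commit message describes (roughly; the exact SSA
names and the mode suffix of the builtin depend on the input), the "optimized"
gimple dump for something like vsqrtq_f64 changes along these lines:

  /* Before this patch (target builtin, opaque to the gimple folders):  */
  _2 = __builtin_aarch64_sqrtv2df (a_1(D));

  /* After this patch (internal function, understood by the folders):  */
  _2 = .SQRT (a_1(D));

  /* With a constant argument, as in vsqrt-1.c, the .SQRT call constant
     folds away entirely, leaving just the 2.0 result.  */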