diff mbox series

[5/8,v2] aarch64/fpu: Add vector variants of atanh

Message ID 20240403111353.51358-1-Joe.Ramsay@arm.com
State New
Headers show
Series None | expand

Commit Message

Joe Ramsay April 3, 2024, 11:13 a.m. UTC
---
Changes from v1:
* Sorted entries in Versions file
Will send updated version of this patch and 6/8, which needs a rebase.
Thanks,
Joe
 sysdeps/aarch64/fpu/Makefile                  |  1 +
 sysdeps/aarch64/fpu/Versions                  |  5 ++
 sysdeps/aarch64/fpu/advsimd_f32_protos.h      |  1 +
 sysdeps/aarch64/fpu/atanh_advsimd.c           | 64 +++++++++++++++
 sysdeps/aarch64/fpu/atanh_sve.c               | 59 ++++++++++++++
 sysdeps/aarch64/fpu/atanhf_advsimd.c          | 79 +++++++++++++++++++
 sysdeps/aarch64/fpu/atanhf_sve.c              | 54 +++++++++++++
 sysdeps/aarch64/fpu/bits/math-vector.h        |  8 ++
 .../fpu/test-double-advsimd-wrappers.c        |  1 +
 .../aarch64/fpu/test-double-sve-wrappers.c    |  1 +
 .../aarch64/fpu/test-float-advsimd-wrappers.c |  1 +
 sysdeps/aarch64/fpu/test-float-sve-wrappers.c |  1 +
 sysdeps/aarch64/libm-test-ulps                |  8 ++
 .../unix/sysv/linux/aarch64/libmvec.abilist   |  5 ++
 14 files changed, 288 insertions(+)
 create mode 100644 sysdeps/aarch64/fpu/atanh_advsimd.c
 create mode 100644 sysdeps/aarch64/fpu/atanh_sve.c
 create mode 100644 sysdeps/aarch64/fpu/atanhf_advsimd.c
 create mode 100644 sysdeps/aarch64/fpu/atanhf_sve.c

Comments

Szabolcs Nagy April 4, 2024, 8:02 a.m. UTC | #1
The 04/03/2024 12:13, Joe Ramsay wrote:
> ---
> Changes from v1:
> * Sorted entries in Versions file

OK.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>

> Will send updated version of this patch and 6/8, which needs a rebase.
> Thanks,
> Joe
>  sysdeps/aarch64/fpu/Makefile                  |  1 +
>  sysdeps/aarch64/fpu/Versions                  |  5 ++
>  sysdeps/aarch64/fpu/advsimd_f32_protos.h      |  1 +
>  sysdeps/aarch64/fpu/atanh_advsimd.c           | 64 +++++++++++++++
>  sysdeps/aarch64/fpu/atanh_sve.c               | 59 ++++++++++++++
>  sysdeps/aarch64/fpu/atanhf_advsimd.c          | 79 +++++++++++++++++++
>  sysdeps/aarch64/fpu/atanhf_sve.c              | 54 +++++++++++++
>  sysdeps/aarch64/fpu/bits/math-vector.h        |  8 ++
>  .../fpu/test-double-advsimd-wrappers.c        |  1 +
>  .../aarch64/fpu/test-double-sve-wrappers.c    |  1 +
>  .../aarch64/fpu/test-float-advsimd-wrappers.c |  1 +
>  sysdeps/aarch64/fpu/test-float-sve-wrappers.c |  1 +
>  sysdeps/aarch64/libm-test-ulps                |  8 ++
>  .../unix/sysv/linux/aarch64/libmvec.abilist   |  5 ++
>  14 files changed, 288 insertions(+)
>  create mode 100644 sysdeps/aarch64/fpu/atanh_advsimd.c
>  create mode 100644 sysdeps/aarch64/fpu/atanh_sve.c
>  create mode 100644 sysdeps/aarch64/fpu/atanhf_advsimd.c
>  create mode 100644 sysdeps/aarch64/fpu/atanhf_sve.c
> 
> diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
> index d474f2969d..4c878e5906 100644
> --- a/sysdeps/aarch64/fpu/Makefile
> +++ b/sysdeps/aarch64/fpu/Makefile
> @@ -3,6 +3,7 @@ libmvec-supported-funcs = acos \
>                            asin \
>                            asinh \
>                            atan \
> +                          atanh \
>                            atan2 \
>                            cos \
>                            cosh \
> diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
> index 08ea15efae..092949dc96 100644
> --- a/sysdeps/aarch64/fpu/Versions
> +++ b/sysdeps/aarch64/fpu/Versions
> @@ -89,6 +89,11 @@ libmvec {
>      _ZGVnN4v_asinhf;
>      _ZGVsMxv_asinh;
>      _ZGVsMxv_asinhf;
> +    _ZGVnN2v_atanh;
> +    _ZGVnN2v_atanhf;
> +    _ZGVnN4v_atanhf;
> +    _ZGVsMxv_atanh;
> +    _ZGVsMxv_atanhf;
>      _ZGVnN2v_cosh;
>      _ZGVnN2v_coshf;
>      _ZGVnN4v_coshf;
> diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
> index 1e80721c9f..afbb01e191 100644
> --- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
> +++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
> @@ -22,6 +22,7 @@ libmvec_hidden_proto (V_NAME_F1(acosh));
>  libmvec_hidden_proto (V_NAME_F1(asin));
>  libmvec_hidden_proto (V_NAME_F1(asinh));
>  libmvec_hidden_proto (V_NAME_F1(atan));
> +libmvec_hidden_proto (V_NAME_F1(atanh));
>  libmvec_hidden_proto (V_NAME_F1(cos));
>  libmvec_hidden_proto (V_NAME_F1(cosh));
>  libmvec_hidden_proto (V_NAME_F1(erf));
> diff --git a/sysdeps/aarch64/fpu/atanh_advsimd.c b/sysdeps/aarch64/fpu/atanh_advsimd.c
> new file mode 100644
> index 0000000000..3c3d0bd6ad
> --- /dev/null
> +++ b/sysdeps/aarch64/fpu/atanh_advsimd.c
> @@ -0,0 +1,64 @@
> +/* Double-precision vector (Advanced SIMD) atanh function
> +
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define WANT_V_LOG1P_K0_SHORTCUT 0
> +#include "v_log1p_inline.h"
> +
> +const static struct data
> +{
> +  struct v_log1p_data log1p_consts;
> +  uint64x2_t one, half;
> +} data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE,
> +	   .one = V2 (0x3ff0000000000000),
> +	   .half = V2 (0x3fe0000000000000) };
> +
> +static float64x2_t VPCS_ATTR NOINLINE
> +special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
> +{
> +  return v_call_f64 (atanh, x, y, special);
> +}
> +
> +/* Approximation for vector double-precision atanh(x) using modified log1p.
> +   The greatest observed error is 3.31 ULP:
> +   _ZGVnN2v_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
> +				      want 0x1.ffd8ff31b501cp-6.  */
> +VPCS_ATTR
> +float64x2_t V_NAME_D1 (atanh) (float64x2_t x)
> +{
> +  const struct data *d = ptr_barrier (&data);
> +
> +  float64x2_t ax = vabsq_f64 (x);
> +  uint64x2_t ia = vreinterpretq_u64_f64 (ax);
> +  uint64x2_t sign = veorq_u64 (vreinterpretq_u64_f64 (x), ia);
> +  uint64x2_t special = vcgeq_u64 (ia, d->one);
> +  float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->half));
> +
> +#if WANT_SIMD_EXCEPT
> +  ax = v_zerofy_f64 (ax, special);
> +#endif
> +
> +  float64x2_t y;
> +  y = vaddq_f64 (ax, ax);
> +  y = vdivq_f64 (y, vsubq_f64 (v_f64 (1), ax));
> +  y = log1p_inline (y, &d->log1p_consts);
> +
> +  if (__glibc_unlikely (v_any_u64 (special)))
> +    return special_case (x, vmulq_f64 (y, halfsign), special);
> +  return vmulq_f64 (y, halfsign);
> +}
> diff --git a/sysdeps/aarch64/fpu/atanh_sve.c b/sysdeps/aarch64/fpu/atanh_sve.c
> new file mode 100644
> index 0000000000..7a52728d70
> --- /dev/null
> +++ b/sysdeps/aarch64/fpu/atanh_sve.c
> @@ -0,0 +1,59 @@
> +/* Double-precision vector (SVE) atanh function
> +
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define WANT_SV_LOG1P_K0_SHORTCUT 0
> +#include "sv_log1p_inline.h"
> +
> +#define One (0x3ff0000000000000)
> +#define Half (0x3fe0000000000000)
> +
> +static svfloat64_t NOINLINE
> +special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
> +{
> +  return sv_call_f64 (atanh, x, y, special);
> +}
> +
> +/* SVE approximation for double-precision atanh, based on log1p.
> +   The greatest observed error is 2.81 ULP:
> +   _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
> +				      want 0x1.ffd8ff31b501cp-6.  */
> +svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
> +{
> +
> +  svfloat64_t ax = svabs_x (pg, x);
> +  svuint64_t iax = svreinterpret_u64 (ax);
> +  svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax);
> +  svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half));
> +
> +  /* It is special if iax >= 1.  */
> +//   svbool_t special = svcmpge (pg, iax, One);
> +  svbool_t special = svacge (pg, x, 1.0);
> +
> +  /* Computation is performed based on the following sequence of equality:
> +	(1+x)/(1-x) = 1 + 2x/(1-x).  */
> +  svfloat64_t y;
> +  y = svadd_x (pg, ax, ax);
> +  y = svdiv_x (pg, y, svsub_x (pg, sv_f64 (1), ax));
> +  /* ln((1+x)/(1-x)) = ln(1+2x/(1-x)) = ln(1 + y).  */
> +  y = sv_log1p_inline (y, pg);
> +
> +  if (__glibc_unlikely (svptest_any (pg, special)))
> +    return special_case (x, svmul_x (pg, halfsign, y), special);
> +  return svmul_x (pg, halfsign, y);
> +}
> diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c
> new file mode 100644
> index 0000000000..ae488f7b54
> --- /dev/null
> +++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c
> @@ -0,0 +1,79 @@
> +/* Single-precision vector (Advanced SIMD) atanh function
> +
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include "v_math.h"
> +#include "v_log1pf_inline.h"
> +
> +const static struct data
> +{
> +  struct v_log1pf_data log1pf_consts;
> +  uint32x4_t one;
> +#if WANT_SIMD_EXCEPT
> +  uint32x4_t tiny_bound;
> +#endif
> +} data = {
> +  .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
> +  .one = V4 (0x3f800000),
> +#if WANT_SIMD_EXCEPT
> +  /* 0x1p-12, below which atanhf(x) rounds to x.  */
> +  .tiny_bound = V4 (0x39800000),
> +#endif
> +};
> +
> +#define AbsMask v_u32 (0x7fffffff)
> +#define Half v_u32 (0x3f000000)
> +
> +static float32x4_t NOINLINE VPCS_ATTR
> +special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
> +{
> +  return v_call_f32 (atanhf, x, y, special);
> +}
> +
> +/* Approximation for vector single-precision atanh(x) using modified log1p.
> +   The maximum error is 3.08 ULP:
> +   __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5
> +			   want 0x1.ffcb82p-5.  */
> +VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
> +{
> +  const struct data *d = ptr_barrier (&data);
> +
> +  float32x4_t halfsign = vbslq_f32 (AbsMask, v_f32 (0.5), x);
> +  float32x4_t ax = vabsq_f32 (x);
> +  uint32x4_t iax = vreinterpretq_u32_f32 (ax);
> +
> +#if WANT_SIMD_EXCEPT
> +  uint32x4_t special
> +      = vorrq_u32 (vcgeq_u32 (iax, d->one), vcltq_u32 (iax, d->tiny_bound));
> +  /* Side-step special cases by setting those lanes to 0, which will trigger no
> +     exceptions. These will be fixed up later.  */
> +  if (__glibc_unlikely (v_any_u32 (special)))
> +    ax = v_zerofy_f32 (ax, special);
> +#else
> +  uint32x4_t special = vcgeq_u32 (iax, d->one);
> +#endif
> +
> +  float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax));
> +  y = log1pf_inline (y, d->log1pf_consts);
> +
> +  if (__glibc_unlikely (v_any_u32 (special)))
> +    return special_case (x, vmulq_f32 (halfsign, y), special);
> +  return vmulq_f32 (halfsign, y);
> +}
> +libmvec_hidden_def (V_NAME_F1 (atanh))
> +HALF_WIDTH_ALIAS_F1 (atanh)
> diff --git a/sysdeps/aarch64/fpu/atanhf_sve.c b/sysdeps/aarch64/fpu/atanhf_sve.c
> new file mode 100644
> index 0000000000..dae83041ef
> --- /dev/null
> +++ b/sysdeps/aarch64/fpu/atanhf_sve.c
> @@ -0,0 +1,54 @@
> +/* Single-precision vector (SVE) atanh function
> +
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include "sv_log1pf_inline.h"
> +
> +#define One (0x3f800000)
> +#define Half (0x3f000000)
> +
> +static svfloat32_t NOINLINE
> +special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
> +{
> +  return sv_call_f32 (atanhf, x, y, special);
> +}
> +
> +/* Approximation for vector single-precision atanh(x) using modified log1p.
> +   The maximum error is 2.28 ULP:
> +   _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5
> +				 want 0x1.ffbbb6p-5.  */
> +svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
> +{
> +  svfloat32_t ax = svabs_x (pg, x);
> +  svuint32_t iax = svreinterpret_u32 (ax);
> +  svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
> +  svfloat32_t halfsign = svreinterpret_f32 (svorr_x (pg, sign, Half));
> +  svbool_t special = svcmpge (pg, iax, One);
> +
> +  /* Computation is performed based on the following sequence of equality:
> +   * (1+x)/(1-x) = 1 + 2x/(1-x).  */
> +  svfloat32_t y = svadd_x (pg, ax, ax);
> +  y = svdiv_x (pg, y, svsub_x (pg, sv_f32 (1), ax));
> +  /* ln((1+x)/(1-x)) = ln(1+2x/(1-x)) = ln(1 + y).  */
> +  y = sv_log1pf_inline (y, pg);
> +
> +  if (__glibc_unlikely (svptest_any (pg, special)))
> +    return special_case (x, svmul_x (pg, halfsign, y), special);
> +
> +  return svmul_x (pg, halfsign, y);
> +}
> diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
> index eb2af35b27..ab7a8f7454 100644
> --- a/sysdeps/aarch64/fpu/bits/math-vector.h
> +++ b/sysdeps/aarch64/fpu/bits/math-vector.h
> @@ -49,6 +49,10 @@
>  # define __DECL_SIMD_atan __DECL_SIMD_aarch64
>  # undef __DECL_SIMD_atanf
>  # define __DECL_SIMD_atanf __DECL_SIMD_aarch64
> +# undef __DECL_SIMD_atanh
> +# define __DECL_SIMD_atanh __DECL_SIMD_aarch64
> +# undef __DECL_SIMD_atanhf
> +# define __DECL_SIMD_atanhf __DECL_SIMD_aarch64
>  # undef __DECL_SIMD_atan2
>  # define __DECL_SIMD_atan2 __DECL_SIMD_aarch64
>  # undef __DECL_SIMD_atan2f
> @@ -137,6 +141,7 @@ __vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
>  __vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
>  __vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
>  __vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
> +__vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
>  __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
>  __vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
>  __vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
> @@ -157,6 +162,7 @@ __vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
>  __vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
>  __vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
>  __vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
> +__vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
>  __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
>  __vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
>  __vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
> @@ -182,6 +188,7 @@ __sv_f32_t _ZGVsMxv_acoshf (__sv_f32_t, __sv_bool_t);
>  __sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
>  __sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t);
>  __sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
> +__sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t);
>  __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
>  __sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
>  __sv_f32_t _ZGVsMxv_erff (__sv_f32_t, __sv_bool_t);
> @@ -202,6 +209,7 @@ __sv_f64_t _ZGVsMxv_acosh (__sv_f64_t, __sv_bool_t);
>  __sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
>  __sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t);
>  __sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
> +__sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t);
>  __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
>  __sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
>  __sv_f64_t _ZGVsMxv_erf (__sv_f64_t, __sv_bool_t);
> diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
> index 3d7177c32d..a01aa99c16 100644
> --- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
> +++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
> @@ -28,6 +28,7 @@ VPCS_VECTOR_WRAPPER (acosh_advsimd, _ZGVnN2v_acosh)
>  VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin)
>  VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh)
>  VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
> +VPCS_VECTOR_WRAPPER (atanh_advsimd, _ZGVnN2v_atanh)
>  VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
>  VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
>  VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)
> diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
> index b88a2afe5c..83cb3ad5d0 100644
> --- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
> +++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
> @@ -47,6 +47,7 @@ SVE_VECTOR_WRAPPER (acosh_sve, _ZGVsMxv_acosh)
>  SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin)
>  SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh)
>  SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
> +SVE_VECTOR_WRAPPER (atanh_sve, _ZGVsMxv_atanh)
>  SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
>  SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
>  SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)
> diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
> index 533655402d..831d4d7552 100644
> --- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
> +++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
> @@ -28,6 +28,7 @@ VPCS_VECTOR_WRAPPER (acoshf_advsimd, _ZGVnN4v_acoshf)
>  VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf)
>  VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf)
>  VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
> +VPCS_VECTOR_WRAPPER (atanhf_advsimd, _ZGVnN4v_atanhf)
>  VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
>  VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
>  VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)
> diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
> index f7b673e335..96fd612c3e 100644
> --- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
> +++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
> @@ -47,6 +47,7 @@ SVE_VECTOR_WRAPPER (acoshf_sve, _ZGVsMxv_acoshf)
>  SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf)
>  SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf)
>  SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
> +SVE_VECTOR_WRAPPER (atanhf_sve, _ZGVsMxv_atanhf)
>  SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
>  SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
>  SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)
> diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
> index dc8cb08b3a..e2b43107e5 100644
> --- a/sysdeps/aarch64/libm-test-ulps
> +++ b/sysdeps/aarch64/libm-test-ulps
> @@ -173,11 +173,19 @@ double: 2
>  float: 2
>  ldouble: 4
>  
> +Function: "atanh_advsimd":
> +double: 1
> +float: 1
> +
>  Function: "atanh_downward":
>  double: 3
>  float: 3
>  ldouble: 4
>  
> +Function: "atanh_sve":
> +double: 2
> +float: 1
> +
>  Function: "atanh_towardzero":
>  double: 2
>  float: 2
> diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
> index f288afdfdd..ce42372a3a 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
> +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
> @@ -77,18 +77,23 @@ GLIBC_2.40 _ZGVnN2v_acosh F
>  GLIBC_2.40 _ZGVnN2v_acoshf F
>  GLIBC_2.40 _ZGVnN2v_asinh F
>  GLIBC_2.40 _ZGVnN2v_asinhf F
> +GLIBC_2.40 _ZGVnN2v_atanh F
> +GLIBC_2.40 _ZGVnN2v_atanhf F
>  GLIBC_2.40 _ZGVnN2v_cosh F
>  GLIBC_2.40 _ZGVnN2v_coshf F
>  GLIBC_2.40 _ZGVnN2v_erf F
>  GLIBC_2.40 _ZGVnN2v_erff F
>  GLIBC_2.40 _ZGVnN4v_acoshf F
>  GLIBC_2.40 _ZGVnN4v_asinhf F
> +GLIBC_2.40 _ZGVnN4v_atanhf F
>  GLIBC_2.40 _ZGVnN4v_coshf F
>  GLIBC_2.40 _ZGVnN4v_erff F
>  GLIBC_2.40 _ZGVsMxv_acosh F
>  GLIBC_2.40 _ZGVsMxv_acoshf F
>  GLIBC_2.40 _ZGVsMxv_asinh F
>  GLIBC_2.40 _ZGVsMxv_asinhf F
> +GLIBC_2.40 _ZGVsMxv_atanh F
> +GLIBC_2.40 _ZGVsMxv_atanhf F
>  GLIBC_2.40 _ZGVsMxv_cosh F
>  GLIBC_2.40 _ZGVsMxv_coshf F
>  GLIBC_2.40 _ZGVsMxv_erf F
> -- 
> 2.27.0
>
diff mbox series

Patch

diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index d474f2969d..4c878e5906 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -3,6 +3,7 @@  libmvec-supported-funcs = acos \
                           asin \
                           asinh \
                           atan \
+                          atanh \
                           atan2 \
                           cos \
                           cosh \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 08ea15efae..092949dc96 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -89,6 +89,11 @@  libmvec {
     _ZGVnN4v_asinhf;
     _ZGVsMxv_asinh;
     _ZGVsMxv_asinhf;
+    _ZGVnN2v_atanh;
+    _ZGVnN2v_atanhf;
+    _ZGVnN4v_atanhf;
+    _ZGVsMxv_atanh;
+    _ZGVsMxv_atanhf;
     _ZGVnN2v_cosh;
     _ZGVnN2v_coshf;
     _ZGVnN4v_coshf;
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index 1e80721c9f..afbb01e191 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -22,6 +22,7 @@  libmvec_hidden_proto (V_NAME_F1(acosh));
 libmvec_hidden_proto (V_NAME_F1(asin));
 libmvec_hidden_proto (V_NAME_F1(asinh));
 libmvec_hidden_proto (V_NAME_F1(atan));
+libmvec_hidden_proto (V_NAME_F1(atanh));
 libmvec_hidden_proto (V_NAME_F1(cos));
 libmvec_hidden_proto (V_NAME_F1(cosh));
 libmvec_hidden_proto (V_NAME_F1(erf));
diff --git a/sysdeps/aarch64/fpu/atanh_advsimd.c b/sysdeps/aarch64/fpu/atanh_advsimd.c
new file mode 100644
index 0000000000..3c3d0bd6ad
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanh_advsimd.c
@@ -0,0 +1,64 @@ 
+/* Double-precision vector (Advanced SIMD) atanh function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define WANT_V_LOG1P_K0_SHORTCUT 0
+#include "v_log1p_inline.h"
+
+const static struct data
+{
+  struct v_log1p_data log1p_consts;
+  uint64x2_t one, half;
+} data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE,
+	   .one = V2 (0x3ff0000000000000),
+	   .half = V2 (0x3fe0000000000000) };
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+  return v_call_f64 (atanh, x, y, special);
+}
+
+/* Approximation for vector double-precision atanh(x) using modified log1p.
+   The greatest observed error is 3.31 ULP:
+   _ZGVnN2v_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
+				      want 0x1.ffd8ff31b501cp-6.  */
+VPCS_ATTR
+float64x2_t V_NAME_D1 (atanh) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  float64x2_t ax = vabsq_f64 (x);
+  uint64x2_t ia = vreinterpretq_u64_f64 (ax);
+  uint64x2_t sign = veorq_u64 (vreinterpretq_u64_f64 (x), ia);
+  uint64x2_t special = vcgeq_u64 (ia, d->one);
+  float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->half));
+
+#if WANT_SIMD_EXCEPT
+  ax = v_zerofy_f64 (ax, special);
+#endif
+
+  float64x2_t y;
+  y = vaddq_f64 (ax, ax);
+  y = vdivq_f64 (y, vsubq_f64 (v_f64 (1), ax));
+  y = log1p_inline (y, &d->log1p_consts);
+
+  if (__glibc_unlikely (v_any_u64 (special)))
+    return special_case (x, vmulq_f64 (y, halfsign), special);
+  return vmulq_f64 (y, halfsign);
+}
diff --git a/sysdeps/aarch64/fpu/atanh_sve.c b/sysdeps/aarch64/fpu/atanh_sve.c
new file mode 100644
index 0000000000..7a52728d70
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanh_sve.c
@@ -0,0 +1,59 @@ 
+/* Double-precision vector (SVE) atanh function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define WANT_SV_LOG1P_K0_SHORTCUT 0
+#include "sv_log1p_inline.h"
+
+#define One (0x3ff0000000000000)
+#define Half (0x3fe0000000000000)
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+  return sv_call_f64 (atanh, x, y, special);
+}
+
+/* SVE approximation for double-precision atanh, based on log1p.
+   The greatest observed error is 2.81 ULP:
+   _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
+				      want 0x1.ffd8ff31b501cp-6.  */
+svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
+{
+
+  svfloat64_t ax = svabs_x (pg, x);
+  svuint64_t iax = svreinterpret_u64 (ax);
+  svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax);
+  svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half));
+
+  /* Lanes with |x| >= 1 are special.  svacge compares absolute values,
+     so x can be tested directly without an integer compare on iax.  */
+  svbool_t special = svacge (pg, x, 1.0);
+
+  /* The computation relies on the identity (1+x)/(1-x) = 1 + 2x/(1-x),
+     applied to |x|; the sign is restored through halfsign.  */
+  svfloat64_t y;
+  y = svadd_x (pg, ax, ax);
+  y = svdiv_x (pg, y, svsub_x (pg, sv_f64 (1), ax));
+  /* ln((1+x)/(1-x)) = ln(1+2x/(1-x)) = ln(1 + y).  */
+  y = sv_log1p_inline (y, pg);
+
+  if (__glibc_unlikely (svptest_any (pg, special)))
+    return special_case (x, svmul_x (pg, halfsign, y), special);
+  return svmul_x (pg, halfsign, y);
+}
diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c
new file mode 100644
index 0000000000..ae488f7b54
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c
@@ -0,0 +1,79 @@ 
+/* Single-precision vector (Advanced SIMD) atanh function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+#include "v_log1pf_inline.h"
+
+const static struct data
+{
+  struct v_log1pf_data log1pf_consts;
+  uint32x4_t one;
+#if WANT_SIMD_EXCEPT
+  uint32x4_t tiny_bound;
+#endif
+} data = {
+  .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
+  .one = V4 (0x3f800000),
+#if WANT_SIMD_EXCEPT
+  /* 0x1p-12, below which atanhf(x) rounds to x.  */
+  .tiny_bound = V4 (0x39800000),
+#endif
+};
+
+#define AbsMask v_u32 (0x7fffffff)
+#define Half v_u32 (0x3f000000)
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+  return v_call_f32 (atanhf, x, y, special);
+}
+
+/* Approximation for vector single-precision atanh(x) using modified log1p.
+   The maximum error is 3.08 ULP:
+   _ZGVnN4v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5
+				want 0x1.ffcb82p-5.  */
+VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  float32x4_t halfsign = vbslq_f32 (AbsMask, v_f32 (0.5), x);
+  float32x4_t ax = vabsq_f32 (x);
+  uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+
+#if WANT_SIMD_EXCEPT
+  uint32x4_t special
+      = vorrq_u32 (vcgeq_u32 (iax, d->one), vcltq_u32 (iax, d->tiny_bound));
+  /* Side-step special cases by setting those lanes to 0, which will trigger no
+     exceptions.  These lanes are fixed up later in special_case.  */
+  if (__glibc_unlikely (v_any_u32 (special)))
+    ax = v_zerofy_f32 (ax, special);
+#else
+  uint32x4_t special = vcgeq_u32 (iax, d->one);
+#endif
+
+  float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax));
+  y = log1pf_inline (y, d->log1pf_consts);
+
+  if (__glibc_unlikely (v_any_u32 (special)))
+    return special_case (x, vmulq_f32 (halfsign, y), special);
+  return vmulq_f32 (halfsign, y);
+}
+libmvec_hidden_def (V_NAME_F1 (atanh))
+HALF_WIDTH_ALIAS_F1 (atanh)
diff --git a/sysdeps/aarch64/fpu/atanhf_sve.c b/sysdeps/aarch64/fpu/atanhf_sve.c
new file mode 100644
index 0000000000..dae83041ef
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanhf_sve.c
@@ -0,0 +1,54 @@ 
+/* Single-precision vector (SVE) atanh function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_log1pf_inline.h"
+
+#define One (0x3f800000) /* Bit pattern of 1.0f.  */
+#define Half (0x3f000000) /* Bit pattern of 0.5f.  */
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+  return sv_call_f32 (atanhf, x, y, special); /* Presumably scalar fixup.  */
+}
+
+/* SVE single-precision atanh on the lanes enabled by PG, computed as
+   copysign (0.5, x) * log1p (2|x| / (1 - |x|)).  Maximum error is 2.28 ULP:
+   _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5
+				 want 0x1.ffbbb6p-5.  */
+svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
+{
+  svfloat32_t ax = svabs_x (pg, x);
+  svuint32_t iax = svreinterpret_u32 (ax);
+  /* x ^ |x| leaves only the sign bit; OR in the bits of 0.5f.  */
+  svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+  svfloat32_t halfsign = svreinterpret_f32 (svorr_x (pg, sign, Half));
+  /* |x| >= 1, infinities and NaNs all compare >= One as integers.  */
+  svbool_t special = svcmpge (pg, iax, One);
+
+  /* (1+x)/(1-x) = 1 + 2x/(1-x), so with y = 2x/(1-x):
+     ln((1+x)/(1-x)) = ln(1 + y) = log1p (y).  */
+  svfloat32_t num = svadd_x (pg, ax, ax);
+  svfloat32_t y = svdiv_x (pg, num, svsub_x (pg, sv_f32 (1), ax));
+  svfloat32_t res = svmul_x (pg, halfsign, sv_log1pf_inline (y, pg));
+
+  if (__glibc_unlikely (svptest_any (pg, special)))
+    return special_case (x, res, special);
+  return res;
+}
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index eb2af35b27..ab7a8f7454 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -49,6 +49,10 @@ 
 # define __DECL_SIMD_atan __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atanf
 # define __DECL_SIMD_atanf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atanh
+# define __DECL_SIMD_atanh __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atanhf
+# define __DECL_SIMD_atanhf __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atan2
 # define __DECL_SIMD_atan2 __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atan2f
@@ -137,6 +141,7 @@  __vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
@@ -157,6 +162,7 @@  __vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
@@ -182,6 +188,7 @@  __sv_f32_t _ZGVsMxv_acoshf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_erff (__sv_f32_t, __sv_bool_t);
@@ -202,6 +209,7 @@  __sv_f64_t _ZGVsMxv_acosh (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_erf (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index 3d7177c32d..a01aa99c16 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -28,6 +28,7 @@  VPCS_VECTOR_WRAPPER (acosh_advsimd, _ZGVnN2v_acosh)
 VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin)
 VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh)
 VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
+VPCS_VECTOR_WRAPPER (atanh_advsimd, _ZGVnN2v_atanh)
 VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
 VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
 VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index b88a2afe5c..83cb3ad5d0 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -47,6 +47,7 @@  SVE_VECTOR_WRAPPER (acosh_sve, _ZGVsMxv_acosh)
 SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin)
 SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh)
 SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
+SVE_VECTOR_WRAPPER (atanh_sve, _ZGVsMxv_atanh)
 SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
 SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
 SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 533655402d..831d4d7552 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -28,6 +28,7 @@  VPCS_VECTOR_WRAPPER (acoshf_advsimd, _ZGVnN4v_acoshf)
 VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf)
 VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf)
 VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
+VPCS_VECTOR_WRAPPER (atanhf_advsimd, _ZGVnN4v_atanhf)
 VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
 VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
 VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index f7b673e335..96fd612c3e 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -47,6 +47,7 @@  SVE_VECTOR_WRAPPER (acoshf_sve, _ZGVsMxv_acoshf)
 SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf)
 SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf)
 SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
+SVE_VECTOR_WRAPPER (atanhf_sve, _ZGVsMxv_atanhf)
 SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
 SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
 SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index dc8cb08b3a..e2b43107e5 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -173,11 +173,19 @@  double: 2
 float: 2
 ldouble: 4
 
+Function: "atanh_advsimd":
+double: 1
+float: 1
+
 Function: "atanh_downward":
 double: 3
 float: 3
 ldouble: 4
 
+Function: "atanh_sve":
+double: 2
+float: 1
+
 Function: "atanh_towardzero":
 double: 2
 float: 2
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index f288afdfdd..ce42372a3a 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -77,18 +77,23 @@  GLIBC_2.40 _ZGVnN2v_acosh F
 GLIBC_2.40 _ZGVnN2v_acoshf F
 GLIBC_2.40 _ZGVnN2v_asinh F
 GLIBC_2.40 _ZGVnN2v_asinhf F
+GLIBC_2.40 _ZGVnN2v_atanh F
+GLIBC_2.40 _ZGVnN2v_atanhf F
 GLIBC_2.40 _ZGVnN2v_cosh F
 GLIBC_2.40 _ZGVnN2v_coshf F
 GLIBC_2.40 _ZGVnN2v_erf F
 GLIBC_2.40 _ZGVnN2v_erff F
 GLIBC_2.40 _ZGVnN4v_acoshf F
 GLIBC_2.40 _ZGVnN4v_asinhf F
+GLIBC_2.40 _ZGVnN4v_atanhf F
 GLIBC_2.40 _ZGVnN4v_coshf F
 GLIBC_2.40 _ZGVnN4v_erff F
 GLIBC_2.40 _ZGVsMxv_acosh F
 GLIBC_2.40 _ZGVsMxv_acoshf F
 GLIBC_2.40 _ZGVsMxv_asinh F
 GLIBC_2.40 _ZGVsMxv_asinhf F
+GLIBC_2.40 _ZGVsMxv_atanh F
+GLIBC_2.40 _ZGVsMxv_atanhf F
 GLIBC_2.40 _ZGVsMxv_cosh F
 GLIBC_2.40 _ZGVsMxv_coshf F
 GLIBC_2.40 _ZGVsMxv_erf F