From 50692e8bcebb66a7fd14381f0a912019d3fb6fef Mon Sep 17 00:00:00 2001
From: Bert Tenjy <bert.tenjy@gmail.com>
Date: Wed, 6 Mar 2019 21:52:48 +0000
Subject: [PATCH 1/12] PPC64: First in the series of patches implementing
POWER8 vector math.
[BZ #24205]
Implements double-precision cosine using VSX vector capability. Algorithm for
cosine is from x86_64 [commit #2193311288] adapted to PPC64.
Name-mangling exactly duplicates SSE ISA of the x86_64 ABI. The details are at
<https://sourceware.org/glibc/wiki/
libmvec?action=AttachFile&do=view&target=VectorABI.txt>
The patch has been tested on PPC64/POWER8, both Little Endian and Big Endian,
using the libmvec test framework originally created for x86_64, which runs as
part of 'make check'. All tests of the new vector cosine function pass.
Configure verifies that the compiler implements VSX builtins. If so, the
vector math library is built by default.
A runtime check prevents vector tests running on systems lacking VSX hardware.
File vec_finite_alias.c has no code. It exists so that libmvec_nonshared.a is
created; without it, linking fails when testing a newly installed glibc.
Glibc built with this patch was installed using the procedure outlined at
<https://sourceware.org/glibc/wiki/Testing/Builds>. Compiling against the new
library created a test executable which computes cosines using the vector
version of the function. The results are at most 2 ulps away from the scalar
cosine. That is expected, and is noted in the comments describing the
algorithm as obtained from x86_64 commit #2193311288.
---
ChangeLog | 21 +++++
NEWS | 12 +++
sysdeps/powerpc/fpu/libm-test-ulps | 3 +
sysdeps/powerpc/powerpc64/fpu/Makefile | 7 ++
sysdeps/powerpc/powerpc64/fpu/Versions | 5 ++
sysdeps/powerpc/powerpc64/fpu/configure | 6 ++
sysdeps/powerpc/powerpc64/fpu/configure.ac | 6 ++
.../powerpc/powerpc64/fpu/math-tests-arch.h | 34 +++++++
.../powerpc/powerpc64/fpu/multiarch/Makefile | 17 ++++
.../multiarch/test-double-vlen2-wrappers.c | 24 +++++
.../powerpc64/fpu/multiarch/vec_d_cos2_vsx.c | 88 +++++++++++++++++++
.../powerpc64/fpu/multiarch/vec_d_trig_data.h | 60 +++++++++++++
.../powerpc/powerpc64/fpu/vec_finite_alias.c | 17 ++++
.../linux/powerpc/powerpc64/libmvec.abilist | 1 +
14 files changed, 301 insertions(+)
create mode 100644 sysdeps/powerpc/powerpc64/fpu/Makefile
create mode 100644 sysdeps/powerpc/powerpc64/fpu/Versions
create mode 100644 sysdeps/powerpc/powerpc64/fpu/configure
create mode 100644 sysdeps/powerpc/powerpc64/fpu/configure.ac
create mode 100644 sysdeps/powerpc/powerpc64/fpu/math-tests-arch.h
create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c
create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_cos2_vsx.c
create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_trig_data.h
create mode 100644 sysdeps/powerpc/powerpc64/fpu/vec_finite_alias.c
create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
@@ -1,3 +1,24 @@
+2019-03-06  Bert Tenjy  <bert.tenjy@gmail.com>
+
+ [BZ #24205]
+ * NEWS: Updated to note new SIMD vector cosine function.
+ * sysdeps/powerpc/fpu/libm-test-ulps (cos_vlen2): Regenerated.
+ * sysdeps/powerpc/powerpc64/fpu/Makefile: New file.
+ * sysdeps/powerpc/powerpc64/fpu/Versions: Likewise.
+ * sysdeps/powerpc/powerpc64/fpu/configure: Generated.
+ * sysdeps/powerpc/powerpc64/fpu/configure.ac: New file.
+ * sysdeps/powerpc/powerpc64/fpu/math-tests-arch.h: New file.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile (libmvec-sysdep_routines)
+ (CFLAGS-vec_d_cos2_vsx.c, libmvec-tests, double-vlen2-funcs)
+ (double-vlen2-arch-ext-cflags): Added build of VSX vector cos function
+ and its tests.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c: New file.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_cos2_vsx.c: Likewise.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_trig_data.h: Likewise.
+ * sysdeps/powerpc/powerpc64/fpu/vec_finite_alias.c: Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist: Likewise.
+
+
2019-02-26 Joseph Myers <joseph@codesourcery.com>
* sysdeps/arm/sysdep.h (#if condition): Break lines before rather
@@ -4,11 +4,23 @@ See the end for copying conditions.
Please send GNU C library bug reports via <https://sourceware.org/bugzilla/>
using `glibc' in the "product" field.
+
Version 2.30
Major new features:
+* Start of implementing vector math library libmvec on PPC64/POWER8.
+ The double-precision cosine now has a vector version.
+ GCC support for auto-vectorization of functions on PPC64 is not yet
+ available. Until that is done, the new vector math functions are
+ inaccessible to applications.
+ Library libmvec is built by default for PPC64. To disable it, pass
+ --disable-mathvec to configure.
+ The library follows the x86_64 Vector Function ABI specification.
+ More information on libmvec including a link to the ABI document is at:
+ <https://sourceware.org/glibc/wiki/libmvec>
+
* The dynamic linker accepts the --preload argument to preload shared
objects, in addition to the LD_PRELOAD environment variable.
@@ -1311,6 +1311,9 @@ ifloat128: 2
ildouble: 5
ldouble: 5
+Function: "cos_vlen2":
+double: 2
+
Function: "cosh":
double: 1
float: 1
new file mode 100644
@@ -0,0 +1,7 @@
+ifeq ($(subdir),mathvec)
+libmvec-support += vec_finite_alias
+# Compile the placeholder source with -mvsx.
+CFLAGS-vec_finite_alias.c += -mvsx
+# Static-only routine, so that libmvec_nonshared.a gets created.
+libmvec-static-only-routines = vec_finite_alias
+endif
new file mode 100644
@@ -0,0 +1,5 @@
+libmvec {
+ GLIBC_2.30 {
+ _ZGVbN2v_cos;
+ }
+}
new file mode 100644
@@ -0,0 +1,6 @@
+# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
+ # Local configure fragment for sysdeps/powerpc/powerpc64/fpu/.
+
+if test "$enable_mathvec" != "no" ; then
+ build_mathvec=yes
+fi
new file mode 100644
@@ -0,0 +1,6 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/powerpc/powerpc64/fpu/.
+
+if test "$enable_mathvec" != "no" ; then
+ build_mathvec=yes
+fi
new file mode 100644
@@ -0,0 +1,34 @@
+/* Runtime architecture check for math tests. PPC64 version.
+ Copyright (C) 2014-2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef REQUIRE_VSX
+# include "init-arch.h"
+
+/* CHECK_ARCH_EXT returns from its caller unless hwcap reports VSX.  */
+# define INIT_ARCH_EXT
+# define CHECK_ARCH_EXT \
+  do \
+    { \
+      INIT_ARCH() \
+      if ((hwcap & PPC_FEATURE_HAS_VSX) == 0) return; \
+    } \
+  while (0)
+
+#else /* !REQUIRE_VSX */
+# include <sysdeps/generic/math-tests-arch.h>
+#endif /* REQUIRE_VSX */
@@ -42,3 +42,20 @@ CFLAGS-e_hypotf-power7.c = -mcpu=power7
CFLAGS-s_modf-ppc64.c += -fsignaling-nans
CFLAGS-s_modff-ppc64.c += -fsignaling-nans
endif
+
+ifeq ($(subdir),mathvec)
+libmvec-sysdep_routines += vec_d_cos2_vsx
+CFLAGS-vec_d_cos2_vsx.c += -mvsx
+endif
+
+# Variables for libmvec tests; used only when libmvec is being built.
+ifeq ($(subdir),math)
+ifeq ($(build-mathvec),yes)
+libmvec-tests += double-vlen2
+# Functions exercised by the double-precision, two-element vector tests.
+double-vlen2-funcs = cos
+# REQUIRE_VSX selects the runtime VSX check in math-tests-arch.h.
+double-vlen2-arch-ext-cflags = -mvsx -DREQUIRE_VSX
+
+endif
+endif
new file mode 100644
@@ -0,0 +1,24 @@
+/* Wrapper part of tests for VSX ISA versions of vector math functions.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "test-double-vlen2.h"
+#include <altivec.h>
+
+#define VEC_TYPE vector double
+/* Test wrapper for _ZGVbN2v_cos (macros presumably from the header).  */
+VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos)
new file mode 100644
@@ -0,0 +1,88 @@
+/* Function cos vectorized with VSX.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include "vec_d_trig_data.h"
+
+/* Vector cosine for two doubles, computed with VSX.  Lanes whose shifted
+   argument exceeds __d_rangeval in magnitude are recomputed with the
+   scalar cos.  */
+vector double
+_ZGVbN2v_cos (vector double x)
+{
+  /* ARGUMENT RANGE REDUCTION:
+     Add Pi/2 to argument: X' = X+Pi/2.  */
+  vector double x_prime = __d_half_pi + x;
+
+  /* Get absolute argument value: X' = |X'|.  */
+  vector double abs_x_prime = vec_abs (x_prime);
+
+  /* Y = X'*InvPi + RS : right shifter add.  */
+  vector double y = vec_madd (x_prime, __d_inv_pi, __d_rshifter);
+
+  /* Check for large arguments path.  */
+  vector bool long long large_in = vec_cmpgt (abs_x_prime, __d_rangeval);
+
+  /* N = Y - RS : right shifter sub.  */
+  vector double n = y - __d_rshifter;
+
+  /* SignRes = Y<<63 : shift LSB to MSB place for result sign.  The shift
+     count is splatted as a 64-bit value so it matches the element width.  */
+  vector long long sign_res
+    = vec_sl ((vector long long) y, vec_splats ((unsigned long long) 63));
+
+  /* N = N - 0.5.  */
+  n = n - __d_one_half;
+
+  /* R = X - N*Pi1.  The *_fma constants assume fused operations, so the
+     fused multiply-subtract is requested explicitly.  */
+  vector double r = vec_nmsub (n, __d_pi1_fma, x);
+
+  /* R = R - N*Pi2.  */
+  r = vec_nmsub (n, __d_pi2_fma, r);
+
+  /* R = R - N*Pi3.  */
+  r = vec_nmsub (n, __d_pi3_fma, r);
+
+  /* R2 = R*R.  */
+  vector double r2 = r * r;
+
+  /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))).  */
+  vector double poly = vec_madd (r2, __d_coeff7, __d_coeff6);
+  poly = vec_madd (poly, r2, __d_coeff5);
+  poly = vec_madd (poly, r2, __d_coeff4);
+  poly = vec_madd (poly, r2, __d_coeff3);
+
+  /* Poly = R+R*(R2*(C1+R2*(C2+R2*Poly))).  */
+  poly = vec_madd (poly, r2, __d_coeff2);
+  poly = vec_madd (poly, r2, __d_coeff1);
+  poly = vec_madd (poly * r2, r, r);
+
+  /* RECONSTRUCTION:
+     Final sign setting: Res = Poly^SignRes.  */
+  vector double out = (vector double) ((vector long long) poly ^ sign_res);
+
+  /* Large arguments are outside the working range of the reduction above;
+     recompute those lanes with the scalar cos.  */
+  if (large_in[0] != 0)
+    out[0] = cos (x[0]);
+  if (large_in[1] != 0)
+    out[1] = cos (x[1]);
+
+  return out;
+}
new file mode 100644
@@ -0,0 +1,60 @@
+/* Constants used in polynomial approximations for vectorized sin, cos,
+ and sincos functions.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef D_TRIG_DATA_H
+#define D_TRIG_DATA_H
+
+#include <altivec.h>
+
+/* PI/2. */
+const vector double __d_half_pi = {0x1.921fb54442d18p+0, 0x1.921fb54442d18p+0};
+
+/* Inverse PI. */
+const vector double __d_inv_pi = {0x1.45f306dc9c883p-2, 0x1.45f306dc9c883p-2};
+
+/* Right-shifter constant. */
+const vector double __d_rshifter = {0x1.8p+52, 0x1.8p+52};
+
+/* Working range threshold. */
+const vector double __d_rangeval = {0x1p+23, 0x1p+23};
+
+/* One-half. */
+const vector double __d_one_half = {0x1p-1, 0x1p-1};
+
+/* Range reduction PI-based constants if FMA available:
+ PI high part (FMA available). */
+const vector double __d_pi1_fma = {0x1.921fb54442d18p+1, 0x1.921fb54442d18p+1};
+
+/* PI mid part (FMA available). */
+const vector double __d_pi2_fma = {0x1.1a62633145c06p-53, 0x1.1a62633145c06p-53};
+
+/* PI low part (FMA available). */
+const vector double __d_pi3_fma
+= {0x1.c1cd129024e09p-106,0x1.c1cd129024e09p-106};
+
+/* Polynomial coefficients (relative error 2^(-52.115)). */
+const vector double __d_coeff7 = {-0x1.9f0d60811aac8p-41,-0x1.9f0d60811aac8p-41};
+const vector double __d_coeff6 = {0x1.60e6857a2f22p-33,0x1.60e6857a2f22p-33};
+const vector double __d_coeff5 = {-0x1.ae63546002231p-26,-0x1.ae63546002231p-26};
+const vector double __d_coeff4 = {0x1.71de38030feap-19,0x1.71de38030feap-19};
+const vector double __d_coeff3 = {-0x1.a01a019a5b86dp-13,-0x1.a01a019a5b86dp-13};
+const vector double __d_coeff2 = {0x1.111111110a4a8p-7,0x1.111111110a4a8p-7};
+const vector double __d_coeff1 = {-0x1.55555555554a7p-3,-0x1.55555555554a7p-3};
+
+#endif /* D_TRIG_DATA_H. */
new file mode 100644
@@ -0,0 +1,17 @@
+/* A temporary workaround to force creation of libmvec_nonshared.a.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
new file mode 100644
@@ -0,0 +1 @@
+GLIBC_2.30 _ZGVbN2v_cos F
--
2.20.1