diff mbox

Update of minimal required version of Binutils to 2.25

Message ID CAMXFM3vMzGUuVisF4BwuUiN1DHzNNRZpDmbcu9KBe1cNB_bkyg@mail.gmail.com
State New
Headers show

Commit Message

Andrew Senkevich June 10, 2015, 10 a.m. UTC
2015-06-10 0:10 GMT+03:00 Joseph Myers <joseph@codesourcery.com>:
> On Tue, 9 Jun 2015, Andrew Senkevich wrote:
>
>> Hi,
>>
>> this patch updates minimal required version of Binutils to 2.25.
>> Is it ok?
>
> Could you explain *why* you propose this - what relevant features are
> brought in by that version, whose absence is problematic for building
> glibc?
>
> I thought you already had a configure check for AVX512 assembler support,
> and conditional code allowing for it being absent - so either some code
> isn't properly conditioned, or some other feature is being required by the
> libmvec code.

I do have a configure check for AVX512 assembler support, but this is a
case where some AVX512 instructions with zmm registers are not supported
by Binutils 2.24, even though that configure check passes with it. So we
need 2.25 for these few instructions, which appear in the SKX versions
of the vector math functions.

But it could also be solved in a more conservative way, by encoding
those instructions with .byte directives.
I mean updating the implementations with the following patch:



--
WBR,
Andrew

Comments

Joseph Myers June 10, 2015, 10:11 a.m. UTC | #1
On Wed, 10 Jun 2015, Andrew Senkevich wrote:

> I have configure check for AVX512 assembler support, but this is the
> case when some AVX512 instructions with zmm registers not supported by
> Binutils 2.24, with which that configure check passed. So we need 2.25
> for support of these several instructions, which appears in SKX
> versions of vector math functions.

I'd say make the configure check stricter so that it fails with 2.24, 
rather than putting .byte encodings of instructions in more places than 
necessary.
diff mbox

Patch

diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
index 14695ec..ea754b2 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
@@ -278,7 +278,13 @@  WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
         vmovups __dC7(%rax), %zmm9

 /* Get absolute argument value: X' = |X'| */
-        vandpd __dAbsMask(%rax), %zmm6, %zmm1
+/* Encoded vandpd __dAbsMask(%rax), %zmm6, %zmm1 (with __dAbsMask = 0).  */
+        .byte 0x62
+        .byte 0xf1
+        .byte 0xcd
+        .byte 0x48
+        .byte 0x54
+        .byte 0x08

 /* Y = X'*InvPi + RS : right shifter add */
         vfmadd213pd %zmm4, %zmm3, %zmm6
@@ -324,7 +330,14 @@  WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
   RECONSTRUCTION:
   Final sign setting: Res = Poly^SignRes
  */
-        vxorpd    %zmm13, %zmm12, %zmm1
+/* Encoded vxorpd %zmm13, %zmm12, %zmm1.  */
+        .byte 0x62
+        .byte 0xd1
+        .byte 0x9d
+        .byte 0x48
+        .byte 0x57
+        .byte 0xcd
+
         kmovw     %k0, %ecx
         testl     %ecx, %ecx
         jne       .LBL_2_3
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
index a78ae2e..ddda60c 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
@@ -287,7 +287,14 @@  WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
         vaddps __sHalfPI(%rax), %zmm0, %zmm2

 /* Check for large and special arguments */
-        vandps __sAbsMask(%rax), %zmm0, %zmm1
+/* Encoded vandps __sAbsMask(%rax), %zmm0, %zmm1 (__sAbsMask = 0x1000).  */
+        .byte 0x62
+        .byte 0xf1
+        .byte 0x7c
+        .byte 0x48
+        .byte 0x54
+        .byte 0x48
+        .byte 0x40

 /*
   1) Range reduction to [-Pi/2; +Pi/2] interval
@@ -320,7 +327,13 @@  WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
   a) Set shifted destination sign using XOR operation:
   R = XOR( R, S );
  */
-        vxorps    %zmm8, %zmm7, %zmm11
+/* Encoded vxorps %zmm8, %zmm7, %zmm11.  */
+        .byte 0x62
+        .byte 0x51
+        .byte 0x44
+        .byte 0x48
+        .byte 0x57
+        .byte 0xd8

 /*
   b) Calculate polynomial: