@@ -301,6 +301,8 @@ m32c*-*-*)
aarch64*-*-*)
cpu_type=aarch64
extra_headers="arm_fp16.h arm_neon.h arm_acle.h"
+ extra_headers="${extra_headers} ../bmi2intrin.h ../bmiintrin.h ../x86intrin.h"
+ extra_headers="${extra_headers} ../adxintrin.h"
c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o"
@@ -455,7 +457,7 @@ powerpc*-*-*spe*)
powerpc*-*-*)
cpu_type=rs6000
extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
- extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h x86intrin.h"
+ extra_headers="${extra_headers} ../bmi2intrin.h ../bmiintrin.h ../x86intrin.h"
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h si2vmx.h"
extra_headers="${extra_headers} paired.h"
case x$with_cpu in
new file mode 100644
@@ -0,0 +1,99 @@
+/* Copyright (C) 2012-2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _X86INTRIN_H_INCLUDED
+# error "Never use <adxintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _ADXINTRIN_H_INCLUDED
+#define _ADXINTRIN_H_INCLUDED
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_subborrow_u32 (unsigned char __CF, unsigned int __X,
+ unsigned int __Y, unsigned int *__P)
+{
+ unsigned long long __res = (unsigned long long) __Y - __X - __CF;
+ *__P = (unsigned int) __res;
+ __res = (unsigned char) (__res >> 32);
+ if (__res > 0)
+ return 1;
+ else
+ return 0;
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_addcarry_u32 (unsigned char __CF, unsigned int __X,
+ unsigned int __Y, unsigned int *__P)
+{
+ unsigned long long __res = (unsigned long long) __X + __Y + __CF;
+ *__P = (unsigned int) __res;
+ return (unsigned char) (__res >> 32);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_addcarryx_u32 (unsigned char __CF, unsigned int __X,
+ unsigned int __Y, unsigned int *__P)
+{
+ unsigned long long __res = (unsigned long long) __X + __Y + __CF;
+ *__P = (unsigned int) __res;
+ return (unsigned char) (__res >> 32);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_subborrow_u64 (unsigned char __CF, unsigned long long __X,
+ unsigned long long __Y, unsigned long long *__P)
+{
+ unsigned __int128 __res = (unsigned __int128) __X - __Y - __CF;
+ *__P = (unsigned long long) __res;
+ __res = (unsigned char) (__res >> 64);
+ if (__res > 0)
+ return 1;
+ else
+ return 0;
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_addcarry_u64 (unsigned char __CF, unsigned long long __X,
+ unsigned long long __Y, unsigned long long *__P)
+{
+ unsigned __int128 __res = (unsigned __int128) __X + __Y + __CF;
+ *__P = (unsigned long long) __res;
+ return (unsigned char) (__res >> 64);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_addcarryx_u64 (unsigned char __CF, unsigned long long __X,
+ unsigned long long __Y, unsigned long long *__P)
+{
+ unsigned __int128 __res = (unsigned __int128) __X + __Y + __CF;
+ *__P = (unsigned long long) __res;
+ return (unsigned char) (__res >> 64);
+}
+
+#endif /* _ADXINTRIN_H_INCLUDED */
new file mode 100644
@@ -0,0 +1,165 @@
+/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This header is distributed to simplify porting x86_64 code that
+ makes explicit use of Intel intrinsics to powerpc64le.
+ It is the user's responsibility to determine if the results are
+ acceptable and make additional changes as necessary.
+ Note that much code that uses Intel intrinsics can be rewritten in
+ standard C or GNU C extensions, which are more portable and better
+ optimized across multiple targets. */
+
+#if !defined _X86INTRIN_H_INCLUDED
+# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _BMI2INTRIN_H_INCLUDED
+#define _BMI2INTRIN_H_INCLUDED
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u32 (unsigned int __X, unsigned int __Y)
+{
+ return ((__X << (32 - __Y)) >> (32 - __Y));
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
+{
+ unsigned long long __res = (unsigned long long) __X * __Y;
+ *__P = (unsigned int) (__res >> 32);
+ return (unsigned int) __res;
+}
+
+#ifdef __LP64__
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ return ((__X << (64 - __Y)) >> (64 - __Y));
+}
+
+/* __int128 requires base 64-bit. */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mulx_u64 (unsigned long long __X, unsigned long long __Y,
+ unsigned long long *__P)
+{
+ unsigned __int128 __res = (unsigned __int128) __X * __Y;
+ *__P = (unsigned long long) (__res >> 64);
+ return (unsigned long long) __res;
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u64 (unsigned long long __X, unsigned long long __M)
+{
+ unsigned long result = 0x0UL;
+ const unsigned long mask = 0x8000000000000000UL;
+ unsigned long m = __M;
+ unsigned long c, t;
+ unsigned long p;
+
+ /* The pop-count of the mask gives the number of the bits from
+ source to process. This is also needed to shift bits from the
+ source into the correct position for the result. */
+ p = 64 - __builtin_popcountl (__M);
+
+ /* The loop is for the number of '1' bits in the mask and clearing
+ each mask bit as it is processed. */
+ while (m != 0)
+ {
+ c = __builtin_clzl (m);
+ t = __X << (p - c);
+ m ^= (mask >> c);
+ result |= (t & (mask >> c));
+ p++;
+ }
+ return (result);
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u64 (unsigned long long __X, unsigned long long __M)
+{
+ unsigned long p = 0x4040404040404040UL; // initial bit permute control
+ const unsigned long mask = 0x8000000000000000UL;
+ unsigned long m = __M;
+ unsigned long c;
+ unsigned long result;
+
+#if defined (__PPC64__) && defined (_ARCH_PWR7)
+ /* if the mask is constant and selects 8 bits or less we can use
+ the Power8 Bit permute instruction. */
+ if (__builtin_constant_p (__M) && (__builtin_popcountl (__M) <= 8))
+ {
+ /* Also if the pext mask is constant, then the popcount is
+ constant, we can evaluate the following loop at compile
+ time and use a constant bit permute vector. */
+ for (long i = 0; i < __builtin_popcountl (__M); i++)
+ {
+ c = __builtin_clzl (m);
+ p = (p << 8) | c;
+ m ^= (mask >> c);
+ }
+ result = __builtin_bpermd (p, __X);
+ }
+#endif
+ p = 64 - __builtin_popcountl (__M);
+ result = 0;
+ /* We could a use a for loop here, but that combined with
+ -funroll-loops can expand to a lot of code. The while
+ loop avoids unrolling and the compiler commons the xor
+ from clearing the mask bit with the (m != 0) test. The
+ result is a more compact loop setup and body. */
+ while (m != 0)
+ {
+ unsigned long t;
+ c = __builtin_clzl (m);
+ t = (__X & (mask >> c)) >> (p - c);
+ m ^= (mask >> c);
+ result |= (t);
+ p++;
+ }
+ return (result);
+}
+
+/* these 32-bit implementations depend on 64-bit pdep/pext
+ which depend on _ARCH_PWR7. */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u32 (unsigned int __X, unsigned int __Y)
+{
+ return _pdep_u64 (__X, __Y);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u32 (unsigned int __X, unsigned int __Y)
+{
+ return _pext_u64 (__X, __Y);
+}
+#endif /* __LP64__ */
+
+#endif /* _BMI2INTRIN_H_INCLUDED */
new file mode 100644
@@ -0,0 +1,187 @@
+/* Copyright (C) 2010-2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This header is distributed to simplify porting x86_64 code that
+ makes explicit use of Intel intrinsics to AArch64 & powerpc64le.
+ It is the user's responsibility to determine if the results are
+ acceptable and make additional changes as necessary.
+ Note that much code that uses Intel intrinsics can be rewritten in
+ standard C or GNU C extensions, which are more portable and better
+ optimized across multiple targets. */
+
+#if !defined _X86INTRIN_H_INCLUDED
+# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _BMIINTRIN_H_INCLUDED
+#define _BMIINTRIN_H_INCLUDED
+
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u16 (unsigned short __X)
+{
+ return __builtin_ctz (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__andn_u32 (unsigned int __X, unsigned int __Y)
+{
+ return (~__X & __Y);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bextr_u32 (unsigned int __X, unsigned int __P, unsigned int __L)
+{
+ return ((__X << (32 - (__L + __P))) >> (32 - __L));
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextr_u32 (unsigned int __X, unsigned int __Y)
+{
+ unsigned int __P, __L;
+ __P = __Y & 0xFF;
+ __L = (__Y >> 8) & 0xFF;
+ return (_bextr_u32 (__X, __P, __L));
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsi_u32 (unsigned int __X)
+{
+ return (__X & -__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u32 (unsigned int __X)
+{
+ return __blsi_u32 (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsmsk_u32 (unsigned int __X)
+{
+ return (__X ^ (__X - 1));
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u32 (unsigned int __X)
+{
+ return __blsmsk_u32 (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsr_u32 (unsigned int __X)
+{
+ return (__X & (__X - 1));
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u32 (unsigned int __X)
+{
+ return __blsr_u32 (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u32 (unsigned int __X)
+{
+ return __builtin_ctz (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u32 (unsigned int __X)
+{
+ return __builtin_ctz (__X);
+}
+
+/* use the 64-bit shift, rotate, and count leading zeros instructions
+ for long long. */
+#ifdef __LP64__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__andn_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ return (~__X & __Y);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bextr_u64 (unsigned long long __X, unsigned int __P, unsigned int __L)
+{
+ return ((__X << (64 - (__L + __P))) >> (64 - __L));
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextr_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ unsigned int __P, __L;
+ __P = __Y & 0xFF;
+ __L = (__Y & 0xFF00) >> 8;
+ return (_bextr_u64 (__X, __P, __L));
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsi_u64 (unsigned long long __X)
+{
+ return __X & -__X;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u64 (unsigned long long __X)
+{
+ return __blsi_u64 (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsmsk_u64 (unsigned long long __X)
+{
+ return (__X ^ (__X - 1));
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u64 (unsigned long long __X)
+{
+ return __blsmsk_u64 (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsr_u64 (unsigned long long __X)
+{
+ return (__X & (__X - 1));
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u64 (unsigned long long __X)
+{
+ return __blsr_u64 (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u64 (unsigned long long __X)
+{
+ return __builtin_ctzll (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u64 (unsigned long long __X)
+{
+ return __builtin_ctzll (__X);
+}
+#endif /* __LP64__ */
+
+#endif /* _BMIINTRIN_H_INCLUDED */
deleted file mode 100644
@@ -1,169 +0,0 @@
-/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-/* This header is distributed to simplify porting x86_64 code that
- makes explicit use of Intel intrinsics to powerpc64le.
- It is the user's responsibility to determine if the results are
- acceptable and make additional changes as necessary.
- Note that much code that uses Intel intrinsics can be rewritten in
- standard C or GNU C extensions, which are more portable and better
- optimized across multiple targets. */
-
-#if !defined _X86INTRIN_H_INCLUDED
-# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _BMI2INTRIN_H_INCLUDED
-#define _BMI2INTRIN_H_INCLUDED
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_bzhi_u32 (unsigned int __X, unsigned int __Y)
-{
- return ((__X << (32 - __Y)) >> (32 - __Y));
-}
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
-{
- unsigned long long __res = (unsigned long long) __X * __Y;
- *__P = (unsigned int) (__res >> 32);
- return (unsigned int) __res;
-}
-
-#ifdef __PPC64__
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
-{
- return ((__X << (64 - __Y)) >> (64 - __Y));
-}
-
-/* __int128 requires base 64-bit. */
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mulx_u64 (unsigned long long __X, unsigned long long __Y,
- unsigned long long *__P)
-{
- unsigned __int128 __res = (unsigned __int128) __X * __Y;
- *__P = (unsigned long long) (__res >> 64);
- return (unsigned long long) __res;
-}
-
-#ifdef _ARCH_PWR7
-/* popcount and bpermd require power7 minimum. */
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_pdep_u64 (unsigned long long __X, unsigned long long __M)
-{
- unsigned long result = 0x0UL;
- const unsigned long mask = 0x8000000000000000UL;
- unsigned long m = __M;
- unsigned long c, t;
- unsigned long p;
-
- /* The pop-count of the mask gives the number of the bits from
- source to process. This is also needed to shift bits from the
- source into the correct position for the result. */
- p = 64 - __builtin_popcountl (__M);
-
- /* The loop is for the number of '1' bits in the mask and clearing
- each mask bit as it is processed. */
- while (m != 0)
- {
- c = __builtin_clzl (m);
- t = __X << (p - c);
- m ^= (mask >> c);
- result |= (t & (mask >> c));
- p++;
- }
- return (result);
-}
-
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_pext_u64 (unsigned long long __X, unsigned long long __M)
-{
- unsigned long p = 0x4040404040404040UL; // initial bit permute control
- const unsigned long mask = 0x8000000000000000UL;
- unsigned long m = __M;
- unsigned long c;
- unsigned long result;
-
- /* if the mask is constant and selects 8 bits or less we can use
- the Power8 Bit permute instruction. */
- if (__builtin_constant_p (__M) && (__builtin_popcountl (__M) <= 8))
- {
- /* Also if the pext mask is constant, then the popcount is
- constant, we can evaluate the following loop at compile
- time and use a constant bit permute vector. */
- for (long i = 0; i < __builtin_popcountl (__M); i++)
- {
- c = __builtin_clzl (m);
- p = (p << 8) | c;
- m ^= (mask >> c);
- }
- result = __builtin_bpermd (p, __X);
- }
- else
- {
- p = 64 - __builtin_popcountl (__M);
- result = 0;
- /* We could a use a for loop here, but that combined with
- -funroll-loops can expand to a lot of code. The while
- loop avoids unrolling and the compiler commons the xor
- from clearing the mask bit with the (m != 0) test. The
- result is a more compact loop setup and body. */
- while (m != 0)
- {
- unsigned long t;
- c = __builtin_clzl (m);
- t = (__X & (mask >> c)) >> (p - c);
- m ^= (mask >> c);
- result |= (t);
- p++;
- }
- }
- return (result);
-}
-
-/* these 32-bit implementations depend on 64-bit pdep/pext
- which depend on _ARCH_PWR7. */
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_pdep_u32 (unsigned int __X, unsigned int __Y)
-{
- return _pdep_u64 (__X, __Y);
-}
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_pext_u32 (unsigned int __X, unsigned int __Y)
-{
- return _pext_u64 (__X, __Y);
-}
-#endif /* _ARCH_PWR7 */
-#endif /* __PPC64__ */
-
-#endif /* _BMI2INTRIN_H_INCLUDED */
deleted file mode 100644
@@ -1,187 +0,0 @@
-/* Copyright (C) 2010-2017 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-/* This header is distributed to simplify porting x86_64 code that
- makes explicit use of Intel intrinsics to powerpc64le.
- It is the user's responsibility to determine if the results are
- acceptable and make additional changes as necessary.
- Note that much code that uses Intel intrinsics can be rewritten in
- standard C or GNU C extensions, which are more portable and better
- optimized across multiple targets. */
-
-#if !defined _X86INTRIN_H_INCLUDED
-# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _BMIINTRIN_H_INCLUDED
-#define _BMIINTRIN_H_INCLUDED
-
-extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__tzcnt_u16 (unsigned short __X)
-{
- return __builtin_ctz (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__andn_u32 (unsigned int __X, unsigned int __Y)
-{
- return (~__X & __Y);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_bextr_u32 (unsigned int __X, unsigned int __P, unsigned int __L)
-{
- return ((__X << (32 - (__L + __P))) >> (32 - __L));
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bextr_u32 (unsigned int __X, unsigned int __Y)
-{
- unsigned int __P, __L;
- __P = __Y & 0xFF;
- __L = (__Y >> 8) & 0xFF;
- return (_bextr_u32 (__X, __P, __L));
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsi_u32 (unsigned int __X)
-{
- return (__X & -__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsi_u32 (unsigned int __X)
-{
- return __blsi_u32 (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsmsk_u32 (unsigned int __X)
-{
- return (__X ^ (__X - 1));
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsmsk_u32 (unsigned int __X)
-{
- return __blsmsk_u32 (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsr_u32 (unsigned int __X)
-{
- return (__X & (__X - 1));
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsr_u32 (unsigned int __X)
-{
- return __blsr_u32 (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__tzcnt_u32 (unsigned int __X)
-{
- return __builtin_ctz (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_tzcnt_u32 (unsigned int __X)
-{
- return __builtin_ctz (__X);
-}
-
-/* use the 64-bit shift, rotate, and count leading zeros instructions
- for long long. */
-#ifdef __PPC64__
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__andn_u64 (unsigned long long __X, unsigned long long __Y)
-{
- return (~__X & __Y);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_bextr_u64 (unsigned long long __X, unsigned int __P, unsigned int __L)
-{
- return ((__X << (64 - (__L + __P))) >> (64 - __L));
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bextr_u64 (unsigned long long __X, unsigned long long __Y)
-{
- unsigned int __P, __L;
- __P = __Y & 0xFF;
- __L = (__Y & 0xFF00) >> 8;
- return (_bextr_u64 (__X, __P, __L));
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsi_u64 (unsigned long long __X)
-{
- return __X & -__X;
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsi_u64 (unsigned long long __X)
-{
- return __blsi_u64 (__X);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsmsk_u64 (unsigned long long __X)
-{
- return (__X ^ (__X - 1));
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsmsk_u64 (unsigned long long __X)
-{
- return __blsmsk_u64 (__X);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsr_u64 (unsigned long long __X)
-{
- return (__X & (__X - 1));
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsr_u64 (unsigned long long __X)
-{
- return __blsr_u64 (__X);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__tzcnt_u64 (unsigned long long __X)
-{
- return __builtin_ctzll (__X);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_tzcnt_u64 (unsigned long long __X)
-{
- return __builtin_ctzll (__X);
-}
-#endif /* __PPC64__ */
-
-#endif /* _BMIINTRIN_H_INCLUDED */
deleted file mode 100644
@@ -1,43 +0,0 @@
-/* Copyright (C) 2008-2017 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef NO_WARN_X86_INTRINSICS
-/* This header is distributed to simplify porting x86_64 code that
- makes explicit use of Intel intrinsics to powerpc64le.
- It is the user's responsibility to determine if the results are
- acceptable and make additional changes as necessary.
- Note that much code that uses Intel intrinsics can be rewritten in
- standard C or GNU C extensions, which are more portable and better
- optimized across multiple targets. */
-#warning "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
-#endif
-
-#ifndef _X86INTRIN_H_INCLUDED
-#define _X86INTRIN_H_INCLUDED
-
-#include <bmiintrin.h>
-
-#include <bmi2intrin.h>
-
-
-#endif /* _X86INTRIN_H_INCLUDED */
new file mode 100644
@@ -0,0 +1,46 @@
+/* Copyright (C) 2008-2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef NO_WARN_X86_INTRINSICS
+/* This header is distributed to simplify porting x86_64 code that
+ makes explicit use of Intel intrinsics to Aarch64.
+ It is the user's responsibility to determine if the results are
+ acceptable and make additional changes as necessary.
+ Note that much code that uses Intel intrinsics can be rewritten in
+ standard C or GNU C extensions, which are more portable and better
+ optimized across multiple targets. */
+#warning "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
+#endif
+
+#ifndef _X86INTRIN_H_INCLUDED
+#define _X86INTRIN_H_INCLUDED
+
+#include <bmiintrin.h>
+
+#include <bmi2intrin.h>
+
+#ifdef __aarch64__
+#include <adxintrin.h>
+#endif
+
+#endif /* _X86INTRIN_H_INCLUDED */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "add\tx\[0-9\], x\[0-9\], x\[0-9\], uxt*" 4} } */
+/* { dg-final { scan-assembler-times "sub\tx\[0-9\], x\[0-9\], x\[0-9\], uxt*" 2} } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "adx-check.h"
+
+volatile unsigned char c;
+volatile unsigned int x, y;
+unsigned int *sum;
+
+void extern
+adx_test (void)
+{
+ c = _addcarryx_u32 (c, x, y, sum);
+ c = _addcarry_u32 (c, x, y, sum);
+ c = _subborrow_u32 (c, x, y, sum);
+}
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "adx-check.h"
+
+static void
+adx_test (void)
+{
+ volatile unsigned char c;
+ unsigned int x;
+ volatile unsigned int y, sum_ref;
+
+ c = 0;
+ x = y = 0xFFFFFFFF;
+ sum_ref = 0xFFFFFFFE;
+
+ /* X = 0xFFFFFFFF, Y = 0xFFFFFFFF, C = 0. */
+ c = _addcarryx_u32 (c, x, y, &x);
+ /* X = 0xFFFFFFFE, Y = 0xFFFFFFFF, C = 1. */
+ c = _addcarryx_u32 (c, x, y, &x);
+ /* X = 0xFFFFFFFE, Y = 0xFFFFFFFF, C = 1. */
+
+ if (x != sum_ref)
+ abort ();
+
+ c = 0;
+ x = y = 0xFFFFFFFF;
+ sum_ref = 0xFFFFFFFE;
+
+ /* X = 0xFFFFFFFF, Y = 0xFFFFFFFF, C = 0. */
+ c = _addcarry_u32 (c, x, y, &x);
+ /* X = 0xFFFFFFFE, Y = 0xFFFFFFFF, C = 1. */
+ c = _addcarry_u32 (c, x, y, &x);
+ /* X = 0xFFFFFFFE, Y = 0xFFFFFFFF, C = 1. */
+
+ if (x != sum_ref)
+ abort ();
+
+ c = 0;
+ x = 1;
+ y = 0;
+ sum_ref = 0x0;
+
+ /* X = 0x00000001, Y = 0x00000000, C = 0. */
+ c = _subborrow_u32 (c, x, y, &x);
+ /* X = 0xFFFFFFFF, Y = 0x00000000, C = 1. */
+ c = _subborrow_u32 (c, x, y, &x);
+ /* X = 0xFFFFFFFF, Y = 0xFFFFFFFF, C = 1. */
+
+ if (x != sum_ref)
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "add\tx\[0-9\], x\[0-9\], x\[0-9\], uxt*" 4} } */
+/* { dg-final { scan-assembler-times "sub\tx\[0-9\], x\[0-9\], x\[0-9\], uxt*" 2} } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "adx-check.h"
+
+volatile unsigned char c;
+volatile unsigned int x, y;
+unsigned int *sum;
+
+void extern
+adx_test (void)
+{
+ c = _addcarryx_u32 (c, x, y, sum);
+ c = _addcarry_u32 (c, x, y, sum);
+ c = _subborrow_u32 (c, x, y, sum);
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "adds\tx\[0-9\], x\[0-9\], x\[0-9\]" 4} } */
+/* { dg-final { scan-assembler-times "subs\tx\[0-9\], x\[0-9\], x\[0-9\]" 1} } */
+/* { dg-final { scan-assembler-times "sbc\tx\[0-9\], x\[0-9\], xzr" 1} } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "adx-check.h"
+
+volatile unsigned char c;
+volatile unsigned long long x, y;
+unsigned long long *sum;
+
+void extern
+adx_test (void)
+{
+ c = _addcarryx_u64 (c, x, y, sum);
+ c = _addcarry_u64 (c, x, y, sum);
+ c = _subborrow_u64 (c, x, y, sum);
+}
new file mode 100644
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "adx-check.h"
+
+static void
+adx_test (void)
+{
+ volatile unsigned char c;
+ unsigned long long x;
+ volatile unsigned long long y, sum_ref;
+
+ c = 0;
+ x = y = 0xFFFFFFFFFFFFFFFFLL;
+ sum_ref = 0xFFFFFFFFFFFFFFFELL;
+
+ /* X = 0xFFFFFFFFFFFFFFFF, Y = 0xFFFFFFFFFFFFFFFF, C = 0. */
+ c = _addcarryx_u64 (c, x, y, &x);
+ /* X = 0xFFFFFFFFFFFFFFFE, Y = 0xFFFFFFFFFFFFFFFF, C = 1. */
+ c = _addcarryx_u64 (c, x, y, &x);
+ /* X = 0xFFFFFFFFFFFFFFFE, Y = 0xFFFFFFFFFFFFFFFF, C = 1. */
+
+ if (x != sum_ref)
+ abort ();
+
+ c = 0;
+ x = y = 0xFFFFFFFFFFFFFFFFLL;
+ sum_ref = 0xFFFFFFFFFFFFFFFELL;
+
+ /* X = 0xFFFFFFFFFFFFFFFF, Y = 0xFFFFFFFFFFFFFFFF, C = 0. */
+ c = _addcarry_u64 (c, x, y, &x);
+ /* X = 0xFFFFFFFFFFFFFFFE, Y = 0xFFFFFFFFFFFFFFFF, C = 1. */
+ c = _addcarry_u64 (c, x, y, &x);
+ /* X = 0xFFFFFFFFFFFFFFFE, Y = 0xFFFFFFFFFFFFFFFF, C = 1. */
+
+ if (x != sum_ref)
+ abort ();
+
+ c = 0;
+ x = 1LL;
+ y = 0LL;
+ sum_ref = 0x0LL;
+
+ /* X = 0x0000000000000001, Y = 0x0000000000000000, C = 0. */
+ c = _subborrow_u64 (c, x, y, &x);
+ /* X = 0xFFFFFFFFFFFFFFFF, Y = 0x0000000000000000, C = 1. */
+ c = _subborrow_u64 (c, x, y, &x);
+ /* X = 0x0000000000000000, Y = 0x0000000000000000, C = 1. */
+
+ if (x != sum_ref)
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "adds\tx\[0-9\], x\[0-9\], x\[0-9\]" 4} } */
+/* { dg-final { scan-assembler-times "subs\tx\[0-9\], x\[0-9\], x\[0-9\]" 1} } */
+/* { dg-final { scan-assembler-times "sbc\tx\[0-9\], x\[0-9\], xzr" 1} } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "adx-check.h"
+
+volatile unsigned char c;
+volatile unsigned long long x, y;
+unsigned long long *sum;
+
+void extern
+adx_test (void)
+{
+ c = _addcarryx_u64 (c, x, y, sum);
+ c = _addcarry_u64 (c, x, y, sum);
+ c = _subborrow_u64 (c, x, y, sum);
+}
new file mode 100644
@@ -0,0 +1,19 @@
+#include <stdlib.h>
+
+static void adx_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ adx_test ();
+}
+
+int
+main ()
+{
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+long long calc_andn_u64 (long long src1,
+ long long src2,
+ long long dummy)
+{
+ return (~src1 + dummy) & (src2);
+}
+
+static void
+bmi_test()
+{
+ unsigned i;
+
+ long long src = 0xfacec0ffeefacec0;
+ long long res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = (i + src) << i;
+
+ res_ref = calc_andn_u64 (src, src+i, 0);
+ res = __andn_u64 (src, src+i);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+long long calc_andn_u32 (int src1, int src2, int dummy)
+{
+ return (~src1+dummy) & (src2);
+}
+
+static void
+bmi_test()
+{
+ unsigned i;
+
+ int src = 0xfacec0ff;
+ int res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = (i + src) << i;
+
+ res_ref = calc_andn_u32 (src, src+i, 0);
+ res = __andn_u32 (src, src+i);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+long long calc_bextr_u64 (unsigned long long src1,
+ unsigned long long src2)
+{
+ long long res = 0;
+ unsigned char start = (src2 & 0xff);
+ unsigned char len = (int) ((src2 >> 8) & 0xff);
+ if (start < 64) {
+ unsigned i;
+ unsigned last = (start+len) < 64 ? start+len : 64;
+
+ src1 >>= start;
+ for (i=start; i<last; ++i) {
+ res |= (src1 & 1) << (i-start);
+ src1 >>= 1;
+ }
+ }
+
+ return res;
+}
+
+static void
+bmi_test ()
+{
+ unsigned i;
+ unsigned char start, len;
+ unsigned long long src1 = 0xfacec0ffeefacec0;
+ unsigned long long res, res_ref, src2;
+
+ for (i=0; i<5; ++i) {
+ start = (i * 1983) % 64;
+ len = (i + (i * 1983)) % 64;
+
+ src1 = src1 * 3;
+ src2 = start | (((unsigned long long)len) << 8);
+
+ res_ref = calc_bextr_u64 (src1, src2);
+ res = __bextr_u64 (src1, src2);
+
+ if (res != res_ref)
+ abort ();
+ }
+}
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+unsigned calc_bextr_u32 (unsigned src1, unsigned src2)
+{
+ unsigned res = 0;
+ unsigned char start = (src2 & 0xff);
+ unsigned char len = (int) ((src2 >> 8) & 0xff);
+ if (start < 32) {
+ unsigned i;
+ unsigned last = (start+len) < 32 ? start+len : 32;
+
+ src1 >>= start;
+ for (i=start; i<last; ++i) {
+ res |= (src1 & 1) << (i-start);
+ src1 >>= 1;
+ }
+ }
+
+ return res;
+}
+
+static void
+bmi_test ()
+{
+ unsigned i;
+ unsigned char start, len;
+ unsigned src1 = 0xfacec0ff;
+ unsigned res, res_ref, src2;
+
+ for (i=0; i<5; ++i) {
+ start = (i * 1983) % 32;
+ len = (i + (i * 1983)) % 32;
+
+ src1 = src1 * 3;
+ src2 = start | (((unsigned)len) << 8);
+
+ res_ref = calc_bextr_u32 (src1, src2);
+ res = __bextr_u32 (src1, src2);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+unsigned calc_bextr_u32 (unsigned src1, unsigned src2)
+{
+ unsigned res = 0;
+ unsigned char start = (src2 & 0xff);
+ unsigned char len = (int) ((src2 >> 8) & 0xff);
+ if (start < 32) {
+ unsigned i;
+ unsigned last = (start+len) < 32 ? start+len : 32;
+
+ src1 >>= start;
+ for (i=start; i<last; ++i) {
+ res |= (src1 & 1) << (i-start);
+ src1 >>= 1;
+ }
+ }
+
+ return res;
+}
+
+static void
+bmi_test ()
+{
+ unsigned i;
+ unsigned char start, len;
+ unsigned src1 = 0xfacec0ff;
+ unsigned res, res_ref, src2;
+
+ for (i=0; i<5; ++i) {
+ start = i * 4;
+ len = i * 4;
+
+ src1 = src1 * 3;
+ src2 = (start & 0xff) | ((len & 0xff) << 8);
+
+ res_ref = calc_bextr_u32 (src1, src2);
+ res = _bextr_u32 (src1, start, len);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+long long calc_bextr_u64 (unsigned long long src1,
+ unsigned long long src2)
+{
+ long long res = 0;
+ unsigned char start = (src2 & 0xff);
+ unsigned char len = (int) ((src2 >> 8) & 0xff);
+ if (start < 64) {
+ unsigned i;
+ unsigned last = (start+len) < 64 ? start+len : 64;
+
+ src1 >>= start;
+ for (i=start; i<last; ++i) {
+ res |= (src1 & 1) << (i-start);
+ src1 >>= 1;
+ }
+ }
+
+ return res;
+}
+
+static void
+bmi_test ()
+{
+ unsigned i;
+ unsigned char start, len;
+ unsigned long long src1 = 0xfacec0ffeefacec0;
+ unsigned long long res, res_ref, src2;
+
+ for (i=0; i<5; ++i) {
+ start = i * 4;
+ len = i * 3;
+ src1 = src1 * 3;
+ src2 = (start & 0xff) | ((len & 0xff) << 8);
+
+ res_ref = calc_bextr_u64 (src1, src2);
+ res = _bextr_u64 (src1, start, len);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+/* To fool the compiler, so it does not generate blsi here. */
+long long calc_blsi_u64 (long long src1, long long src2)
+{
+ return (-src1) & (src2);
+}
+
+static void
+bmi_test()
+{
+ unsigned i;
+
+ long long src = 0xfacec0ffeefacec0;
+ long long res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = (i + src) << i;
+
+ res_ref = calc_blsi_u64 (src, src);
+ res = __blsi_u64 (src);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+/* To fool the compiler, so it does not generate blsi here. */
+int calc_blsi_u32 (int src1, int src2)
+{
+ return (-src1) & (src2);
+}
+
+static void
+bmi_test()
+{
+ unsigned i;
+ int src = 0xfacec0ff;
+ int res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = (i + src) << i;
+
+ res_ref = calc_blsi_u32 (src, src);
+ res = __blsi_u32 (src);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+/* Trick compiler in order not to generate target insn here. */
+long long calc_blsmsk_u64 (long long src1, long long src2)
+{
+ return (src1-1) ^ (src2);
+}
+
+static void
+bmi_test ()
+{
+ unsigned i;
+ long long src = 0xfacec0ffeefacec0;
+ long long res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = (i + src) << i;
+
+ res_ref = calc_blsmsk_u64 (src, src);
+ res = __blsmsk_u64 (src);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+/* Trick compiler in order not to generate target insn here. */
+int calc_blsmsk_u32 (int src1, int src2)
+{
+ return (src1-1) ^ (src2);
+}
+
+static void
+bmi_test ()
+{
+ unsigned i;
+ int src = 0xfacec0ff;
+ int res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = (i + src) << i;
+
+ res_ref = calc_blsmsk_u32 (src, src);
+ res = __blsmsk_u32 (src);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+long long calc_blsr_u64 (long long src1, long long src2)
+{
+ return (src1-1) & (src2);
+}
+
+static void
+bmi_test()
+{
+ unsigned i;
+ long long src = 0xfacec0ffeefacec0;
+ long long res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = (i + src) << i;
+
+ res_ref = calc_blsr_u64 (src, src);
+ res = __blsr_u64 (src);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+int calc_blsr_u32 (int src1, int src2)
+{
+ return (src1-1) & (src2);
+}
+
+static void
+bmi_test ()
+{
+ unsigned i;
+ int src = 0xfacec0ff;
+ int res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = (i + src) << i;
+
+ res_ref = calc_blsr_u32 (src, src);
+ res = __blsr_u32 (src);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+static void bmi_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ bmi_test ();
+}
+
+int
+main ()
+{
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+long long calc_tzcnt_u64 (long long src)
+{
+ int i;
+ int res = 0;
+
+ while ( (res<64) && ((src&1) == 0)) {
+ ++res;
+ src >>= 1;
+ }
+
+ return res;
+}
+
+static void
+bmi_test ()
+{
+ unsigned i;
+ long long src = 0xfacec0ffeefacec0;
+ long long res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = (i + src) << i;
+
+ res_ref = calc_tzcnt_u64 (src);
+ res = __tzcnt_u64 (src);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi-check.h"
+
+int calc_tzcnt_u32 (int src)
+{
+ int i;
+ int res = 0;
+
+ while ( (res<32) && ((src&1) == 0)) {
+ ++res;
+ src >>= 1;
+ }
+ return res;
+}
+
+static void
+bmi_test ()
+{
+ unsigned i;
+ int src = 0xfacec0ff;
+ int res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ src = i + (src << i);
+
+ res_ref = calc_tzcnt_u32 (src);
+ res = __tzcnt_u32 (src);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_bzhi_u32 (unsigned a, int l)
+{
+ unsigned res = a;
+ int i;
+ for (i = 0; i < 32 - l; ++i)
+ res &= ~(1 << (31 - i));
+
+ return res;
+}
+
+static void
+bmi2_test ()
+{
+ unsigned i;
+ unsigned src = 0xce7ace0f;
+ unsigned res, res_ref;
+
+ for (i = 0; i < 5; ++i) {
+ src = src * (i + 1);
+
+ res_ref = calc_bzhi_u32 (src, i * 2);
+ res = _bzhi_u32 (src, i * 2);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_bzhi_u64 (unsigned long long a, int l)
+{
+ unsigned long long res = a;
+ int i;
+ for (i = 0; i < 64 - l; ++i)
+ res &= ~(1LL << (63 - i));
+
+ return res;
+}
+
+static void
+bmi2_test ()
+{
+ unsigned i;
+ unsigned long long src = 0xce7ace0ce7ace0ff;
+ unsigned long long res, res_ref;
+
+ for (i = 0; i < 5; ++i) {
+ src = src * (i + 1);
+
+ res_ref = calc_bzhi_u64 (src, i * 2);
+ res = _bzhi_u64 (src, i * 2);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+
+unsigned long long
+test__bzhi_u64_group (unsigned long long a)
+{
+ /* bzhi is implemented in source as shift left then shift right
+ to clear the high order bits.
+ For the case where the starting index is const, the compiler
+ should reduces this to a single Rotate Left Doubleword
+ Immediate then Clear Left (rldicl) instruction. */
+ unsigned long long res;
+ res = _bzhi_u64 (a, 8);
+ res += _bzhi_u64 (a, 16);
+ res += _bzhi_u64 (a, 24);
+ res += _bzhi_u64 (a, 32);
+ res += _bzhi_u64 (a, 40);
+ res += _bzhi_u64 (a, 48);
+ return (res);
+}
+/* the resulting assembler should have 6 X rldicl and no sld or
+ srd instructions. */
+
+/* { dg-final { scan-assembler-not "sld" } } */
+/* { dg-final { scan-assembler-not "srd" } } */
new file mode 100644
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+static void bmi2_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ bmi2_test ();
+}
+
+int
+main ()
+{
+ do_test ();
+
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_mul_u32 (unsigned volatile a, unsigned b)
+{
+ unsigned long long res = 0;
+ int i;
+ for (i = 0; i < b; ++i)
+ res += a;
+
+ return res;
+}
+
+__attribute__((noinline))
+unsigned long long
+gen_mulx (unsigned a, unsigned b)
+{
+ unsigned long long res;
+
+ res = (unsigned long long)a * b;
+
+ return res;
+}
+
+static void
+bmi2_test ()
+{
+ unsigned i;
+ unsigned a = 0xce7ace0;
+ unsigned b = 0xfacefff;
+ unsigned long long res, res_ref;
+
+ for (i = 0; i < 5; ++i) {
+ a = a * (i + 1);
+ b = b / (i + 1);
+
+ res_ref = calc_mul_u32 (a, b);
+ res = gen_mulx (a, b);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_mul_u32 (unsigned volatile a, unsigned b)
+{
+ unsigned long long res = 0;
+ int i;
+ for (i = 0; i < b; ++i)
+ res += a;
+
+ return res;
+}
+
+__attribute__((noinline))
+unsigned calc_mulx_u32 (unsigned x, unsigned y, unsigned *res_h)
+{
+ return (unsigned) _mulx_u32 (x, y, res_h);
+}
+
+static void
+bmi2_test ()
+{
+ unsigned i;
+ unsigned a = 0xce7ace0;
+ unsigned b = 0xfacefff;
+ unsigned res_l, res_h;
+ unsigned long long res, res_ref;
+
+ for (i = 0; i < 5; ++i) {
+ a = a * (i + 1);
+ b = b / (i + 1);
+
+ res_ref = calc_mul_u32 (a, b);
+ res_l = calc_mulx_u32 (a, b, &res_h);
+
+ res = ((unsigned long long) res_h << 32) | res_l;
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned __int128
+calc_mul_u64 (unsigned long long volatile a, unsigned long long b)
+{
+ unsigned __int128 res = 0;
+ int i;
+ for (i = 0; i < b; ++i)
+ res += (unsigned __int128) a;
+
+ return res;
+}
+
+static void
+bmi2_test ()
+{
+ unsigned i;
+ unsigned long long a = 0xce7ace0ce7ace0;
+ unsigned long long b = 0xface;
+ unsigned __int128 res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ a = a * (i + 1);
+ b = b / (i + 1);
+
+ res_ref = calc_mul_u64 (a, b);
+ res = (unsigned __int128) a * b;
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned __int128
+calc_mul_u64 (unsigned long long volatile a, unsigned long long b)
+{
+ unsigned __int128 res = 0;
+ int i;
+ for (i = 0; i < b; ++i)
+ res += (unsigned __int128) a;
+
+ return res;
+}
+
+__attribute__((noinline))
+unsigned long long
+calc_mulx_u64 (unsigned long long x,
+ unsigned long long y,
+ unsigned long long *res_h)
+{
+ return _mulx_u64 (x, y, res_h);
+}
+
+
+static void
+bmi2_test ()
+{
+ unsigned i;
+ unsigned long long a = 0xce7ace0ce7ace0;
+ unsigned long long b = 0xface;
+ unsigned long long res_l, res_h;
+ unsigned __int128 res, res_ref;
+
+ for (i=0; i<5; ++i) {
+ a = a * (i + 1);
+ b = b / (i + 1);
+
+ res_ref = calc_mul_u64 (a, b);
+
+ res_l = calc_mulx_u64 (a, b, &res_h);
+
+ res = ((unsigned __int128) res_h << 64) | res_l;
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target lp64 } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_pdep_u32 (unsigned a, int mask)
+{
+ unsigned res = 0;
+ int i, k = 0;
+
+ for (i = 0; i < 32; ++i)
+ if (mask & (1 << i)) {
+ res |= ((a & (1 << k)) >> k) << i;
+ ++k;
+ }
+
+ return res;
+}
+
+static void
+bmi2_test ()
+{
+ unsigned i;
+ unsigned src = 0xce7acc;
+ unsigned res, res_ref;
+
+ for (i = 0; i < 5; ++i) {
+ src = src * (i + 1);
+
+ res_ref = calc_pdep_u32 (src, i * 3);
+ res = _pdep_u32 (src, i * 3);
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target lp64 } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_pdep_u64 (unsigned long long a, unsigned long long mask)
+{
+ unsigned long long res = 0;
+ unsigned long long i, k = 0;
+
+ for (i = 0; i < 64; ++i)
+ if (mask & (1LL << i)) {
+ res |= ((a & (1LL << k)) >> k) << i;
+ ++k;
+ }
+ return res;
+}
+
+static
+void
+bmi2_test ()
+{
+ unsigned long long i;
+ unsigned long long src = 0xce7acce7acce7ac;
+ unsigned long long res, res_ref;
+
+ for (i = 0; i < 5; ++i) {
+ src = src * (i + 1);
+
+ res_ref = calc_pdep_u64 (src, ~(i * 3));
+ res = _pdep_u64 (src, ~(i * 3));
+
+ if (res != res_ref)
+ abort ();
+ }
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target lp64 } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_pext_u32 (unsigned a, unsigned mask)
+{
+ unsigned res = 0;
+ int i, k = 0;
+
+ for (i = 0; i < 32; ++i)
+ if (mask & (1 << i)) {
+ res |= ((a & (1 << i)) >> i) << k;
+ ++k;
+ }
+
+ return res;
+}
+
+static void
+bmi2_test ()
+{
+ unsigned i;
+ unsigned src = 0xce7acc;
+ unsigned res, res_ref;
+
+ for (i = 0; i < 5; ++i) {
+ src = src * (i + 1);
+
+ res_ref = calc_pext_u32 (src, ~(i * 3));
+ res = _pext_u32 (src, ~(i * 3));
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target lp64 } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_pext_u64 (unsigned long long a, unsigned long long mask)
+{
+ unsigned long long res = 0;
+ int i, k = 0;
+
+ for (i = 0; i < 64; ++i)
+ if (mask & (1LL << i)) {
+ res |= ((a & (1LL << i)) >> i) << k;
+ ++k;
+ }
+
+ return res;
+}
+
+static void
+bmi2_test ()
+{
+ unsigned long long i;
+ unsigned long long src = 0xce7acce7acce7ac;
+ unsigned long long res, res_ref;
+
+ for (i = 0; i < 5; ++i) {
+ src = src * (i + 1);
+
+ res_ref = calc_pext_u64 (src, ~(i * 3));
+ res = _pext_u64 (src, ~(i * 3));
+
+ if (res != res_ref)
+ abort();
+ }
+}
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target lp64 } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include <x86intrin.h>
+
+unsigned long long
+test__pexp_cmask_u64 (unsigned long long a[4])
+{
+ /* The _pext implmentation is nominally a popcount of the mask,
+ followed by a loop using count leading zeros to find the
+ next bit to process.
+ If the mask is a const, the popcount should be folded and
+ the constant propagation should eliminate the mask
+ generation loop and produce a single constant bpermd permute
+ control word.
+ This test verifies that the compiler is replacing the mask
+ popcount and loop with a const bperm control and generating
+ the bpermd for this case. */
+ const unsigned long mask = 0x00000000100000a4UL;
+ unsigned long res;
+ res = _pext_u64 (a[0], mask);
+ res = (res << 8) | _pext_u64 (a[1], mask);
+ res = (res << 8) | _pext_u64 (a[2], mask);
+ res = (res << 8) | _pext_u64 (a[3], mask);
+ return (res);
+}
+/* the resulting assembler should have 4 X bpermd and no popcntd or
+ cntlzd instructions. */
+
+/* { dg-final { scan-assembler-times "bpermd" 4 } } */
+/* { dg-final { scan-assembler-not "popcntd" } } */
+/* { dg-final { scan-assembler-not "cntlzd" } } */