@@ -430,20 +430,10 @@ _mm_cmpnge_pd (__m128d __A, __m128d __B)
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_pd (__m128d __A, __m128d __B)
{
-#if _ARCH_PWR8
__v2du c, d;
/* Compare against self will return false (0's) if NAN. */
c = (__v2du)vec_cmpeq (__A, __A);
d = (__v2du)vec_cmpeq (__B, __B);
-#else
- __v2du a, b;
- __v2du c, d;
- const __v2du double_exp_mask = {0x7ff0000000000000, 0x7ff0000000000000};
- a = (__v2du)vec_abs ((__v2df)__A);
- b = (__v2du)vec_abs ((__v2df)__B);
- c = (__v2du)vec_cmpgt (double_exp_mask, a);
- d = (__v2du)vec_cmpgt (double_exp_mask, b);
-#endif
/* A != NAN and B != NAN. */
return ((__m128d)vec_and(c, d));
}
@@ -1472,6 +1462,7 @@ _mm_mul_su32 (__m64 __A, __m64 __B)
return ((__m64)a * (__m64)b);
}
+#ifdef _ARCH_PWR8
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epu32 (__m128i __A, __m128i __B)
{
@@ -1498,6 +1489,7 @@ _mm_mul_epu32 (__m128i __A, __m128i __B)
return (__m128i) vec_mule ((__v4su)__A, (__v4su)__B);
#endif
}
+#endif
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi16 (__m128i __A, int __B)
@@ -123,17 +123,21 @@ _mm_hsub_pd (__m128d __X, __m128d __Y)
vec_mergel ((__v2df) __X, (__v2df)__Y));
}
+#ifdef _ARCH_PWR8
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehdup_ps (__m128 __X)
{
return (__m128)vec_mergeo ((__v4su)__X, (__v4su)__X);
}
+#endif
+#ifdef _ARCH_PWR8
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_moveldup_ps (__m128 __X)
{
return (__m128)vec_mergee ((__v4su)__X, (__v4su)__X);
}
+#endif
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loaddup_pd (double const *__P)
@@ -272,6 +272,7 @@ _mm_extract_ps (__m128 __X, const int __N)
return ((__v4si)__X)[__N & 3];
}
+#ifdef _ARCH_PWR8
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
{
@@ -283,6 +284,7 @@ _mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
#endif
return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __shortmask);
}
+#endif
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
@@ -343,6 +345,7 @@ _mm_blend_pd (__m128d __A, __m128d __B, const int __imm8)
return (__m128d) __r;
}
+#ifdef _ARCH_PWR8
__inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask)
@@ -351,6 +354,7 @@ _mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask)
const __vector __bool long long __boolmask = vec_cmplt ((__v2di) __mask, __zero);
return (__m128d) vec_sel ((__v2du) __A, (__v2du) __B, (__v2du) __boolmask);
}
+#endif
__inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -350,6 +350,7 @@ _mm_shuffle_pi8 (__m64 __A, __m64 __B)
return (__m64) ((__v2du) (__C))[0];
}
+#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8 (__m128i __A, __m128i __B)
@@ -361,7 +362,9 @@ _mm_sign_epi8 (__m128i __A, __m128i __B)
__v16qi __conv = vec_add (__selectneg, __selectpos);
return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv);
}
+#endif
+#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16 (__m128i __A, __m128i __B)
@@ -373,7 +376,9 @@ _mm_sign_epi16 (__m128i __A, __m128i __B)
__v8hi __conv = vec_add (__selectneg, __selectpos);
return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv);
}
+#endif
+#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32 (__m128i __A, __m128i __B)
@@ -385,7 +390,9 @@ _mm_sign_epi32 (__m128i __A, __m128i __B)
__v4si __conv = vec_add (__selectneg, __selectpos);
return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv);
}
+#endif
+#ifdef _ARCH_PWR8
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8 (__m64 __A, __m64 __B)
@@ -396,7 +403,9 @@ _mm_sign_pi8 (__m64 __A, __m64 __B)
__C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D);
return (__m64) ((__v2du) (__C))[0];
}
+#endif
+#ifdef _ARCH_PWR8
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16 (__m64 __A, __m64 __B)
@@ -407,7 +416,9 @@ _mm_sign_pi16 (__m64 __A, __m64 __B)
__C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D);
return (__m64) ((__v2du) (__C))[0];
}
+#endif
+#ifdef _ARCH_PWR8
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32 (__m64 __A, __m64 __B)
@@ -418,6 +429,7 @@ _mm_sign_pi32 (__m64 __A, __m64 __B)
__C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D);
return (__m64) ((__v2du) (__C))[0];
}
+#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))